diff --git a/.gitignore b/.gitignore
index c6335cf4..1529576e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,3 +44,12 @@ dataset/
 
 # configs
 configs.json
+
+# pyenv
+.python-version
+
+# poetry
+poetry.lock
+
+# wandb
+wandb
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..d1117cb4
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,66 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.4.0
+  hooks:
+  - id: trailing-whitespace
+  - id: end-of-file-fixer
+  - id: sort-simple-yaml
+  - id: check-json
+  - id: check-merge-conflict
+  - id: check-symlinks
+  - id: debug-statements
+  - id: check-added-large-files
+- repo: https://github.com/python-poetry/poetry
+  rev: 1.6.0
+  hooks:
+  - id: poetry-check
+  - id: poetry-lock
+    # -   id: poetry-publish
+- repo: https://github.com/psf/black
+  rev: 23.9.1
+  hooks:
+  - id: black
+    args: [--line-length, '120']
+- repo: https://github.com/PyCQA/isort
+  rev: 5.12.0
+  hooks:
+  - id: isort
+- repo: https://github.com/PyCQA/flake8
+  rev: 6.1.0
+  hooks:
+  - id: flake8
+    args: [--max-line-length=120, --extend-ignore=E203]
+- repo: https://github.com/PyCQA/pydocstyle
+  rev: 6.3.0
+  hooks:
+  - id: pydocstyle
+    args: [--convention=numpy]
+    additional_dependencies: [tomli]
+- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
+  rev: v2.10.0
+  hooks:
+  - id: pretty-format-toml
+    args: [--autofix, --no-sort]
+  - id: pretty-format-yaml
+    args: [--autofix]
+- repo: local
+  hooks:
+  - id: pylint
+    name: pylint
+    entry: poetry run pylint
+    language: system
+    types: [python]
+  - id: poetry-export-requirements
+    name: poetry-export-requirements
+    entry: poetry export --without-hashes --with=main,research -f requirements.txt -o requirements.txt
+    language: system
+    types: [python]
+    pass_filenames: false
+  - id: poetry-export-requirements-dev
+    name: poetry-export-requirements-dev
+    entry: poetry export --without-hashes --only dev -f requirements.txt -o requirements.dev.txt
+    language: system
+    types: [python]
+    pass_filenames: false
diff --git a/README.md b/README.md
index cafb3bff..7f00d26b 100644
--- a/README.md
+++ b/README.md
@@ -1,235 +1,156 @@
-# Nocturne
+# `nocturne_lab`: fast driving simulator 🧪 + 🚗
 
-Nocturne is a 2D, partially observed, driving simulator, built in C++ for speed and exported as a Python library.
+`nocturne_lab` is a maintained fork of [Nocturne](https://github.com/facebookresearch/nocturne), a 2D, partially observed, driving simulator built in C++. Currently, `nocturne_lab` is used internally at the Emerge lab. You can get started with the intro examples 🏎️💨 [here](https://github.com/Emerge-Lab/nocturne_lab/tree/feature/nocturne_fork_cleanup/examples).
 
-It is currently designed to handle traffic scenarios from the [Waymo Open Dataset](https://github.com/waymo-research/waymo-open-dataset), and with some work could be extended to support different driving datasets. Using the Python library `nocturne`, one is able to train controllers for AVs to solve various tasks from the Waymo dataset, which we provide as a benchmark, then use the tools we offer to evaluate the designed controllers.
+## Basic usage
 
-Using this rich data source, Nocturne contains a wide range of scenarios whose solution requires the formation of complex coordination, theory of mind, and handling of partial observability. Below we show replays of the expert data, centered on the light blue agent, with the corresponding view of the agent on the right.
-<!-- <p float="left" align="center">
-  <img src="https://user-images.githubusercontent.com/33672752/174244303-91fb597a-0d3e-4a92-8901-46e1134c28b4.gif" width="250" height="250"/>
-  <img src="https://user-images.githubusercontent.com/33672752/174244860-65865e95-0592-4279-ab5d-f40842092cc7.gif" width="250" height="250"/>
-  <img src="https://user-images.githubusercontent.com/33672752/174244327-51f98409-4afd-424e-88f5-29892e89d796.gif" width="250" height="250"/>
-</p> -->
-![Intersection Scene with Obscured View](./docs/readme_files/git_intersection_combined.gif)
+```python
+from nocturne.envs.base_env import BaseEnv
 
-Nocturne features a rich variety of scenes, ranging from parking lots, to merges, to roundabouts, to unsignalized intersections.
+# Initialize an environment
+env = BaseEnv(config=env_config)
 
-![Intersection Scene with Obscured View](./docs/readme_files/nocturne_3_by_3_scenes.gif)
+# Reset
+obs_dict = env.reset()
 
-More videos can be found [here](https://www.nathanlct.com/research/nocturne).
+# Get info
+agent_ids = [agent_id for agent_id in obs_dict.keys()]
+dead_agent_ids = []
 
-The corresponding paper is available at: [https://arxiv.org/abs/2206.09889](https://arxiv.org/abs/2206.09889). Please cite the paper and not the GitHub repository, using the following citation:
+for step in range(1000):
 
-```bibtex
-@article{nocturne2022,
-  author  = {Vinitsky, Eugene and Lichtlé, Nathan and Yang, Xiaomeng and Amos, Brandon and Foerster, Jakob},
-  journal = {arXiv preprint arXiv:2206.09889},
-  title   = {{Nocturne: a scalable driving benchmark for bringing multi-agent learning one step closer to the real world}},
-  url     = {https://arxiv.org/abs/2206.09889},
-  year    = {2022}
-}
-```
+    # Sample actions
+    action_dict = {
+        agent_id: env.action_space.sample() 
+        for agent_id in agent_ids
+        if agent_id not in dead_agent_ids
+    }
+    
+    # Step in env
+    obs_dict, rew_dict, done_dict, info_dict = env.step(action_dict)
 
-# Installation
+    # Update dead agents
+    for agent_id, is_done in done_dict.items():
+        if is_done and agent_id not in dead_agent_ids:
+            dead_agent_ids.append(agent_id)
 
-**Feel free to [open an issue](https://github.com/facebookresearch/nocturne/issues/new/choose) at any time if you encounter a problem, need some help with installing or using Nocturne, want to ask us any related question, or even propose a new feature. We will be happy to help!**
+    # Reset if all agents are done
+    if done_dict["__all__"]:
+        obs_dict = env.reset()
+        dead_agent_ids = []
 
-## Dependencies
+# Close environment
+env.close()
+```
 
-[CMake](https://cmake.org/) is required to compile the C++ library. 
+## Implemented algorithms
 
-Run `cmake --version` to see whether CMake is already installed in your environment. If not, refer to the CMake website instructions for installation, or you can use:
+| Algorithm                              | Reference                                                  | Code  | Compatible with    | Notes                                                                                                                                                                  |
+| -------------------------------------- | ---------------------------------------------------------- | ----- | ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| PPO **single-agent** control | [Schulman et al., 2017](https://arxiv.org/pdf/1707.06347.pdf) | [ppo_with_sb3.ipynb](https://github.com/Emerge-Lab/nocturne_lab/blob/feature/nocturne_fork_cleanup/examples/04_ppo_with_sb3.ipynb) | Stable baselines 3 |                                                                                                                                                                        |
+| PPO **multi-agent** control  | [Schulman et al., 2017](https://arxiv.org/pdf/1707.06347.pdf) | `#TODO` | Stable baselines 3 | SB3 doesn't support multi-agent environments. We use the `VecEnv` class to treat observations from multiple agents as a set of vectorized single-agent environments. |
+|                                        |                                                            |       |                    |                                                                                                                                                                        |
+|                                        |                                                            |       |                    |                                                                                                                                                                        |
 
-- `sudo apt-get -y install cmake` (Linux)
-- `brew install cmake` (MacOS)
+## Installation
 
-### All machines besides OS with Mac M1 chip follow instructions below
-Nocturne uses [SFML](https://github.com/SFML/SFML) for drawing and visualization, as well as on [pybind11](https://pybind11.readthedocs.io/en/latest/) for compiling the C++ code as a Python library.
+### Requirements
 
-To install SFML:
+* Python (>=3.10)
 
-- `sudo apt-get install libsfml-dev` (Linux)
-- `brew install sfml` (MacOS)
+### Virtual environment
+The options below describe different ways to set up a virtual environment. Any of them works, although `pyenv` is recommended.
 
-pybind11 is included as a submodule and will be installed in the next step.
+> _Note:_ The virtual environment needs to be **activated each time** before you start working.
 
-### Machines with a Mac M1 chip
-Unfortunately if you have a Mac M1 chip you need to ensure that your SFML version is x86_64 instead of arm64; by default brew will install the arm64 variant. The following instructions will help you do this.
+#### Option 1: `pyenv`
+Create a virtual environment by running:
 
-1. Make sure you have rosetta2 installed. You can do this by running `softwareupdate --install-rosetta` from the command line.
-2. Build an x86_64 version of brew (which you alias to brow) using the instructions here: [stackoverflow](https://stackoverflow.com/questions/64951024/how-can-i-run-two-isolated-installations-of-homebrew).
-3. Now, run `brow install sfml`
-then everything will compile fine.
+```shell
+pyenv virtualenv 3.10.12 nocturne_lab
+```
 
-## Installing Nocturne
+Activate the virtual environment every time you start a new shell session, before running any other commands:
 
-Start by cloning the repo:
+```shell
+pyenv shell nocturne_lab
+```
 
-```bash
-git clone https://github.com/facebookresearch/nocturne.git
-cd nocturne
+Fortunately, `pyenv` provides a way to assign a virtual environment to a directory. To set it for this project, run:
+```shell
+pyenv local nocturne_lab
 ```
 
-Then run the following to install git submodules:
+#### Option 2: `conda`
+Create a conda environment by running:
 
-```bash
-git submodule sync
-git submodule update --init --recursive
+```shell
+conda env create -f ./environment.yml
 ```
 
-If you are using [Conda](https://docs.conda.io/en/latest/) (recommended), you can instantiate an environment and install Nocturne into it with the following:
-
-```bash
-# create the environment and install the dependencies
-conda env create -f environment.yml
+This creates a conda environment called `nocturne_lab` that uses Python 3.10.
 
-# activate the environment where the Python library should be installed
-conda activate nocturne
+To activate the virtual environment, run:
 
-# run the C++ build and install Nocturne into the simulation environment
-python setup.py develop
+```shell
+conda activate nocturne_lab
 ```
 
-If you are not using Conda, simply run the last command to build and install Nocturne at your default Python path.
+#### Option 3: `venv`
+Create a virtual environment by running:
 
-You should then be all set to use the library. To find an example of constructing a Gym environment, using a basic Simulation, or rendering scenes, go to 
-```examples``` and run respectively, ```create_env.py```, ```nocturne_functions.py``` or ```rendering.py```.
+```shell
+python -m venv .venv
+```
 
-Python tests can be run with `pytest`.
+Activate the virtual environment every time you start a new shell session, before running any other commands:
 
-<details>
-<summary><b>Click here for a list of common installation errors</b></summary>
+```shell
+source .venv/bin/activate
+```
 
-### pybind11 installation errors
+### Dependencies
 
-If you are getting errors with pybind11, install it directly in your conda environment (eg. `conda install -c conda-forge pybind11` or `pip install pybind11`, cf. https://pybind11.readthedocs.io/en/latest/installing.html for more info).
-</details>
+`poetry` is used to manage the project and its dependencies. Start by installing `poetry` in your virtual environment:
 
-## Dataset
+```shell
+pip install poetry
+```
+
+Before installing the package, you first need to synchronise and update the git submodules by running:
 
-### Downloading the dataset
-Two versions of the dataset are available:
-- a mini-one that is about 1 GB and consists of 1000 training files and 100 validation / test files at: [Dropbox Link](https://www.dropbox.com/sh/8mxue9rdoizen3h/AADGRrHYBb86pZvDnHplDGvXa?dl=0).
-- the full dataset (150 GB) and consists of 134453 training files and 12205 validation / test files: [Dropbox Link](https://www.dropbox.com/sh/wv75pjd8phxizj3/AABfNPWfjQdoTWvdVxsAjUL_a?dl=0)
+```shell
+# Synchronise and update git submodules
+git submodule sync
+git submodule update --init --recursive
+```
 
-Place the dataset in a folder of your choosing, unzip the folders inside of it, and change the DATA_FOLDER in ```cfgs/config.py``` to point to where you have
-downloaded it.
+Now install the package by running:
 
-### (Optional) Rebuilding the Dataset
-**Warning** this step is not necessary, the dataset has already been downloaded in the prior step. This is only needed if you want to rebuild the dataset from scratch.
+```shell
+poetry install
+```
 
-First, go to [Waymo Open](https://github.com/waymo-research/waymo-open-dataset/blob/master/tutorial/tutorial.ipynb) and follow the instructions to install the required packages. This may require additional steps if you are not on a Linux machine.
+> _Note:_ Under the hood, the `nocturne` package uses the `nocturne_cpp` Python package that wraps the Nocturne C++ code base and provides bindings for Python to interact with the C++ code using `pybind11`.
 
-If you do want to rebuild the dataset, download the Waymo Motion version 1.1 files.
-- Open ```cfgs/config.py``` and change ```DATA_FOLDER``` to be the path to your Waymo motion files
-- Run ```python scripts/json_generation/run_waymo_constructor.py --parallel --no_tl --all_files --datatype train valid```. This will construct, in parallel, a dataset of all the train and validation files in the waymo motion data. It should take on the order of 5 minutes with 20 CPUs. If you want to include traffic lights scenes, remove the ```--no_tl``` flag.
-- To ensure that only files that have a guaranteed solution are included (for example, that there are no files where the agent goal is across an apparently uncrossable road edge), run ```python scripts/json_generation/make_solvable_files.py --datatype train valid```.
 
-## C++ build instructions
+### Development setup
+To configure the development setup, run:
+```shell
+# Install poetry dev dependencies
+poetry install --only=dev
 
-If you want to build the C++ library independently of the Python one, run the following:
+# Install pre-commit (for flake8, isort, black, etc.)
+pre-commit install
 
-```bash
-cd nocturne/cpp
-mkdir build
-cd build
-cmake ..
-make
-make install
+# Optional: Install poetry docs dependencies
+poetry install --only=docs
 ```
 
-Subsequently, the C++ tests can be ran with `./tests/nocturne_test` from within the `nocturne/cpp/build` directory.
-
-# Usage
-
-To get a sense of available functionality in Nocturne, we have provided a few examples in the `examples` folder of how to construct the env (`create_env.py`), how to construct particular observations (`nocturne_functions.py`), and how to render results (`rendering.py`).
-
-**Note**: by default, Nocturne will log to ```$NOCTURNE_LOG_DIR``` which is set in ```nocturne/__init__.py``` and defaults to ```<PROJECT_PATH>/logs```. If you'd like to log somewhere else, go to ```nocturne/__init__.py``` and change ```$NOCTURNE_LOG_DIR``` to a different path.
-
-The following goes over how to use training algorithms using the Nocturne environment.
-
-## Running the RL algorithms
-Nocturne comes shipped with a default Gym environment in ```nocturne/envs/base_env.py```. Atop this, we build integration for a few popular RL libraries.
-
-Nocturne by default comes with support for three versions of Proximal Policy Optimization:
-1. Sample Factory, a high throughput asynchronous PPO implementation (https://github.com/alex-petrenko/sample-factory)
-2. RLlib's PPO (https://github.com/ray-project/ray/tree/master/rllib)
-3. Multi-Agent PPO from (https://github.com/marlbenchmark/on-policy)
-Each algorithm is in its corresponding folder in examples and has a corresponding config file in cfgs/
-
-**Warning:** only the Sample Factory code has been extensively swept and tested. The default hyperparameters in there
-should work for training the agents from the corresponding paper. The other versions are provided for convenience
-but are not guaranteed to train a performant agent with the current hyperparameter settings.
-
-### Important hyperparameters to be aware of
-There are a few key hyperparameters that we expect users to care quite a bit about. Each of these can be toggled by adding
-```++<hyperparam_name>=<hyperparam_value>``` to the run command.
-- ```num_files```: this controls how many training scenarios are used. Set to -1 to use all of them.
-- ```max_num_vehicles```: this controls the maximum number of controllable agents in a scenario. If there are more than ```max_num_vehicles``` controllable agents in the scene, we sample ```max_num_vehicles``` randomly from them and set the remainder to be experts. If you want to ensure that all agents are controllable, simply pick a large number like 100.
-
-### Running Sample Factory
-Files from Sample Factory can be run from examples/sample_factory_files and should work by default by running
-```python examples/sample_factory_files/run_sample_factory.py algorithm=APPO```
-Additional config options for hyperparameters can be found in the config file.
-
-Once you have a trained checkpoint, you can visualize the results and make a movie of them by running ```python examples/sample_factory_files/visualize_sample_factory.py <PATH TO OUTPUT MODEL>```.
-
-*Warning*: because of how the algorithm is configured, Sample Factory works best with a fixed number of agents
-operating on a fixed horizon. To enable this, we use the config parameter ```max_num_vehicles``` which initializes the environment with only scenes that have fewer controllable agents than ```max_num_vehicles```. Additionally, if there are fewer than ```max_num_vehicles``` in the scene we add dummy agents that receive a vector of -1 at all timesteps. When a vehicle exits the scene we continue providing it a vector of -1 as an observation and a reward of 0.
-
-### Running RLlib
-Files from RLlib examples can be run from examples/rllib_files and should work by default by running
-```python examples/rllib_files/run_rllib.py```
-
-### Running on-policy PPO
-Files from [MAPPO](https://github.com/marlbenchmark/on-policy) examples can be run from examples/rllib_files and should work by default by running
-```python examples/on_policy_files/nocturne_runner.py algorithm=ppo```
-
-## Running the IL Algorithms
-Nocturne comes with a baseline implementation of behavioral cloning and a corresponding
-DataLoader. This can be run via ```python examples/imitation_learning/train.py```.
-
-# Contributors
-
-<table>
-<tbody>
-<tr>
-<td align="center">
-  <a href="https://github.com/eugenevinitsky">
-    <img src="https://avatars.githubusercontent.com/u/7660397?v=4" width="100px;" alt="Eugene Vinitsky" style="border-radius: 50%" />
-  </a>
-</td>
-<td align="center">
-  <a href="https://github.com/nathanlct">
-    <img src="https://avatars.githubusercontent.com/u/33672752?v=4"  width="100px;" alt="Nathan Lichtlé" style="border-radius: 50%" />
-  </a>
-</td>
-<td align="center">
-  <a href="https://github.com/xiaomengy">
-    <img src="https://avatars.githubusercontent.com/u/3357667?v=4" width="100px;" alt="Xiaomeng Yang" style="border-radius: 50%" />
-  </a>
-</td>
-</tr>
-<tr>
-<td align="center">
-  <a href="https://eugenevinitsky.github.io/">
-    Eugene Vinitsky
-  </a>
-</td>
-<td align="center">
-  <a href="https://nathanlct.com/">
-    Nathan Lichtlé
-  </a>
-</td>
-<td align="center">
-  <a href="https://github.com/xiaomengy">
-    Xiaomeng Yang
-  </a>
-</td>
-</tr>
-</tbody>
-</table>
-
-# License
-
-The majority of Nocturne is licensed under the MIT license, however portions of the project are available under separate license terms. The Waymo Motion Dataset License can be found at https://waymo.com/open/terms/.
+## Ongoing work
+
+Here is a list of features that we are developing:
+
+- @Daphne: Support for SB3's PPO algorithm with multi-agent control
+- @Alex: Logging and unit testing
+- @Tiyas: Random resets
diff --git a/algos/ppo/__init__.py b/algos/ppo/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/algos/ppo/base_runner.py b/algos/ppo/base_runner.py
deleted file mode 100644
index e4656b04..00000000
--- a/algos/ppo/base_runner.py
+++ /dev/null
@@ -1,180 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import wandb
-import os
-import numpy as np
-import torch
-from tensorboardX import SummaryWriter
-from algos.ppo.utils.shared_buffer import SharedReplayBuffer
-
-
-def _t2n(x):
-    """Convert torch tensor to a numpy array."""
-    return x.detach().cpu().numpy()
-
-
-class Runner(object):
-    """
-    Base class for training recurrent policies.
-    :param config: (dict) Config dictionary containing parameters for training.
-    """
-
-    def __init__(self, config):
-
-        self.all_args = config['cfg.algo']
-        self.envs = config['envs']
-        self.eval_envs = config['eval_envs']
-        self.device = config['device']
-        self.num_agents = config['num_agents']
-        if config.__contains__("render_envs"):
-            self.render_envs = config['render_envs']
-
-        # parameters
-        # self.env_name = self.all_args.env_name
-        self.algorithm_name = self.all_args.algorithm_name
-        self.experiment_name = self.all_args.experiment
-        self.use_centralized_V = self.all_args.use_centralized_V
-        self.use_obs_instead_of_state = self.all_args.use_obs_instead_of_state
-        self.num_env_steps = self.all_args.num_env_steps
-        self.episode_length = self.all_args.episode_length
-        # self.episodes_per_thread = self.all_args.episodes_per_thread
-        self.n_rollout_threads = self.all_args.n_rollout_threads
-        self.n_eval_rollout_threads = self.all_args.n_eval_rollout_threads
-        self.n_render_rollout_threads = self.all_args.n_render_rollout_threads
-        self.use_linear_lr_decay = self.all_args.use_linear_lr_decay
-        self.hidden_size = self.all_args.hidden_size
-        self.use_wandb = self.all_args.wandb
-        self.use_render = self.all_args.use_render
-        self.recurrent_N = self.all_args.recurrent_N
-
-        # interval
-        self.save_interval = self.all_args.save_interval
-        self.use_eval = self.all_args.use_eval
-        self.eval_interval = self.all_args.eval_interval
-        self.log_interval = self.all_args.log_interval
-
-        # dir
-        self.model_dir = self.all_args.model_dir
-
-        if self.use_wandb:
-            self.save_dir = str(wandb.run.dir)
-            self.run_dir = str(wandb.run.dir)
-        else:
-            self.run_dir = config["logdir"]
-            self.log_dir = str(self.run_dir / 'logs')
-            if not os.path.exists(self.log_dir):
-                os.makedirs(self.log_dir)
-            self.writter = SummaryWriter(self.log_dir)
-            self.save_dir = str(self.run_dir / 'models')
-            if not os.path.exists(self.save_dir):
-                os.makedirs(self.save_dir)
-
-        from algos.ppo.r_mappo.r_mappo import R_MAPPO as TrainAlgo
-        from algos.ppo.r_mappo.algorithm.rMAPPOPolicy import R_MAPPOPolicy as Policy
-        share_observation_space = self.envs.share_observation_space[
-            0] if self.use_centralized_V else self.envs.observation_space[0]
-
-        # policy network
-        self.policy = Policy(self.all_args,
-                             self.envs.observation_space[0],
-                             share_observation_space,
-                             self.envs.action_space[0],
-                             device=self.device)
-
-        if self.model_dir is not None:
-            self.restore()
-
-        # algorithm
-        self.trainer = TrainAlgo(self.all_args,
-                                 self.policy,
-                                 device=self.device)
-
-        # buffer
-        self.buffer = SharedReplayBuffer(self.all_args, self.num_agents,
-                                         self.envs.observation_space[0],
-                                         share_observation_space,
-                                         self.envs.action_space[0])
-
-    def run(self):
-        """Collect training data, perform training updates, and evaluate policy."""
-        raise NotImplementedError
-
-    def warmup(self):
-        """Collect warmup pre-training data."""
-        raise NotImplementedError
-
-    def collect(self, step):
-        """Collect rollouts for training."""
-        raise NotImplementedError
-
-    def insert(self, data):
-        """
-        Insert data into buffer.
-        :param data: (Tuple) data to insert into training buffer.
-        """
-        raise NotImplementedError
-
-    @torch.no_grad()
-    def compute(self):
-        """Calculate returns for the collected data."""
-        self.trainer.prep_rollout()
-        next_values = self.trainer.policy.get_values(
-            np.concatenate(self.buffer.share_obs[-1]),
-            np.concatenate(self.buffer.rnn_states_critic[-1]),
-            np.concatenate(self.buffer.masks[-1]))
-        next_values = np.array(
-            np.split(_t2n(next_values), self.n_rollout_threads))
-        self.buffer.compute_returns(next_values, self.trainer.value_normalizer)
-
-    def train(self):
-        """Train policies with data in buffer. """
-        self.trainer.prep_training()
-        train_infos = self.trainer.train(self.buffer)
-        self.buffer.after_update()
-        return train_infos
-
-    def save(self):
-        """Save policy's actor and critic networks."""
-        policy_actor = self.trainer.policy.actor
-        torch.save(policy_actor.state_dict(), str(self.save_dir) + "/actor.pt")
-        policy_critic = self.trainer.policy.critic
-        torch.save(policy_critic.state_dict(),
-                   str(self.save_dir) + "/critic.pt")
-
-    def restore(self):
-        """Restore policy's networks from a saved model."""
-        policy_actor_state_dict = torch.load(str(self.model_dir) + '/actor.pt')
-        self.policy.actor.load_state_dict(policy_actor_state_dict)
-        if not self.all_args.use_render:
-            policy_critic_state_dict = torch.load(
-                str(self.model_dir) + '/critic.pt')
-            self.policy.critic.load_state_dict(policy_critic_state_dict)
-
-    def log_train(self, train_infos, total_num_steps):
-        """
-        Log training info.
-        :param train_infos: (dict) information about training update.
-        :param total_num_steps: (int) total number of training env steps.
-        """
-        for k, v in train_infos.items():
-            if self.use_wandb:
-                wandb.log({k: v}, step=total_num_steps)
-            else:
-                self.writter.add_scalars(k, {k: v}, total_num_steps)
-
-    def log_env(self, env_infos, total_num_steps):
-        """
-        Log env info.
-        :param env_infos: (dict) information about env state.
-        :param total_num_steps: (int) total number of training env steps.
-        """
-        for k, v in env_infos.items():
-            if len(v) > 0:
-                if self.use_wandb:
-                    wandb.log({k: np.mean(v)}, step=total_num_steps)
-                else:
-                    self.writter.add_scalars(k, {k: np.mean(v)},
-                                             total_num_steps)
diff --git a/algos/ppo/env_wrappers.py b/algos/ppo/env_wrappers.py
deleted file mode 100644
index eb0191d8..00000000
--- a/algos/ppo/env_wrappers.py
+++ /dev/null
@@ -1,867 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-"""
-Modified from OpenAI Baselines code to work with multi-agent envs
-"""
-import numpy as np
-import torch
-from multiprocessing import Process, Pipe
-from abc import ABC, abstractmethod
-from algos.ppo.utils.util import tile_images
-
-
-class CloudpickleWrapper(object):
-    """
-    Uses cloudpickle to serialize contents (otherwise multiprocessing tries to use pickle)
-    """
-
-    def __init__(self, x):
-        self.x = x
-
-    def __getstate__(self):
-        import cloudpickle
-        return cloudpickle.dumps(self.x)
-
-    def __setstate__(self, ob):
-        import pickle
-        self.x = pickle.loads(ob)
-
-
-class ShareVecEnv(ABC):
-    """
-    An abstract asynchronous, vectorized environment.
-    Used to batch data from multiple copies of an environment, so that
-    each observation becomes an batch of observations, and expected action is a batch of actions to
-    be applied per-environment.
-    """
-    closed = False
-    viewer = None
-
-    metadata = {'render.modes': ['human', 'rgb_array']}
-
-    def __init__(self, num_envs, observation_space, share_observation_space,
-                 action_space):
-        self.num_envs = num_envs
-        self.observation_space = observation_space
-        self.share_observation_space = share_observation_space
-        self.action_space = action_space
-
-    @abstractmethod
-    def reset(self):
-        """
-        Reset all the environments and return an array of
-        observations, or a dict of observation arrays.
-        If step_async is still doing work, that work will
-        be cancelled and step_wait() should not be called
-        until step_async() is invoked again.
-        """
-        pass
-
-    @abstractmethod
-    def step_async(self, actions):
-        """
-        Tell all the environments to start taking a step
-        with the given actions.
-        Call step_wait() to get the results of the step.
-        You should not call this if a step_async run is
-        already pending.
-        """
-        pass
-
-    @abstractmethod
-    def step_wait(self):
-        """
-        Wait for the step taken with step_async().
-        Returns (obs, rews, dones, infos):
-         - obs: an array of observations, or a dict of
-                arrays of observations.
-         - rews: an array of rewards
-         - dones: an array of "episode done" booleans
-         - infos: a sequence of info objects
-        """
-        pass
-
-    def close_extras(self):
-        """
-        Clean up the  extra resources, beyond what's in this base class.
-        Only runs when not self.closed.
-        """
-        pass
-
-    def close(self):
-        if self.closed:
-            return
-        if self.viewer is not None:
-            self.viewer.close()
-        self.close_extras()
-        self.closed = True
-
-    def step(self, actions):
-        """
-        Step the environments synchronously.
-        This is available for backwards compatibility.
-        """
-        self.step_async(actions)
-        return self.step_wait()
-
-    def render(self, mode='human'):
-        imgs = self.get_images()
-        bigimg = tile_images(imgs)
-        if mode == 'human':
-            self.get_viewer().imshow(bigimg)
-            return self.get_viewer().isopen
-        elif mode == 'rgb_array':
-            return bigimg
-        else:
-            raise NotImplementedError
-
-    def get_images(self):
-        """
-        Return RGB images from each environment
-        """
-        raise NotImplementedError
-
-    @property
-    def unwrapped(self):
-        if isinstance(self, VecEnvWrapper):
-            return self.venv.unwrapped
-        else:
-            return self
-
-    def get_viewer(self):
-        if self.viewer is None:
-            from gym.envs.classic_control import rendering
-            self.viewer = rendering.SimpleImageViewer()
-        return self.viewer
-
-
-def worker(remote, parent_remote, env_fn_wrapper):
-    parent_remote.close()
-    env = env_fn_wrapper.x()
-    while True:
-        cmd, data = remote.recv()
-        if cmd == 'step':
-            ob, reward, done, info = env.step(data)
-            if 'bool' in done.__class__.__name__:
-                if done:
-                    ob = env.reset()
-            else:
-                if np.all(done):
-                    ob = env.reset()
-
-            remote.send((ob, reward, done, info))
-        elif cmd == 'reset':
-            ob = env.reset()
-            remote.send((ob))
-        elif cmd == 'render':
-            if data == "rgb_array":
-                fr = env.render(mode=data)
-                remote.send(fr)
-            elif data == "human":
-                env.render(mode=data)
-        elif cmd == 'reset_task':
-            ob = env.reset_task()
-            remote.send(ob)
-        elif cmd == 'close':
-            env.close()
-            remote.close()
-            break
-        elif cmd == 'get_spaces':
-            remote.send((env.observation_space, env.share_observation_space,
-                         env.action_space))
-        else:
-            raise NotImplementedError
-
-
-class GuardSubprocVecEnv(ShareVecEnv):
-
-    def __init__(self, env_fns, spaces=None):
-        """
-        envs: list of gym environments to run in subprocesses
-        """
-        self.waiting = False
-        self.closed = False
-        nenvs = len(env_fns)
-        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
-        self.ps = [
-            Process(target=worker,
-                    args=(work_remote, remote, CloudpickleWrapper(env_fn)))
-            for (work_remote, remote,
-                 env_fn) in zip(self.work_remotes, self.remotes, env_fns)
-        ]
-        for p in self.ps:
-            p.daemon = False  # could cause zombie process
-            p.start()
-        for remote in self.work_remotes:
-            remote.close()
-
-        self.remotes[0].send(('get_spaces', None))
-        observation_space, share_observation_space, action_space = self.remotes[
-            0].recv()
-        ShareVecEnv.__init__(self, len(env_fns), observation_space,
-                             share_observation_space, action_space)
-
-    def step_async(self, actions):
-
-        for remote, action in zip(self.remotes, actions):
-            remote.send(('step', action))
-        self.waiting = True
-
-    def step_wait(self):
-        results = [remote.recv() for remote in self.remotes]
-        self.waiting = False
-        obs, rews, dones, infos = zip(*results)
-        return np.stack(obs), np.stack(rews), np.stack(dones), infos
-
-    def reset(self):
-        for remote in self.remotes:
-            remote.send(('reset', None))
-        obs = [remote.recv() for remote in self.remotes]
-        return np.stack(obs)
-
-    def reset_task(self):
-        for remote in self.remotes:
-            remote.send(('reset_task', None))
-        return np.stack([remote.recv() for remote in self.remotes])
-
-    def close(self):
-        if self.closed:
-            return
-        if self.waiting:
-            for remote in self.remotes:
-                remote.recv()
-        for remote in self.remotes:
-            remote.send(('close', None))
-        for p in self.ps:
-            p.join()
-        self.closed = True
-
-
-class SubprocVecEnv(ShareVecEnv):
-
-    def __init__(self, env_fns, spaces=None):
-        """
-        envs: list of gym environments to run in subprocesses
-        """
-        self.waiting = False
-        self.closed = False
-        nenvs = len(env_fns)
-        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
-        self.ps = [
-            Process(target=worker,
-                    args=(work_remote, remote, CloudpickleWrapper(env_fn)))
-            for (work_remote, remote,
-                 env_fn) in zip(self.work_remotes, self.remotes, env_fns)
-        ]
-        for p in self.ps:
-            p.daemon = True  # if the main process crashes, we should not cause things to hang
-            p.start()
-        for remote in self.work_remotes:
-            remote.close()
-
-        self.remotes[0].send(('get_spaces', None))
-        observation_space, share_observation_space, action_space = self.remotes[
-            0].recv()
-        ShareVecEnv.__init__(self, len(env_fns), observation_space,
-                             share_observation_space, action_space)
-
-    def step_async(self, actions):
-        for remote, action in zip(self.remotes, actions):
-            remote.send(('step', action))
-        self.waiting = True
-
-    def step_wait(self):
-        results = [remote.recv() for remote in self.remotes]
-        self.waiting = False
-        obs, rews, dones, infos = zip(*results)
-        return np.stack(obs), np.stack(rews), np.stack(dones), infos
-
-    def reset(self):
-        for remote in self.remotes:
-            remote.send(('reset', None))
-        obs = [remote.recv() for remote in self.remotes]
-        return np.stack(obs)
-
-    def reset_task(self):
-        for remote in self.remotes:
-            remote.send(('reset_task', None))
-        return np.stack([remote.recv() for remote in self.remotes])
-
-    def close(self):
-        if self.closed:
-            return
-        if self.waiting:
-            for remote in self.remotes:
-                remote.recv()
-        for remote in self.remotes:
-            remote.send(('close', None))
-        for p in self.ps:
-            p.join()
-        self.closed = True
-
-    def render(self, mode="rgb_array"):
-        for remote in self.remotes:
-            remote.send(('render', mode))
-        if mode == "rgb_array":
-            frame = [remote.recv() for remote in self.remotes]
-            return np.stack(frame)
-
-
-def shareworker(remote, parent_remote, env_fn_wrapper):
-    parent_remote.close()
-    env = env_fn_wrapper.x()
-    while True:
-        cmd, data = remote.recv()
-        if cmd == 'step':
-            ob, s_ob, reward, done, info, available_actions = env.step(data)
-            if 'bool' in done.__class__.__name__:
-                if done:
-                    ob, s_ob, available_actions = env.reset()
-            else:
-                if np.all(done):
-                    ob, s_ob, available_actions = env.reset()
-
-            remote.send((ob, s_ob, reward, done, info, available_actions))
-        elif cmd == 'reset':
-            ob, s_ob, available_actions = env.reset()
-            remote.send((ob, s_ob, available_actions))
-        elif cmd == 'reset_task':
-            ob = env.reset_task()
-            remote.send(ob)
-        elif cmd == 'render':
-            if data == "rgb_array":
-                fr = env.render(mode=data)
-                remote.send(fr)
-            elif data == "human":
-                env.render(mode=data)
-        elif cmd == 'close':
-            env.close()
-            remote.close()
-            break
-        elif cmd == 'get_spaces':
-            remote.send((env.observation_space, env.share_observation_space,
-                         env.action_space))
-        elif cmd == 'render_vulnerability':
-            fr = env.render_vulnerability(data)
-            remote.send((fr))
-        else:
-            raise NotImplementedError
-
-
-class ShareSubprocVecEnv(ShareVecEnv):
-
-    def __init__(self, env_fns, spaces=None):
-        """
-        envs: list of gym environments to run in subprocesses
-        """
-        self.waiting = False
-        self.closed = False
-        nenvs = len(env_fns)
-        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
-        self.ps = [
-            Process(target=shareworker,
-                    args=(work_remote, remote, CloudpickleWrapper(env_fn)))
-            for (work_remote, remote,
-                 env_fn) in zip(self.work_remotes, self.remotes, env_fns)
-        ]
-        for p in self.ps:
-            p.daemon = True  # if the main process crashes, we should not cause things to hang
-            p.start()
-        for remote in self.work_remotes:
-            remote.close()
-        self.remotes[0].send(('get_spaces', None))
-        observation_space, share_observation_space, action_space = self.remotes[
-            0].recv()
-        ShareVecEnv.__init__(self, len(env_fns), observation_space,
-                             share_observation_space, action_space)
-
-    def step_async(self, actions):
-        for remote, action in zip(self.remotes, actions):
-            remote.send(('step', action))
-        self.waiting = True
-
-    def step_wait(self):
-        results = [remote.recv() for remote in self.remotes]
-        self.waiting = False
-        obs, share_obs, rews, dones, infos, available_actions = zip(*results)
-        return np.stack(obs), np.stack(share_obs), np.stack(rews), np.stack(
-            dones), infos, np.stack(available_actions)
-
-    def reset(self):
-        for remote in self.remotes:
-            remote.send(('reset', None))
-        results = [remote.recv() for remote in self.remotes]
-        obs, share_obs, available_actions = zip(*results)
-        return np.stack(obs), np.stack(share_obs), np.stack(available_actions)
-
-    def reset_task(self):
-        for remote in self.remotes:
-            remote.send(('reset_task', None))
-        return np.stack([remote.recv() for remote in self.remotes])
-
-    def close(self):
-        if self.closed:
-            return
-        if self.waiting:
-            for remote in self.remotes:
-                remote.recv()
-        for remote in self.remotes:
-            remote.send(('close', None))
-        for p in self.ps:
-            p.join()
-        self.closed = True
-
-
-def choosesimpleworker(remote, parent_remote, env_fn_wrapper):
-    parent_remote.close()
-    env = env_fn_wrapper.x()
-    while True:
-        cmd, data = remote.recv()
-        if cmd == 'step':
-            ob, reward, done, info = env.step(data)
-            remote.send((ob, reward, done, info))
-        elif cmd == 'reset':
-            ob = env.reset(data)
-            remote.send((ob))
-        elif cmd == 'reset_task':
-            ob = env.reset_task()
-            remote.send(ob)
-        elif cmd == 'close':
-            env.close()
-            remote.close()
-            break
-        elif cmd == 'render':
-            if data == "rgb_array":
-                fr = env.render(mode=data)
-                remote.send(fr)
-            elif data == "human":
-                env.render(mode=data)
-        elif cmd == 'get_spaces':
-            remote.send((env.observation_space, env.share_observation_space,
-                         env.action_space))
-        else:
-            raise NotImplementedError
-
-
-class ChooseSimpleSubprocVecEnv(ShareVecEnv):
-
-    def __init__(self, env_fns, spaces=None):
-        """
-        envs: list of gym environments to run in subprocesses
-        """
-        self.waiting = False
-        self.closed = False
-        nenvs = len(env_fns)
-        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
-        self.ps = [
-            Process(target=choosesimpleworker,
-                    args=(work_remote, remote, CloudpickleWrapper(env_fn)))
-            for (work_remote, remote,
-                 env_fn) in zip(self.work_remotes, self.remotes, env_fns)
-        ]
-        for p in self.ps:
-            p.daemon = True  # if the main process crashes, we should not cause things to hang
-            p.start()
-        for remote in self.work_remotes:
-            remote.close()
-        self.remotes[0].send(('get_spaces', None))
-        observation_space, share_observation_space, action_space = self.remotes[
-            0].recv()
-        ShareVecEnv.__init__(self, len(env_fns), observation_space,
-                             share_observation_space, action_space)
-
-    def step_async(self, actions):
-        for remote, action in zip(self.remotes, actions):
-            remote.send(('step', action))
-        self.waiting = True
-
-    def step_wait(self):
-        results = [remote.recv() for remote in self.remotes]
-        self.waiting = False
-        obs, rews, dones, infos = zip(*results)
-        return np.stack(obs), np.stack(rews), np.stack(dones), infos
-
-    def reset(self, reset_choose):
-        for remote, choose in zip(self.remotes, reset_choose):
-            remote.send(('reset', choose))
-        obs = [remote.recv() for remote in self.remotes]
-        return np.stack(obs)
-
-    def render(self, mode="rgb_array"):
-        for remote in self.remotes:
-            remote.send(('render', mode))
-        if mode == "rgb_array":
-            frame = [remote.recv() for remote in self.remotes]
-            return np.stack(frame)
-
-    def reset_task(self):
-        for remote in self.remotes:
-            remote.send(('reset_task', None))
-        return np.stack([remote.recv() for remote in self.remotes])
-
-    def close(self):
-        if self.closed:
-            return
-        if self.waiting:
-            for remote in self.remotes:
-                remote.recv()
-        for remote in self.remotes:
-            remote.send(('close', None))
-        for p in self.ps:
-            p.join()
-        self.closed = True
-
-
-def chooseworker(remote, parent_remote, env_fn_wrapper):
-    parent_remote.close()
-    env = env_fn_wrapper.x()
-    while True:
-        cmd, data = remote.recv()
-        if cmd == 'step':
-            ob, s_ob, reward, done, info, available_actions = env.step(data)
-            remote.send((ob, s_ob, reward, done, info, available_actions))
-        elif cmd == 'reset':
-            ob, s_ob, available_actions = env.reset(data)
-            remote.send((ob, s_ob, available_actions))
-        elif cmd == 'reset_task':
-            ob = env.reset_task()
-            remote.send(ob)
-        elif cmd == 'close':
-            env.close()
-            remote.close()
-            break
-        elif cmd == 'render':
-            remote.send(env.render(mode='rgb_array'))
-        elif cmd == 'get_spaces':
-            remote.send((env.observation_space, env.share_observation_space,
-                         env.action_space))
-        else:
-            raise NotImplementedError
-
-
-class ChooseSubprocVecEnv(ShareVecEnv):
-
-    def __init__(self, env_fns, spaces=None):
-        """
-        envs: list of gym environments to run in subprocesses
-        """
-        self.waiting = False
-        self.closed = False
-        nenvs = len(env_fns)
-        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
-        self.ps = [
-            Process(target=chooseworker,
-                    args=(work_remote, remote, CloudpickleWrapper(env_fn)))
-            for (work_remote, remote,
-                 env_fn) in zip(self.work_remotes, self.remotes, env_fns)
-        ]
-        for p in self.ps:
-            p.daemon = True  # if the main process crashes, we should not cause things to hang
-            p.start()
-        for remote in self.work_remotes:
-            remote.close()
-        self.remotes[0].send(('get_spaces', None))
-        observation_space, share_observation_space, action_space = self.remotes[
-            0].recv()
-        ShareVecEnv.__init__(self, len(env_fns), observation_space,
-                             share_observation_space, action_space)
-
-    def step_async(self, actions):
-        for remote, action in zip(self.remotes, actions):
-            remote.send(('step', action))
-        self.waiting = True
-
-    def step_wait(self):
-        results = [remote.recv() for remote in self.remotes]
-        self.waiting = False
-        obs, share_obs, rews, dones, infos, available_actions = zip(*results)
-        return np.stack(obs), np.stack(share_obs), np.stack(rews), np.stack(
-            dones), infos, np.stack(available_actions)
-
-    def reset(self, reset_choose):
-        for remote, choose in zip(self.remotes, reset_choose):
-            remote.send(('reset', choose))
-        results = [remote.recv() for remote in self.remotes]
-        obs, share_obs, available_actions = zip(*results)
-        return np.stack(obs), np.stack(share_obs), np.stack(available_actions)
-
-    def reset_task(self):
-        for remote in self.remotes:
-            remote.send(('reset_task', None))
-        return np.stack([remote.recv() for remote in self.remotes])
-
-    def close(self):
-        if self.closed:
-            return
-        if self.waiting:
-            for remote in self.remotes:
-                remote.recv()
-        for remote in self.remotes:
-            remote.send(('close', None))
-        for p in self.ps:
-            p.join()
-        self.closed = True
-
-
-def chooseguardworker(remote, parent_remote, env_fn_wrapper):
-    parent_remote.close()
-    env = env_fn_wrapper.x()
-    while True:
-        cmd, data = remote.recv()
-        if cmd == 'step':
-            ob, reward, done, info = env.step(data)
-            remote.send((ob, reward, done, info))
-        elif cmd == 'reset':
-            ob = env.reset(data)
-            remote.send((ob))
-        elif cmd == 'reset_task':
-            ob = env.reset_task()
-            remote.send(ob)
-        elif cmd == 'close':
-            env.close()
-            remote.close()
-            break
-        elif cmd == 'get_spaces':
-            remote.send((env.observation_space, env.share_observation_space,
-                         env.action_space))
-        else:
-            raise NotImplementedError
-
-
-class ChooseGuardSubprocVecEnv(ShareVecEnv):
-
-    def __init__(self, env_fns, spaces=None):
-        """
-        envs: list of gym environments to run in subprocesses
-        """
-        self.waiting = False
-        self.closed = False
-        nenvs = len(env_fns)
-        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
-        self.ps = [
-            Process(target=chooseguardworker,
-                    args=(work_remote, remote, CloudpickleWrapper(env_fn)))
-            for (work_remote, remote,
-                 env_fn) in zip(self.work_remotes, self.remotes, env_fns)
-        ]
-        for p in self.ps:
-            p.daemon = False  # if the main process crashes, we should not cause things to hang
-            p.start()
-        for remote in self.work_remotes:
-            remote.close()
-        self.remotes[0].send(('get_spaces', None))
-        observation_space, share_observation_space, action_space = self.remotes[
-            0].recv()
-        ShareVecEnv.__init__(self, len(env_fns), observation_space,
-                             share_observation_space, action_space)
-
-    def step_async(self, actions):
-        for remote, action in zip(self.remotes, actions):
-            remote.send(('step', action))
-        self.waiting = True
-
-    def step_wait(self):
-        results = [remote.recv() for remote in self.remotes]
-        self.waiting = False
-        obs, rews, dones, infos = zip(*results)
-        return np.stack(obs), np.stack(rews), np.stack(dones), infos
-
-    def reset(self, reset_choose):
-        for remote, choose in zip(self.remotes, reset_choose):
-            remote.send(('reset', choose))
-        obs = [remote.recv() for remote in self.remotes]
-        return np.stack(obs)
-
-    def reset_task(self):
-        for remote in self.remotes:
-            remote.send(('reset_task', None))
-        return np.stack([remote.recv() for remote in self.remotes])
-
-    def close(self):
-        if self.closed:
-            return
-        if self.waiting:
-            for remote in self.remotes:
-                remote.recv()
-        for remote in self.remotes:
-            remote.send(('close', None))
-        for p in self.ps:
-            p.join()
-        self.closed = True
-
-
-# single env
-class DummyVecEnv(ShareVecEnv):
-
-    def __init__(self, env_fns):
-        self.envs = [fn() for fn in env_fns]
-        env = self.envs[0]
-        ShareVecEnv.__init__(self, len(env_fns), env.observation_space,
-                             env.share_observation_space, env.action_space)
-        self.actions = None
-
-    def step_async(self, actions):
-        self.actions = actions
-
-    def step_wait(self):
-        results = [env.step(a) for (a, env) in zip(self.actions, self.envs)]
-        # TODO(eugenevinitsky) remove this
-        obs, rews, dones, infos = map(np.array, zip(*results))
-
-        for (i, done) in enumerate(dones):
-            if 'bool' in done.__class__.__name__:
-                if done:
-                    obs[i] = self.envs[i].reset()
-            else:
-                if np.all(done):
-                    obs[i] = self.envs[i].reset()
-
-        self.actions = None
-        return obs, rews, dones, infos
-
-    def reset(self):
-        obs = [env.reset() for env in self.envs]
-        return np.array(obs)
-
-    def close(self):
-        for env in self.envs:
-            env.close()
-
-    def render(self, mode="human"):
-        if mode == "rgb_array":
-            return np.array([env.render(mode=mode) for env in self.envs])
-        elif mode == "human":
-            for env in self.envs:
-                env.render(mode=mode)
-        else:
-            raise NotImplementedError
-
-
-class ShareDummyVecEnv(ShareVecEnv):
-
-    def __init__(self, env_fns):
-        self.envs = [fn() for fn in env_fns]
-        env = self.envs[0]
-        ShareVecEnv.__init__(self, len(env_fns), env.observation_space,
-                             env.share_observation_space, env.action_space)
-        self.actions = None
-
-    def step_async(self, actions):
-        self.actions = actions
-
-    def step_wait(self):
-        results = [env.step(a) for (a, env) in zip(self.actions, self.envs)]
-        obs, share_obs, rews, dones, infos, available_actions = map(
-            np.array, zip(*results))
-
-        for (i, done) in enumerate(dones):
-            if 'bool' in done.__class__.__name__:
-                if done:
-                    obs[i], share_obs[i], available_actions[i] = self.envs[
-                        i].reset()
-            else:
-                if np.all(done):
-                    obs[i], share_obs[i], available_actions[i] = self.envs[
-                        i].reset()
-        self.actions = None
-
-        return obs, share_obs, rews, dones, infos, available_actions
-
-    def reset(self):
-        results = [env.reset() for env in self.envs]
-        obs, share_obs, available_actions = map(np.array, zip(*results))
-        return obs, share_obs, available_actions
-
-    def close(self):
-        for env in self.envs:
-            env.close()
-
-    def render(self, mode="human"):
-        if mode == "rgb_array":
-            return np.array([env.render(mode=mode) for env in self.envs])
-        elif mode == "human":
-            for env in self.envs:
-                env.render(mode=mode)
-        else:
-            raise NotImplementedError
-
-
-class ChooseDummyVecEnv(ShareVecEnv):
-
-    def __init__(self, env_fns):
-        self.envs = [fn() for fn in env_fns]
-        env = self.envs[0]
-        ShareVecEnv.__init__(self, len(env_fns), env.observation_space,
-                             env.share_observation_space, env.action_space)
-        self.actions = None
-
-    def step_async(self, actions):
-        self.actions = actions
-
-    def step_wait(self):
-        results = [env.step(a) for (a, env) in zip(self.actions, self.envs)]
-        obs, share_obs, rews, dones, infos, available_actions = map(
-            np.array, zip(*results))
-        self.actions = None
-        return obs, share_obs, rews, dones, infos, available_actions
-
-    def reset(self, reset_choose):
-        results = [
-            env.reset(choose) for (env, choose) in zip(self.envs, reset_choose)
-        ]
-        obs, share_obs, available_actions = map(np.array, zip(*results))
-        return obs, share_obs, available_actions
-
-    def close(self):
-        for env in self.envs:
-            env.close()
-
-    def render(self, mode="human"):
-        if mode == "rgb_array":
-            return np.array([env.render(mode=mode) for env in self.envs])
-        elif mode == "human":
-            for env in self.envs:
-                env.render(mode=mode)
-        else:
-            raise NotImplementedError
-
-
-class ChooseSimpleDummyVecEnv(ShareVecEnv):
-
-    def __init__(self, env_fns):
-        self.envs = [fn() for fn in env_fns]
-        env = self.envs[0]
-        ShareVecEnv.__init__(self, len(env_fns), env.observation_space,
-                             env.share_observation_space, env.action_space)
-        self.actions = None
-
-    def step_async(self, actions):
-        self.actions = actions
-
-    def step_wait(self):
-        results = [env.step(a) for (a, env) in zip(self.actions, self.envs)]
-        obs, rews, dones, infos = map(np.array, zip(*results))
-        self.actions = None
-        return obs, rews, dones, infos
-
-    def reset(self, reset_choose):
-        obs = [
-            env.reset(choose) for (env, choose) in zip(self.envs, reset_choose)
-        ]
-        return np.array(obs)
-
-    def close(self):
-        for env in self.envs:
-            env.close()
-
-    def render(self, mode="human"):
-        if mode == "rgb_array":
-            return np.array([env.render(mode=mode) for env in self.envs])
-        elif mode == "human":
-            for env in self.envs:
-                env.render(mode=mode)
-        else:
-            raise NotImplementedError
diff --git a/algos/ppo/ppo_utils/act.py b/algos/ppo/ppo_utils/act.py
deleted file mode 100644
index 387c9b3e..00000000
--- a/algos/ppo/ppo_utils/act.py
+++ /dev/null
@@ -1,199 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-from .distributions import Bernoulli, Categorical, DiagGaussian
-import torch
-import torch.nn as nn
-
-
-class ACTLayer(nn.Module):
-    """
-    MLP Module to compute actions.
-    :param action_space: (gym.Space) action space.
-    :param inputs_dim: (int) dimension of network input.
-    :param use_orthogonal: (bool) whether to use orthogonal initialization.
-    :param gain: (float) gain of the output layer of the network.
-    """
-
-    def __init__(self, action_space, inputs_dim, use_orthogonal, gain, device):
-        super(ACTLayer, self).__init__()
-        self.mixed_action = False
-        self.multi_discrete = False
-
-        if action_space.__class__.__name__ == "Discrete":
-            action_dim = action_space.n
-            self.action_out = Categorical(inputs_dim, action_dim,
-                                          use_orthogonal, gain)
-        elif action_space.__class__.__name__ == "Box":
-            action_dim = action_space.shape[0]
-            self.action_out = DiagGaussian(inputs_dim, action_dim,
-                                           use_orthogonal, gain, device)
-        elif action_space.__class__.__name__ == "MultiBinary":
-            action_dim = action_space.shape[0]
-            self.action_out = Bernoulli(inputs_dim, action_dim, use_orthogonal,
-                                        gain)
-        elif action_space.__class__.__name__ == "MultiDiscrete":
-            self.multi_discrete = True
-            action_dims = action_space.high - action_space.low + 1
-            self.action_outs = []
-            for action_dim in action_dims:
-                self.action_outs.append(
-                    Categorical(inputs_dim, action_dim, use_orthogonal, gain))
-            self.action_outs = nn.ModuleList(self.action_outs)
-        else:  # discrete + continous
-            self.mixed_action = True
-            continous_dim = action_space[0].shape[0]
-            discrete_dim = action_space[1].n
-            self.action_outs = nn.ModuleList([
-                DiagGaussian(inputs_dim, continous_dim, use_orthogonal, gain),
-                Categorical(inputs_dim, discrete_dim, use_orthogonal, gain)
-            ])
-
-        self.to(device)
-
-    def forward(self, x, available_actions=None, deterministic=False):
-        """
-        Compute actions and action logprobs from given input.
-        :param x: (torch.Tensor) input to network.
-        :param available_actions: (torch.Tensor) denotes which actions are available to agent
-                                  (if None, all actions available)
-        :param deterministic: (bool) whether to sample from action distribution or return the mode.
-
-        :return actions: (torch.Tensor) actions to take.
-        :return action_log_probs: (torch.Tensor) log probabilities of taken actions.
-        """
-        if self.mixed_action:
-            actions = []
-            action_log_probs = []
-            for action_out in self.action_outs:
-                action_logit = action_out(x)
-                action = action_logit.mode(
-                ) if deterministic else action_logit.sample()
-                action_log_prob = action_logit.log_probs(action)
-                actions.append(action.float())
-                action_log_probs.append(action_log_prob)
-
-            actions = torch.cat(actions, -1)
-            action_log_probs = torch.sum(torch.cat(action_log_probs, -1),
-                                         -1,
-                                         keepdim=True)
-
-        elif self.multi_discrete:
-            actions = []
-            action_log_probs = []
-            for action_out in self.action_outs:
-                action_logit = action_out(x)
-                action = action_logit.mode(
-                ) if deterministic else action_logit.sample()
-                action_log_prob = action_logit.log_probs(action)
-                actions.append(action)
-                action_log_probs.append(action_log_prob)
-
-            actions = torch.cat(actions, -1)
-            action_log_probs = torch.cat(action_log_probs, -1)
-
-        else:
-            action_logits = self.action_out(x)
-            actions = action_logits.mode(
-            ) if deterministic else action_logits.sample()
-            action_log_probs = action_logits.log_probs(actions)
-
-        return actions, action_log_probs
-
-    def get_probs(self, x, available_actions=None):
-        """
-        Compute action probabilities from inputs.
-        :param x: (torch.Tensor) input to network.
-        :param available_actions: (torch.Tensor) denotes which actions are available to agent
-                                  (if None, all actions available)
-
-        :return action_probs: (torch.Tensor)
-        """
-        if self.mixed_action or self.multi_discrete:
-            action_probs = []
-            for action_out in self.action_outs:
-                action_logit = action_out(x)
-                action_prob = action_logit.probs
-                action_probs.append(action_prob)
-            action_probs = torch.cat(action_probs, -1)
-        else:
-            action_logits = self.action_out(x, available_actions)
-            action_probs = action_logits.probs
-
-        return action_probs
-
-    def evaluate_actions(self,
-                         x,
-                         action,
-                         available_actions=None,
-                         active_masks=None):
-        """
-        Compute log probability and entropy of given actions.
-        :param x: (torch.Tensor) input to network.
-        :param action: (torch.Tensor) actions whose entropy and log probability to evaluate.
-        :param available_actions: (torch.Tensor) denotes which actions are available to agent
-                                                              (if None, all actions available)
-        :param active_masks: (torch.Tensor) denotes whether an agent is active or dead.
-
-        :return action_log_probs: (torch.Tensor) log probabilities of the input actions.
-        :return dist_entropy: (torch.Tensor) action distribution entropy for the given inputs.
-        """
-        if self.mixed_action:
-            a, b = action.split((2, 1), -1)
-            b = b.long()
-            action = [a, b]
-            action_log_probs = []
-            dist_entropy = []
-            for action_out, act in zip(self.action_outs, action):
-                action_logit = action_out(x)
-                action_log_probs.append(action_logit.log_probs(act))
-                if active_masks is not None:
-                    if len(action_logit.entropy().shape) == len(
-                            active_masks.shape):
-                        dist_entropy.append(
-                            (action_logit.entropy() * active_masks).sum() /
-                            active_masks.sum())
-                    else:
-                        dist_entropy.append((action_logit.entropy() *
-                                             active_masks.squeeze(-1)).sum() /
-                                            active_masks.sum())
-                else:
-                    dist_entropy.append(action_logit.entropy().mean())
-
-            action_log_probs = torch.sum(torch.cat(action_log_probs, -1),
-                                         -1,
-                                         keepdim=True)
-            dist_entropy = dist_entropy[0] / 2.0 + dist_entropy[
-                1] / 0.98  #! dosen't make sense
-
-        elif self.multi_discrete:
-            action = torch.transpose(action, 0, 1)
-            action_log_probs = []
-            dist_entropy = []
-            for action_out, act in zip(self.action_outs, action):
-                action_logit = action_out(x)
-                action_log_probs.append(action_logit.log_probs(act))
-                if active_masks is not None:
-                    dist_entropy.append(
-                        (action_logit.entropy() *
-                         active_masks.squeeze(-1)).sum() / active_masks.sum())
-                else:
-                    dist_entropy.append(action_logit.entropy().mean())
-
-            action_log_probs = torch.cat(action_log_probs,
-                                         -1)  # ! could be wrong
-            dist_entropy = torch.tensor(dist_entropy).mean()
-
-        else:
-            action_logits = self.action_out(x, available_actions)
-            action_log_probs = action_logits.log_probs(action)
-            if active_masks is not None:
-                dist_entropy = (
-                    action_logits.entropy() *
-                    active_masks.squeeze(-1)).sum() / active_masks.sum()
-            else:
-                dist_entropy = action_logits.entropy().mean()
-
-        return action_log_probs, dist_entropy
diff --git a/algos/ppo/ppo_utils/cnn.py b/algos/ppo/ppo_utils/cnn.py
deleted file mode 100644
index 95fb8218..00000000
--- a/algos/ppo/ppo_utils/cnn.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-from torchvision import transforms
-import torch.nn as nn
-from .util import init
-"""CNN Modules and utils."""
-
-
-class Flatten(nn.Module):
-
-    def forward(self, x):
-        return x.view(x.size(0), -1)
-
-
-class CNNLayer(nn.Module):
-
-    def __init__(self,
-                 obs_shape,
-                 hidden_size,
-                 use_orthogonal,
-                 use_ReLU,
-                 kernel_size=3,
-                 stride=1):
-        super(CNNLayer, self).__init__()
-
-        active_func = [nn.Tanh(), nn.ReLU()][use_ReLU]
-        init_method = [nn.init.xavier_uniform_,
-                       nn.init.orthogonal_][use_orthogonal]
-        gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU])
-
-        self.resize = transforms.Resize(84)
-
-        def init_(m):
-            return init(m,
-                        init_method,
-                        lambda x: nn.init.constant_(x, 0),
-                        gain=gain)
-
-        input_channel = obs_shape[0]
-        input_width = obs_shape[1]
-        input_height = obs_shape[2]
-
-        self.cnn = nn.Sequential(
-            init_(
-                nn.Conv2d(in_channels=input_channel,
-                          out_channels=hidden_size // 2,
-                          kernel_size=kernel_size,
-                          stride=stride)), active_func, Flatten(),
-            init_(
-                nn.Linear(
-                    hidden_size // 2 * (input_width - kernel_size + stride) *
-                    (input_height - kernel_size + stride),
-                    hidden_size)), active_func,
-            init_(nn.Linear(hidden_size, hidden_size)), active_func)
-
-    def forward(self, x):
-        # TODO(eugenevinitsky) hardcoding is bad
-        x = self.resize(x) / 255.0
-        x = self.cnn(x)
-        return x
-
-
-class CNNBase(nn.Module):
-
-    def __init__(self, args, obs_shape):
-        super(CNNBase, self).__init__()
-
-        self._use_orthogonal = args.use_orthogonal
-        self._use_ReLU = args.use_ReLU
-        self.hidden_size = args.hidden_size
-
-        self.cnn = CNNLayer(obs_shape, self.hidden_size, self._use_orthogonal,
-                            self._use_ReLU)
-
-    def forward(self, x):
-        x = self.cnn(x)
-        return x
diff --git a/algos/ppo/ppo_utils/distributions.py b/algos/ppo/ppo_utils/distributions.py
deleted file mode 100644
index 9249d700..00000000
--- a/algos/ppo/ppo_utils/distributions.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import torch
-import torch.nn as nn
-from .util import init
-"""
-Modify standard PyTorch distributions so they to make compatible with this codebase. 
-"""
-
-#
-# Standardize distribution interfaces
-#
-
-
-# Categorical
-class FixedCategorical(torch.distributions.Categorical):
-
-    def sample(self):
-        return super().sample().unsqueeze(-1)
-
-    def log_probs(self, actions):
-        return (super().log_prob(actions.squeeze(-1)).view(
-            actions.size(0), -1).sum(-1).unsqueeze(-1))
-
-    def mode(self):
-        return self.probs.argmax(dim=-1, keepdim=True)
-
-
-# Normal
-class FixedNormal(torch.distributions.Normal):
-
-    def log_probs(self, actions):
-        return super().log_prob(actions).sum(-1, keepdim=True)
-
-    def entrop(self):
-        return super.entropy().sum(-1)
-
-    def mode(self):
-        return self.mean
-
-
-# Bernoulli
-class FixedBernoulli(torch.distributions.Bernoulli):
-
-    def log_probs(self, actions):
-        return super.log_prob(actions).view(actions.size(0),
-                                            -1).sum(-1).unsqueeze(-1)
-
-    def entropy(self):
-        return super().entropy().sum(-1)
-
-    def mode(self):
-        return torch.gt(self.probs, 0.5).float()
-
-
-class Categorical(nn.Module):
-
-    def __init__(self,
-                 num_inputs,
-                 num_outputs,
-                 use_orthogonal=True,
-                 gain=0.01):
-        super(Categorical, self).__init__()
-        init_method = [nn.init.xavier_uniform_,
-                       nn.init.orthogonal_][use_orthogonal]
-
-        def init_(m):
-            return init(m, init_method, lambda x: nn.init.constant_(x, 0),
-                        gain)
-
-        self.linear = init_(nn.Linear(num_inputs, num_outputs))
-
-    def forward(self, x, available_actions=None):
-        x = self.linear(x)
-        if available_actions is not None:
-            x[available_actions == 0] = -1e10
-        return FixedCategorical(logits=x)
-
-
-class DiagGaussian(nn.Module):
-
-    def __init__(self,
-                 num_inputs,
-                 num_outputs,
-                 use_orthogonal=True,
-                 gain=0.01,
-                 device='cpu'):
-        super(DiagGaussian, self).__init__()
-
-        init_method = [nn.init.xavier_uniform_,
-                       nn.init.orthogonal_][use_orthogonal]
-
-        def init_(m):
-            return init(m, init_method, lambda x: nn.init.constant_(x, 0),
-                        gain)
-
-        self.fc_mean = init_(nn.Linear(num_inputs, num_outputs))
-        self.logstd = AddBias(torch.zeros(num_outputs))
-        self.to(device)
-        self.device = device
-
-    def forward(self, x):
-        action_mean = self.fc_mean(x)
-
-        #  An ugly hack for my KFAC implementation.
-        zeros = torch.zeros(action_mean.size()).to(self.device)
-        # if x.is_cuda:
-        #     zeros = zeros.cuda()
-
-        action_logstd = self.logstd(zeros)
-        return FixedNormal(action_mean, action_logstd.exp())
-
-
-class Bernoulli(nn.Module):
-
-    def __init__(self,
-                 num_inputs,
-                 num_outputs,
-                 use_orthogonal=True,
-                 gain=0.01):
-        super(Bernoulli, self).__init__()
-        init_method = [nn.init.xavier_uniform_,
-                       nn.init.orthogonal_][use_orthogonal]
-
-        def init_(m):
-            return init(m, init_method, lambda x: nn.init.constant_(x, 0),
-                        gain)
-
-        self.linear = init_(nn.Linear(num_inputs, num_outputs))
-
-    def forward(self, x):
-        x = self.linear(x)
-        return FixedBernoulli(logits=x)
-
-
-class AddBias(nn.Module):
-
-    def __init__(self, bias):
-        super(AddBias, self).__init__()
-        self._bias = nn.Parameter(bias.unsqueeze(1))
-
-    def forward(self, x):
-        if x.dim() == 2:
-            bias = self._bias.t().view(1, -1)
-        else:
-            bias = self._bias.t().view(1, -1, 1, 1)
-
-        return x + bias
diff --git a/algos/ppo/ppo_utils/encoder.py b/algos/ppo/ppo_utils/encoder.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/algos/ppo/ppo_utils/mlp.py b/algos/ppo/ppo_utils/mlp.py
deleted file mode 100644
index b066a3d2..00000000
--- a/algos/ppo/ppo_utils/mlp.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import torch.nn as nn
-from .util import init, get_clones
-"""MLP modules."""
-
-
-class MLPLayer(nn.Module):
-
-    def __init__(self, input_dim, hidden_size, layer_N, use_orthogonal,
-                 use_ReLU):
-        super(MLPLayer, self).__init__()
-        self._layer_N = layer_N
-
-        active_func = [nn.Tanh(), nn.ReLU()][use_ReLU]
-        init_method = [nn.init.xavier_uniform_,
-                       nn.init.orthogonal_][use_orthogonal]
-        gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU])
-
-        def init_(m):
-            return init(m,
-                        init_method,
-                        lambda x: nn.init.constant_(x, 0),
-                        gain=gain)
-
-        self.fc1 = nn.Sequential(init_(nn.Linear(input_dim, hidden_size)),
-                                 active_func, nn.LayerNorm(hidden_size))
-        self.fc_h = nn.Sequential(init_(nn.Linear(hidden_size, hidden_size)),
-                                  active_func, nn.LayerNorm(hidden_size))
-        self.fc2 = get_clones(self.fc_h, self._layer_N)
-
-    def forward(self, x):
-        x = self.fc1(x)
-        for i in range(self._layer_N):
-            x = self.fc2[i](x)
-        return x
-
-
-class MLPBase(nn.Module):
-
-    def __init__(self, args, obs_shape, cat_self=True, attn_internal=False):
-        super(MLPBase, self).__init__()
-
-        self._use_feature_normalization = args.use_feature_normalization
-        self._use_orthogonal = args.use_orthogonal
-        self._use_ReLU = args.use_ReLU
-        self._stacked_frames = args.stacked_frames
-        self._layer_N = args.layer_N
-        self.hidden_size = args.hidden_size
-
-        obs_dim = obs_shape[0]
-
-        if self._use_feature_normalization:
-            self.feature_norm = nn.LayerNorm(obs_dim)
-
-        self.mlp = MLPLayer(obs_dim, self.hidden_size, self._layer_N,
-                            self._use_orthogonal, self._use_ReLU)
-
-    def forward(self, x):
-        if self._use_feature_normalization:
-            x = self.feature_norm(x)
-
-        x = self.mlp(x)
-
-        return x
diff --git a/algos/ppo/ppo_utils/popart.py b/algos/ppo/ppo_utils/popart.py
deleted file mode 100644
index 7dd4be1b..00000000
--- a/algos/ppo/ppo_utils/popart.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import math
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class PopArt(torch.nn.Module):
-
-    def __init__(self,
-                 input_shape,
-                 output_shape,
-                 norm_axes=1,
-                 beta=0.99999,
-                 epsilon=1e-5,
-                 device=torch.device("cpu")):
-
-        super(PopArt, self).__init__()
-
-        self.beta = beta
-        self.epsilon = epsilon
-        self.norm_axes = norm_axes
-        self.tpdv = dict(dtype=torch.float32, device=device)
-
-        self.input_shape = input_shape
-        self.output_shape = output_shape
-
-        self.weight = nn.Parameter(torch.Tensor(output_shape,
-                                                input_shape)).to(**self.tpdv)
-        self.bias = nn.Parameter(torch.Tensor(output_shape)).to(**self.tpdv)
-
-        self.stddev = nn.Parameter(torch.ones(output_shape),
-                                   requires_grad=False).to(**self.tpdv)
-        self.mean = nn.Parameter(torch.zeros(output_shape),
-                                 requires_grad=False).to(**self.tpdv)
-        self.mean_sq = nn.Parameter(torch.zeros(output_shape),
-                                    requires_grad=False).to(**self.tpdv)
-        self.debiasing_term = nn.Parameter(torch.tensor(0.0),
-                                           requires_grad=False).to(**self.tpdv)
-
-        self.reset_parameters()
-
-    def reset_parameters(self):
-        torch.nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
-        if self.bias is not None:
-            fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(
-                self.weight)
-            bound = 1 / math.sqrt(fan_in)
-            torch.nn.init.uniform_(self.bias, -bound, bound)
-        self.mean.zero_()
-        self.mean_sq.zero_()
-        self.debiasing_term.zero_()
-
-    def forward(self, input_vector):
-        if type(input_vector) == np.ndarray:
-            input_vector = torch.from_numpy(input_vector)
-        input_vector = input_vector.to(**self.tpdv)
-
-        return F.linear(input_vector, self.weight, self.bias)
-
-    @torch.no_grad()
-    def update(self, input_vector):
-        if type(input_vector) == np.ndarray:
-            input_vector = torch.from_numpy(input_vector)
-        input_vector = input_vector.to(**self.tpdv)
-
-        old_mean, old_var = self.debiased_mean_var()
-        old_stddev = torch.sqrt(old_var)
-
-        batch_mean = input_vector.mean(dim=tuple(range(self.norm_axes)))
-        batch_sq_mean = (input_vector**2).mean(
-            dim=tuple(range(self.norm_axes)))
-
-        self.mean.mul_(self.beta).add_(batch_mean * (1.0 - self.beta))
-        self.mean_sq.mul_(self.beta).add_(batch_sq_mean * (1.0 - self.beta))
-        self.debiasing_term.mul_(self.beta).add_(1.0 * (1.0 - self.beta))
-
-        self.stddev = (self.mean_sq - self.mean**2).sqrt().clamp(min=1e-4)
-
-        new_mean, new_var = self.debiased_mean_var()
-        new_stddev = torch.sqrt(new_var)
-
-        self.weight = self.weight * old_stddev / new_stddev
-        self.bias = (old_stddev * self.bias + old_mean - new_mean) / new_stddev
-
-    def debiased_mean_var(self):
-        debiased_mean = self.mean / self.debiasing_term.clamp(min=self.epsilon)
-        debiased_mean_sq = self.mean_sq / self.debiasing_term.clamp(
-            min=self.epsilon)
-        debiased_var = (debiased_mean_sq - debiased_mean**2).clamp(min=1e-2)
-        return debiased_mean, debiased_var
-
-    def normalize(self, input_vector):
-        if type(input_vector) == np.ndarray:
-            input_vector = torch.from_numpy(input_vector)
-        input_vector = input_vector.to(**self.tpdv)
-
-        mean, var = self.debiased_mean_var()
-        out = (input_vector - mean[(None, ) * self.norm_axes]
-               ) / torch.sqrt(var)[(None, ) * self.norm_axes]
-
-        return out
-
-    def denormalize(self, input_vector):
-        if type(input_vector) == np.ndarray:
-            input_vector = torch.from_numpy(input_vector)
-        input_vector = input_vector.to(**self.tpdv)
-
-        mean, var = self.debiased_mean_var()
-        out = input_vector * torch.sqrt(var)[(None, ) * self.norm_axes] + mean[
-            (None, ) * self.norm_axes]
-
-        out = out.cpu().numpy()
-
-        return out
diff --git a/algos/ppo/ppo_utils/rnn.py b/algos/ppo/ppo_utils/rnn.py
deleted file mode 100644
index 2720be9c..00000000
--- a/algos/ppo/ppo_utils/rnn.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import torch
-import torch.nn as nn
-"""RNN modules."""
-
-
-class RNNLayer(nn.Module):
-
-    def __init__(self, inputs_dim, outputs_dim, recurrent_N, use_orthogonal,
-                 device):
-        super(RNNLayer, self).__init__()
-        self._recurrent_N = recurrent_N
-        self._use_orthogonal = use_orthogonal
-
-        self.rnn = nn.GRU(inputs_dim,
-                          outputs_dim,
-                          num_layers=self._recurrent_N)
-        for name, param in self.rnn.named_parameters():
-            if 'bias' in name:
-                nn.init.constant_(param, 0)
-            elif 'weight' in name:
-                if self._use_orthogonal:
-                    nn.init.orthogonal_(param)
-                else:
-                    nn.init.xavier_uniform_(param)
-        self.norm = nn.LayerNorm(outputs_dim)
-        self.to(device)
-
-    def forward(self, x, hxs, masks):
-        if x.size(0) == hxs.size(0):
-            x, hxs = self.rnn(
-                x.unsqueeze(0),
-                (hxs *
-                 masks.repeat(1, self._recurrent_N).unsqueeze(-1)).transpose(
-                     0, 1).contiguous())
-            x = x.squeeze(0)
-            hxs = hxs.transpose(0, 1)
-        else:
-            # x is a (T, N, -1) tensor that has been flatten to (T * N, -1)
-            N = hxs.size(0)
-            T = int(x.size(0) / N)
-
-            # unflatten
-            x = x.view(T, N, x.size(1))
-
-            # Same deal with masks
-            masks = masks.view(T, N)
-
-            # Let's figure out which steps in the sequence have a zero for any agent
-            # We will always assume t=0 has a zero in it as that makes the logic cleaner
-            has_zeros = ((masks[1:] == 0.0).any(
-                dim=-1).nonzero().squeeze().cpu())
-
-            # +1 to correct the masks[1:]
-            if has_zeros.dim() == 0:
-                # Deal with scalar
-                has_zeros = [has_zeros.item() + 1]
-            else:
-                has_zeros = (has_zeros + 1).numpy().tolist()
-
-            # add t=0 and t=T to the list
-            has_zeros = [0] + has_zeros + [T]
-
-            hxs = hxs.transpose(0, 1)
-
-            outputs = []
-            for i in range(len(has_zeros) - 1):
-                # We can now process steps that don't have any zeros in masks together!
-                # This is much faster
-                start_idx = has_zeros[i]
-                end_idx = has_zeros[i + 1]
-                temp = (hxs * masks[start_idx].view(1, -1, 1).repeat(
-                    self._recurrent_N, 1, 1)).contiguous()
-                rnn_scores, hxs = self.rnn(x[start_idx:end_idx], temp)
-                outputs.append(rnn_scores)
-
-            # assert len(outputs) == T
-            # x is a (T, N, -1) tensor
-            x = torch.cat(outputs, dim=0)
-
-            # flatten
-            x = x.reshape(T * N, -1)
-            hxs = hxs.transpose(0, 1)
-
-        x = self.norm(x)
-        return x, hxs
diff --git a/algos/ppo/ppo_utils/util.py b/algos/ppo/ppo_utils/util.py
deleted file mode 100644
index 6f2735cc..00000000
--- a/algos/ppo/ppo_utils/util.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import copy
-import numpy as np
-
-import torch
-import torch.nn as nn
-
-
-def init(module, weight_init, bias_init, gain=1):
-    weight_init(module.weight.data, gain=gain)
-    bias_init(module.bias.data)
-    return module
-
-
-def get_clones(module, N):
-    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
-
-
-def check(input):
-    output = torch.from_numpy(input) if type(input) == np.ndarray else input
-    return output
diff --git a/algos/ppo/r_mappo/__init__.py b/algos/ppo/r_mappo/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/algos/ppo/r_mappo/algorithm/rMAPPOPolicy.py b/algos/ppo/r_mappo/algorithm/rMAPPOPolicy.py
deleted file mode 100644
index c211cdb6..00000000
--- a/algos/ppo/r_mappo/algorithm/rMAPPOPolicy.py
+++ /dev/null
@@ -1,156 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import torch
-from algos.ppo.r_mappo.algorithm.r_actor_critic import R_Actor, R_Critic
-from algos.ppo.utils.util import update_linear_schedule
-
-
-class R_MAPPOPolicy:
-    """
-    MAPPO Policy  class. Wraps actor and critic networks to compute actions and value function predictions.
-
-    :param args: (argparse.Namespace) arguments containing relevant model and policy information.
-    :param obs_space: (gym.Space) observation space.
-    :param cent_obs_space: (gym.Space) value function input space (centralized input for MAPPO, decentralized for IPPO).
-    :param action_space: (gym.Space) action space.
-    :param device: (torch.device) specifies the device to run on (cpu/gpu).
-    """
-
-    def __init__(self,
-                 args,
-                 obs_space,
-                 cent_obs_space,
-                 act_space,
-                 device=torch.device("cpu")):
-        self.device = device
-        self.lr = args.lr
-        self.critic_lr = args.critic_lr
-        self.opti_eps = args.opti_eps
-        self.weight_decay = args.weight_decay
-
-        self.obs_space = obs_space
-        self.share_obs_space = cent_obs_space
-        self.act_space = act_space
-
-        self.actor = R_Actor(args, self.obs_space, self.act_space, self.device)
-        self.critic = R_Critic(args, self.share_obs_space, self.device)
-
-        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
-                                                lr=self.lr,
-                                                eps=self.opti_eps,
-                                                weight_decay=self.weight_decay)
-        self.critic_optimizer = torch.optim.Adam(
-            self.critic.parameters(),
-            lr=self.critic_lr,
-            eps=self.opti_eps,
-            weight_decay=self.weight_decay)
-
-    def lr_decay(self, episode, episodes):
-        """
-        Decay the actor and critic learning rates.
-        :param episode: (int) current training episode.
-        :param episodes: (int) total number of training episodes.
-        """
-        update_linear_schedule(self.actor_optimizer, episode, episodes,
-                               self.lr)
-        update_linear_schedule(self.critic_optimizer, episode, episodes,
-                               self.critic_lr)
-
-    def get_actions(self,
-                    cent_obs,
-                    obs,
-                    rnn_states_actor,
-                    rnn_states_critic,
-                    masks,
-                    available_actions=None,
-                    deterministic=False):
-        """
-        Compute actions and value function predictions for the given inputs.
-        :param cent_obs (np.ndarray): centralized input to the critic.
-        :param obs (np.ndarray): local agent inputs to the actor.
-        :param rnn_states_actor: (np.ndarray) if actor is RNN, RNN states for actor.
-        :param rnn_states_critic: (np.ndarray) if critic is RNN, RNN states for critic.
-        :param masks: (np.ndarray) denotes points at which RNN states should be reset.
-        :param available_actions: (np.ndarray) denotes which actions are available to agent
-                                  (if None, all actions available)
-        :param deterministic: (bool) whether the action should be mode of distribution or should be sampled.
-
-        :return values: (torch.Tensor) value function predictions.
-        :return actions: (torch.Tensor) actions to take.
-        :return action_log_probs: (torch.Tensor) log probabilities of chosen actions.
-        :return rnn_states_actor: (torch.Tensor) updated actor network RNN states.
-        :return rnn_states_critic: (torch.Tensor) updated critic network RNN states.
-        """
-        actions, action_log_probs, rnn_states_actor = self.actor(
-            obs, rnn_states_actor, masks, available_actions, deterministic)
-
-        values, rnn_states_critic = self.critic(cent_obs, rnn_states_critic,
-                                                masks)
-        return values, actions, action_log_probs, rnn_states_actor, rnn_states_critic
-
-    def get_values(self, cent_obs, rnn_states_critic, masks):
-        """
-        Get value function predictions.
-        :param cent_obs (np.ndarray): centralized input to the critic.
-        :param rnn_states_critic: (np.ndarray) if critic is RNN, RNN states for critic.
-        :param masks: (np.ndarray) denotes points at which RNN states should be reset.
-
-        :return values: (torch.Tensor) value function predictions.
-        """
-        values, _ = self.critic(cent_obs, rnn_states_critic, masks)
-        return values
-
-    def evaluate_actions(self,
-                         cent_obs,
-                         obs,
-                         rnn_states_actor,
-                         rnn_states_critic,
-                         action,
-                         masks,
-                         available_actions=None,
-                         active_masks=None):
-        """
-        Get action logprobs / entropy and value function predictions for actor update.
-        :param cent_obs (np.ndarray): centralized input to the critic.
-        :param obs (np.ndarray): local agent inputs to the actor.
-        :param rnn_states_actor: (np.ndarray) if actor is RNN, RNN states for actor.
-        :param rnn_states_critic: (np.ndarray) if critic is RNN, RNN states for critic.
-        :param action: (np.ndarray) actions whose log probabilites and entropy to compute.
-        :param masks: (np.ndarray) denotes points at which RNN states should be reset.
-        :param available_actions: (np.ndarray) denotes which actions are available to agent
-                                  (if None, all actions available)
-        :param active_masks: (torch.Tensor) denotes whether an agent is active or dead.
-
-        :return values: (torch.Tensor) value function predictions.
-        :return action_log_probs: (torch.Tensor) log probabilities of the input actions.
-        :return dist_entropy: (torch.Tensor) action distribution entropy for the given inputs.
-        """
-        action_log_probs, dist_entropy = self.actor.evaluate_actions(
-            obs, rnn_states_actor, action, masks, available_actions,
-            active_masks)
-
-        values, _ = self.critic(cent_obs, rnn_states_critic, masks)
-        return values, action_log_probs, dist_entropy
-
-    def act(self,
-            obs,
-            rnn_states_actor,
-            masks,
-            available_actions=None,
-            deterministic=False):
-        """
-        Compute actions using the given inputs.
-        :param obs (np.ndarray): local agent inputs to the actor.
-        :param rnn_states_actor: (np.ndarray) if actor is RNN, RNN states for actor.
-        :param masks: (np.ndarray) denotes points at which RNN states should be reset.
-        :param available_actions: (np.ndarray) denotes which actions are available to agent
-                                  (if None, all actions available)
-        :param deterministic: (bool) whether the action should be mode of distribution or should be sampled.
-        """
-        actions, _, rnn_states_actor = self.actor(obs, rnn_states_actor, masks,
-                                                  available_actions,
-                                                  deterministic)
-        return actions, rnn_states_actor
diff --git a/algos/ppo/r_mappo/algorithm/r_actor_critic.py b/algos/ppo/r_mappo/algorithm/r_actor_critic.py
deleted file mode 100644
index ee9dfdf0..00000000
--- a/algos/ppo/r_mappo/algorithm/r_actor_critic.py
+++ /dev/null
@@ -1,197 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import torch
-import torch.nn as nn
-from algos.ppo.ppo_utils.util import init, check
-from algos.ppo.ppo_utils.mlp import MLPBase
-from algos.ppo.ppo_utils.rnn import RNNLayer
-from algos.ppo.ppo_utils.act import ACTLayer
-from algos.ppo.ppo_utils.popart import PopArt
-from algos.ppo.utils.util import get_shape_from_obs_space
-
-
-class R_Actor(nn.Module):
-    """
-    Actor network class for MAPPO. Outputs actions given observations.
-    :param args: (argparse.Namespace) arguments containing relevant model information.
-    :param obs_space: (gym.Space) observation space.
-    :param action_space: (gym.Space) action space.
-    :param device: (torch.device) specifies the device to run on (cpu/gpu).
-    """
-
-    def __init__(self,
-                 args,
-                 obs_space,
-                 action_space,
-                 device=torch.device("cpu")):
-        super(R_Actor, self).__init__()
-        self.hidden_size = args.hidden_size
-
-        self._gain = args.gain
-        self._use_orthogonal = args.use_orthogonal
-        self._use_policy_active_masks = args.use_policy_active_masks
-        self._use_naive_recurrent_policy = args.use_naive_recurrent_policy
-        self._use_recurrent_policy = args.use_recurrent_policy
-        self._recurrent_N = args.recurrent_N
-        self.tpdv = dict(dtype=torch.float32, device=device)
-
-        obs_shape = get_shape_from_obs_space(obs_space)
-        base = MLPBase
-        self.base = base(args, obs_shape)
-
-        if self._use_naive_recurrent_policy or self._use_recurrent_policy:
-            self.rnn = RNNLayer(self.hidden_size, self.hidden_size,
-                                self._recurrent_N, self._use_orthogonal,
-                                device)
-
-        self.act = ACTLayer(action_space, self.hidden_size,
-                            self._use_orthogonal, self._gain, device)
-
-        self.to(device)
-
-    def forward(self,
-                obs,
-                rnn_states,
-                masks,
-                available_actions=None,
-                deterministic=False):
-        """
-        Compute actions from the given inputs.
-        :param obs: (np.ndarray / torch.Tensor) observation inputs into network.
-        :param rnn_states: (np.ndarray / torch.Tensor) if RNN network, hidden states for RNN.
-        :param masks: (np.ndarray / torch.Tensor) mask tensor denoting if hidden states should be reinitialized to zeros.
-        :param available_actions: (np.ndarray / torch.Tensor) denotes which actions are available to agent
-                                                              (if None, all actions available)
-        :param deterministic: (bool) whether to sample from action distribution or return the mode.
-
-        :return actions: (torch.Tensor) actions to take.
-        :return action_log_probs: (torch.Tensor) log probabilities of taken actions.
-        :return rnn_states: (torch.Tensor) updated RNN hidden states.
-        """
-        obs = check(obs).to(**self.tpdv)
-        rnn_states = check(rnn_states).to(**self.tpdv)
-        masks = check(masks).to(**self.tpdv)
-        if available_actions is not None:
-            available_actions = check(available_actions).to(**self.tpdv)
-
-        actor_features = self.base(obs)
-
-        if self._use_naive_recurrent_policy or self._use_recurrent_policy:
-            actor_features, rnn_states = self.rnn(actor_features, rnn_states,
-                                                  masks)
-
-        actions, action_log_probs = self.act(actor_features, available_actions,
-                                             deterministic)
-
-        return actions, action_log_probs, rnn_states
-
-    def evaluate_actions(self,
-                         obs,
-                         rnn_states,
-                         action,
-                         masks,
-                         available_actions=None,
-                         active_masks=None):
-        """
-        Compute log probability and entropy of given actions.
-        :param obs: (torch.Tensor) observation inputs into network.
-        :param action: (torch.Tensor) actions whose entropy and log probability to evaluate.
-        :param rnn_states: (torch.Tensor) if RNN network, hidden states for RNN.
-        :param masks: (torch.Tensor) mask tensor denoting if hidden states should be reinitialized to zeros.
-        :param available_actions: (torch.Tensor) denotes which actions are available to agent
-                                                              (if None, all actions available)
-        :param active_masks: (torch.Tensor) denotes whether an agent is active or dead.
-
-        :return action_log_probs: (torch.Tensor) log probabilities of the input actions.
-        :return dist_entropy: (torch.Tensor) action distribution entropy for the given inputs.
-        """
-        obs = check(obs).to(**self.tpdv)
-        rnn_states = check(rnn_states).to(**self.tpdv)
-        action = check(action).to(**self.tpdv)
-        masks = check(masks).to(**self.tpdv)
-        if available_actions is not None:
-            available_actions = check(available_actions).to(**self.tpdv)
-
-        if active_masks is not None:
-            active_masks = check(active_masks).to(**self.tpdv)
-
-        actor_features = self.base(obs)
-
-        if self._use_naive_recurrent_policy or self._use_recurrent_policy:
-            actor_features, rnn_states = self.rnn(actor_features, rnn_states,
-                                                  masks)
-
-        action_log_probs, dist_entropy = self.act.evaluate_actions(
-            actor_features,
-            action,
-            available_actions,
-            active_masks=active_masks
-            if self._use_policy_active_masks else None)
-
-        return action_log_probs, dist_entropy
-
-
-class R_Critic(nn.Module):
-    """
-    Critic network class for MAPPO. Outputs value function predictions given centralized input (MAPPO) or
-                            local observations (IPPO).
-    :param args: (argparse.Namespace) arguments containing relevant model information.
-    :param cent_obs_space: (gym.Space) (centralized) observation space.
-    :param device: (torch.device) specifies the device to run on (cpu/gpu).
-    """
-
-    def __init__(self, args, cent_obs_space, device=torch.device("cpu")):
-        super(R_Critic, self).__init__()
-        self.hidden_size = args.hidden_size
-        self._use_orthogonal = args.use_orthogonal
-        self._use_naive_recurrent_policy = args.use_naive_recurrent_policy
-        self._use_recurrent_policy = args.use_recurrent_policy
-        self._recurrent_N = args.recurrent_N
-        self._use_popart = args.use_popart
-        self.tpdv = dict(dtype=torch.float32, device=device)
-        init_method = [nn.init.xavier_uniform_,
-                       nn.init.orthogonal_][self._use_orthogonal]
-
-        cent_obs_shape = get_shape_from_obs_space(cent_obs_space)
-        base = MLPBase
-        self.base = base(args, cent_obs_shape)
-
-        if self._use_naive_recurrent_policy or self._use_recurrent_policy:
-            self.rnn = RNNLayer(self.hidden_size, self.hidden_size,
-                                self._recurrent_N, self._use_orthogonal,
-                                device)
-
-        def init_(m):
-            return init(m, init_method, lambda x: nn.init.constant_(x, 0))
-
-        if self._use_popart:
-            self.v_out = init_(PopArt(self.hidden_size, 1, device=device))
-        else:
-            self.v_out = init_(nn.Linear(self.hidden_size, 1))
-
-        self.to(device)
-
-    def forward(self, cent_obs, rnn_states, masks):
-        """
-        Compute actions from the given inputs.
-        :param cent_obs: (np.ndarray / torch.Tensor) observation inputs into network.
-        :param rnn_states: (np.ndarray / torch.Tensor) if RNN network, hidden states for RNN.
-        :param masks: (np.ndarray / torch.Tensor) mask tensor denoting if RNN states should be reinitialized to zeros.
-
-        :return values: (torch.Tensor) value function predictions.
-        :return rnn_states: (torch.Tensor) updated RNN hidden states.
-        """
-        cent_obs = check(cent_obs).to(**self.tpdv)
-        rnn_states = check(rnn_states).to(**self.tpdv)
-        masks = check(masks).to(**self.tpdv)
-
-        critic_features = self.base(cent_obs)
-        if self._use_naive_recurrent_policy or self._use_recurrent_policy:
-            critic_features, rnn_states = self.rnn(critic_features, rnn_states,
-                                                   masks)
-        values = self.v_out(critic_features)
-
-        return values, rnn_states
diff --git a/algos/ppo/r_mappo/r_mappo.py b/algos/ppo/r_mappo/r_mappo.py
deleted file mode 100644
index 0bae8b24..00000000
--- a/algos/ppo/r_mappo/r_mappo.py
+++ /dev/null
@@ -1,244 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import numpy as np
-import torch
-import torch.nn as nn
-from algos.ppo.utils.util import get_gard_norm, huber_loss, mse_loss
-from algos.ppo.utils.valuenorm import ValueNorm
-from algos.ppo.ppo_utils.util import check
-
-
-class R_MAPPO():
-    """
-    Trainer class for MAPPO to update policies.
-    :param args: (argparse.Namespace) arguments containing relevant model, policy, and env information.
-    :param policy: (R_MAPPO_Policy) policy to update.
-    :param device: (torch.device) specifies the device to run on (cpu/gpu).
-    """
-
-    def __init__(self, args, policy, device=torch.device("cpu")):
-
-        self.device = device
-        self.tpdv = dict(dtype=torch.float32, device=device)
-        self.policy = policy
-
-        self.clip_param = args.clip_param
-        self.ppo_epoch = args.ppo_epoch
-        self.num_mini_batch = args.num_mini_batch
-        self.data_chunk_length = args.data_chunk_length
-        self.value_loss_coef = args.value_loss_coef
-        self.entropy_coef = args.entropy_coef
-        self.max_grad_norm = args.max_grad_norm
-        self.huber_delta = args.huber_delta
-
-        self._use_recurrent_policy = args.use_recurrent_policy
-        self._use_naive_recurrent = args.use_naive_recurrent_policy
-        self._use_max_grad_norm = args.use_max_grad_norm
-        self._use_clipped_value_loss = args.use_clipped_value_loss
-        self._use_huber_loss = args.use_huber_loss
-        self._use_popart = args.use_popart
-        self._use_valuenorm = args.use_valuenorm
-        self._use_value_active_masks = args.use_value_active_masks
-        self._use_policy_active_masks = args.use_policy_active_masks
-
-        assert (self._use_popart and self._use_valuenorm) == False, (
-            "self._use_popart and self._use_valuenorm can not be set True simultaneously"
-        )
-
-        if self._use_popart:
-            self.value_normalizer = self.policy.critic.v_out
-        elif self._use_valuenorm:
-            self.value_normalizer = ValueNorm(1, device=self.device)
-        else:
-            self.value_normalizer = None
-
-    def cal_value_loss(self, values, value_preds_batch, return_batch,
-                       active_masks_batch):
-        """
-        Calculate value function loss.
-        :param values: (torch.Tensor) value function predictions.
-        :param value_preds_batch: (torch.Tensor) "old" value  predictions from data batch (used for value clip loss)
-        :param return_batch: (torch.Tensor) reward to go returns.
-        :param active_masks_batch: (torch.Tensor) denotes if agent is active or dead at a given timesep.
-
-        :return value_loss: (torch.Tensor) value function loss.
-        """
-        value_pred_clipped = value_preds_batch + (
-            values - value_preds_batch).clamp(-self.clip_param,
-                                              self.clip_param)
-        if self._use_popart or self._use_valuenorm:
-            self.value_normalizer.update(return_batch)
-            error_clipped = self.value_normalizer.normalize(
-                return_batch) - value_pred_clipped
-            error_original = self.value_normalizer.normalize(
-                return_batch) - values
-        else:
-            error_clipped = return_batch - value_pred_clipped
-            error_original = return_batch - values
-
-        if self._use_huber_loss:
-            value_loss_clipped = huber_loss(error_clipped, self.huber_delta)
-            value_loss_original = huber_loss(error_original, self.huber_delta)
-        else:
-            value_loss_clipped = mse_loss(error_clipped)
-            value_loss_original = mse_loss(error_original)
-
-        if self._use_clipped_value_loss:
-            value_loss = torch.max(value_loss_original, value_loss_clipped)
-        else:
-            value_loss = value_loss_original
-
-        if self._use_value_active_masks:
-            value_loss = (value_loss *
-                          active_masks_batch).sum() / active_masks_batch.sum()
-        else:
-            value_loss = value_loss.mean()
-
-        return value_loss
-
-    def ppo_update(self, sample, update_actor=True):
-        """
-        Update actor and critic networks.
-        :param sample: (Tuple) contains data batch with which to update networks.
-        :update_actor: (bool) whether to update actor network.
-
-        :return value_loss: (torch.Tensor) value function loss.
-        :return critic_grad_norm: (torch.Tensor) gradient norm from critic up9date.
-        ;return policy_loss: (torch.Tensor) actor(policy) loss value.
-        :return dist_entropy: (torch.Tensor) action entropies.
-        :return actor_grad_norm: (torch.Tensor) gradient norm from actor update.
-        :return imp_weights: (torch.Tensor) importance sampling weights.
-        """
-        share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, \
-        value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch, \
-        adv_targ, available_actions_batch = sample
-
-        old_action_log_probs_batch = check(old_action_log_probs_batch).to(
-            **self.tpdv)
-        adv_targ = check(adv_targ).to(**self.tpdv)
-        value_preds_batch = check(value_preds_batch).to(**self.tpdv)
-        return_batch = check(return_batch).to(**self.tpdv)
-        active_masks_batch = check(active_masks_batch).to(**self.tpdv)
-
-        # Reshape to do in a single forward pass for all steps
-        values, action_log_probs, dist_entropy = self.policy.evaluate_actions(
-            share_obs_batch, obs_batch, rnn_states_batch,
-            rnn_states_critic_batch, actions_batch, masks_batch,
-            available_actions_batch, active_masks_batch)
-        # actor update
-        imp_weights = torch.exp(action_log_probs - old_action_log_probs_batch)
-
-        surr1 = imp_weights * adv_targ
-        surr2 = torch.clamp(imp_weights, 1.0 - self.clip_param,
-                            1.0 + self.clip_param) * adv_targ
-
-        if self._use_policy_active_masks:
-            policy_action_loss = (
-                -torch.sum(torch.min(surr1, surr2), dim=-1, keepdim=True) *
-                active_masks_batch).sum() / active_masks_batch.sum()
-        else:
-            policy_action_loss = -torch.sum(
-                torch.min(surr1, surr2), dim=-1, keepdim=True).mean()
-
-        policy_loss = policy_action_loss
-
-        self.policy.actor_optimizer.zero_grad()
-
-        if update_actor:
-            (policy_loss - dist_entropy * self.entropy_coef).backward()
-
-        if self._use_max_grad_norm:
-            actor_grad_norm = nn.utils.clip_grad_norm_(
-                self.policy.actor.parameters(), self.max_grad_norm)
-        else:
-            actor_grad_norm = get_gard_norm(self.policy.actor.parameters())
-
-        self.policy.actor_optimizer.step()
-
-        # critic update
-        value_loss = self.cal_value_loss(values, value_preds_batch,
-                                         return_batch, active_masks_batch)
-
-        self.policy.critic_optimizer.zero_grad()
-
-        (value_loss * self.value_loss_coef).backward()
-
-        if self._use_max_grad_norm:
-            critic_grad_norm = nn.utils.clip_grad_norm_(
-                self.policy.critic.parameters(), self.max_grad_norm)
-        else:
-            critic_grad_norm = get_gard_norm(self.policy.critic.parameters())
-
-        self.policy.critic_optimizer.step()
-
-        return value_loss, critic_grad_norm, policy_loss, dist_entropy, actor_grad_norm, imp_weights
-
-    def train(self, buffer, update_actor=True):
-        """
-        Perform a training update using minibatch GD.
-        :param buffer: (SharedReplayBuffer) buffer containing training data.
-        :param update_actor: (bool) whether to update actor network.
-
-        :return train_info: (dict) contains information regarding training update (e.g. loss, grad norms, etc).
-        """
-        if self._use_popart or self._use_valuenorm:
-            advantages = buffer.returns[:
-                                        -1] - self.value_normalizer.denormalize(
-                                            buffer.value_preds[:-1])
-        else:
-            advantages = buffer.returns[:-1] - buffer.value_preds[:-1]
-        advantages_copy = advantages.copy()
-        advantages_copy[buffer.active_masks[:-1] == 0.0] = np.nan
-        mean_advantages = np.nanmean(advantages_copy)
-        std_advantages = np.nanstd(advantages_copy)
-        advantages = (advantages - mean_advantages) / (std_advantages + 1e-5)
-
-        train_info = {}
-
-        train_info['value_loss'] = 0
-        train_info['policy_loss'] = 0
-        train_info['dist_entropy'] = 0
-        train_info['actor_grad_norm'] = 0
-        train_info['critic_grad_norm'] = 0
-        train_info['ratio'] = 0
-
-        for _ in range(self.ppo_epoch):
-            if self._use_recurrent_policy:
-                data_generator = buffer.recurrent_generator(
-                    advantages, self.num_mini_batch, self.data_chunk_length)
-            elif self._use_naive_recurrent:
-                data_generator = buffer.naive_recurrent_generator(
-                    advantages, self.num_mini_batch)
-            else:
-                data_generator = buffer.feed_forward_generator(
-                    advantages, self.num_mini_batch)
-
-            for sample in data_generator:
-
-                value_loss, critic_grad_norm, policy_loss, dist_entropy, actor_grad_norm, imp_weights \
-                    = self.ppo_update(sample, update_actor)
-
-                train_info['value_loss'] += value_loss.item()
-                train_info['policy_loss'] += policy_loss.item()
-                train_info['dist_entropy'] += dist_entropy.item()
-                train_info['actor_grad_norm'] += actor_grad_norm
-                train_info['critic_grad_norm'] += critic_grad_norm
-                train_info['ratio'] += imp_weights.mean()
-
-        num_updates = self.ppo_epoch * self.num_mini_batch
-
-        for k in train_info.keys():
-            train_info[k] /= num_updates
-
-        return train_info
-
-    def prep_training(self):
-        self.policy.actor.train()
-        self.policy.critic.train()
-
-    def prep_rollout(self):
-        self.policy.actor.eval()
-        self.policy.critic.eval()
diff --git a/algos/ppo/utils/__init__.py b/algos/ppo/utils/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/algos/ppo/utils/multi_discrete.py b/algos/ppo/utils/multi_discrete.py
deleted file mode 100644
index 64f106fa..00000000
--- a/algos/ppo/utils/multi_discrete.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import gym
-import numpy as np
-
-
-# An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates)
-# (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py)
-class MultiDiscrete(gym.Space):
-    """
-    - The multi-discrete action space consists of a series of discrete action spaces with different parameters
-    - It can be adapted to both a Discrete action space or a continuous (Box) action space
-    - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
-    - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space where the discrete action space can take any integers from `min` to `max` (both inclusive)
-    Note: A value of 0 always need to represent the NOOP action.
-    e.g. Nintendo Game Controller
-    - Can be conceptualized as 3 discrete action spaces:
-        1) Arrow Keys: Discrete 5  - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4]  - params: min: 0, max: 4
-        2) Button A:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
-        3) Button B:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
-    - Can be initialized as
-        MultiDiscrete([ [0,4], [0,1], [0,1] ])
-    """
-
-    def __init__(self, array_of_param_array):
-        self.low = np.array([x[0] for x in array_of_param_array])
-        self.high = np.array([x[1] for x in array_of_param_array])
-        self.num_discrete_space = self.low.shape[0]
-        self.n = np.sum(self.high) + 2
-
-    def sample(self):
-        """ Returns a array with one sample from each discrete action space """
-        # For each row: round(random .* (max - min) + min, 0)
-        random_array = np.random.rand(self.num_discrete_space)
-        return [
-            int(x) for x in np.floor(
-                np.multiply((self.high - self.low + 1.), random_array) +
-                self.low)
-        ]
-
-    def contains(self, x):
-        return len(x) == self.num_discrete_space and (
-            np.array(x) >= self.low).all() and (np.array(x) <=
-                                                self.high).all()
-
-    @property
-    def shape(self):
-        return self.num_discrete_space
-
-    def __repr__(self):
-        return "MultiDiscrete" + str(self.num_discrete_space)
-
-    def __eq__(self, other):
-        return np.array_equal(self.low, other.low) and np.array_equal(
-            self.high, other.high)
diff --git a/algos/ppo/utils/separated_buffer.py b/algos/ppo/utils/separated_buffer.py
deleted file mode 100644
index 342b51ff..00000000
--- a/algos/ppo/utils/separated_buffer.py
+++ /dev/null
@@ -1,505 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import torch
-import numpy as np
-from collections import defaultdict
-
-from algos.ppo.utils.util import check, get_shape_from_obs_space, get_shape_from_act_space
-
-
-def _flatten(T, N, x):
-    return x.reshape(T * N, *x.shape[2:])
-
-
-def _cast(x):
-    return x.transpose(1, 0, 2).reshape(-1, *x.shape[2:])
-
-
-class SeparatedReplayBuffer(object):
-
-    def __init__(self, args, obs_space, share_obs_space, act_space):
-        self.episode_length = args.episode_length
-        self.n_rollout_threads = args.n_rollout_threads
-        self.rnn_hidden_size = args.hidden_size
-        self.recurrent_N = args.recurrent_N
-        self.gamma = args.gamma
-        self.gae_lambda = args.gae_lambda
-        self._use_gae = args.use_gae
-        self._use_popart = args.use_popart
-        self._use_valuenorm = args.use_valuenorm
-        self._use_proper_time_limits = args.use_proper_time_limits
-
-        obs_shape = get_shape_from_obs_space(obs_space)
-        share_obs_shape = get_shape_from_obs_space(share_obs_space)
-
-        if type(obs_shape[-1]) == list:
-            obs_shape = obs_shape[:1]
-
-        if type(share_obs_shape[-1]) == list:
-            share_obs_shape = share_obs_shape[:1]
-
-        self.share_obs = np.zeros((self.episode_length + 1,
-                                   self.n_rollout_threads, *share_obs_shape),
-                                  dtype=np.float32)
-        self.obs = np.zeros(
-            (self.episode_length + 1, self.n_rollout_threads, *obs_shape),
-            dtype=np.float32)
-
-        self.rnn_states = np.zeros(
-            (self.episode_length + 1, self.n_rollout_threads, self.recurrent_N,
-             self.rnn_hidden_size),
-            dtype=np.float32)
-        self.rnn_states_critic = np.zeros_like(self.rnn_states)
-
-        self.value_preds = np.zeros(
-            (self.episode_length + 1, self.n_rollout_threads, 1),
-            dtype=np.float32)
-        self.returns = np.zeros(
-            (self.episode_length + 1, self.n_rollout_threads, 1),
-            dtype=np.float32)
-
-        if act_space.__class__.__name__ == 'Discrete':
-            self.available_actions = np.ones(
-                (self.episode_length + 1, self.n_rollout_threads, act_space.n),
-                dtype=np.float32)
-        else:
-            self.available_actions = None
-
-        act_shape = get_shape_from_act_space(act_space)
-
-        self.actions = np.zeros(
-            (self.episode_length, self.n_rollout_threads, act_shape),
-            dtype=np.float32)
-        self.action_log_probs = np.zeros(
-            (self.episode_length, self.n_rollout_threads, act_shape),
-            dtype=np.float32)
-        self.rewards = np.zeros(
-            (self.episode_length, self.n_rollout_threads, 1), dtype=np.float32)
-
-        self.masks = np.ones(
-            (self.episode_length + 1, self.n_rollout_threads, 1),
-            dtype=np.float32)
-        self.bad_masks = np.ones_like(self.masks)
-        self.active_masks = np.ones_like(self.masks)
-
-        self.step = 0
-
-    def insert(self,
-               share_obs,
-               obs,
-               rnn_states,
-               rnn_states_critic,
-               actions,
-               action_log_probs,
-               value_preds,
-               rewards,
-               masks,
-               bad_masks=None,
-               active_masks=None,
-               available_actions=None):
-        self.share_obs[self.step + 1] = share_obs.copy()
-        self.obs[self.step + 1] = obs.copy()
-        self.rnn_states[self.step + 1] = rnn_states.copy()
-        self.rnn_states_critic[self.step + 1] = rnn_states_critic.copy()
-        self.actions[self.step] = actions.copy()
-        self.action_log_probs[self.step] = action_log_probs.copy()
-        self.value_preds[self.step] = value_preds.copy()
-        self.rewards[self.step] = rewards.copy()
-        self.masks[self.step + 1] = masks.copy()
-        if bad_masks is not None:
-            self.bad_masks[self.step + 1] = bad_masks.copy()
-        if active_masks is not None:
-            self.active_masks[self.step + 1] = active_masks.copy()
-        if available_actions is not None:
-            self.available_actions[self.step + 1] = available_actions.copy()
-
-        self.step = (self.step + 1) % self.episode_length
-
-    def chooseinsert(self,
-                     share_obs,
-                     obs,
-                     rnn_states,
-                     rnn_states_critic,
-                     actions,
-                     action_log_probs,
-                     value_preds,
-                     rewards,
-                     masks,
-                     bad_masks=None,
-                     active_masks=None,
-                     available_actions=None):
-        self.share_obs[self.step] = share_obs.copy()
-        self.obs[self.step] = obs.copy()
-        self.rnn_states[self.step + 1] = rnn_states.copy()
-        self.rnn_states_critic[self.step + 1] = rnn_states_critic.copy()
-        self.actions[self.step] = actions.copy()
-        self.action_log_probs[self.step] = action_log_probs.copy()
-        self.value_preds[self.step] = value_preds.copy()
-        self.rewards[self.step] = rewards.copy()
-        self.masks[self.step + 1] = masks.copy()
-        if bad_masks is not None:
-            self.bad_masks[self.step + 1] = bad_masks.copy()
-        if active_masks is not None:
-            self.active_masks[self.step] = active_masks.copy()
-        if available_actions is not None:
-            self.available_actions[self.step] = available_actions.copy()
-
-        self.step = (self.step + 1) % self.episode_length
-
-    def after_update(self):
-        self.share_obs[0] = self.share_obs[-1].copy()
-        self.obs[0] = self.obs[-1].copy()
-        self.rnn_states[0] = self.rnn_states[-1].copy()
-        self.rnn_states_critic[0] = self.rnn_states_critic[-1].copy()
-        self.masks[0] = self.masks[-1].copy()
-        self.bad_masks[0] = self.bad_masks[-1].copy()
-        self.active_masks[0] = self.active_masks[-1].copy()
-        if self.available_actions is not None:
-            self.available_actions[0] = self.available_actions[-1].copy()
-
-    def chooseafter_update(self):
-        self.rnn_states[0] = self.rnn_states[-1].copy()
-        self.rnn_states_critic[0] = self.rnn_states_critic[-1].copy()
-        self.masks[0] = self.masks[-1].copy()
-        self.bad_masks[0] = self.bad_masks[-1].copy()
-
-    def compute_returns(self, next_value, value_normalizer=None):
-        if self._use_proper_time_limits:
-            if self._use_gae:
-                self.value_preds[-1] = next_value
-                gae = 0
-                for step in reversed(range(self.rewards.shape[0])):
-                    if self._use_popart or self._use_valuenorm:
-                        delta = self.rewards[
-                            step] + self.gamma * value_normalizer.denormalize(
-                                self.value_preds[step + 1]) * self.masks[
-                                    step + 1] - value_normalizer.denormalize(
-                                        self.value_preds[step])
-                        gae = delta + self.gamma * self.gae_lambda * self.masks[
-                            step + 1] * gae
-                        gae = gae * self.bad_masks[step + 1]
-                        self.returns[
-                            step] = gae + value_normalizer.denormalize(
-                                self.value_preds[step])
-                    else:
-                        delta = self.rewards[
-                            step] + self.gamma * self.value_preds[
-                                step + 1] * self.masks[
-                                    step + 1] - self.value_preds[step]
-                        gae = delta + self.gamma * self.gae_lambda * self.masks[
-                            step + 1] * gae
-                        gae = gae * self.bad_masks[step + 1]
-                        self.returns[step] = gae + self.value_preds[step]
-            else:
-                self.returns[-1] = next_value
-                for step in reversed(range(self.rewards.shape[0])):
-                    if self._use_popart:
-                        self.returns[step] = (self.returns[step + 1] * self.gamma * self.masks[step + 1] + self.rewards[step]) * self.bad_masks[step + 1] \
-                            + (1 - self.bad_masks[step + 1]) * value_normalizer.denormalize(self.value_preds[step])
-                    else:
-                        self.returns[step] = (self.returns[step + 1] * self.gamma * self.masks[step + 1] + self.rewards[step]) * self.bad_masks[step + 1] \
-                            + (1 - self.bad_masks[step + 1]) * self.value_preds[step]
-        else:
-            if self._use_gae:
-                self.value_preds[-1] = next_value
-                gae = 0
-                for step in reversed(range(self.rewards.shape[0])):
-                    if self._use_popart or self._use_valuenorm:
-                        delta = self.rewards[
-                            step] + self.gamma * value_normalizer.denormalize(
-                                self.value_preds[step + 1]) * self.masks[
-                                    step + 1] - value_normalizer.denormalize(
-                                        self.value_preds[step])
-                        gae = delta + self.gamma * self.gae_lambda * self.masks[
-                            step + 1] * gae
-                        self.returns[
-                            step] = gae + value_normalizer.denormalize(
-                                self.value_preds[step])
-                    else:
-                        delta = self.rewards[
-                            step] + self.gamma * self.value_preds[
-                                step + 1] * self.masks[
-                                    step + 1] - self.value_preds[step]
-                        gae = delta + self.gamma * self.gae_lambda * self.masks[
-                            step + 1] * gae
-                        self.returns[step] = gae + self.value_preds[step]
-            else:
-                self.returns[-1] = next_value
-                for step in reversed(range(self.rewards.shape[0])):
-                    self.returns[step] = self.returns[
-                        step + 1] * self.gamma * self.masks[
-                            step + 1] + self.rewards[step]
-
-    def feed_forward_generator(self,
-                               advantages,
-                               num_mini_batch=None,
-                               mini_batch_size=None):
-        episode_length, n_rollout_threads = self.rewards.shape[0:2]
-        batch_size = n_rollout_threads * episode_length
-
-        if mini_batch_size is None:
-            assert batch_size >= num_mini_batch, (
-                "PPO requires the number of processes ({}) "
-                "* number of steps ({}) = {} "
-                "to be greater than or equal to the number of PPO mini batches ({})."
-                "".format(n_rollout_threads, episode_length,
-                          n_rollout_threads * episode_length, num_mini_batch))
-            mini_batch_size = batch_size // num_mini_batch
-
-        rand = torch.randperm(batch_size).numpy()
-        sampler = [
-            rand[i * mini_batch_size:(i + 1) * mini_batch_size]
-            for i in range(num_mini_batch)
-        ]
-
-        share_obs = self.share_obs[:-1].reshape(-1, *self.share_obs.shape[2:])
-        obs = self.obs[:-1].reshape(-1, *self.obs.shape[2:])
-        rnn_states = self.rnn_states[:-1].reshape(-1,
-                                                  *self.rnn_states.shape[2:])
-        rnn_states_critic = self.rnn_states_critic[:-1].reshape(
-            -1, *self.rnn_states_critic.shape[2:])
-        actions = self.actions.reshape(-1, self.actions.shape[-1])
-        if self.available_actions is not None:
-            available_actions = self.available_actions[:-1].reshape(
-                -1, self.available_actions.shape[-1])
-        value_preds = self.value_preds[:-1].reshape(-1, 1)
-        returns = self.returns[:-1].reshape(-1, 1)
-        masks = self.masks[:-1].reshape(-1, 1)
-        active_masks = self.active_masks[:-1].reshape(-1, 1)
-        action_log_probs = self.action_log_probs.reshape(
-            -1, self.action_log_probs.shape[-1])
-        advantages = advantages.reshape(-1, 1)
-
-        for indices in sampler:
-            # obs size [T+1 N Dim]-->[T N Dim]-->[T*N,Dim]-->[index,Dim]
-            share_obs_batch = share_obs[indices]
-            obs_batch = obs[indices]
-            rnn_states_batch = rnn_states[indices]
-            rnn_states_critic_batch = rnn_states_critic[indices]
-            actions_batch = actions[indices]
-            if self.available_actions is not None:
-                available_actions_batch = available_actions[indices]
-            else:
-                available_actions_batch = None
-            value_preds_batch = value_preds[indices]
-            return_batch = returns[indices]
-            masks_batch = masks[indices]
-            active_masks_batch = active_masks[indices]
-            old_action_log_probs_batch = action_log_probs[indices]
-            if advantages is None:
-                adv_targ = None
-            else:
-                adv_targ = advantages[indices]
-
-            yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch, adv_targ, available_actions_batch
-
-    def naive_recurrent_generator(self, advantages, num_mini_batch):
-        n_rollout_threads = self.rewards.shape[1]
-        assert n_rollout_threads >= num_mini_batch, (
-            "PPO requires the number of processes ({}) "
-            "to be greater than or equal to the number of "
-            "PPO mini batches ({}).".format(n_rollout_threads, num_mini_batch))
-        num_envs_per_batch = n_rollout_threads // num_mini_batch
-        perm = torch.randperm(n_rollout_threads).numpy()
-        for start_ind in range(0, n_rollout_threads, num_envs_per_batch):
-            share_obs_batch = []
-            obs_batch = []
-            rnn_states_batch = []
-            rnn_states_critic_batch = []
-            actions_batch = []
-            available_actions_batch = []
-            value_preds_batch = []
-            return_batch = []
-            masks_batch = []
-            active_masks_batch = []
-            old_action_log_probs_batch = []
-            adv_targ = []
-
-            for offset in range(num_envs_per_batch):
-                ind = perm[start_ind + offset]
-                share_obs_batch.append(self.share_obs[:-1, ind])
-                obs_batch.append(self.obs[:-1, ind])
-                rnn_states_batch.append(self.rnn_states[0:1, ind])
-                rnn_states_critic_batch.append(self.rnn_states_critic[0:1,
-                                                                      ind])
-                actions_batch.append(self.actions[:, ind])
-                if self.available_actions is not None:
-                    available_actions_batch.append(self.available_actions[:-1,
-                                                                          ind])
-                value_preds_batch.append(self.value_preds[:-1, ind])
-                return_batch.append(self.returns[:-1, ind])
-                masks_batch.append(self.masks[:-1, ind])
-                active_masks_batch.append(self.active_masks[:-1, ind])
-                old_action_log_probs_batch.append(self.action_log_probs[:,
-                                                                        ind])
-                adv_targ.append(advantages[:, ind])
-
-            # [N[T, dim]]
-            T, N = self.episode_length, num_envs_per_batch
-            # These are all from_numpys of size (T, N, -1)
-            share_obs_batch = np.stack(share_obs_batch, 1)
-            obs_batch = np.stack(obs_batch, 1)
-            actions_batch = np.stack(actions_batch, 1)
-            if self.available_actions is not None:
-                available_actions_batch = np.stack(available_actions_batch, 1)
-            value_preds_batch = np.stack(value_preds_batch, 1)
-            return_batch = np.stack(return_batch, 1)
-            masks_batch = np.stack(masks_batch, 1)
-            active_masks_batch = np.stack(active_masks_batch, 1)
-            old_action_log_probs_batch = np.stack(old_action_log_probs_batch,
-                                                  1)
-            adv_targ = np.stack(adv_targ, 1)
-
-            # States is just a (N, -1) from_numpy [N[1,dim]]
-            rnn_states_batch = np.stack(rnn_states_batch,
-                                        1).reshape(N,
-                                                   *self.rnn_states.shape[2:])
-            rnn_states_critic_batch = np.stack(
-                rnn_states_critic_batch,
-                1).reshape(N, *self.rnn_states_critic.shape[2:])
-
-            # Flatten the (T, N, ...) from_numpys to (T * N, ...)
-            share_obs_batch = _flatten(T, N, share_obs_batch)
-            obs_batch = _flatten(T, N, obs_batch)
-            actions_batch = _flatten(T, N, actions_batch)
-            if self.available_actions is not None:
-                available_actions_batch = _flatten(T, N,
-                                                   available_actions_batch)
-            else:
-                available_actions_batch = None
-            value_preds_batch = _flatten(T, N, value_preds_batch)
-            return_batch = _flatten(T, N, return_batch)
-            masks_batch = _flatten(T, N, masks_batch)
-            active_masks_batch = _flatten(T, N, active_masks_batch)
-            old_action_log_probs_batch = _flatten(T, N,
-                                                  old_action_log_probs_batch)
-            adv_targ = _flatten(T, N, adv_targ)
-
-            yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch, adv_targ, available_actions_batch
-
-    def recurrent_generator(self, advantages, num_mini_batch,
-                            data_chunk_length):
-        episode_length, n_rollout_threads = self.rewards.shape[0:2]
-        batch_size = n_rollout_threads * episode_length
-        data_chunks = batch_size // data_chunk_length  # [C=r*T/L]
-        mini_batch_size = data_chunks // num_mini_batch
-
-        assert episode_length * n_rollout_threads >= data_chunk_length, (
-            "PPO requires the number of processes ({}) * episode length ({}) "
-            "to be greater than or equal to the number of "
-            "data chunk length ({}).".format(n_rollout_threads, episode_length,
-                                             data_chunk_length))
-        assert data_chunks >= 2, ("need larger batch size")
-
-        rand = torch.randperm(data_chunks).numpy()
-        sampler = [
-            rand[i * mini_batch_size:(i + 1) * mini_batch_size]
-            for i in range(num_mini_batch)
-        ]
-
-        if len(self.share_obs.shape) > 3:
-            share_obs = self.share_obs[:-1].transpose(1, 0, 2, 3, 4).reshape(
-                -1, *self.share_obs.shape[2:])
-            obs = self.obs[:-1].transpose(1, 0, 2, 3,
-                                          4).reshape(-1, *self.obs.shape[2:])
-        else:
-            share_obs = _cast(self.share_obs[:-1])
-            obs = _cast(self.obs[:-1])
-
-        actions = _cast(self.actions)
-        action_log_probs = _cast(self.action_log_probs)
-        advantages = _cast(advantages)
-        value_preds = _cast(self.value_preds[:-1])
-        returns = _cast(self.returns[:-1])
-        masks = _cast(self.masks[:-1])
-        active_masks = _cast(self.active_masks[:-1])
-        # rnn_states = _cast(self.rnn_states[:-1])
-        # rnn_states_critic = _cast(self.rnn_states_critic[:-1])
-        rnn_states = self.rnn_states[:-1].transpose(1, 0, 2, 3).reshape(
-            -1, *self.rnn_states.shape[2:])
-        rnn_states_critic = self.rnn_states_critic[:-1].transpose(
-            1, 0, 2, 3).reshape(-1, *self.rnn_states_critic.shape[2:])
-
-        if self.available_actions is not None:
-            available_actions = _cast(self.available_actions[:-1])
-
-        for indices in sampler:
-            share_obs_batch = []
-            obs_batch = []
-            rnn_states_batch = []
-            rnn_states_critic_batch = []
-            actions_batch = []
-            available_actions_batch = []
-            value_preds_batch = []
-            return_batch = []
-            masks_batch = []
-            active_masks_batch = []
-            old_action_log_probs_batch = []
-            adv_targ = []
-
-            for index in indices:
-                ind = index * data_chunk_length
-                # size [T+1 N M Dim]-->[T N Dim]-->[N T Dim]-->[T*N,Dim]-->[L,Dim]
-                share_obs_batch.append(share_obs[ind:ind + data_chunk_length])
-                obs_batch.append(obs[ind:ind + data_chunk_length])
-                actions_batch.append(actions[ind:ind + data_chunk_length])
-                if self.available_actions is not None:
-                    available_actions_batch.append(
-                        available_actions[ind:ind + data_chunk_length])
-                value_preds_batch.append(value_preds[ind:ind +
-                                                     data_chunk_length])
-                return_batch.append(returns[ind:ind + data_chunk_length])
-                masks_batch.append(masks[ind:ind + data_chunk_length])
-                active_masks_batch.append(active_masks[ind:ind +
-                                                       data_chunk_length])
-                old_action_log_probs_batch.append(
-                    action_log_probs[ind:ind + data_chunk_length])
-                adv_targ.append(advantages[ind:ind + data_chunk_length])
-                # size [T+1 N Dim]-->[T N Dim]-->[T*N,Dim]-->[1,Dim]
-                rnn_states_batch.append(rnn_states[ind])
-                rnn_states_critic_batch.append(rnn_states_critic[ind])
-
-            L, N = data_chunk_length, mini_batch_size
-
-            # These are all from_numpys of size (N, L, Dim)
-            share_obs_batch = np.stack(share_obs_batch)
-            obs_batch = np.stack(obs_batch)
-
-            actions_batch = np.stack(actions_batch)
-            if self.available_actions is not None:
-                available_actions_batch = np.stack(available_actions_batch)
-            value_preds_batch = np.stack(value_preds_batch)
-            return_batch = np.stack(return_batch)
-            masks_batch = np.stack(masks_batch)
-            active_masks_batch = np.stack(active_masks_batch)
-            old_action_log_probs_batch = np.stack(old_action_log_probs_batch)
-            adv_targ = np.stack(adv_targ)
-
-            # States is just a (N, -1) from_numpy
-            rnn_states_batch = np.stack(rnn_states_batch).reshape(
-                N, *self.rnn_states.shape[2:])
-            rnn_states_critic_batch = np.stack(
-                rnn_states_critic_batch).reshape(
-                    N, *self.rnn_states_critic.shape[2:])
-
-            # Flatten the (L, N, ...) from_numpys to (L * N, ...)
-            share_obs_batch = _flatten(L, N, share_obs_batch)
-            obs_batch = _flatten(L, N, obs_batch)
-            actions_batch = _flatten(L, N, actions_batch)
-            if self.available_actions is not None:
-                available_actions_batch = _flatten(L, N,
-                                                   available_actions_batch)
-            else:
-                available_actions_batch = None
-            value_preds_batch = _flatten(L, N, value_preds_batch)
-            return_batch = _flatten(L, N, return_batch)
-            masks_batch = _flatten(L, N, masks_batch)
-            active_masks_batch = _flatten(L, N, active_masks_batch)
-            old_action_log_probs_batch = _flatten(L, N,
-                                                  old_action_log_probs_batch)
-            adv_targ = _flatten(L, N, adv_targ)
-
-            yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch, adv_targ, available_actions_batch
diff --git a/algos/ppo/utils/shared_buffer.py b/algos/ppo/utils/shared_buffer.py
deleted file mode 100644
index 5bd6c20a..00000000
--- a/algos/ppo/utils/shared_buffer.py
+++ /dev/null
@@ -1,584 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import torch
-import numpy as np
-from algos.ppo.utils.util import get_shape_from_obs_space, get_shape_from_act_space
-
-
-def _flatten(T, N, x):
-    return x.reshape(T * N, *x.shape[2:])
-
-
-def _cast(x):
-    return x.transpose(1, 2, 0, 3).reshape(-1, *x.shape[3:])
-
-
-class SharedReplayBuffer(object):
-    """
-    Buffer to store training data.
-    :param args: (argparse.Namespace) arguments containing relevant model, policy, and env information.
-    :param num_agents: (int) number of agents in the env.
-    :param obs_space: (gym.Space) observation space of agents.
-    :param cent_obs_space: (gym.Space) centralized observation space of agents.
-    :param act_space: (gym.Space) action space for agents.
-    """
-
-    def __init__(self, args, num_agents, obs_space, cent_obs_space, act_space):
-        self.episode_length = args.episode_length
-        self.n_rollout_threads = args.n_rollout_threads
-        self.hidden_size = args.hidden_size
-        self.recurrent_N = args.recurrent_N
-        self.gamma = args.gamma
-        self.gae_lambda = args.gae_lambda
-        self._use_gae = args.use_gae
-        self._use_popart = args.use_popart
-        self._use_valuenorm = args.use_valuenorm
-        self._use_proper_time_limits = args.use_proper_time_limits
-
-        obs_shape = get_shape_from_obs_space(obs_space)
-        share_obs_shape = get_shape_from_obs_space(cent_obs_space)
-
-        if type(obs_shape[-1]) == list:
-            obs_shape = obs_shape[:1]
-
-        if type(share_obs_shape[-1]) == list:
-            share_obs_shape = share_obs_shape[:1]
-
-        self.share_obs = np.zeros(
-            (self.episode_length + 1, self.n_rollout_threads, num_agents,
-             *share_obs_shape),
-            dtype=np.float32)
-        self.obs = np.zeros((self.episode_length + 1, self.n_rollout_threads,
-                             num_agents, *obs_shape),
-                            dtype=np.float32)
-
-        self.rnn_states = np.zeros(
-            (self.episode_length + 1, self.n_rollout_threads, num_agents,
-             self.recurrent_N, self.hidden_size),
-            dtype=np.float32)
-        self.rnn_states_critic = np.zeros_like(self.rnn_states)
-
-        self.value_preds = np.zeros(
-            (self.episode_length + 1, self.n_rollout_threads, num_agents, 1),
-            dtype=np.float32)
-        self.returns = np.zeros_like(self.value_preds)
-
-        if act_space.__class__.__name__ == 'Discrete':
-            self.available_actions = np.ones(
-                (self.episode_length + 1, self.n_rollout_threads, num_agents,
-                 act_space.n),
-                dtype=np.float32)
-        else:
-            self.available_actions = None
-
-        act_shape = get_shape_from_act_space(act_space)
-
-        self.actions = np.zeros((self.episode_length, self.n_rollout_threads,
-                                 num_agents, act_shape),
-                                dtype=np.float32)
-        self.action_log_probs = np.zeros(
-            (self.episode_length, self.n_rollout_threads, num_agents,
-             act_shape),
-            dtype=np.float32)
-        self.rewards = np.zeros(
-            (self.episode_length, self.n_rollout_threads, num_agents, 1),
-            dtype=np.float32)
-
-        self.masks = np.ones(
-            (self.episode_length + 1, self.n_rollout_threads, num_agents, 1),
-            dtype=np.float32)
-        self.bad_masks = np.ones_like(self.masks)
-        self.active_masks = np.ones_like(self.masks)
-
-        self.step = 0
-
-    def insert(self,
-               share_obs,
-               obs,
-               rnn_states_actor,
-               rnn_states_critic,
-               actions,
-               action_log_probs,
-               value_preds,
-               rewards,
-               masks,
-               bad_masks=None,
-               active_masks=None,
-               available_actions=None):
-        """
-        Insert data into the buffer.
-        :param share_obs: (argparse.Namespace) arguments containing relevant model, policy, and env information.
-        :param obs: (np.ndarray) local agent observations.
-        :param rnn_states_actor: (np.ndarray) RNN states for actor network.
-        :param rnn_states_critic: (np.ndarray) RNN states for critic network.
-        :param actions:(np.ndarray) actions taken by agents.
-        :param action_log_probs:(np.ndarray) log probs of actions taken by agents
-        :param value_preds: (np.ndarray) value function prediction at each step.
-        :param rewards: (np.ndarray) reward collected at each step.
-        :param masks: (np.ndarray) denotes whether the environment has terminated or not.
-        :param bad_masks: (np.ndarray) action space for agents.
-        :param active_masks: (np.ndarray) denotes whether an agent is active or dead in the env.
-        :param available_actions: (np.ndarray) actions available to each agent. If None, all actions are available.
-        """
-        self.share_obs[self.step + 1] = share_obs.copy()
-        self.obs[self.step + 1] = obs.copy()
-        self.rnn_states[self.step + 1] = rnn_states_actor.copy()
-        self.rnn_states_critic[self.step + 1] = rnn_states_critic.copy()
-        self.actions[self.step] = actions.copy()
-        self.action_log_probs[self.step] = action_log_probs.copy()
-        self.value_preds[self.step] = value_preds.copy()
-        self.rewards[self.step] = rewards.copy()
-        self.masks[self.step + 1] = masks.copy()
-        if bad_masks is not None:
-            self.bad_masks[self.step + 1] = bad_masks.copy()
-        if active_masks is not None:
-            self.active_masks[self.step + 1] = active_masks.copy()
-        if available_actions is not None:
-            self.available_actions[self.step + 1] = available_actions.copy()
-
-        self.step = (self.step + 1) % self.episode_length
-
-    def chooseinsert(self,
-                     share_obs,
-                     obs,
-                     rnn_states,
-                     rnn_states_critic,
-                     actions,
-                     action_log_probs,
-                     value_preds,
-                     rewards,
-                     masks,
-                     bad_masks=None,
-                     active_masks=None,
-                     available_actions=None):
-        """
-        Insert data into the buffer. This insert function is used specifically for Hanabi, which is turn based.
-        :param share_obs: (argparse.Namespace) arguments containing relevant model, policy, and env information.
-        :param obs: (np.ndarray) local agent observations.
-        :param rnn_states_actor: (np.ndarray) RNN states for actor network.
-        :param rnn_states_critic: (np.ndarray) RNN states for critic network.
-        :param actions:(np.ndarray) actions taken by agents.
-        :param action_log_probs:(np.ndarray) log probs of actions taken by agents
-        :param value_preds: (np.ndarray) value function prediction at each step.
-        :param rewards: (np.ndarray) reward collected at each step.
-        :param masks: (np.ndarray) denotes whether the environment has terminated or not.
-        :param bad_masks: (np.ndarray) denotes indicate whether whether true terminal state or due to episode limit
-        :param active_masks: (np.ndarray) denotes whether an agent is active or dead in the env.
-        :param available_actions: (np.ndarray) actions available to each agent. If None, all actions are available.
-        """
-        self.share_obs[self.step] = share_obs.copy()
-        self.obs[self.step] = obs.copy()
-        self.rnn_states[self.step + 1] = rnn_states.copy()
-        self.rnn_states_critic[self.step + 1] = rnn_states_critic.copy()
-        self.actions[self.step] = actions.copy()
-        self.action_log_probs[self.step] = action_log_probs.copy()
-        self.value_preds[self.step] = value_preds.copy()
-        self.rewards[self.step] = rewards.copy()
-        self.masks[self.step + 1] = masks.copy()
-        if bad_masks is not None:
-            self.bad_masks[self.step + 1] = bad_masks.copy()
-        if active_masks is not None:
-            self.active_masks[self.step] = active_masks.copy()
-        if available_actions is not None:
-            self.available_actions[self.step] = available_actions.copy()
-
-        self.step = (self.step + 1) % self.episode_length
-
-    def after_update(self):
-        """Copy last timestep data to first index. Called after update to model."""
-        self.share_obs[0] = self.share_obs[-1].copy()
-        self.obs[0] = self.obs[-1].copy()
-        self.rnn_states[0] = self.rnn_states[-1].copy()
-        self.rnn_states_critic[0] = self.rnn_states_critic[-1].copy()
-        self.masks[0] = self.masks[-1].copy()
-        self.bad_masks[0] = self.bad_masks[-1].copy()
-        self.active_masks[0] = self.active_masks[-1].copy()
-        if self.available_actions is not None:
-            self.available_actions[0] = self.available_actions[-1].copy()
-
-    def chooseafter_update(self):
-        """Copy last timestep data to first index. This method is used for Hanabi."""
-        self.rnn_states[0] = self.rnn_states[-1].copy()
-        self.rnn_states_critic[0] = self.rnn_states_critic[-1].copy()
-        self.masks[0] = self.masks[-1].copy()
-        self.bad_masks[0] = self.bad_masks[-1].copy()
-
-    def compute_returns(self, next_value, value_normalizer=None):
-        """
-        Compute returns either as discounted sum of rewards, or using GAE.
-        :param next_value: (np.ndarray) value predictions for the step after the last episode step.
-        :param value_normalizer: (PopArt) If not None, PopArt value normalizer instance.
-        """
-        if self._use_proper_time_limits:
-            if self._use_gae:
-                self.value_preds[-1] = next_value
-                gae = 0
-                for step in reversed(range(self.rewards.shape[0])):
-                    if self._use_popart or self._use_valuenorm:
-                        # step + 1
-                        delta = self.rewards[step] + self.gamma * value_normalizer.denormalize(
-                            self.value_preds[step + 1]) * self.masks[step + 1] \
-                                - value_normalizer.denormalize(self.value_preds[step])
-                        gae = delta + self.gamma * self.gae_lambda * gae * self.masks[
-                            step + 1]
-                        gae = gae * self.bad_masks[step + 1]
-                        self.returns[
-                            step] = gae + value_normalizer.denormalize(
-                                self.value_preds[step])
-                    else:
-                        delta = self.rewards[step] + self.gamma * self.value_preds[step + 1] * self.masks[step + 1] - \
-                                self.value_preds[step]
-                        gae = delta + self.gamma * self.gae_lambda * self.masks[
-                            step + 1] * gae
-                        gae = gae * self.bad_masks[step + 1]
-                        self.returns[step] = gae + self.value_preds[step]
-            else:
-                self.returns[-1] = next_value
-                for step in reversed(range(self.rewards.shape[0])):
-                    if self._use_popart or self._use_valuenorm:
-                        self.returns[step] = (self.returns[step + 1] * self.gamma * self.masks[step + 1] + self.rewards[
-                            step]) * self.bad_masks[step + 1] \
-                                             + (1 - self.bad_masks[step + 1]) * value_normalizer.denormalize(
-                            self.value_preds[step])
-                    else:
-                        self.returns[step] = (self.returns[step + 1] * self.gamma * self.masks[step + 1] + self.rewards[
-                            step]) * self.bad_masks[step + 1] \
-                                             + (1 - self.bad_masks[step + 1]) * self.value_preds[step]
-        else:
-            if self._use_gae:
-                self.value_preds[-1] = next_value
-                gae = 0
-                for step in reversed(range(self.rewards.shape[0])):
-                    if self._use_popart or self._use_valuenorm:
-                        delta = self.rewards[step] + self.gamma * value_normalizer.denormalize(
-                            self.value_preds[step + 1]) * self.masks[step + 1] \
-                                - value_normalizer.denormalize(self.value_preds[step])
-                        gae = delta + self.gamma * self.gae_lambda * self.masks[
-                            step + 1] * gae
-                        self.returns[
-                            step] = gae + value_normalizer.denormalize(
-                                self.value_preds[step])
-                    else:
-                        delta = self.rewards[step] + self.gamma * self.value_preds[step + 1] * self.masks[step + 1] - \
-                                self.value_preds[step]
-                        gae = delta + self.gamma * self.gae_lambda * self.masks[
-                            step + 1] * gae
-                        self.returns[step] = gae + self.value_preds[step]
-            else:
-                self.returns[-1] = next_value
-                for step in reversed(range(self.rewards.shape[0])):
-                    self.returns[step] = self.returns[
-                        step + 1] * self.gamma * self.masks[
-                            step + 1] + self.rewards[step]
-
-    def feed_forward_generator(self,
-                               advantages,
-                               num_mini_batch=None,
-                               mini_batch_size=None):
-        """
-        Yield training data for MLP policies.
-        :param advantages: (np.ndarray) advantage estimates.
-        :param num_mini_batch: (int) number of minibatches to split the batch into.
-        :param mini_batch_size: (int) number of samples in each minibatch.
-        """
-        episode_length, n_rollout_threads, num_agents = self.rewards.shape[0:3]
-        batch_size = n_rollout_threads * episode_length * num_agents
-
-        if mini_batch_size is None:
-            assert batch_size >= num_mini_batch, (
-                "PPO requires the number of processes ({}) "
-                "* number of steps ({}) * number of agents ({}) = {} "
-                "to be greater than or equal to the number of PPO mini batches ({})."
-                "".format(n_rollout_threads, episode_length, num_agents,
-                          n_rollout_threads * episode_length * num_agents,
-                          num_mini_batch))
-            mini_batch_size = batch_size // num_mini_batch
-
-        rand = torch.randperm(batch_size).numpy()
-        sampler = [
-            rand[i * mini_batch_size:(i + 1) * mini_batch_size]
-            for i in range(num_mini_batch)
-        ]
-
-        share_obs = self.share_obs[:-1].reshape(-1, *self.share_obs.shape[3:])
-        obs = self.obs[:-1].reshape(-1, *self.obs.shape[3:])
-        rnn_states = self.rnn_states[:-1].reshape(-1,
-                                                  *self.rnn_states.shape[3:])
-        rnn_states_critic = self.rnn_states_critic[:-1].reshape(
-            -1, *self.rnn_states_critic.shape[3:])
-        actions = self.actions.reshape(-1, self.actions.shape[-1])
-        if self.available_actions is not None:
-            available_actions = self.available_actions[:-1].reshape(
-                -1, self.available_actions.shape[-1])
-        value_preds = self.value_preds[:-1].reshape(-1, 1)
-        returns = self.returns[:-1].reshape(-1, 1)
-        masks = self.masks[:-1].reshape(-1, 1)
-        active_masks = self.active_masks[:-1].reshape(-1, 1)
-        action_log_probs = self.action_log_probs.reshape(
-            -1, self.action_log_probs.shape[-1])
-        advantages = advantages.reshape(-1, 1)
-
-        for indices in sampler:
-            # obs size [T+1 N M Dim]-->[T N M Dim]-->[T*N*M,Dim]-->[index,Dim]
-            share_obs_batch = share_obs[indices]
-            obs_batch = obs[indices]
-            rnn_states_batch = rnn_states[indices]
-            rnn_states_critic_batch = rnn_states_critic[indices]
-            actions_batch = actions[indices]
-            if self.available_actions is not None:
-                available_actions_batch = available_actions[indices]
-            else:
-                available_actions_batch = None
-            value_preds_batch = value_preds[indices]
-            return_batch = returns[indices]
-            masks_batch = masks[indices]
-            active_masks_batch = active_masks[indices]
-            old_action_log_probs_batch = action_log_probs[indices]
-            if advantages is None:
-                adv_targ = None
-            else:
-                adv_targ = advantages[indices]
-
-            yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch,\
-                  value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch,\
-                  adv_targ, available_actions_batch
-
-    def naive_recurrent_generator(self, advantages, num_mini_batch):
-        """
-        Yield training data for non-chunked RNN training.
-        :param advantages: (np.ndarray) advantage estimates.
-        :param num_mini_batch: (int) number of minibatches to split the batch into.
-        """
-        episode_length, n_rollout_threads, num_agents = self.rewards.shape[0:3]
-        batch_size = n_rollout_threads * num_agents
-        assert n_rollout_threads * num_agents >= num_mini_batch, (
-            "PPO requires the number of processes ({})* number of agents ({}) "
-            "to be greater than or equal to the number of "
-            "PPO mini batches ({}).".format(n_rollout_threads, num_agents,
-                                            num_mini_batch))
-        num_envs_per_batch = batch_size // num_mini_batch
-        perm = torch.randperm(batch_size).numpy()
-
-        share_obs = self.share_obs.reshape(-1, batch_size,
-                                           *self.share_obs.shape[3:])
-        obs = self.obs.reshape(-1, batch_size, *self.obs.shape[3:])
-        rnn_states = self.rnn_states.reshape(-1, batch_size,
-                                             *self.rnn_states.shape[3:])
-        rnn_states_critic = self.rnn_states_critic.reshape(
-            -1, batch_size, *self.rnn_states_critic.shape[3:])
-        actions = self.actions.reshape(-1, batch_size, self.actions.shape[-1])
-        if self.available_actions is not None:
-            available_actions = self.available_actions.reshape(
-                -1, batch_size, self.available_actions.shape[-1])
-        value_preds = self.value_preds.reshape(-1, batch_size, 1)
-        returns = self.returns.reshape(-1, batch_size, 1)
-        masks = self.masks.reshape(-1, batch_size, 1)
-        active_masks = self.active_masks.reshape(-1, batch_size, 1)
-        action_log_probs = self.action_log_probs.reshape(
-            -1, batch_size, self.action_log_probs.shape[-1])
-        advantages = advantages.reshape(-1, batch_size, 1)
-
-        for start_ind in range(0, batch_size, num_envs_per_batch):
-            share_obs_batch = []
-            obs_batch = []
-            rnn_states_batch = []
-            rnn_states_critic_batch = []
-            actions_batch = []
-            available_actions_batch = []
-            value_preds_batch = []
-            return_batch = []
-            masks_batch = []
-            active_masks_batch = []
-            old_action_log_probs_batch = []
-            adv_targ = []
-
-            for offset in range(num_envs_per_batch):
-                ind = perm[start_ind + offset]
-                share_obs_batch.append(share_obs[:-1, ind])
-                obs_batch.append(obs[:-1, ind])
-                rnn_states_batch.append(rnn_states[0:1, ind])
-                rnn_states_critic_batch.append(rnn_states_critic[0:1, ind])
-                actions_batch.append(actions[:, ind])
-                if self.available_actions is not None:
-                    available_actions_batch.append(available_actions[:-1, ind])
-                value_preds_batch.append(value_preds[:-1, ind])
-                return_batch.append(returns[:-1, ind])
-                masks_batch.append(masks[:-1, ind])
-                active_masks_batch.append(active_masks[:-1, ind])
-                old_action_log_probs_batch.append(action_log_probs[:, ind])
-                adv_targ.append(advantages[:, ind])
-
-            # [N[T, dim]]
-            T, N = self.episode_length, num_envs_per_batch
-            # These are all from_numpys of size (T, N, -1)
-            share_obs_batch = np.stack(share_obs_batch, 1)
-            obs_batch = np.stack(obs_batch, 1)
-            actions_batch = np.stack(actions_batch, 1)
-            if self.available_actions is not None:
-                available_actions_batch = np.stack(available_actions_batch, 1)
-            value_preds_batch = np.stack(value_preds_batch, 1)
-            return_batch = np.stack(return_batch, 1)
-            masks_batch = np.stack(masks_batch, 1)
-            active_masks_batch = np.stack(active_masks_batch, 1)
-            old_action_log_probs_batch = np.stack(old_action_log_probs_batch,
-                                                  1)
-            adv_targ = np.stack(adv_targ, 1)
-
-            # States is just a (N, dim) from_numpy [N[1,dim]]
-            rnn_states_batch = np.stack(rnn_states_batch).reshape(
-                N, *self.rnn_states.shape[3:])
-            rnn_states_critic_batch = np.stack(
-                rnn_states_critic_batch).reshape(
-                    N, *self.rnn_states_critic.shape[3:])
-
-            # Flatten the (T, N, ...) from_numpys to (T * N, ...)
-            share_obs_batch = _flatten(T, N, share_obs_batch)
-            obs_batch = _flatten(T, N, obs_batch)
-            actions_batch = _flatten(T, N, actions_batch)
-            if self.available_actions is not None:
-                available_actions_batch = _flatten(T, N,
-                                                   available_actions_batch)
-            else:
-                available_actions_batch = None
-            value_preds_batch = _flatten(T, N, value_preds_batch)
-            return_batch = _flatten(T, N, return_batch)
-            masks_batch = _flatten(T, N, masks_batch)
-            active_masks_batch = _flatten(T, N, active_masks_batch)
-            old_action_log_probs_batch = _flatten(T, N,
-                                                  old_action_log_probs_batch)
-            adv_targ = _flatten(T, N, adv_targ)
-
-            yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch,\
-                  value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch,\
-                  adv_targ, available_actions_batch
-
-    def recurrent_generator(self, advantages, num_mini_batch,
-                            data_chunk_length):
-        """
-        Yield training data for chunked RNN training.
-        :param advantages: (np.ndarray) advantage estimates.
-        :param num_mini_batch: (int) number of minibatches to split the batch into.
-        :param data_chunk_length: (int) length of sequence chunks with which to train RNN.
-        """
-        episode_length, n_rollout_threads, num_agents = self.rewards.shape[0:3]
-        batch_size = n_rollout_threads * episode_length * num_agents
-        data_chunks = batch_size // data_chunk_length  # [C=r*T*M/L]
-        mini_batch_size = data_chunks // num_mini_batch
-
-        rand = torch.randperm(data_chunks).numpy()
-        sampler = [
-            rand[i * mini_batch_size:(i + 1) * mini_batch_size]
-            for i in range(num_mini_batch)
-        ]
-
-        if len(self.share_obs.shape) > 4:
-            share_obs = self.share_obs[:-1].transpose(
-                1, 2, 0, 3, 4, 5).reshape(-1, *self.share_obs.shape[3:])
-            obs = self.obs[:-1].transpose(1, 2, 0, 3, 4,
-                                          5).reshape(-1, *self.obs.shape[3:])
-        else:
-            share_obs = _cast(self.share_obs[:-1])
-            obs = _cast(self.obs[:-1])
-
-        actions = _cast(self.actions)
-        action_log_probs = _cast(self.action_log_probs)
-        advantages = _cast(advantages)
-        value_preds = _cast(self.value_preds[:-1])
-        returns = _cast(self.returns[:-1])
-        masks = _cast(self.masks[:-1])
-        active_masks = _cast(self.active_masks[:-1])
-        # rnn_states = _cast(self.rnn_states[:-1])
-        # rnn_states_critic = _cast(self.rnn_states_critic[:-1])
-        rnn_states = self.rnn_states[:-1].transpose(1, 2, 0, 3, 4).reshape(
-            -1, *self.rnn_states.shape[3:])
-        rnn_states_critic = self.rnn_states_critic[:-1].transpose(
-            1, 2, 0, 3, 4).reshape(-1, *self.rnn_states_critic.shape[3:])
-
-        if self.available_actions is not None:
-            available_actions = _cast(self.available_actions[:-1])
-
-        for indices in sampler:
-            share_obs_batch = []
-            obs_batch = []
-            rnn_states_batch = []
-            rnn_states_critic_batch = []
-            actions_batch = []
-            available_actions_batch = []
-            value_preds_batch = []
-            return_batch = []
-            masks_batch = []
-            active_masks_batch = []
-            old_action_log_probs_batch = []
-            adv_targ = []
-
-            for index in indices:
-
-                ind = index * data_chunk_length
-                # size [T+1 N M Dim]-->[T N M Dim]-->[N,M,T,Dim]-->[N*M*T,Dim]-->[L,Dim]
-                share_obs_batch.append(share_obs[ind:ind + data_chunk_length])
-                obs_batch.append(obs[ind:ind + data_chunk_length])
-                actions_batch.append(actions[ind:ind + data_chunk_length])
-                if self.available_actions is not None:
-                    available_actions_batch.append(
-                        available_actions[ind:ind + data_chunk_length])
-                value_preds_batch.append(value_preds[ind:ind +
-                                                     data_chunk_length])
-                return_batch.append(returns[ind:ind + data_chunk_length])
-                masks_batch.append(masks[ind:ind + data_chunk_length])
-                active_masks_batch.append(active_masks[ind:ind +
-                                                       data_chunk_length])
-                old_action_log_probs_batch.append(
-                    action_log_probs[ind:ind + data_chunk_length])
-                adv_targ.append(advantages[ind:ind + data_chunk_length])
-                # size [T+1 N M Dim]-->[T N M Dim]-->[N M T Dim]-->[N*M*T,Dim]-->[1,Dim]
-                rnn_states_batch.append(rnn_states[ind])
-                rnn_states_critic_batch.append(rnn_states_critic[ind])
-
-            L, N = data_chunk_length, mini_batch_size
-
-            # These are all from_numpys of size (L, N, Dim)
-            share_obs_batch = np.stack(share_obs_batch, axis=1)
-            obs_batch = np.stack(obs_batch, axis=1)
-
-            actions_batch = np.stack(actions_batch, axis=1)
-            if self.available_actions is not None:
-                available_actions_batch = np.stack(available_actions_batch,
-                                                   axis=1)
-            value_preds_batch = np.stack(value_preds_batch, axis=1)
-            return_batch = np.stack(return_batch, axis=1)
-            masks_batch = np.stack(masks_batch, axis=1)
-            active_masks_batch = np.stack(active_masks_batch, axis=1)
-            old_action_log_probs_batch = np.stack(old_action_log_probs_batch,
-                                                  axis=1)
-            adv_targ = np.stack(adv_targ, axis=1)
-
-            # States is just a (N, -1) from_numpy
-            rnn_states_batch = np.stack(rnn_states_batch).reshape(
-                N, *self.rnn_states.shape[3:])
-            rnn_states_critic_batch = np.stack(
-                rnn_states_critic_batch).reshape(
-                    N, *self.rnn_states_critic.shape[3:])
-
-            # Flatten the (L, N, ...) from_numpys to (L * N, ...)
-            share_obs_batch = _flatten(L, N, share_obs_batch)
-            obs_batch = _flatten(L, N, obs_batch)
-            actions_batch = _flatten(L, N, actions_batch)
-            if self.available_actions is not None:
-                available_actions_batch = _flatten(L, N,
-                                                   available_actions_batch)
-            else:
-                available_actions_batch = None
-            value_preds_batch = _flatten(L, N, value_preds_batch)
-            return_batch = _flatten(L, N, return_batch)
-            masks_batch = _flatten(L, N, masks_batch)
-            active_masks_batch = _flatten(L, N, active_masks_batch)
-            old_action_log_probs_batch = _flatten(L, N,
-                                                  old_action_log_probs_batch)
-            adv_targ = _flatten(L, N, adv_targ)
-
-            yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch,\
-                  value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch,\
-                  adv_targ, available_actions_batch
diff --git a/algos/ppo/utils/util.py b/algos/ppo/utils/util.py
deleted file mode 100644
index 7e23b9ea..00000000
--- a/algos/ppo/utils/util.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import numpy as np
-import math
-import torch
-
-
-def check(input):
-    if type(input) == np.ndarray:
-        return torch.from_numpy(input)
-
-
-def get_gard_norm(it):
-    sum_grad = 0
-    for x in it:
-        if x.grad is None:
-            continue
-        sum_grad += x.grad.norm()**2
-    return math.sqrt(sum_grad)
-
-
-def update_linear_schedule(optimizer, epoch, total_num_epochs, initial_lr):
-    """Decreases the learning rate linearly"""
-    lr = initial_lr - (initial_lr * (epoch / float(total_num_epochs)))
-    for param_group in optimizer.param_groups:
-        param_group['lr'] = lr
-
-
-def huber_loss(e, d):
-    a = (abs(e) <= d).float()
-    b = (e > d).float()
-    return a * e**2 / 2 + b * d * (abs(e) - d / 2)
-
-
-def mse_loss(e):
-    return e**2 / 2
-
-
-def get_shape_from_obs_space(obs_space):
-    if obs_space.__class__.__name__ == 'Box':
-        obs_shape = obs_space.shape
-    elif obs_space.__class__.__name__ == 'list':
-        obs_shape = obs_space
-    else:
-        raise NotImplementedError
-    return obs_shape
-
-
-def get_shape_from_act_space(act_space):
-    if act_space.__class__.__name__ == 'Discrete':
-        act_shape = 1
-    elif act_space.__class__.__name__ == "MultiDiscrete":
-        act_shape = act_space.shape
-    elif act_space.__class__.__name__ == "Box":
-        act_shape = act_space.shape[0]
-    elif act_space.__class__.__name__ == "MultiBinary":
-        act_shape = act_space.shape[0]
-    else:  # agar
-        act_shape = act_space[0].shape[0] + 1
-    return act_shape
-
-
-def tile_images(img_nhwc):
-    """
-    Tile N images into one big PxQ image
-    (P,Q) are chosen to be as close as possible, and if N
-    is square, then P=Q.
-    input: img_nhwc, list or array of images, ndim=4 once turned into array
-        n = batch index, h = height, w = width, c = channel
-    returns:
-        bigim_HWc, ndarray with ndim=3
-    """
-    img_nhwc = np.asarray(img_nhwc)
-    N, h, w, c = img_nhwc.shape
-    H = int(np.ceil(np.sqrt(N)))
-    W = int(np.ceil(float(N) / H))
-    img_nhwc = np.array(
-        list(img_nhwc) + [img_nhwc[0] * 0 for _ in range(N, H * W)])
-    img_HWhwc = img_nhwc.reshape(H, W, h, w, c)
-    img_HhWwc = img_HWhwc.transpose(0, 2, 1, 3, 4)
-    img_Hh_Ww_c = img_HhWwc.reshape(H * h, W * w, c)
-    return img_Hh_Ww_c
diff --git a/algos/ppo/utils/valuenorm.py b/algos/ppo/utils/valuenorm.py
deleted file mode 100644
index 76df255d..00000000
--- a/algos/ppo/utils/valuenorm.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-import numpy as np
-
-import torch
-import torch.nn as nn
-
-
-class ValueNorm(nn.Module):
-    """ Normalize a vector of observations - across the first norm_axes dimensions"""
-
-    def __init__(self,
-                 input_shape,
-                 norm_axes=1,
-                 beta=0.99999,
-                 per_element_update=False,
-                 epsilon=1e-5,
-                 device=torch.device("cpu")):
-        super(ValueNorm, self).__init__()
-
-        self.input_shape = input_shape
-        self.norm_axes = norm_axes
-        self.epsilon = epsilon
-        self.beta = beta
-        self.per_element_update = per_element_update
-        self.tpdv = dict(dtype=torch.float32, device=device)
-
-        self.running_mean = nn.Parameter(torch.zeros(input_shape),
-                                         requires_grad=False).to(**self.tpdv)
-        self.running_mean_sq = nn.Parameter(
-            torch.zeros(input_shape), requires_grad=False).to(**self.tpdv)
-        self.debiasing_term = nn.Parameter(torch.tensor(0.0),
-                                           requires_grad=False).to(**self.tpdv)
-
-        self.reset_parameters()
-
-    def reset_parameters(self):
-        self.running_mean.zero_()
-        self.running_mean_sq.zero_()
-        self.debiasing_term.zero_()
-
-    def running_mean_var(self):
-        debiased_mean = self.running_mean / self.debiasing_term.clamp(
-            min=self.epsilon)
-        debiased_mean_sq = self.running_mean_sq / self.debiasing_term.clamp(
-            min=self.epsilon)
-        debiased_var = (debiased_mean_sq - debiased_mean**2).clamp(min=1e-2)
-        return debiased_mean, debiased_var
-
-    @torch.no_grad()
-    def update(self, input_vector):
-        if type(input_vector) == np.ndarray:
-            input_vector = torch.from_numpy(input_vector)
-        input_vector = input_vector.to(**self.tpdv)
-
-        batch_mean = input_vector.mean(dim=tuple(range(self.norm_axes)))
-        batch_sq_mean = (input_vector**2).mean(
-            dim=tuple(range(self.norm_axes)))
-
-        if self.per_element_update:
-            batch_size = np.prod(input_vector.size()[:self.norm_axes])
-            weight = self.beta**batch_size
-        else:
-            weight = self.beta
-
-        self.running_mean.mul_(weight).add_(batch_mean * (1.0 - weight))
-        self.running_mean_sq.mul_(weight).add_(batch_sq_mean * (1.0 - weight))
-        self.debiasing_term.mul_(weight).add_(1.0 * (1.0 - weight))
-
-    def normalize(self, input_vector):
-        # Make sure input is float32
-        if type(input_vector) == np.ndarray:
-            input_vector = torch.from_numpy(input_vector)
-        input_vector = input_vector.to(**self.tpdv)
-
-        mean, var = self.running_mean_var()
-        out = (input_vector - mean[(None, ) * self.norm_axes]
-               ) / torch.sqrt(var)[(None, ) * self.norm_axes]
-
-        return out
-
-    def denormalize(self, input_vector):
-        """ Transform normalized data back into original distribution """
-        if type(input_vector) == np.ndarray:
-            input_vector = torch.from_numpy(input_vector)
-        input_vector = input_vector.to(**self.tpdv)
-
-        mean, var = self.running_mean_var()
-        out = input_vector * torch.sqrt(var)[(None, ) * self.norm_axes] + mean[
-            (None, ) * self.norm_axes]
-
-        out = out.cpu().numpy()
-
-        return out
diff --git a/setup.py b/build.py
similarity index 74%
rename from setup.py
rename to build.py
index 4863ae61..43af0ae5 100644
--- a/setup.py
+++ b/build.py
@@ -1,9 +1,4 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-
-"""Run via ```python setup.py develop``` to install Nocturne in your environment."""
+from pybind11.setup_helpers import build_ext, Pybind11Extension
 import logging
 import multiprocessing
 import os
@@ -12,18 +7,14 @@
 import sys
 
 from distutils.version import LooseVersion
-from setuptools import Extension, setup
-from setuptools.command.build_ext import build_ext
-
-# Reference:
-# https://www.benjack.io/2017/06/12/python-cpp-tests.html
 
+logging.basicConfig(level=logging.INFO)
 
-class CMakeExtension(Extension):
+class CMakeExtension(Pybind11Extension):
     """Use CMake to construct the Nocturne extension."""
 
     def __init__(self, name, src_dir=""):
-        Extension.__init__(self, name, sources=[])
+        Pybind11Extension.__init__(self, name, sources=[])
         self.src_dir = os.path.abspath(src_dir)
 
 
@@ -87,15 +78,9 @@ def build_extension(self, ext):
         print()  # Add an empty line for cleaner output
 
 
-def main():
-    """Build the C++ code."""
-    # with open("./requirements.txt", "r") as f:
-    #     requires = f.read().splitlines()
-    setup(
-        ext_modules=[CMakeExtension("nocturne", "./nocturne")],
-        cmdclass=dict(build_ext=CMakeBuild),
-    )
-
-
-if __name__ == "__main__":
-    main()
+def build(setup_kwargs):
+    setup_kwargs.update({
+        "ext_modules": [CMakeExtension("nocturne", "./nocturne")],
+        "cmdclass": {"build_ext": CMakeBuild},
+        "zip_safe": False,
+    })
diff --git a/cfgs/algorithm/APPO.yaml b/cfgs/algorithm/APPO.yaml
deleted file mode 100644
index 5c83b6e5..00000000
--- a/cfgs/algorithm/APPO.yaml
+++ /dev/null
@@ -1,208 +0,0 @@
-algo: APPO
-experiments_root: null 
-                 # If not None, store experiment data in the specified subfolder of train_dir. Useful for groups of experiments (e.g. gridsearch) (default: None)
-train_dir: null
-                        # Root for all experiments (default: /private/home/eugenevinitsky/Code/nocturne/examples/train_dir)
-                        # if null use the hydra default position
-device: gpu  # CPU training is only recommended for smaller e.g. MLP policies (default: gpu)
-save_every_sec: 120 # Checkpointing rate (default: 120)
-keep_checkpoints: 3 #Number of model checkpoints to keep (default: 3)
-save_milestones_sec: -1 #Save intermediate checkpoints in a separate folder for later evaluation (default=never) (default: -1)
-stats_avg: 100 #How many episodes to average to measure performance (avg. reward etc) (default: 100)
-learning_rate: 0.0001 # LR (default: 0.0001)
-train_for_env_steps: 3000000000 # Stop after all policies are trained for this many env steps (default: 10000000000)
-train_for_seconds: 10000000000 #Stop training after this many seconds (default: 10000000000)
-lr_schedule: constant #Learning rate schedule to use. Constant keeps constant learning rate throughout training.
-                  # kl_adaptive* schedulers look at --lr_schedule_kl_threshold and if KL-divergence with behavior policy'
-                  # after the last minibatch/epoch significantly deviates from this threshold, lr is apropriately'
-                  # increased or decreased
-                  # options are 'constant', 'kl_adaptive_minibatch', 'kl_adaptive_epoch'
-lr_schedule_kl_threshold: 0.008 #Used with kl_adaptive_* schedulers
-obs_subtract_mean: 0.0 # Observation preprocessing, mean value to subtract from observation (e.g. 128.0 for 8-bit RGB) (default: 0.0)
-obs_scale: 10.0 # Observation preprocessing, divide observation tensors by this scalar (e.g. 128.0 for 8-bit RGB) (default: 1.0)
-gamma: 0.99 # Discount factor (default: 0.99)
-reward_scale: 1.0
-              # Multiply all rewards by this factor before feeding into RL algorithm.Sometimes the overall scale of rewards is too high which makes value estimation a
-              # harder regression task.Loss values become too high which requires a smaller learning rate, etc. (default: 1.0)
-reward_clip: 10.0 # Clip rewards between [-c, c]. Default [-10, 10] virtually means no clipping for most envs (default: 10.0)
-encoder_type: mlp # Type of the encoder. Supported: conv, mlp, resnet (feel free to define more) (default: conv)
-encoder_subtype: mlp_mujoco # Specific encoder design (see model.py) (default: convnet_simple)
-encoder_custom: custom_env_encoder # Use custom encoder class from the registry (see model_utils.py) (default: null, options {null, custom_env_encoder})
-encoder_extra_fc_layers: 1 # Number of fully-connected layers of size "hidden size" to add after the basic encoder (e.g. convolutional) (default: 1)
-encoder_hidden_size: 256
-hidden_size: 256 # Size of hidden layer in the model, or the size of RNN hidden state in recurrent model (e.g. GRU) (default: 128)
-nonlinearity: tanh #  {elu,relu,tanh}
-                  #      Type of nonlinearity to use (default: elu)
-policy_initialization: orthogonal #  {orthogonal,xavier_uniform}
-                        # NN weight initialization (default: orthogonal)
-policy_init_gain: 1.0 # Gain parameter of PyTorch initialization schemas (i.e. Xavier) (default: 1.0)
-actor_critic_share_weights: True # Whether to share the weights between policy and value function (default: True)
-use_spectral_norm: False # Use spectral normalization to smoothen the gradients and stabilize training. Only supports fully connected layers (default: False)
-adaptive_stddev: True # Only for continuous action distributions, whether stddev is state-dependent or just a single learned parameter (default: True)
-initial_stddev: 1.0 # Initial value for non-adaptive stddev. Only makes sense for continuous action spaces (default: 1.0)
-experiment_summaries_interval: 20 # How often in seconds we write avg. statistics about the experiment (reward, episode length, extra stats...) (default: 20)
-adam_eps: 1e-06 # Adam epsilon parameter (1e-8 to 1e-5 seem to reliably work okay, 1e-3 and up does not work) (default: 1e-06)
-adam_beta1: 0.9 # Adam momentum decay coefficient (default: 0.9)
-adam_beta2: 0.999 # Adam second momentum decay coefficient (default: 0.999)
-gae_lambda: 0.95 # Generalized Advantage Estimation discounting (only used when V-trace is False (default: 0.95)
-rollout: 20
-#    Length of the rollout from each environment in timesteps.Once we collect this many timesteps on actor worker, we send this trajectory to the learner.The
-#    length of the rollout will determine how many timesteps are used to calculate bootstrappedMonte-Carlo estimates of discounted rewards, advantages, GAE,
-#    or V-trace targets. Shorter rolloutsreduce variance, but the estimates are less precise (bias vs variance tradeoff).For RNN policies, this should be a
-#    multiple of --recurrence, so every rollout will be splitinto (n = rollout / recurrence) segments for backpropagation. V-trace algorithm currently
-#    requires thatrollout == recurrence, which what you want most of the time anyway.Rollout length is independent from the episode length. Episode length
-#    can be both shorter or longer thanrollout, although for PBT training it is currently recommended that rollout << episode_len(see function
-#    finalize_trajectory in actor_worker.py) (default: 32)
-num_workers: 80 # Number of parallel environment workers. Should be less than num_envs and should divide num_envs (default: 80)
-recurrence: 20 # Trajectory length for backpropagation through time. If recurrence=1 there is no backpropagation through time, and experience is shuffled completely
-               #         randomlyFor V-trace recurrence should be equal to rollout length. (default: 32)
-use_rnn: True #     Whether to use RNN core in a policy or not (default: True)
-rnn_type: gru #  {gru,lstm}
-              #  Type of RNN cell to use if use_rnn is True (default: gru)
-rnn_num_layers: 1 # Number of RNN layers to use if use_rnn is True (default: 1)
-ppo_clip_ratio: 0.1 # We use unbiased clip(x, 1+e, 1/(1+e)) instead of clip(x, 1+e, 1-e) in the paper (default: 0.1)
-ppo_clip_value: 1.0 # Maximum absolute change in value estimate until it is clipped. Sensitive to value magnitude (default: 1.0)
-batch_size: 7180 # Minibatch size for SGD (default: 1024)
-num_batches_per_iteration: 1 
-# How many minibatches we collect before training on the collected experience. It is generally recommended to set this to 1 for most experiments, because
-# any higher value will increase the policy lag.But in some specific circumstances it can be beneficial to have a larger macro-batch in order to shuffle
-# and decorrelate the minibatches.Here and throughout the codebase: macro batch is the portion of experience that learner processes per iteration
-# (consisting of 1 or several minibatches) (default: 1)
-ppo_epochs: 1 # Number of training epochs before a new batch of experience is collected (default: 1)
-num_minibatches_to_accumulate: -1
-# This parameter governs the maximum number of minibatches the learner can accumulate before further experience collection is stopped.The default value
-# (-1) will set this to 2 * num_batches_per_iteration, so if the experience collection is faster than the training,the learner will accumulate enough
-# minibatches for 2 iterations of training (but no more). This is a good balance between policy-lag and throughput.When the limit is reached, the learner
-# will notify the actor workers that they ought to stop the experience collection until accumulated minibatchesare processed. Set this parameter to 1 *
-# num_batches_per_iteration to further reduce policy-lag.If the experience collection is very non-uniform, increasing this parameter can increase overall
-# throughput, at the cost of increased policy-lag.A value of 0 is treated specially. This means the experience accumulation is turned off, and all
-# experience collection will be halted during training.This is the regime with potentially lowest policy-lag.When this parameter is 0 and num_workers *
-# num_envs_per_worker * rollout == num_batches_per_iteration * batch_size, the algorithm is similar toregular synchronous PPO. (default: -1)
-max_grad_norm: 4.0 # Max L2 norm of the gradient vector (default: 4.0)
-exploration_loss_coeff: 0.001 # Coefficient for the exploration component of the loss function. (default: 0.001)
-value_loss_coeff: 0.5 # Coefficient for the critic loss (default: 0.5)
-kl_loss_coeff: 0.0 #Coefficient for fixed KL loss (as used by Schulman et al. in https://arxiv.org/pdf/1707.06347.pdf). Highly recommended for environments with continuous
-                   #     action spaces. (default: 0.0)
-exploration_loss: entropy 
-                        # {entropy,symmetric_kl}
-                        # Usually the exploration loss is based on maximizing the entropy of the probability distribution. Note that mathematically maximizing entropy of the
-                        # categorical probability distribution is exactly the same as minimizing the (regular) KL-divergence between this distribution and a uniform prior. The
-                        # downside of using the entropy term (or regular asymmetric KL-divergence) is the fact that penalty does not increase as probabilities of some actions
-                        # approach zero. I.e. numerically, there is almost no difference between an action distribution with a probability epsilon > 0 for some action and an
-                        # action distribution with a probability = zero for this action. For many tasks the first (epsilon) distribution is preferrable because we keep some
-                        # (albeit small) amount of exploration, while the second distribution will never explore this action ever again.Unlike the entropy term, symmetric KL
-                        # divergence between the action distribution and a uniform prior approaches infinity when entropy of the distribution approaches zero, so it can prevent
-                        # the pathological situations where the agent stops exploring. Empirically, symmetric KL-divergence yielded slightly better results on some problems.
-                        # (default: entropy)
-max_entropy_coeff: 0.0, # Coefficient for max entropy term added directly to rewards. 0 means no max entropy term to env rewards. '
-                        # Note that this is different from exploration loss (see https://arxiv.org/abs/1805.00909)'
-num_envs_per_worker: 2
-                        # Number of envs on a single CPU actor, in high-throughput configurations this should be in 10-30 range for Atari/VizDoomMust be even for double-buffered
-                        # sampling! (default: 2)
-worker_num_splits: 2
-                        # Typically we split a vector of envs into two parts for "double buffered" experience collectionSet this to 1 to disable double buffering. Set this to 3
-                        # for triple buffering! (default: 2)
-num_policies: 1
-                        # Number of policies to train jointly (default: 1)
-policy_workers_per_policy: 1
-                        # Number of policy workers that compute forward pass (per policy) (default: 1)
-max_policy_lag: 10000
-                        # Max policy lag in policy versions. Discard all experience that is older than this. This should be increased for configurations with multiple epochs of
-                        # SGD because naturallypolicy-lag may exceed this value. (default: 10000)
-traj_buffers_excess_ratio: 1.3
-                        # Increase this value to make sure the system always has enough free trajectory buffers (can be useful when i.e. a lot of inactive agents in multi-agent
-                        # envs)Decrease this to 1.0 to save as much RAM as possible. (default: 1.3)
-decorrelate_experience_max_seconds: 10
-                        # Decorrelating experience serves two benefits. First: this is better for learning because samples from workers come from random moments in the episode,
-                        # becoming more "i.i.d".Second, and more important one: this is good for environments with highly non-uniform one-step times, including long and expensive
-                        # episode resets. If experience is not decorrelatedthen training batches will come in bursts e.g. after a bunch of environments finished resets and many
-                        # iterations on the learner might be required,which will increase the policy-lag of the new experience collected. The performance of the Sample Factory is
-                        # best when experience is generated as more-or-lessuniform stream. Try increasing this to 100-200 seconds to smoothen the experience distribution in time
-                        # right from the beginning (it will eventually spread out and settle anyway) (default: 10)
-decorrelate_envs_on_one_worker: True
-                        # In addition to temporal decorrelation of worker processes, also decorrelate envs within one worker processFor environments with a fixed episode length
-                        # it can prevent the reset from happening in the same rollout for all envs simultaneously, which makes experience collection more uniform. (default: True)
-with_vtrace: True
-                        # Enables V-trace off-policy correction. If this is True, then GAE is not used (default: True)
-vtrace_rho: 1.0
-                        # rho_hat clipping parameter of the V-trace algorithm (importance sampling truncation) (default: 1.0)
-vtrace_c: 1.0   
-                        # c_hat clipping parameter of the V-trace algorithm. Low values for c_hat can reduce variance of the advantage estimates (similar to GAE lambda < 1)
-                        # (default: 1.0)
-set_workers_cpu_affinity: True
-                        # Whether to assign workers to specific CPU cores or not. The logic is beneficial for most workloads because prevents a lot of context switching.However
-                        # for some environments it can be better to disable it, to allow one worker to use all cores some of the time. This can be the case for some DMLab
-                        # environments with very expensive episode resetthat can use parallel CPU cores for level generation. (default: True)
-force_envs_single_thread: True
-                        # Some environments may themselves use parallel libraries such as OpenMP or MKL. Since we parallelize environments on the level of workers, there is no
-                        # need to keep this parallel semantic.This flag uses threadpoolctl to force libraries such as OpenMP and MKL to use only a single thread within the
-                        # environment.Default value (True) is recommended unless you are running fewer workers than CPU cores. (default: True)
-reset_timeout_seconds: 120
-                        # Fail worker on initialization if not a single environment was reset in this time (worker probably got stuck) (default: 120)
-default_niceness: 0
-                        # Niceness of the highest priority process (the learner). Values below zero require elevated privileges. (default: 0)
-train_in_background_thread: True
-                        # Using background thread for training is faster and allows preparing the next batch while training is in progress.Unfortunately debugging can become very
-                        # tricky in this case. So there is an option to use only a single thread on the learner to simplify the debugging. (default: True)
-learner_main_loop_num_cores: 1
-                       # When batching on the learner is the bottleneck, increasing the number of cores PyTorch uses can improve the performance (default: 1)
-actor_worker_gpus: []  
-                       # [ACTOR_WORKER_GPUS [ACTOR_WORKER_GPUS ...]]
-                       # By default, actor workers only use CPUs. Changes this if e.g. you need GPU-based rendering on the actors (default: [])
-with_pbt: False        # Enables population-based training basic features (default: False)
-pbt_mix_policies_in_one_env: True
-                        # For multi-agent envs, whether we mix different policies in one env. (default: True)
-pbt_period_env_steps: 5000000
-                        # Periodically replace the worst policies with the best ones and perturb the hyperparameters (default: 5000000)
-pbt_start_mutation: 20000000
-                        # Allow initial diversification, start PBT after this many env steps (default: 20000000)
-pbt_replace_fraction: 0.3
-                        # A portion of policies performing worst to be replace by better policies (rounded up) (default: 0.3)
-pbt_mutation_rate: 0.15
-                        # Probability that a parameter mutates (default: 0.15)
-pbt_replace_reward_gap: 0.1
-                        # Relative gap in true reward when replacing weights of the policy with a better performing one (default: 0.1)
-pbt_replace_reward_gap_absolute: 1e-06
-                        # Absolute gap in true reward when replacing weights of the policy with a better performing one (default: 1e-06)
-pbt_optimize_batch_size: False
-                        # Whether to optimize batch size or not (experimental) (default: False)
-pbt_optimize_gamma: False
-                        # Whether to optimize gamma, discount factor, or not (experimental) (default: False)
-pbt_target_objective: true_reward
-                        # Policy stat to optimize with PBT. true_reward (default) is equal to raw env reward if not specified, but can also be any other per-policy stat.For
-                        # DMlab-30 use value "dmlab_target_objective" (which is capped human normalized score) (default: true_reward)
-pbt_perturb_min: 1.05
-                        # When PBT mutates a float hyperparam, it samples the change magnitude randomly from the uniform distribution [pbt_perturb_min, pbt_perturb_max] (default:
-                        # 1.05)
-pbt_perturb_max: 1.5
-                        # When PBT mutates a float hyperparam, it samples the change magnitude randomly from the uniform distribution [pbt_perturb_min, pbt_perturb_max] (default:
-                        # 1.5)
-use_cpc: False     # Use CPC|A as an auxiliary loss durning learning (default: False)
-cpc_forward_steps: 8
-                        # Number of forward prediction steps for CPC (default: 8)
-cpc_time_subsample: 6
-                        # Number of timesteps to sample from each batch. This should be less than recurrence to decorrelate experience. (default: 6)
-cpc_forward_subsample: 2
-                        # Number of forward steps to sample for loss computation. This should be less than cpc_forward_steps to decorrelate gradients. (default: 2)
-with_wandb: ${wandb}
-                        # Enables Weights and Biases integration (default: False)
-wandb_user: null
-                        # WandB username (entity). Must be specified from command line! Also see https://docs.wandb.ai/quickstart#1.-set-up-wandb (default: None)
-wandb_project: ${wandb_project}
-                        # WandB "Project" (default: sample_factory)
-wandb_group: ${wandb_group}
-                        # WandB "Group" (to group your experiments). By default this is the name of the env. (default: None)
-wandb_job_type: SF
-                        # WandB job type (default: SF)
-wandb_tags: []          # [WANDB_TAGS [WANDB_TAGS ...]]
-                        # Tags can help with finding experiments in WandB web console (default: [])
-benchmark: False
-                        # Benchmark mode (default: False)
-sampler_only: False
-                        # Do not send experience to the learner, measuring sampling throughput (default: False)
-env_frameskip: null
-                        # Number of frames for action repeat (frame skipping). Default (None) means use default environment value (default: None)
-env_framestack: 4
-                        # Frame stacking (only used in Atari?) (default: 4)
-pixel_format: CHW
-                        # PyTorch expects CHW by default, Ray & TensorFlow expect HWC (default: CHW)
\ No newline at end of file
diff --git a/cfgs/algorithm/ppo.yaml b/cfgs/algorithm/ppo.yaml
deleted file mode 100644
index 485f53d9..00000000
--- a/cfgs/algorithm/ppo.yaml
+++ /dev/null
@@ -1,81 +0,0 @@
-algorithm_name: 'rmappo' # choices=["rmappo", "mappo"]
-experiment: ${experiment}
-seed: ${seed}
-device: ${device}
-cuda_deterministic: True
-n_training_threads: 1 # "Number of torch threads for training"
-n_rollout_threads: 1 # Number of parallel envs for training rollouts
-n_eval_rollout_threads: 1 # Number of parallel envs for evaluating rollouts
-n_render_rollout_threads: 1 # Number of parallel envs for rendering rollouts
-num_env_steps: 1e8 # Number of environment steps to train
-wandb: ${wandb}
-use_obs_instead_of_state: True # Whether to use global state or concatenated obs
-episode_length: ${episode_length} # Max length for any episode
-share_policy: True # Whether all agents share the same policy
-use_centralized_V: False # Whether to use a centralized value function
-stacked_frames: 1 # number of stacked observations
-use_stacked_frames: True # whether to use stacked frames
-hidden_size: 64 # Dimension of hidden layers for actor/critic networks
-layer_N: 2 # "Number of layers for actor/critic networks"
-use_ReLU: True # Whether to use ReLU activation or Tanh
-use_popart: False # Use PopART to normalize rewards
-use_valuenorm: True # use running mean and std to normalize rewards
-use_feature_normalization: True # Whether to apply layernorm to the inputs
-use_orthogonal: True # Whether to use Orthogonal initialization for weights and 0 initialization for biases
-gain: 0.01 # The gain # of last action layer
-# recurrent parameters
-use_naive_recurrent_policy: False # Whether to use a naive recurrent policy by stacking states I believe?
-use_recurrent_policy: True # Whether to use a recurrent policy
-recurrent_N: 1 # The number of recurrent layers
-data_chunk_length: 10 # Time length of chunks used to train a recurrent_policy
-
-# optimizer parameters
-lr: 5e-4 # learning rate
-critic_lr: 5e-4 # critic LR
-opti_eps: 1e-5 # RMSprop optimizer epsilon
-weight_decay: 0 
-
-# ppo parameters
-ppo_epoch: 10 # number of PPO epochs
-use_clipped_value_loss: True # clip loss value
-clip_param: 0.2 # PPO clipping parameter
-num_mini_batch: 4 # Number of minibatches of the collected data to use
-entropy_coef: 0.00
-value_loss_coef: 0.5 # scaling on the value loss
-use_max_grad_norm: True # use max norm of gradients
-max_grad_norm: 10.0 # max norm of gradients
-use_gae: True # use generalized advantage estimation
-gamma: 0.99 # discount factor
-gae_lambda: 0.95
-use_proper_time_limits: False # compute returns taking into account time limits
-use_huber_loss: True 
-use_value_active_masks: True # whether to mask useless data in value loss
-use_policy_active_masks: True # whether to mask useless data in policy loss
-huber_delta: 10.0 # coefficient of huber loss
-use_linear_lr_decay: False
-
-# saving and logging
-save_interval: 1 # time duration between contiunous twice models saving
-log_interval: 5 # time duration between contiunous twice log printing
-use_eval: True
-eval_interval: 25 
-eval_episodes: 10
-save_gifs: True
-render_interval: 25 # how often to render
-use_render: False
-render_episodes: 1
-ifi: 0.1 # the play interval of each rendered image in saved video
-model_dir: null
-
-# goal env wrapper stuff
-density_buffer_size: 100000
-density_optim_samples: 1000
-num_goal_samples: 200
-bandwidth: 0.1
-log_figure: True
-kernel: 'gaussian' 
-quartile_cutoff: 0.0
-normalize_value: 400.0
-log_every_n_episodes: 50
-# if True, all the agents share the same goal buffer for sampling new goals
-share_goal_buffer: False
\ No newline at end of file
diff --git a/cfgs/config.py b/cfgs/config.py
deleted file mode 100644
index f759c9af..00000000
--- a/cfgs/config.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Set path to all the Waymo data and the parsed Waymo files."""
-import os
-from pathlib import Path
-
-from hydra import compose, initialize
-from hydra.core.global_hydra import GlobalHydra
-from omegaconf import OmegaConf
-from pyvirtualdisplay import Display
-
-VERSION_NUMBER = 2
-
-PROJECT_PATH = Path.resolve(Path(__file__).parent.parent)
-DATA_FOLDER = '/checkpoint/eugenevinitsky/waymo_open/motion_v1p1/uncompressed/scenario/'
-TRAIN_DATA_PATH = os.path.join(DATA_FOLDER, 'training')
-VALID_DATA_PATH = os.path.join(DATA_FOLDER, 'validation')
-TEST_DATA_PATH = os.path.join(DATA_FOLDER, 'testing')
-PROCESSED_TRAIN_NO_TL = os.path.join(
-    DATA_FOLDER, f'formatted_json_v{VERSION_NUMBER}_no_tl_train')
-PROCESSED_VALID_NO_TL = os.path.join(
-    DATA_FOLDER, f'formatted_json_v{VERSION_NUMBER}_no_tl_valid')
-PROCESSED_TRAIN = os.path.join(DATA_FOLDER,
-                               f'formatted_json_v{VERSION_NUMBER}_train')
-PROCESSED_VALID = os.path.join(DATA_FOLDER,
-                               f'formatted_json_v{VERSION_NUMBER}_valid')
-ERR_VAL = -1e4
-
-
-def get_scenario_dict(hydra_cfg):
-    """Convert the `scenario` key in the hydra config to a true dict."""
-    if isinstance(hydra_cfg['scenario'], dict):
-        return hydra_cfg['scenario']
-    else:
-        return OmegaConf.to_container(hydra_cfg['scenario'], resolve=True)
-
-
-def get_default_scenario_dict():
-    """Construct the `scenario` dict without w/o hydra decorator."""
-    GlobalHydra.instance().clear()
-    initialize(config_path="./")
-    cfg = compose(config_name="config")
-    return get_scenario_dict(cfg)
-
-
-def set_display_window():
-    """Set a virtual display for headless machines."""
-    if "DISPLAY" not in os.environ:
-        disp = Display()
-        disp.start()
diff --git a/cfgs/config.yaml b/cfgs/config.yaml
deleted file mode 100644
index cf123dbf..00000000
--- a/cfgs/config.yaml
+++ /dev/null
@@ -1,122 +0,0 @@
-defaults:
-  - algorithm: ppo
-  - override hydra/launcher: submitit_local
-
-seed: 0
-device: 'cuda:0'
-debug: False
-experiment: intersection
-env: my_custom_multi_env_v1 # name of the env, hardcoded for now
-
-# WANDB things
-wandb: False
-wandb_project: nocturne4
-wandb_id: null
-wandb_group: ${experiment}
-
-# one of the agents will be randomly tagged as the 
-# agent that we control, the rest of the agents will
-# replay trajectories
-single_agent_mode: False
-# all goals are achievable within 90 steps
-episode_length: 80
-# how many files of the total dataset to use. -1 indicates to use all of them
-num_files: -1
-scenario_path:  ${oc.env:PROCESSED_TRAIN_NO_TL}
-dt: 0.1
-sims_per_step: 10
-img_as_state: False
-discretize_actions: True
-accel_discretization: 6
-accel_lower_bound: -3
-accel_upper_bound: 2
-steering_lower_bound: -0.7 # corresponds to about 40 degrees of max steering angle
-steering_upper_bound: 0.7 # corresponds to about 40 degrees of max steering angle
-steering_discretization: 21
-head_angle_lower_bound: -1.6
-head_angle_upper_bound: 1.6
-head_angle_discretization: 5
-max_num_vehicles: 20 # we want to upper bound how many agents there can be in the scene
-                     # this is mostly useful because RL libraries expect it
-# TODO(eugenevinitsky) actually implement this
-randomize_goals: False
-scenario:
-  # initial timestep of the scenario (which ranges from timesteps 0 to 90)
-  start_time: 0
-  # if set to True, non-vehicle objects (eg. cyclists, pedestrians...) will be spawned
-  allow_non_vehicles: False
-  # for an object to be included into moving_objects
-  moving_threshold: 0.2  # its goal must be at least this distance from its initial position
-  speed_threshold: 0.05  # its speed must be superior to this value at some point
-  # maximum number of each objects visible in the object state
-  # if there are more objects, the closest ones are prioritized
-  # if there are less objects, the features vector is padded with zeros
-  max_visible_objects: 16
-  max_visible_road_points: 1000
-  max_visible_traffic_lights: 20
-  max_visible_stop_signs: 4
-  # from the set of road points that comprise each polyline, we take
-  # every n-th one of these
-  sample_every_n: 1
-  # if true we add all the road-edges (the edges you can collide with)
-  # to the visible road points first and only add the other points
-  # (road lines, lane lines) etc. if we have remaining states after
-  road_edge_first: False
-
-# these configs are mostly used for aligning displacement error computations
-# with the standard way of doing it in other libraries i.e. we keep 
-# the agent for the whole rollout and compute its distance from the expert
-# at all the points that the expert is valid
-remove_at_goal: True # if true, remove the agent when it reaches its goal
-remove_at_collide: True # if true, remove the agent when it collides
-
-rew_cfg:
-  shared_reward: False # agents get the collective reward instead of individual rewards
-  goal_tolerance: 0.5
-  reward_scaling: 10.0 # rescale all the rewards by this value. This can help w/ some learning algorithms
-  collision_penalty: 0
-  shaped_goal_distance_scaling: 0.2
-  shaped_goal_distance: True
-  goal_distance_penalty: False # if shaped_goal_distance is true, then when this is True the goal distance 
-                               # is a penalty for being far from 
-                               # goal instead of a reward for being close
-  goal_achieved_bonus: ${episode_length}
-  # goal is only achieved if you're within this tolerance on distance from goal
-  position_target: True
-  position_target_tolerance: 1.0
-  # goal is only achieved if you're within this tolerance on final agent speed at goal position
-  speed_target: True
-  speed_target_tolerance: 1.0
-  # goal is only achieved if you're within this tolerance on final agent heading at goal position
-  heading_target: True
-  heading_target_tolerance: 0.3
-subscriber:
-  view_angle: 2.1
-  # the distance which the cone extends before agents are not visible
-  # TODO(eugenevinitsky) pick the right number
-  view_dist: 80
-  use_ego_state: True
-  use_observations: True
-  # if true, we return an observation for agents that have exited the system
-  # as well as returning an observation for the extra agents if the number of
-  # agents in the system is less than max_num_vehicles
-  keep_inactive_agents: False
-  # for values greater than 1, we will stack inputs together
-  n_frames_stacked: 1
-
-results_dir: ${oc.env:NOCTURNE_LOG_DIR}
-
-hydra:
-  run:
-    dir: ${results_dir}/test/${now:%Y.%m.%d}/${experiment}/${now:%H.%M.%S}/${hydra.job.override_dirname}
-  sweep:
-    dir: ${results_dir}/${oc.env:USER}/nocturne/sweep/${now:%Y.%m.%d}/${experiment}/${now:%H.%M.%S}
-    subdir: ${hydra.job.num}
-  launcher:
-    timeout_min: 2880
-    cpus_per_task: 10
-    gpus_per_node: 1
-    tasks_per_node: 1
-    mem_gb: 160
-    nodes: 1
-    submitit_folder: ${results_dir}/sweep/${now:%Y.%m.%d}/${now:%H%M}_${experiment}/.slurm
diff --git a/cfgs/cpp_ b/cfgs/cpp_
deleted file mode 100644
index e69de29b..00000000
diff --git a/cfgs/imitation/config.yaml b/cfgs/imitation/config.yaml
deleted file mode 100644
index 6cf72fc1..00000000
--- a/cfgs/imitation/config.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-defaults:
-  - override hydra/launcher: submitit_local
-
-experiment: test
-path: ${oc.env:PROCESSED_TRAIN_NO_TL}
-num_files: 1000
-n_cpus: 9
-lr: 3e-4
-samples_per_epoch: 50000
-max_visible_road_points: 500
-batch_size: 512
-epochs: 700
-device: cuda
-n_stacked_states: 5
-view_dist: 80
-view_angle: 3.14
-actions_are_positions: False
-discrete: True
-seed: 0
-
-# WANDB things
-wandb: True
-wandb_project: nocturne
-wandb_group: ${experiment}
-
-# tensorboard logs
-write_to_tensorboard: True
-
-hydra:
-  run:
-    dir: /checkpoint/${oc.env:USER}/nocturne/test/${now:%Y.%m.%d}/${experiment}/${now:%H.%M.%S}/${hydra.job.override_dirname}
-  sweep:
-    dir: /checkpoint/${oc.env:USER}/nocturne/sweep/imitation/${now:%Y.%m.%d}/${experiment}/${now:%H.%M.%S}
-    subdir: ${hydra.job.num}
-  launcher:
-    timeout_min: 2880
-    cpus_per_task: 80
-    gpus_per_node: 1
-    tasks_per_node: 1
-    mem_gb: 160
-    nodes: 1
-    submitit_folder: /checkpoint/${oc.env:USER}/nocturne/sweep/imitation/${now:%Y.%m.%d}/${experiment}/${now:%H.%M.%S}/.slurm
diff --git a/configs/env_config.yaml b/configs/env_config.yaml
new file mode 100644
index 00000000..adf7a237
--- /dev/null
+++ b/configs/env_config.yaml
@@ -0,0 +1,93 @@
+seed: 0
+device: cuda:0
+debug: false
+experiment: intersection
+env: my_custom_multi_env_v1 # name of the env, hardcoded for now
+
+# all goals are achievable within 90 steps
+episode_length: 80
+# how many files of the total dataset to use. -1 indicates to use all of them
+num_files: 5
+dt: 0.1
+sims_per_step: 10
+img_as_state: false
+discretize_actions: true
+include_head_angle: false # Whether to include the head tilt/angle as part of a vehicle's action
+accel_discretization: 3
+accel_lower_bound: -2
+accel_upper_bound: 2
+steering_lower_bound: -0.25 # corresponds to about 14 degrees of max steering angle
+steering_upper_bound: 0.25 # corresponds to about 14 degrees of max steering angle
+steering_discretization: 3
+max_num_vehicles: 20
+randomize_goals: false
+scenario:
+  # initial timestep of the scenario (which ranges from timesteps 0 to 90)
+  start_time: 0
+  # if set to True, non-vehicle objects (eg. cyclists, pedestrians...) will be spawned
+  allow_non_vehicles: false
+  # for an object to be included into moving_objects
+  moving_threshold: 0.2  # its goal must be at least this distance from its initial position
+  speed_threshold: 0.05  # its speed must exceed this value at some point
+  # maximum number of objects of each type visible in the object state
+  # if there are more objects, the closest ones are prioritized
+  # if there are fewer objects, the feature vector is padded with zeros
+  #max_visible_objects: 16
+  max_visible_road_points: 500
+  #max_visible_traffic_lights: 20
+  #max_visible_stop_signs: 4
+  # from the set of road points that comprise each polyline, we take
+  # every n-th one of these
+  sample_every_n: 1
+  # if true we add all the road-edges (the edges you can collide with)
+  # to the visible road points first and only add the other points
+  # (road lines, lane lines) etc. if we have remaining states after
+  road_edge_first: false
+  invalid_position: -10000.0
+  context_length: 10
+
+# these configs are mostly used for aligning displacement error computations
+# with the standard way of doing it in other libraries i.e. we keep
+# the agent for the whole rollout and compute its distance from the expert
+# at all the points that the expert is valid
+remove_at_goal: true # if true, remove the agent when it reaches its goal
+remove_at_collide: true # if true, remove the agent when it collides
+
+# Reward settings
+rew_cfg:
+  shared_reward: false # agents get the collective reward instead of individual rewards
+  goal_tolerance: 0.5
+  reward_scaling: 10.0 # rescale all the rewards by this value. This can help w/ some learning algorithms
+  collision_penalty: 0
+  shaped_goal_distance_scaling: 0.2
+  shaped_goal_distance: true
+  goal_distance_penalty: false # if shaped_goal_distance is true, then when this is True the goal distance
+                               # is a penalty for being far from
+                               # goal instead of a reward for being close
+  goal_achieved_bonus: 80
+  position_target: true # If True, goal is only achieved if the distance from the goal is within position_target_tolerance
+  position_target_tolerance: 1.0
+  speed_target: true # If True, goal is only achieved if the final agent speed at the goal position is within speed_target_tolerance
+  speed_target_tolerance: 1.0
+  heading_target: false # If True, goal is only achieved if the final agent heading at the goal position is within heading_target_tolerance
+  heading_target_tolerance: 0.3
+  # we assume that vehicles are never more than 400 meters from their goal which makes
+  # sense as the episodes are 9 seconds long, i.e. we'd have to go more than 40 m/s to get there
+  goal_speed_scaling: 40.0
+
+# Agent settings
+subscriber:
+  view_angle: 3.14 # the angle of the view cone in radians; set to pi rad to correct for missing head angle
+  view_dist: 80 # the distance which the cone extends before agents are not visible
+  use_ego_state: true # if True, add information about the ego state
+  use_observations: false # if True, add visible field
+  use_start_position: false # if True, add start (x, y)-position of the agent
+  use_current_position: false # if True, add current (x, y)-position of the agent
+  use_target_position: false # if True, add target (x, y)-position of the agent
+  use_distance_to_target: false # if True, add distance to target (dx, dy) of the agent
+
+  # for values greater than 1, we will stack inputs together (i.e. memory and equivalent of n_stacked_states)
+  n_frames_stacked: 1 # Agent memory
+
+# Path to folder with traffic scene(s) from which to create an environment
+data_path: ../data
diff --git a/examples/example_scenario.json b/data/example_scenario.json
similarity index 100%
rename from examples/example_scenario.json
rename to data/example_scenario.json
diff --git a/data/valid_files.json b/data/valid_files.json
new file mode 100644
index 00000000..7698869b
--- /dev/null
+++ b/data/valid_files.json
@@ -0,0 +1,3 @@
+{
+    "example_scenario.json": []
+}
diff --git a/environment.yml b/environment.yml
index b9e7ae19..33dc0588 100644
--- a/environment.yml
+++ b/environment.yml
@@ -2,29 +2,4 @@ name: nocturne
 channels:
   - defaults
 dependencies:
-  - python=3.8
-  - pip=21.1.3
-  - numpy=1.19.2
-  - jupyterlab=3.0.14
-  - pip:
-    - hydra-core==1.1.0
-    - hydra-submitit-launcher==1.1.5
-    - ipdb==0.13.9
-    - seaborn
-    - imageio==2.10.1
-    - moviepy==1.0.3
-    - opencv-python==4.5.5.64
-    - gym==0.20.0
-    - wandb==0.12.15
-    - imageio==2.10.1
-    - setproctitle==1.2.3
-    - tensorboardX==2.5
-    - pytest==7.1.1
-    - flake8==4.0.1
-    - pydocstyle==6.1.1
-    - pyvirtualdisplay
-    - ray==1.11.0
-    - dm-tree
-    - tabulate
-    - torch
-    - sample-factory==1.123.0
\ No newline at end of file
+  - python=3.10
diff --git a/examples/01_data_structure.ipynb b/examples/01_data_structure.ipynb
new file mode 100644
index 00000000..d79bf973
--- /dev/null
+++ b/examples/01_data_structure.ipynb
@@ -0,0 +1,4235 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data format of a traffic scene\n",
+    "\n",
+    "This notebook dives into the data format used to create simulations in Nocturne.\n",
+    "\n",
+    "_Last update: 10/2023_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "import pandas as pd\n",
+    "\n",
+    "import os\n",
+    "os.chdir('..')\n",
+    "\n",
+    "cmap = ['r', 'g', 'b', 'y', 'c'] \n",
+    "%config InlineBackend.figure_format = 'svg'\n",
+    "sns.set('notebook', font_scale=1.1, rc={'figure.figsize': (8, 3)})\n",
+    "sns.set_style('ticks', rc={'figure.facecolor': 'none', 'axes.facecolor': 'none'})"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Traffic scenes are constructed by utilizing the [Waymo Open Motion dataset](https://waymo.com/open/). Though every scene is unique, they all have the same basic data structure. \n",
+    "\n",
+    "To load a traffic scene:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['name', 'objects', 'roads', 'tl_states'])"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Take an example scene\n",
+    "data_path = './data/example_scenario.json'\n",
+    "\n",
+    "with open(data_path) as file:\n",
+    "    traffic_scene = json.load(file)\n",
+    "\n",
+    "traffic_scene.keys()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Global Overview \n",
+    "A traffic scene consists of:\n",
+    "- `name`: the name of the traffic scenario.\n",
+    "- `objects`: the road objects or moving vehicles in the scene.\n",
+    "- `roads`: the road points in the scene, these are all the stationary objects.\n",
+    "- `tl_states`: the states of the traffic lights, which are filtered out for now. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{}"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "traffic_scene['tl_states']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'tfrecord-00358-of-01000_65.json'"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "traffic_scene['name']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/svg+xml": [
+       "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n",
+       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
+       "  \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
+       "<svg xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"483.757469pt\" height=\"253.606382pt\" viewBox=\"0 0 483.757469 253.606382\" xmlns=\"http://www.w3.org/2000/svg\" version=\"1.1\">\n",
+       " <metadata>\n",
+       "  <rdf:RDF xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n",
+       "   <cc:Work>\n",
+       "    <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n",
+       "    <dc:date>2023-10-03T10:23:25.972593</dc:date>\n",
+       "    <dc:format>image/svg+xml</dc:format>\n",
+       "    <dc:creator>\n",
+       "     <cc:Agent>\n",
+       "      <dc:title>Matplotlib v3.8.0, https://matplotlib.org/</dc:title>\n",
+       "     </cc:Agent>\n",
+       "    </dc:creator>\n",
+       "   </cc:Work>\n",
+       "  </rdf:RDF>\n",
+       " </metadata>\n",
+       " <defs>\n",
+       "  <style type=\"text/css\">*{stroke-linejoin: round; stroke-linecap: butt}</style>\n",
+       " </defs>\n",
+       " <g id=\"figure_1\">\n",
+       "  <g id=\"patch_1\">\n",
+       "   <path d=\"M 0 253.606382 \n",
+       "L 483.757469 253.606382 \n",
+       "L 483.757469 0 \n",
+       "L 0 0 \n",
+       "L 0 253.606382 \n",
+       "z\n",
+       "\" style=\"fill: none\"/>\n",
+       "  </g>\n",
+       "  <g id=\"axes_1\">\n",
+       "   <g id=\"patch_2\">\n",
+       "    <path d=\"M 30.157469 189.129188 \n",
+       "L 476.557469 189.129188 \n",
+       "L 476.557469 22.809188 \n",
+       "L 30.157469 22.809188 \n",
+       "L 30.157469 189.129188 \n",
+       "z\n",
+       "\" style=\"fill: none\"/>\n",
+       "   </g>\n",
+       "   <g id=\"matplotlib.axis_1\">\n",
+       "    <g id=\"xtick_1\">\n",
+       "     <g id=\"line2d_1\">\n",
+       "      <defs>\n",
+       "       <path id=\"m5686ae8697\" d=\"M 0 0 \n",
+       "L 0 6 \n",
+       "\" style=\"stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </defs>\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m5686ae8697\" x=\"141.757469\" y=\"189.129188\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_1\">\n",
+       "      <!-- vehicle -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(130.653395 231.385268) rotate(-45) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-76\" d=\"M 1344 0 \n",
+       "L 81 3319 \n",
+       "L 675 3319 \n",
+       "L 1388 1331 \n",
+       "Q 1503 1009 1600 663 \n",
+       "Q 1675 925 1809 1294 \n",
+       "L 2547 3319 \n",
+       "L 3125 3319 \n",
+       "L 1869 0 \n",
+       "L 1344 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-65\" d=\"M 2694 1069 \n",
+       "L 3275 997 \n",
+       "Q 3138 488 2766 206 \n",
+       "Q 2394 -75 1816 -75 \n",
+       "Q 1088 -75 661 373 \n",
+       "Q 234 822 234 1631 \n",
+       "Q 234 2469 665 2931 \n",
+       "Q 1097 3394 1784 3394 \n",
+       "Q 2450 3394 2872 2941 \n",
+       "Q 3294 2488 3294 1666 \n",
+       "Q 3294 1616 3291 1516 \n",
+       "L 816 1516 \n",
+       "Q 847 969 1125 678 \n",
+       "Q 1403 388 1819 388 \n",
+       "Q 2128 388 2347 550 \n",
+       "Q 2566 713 2694 1069 \n",
+       "z\n",
+       "M 847 1978 \n",
+       "L 2700 1978 \n",
+       "Q 2663 2397 2488 2606 \n",
+       "Q 2219 2931 1791 2931 \n",
+       "Q 1403 2931 1139 2672 \n",
+       "Q 875 2413 847 1978 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-68\" d=\"M 422 0 \n",
+       "L 422 4581 \n",
+       "L 984 4581 \n",
+       "L 984 2938 \n",
+       "Q 1378 3394 1978 3394 \n",
+       "Q 2347 3394 2619 3248 \n",
+       "Q 2891 3103 3008 2847 \n",
+       "Q 3125 2591 3125 2103 \n",
+       "L 3125 0 \n",
+       "L 2563 0 \n",
+       "L 2563 2103 \n",
+       "Q 2563 2525 2380 2717 \n",
+       "Q 2197 2909 1863 2909 \n",
+       "Q 1613 2909 1392 2779 \n",
+       "Q 1172 2650 1078 2428 \n",
+       "Q 984 2206 984 1816 \n",
+       "L 984 0 \n",
+       "L 422 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-69\" d=\"M 425 3934 \n",
+       "L 425 4581 \n",
+       "L 988 4581 \n",
+       "L 988 3934 \n",
+       "L 425 3934 \n",
+       "z\n",
+       "M 425 0 \n",
+       "L 425 3319 \n",
+       "L 988 3319 \n",
+       "L 988 0 \n",
+       "L 425 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-63\" d=\"M 2588 1216 \n",
+       "L 3141 1144 \n",
+       "Q 3050 572 2676 248 \n",
+       "Q 2303 -75 1759 -75 \n",
+       "Q 1078 -75 664 370 \n",
+       "Q 250 816 250 1647 \n",
+       "Q 250 2184 428 2587 \n",
+       "Q 606 2991 970 3192 \n",
+       "Q 1334 3394 1763 3394 \n",
+       "Q 2303 3394 2647 3120 \n",
+       "Q 2991 2847 3088 2344 \n",
+       "L 2541 2259 \n",
+       "Q 2463 2594 2264 2762 \n",
+       "Q 2066 2931 1784 2931 \n",
+       "Q 1359 2931 1093 2626 \n",
+       "Q 828 2322 828 1663 \n",
+       "Q 828 994 1084 691 \n",
+       "Q 1341 388 1753 388 \n",
+       "Q 2084 388 2306 591 \n",
+       "Q 2528 794 2588 1216 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-6c\" d=\"M 409 0 \n",
+       "L 409 4581 \n",
+       "L 972 4581 \n",
+       "L 972 0 \n",
+       "L 409 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-76\"/>\n",
+       "       <use xlink:href=\"#ArialMT-65\" x=\"50\"/>\n",
+       "       <use xlink:href=\"#ArialMT-68\" x=\"105.615234\"/>\n",
+       "       <use xlink:href=\"#ArialMT-69\" x=\"161.230469\"/>\n",
+       "       <use xlink:href=\"#ArialMT-63\" x=\"183.447266\"/>\n",
+       "       <use xlink:href=\"#ArialMT-6c\" x=\"233.447266\"/>\n",
+       "       <use xlink:href=\"#ArialMT-65\" x=\"255.664062\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_2\">\n",
+       "     <g id=\"line2d_2\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m5686ae8697\" x=\"364.957469\" y=\"189.129188\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_2\">\n",
+       "      <!-- pedestrian -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(347.19309 244.705878) rotate(-45) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-70\" d=\"M 422 -1272 \n",
+       "L 422 3319 \n",
+       "L 934 3319 \n",
+       "L 934 2888 \n",
+       "Q 1116 3141 1344 3267 \n",
+       "Q 1572 3394 1897 3394 \n",
+       "Q 2322 3394 2647 3175 \n",
+       "Q 2972 2956 3137 2557 \n",
+       "Q 3303 2159 3303 1684 \n",
+       "Q 3303 1175 3120 767 \n",
+       "Q 2938 359 2589 142 \n",
+       "Q 2241 -75 1856 -75 \n",
+       "Q 1575 -75 1351 44 \n",
+       "Q 1128 163 984 344 \n",
+       "L 984 -1272 \n",
+       "L 422 -1272 \n",
+       "z\n",
+       "M 931 1641 \n",
+       "Q 931 1000 1190 694 \n",
+       "Q 1450 388 1819 388 \n",
+       "Q 2194 388 2461 705 \n",
+       "Q 2728 1022 2728 1688 \n",
+       "Q 2728 2322 2467 2637 \n",
+       "Q 2206 2953 1844 2953 \n",
+       "Q 1484 2953 1207 2617 \n",
+       "Q 931 2281 931 1641 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-64\" d=\"M 2575 0 \n",
+       "L 2575 419 \n",
+       "Q 2259 -75 1647 -75 \n",
+       "Q 1250 -75 917 144 \n",
+       "Q 584 363 401 755 \n",
+       "Q 219 1147 219 1656 \n",
+       "Q 219 2153 384 2558 \n",
+       "Q 550 2963 881 3178 \n",
+       "Q 1213 3394 1622 3394 \n",
+       "Q 1922 3394 2156 3267 \n",
+       "Q 2391 3141 2538 2938 \n",
+       "L 2538 4581 \n",
+       "L 3097 4581 \n",
+       "L 3097 0 \n",
+       "L 2575 0 \n",
+       "z\n",
+       "M 797 1656 \n",
+       "Q 797 1019 1065 703 \n",
+       "Q 1334 388 1700 388 \n",
+       "Q 2069 388 2326 689 \n",
+       "Q 2584 991 2584 1609 \n",
+       "Q 2584 2291 2321 2609 \n",
+       "Q 2059 2928 1675 2928 \n",
+       "Q 1300 2928 1048 2622 \n",
+       "Q 797 2316 797 1656 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-73\" d=\"M 197 991 \n",
+       "L 753 1078 \n",
+       "Q 800 744 1014 566 \n",
+       "Q 1228 388 1613 388 \n",
+       "Q 2000 388 2187 545 \n",
+       "Q 2375 703 2375 916 \n",
+       "Q 2375 1106 2209 1216 \n",
+       "Q 2094 1291 1634 1406 \n",
+       "Q 1016 1563 777 1677 \n",
+       "Q 538 1791 414 1992 \n",
+       "Q 291 2194 291 2438 \n",
+       "Q 291 2659 392 2848 \n",
+       "Q 494 3038 669 3163 \n",
+       "Q 800 3259 1026 3326 \n",
+       "Q 1253 3394 1513 3394 \n",
+       "Q 1903 3394 2198 3281 \n",
+       "Q 2494 3169 2634 2976 \n",
+       "Q 2775 2784 2828 2463 \n",
+       "L 2278 2388 \n",
+       "Q 2241 2644 2061 2787 \n",
+       "Q 1881 2931 1553 2931 \n",
+       "Q 1166 2931 1000 2803 \n",
+       "Q 834 2675 834 2503 \n",
+       "Q 834 2394 903 2306 \n",
+       "Q 972 2216 1119 2156 \n",
+       "Q 1203 2125 1616 2013 \n",
+       "Q 2213 1853 2448 1751 \n",
+       "Q 2684 1650 2818 1456 \n",
+       "Q 2953 1263 2953 975 \n",
+       "Q 2953 694 2789 445 \n",
+       "Q 2625 197 2315 61 \n",
+       "Q 2006 -75 1616 -75 \n",
+       "Q 969 -75 630 194 \n",
+       "Q 291 463 197 991 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-74\" d=\"M 1650 503 \n",
+       "L 1731 6 \n",
+       "Q 1494 -44 1306 -44 \n",
+       "Q 1000 -44 831 53 \n",
+       "Q 663 150 594 308 \n",
+       "Q 525 466 525 972 \n",
+       "L 525 2881 \n",
+       "L 113 2881 \n",
+       "L 113 3319 \n",
+       "L 525 3319 \n",
+       "L 525 4141 \n",
+       "L 1084 4478 \n",
+       "L 1084 3319 \n",
+       "L 1650 3319 \n",
+       "L 1650 2881 \n",
+       "L 1084 2881 \n",
+       "L 1084 941 \n",
+       "Q 1084 700 1114 631 \n",
+       "Q 1144 563 1211 522 \n",
+       "Q 1278 481 1403 481 \n",
+       "Q 1497 481 1650 503 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-72\" d=\"M 416 0 \n",
+       "L 416 3319 \n",
+       "L 922 3319 \n",
+       "L 922 2816 \n",
+       "Q 1116 3169 1280 3281 \n",
+       "Q 1444 3394 1641 3394 \n",
+       "Q 1925 3394 2219 3213 \n",
+       "L 2025 2691 \n",
+       "Q 1819 2813 1613 2813 \n",
+       "Q 1428 2813 1281 2702 \n",
+       "Q 1134 2591 1072 2394 \n",
+       "Q 978 2094 978 1738 \n",
+       "L 978 0 \n",
+       "L 416 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-61\" d=\"M 2588 409 \n",
+       "Q 2275 144 1986 34 \n",
+       "Q 1697 -75 1366 -75 \n",
+       "Q 819 -75 525 192 \n",
+       "Q 231 459 231 875 \n",
+       "Q 231 1119 342 1320 \n",
+       "Q 453 1522 633 1644 \n",
+       "Q 813 1766 1038 1828 \n",
+       "Q 1203 1872 1538 1913 \n",
+       "Q 2219 1994 2541 2106 \n",
+       "Q 2544 2222 2544 2253 \n",
+       "Q 2544 2597 2384 2738 \n",
+       "Q 2169 2928 1744 2928 \n",
+       "Q 1347 2928 1158 2789 \n",
+       "Q 969 2650 878 2297 \n",
+       "L 328 2372 \n",
+       "Q 403 2725 575 2942 \n",
+       "Q 747 3159 1072 3276 \n",
+       "Q 1397 3394 1825 3394 \n",
+       "Q 2250 3394 2515 3294 \n",
+       "Q 2781 3194 2906 3042 \n",
+       "Q 3031 2891 3081 2659 \n",
+       "Q 3109 2516 3109 2141 \n",
+       "L 3109 1391 \n",
+       "Q 3109 606 3145 398 \n",
+       "Q 3181 191 3288 0 \n",
+       "L 2700 0 \n",
+       "Q 2613 175 2588 409 \n",
+       "z\n",
+       "M 2541 1666 \n",
+       "Q 2234 1541 1622 1453 \n",
+       "Q 1275 1403 1131 1340 \n",
+       "Q 988 1278 909 1158 \n",
+       "Q 831 1038 831 891 \n",
+       "Q 831 666 1001 516 \n",
+       "Q 1172 366 1500 366 \n",
+       "Q 1825 366 2078 508 \n",
+       "Q 2331 650 2450 897 \n",
+       "Q 2541 1088 2541 1459 \n",
+       "L 2541 1666 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-6e\" d=\"M 422 0 \n",
+       "L 422 3319 \n",
+       "L 928 3319 \n",
+       "L 928 2847 \n",
+       "Q 1294 3394 1984 3394 \n",
+       "Q 2284 3394 2536 3286 \n",
+       "Q 2788 3178 2913 3003 \n",
+       "Q 3038 2828 3088 2588 \n",
+       "Q 3119 2431 3119 2041 \n",
+       "L 3119 0 \n",
+       "L 2556 0 \n",
+       "L 2556 2019 \n",
+       "Q 2556 2363 2490 2533 \n",
+       "Q 2425 2703 2258 2804 \n",
+       "Q 2091 2906 1866 2906 \n",
+       "Q 1506 2906 1245 2678 \n",
+       "Q 984 2450 984 1813 \n",
+       "L 984 0 \n",
+       "L 422 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-70\"/>\n",
+       "       <use xlink:href=\"#ArialMT-65\" x=\"55.615234\"/>\n",
+       "       <use xlink:href=\"#ArialMT-64\" x=\"111.230469\"/>\n",
+       "       <use xlink:href=\"#ArialMT-65\" x=\"166.845703\"/>\n",
+       "       <use xlink:href=\"#ArialMT-73\" x=\"222.460938\"/>\n",
+       "       <use xlink:href=\"#ArialMT-74\" x=\"272.460938\"/>\n",
+       "       <use xlink:href=\"#ArialMT-72\" x=\"300.244141\"/>\n",
+       "       <use xlink:href=\"#ArialMT-69\" x=\"333.544922\"/>\n",
+       "       <use xlink:href=\"#ArialMT-61\" x=\"355.761719\"/>\n",
+       "       <use xlink:href=\"#ArialMT-6e\" x=\"411.376953\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "   </g>\n",
+       "   <g id=\"matplotlib.axis_2\">\n",
+       "    <g id=\"ytick_1\">\n",
+       "     <g id=\"line2d_3\">\n",
+       "      <defs>\n",
+       "       <path id=\"m059797b5e1\" d=\"M 0 0 \n",
+       "L -6 0 \n",
+       "\" style=\"stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </defs>\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m059797b5e1\" x=\"30.157469\" y=\"189.129188\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_3\">\n",
+       "      <!-- 0 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(13.928734 193.459664) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-30\" d=\"M 266 2259 \n",
+       "Q 266 3072 433 3567 \n",
+       "Q 600 4063 929 4331 \n",
+       "Q 1259 4600 1759 4600 \n",
+       "Q 2128 4600 2406 4451 \n",
+       "Q 2684 4303 2865 4023 \n",
+       "Q 3047 3744 3150 3342 \n",
+       "Q 3253 2941 3253 2259 \n",
+       "Q 3253 1453 3087 958 \n",
+       "Q 2922 463 2592 192 \n",
+       "Q 2263 -78 1759 -78 \n",
+       "Q 1097 -78 719 397 \n",
+       "Q 266 969 266 2259 \n",
+       "z\n",
+       "M 844 2259 \n",
+       "Q 844 1131 1108 757 \n",
+       "Q 1372 384 1759 384 \n",
+       "Q 2147 384 2411 759 \n",
+       "Q 2675 1134 2675 2259 \n",
+       "Q 2675 3391 2411 3762 \n",
+       "Q 2147 4134 1753 4134 \n",
+       "Q 1366 4134 1134 3806 \n",
+       "Q 844 3388 844 2259 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-30\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_2\">\n",
+       "     <g id=\"line2d_4\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m059797b5e1\" x=\"30.157469\" y=\"157.449187\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_4\">\n",
+       "      <!-- 5 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(13.928734 161.779664) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-35\" d=\"M 266 1200 \n",
+       "L 856 1250 \n",
+       "Q 922 819 1161 601 \n",
+       "Q 1400 384 1738 384 \n",
+       "Q 2144 384 2425 690 \n",
+       "Q 2706 997 2706 1503 \n",
+       "Q 2706 1984 2436 2262 \n",
+       "Q 2166 2541 1728 2541 \n",
+       "Q 1456 2541 1237 2417 \n",
+       "Q 1019 2294 894 2097 \n",
+       "L 366 2166 \n",
+       "L 809 4519 \n",
+       "L 3088 4519 \n",
+       "L 3088 3981 \n",
+       "L 1259 3981 \n",
+       "L 1013 2750 \n",
+       "Q 1425 3038 1878 3038 \n",
+       "Q 2478 3038 2890 2622 \n",
+       "Q 3303 2206 3303 1553 \n",
+       "Q 3303 931 2941 478 \n",
+       "Q 2500 -78 1738 -78 \n",
+       "Q 1113 -78 717 272 \n",
+       "Q 322 622 266 1200 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-35\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_3\">\n",
+       "     <g id=\"line2d_5\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m059797b5e1\" x=\"30.157469\" y=\"125.769188\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_5\">\n",
+       "      <!-- 10 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(7.2 130.099664) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-31\" d=\"M 2384 0 \n",
+       "L 1822 0 \n",
+       "L 1822 3584 \n",
+       "Q 1619 3391 1289 3197 \n",
+       "Q 959 3003 697 2906 \n",
+       "L 697 3450 \n",
+       "Q 1169 3672 1522 3987 \n",
+       "Q 1875 4303 2022 4600 \n",
+       "L 2384 4600 \n",
+       "L 2384 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-31\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_4\">\n",
+       "     <g id=\"line2d_6\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m059797b5e1\" x=\"30.157469\" y=\"94.089187\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_6\">\n",
+       "      <!-- 15 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(7.2 98.419664) scale(0.121 -0.121)\">\n",
+       "       <use xlink:href=\"#ArialMT-31\"/>\n",
+       "       <use xlink:href=\"#ArialMT-35\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_5\">\n",
+       "     <g id=\"line2d_7\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m059797b5e1\" x=\"30.157469\" y=\"62.409188\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_7\">\n",
+       "      <!-- 20 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(7.2 66.739664) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-32\" d=\"M 3222 541 \n",
+       "L 3222 0 \n",
+       "L 194 0 \n",
+       "Q 188 203 259 391 \n",
+       "Q 375 700 629 1000 \n",
+       "Q 884 1300 1366 1694 \n",
+       "Q 2113 2306 2375 2664 \n",
+       "Q 2638 3022 2638 3341 \n",
+       "Q 2638 3675 2398 3904 \n",
+       "Q 2159 4134 1775 4134 \n",
+       "Q 1369 4134 1125 3890 \n",
+       "Q 881 3647 878 3216 \n",
+       "L 300 3275 \n",
+       "Q 359 3922 746 4261 \n",
+       "Q 1134 4600 1788 4600 \n",
+       "Q 2447 4600 2831 4234 \n",
+       "Q 3216 3869 3216 3328 \n",
+       "Q 3216 3053 3103 2787 \n",
+       "Q 2991 2522 2730 2228 \n",
+       "Q 2469 1934 1863 1422 \n",
+       "Q 1356 997 1212 845 \n",
+       "Q 1069 694 975 541 \n",
+       "L 3222 541 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-32\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_6\">\n",
+       "     <g id=\"line2d_8\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m059797b5e1\" x=\"30.157469\" y=\"30.729187\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_8\">\n",
+       "      <!-- 25 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(7.2 35.059664) scale(0.121 -0.121)\">\n",
+       "       <use xlink:href=\"#ArialMT-32\"/>\n",
+       "       <use xlink:href=\"#ArialMT-35\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "   </g>\n",
+       "   <g id=\"patch_3\">\n",
+       "    <path d=\"M 85.957469 189.129188 \n",
+       "L 197.557469 189.129188 \n",
+       "L 197.557469 30.729187 \n",
+       "L 85.957469 30.729187 \n",
+       "z\n",
+       "\" clip-path=\"url(#pedfa9caab2)\" style=\"fill: #c44e52; stroke: #ffffff; stroke-linejoin: miter\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_4\">\n",
+       "    <path d=\"M 309.157469 189.129188 \n",
+       "L 420.757469 189.129188 \n",
+       "L 420.757469 62.409188 \n",
+       "L 309.157469 62.409188 \n",
+       "z\n",
+       "\" clip-path=\"url(#pedfa9caab2)\" style=\"fill: #55a868; stroke: #ffffff; stroke-linejoin: miter\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_5\">\n",
+       "    <path d=\"M 30.157469 189.129188 \n",
+       "L 30.157469 22.809187 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_6\">\n",
+       "    <path d=\"M 476.557469 189.129188 \n",
+       "L 476.557469 22.809187 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_7\">\n",
+       "    <path d=\"M 30.157469 189.129188 \n",
+       "L 476.557469 189.129188 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_8\">\n",
+       "    <path d=\"M 30.157469 22.809188 \n",
+       "L 476.557469 22.809188 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"text_9\">\n",
+       "    <!-- Distribution of road objects in traffic scene. Total # objects: 45 -->\n",
+       "    <g style=\"fill: #262626\" transform=\"translate(74.210781 16.809188) scale(0.132 -0.132)\">\n",
+       "     <defs>\n",
+       "      <path id=\"ArialMT-44\" d=\"M 494 0 \n",
+       "L 494 4581 \n",
+       "L 2072 4581 \n",
+       "Q 2606 4581 2888 4516 \n",
+       "Q 3281 4425 3559 4188 \n",
+       "Q 3922 3881 4101 3404 \n",
+       "Q 4281 2928 4281 2316 \n",
+       "Q 4281 1794 4159 1391 \n",
+       "Q 4038 988 3847 723 \n",
+       "Q 3656 459 3429 307 \n",
+       "Q 3203 156 2883 78 \n",
+       "Q 2563 0 2147 0 \n",
+       "L 494 0 \n",
+       "z\n",
+       "M 1100 541 \n",
+       "L 2078 541 \n",
+       "Q 2531 541 2789 625 \n",
+       "Q 3047 709 3200 863 \n",
+       "Q 3416 1078 3536 1442 \n",
+       "Q 3656 1806 3656 2325 \n",
+       "Q 3656 3044 3420 3430 \n",
+       "Q 3184 3816 2847 3947 \n",
+       "Q 2603 4041 2063 4041 \n",
+       "L 1100 4041 \n",
+       "L 1100 541 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-62\" d=\"M 941 0 \n",
+       "L 419 0 \n",
+       "L 419 4581 \n",
+       "L 981 4581 \n",
+       "L 981 2947 \n",
+       "Q 1338 3394 1891 3394 \n",
+       "Q 2197 3394 2470 3270 \n",
+       "Q 2744 3147 2920 2923 \n",
+       "Q 3097 2700 3197 2384 \n",
+       "Q 3297 2069 3297 1709 \n",
+       "Q 3297 856 2875 390 \n",
+       "Q 2453 -75 1863 -75 \n",
+       "Q 1275 -75 941 416 \n",
+       "L 941 0 \n",
+       "z\n",
+       "M 934 1684 \n",
+       "Q 934 1088 1097 822 \n",
+       "Q 1363 388 1816 388 \n",
+       "Q 2184 388 2453 708 \n",
+       "Q 2722 1028 2722 1663 \n",
+       "Q 2722 2313 2464 2622 \n",
+       "Q 2206 2931 1841 2931 \n",
+       "Q 1472 2931 1203 2611 \n",
+       "Q 934 2291 934 1684 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-75\" d=\"M 2597 0 \n",
+       "L 2597 488 \n",
+       "Q 2209 -75 1544 -75 \n",
+       "Q 1250 -75 995 37 \n",
+       "Q 741 150 617 320 \n",
+       "Q 494 491 444 738 \n",
+       "Q 409 903 409 1263 \n",
+       "L 409 3319 \n",
+       "L 972 3319 \n",
+       "L 972 1478 \n",
+       "Q 972 1038 1006 884 \n",
+       "Q 1059 663 1231 536 \n",
+       "Q 1403 409 1656 409 \n",
+       "Q 1909 409 2131 539 \n",
+       "Q 2353 669 2445 892 \n",
+       "Q 2538 1116 2538 1541 \n",
+       "L 2538 3319 \n",
+       "L 3100 3319 \n",
+       "L 3100 0 \n",
+       "L 2597 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-6f\" d=\"M 213 1659 \n",
+       "Q 213 2581 725 3025 \n",
+       "Q 1153 3394 1769 3394 \n",
+       "Q 2453 3394 2887 2945 \n",
+       "Q 3322 2497 3322 1706 \n",
+       "Q 3322 1066 3130 698 \n",
+       "Q 2938 331 2570 128 \n",
+       "Q 2203 -75 1769 -75 \n",
+       "Q 1072 -75 642 372 \n",
+       "Q 213 819 213 1659 \n",
+       "z\n",
+       "M 791 1659 \n",
+       "Q 791 1022 1069 705 \n",
+       "Q 1347 388 1769 388 \n",
+       "Q 2188 388 2466 706 \n",
+       "Q 2744 1025 2744 1678 \n",
+       "Q 2744 2294 2464 2611 \n",
+       "Q 2184 2928 1769 2928 \n",
+       "Q 1347 2928 1069 2612 \n",
+       "Q 791 2297 791 1659 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-20\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-66\" d=\"M 556 0 \n",
+       "L 556 2881 \n",
+       "L 59 2881 \n",
+       "L 59 3319 \n",
+       "L 556 3319 \n",
+       "L 556 3672 \n",
+       "Q 556 4006 616 4169 \n",
+       "Q 697 4388 901 4523 \n",
+       "Q 1106 4659 1475 4659 \n",
+       "Q 1713 4659 2000 4603 \n",
+       "L 1916 4113 \n",
+       "Q 1741 4144 1584 4144 \n",
+       "Q 1328 4144 1222 4034 \n",
+       "Q 1116 3925 1116 3625 \n",
+       "L 1116 3319 \n",
+       "L 1763 3319 \n",
+       "L 1763 2881 \n",
+       "L 1116 2881 \n",
+       "L 1116 0 \n",
+       "L 556 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-6a\" d=\"M 419 3928 \n",
+       "L 419 4581 \n",
+       "L 981 4581 \n",
+       "L 981 3928 \n",
+       "L 419 3928 \n",
+       "z\n",
+       "M -294 -1288 \n",
+       "L -188 -809 \n",
+       "Q -19 -853 78 -853 \n",
+       "Q 250 -853 334 -739 \n",
+       "Q 419 -625 419 -169 \n",
+       "L 419 3319 \n",
+       "L 981 3319 \n",
+       "L 981 -181 \n",
+       "Q 981 -794 822 -1034 \n",
+       "Q 619 -1347 147 -1347 \n",
+       "Q -81 -1347 -294 -1288 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-2e\" d=\"M 581 0 \n",
+       "L 581 641 \n",
+       "L 1222 641 \n",
+       "L 1222 0 \n",
+       "L 581 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-54\" d=\"M 1659 0 \n",
+       "L 1659 4041 \n",
+       "L 150 4041 \n",
+       "L 150 4581 \n",
+       "L 3781 4581 \n",
+       "L 3781 4041 \n",
+       "L 2266 4041 \n",
+       "L 2266 0 \n",
+       "L 1659 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-23\" d=\"M 322 -78 \n",
+       "L 594 1253 \n",
+       "L 66 1253 \n",
+       "L 66 1719 \n",
+       "L 688 1719 \n",
+       "L 919 2853 \n",
+       "L 66 2853 \n",
+       "L 66 3319 \n",
+       "L 1013 3319 \n",
+       "L 1284 4659 \n",
+       "L 1753 4659 \n",
+       "L 1481 3319 \n",
+       "L 2466 3319 \n",
+       "L 2738 4659 \n",
+       "L 3209 4659 \n",
+       "L 2938 3319 \n",
+       "L 3478 3319 \n",
+       "L 3478 2853 \n",
+       "L 2844 2853 \n",
+       "L 2609 1719 \n",
+       "L 3478 1719 \n",
+       "L 3478 1253 \n",
+       "L 2516 1253 \n",
+       "L 2244 -78 \n",
+       "L 1775 -78 \n",
+       "L 2044 1253 \n",
+       "L 1063 1253 \n",
+       "L 791 -78 \n",
+       "L 322 -78 \n",
+       "z\n",
+       "M 1156 1719 \n",
+       "L 2138 1719 \n",
+       "L 2372 2853 \n",
+       "L 1388 2853 \n",
+       "L 1156 1719 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-3a\" d=\"M 578 2678 \n",
+       "L 578 3319 \n",
+       "L 1219 3319 \n",
+       "L 1219 2678 \n",
+       "L 578 2678 \n",
+       "z\n",
+       "M 578 0 \n",
+       "L 578 641 \n",
+       "L 1219 641 \n",
+       "L 1219 0 \n",
+       "L 578 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-34\" d=\"M 2069 0 \n",
+       "L 2069 1097 \n",
+       "L 81 1097 \n",
+       "L 81 1613 \n",
+       "L 2172 4581 \n",
+       "L 2631 4581 \n",
+       "L 2631 1613 \n",
+       "L 3250 1613 \n",
+       "L 3250 1097 \n",
+       "L 2631 1097 \n",
+       "L 2631 0 \n",
+       "L 2069 0 \n",
+       "z\n",
+       "M 2069 1613 \n",
+       "L 2069 3678 \n",
+       "L 634 1613 \n",
+       "L 2069 1613 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "     </defs>\n",
+       "     <use xlink:href=\"#ArialMT-44\"/>\n",
+       "     <use xlink:href=\"#ArialMT-69\" x=\"72.216797\"/>\n",
+       "     <use xlink:href=\"#ArialMT-73\" x=\"94.433594\"/>\n",
+       "     <use xlink:href=\"#ArialMT-74\" x=\"144.433594\"/>\n",
+       "     <use xlink:href=\"#ArialMT-72\" x=\"172.216797\"/>\n",
+       "     <use xlink:href=\"#ArialMT-69\" x=\"205.517578\"/>\n",
+       "     <use xlink:href=\"#ArialMT-62\" x=\"227.734375\"/>\n",
+       "     <use xlink:href=\"#ArialMT-75\" x=\"283.349609\"/>\n",
+       "     <use xlink:href=\"#ArialMT-74\" x=\"338.964844\"/>\n",
+       "     <use xlink:href=\"#ArialMT-69\" x=\"366.748047\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6f\" x=\"388.964844\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6e\" x=\"444.580078\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"500.195312\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6f\" x=\"527.978516\"/>\n",
+       "     <use xlink:href=\"#ArialMT-66\" x=\"583.59375\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"611.376953\"/>\n",
+       "     <use xlink:href=\"#ArialMT-72\" x=\"639.160156\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6f\" x=\"672.460938\"/>\n",
+       "     <use xlink:href=\"#ArialMT-61\" x=\"728.076172\"/>\n",
+       "     <use xlink:href=\"#ArialMT-64\" x=\"783.691406\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"839.306641\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6f\" x=\"867.089844\"/>\n",
+       "     <use xlink:href=\"#ArialMT-62\" x=\"922.705078\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6a\" x=\"978.320312\"/>\n",
+       "     <use xlink:href=\"#ArialMT-65\" x=\"1000.537109\"/>\n",
+       "     <use xlink:href=\"#ArialMT-63\" x=\"1056.152344\"/>\n",
+       "     <use xlink:href=\"#ArialMT-74\" x=\"1106.152344\"/>\n",
+       "     <use xlink:href=\"#ArialMT-73\" x=\"1133.935547\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"1183.935547\"/>\n",
+       "     <use xlink:href=\"#ArialMT-69\" x=\"1211.71875\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6e\" x=\"1233.935547\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"1289.550781\"/>\n",
+       "     <use xlink:href=\"#ArialMT-74\" x=\"1317.333984\"/>\n",
+       "     <use xlink:href=\"#ArialMT-72\" x=\"1345.117188\"/>\n",
+       "     <use xlink:href=\"#ArialMT-61\" x=\"1378.417969\"/>\n",
+       "     <use xlink:href=\"#ArialMT-66\" x=\"1434.033203\"/>\n",
+       "     <use xlink:href=\"#ArialMT-66\" x=\"1460.066406\"/>\n",
+       "     <use xlink:href=\"#ArialMT-69\" x=\"1487.849609\"/>\n",
+       "     <use xlink:href=\"#ArialMT-63\" x=\"1510.066406\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"1560.066406\"/>\n",
+       "     <use xlink:href=\"#ArialMT-73\" x=\"1587.849609\"/>\n",
+       "     <use xlink:href=\"#ArialMT-63\" x=\"1637.849609\"/>\n",
+       "     <use xlink:href=\"#ArialMT-65\" x=\"1687.849609\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6e\" x=\"1743.464844\"/>\n",
+       "     <use xlink:href=\"#ArialMT-65\" x=\"1799.080078\"/>\n",
+       "     <use xlink:href=\"#ArialMT-2e\" x=\"1854.695312\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"1882.478516\"/>\n",
+       "     <use xlink:href=\"#ArialMT-54\" x=\"1908.511719\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6f\" x=\"1958.470703\"/>\n",
+       "     <use xlink:href=\"#ArialMT-74\" x=\"2014.085938\"/>\n",
+       "     <use xlink:href=\"#ArialMT-61\" x=\"2041.869141\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6c\" x=\"2097.484375\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"2119.701172\"/>\n",
+       "     <use xlink:href=\"#ArialMT-23\" x=\"2147.484375\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"2203.099609\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6f\" x=\"2230.882812\"/>\n",
+       "     <use xlink:href=\"#ArialMT-62\" x=\"2286.498047\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6a\" x=\"2342.113281\"/>\n",
+       "     <use xlink:href=\"#ArialMT-65\" x=\"2364.330078\"/>\n",
+       "     <use xlink:href=\"#ArialMT-63\" x=\"2419.945312\"/>\n",
+       "     <use xlink:href=\"#ArialMT-74\" x=\"2469.945312\"/>\n",
+       "     <use xlink:href=\"#ArialMT-73\" x=\"2497.728516\"/>\n",
+       "     <use xlink:href=\"#ArialMT-3a\" x=\"2547.728516\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"2575.511719\"/>\n",
+       "     <use xlink:href=\"#ArialMT-34\" x=\"2603.294922\"/>\n",
+       "     <use xlink:href=\"#ArialMT-35\" x=\"2658.910156\"/>\n",
+       "    </g>\n",
+       "   </g>\n",
+       "  </g>\n",
+       " </g>\n",
+       " <defs>\n",
+       "  <clipPath id=\"pedfa9caab2\">\n",
+       "   <rect x=\"30.157469\" y=\"22.809188\" width=\"446.4\" height=\"166.32\"/>\n",
+       "  </clipPath>\n",
+       " </defs>\n",
+       "</svg>\n"
+      ],
+      "text/plain": [
+       "<Figure size 800x300 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "pd.Series(\n",
+    "    [\n",
+    "        traffic_scene['objects'][idx]['type']\n",
+    "        for idx in range(len(traffic_scene['objects']))\n",
+    "    ]\n",
+    ").value_counts().plot(kind='bar', rot=45, color=cmap);\n",
+    "plt.title(f'Distribution of road objects in traffic scene. Total # objects: {len(traffic_scene[\"objects\"])}')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This traffic scenario contains only vehicles and pedestrians; some scenes have cyclists as well."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/svg+xml": [
+       "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n",
+       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
+       "  \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
+       "<svg xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"483.757469pt\" height=\"263.122249pt\" viewBox=\"0 0 483.757469 263.122249\" xmlns=\"http://www.w3.org/2000/svg\" version=\"1.1\">\n",
+       " <metadata>\n",
+       "  <rdf:RDF xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n",
+       "   <cc:Work>\n",
+       "    <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n",
+       "    <dc:date>2023-10-03T10:23:26.839616</dc:date>\n",
+       "    <dc:format>image/svg+xml</dc:format>\n",
+       "    <dc:creator>\n",
+       "     <cc:Agent>\n",
+       "      <dc:title>Matplotlib v3.8.0, https://matplotlib.org/</dc:title>\n",
+       "     </cc:Agent>\n",
+       "    </dc:creator>\n",
+       "   </cc:Work>\n",
+       "  </rdf:RDF>\n",
+       " </metadata>\n",
+       " <defs>\n",
+       "  <style type=\"text/css\">*{stroke-linejoin: round; stroke-linecap: butt}</style>\n",
+       " </defs>\n",
+       " <g id=\"figure_1\">\n",
+       "  <g id=\"patch_1\">\n",
+       "   <path d=\"M 0 263.122249 \n",
+       "L 483.757469 263.122249 \n",
+       "L 483.757469 0 \n",
+       "L 0 0 \n",
+       "L 0 263.122249 \n",
+       "z\n",
+       "\" style=\"fill: none\"/>\n",
+       "  </g>\n",
+       "  <g id=\"axes_1\">\n",
+       "   <g id=\"patch_2\">\n",
+       "    <path d=\"M 30.157469 189.129188 \n",
+       "L 476.557469 189.129188 \n",
+       "L 476.557469 22.809188 \n",
+       "L 30.157469 22.809188 \n",
+       "L 30.157469 189.129188 \n",
+       "z\n",
+       "\" style=\"fill: none\"/>\n",
+       "   </g>\n",
+       "   <g id=\"matplotlib.axis_1\">\n",
+       "    <g id=\"xtick_1\">\n",
+       "     <g id=\"line2d_1\">\n",
+       "      <defs>\n",
+       "       <path id=\"m95cc94c80d\" d=\"M 0 0 \n",
+       "L 0 6 \n",
+       "\" style=\"stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </defs>\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m95cc94c80d\" x=\"74.797469\" y=\"189.129188\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_1\">\n",
+       "      <!-- lane -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(68.921909 220.928242) rotate(-45) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-6c\" d=\"M 409 0 \n",
+       "L 409 4581 \n",
+       "L 972 4581 \n",
+       "L 972 0 \n",
+       "L 409 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-61\" d=\"M 2588 409 \n",
+       "Q 2275 144 1986 34 \n",
+       "Q 1697 -75 1366 -75 \n",
+       "Q 819 -75 525 192 \n",
+       "Q 231 459 231 875 \n",
+       "Q 231 1119 342 1320 \n",
+       "Q 453 1522 633 1644 \n",
+       "Q 813 1766 1038 1828 \n",
+       "Q 1203 1872 1538 1913 \n",
+       "Q 2219 1994 2541 2106 \n",
+       "Q 2544 2222 2544 2253 \n",
+       "Q 2544 2597 2384 2738 \n",
+       "Q 2169 2928 1744 2928 \n",
+       "Q 1347 2928 1158 2789 \n",
+       "Q 969 2650 878 2297 \n",
+       "L 328 2372 \n",
+       "Q 403 2725 575 2942 \n",
+       "Q 747 3159 1072 3276 \n",
+       "Q 1397 3394 1825 3394 \n",
+       "Q 2250 3394 2515 3294 \n",
+       "Q 2781 3194 2906 3042 \n",
+       "Q 3031 2891 3081 2659 \n",
+       "Q 3109 2516 3109 2141 \n",
+       "L 3109 1391 \n",
+       "Q 3109 606 3145 398 \n",
+       "Q 3181 191 3288 0 \n",
+       "L 2700 0 \n",
+       "Q 2613 175 2588 409 \n",
+       "z\n",
+       "M 2541 1666 \n",
+       "Q 2234 1541 1622 1453 \n",
+       "Q 1275 1403 1131 1340 \n",
+       "Q 988 1278 909 1158 \n",
+       "Q 831 1038 831 891 \n",
+       "Q 831 666 1001 516 \n",
+       "Q 1172 366 1500 366 \n",
+       "Q 1825 366 2078 508 \n",
+       "Q 2331 650 2450 897 \n",
+       "Q 2541 1088 2541 1459 \n",
+       "L 2541 1666 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-6e\" d=\"M 422 0 \n",
+       "L 422 3319 \n",
+       "L 928 3319 \n",
+       "L 928 2847 \n",
+       "Q 1294 3394 1984 3394 \n",
+       "Q 2284 3394 2536 3286 \n",
+       "Q 2788 3178 2913 3003 \n",
+       "Q 3038 2828 3088 2588 \n",
+       "Q 3119 2431 3119 2041 \n",
+       "L 3119 0 \n",
+       "L 2556 0 \n",
+       "L 2556 2019 \n",
+       "Q 2556 2363 2490 2533 \n",
+       "Q 2425 2703 2258 2804 \n",
+       "Q 2091 2906 1866 2906 \n",
+       "Q 1506 2906 1245 2678 \n",
+       "Q 984 2450 984 1813 \n",
+       "L 984 0 \n",
+       "L 422 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-65\" d=\"M 2694 1069 \n",
+       "L 3275 997 \n",
+       "Q 3138 488 2766 206 \n",
+       "Q 2394 -75 1816 -75 \n",
+       "Q 1088 -75 661 373 \n",
+       "Q 234 822 234 1631 \n",
+       "Q 234 2469 665 2931 \n",
+       "Q 1097 3394 1784 3394 \n",
+       "Q 2450 3394 2872 2941 \n",
+       "Q 3294 2488 3294 1666 \n",
+       "Q 3294 1616 3291 1516 \n",
+       "L 816 1516 \n",
+       "Q 847 969 1125 678 \n",
+       "Q 1403 388 1819 388 \n",
+       "Q 2128 388 2347 550 \n",
+       "Q 2566 713 2694 1069 \n",
+       "z\n",
+       "M 847 1978 \n",
+       "L 2700 1978 \n",
+       "Q 2663 2397 2488 2606 \n",
+       "Q 2219 2931 1791 2931 \n",
+       "Q 1403 2931 1139 2672 \n",
+       "Q 875 2413 847 1978 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-6c\"/>\n",
+       "       <use xlink:href=\"#ArialMT-61\" x=\"22.216797\"/>\n",
+       "       <use xlink:href=\"#ArialMT-6e\" x=\"77.832031\"/>\n",
+       "       <use xlink:href=\"#ArialMT-65\" x=\"133.447266\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_2\">\n",
+       "     <g id=\"line2d_2\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m95cc94c80d\" x=\"164.077469\" y=\"189.129188\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_2\">\n",
+       "      <!-- road_edge -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(145.78302 245.665754) rotate(-45) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-72\" d=\"M 416 0 \n",
+       "L 416 3319 \n",
+       "L 922 3319 \n",
+       "L 922 2816 \n",
+       "Q 1116 3169 1280 3281 \n",
+       "Q 1444 3394 1641 3394 \n",
+       "Q 1925 3394 2219 3213 \n",
+       "L 2025 2691 \n",
+       "Q 1819 2813 1613 2813 \n",
+       "Q 1428 2813 1281 2702 \n",
+       "Q 1134 2591 1072 2394 \n",
+       "Q 978 2094 978 1738 \n",
+       "L 978 0 \n",
+       "L 416 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-6f\" d=\"M 213 1659 \n",
+       "Q 213 2581 725 3025 \n",
+       "Q 1153 3394 1769 3394 \n",
+       "Q 2453 3394 2887 2945 \n",
+       "Q 3322 2497 3322 1706 \n",
+       "Q 3322 1066 3130 698 \n",
+       "Q 2938 331 2570 128 \n",
+       "Q 2203 -75 1769 -75 \n",
+       "Q 1072 -75 642 372 \n",
+       "Q 213 819 213 1659 \n",
+       "z\n",
+       "M 791 1659 \n",
+       "Q 791 1022 1069 705 \n",
+       "Q 1347 388 1769 388 \n",
+       "Q 2188 388 2466 706 \n",
+       "Q 2744 1025 2744 1678 \n",
+       "Q 2744 2294 2464 2611 \n",
+       "Q 2184 2928 1769 2928 \n",
+       "Q 1347 2928 1069 2612 \n",
+       "Q 791 2297 791 1659 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-64\" d=\"M 2575 0 \n",
+       "L 2575 419 \n",
+       "Q 2259 -75 1647 -75 \n",
+       "Q 1250 -75 917 144 \n",
+       "Q 584 363 401 755 \n",
+       "Q 219 1147 219 1656 \n",
+       "Q 219 2153 384 2558 \n",
+       "Q 550 2963 881 3178 \n",
+       "Q 1213 3394 1622 3394 \n",
+       "Q 1922 3394 2156 3267 \n",
+       "Q 2391 3141 2538 2938 \n",
+       "L 2538 4581 \n",
+       "L 3097 4581 \n",
+       "L 3097 0 \n",
+       "L 2575 0 \n",
+       "z\n",
+       "M 797 1656 \n",
+       "Q 797 1019 1065 703 \n",
+       "Q 1334 388 1700 388 \n",
+       "Q 2069 388 2326 689 \n",
+       "Q 2584 991 2584 1609 \n",
+       "Q 2584 2291 2321 2609 \n",
+       "Q 2059 2928 1675 2928 \n",
+       "Q 1300 2928 1048 2622 \n",
+       "Q 797 2316 797 1656 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-5f\" d=\"M -97 -1272 \n",
+       "L -97 -866 \n",
+       "L 3631 -866 \n",
+       "L 3631 -1272 \n",
+       "L -97 -1272 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-67\" d=\"M 319 -275 \n",
+       "L 866 -356 \n",
+       "Q 900 -609 1056 -725 \n",
+       "Q 1266 -881 1628 -881 \n",
+       "Q 2019 -881 2231 -725 \n",
+       "Q 2444 -569 2519 -288 \n",
+       "Q 2563 -116 2559 434 \n",
+       "Q 2191 0 1641 0 \n",
+       "Q 956 0 581 494 \n",
+       "Q 206 988 206 1678 \n",
+       "Q 206 2153 378 2554 \n",
+       "Q 550 2956 876 3175 \n",
+       "Q 1203 3394 1644 3394 \n",
+       "Q 2231 3394 2613 2919 \n",
+       "L 2613 3319 \n",
+       "L 3131 3319 \n",
+       "L 3131 450 \n",
+       "Q 3131 -325 2973 -648 \n",
+       "Q 2816 -972 2473 -1159 \n",
+       "Q 2131 -1347 1631 -1347 \n",
+       "Q 1038 -1347 672 -1080 \n",
+       "Q 306 -813 319 -275 \n",
+       "z\n",
+       "M 784 1719 \n",
+       "Q 784 1066 1043 766 \n",
+       "Q 1303 466 1694 466 \n",
+       "Q 2081 466 2343 764 \n",
+       "Q 2606 1063 2606 1700 \n",
+       "Q 2606 2309 2336 2618 \n",
+       "Q 2066 2928 1684 2928 \n",
+       "Q 1309 2928 1046 2623 \n",
+       "Q 784 2319 784 1719 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-72\"/>\n",
+       "       <use xlink:href=\"#ArialMT-6f\" x=\"33.300781\"/>\n",
+       "       <use xlink:href=\"#ArialMT-61\" x=\"88.916016\"/>\n",
+       "       <use xlink:href=\"#ArialMT-64\" x=\"144.53125\"/>\n",
+       "       <use xlink:href=\"#ArialMT-5f\" x=\"200.146484\"/>\n",
+       "       <use xlink:href=\"#ArialMT-65\" x=\"255.761719\"/>\n",
+       "       <use xlink:href=\"#ArialMT-64\" x=\"311.376953\"/>\n",
+       "       <use xlink:href=\"#ArialMT-67\" x=\"366.992188\"/>\n",
+       "       <use xlink:href=\"#ArialMT-65\" x=\"422.607422\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_3\">\n",
+       "     <g id=\"line2d_3\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m95cc94c80d\" x=\"253.357469\" y=\"189.129188\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_3\">\n",
+       "      <!-- road_line -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(237.970052 239.951955) rotate(-45) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-69\" d=\"M 425 3934 \n",
+       "L 425 4581 \n",
+       "L 988 4581 \n",
+       "L 988 3934 \n",
+       "L 425 3934 \n",
+       "z\n",
+       "M 425 0 \n",
+       "L 425 3319 \n",
+       "L 988 3319 \n",
+       "L 988 0 \n",
+       "L 425 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-72\"/>\n",
+       "       <use xlink:href=\"#ArialMT-6f\" x=\"33.300781\"/>\n",
+       "       <use xlink:href=\"#ArialMT-61\" x=\"88.916016\"/>\n",
+       "       <use xlink:href=\"#ArialMT-64\" x=\"144.53125\"/>\n",
+       "       <use xlink:href=\"#ArialMT-5f\" x=\"200.146484\"/>\n",
+       "       <use xlink:href=\"#ArialMT-6c\" x=\"255.761719\"/>\n",
+       "       <use xlink:href=\"#ArialMT-69\" x=\"277.978516\"/>\n",
+       "       <use xlink:href=\"#ArialMT-6e\" x=\"300.195312\"/>\n",
+       "       <use xlink:href=\"#ArialMT-65\" x=\"355.810547\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_4\">\n",
+       "     <g id=\"line2d_4\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m95cc94c80d\" x=\"342.637469\" y=\"189.129188\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_4\">\n",
+       "      <!-- speed_bump -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(320.115157 254.221746) rotate(-45) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-73\" d=\"M 197 991 \n",
+       "L 753 1078 \n",
+       "Q 800 744 1014 566 \n",
+       "Q 1228 388 1613 388 \n",
+       "Q 2000 388 2187 545 \n",
+       "Q 2375 703 2375 916 \n",
+       "Q 2375 1106 2209 1216 \n",
+       "Q 2094 1291 1634 1406 \n",
+       "Q 1016 1563 777 1677 \n",
+       "Q 538 1791 414 1992 \n",
+       "Q 291 2194 291 2438 \n",
+       "Q 291 2659 392 2848 \n",
+       "Q 494 3038 669 3163 \n",
+       "Q 800 3259 1026 3326 \n",
+       "Q 1253 3394 1513 3394 \n",
+       "Q 1903 3394 2198 3281 \n",
+       "Q 2494 3169 2634 2976 \n",
+       "Q 2775 2784 2828 2463 \n",
+       "L 2278 2388 \n",
+       "Q 2241 2644 2061 2787 \n",
+       "Q 1881 2931 1553 2931 \n",
+       "Q 1166 2931 1000 2803 \n",
+       "Q 834 2675 834 2503 \n",
+       "Q 834 2394 903 2306 \n",
+       "Q 972 2216 1119 2156 \n",
+       "Q 1203 2125 1616 2013 \n",
+       "Q 2213 1853 2448 1751 \n",
+       "Q 2684 1650 2818 1456 \n",
+       "Q 2953 1263 2953 975 \n",
+       "Q 2953 694 2789 445 \n",
+       "Q 2625 197 2315 61 \n",
+       "Q 2006 -75 1616 -75 \n",
+       "Q 969 -75 630 194 \n",
+       "Q 291 463 197 991 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-70\" d=\"M 422 -1272 \n",
+       "L 422 3319 \n",
+       "L 934 3319 \n",
+       "L 934 2888 \n",
+       "Q 1116 3141 1344 3267 \n",
+       "Q 1572 3394 1897 3394 \n",
+       "Q 2322 3394 2647 3175 \n",
+       "Q 2972 2956 3137 2557 \n",
+       "Q 3303 2159 3303 1684 \n",
+       "Q 3303 1175 3120 767 \n",
+       "Q 2938 359 2589 142 \n",
+       "Q 2241 -75 1856 -75 \n",
+       "Q 1575 -75 1351 44 \n",
+       "Q 1128 163 984 344 \n",
+       "L 984 -1272 \n",
+       "L 422 -1272 \n",
+       "z\n",
+       "M 931 1641 \n",
+       "Q 931 1000 1190 694 \n",
+       "Q 1450 388 1819 388 \n",
+       "Q 2194 388 2461 705 \n",
+       "Q 2728 1022 2728 1688 \n",
+       "Q 2728 2322 2467 2637 \n",
+       "Q 2206 2953 1844 2953 \n",
+       "Q 1484 2953 1207 2617 \n",
+       "Q 931 2281 931 1641 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-62\" d=\"M 941 0 \n",
+       "L 419 0 \n",
+       "L 419 4581 \n",
+       "L 981 4581 \n",
+       "L 981 2947 \n",
+       "Q 1338 3394 1891 3394 \n",
+       "Q 2197 3394 2470 3270 \n",
+       "Q 2744 3147 2920 2923 \n",
+       "Q 3097 2700 3197 2384 \n",
+       "Q 3297 2069 3297 1709 \n",
+       "Q 3297 856 2875 390 \n",
+       "Q 2453 -75 1863 -75 \n",
+       "Q 1275 -75 941 416 \n",
+       "L 941 0 \n",
+       "z\n",
+       "M 934 1684 \n",
+       "Q 934 1088 1097 822 \n",
+       "Q 1363 388 1816 388 \n",
+       "Q 2184 388 2453 708 \n",
+       "Q 2722 1028 2722 1663 \n",
+       "Q 2722 2313 2464 2622 \n",
+       "Q 2206 2931 1841 2931 \n",
+       "Q 1472 2931 1203 2611 \n",
+       "Q 934 2291 934 1684 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-75\" d=\"M 2597 0 \n",
+       "L 2597 488 \n",
+       "Q 2209 -75 1544 -75 \n",
+       "Q 1250 -75 995 37 \n",
+       "Q 741 150 617 320 \n",
+       "Q 494 491 444 738 \n",
+       "Q 409 903 409 1263 \n",
+       "L 409 3319 \n",
+       "L 972 3319 \n",
+       "L 972 1478 \n",
+       "Q 972 1038 1006 884 \n",
+       "Q 1059 663 1231 536 \n",
+       "Q 1403 409 1656 409 \n",
+       "Q 1909 409 2131 539 \n",
+       "Q 2353 669 2445 892 \n",
+       "Q 2538 1116 2538 1541 \n",
+       "L 2538 3319 \n",
+       "L 3100 3319 \n",
+       "L 3100 0 \n",
+       "L 2597 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-6d\" d=\"M 422 0 \n",
+       "L 422 3319 \n",
+       "L 925 3319 \n",
+       "L 925 2853 \n",
+       "Q 1081 3097 1340 3245 \n",
+       "Q 1600 3394 1931 3394 \n",
+       "Q 2300 3394 2536 3241 \n",
+       "Q 2772 3088 2869 2813 \n",
+       "Q 3263 3394 3894 3394 \n",
+       "Q 4388 3394 4653 3120 \n",
+       "Q 4919 2847 4919 2278 \n",
+       "L 4919 0 \n",
+       "L 4359 0 \n",
+       "L 4359 2091 \n",
+       "Q 4359 2428 4304 2576 \n",
+       "Q 4250 2725 4106 2815 \n",
+       "Q 3963 2906 3769 2906 \n",
+       "Q 3419 2906 3187 2673 \n",
+       "Q 2956 2441 2956 1928 \n",
+       "L 2956 0 \n",
+       "L 2394 0 \n",
+       "L 2394 2156 \n",
+       "Q 2394 2531 2256 2718 \n",
+       "Q 2119 2906 1806 2906 \n",
+       "Q 1569 2906 1367 2781 \n",
+       "Q 1166 2656 1075 2415 \n",
+       "Q 984 2175 984 1722 \n",
+       "L 984 0 \n",
+       "L 422 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-73\"/>\n",
+       "       <use xlink:href=\"#ArialMT-70\" x=\"50\"/>\n",
+       "       <use xlink:href=\"#ArialMT-65\" x=\"105.615234\"/>\n",
+       "       <use xlink:href=\"#ArialMT-65\" x=\"161.230469\"/>\n",
+       "       <use xlink:href=\"#ArialMT-64\" x=\"216.845703\"/>\n",
+       "       <use xlink:href=\"#ArialMT-5f\" x=\"272.460938\"/>\n",
+       "       <use xlink:href=\"#ArialMT-62\" x=\"328.076172\"/>\n",
+       "       <use xlink:href=\"#ArialMT-75\" x=\"383.691406\"/>\n",
+       "       <use xlink:href=\"#ArialMT-6d\" x=\"439.306641\"/>\n",
+       "       <use xlink:href=\"#ArialMT-70\" x=\"522.607422\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_5\">\n",
+       "     <g id=\"line2d_5\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m95cc94c80d\" x=\"431.917469\" y=\"189.129188\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_5\">\n",
+       "      <!-- stop_sign -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(415.767365 241.377063) rotate(-45) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-74\" d=\"M 1650 503 \n",
+       "L 1731 6 \n",
+       "Q 1494 -44 1306 -44 \n",
+       "Q 1000 -44 831 53 \n",
+       "Q 663 150 594 308 \n",
+       "Q 525 466 525 972 \n",
+       "L 525 2881 \n",
+       "L 113 2881 \n",
+       "L 113 3319 \n",
+       "L 525 3319 \n",
+       "L 525 4141 \n",
+       "L 1084 4478 \n",
+       "L 1084 3319 \n",
+       "L 1650 3319 \n",
+       "L 1650 2881 \n",
+       "L 1084 2881 \n",
+       "L 1084 941 \n",
+       "Q 1084 700 1114 631 \n",
+       "Q 1144 563 1211 522 \n",
+       "Q 1278 481 1403 481 \n",
+       "Q 1497 481 1650 503 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-73\"/>\n",
+       "       <use xlink:href=\"#ArialMT-74\" x=\"50\"/>\n",
+       "       <use xlink:href=\"#ArialMT-6f\" x=\"77.783203\"/>\n",
+       "       <use xlink:href=\"#ArialMT-70\" x=\"133.398438\"/>\n",
+       "       <use xlink:href=\"#ArialMT-5f\" x=\"189.013672\"/>\n",
+       "       <use xlink:href=\"#ArialMT-73\" x=\"244.628906\"/>\n",
+       "       <use xlink:href=\"#ArialMT-69\" x=\"294.628906\"/>\n",
+       "       <use xlink:href=\"#ArialMT-67\" x=\"316.845703\"/>\n",
+       "       <use xlink:href=\"#ArialMT-6e\" x=\"372.460938\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "   </g>\n",
+       "   <g id=\"matplotlib.axis_2\">\n",
+       "    <g id=\"ytick_1\">\n",
+       "     <g id=\"line2d_6\">\n",
+       "      <defs>\n",
+       "       <path id=\"m7a50c448be\" d=\"M 0 0 \n",
+       "L -6 0 \n",
+       "\" style=\"stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </defs>\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m7a50c448be\" x=\"30.157469\" y=\"189.129188\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_6\">\n",
+       "      <!-- 0 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(13.928734 193.459664) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-30\" d=\"M 266 2259 \n",
+       "Q 266 3072 433 3567 \n",
+       "Q 600 4063 929 4331 \n",
+       "Q 1259 4600 1759 4600 \n",
+       "Q 2128 4600 2406 4451 \n",
+       "Q 2684 4303 2865 4023 \n",
+       "Q 3047 3744 3150 3342 \n",
+       "Q 3253 2941 3253 2259 \n",
+       "Q 3253 1453 3087 958 \n",
+       "Q 2922 463 2592 192 \n",
+       "Q 2263 -78 1759 -78 \n",
+       "Q 1097 -78 719 397 \n",
+       "Q 266 969 266 2259 \n",
+       "z\n",
+       "M 844 2259 \n",
+       "Q 844 1131 1108 757 \n",
+       "Q 1372 384 1759 384 \n",
+       "Q 2147 384 2411 759 \n",
+       "Q 2675 1134 2675 2259 \n",
+       "Q 2675 3391 2411 3762 \n",
+       "Q 2147 4134 1753 4134 \n",
+       "Q 1366 4134 1134 3806 \n",
+       "Q 844 3388 844 2259 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-30\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_2\">\n",
+       "     <g id=\"line2d_7\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m7a50c448be\" x=\"30.157469\" y=\"146.318377\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_7\">\n",
+       "      <!-- 20 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(7.2 150.648853) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-32\" d=\"M 3222 541 \n",
+       "L 3222 0 \n",
+       "L 194 0 \n",
+       "Q 188 203 259 391 \n",
+       "Q 375 700 629 1000 \n",
+       "Q 884 1300 1366 1694 \n",
+       "Q 2113 2306 2375 2664 \n",
+       "Q 2638 3022 2638 3341 \n",
+       "Q 2638 3675 2398 3904 \n",
+       "Q 2159 4134 1775 4134 \n",
+       "Q 1369 4134 1125 3890 \n",
+       "Q 881 3647 878 3216 \n",
+       "L 300 3275 \n",
+       "Q 359 3922 746 4261 \n",
+       "Q 1134 4600 1788 4600 \n",
+       "Q 2447 4600 2831 4234 \n",
+       "Q 3216 3869 3216 3328 \n",
+       "Q 3216 3053 3103 2787 \n",
+       "Q 2991 2522 2730 2228 \n",
+       "Q 2469 1934 1863 1422 \n",
+       "Q 1356 997 1212 845 \n",
+       "Q 1069 694 975 541 \n",
+       "L 3222 541 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-32\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_3\">\n",
+       "     <g id=\"line2d_8\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m7a50c448be\" x=\"30.157469\" y=\"103.507566\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_8\">\n",
+       "      <!-- 40 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(7.2 107.838042) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-34\" d=\"M 2069 0 \n",
+       "L 2069 1097 \n",
+       "L 81 1097 \n",
+       "L 81 1613 \n",
+       "L 2172 4581 \n",
+       "L 2631 4581 \n",
+       "L 2631 1613 \n",
+       "L 3250 1613 \n",
+       "L 3250 1097 \n",
+       "L 2631 1097 \n",
+       "L 2631 0 \n",
+       "L 2069 0 \n",
+       "z\n",
+       "M 2069 1613 \n",
+       "L 2069 3678 \n",
+       "L 634 1613 \n",
+       "L 2069 1613 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-34\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_4\">\n",
+       "     <g id=\"line2d_9\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m7a50c448be\" x=\"30.157469\" y=\"60.696755\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_9\">\n",
+       "      <!-- 60 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(7.2 65.027232) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-36\" d=\"M 3184 3459 \n",
+       "L 2625 3416 \n",
+       "Q 2550 3747 2413 3897 \n",
+       "Q 2184 4138 1850 4138 \n",
+       "Q 1581 4138 1378 3988 \n",
+       "Q 1113 3794 959 3422 \n",
+       "Q 806 3050 800 2363 \n",
+       "Q 1003 2672 1297 2822 \n",
+       "Q 1591 2972 1913 2972 \n",
+       "Q 2475 2972 2870 2558 \n",
+       "Q 3266 2144 3266 1488 \n",
+       "Q 3266 1056 3080 686 \n",
+       "Q 2894 316 2569 119 \n",
+       "Q 2244 -78 1831 -78 \n",
+       "Q 1128 -78 684 439 \n",
+       "Q 241 956 241 2144 \n",
+       "Q 241 3472 731 4075 \n",
+       "Q 1159 4600 1884 4600 \n",
+       "Q 2425 4600 2770 4297 \n",
+       "Q 3116 3994 3184 3459 \n",
+       "z\n",
+       "M 888 1484 \n",
+       "Q 888 1194 1011 928 \n",
+       "Q 1134 663 1356 523 \n",
+       "Q 1578 384 1822 384 \n",
+       "Q 2178 384 2434 671 \n",
+       "Q 2691 959 2691 1453 \n",
+       "Q 2691 1928 2437 2201 \n",
+       "Q 2184 2475 1800 2475 \n",
+       "Q 1419 2475 1153 2201 \n",
+       "Q 888 1928 888 1484 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-36\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "   </g>\n",
+       "   <g id=\"patch_3\">\n",
+       "    <path d=\"M 52.477469 189.129188 \n",
+       "L 97.117469 189.129188 \n",
+       "L 97.117469 30.729188 \n",
+       "L 52.477469 30.729188 \n",
+       "z\n",
+       "\" clip-path=\"url(#pf187f43ab3)\" style=\"fill: #c44e52; stroke: #ffffff; stroke-linejoin: miter\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_4\">\n",
+       "    <path d=\"M 141.757469 189.129188 \n",
+       "L 186.397469 189.129188 \n",
+       "L 186.397469 107.788647 \n",
+       "L 141.757469 107.788647 \n",
+       "z\n",
+       "\" clip-path=\"url(#pf187f43ab3)\" style=\"fill: #55a868; stroke: #ffffff; stroke-linejoin: miter\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_5\">\n",
+       "    <path d=\"M 231.037469 189.129188 \n",
+       "L 275.677469 189.129188 \n",
+       "L 275.677469 172.004863 \n",
+       "L 231.037469 172.004863 \n",
+       "z\n",
+       "\" clip-path=\"url(#pf187f43ab3)\" style=\"fill: #4c72b0; stroke: #ffffff; stroke-linejoin: miter\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_6\">\n",
+       "    <path d=\"M 320.317469 189.129188 \n",
+       "L 364.957469 189.129188 \n",
+       "L 364.957469 172.004863 \n",
+       "L 320.317469 172.004863 \n",
+       "z\n",
+       "\" clip-path=\"url(#pf187f43ab3)\" style=\"fill: #ccb974; stroke: #ffffff; stroke-linejoin: miter\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_7\">\n",
+       "    <path d=\"M 409.597469 189.129188 \n",
+       "L 454.237469 189.129188 \n",
+       "L 454.237469 176.285944 \n",
+       "L 409.597469 176.285944 \n",
+       "z\n",
+       "\" clip-path=\"url(#pf187f43ab3)\" style=\"fill: #64b5cd; stroke: #ffffff; stroke-linejoin: miter\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_8\">\n",
+       "    <path d=\"M 30.157469 189.129188 \n",
+       "L 30.157469 22.809188 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_9\">\n",
+       "    <path d=\"M 476.557469 189.129188 \n",
+       "L 476.557469 22.809188 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_10\">\n",
+       "    <path d=\"M 30.157469 189.129188 \n",
+       "L 476.557469 189.129188 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_11\">\n",
+       "    <path d=\"M 30.157469 22.809188 \n",
+       "L 476.557469 22.809188 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"text_10\">\n",
+       "    <!-- Distribution of road points in traffic scene. Total # points: 134 -->\n",
+       "    <g style=\"fill: #262626\" transform=\"translate(77.140562 16.809188) scale(0.132 -0.132)\">\n",
+       "     <defs>\n",
+       "      <path id=\"ArialMT-44\" d=\"M 494 0 \n",
+       "L 494 4581 \n",
+       "L 2072 4581 \n",
+       "Q 2606 4581 2888 4516 \n",
+       "Q 3281 4425 3559 4188 \n",
+       "Q 3922 3881 4101 3404 \n",
+       "Q 4281 2928 4281 2316 \n",
+       "Q 4281 1794 4159 1391 \n",
+       "Q 4038 988 3847 723 \n",
+       "Q 3656 459 3429 307 \n",
+       "Q 3203 156 2883 78 \n",
+       "Q 2563 0 2147 0 \n",
+       "L 494 0 \n",
+       "z\n",
+       "M 1100 541 \n",
+       "L 2078 541 \n",
+       "Q 2531 541 2789 625 \n",
+       "Q 3047 709 3200 863 \n",
+       "Q 3416 1078 3536 1442 \n",
+       "Q 3656 1806 3656 2325 \n",
+       "Q 3656 3044 3420 3430 \n",
+       "Q 3184 3816 2847 3947 \n",
+       "Q 2603 4041 2063 4041 \n",
+       "L 1100 4041 \n",
+       "L 1100 541 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-20\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-66\" d=\"M 556 0 \n",
+       "L 556 2881 \n",
+       "L 59 2881 \n",
+       "L 59 3319 \n",
+       "L 556 3319 \n",
+       "L 556 3672 \n",
+       "Q 556 4006 616 4169 \n",
+       "Q 697 4388 901 4523 \n",
+       "Q 1106 4659 1475 4659 \n",
+       "Q 1713 4659 2000 4603 \n",
+       "L 1916 4113 \n",
+       "Q 1741 4144 1584 4144 \n",
+       "Q 1328 4144 1222 4034 \n",
+       "Q 1116 3925 1116 3625 \n",
+       "L 1116 3319 \n",
+       "L 1763 3319 \n",
+       "L 1763 2881 \n",
+       "L 1116 2881 \n",
+       "L 1116 0 \n",
+       "L 556 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-63\" d=\"M 2588 1216 \n",
+       "L 3141 1144 \n",
+       "Q 3050 572 2676 248 \n",
+       "Q 2303 -75 1759 -75 \n",
+       "Q 1078 -75 664 370 \n",
+       "Q 250 816 250 1647 \n",
+       "Q 250 2184 428 2587 \n",
+       "Q 606 2991 970 3192 \n",
+       "Q 1334 3394 1763 3394 \n",
+       "Q 2303 3394 2647 3120 \n",
+       "Q 2991 2847 3088 2344 \n",
+       "L 2541 2259 \n",
+       "Q 2463 2594 2264 2762 \n",
+       "Q 2066 2931 1784 2931 \n",
+       "Q 1359 2931 1093 2626 \n",
+       "Q 828 2322 828 1663 \n",
+       "Q 828 994 1084 691 \n",
+       "Q 1341 388 1753 388 \n",
+       "Q 2084 388 2306 591 \n",
+       "Q 2528 794 2588 1216 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-2e\" d=\"M 581 0 \n",
+       "L 581 641 \n",
+       "L 1222 641 \n",
+       "L 1222 0 \n",
+       "L 581 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-54\" d=\"M 1659 0 \n",
+       "L 1659 4041 \n",
+       "L 150 4041 \n",
+       "L 150 4581 \n",
+       "L 3781 4581 \n",
+       "L 3781 4041 \n",
+       "L 2266 4041 \n",
+       "L 2266 0 \n",
+       "L 1659 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-23\" d=\"M 322 -78 \n",
+       "L 594 1253 \n",
+       "L 66 1253 \n",
+       "L 66 1719 \n",
+       "L 688 1719 \n",
+       "L 919 2853 \n",
+       "L 66 2853 \n",
+       "L 66 3319 \n",
+       "L 1013 3319 \n",
+       "L 1284 4659 \n",
+       "L 1753 4659 \n",
+       "L 1481 3319 \n",
+       "L 2466 3319 \n",
+       "L 2738 4659 \n",
+       "L 3209 4659 \n",
+       "L 2938 3319 \n",
+       "L 3478 3319 \n",
+       "L 3478 2853 \n",
+       "L 2844 2853 \n",
+       "L 2609 1719 \n",
+       "L 3478 1719 \n",
+       "L 3478 1253 \n",
+       "L 2516 1253 \n",
+       "L 2244 -78 \n",
+       "L 1775 -78 \n",
+       "L 2044 1253 \n",
+       "L 1063 1253 \n",
+       "L 791 -78 \n",
+       "L 322 -78 \n",
+       "z\n",
+       "M 1156 1719 \n",
+       "L 2138 1719 \n",
+       "L 2372 2853 \n",
+       "L 1388 2853 \n",
+       "L 1156 1719 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-3a\" d=\"M 578 2678 \n",
+       "L 578 3319 \n",
+       "L 1219 3319 \n",
+       "L 1219 2678 \n",
+       "L 578 2678 \n",
+       "z\n",
+       "M 578 0 \n",
+       "L 578 641 \n",
+       "L 1219 641 \n",
+       "L 1219 0 \n",
+       "L 578 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-31\" d=\"M 2384 0 \n",
+       "L 1822 0 \n",
+       "L 1822 3584 \n",
+       "Q 1619 3391 1289 3197 \n",
+       "Q 959 3003 697 2906 \n",
+       "L 697 3450 \n",
+       "Q 1169 3672 1522 3987 \n",
+       "Q 1875 4303 2022 4600 \n",
+       "L 2384 4600 \n",
+       "L 2384 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      <path id=\"ArialMT-33\" d=\"M 269 1209 \n",
+       "L 831 1284 \n",
+       "Q 928 806 1161 595 \n",
+       "Q 1394 384 1728 384 \n",
+       "Q 2125 384 2398 659 \n",
+       "Q 2672 934 2672 1341 \n",
+       "Q 2672 1728 2419 1979 \n",
+       "Q 2166 2231 1775 2231 \n",
+       "Q 1616 2231 1378 2169 \n",
+       "L 1441 2663 \n",
+       "Q 1497 2656 1531 2656 \n",
+       "Q 1891 2656 2178 2843 \n",
+       "Q 2466 3031 2466 3422 \n",
+       "Q 2466 3731 2256 3934 \n",
+       "Q 2047 4138 1716 4138 \n",
+       "Q 1388 4138 1169 3931 \n",
+       "Q 950 3725 888 3313 \n",
+       "L 325 3413 \n",
+       "Q 428 3978 793 4289 \n",
+       "Q 1159 4600 1703 4600 \n",
+       "Q 2078 4600 2393 4439 \n",
+       "Q 2709 4278 2876 4000 \n",
+       "Q 3044 3722 3044 3409 \n",
+       "Q 3044 3113 2884 2869 \n",
+       "Q 2725 2625 2413 2481 \n",
+       "Q 2819 2388 3044 2092 \n",
+       "Q 3269 1797 3269 1353 \n",
+       "Q 3269 753 2831 336 \n",
+       "Q 2394 -81 1725 -81 \n",
+       "Q 1122 -81 723 278 \n",
+       "Q 325 638 269 1209 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "     </defs>\n",
+       "     <use xlink:href=\"#ArialMT-44\"/>\n",
+       "     <use xlink:href=\"#ArialMT-69\" x=\"72.216797\"/>\n",
+       "     <use xlink:href=\"#ArialMT-73\" x=\"94.433594\"/>\n",
+       "     <use xlink:href=\"#ArialMT-74\" x=\"144.433594\"/>\n",
+       "     <use xlink:href=\"#ArialMT-72\" x=\"172.216797\"/>\n",
+       "     <use xlink:href=\"#ArialMT-69\" x=\"205.517578\"/>\n",
+       "     <use xlink:href=\"#ArialMT-62\" x=\"227.734375\"/>\n",
+       "     <use xlink:href=\"#ArialMT-75\" x=\"283.349609\"/>\n",
+       "     <use xlink:href=\"#ArialMT-74\" x=\"338.964844\"/>\n",
+       "     <use xlink:href=\"#ArialMT-69\" x=\"366.748047\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6f\" x=\"388.964844\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6e\" x=\"444.580078\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"500.195312\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6f\" x=\"527.978516\"/>\n",
+       "     <use xlink:href=\"#ArialMT-66\" x=\"583.59375\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"611.376953\"/>\n",
+       "     <use xlink:href=\"#ArialMT-72\" x=\"639.160156\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6f\" x=\"672.460938\"/>\n",
+       "     <use xlink:href=\"#ArialMT-61\" x=\"728.076172\"/>\n",
+       "     <use xlink:href=\"#ArialMT-64\" x=\"783.691406\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"839.306641\"/>\n",
+       "     <use xlink:href=\"#ArialMT-70\" x=\"867.089844\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6f\" x=\"922.705078\"/>\n",
+       "     <use xlink:href=\"#ArialMT-69\" x=\"978.320312\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6e\" x=\"1000.537109\"/>\n",
+       "     <use xlink:href=\"#ArialMT-74\" x=\"1056.152344\"/>\n",
+       "     <use xlink:href=\"#ArialMT-73\" x=\"1083.935547\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"1133.935547\"/>\n",
+       "     <use xlink:href=\"#ArialMT-69\" x=\"1161.71875\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6e\" x=\"1183.935547\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"1239.550781\"/>\n",
+       "     <use xlink:href=\"#ArialMT-74\" x=\"1267.333984\"/>\n",
+       "     <use xlink:href=\"#ArialMT-72\" x=\"1295.117188\"/>\n",
+       "     <use xlink:href=\"#ArialMT-61\" x=\"1328.417969\"/>\n",
+       "     <use xlink:href=\"#ArialMT-66\" x=\"1384.033203\"/>\n",
+       "     <use xlink:href=\"#ArialMT-66\" x=\"1410.066406\"/>\n",
+       "     <use xlink:href=\"#ArialMT-69\" x=\"1437.849609\"/>\n",
+       "     <use xlink:href=\"#ArialMT-63\" x=\"1460.066406\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"1510.066406\"/>\n",
+       "     <use xlink:href=\"#ArialMT-73\" x=\"1537.849609\"/>\n",
+       "     <use xlink:href=\"#ArialMT-63\" x=\"1587.849609\"/>\n",
+       "     <use xlink:href=\"#ArialMT-65\" x=\"1637.849609\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6e\" x=\"1693.464844\"/>\n",
+       "     <use xlink:href=\"#ArialMT-65\" x=\"1749.080078\"/>\n",
+       "     <use xlink:href=\"#ArialMT-2e\" x=\"1804.695312\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"1832.478516\"/>\n",
+       "     <use xlink:href=\"#ArialMT-54\" x=\"1858.511719\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6f\" x=\"1908.470703\"/>\n",
+       "     <use xlink:href=\"#ArialMT-74\" x=\"1964.085938\"/>\n",
+       "     <use xlink:href=\"#ArialMT-61\" x=\"1991.869141\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6c\" x=\"2047.484375\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"2069.701172\"/>\n",
+       "     <use xlink:href=\"#ArialMT-23\" x=\"2097.484375\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"2153.099609\"/>\n",
+       "     <use xlink:href=\"#ArialMT-70\" x=\"2180.882812\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6f\" x=\"2236.498047\"/>\n",
+       "     <use xlink:href=\"#ArialMT-69\" x=\"2292.113281\"/>\n",
+       "     <use xlink:href=\"#ArialMT-6e\" x=\"2314.330078\"/>\n",
+       "     <use xlink:href=\"#ArialMT-74\" x=\"2369.945312\"/>\n",
+       "     <use xlink:href=\"#ArialMT-73\" x=\"2397.728516\"/>\n",
+       "     <use xlink:href=\"#ArialMT-3a\" x=\"2447.728516\"/>\n",
+       "     <use xlink:href=\"#ArialMT-20\" x=\"2475.511719\"/>\n",
+       "     <use xlink:href=\"#ArialMT-31\" x=\"2503.294922\"/>\n",
+       "     <use xlink:href=\"#ArialMT-33\" x=\"2558.910156\"/>\n",
+       "     <use xlink:href=\"#ArialMT-34\" x=\"2614.525391\"/>\n",
+       "    </g>\n",
+       "   </g>\n",
+       "  </g>\n",
+       " </g>\n",
+       " <defs>\n",
+       "  <clipPath id=\"pf187f43ab3\">\n",
+       "   <rect x=\"30.157469\" y=\"22.809188\" width=\"446.4\" height=\"166.32\"/>\n",
+       "  </clipPath>\n",
+       " </defs>\n",
+       "</svg>\n"
+      ],
+      "text/plain": [
+       "<Figure size 800x300 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "pd.Series(\n",
+    "    [\n",
+    "        traffic_scene['roads'][idx]['type']\n",
+    "        for idx in range(len(traffic_scene['roads']))\n",
+    "    ]\n",
+    ").value_counts().plot(kind='bar', rot=45, color=cmap);\n",
+    "plt.title(f'Distribution of road points in traffic scene. Total # points: {len(traffic_scene[\"roads\"])}')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### In-Depth: Road Objects\n",
+    "\n",
+    "This is a list of the different road objects in the traffic scene. For each road object, we have its position, velocity, size (width and length), heading, whether it is valid, its type, and its goal position (the final position of the vehicle)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['position', 'width', 'length', 'heading', 'velocity', 'valid', 'goalPosition', 'type'])"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Take the first object\n",
+    "idx = 0\n",
+    "\n",
+    "# For each object, we have this information:\n",
+    "traffic_scene['objects'][idx].keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[\n",
+      "    {\n",
+      "        \"x\": 9037.7138671875,\n",
+      "        \"y\": -2720.373779296875\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 9037.7607421875,\n",
+      "        \"y\": -2720.306640625\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 9037.822265625,\n",
+      "        \"y\": -2720.217529296875\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 9037.8916015625,\n",
+      "        \"y\": -2720.146240234375\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 9037.9482421875,\n",
+      "        \"y\": -2720.070068359375\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 9038.01953125,\n",
+      "        \"y\": -2719.994384765625\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 9038.1005859375,\n",
+      "        \"y\": -2719.903076171875\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 9038.1953125,\n",
+      "        \"y\": -2719.830810546875\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 9038.279296875,\n",
+      "        \"y\": -2719.74462890625\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 9038.3564453125,\n",
+      "        \"y\": -2719.674560546875\n",
+      "    }\n",
+      "]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Position contains the (x, y) coordinates for the vehicle at every time step\n",
+    "print(json.dumps(traffic_scene['objects'][idx]['position'][:10], indent=4))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0.6877052187919617, 0.6777269244194031)"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Width and length together make up the size of the object, which is used to check for collisions\n",
+    "traffic_scene['objects'][idx]['width'], traffic_scene['objects'][idx]['length'] "
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "An object's heading refers to the direction it is pointing or moving in. The default coordinate system in Nocturne is right-handed, where the positive x and y axes point to the right and downwards, respectively. In a right-handed coordinate system, 0 degrees is located on the x-axis and the angle increases counter-clockwise.\n",
+    "\n",
+    "Because the scene is created from the viewpoint of an ego driver, the heading of some vehicles may be unavailable at certain time steps. These cases are represented by the value `-10_000`, which marks the corresponding time steps as invalid so that they can be filtered out."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/svg+xml": [
+       "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n",
+       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
+       "  \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
+       "<svg xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"527.237328pt\" height=\"217.357641pt\" viewBox=\"0 0 527.237328 217.357641\" xmlns=\"http://www.w3.org/2000/svg\" version=\"1.1\">\n",
+       " <metadata>\n",
+       "  <rdf:RDF xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n",
+       "   <cc:Work>\n",
+       "    <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n",
+       "    <dc:date>2023-10-03T10:23:28.800884</dc:date>\n",
+       "    <dc:format>image/svg+xml</dc:format>\n",
+       "    <dc:creator>\n",
+       "     <cc:Agent>\n",
+       "      <dc:title>Matplotlib v3.8.0, https://matplotlib.org/</dc:title>\n",
+       "     </cc:Agent>\n",
+       "    </dc:creator>\n",
+       "   </cc:Work>\n",
+       "  </rdf:RDF>\n",
+       " </metadata>\n",
+       " <defs>\n",
+       "  <style type=\"text/css\">*{stroke-linejoin: round; stroke-linecap: butt}</style>\n",
+       " </defs>\n",
+       " <g id=\"figure_1\">\n",
+       "  <g id=\"patch_1\">\n",
+       "   <path d=\"M 0 217.357641 \n",
+       "L 527.237328 217.357641 \n",
+       "L 527.237328 0 \n",
+       "L 0 0 \n",
+       "L 0 217.357641 \n",
+       "z\n",
+       "\" style=\"fill: none\"/>\n",
+       "  </g>\n",
+       "  <g id=\"axes_1\">\n",
+       "   <g id=\"patch_2\">\n",
+       "    <path d=\"M 73.637328 173.52 \n",
+       "L 520.037328 173.52 \n",
+       "L 520.037328 7.2 \n",
+       "L 73.637328 7.2 \n",
+       "L 73.637328 173.52 \n",
+       "z\n",
+       "\" style=\"fill: none\"/>\n",
+       "   </g>\n",
+       "   <g id=\"matplotlib.axis_1\">\n",
+       "    <g id=\"xtick_1\">\n",
+       "     <g id=\"line2d_1\">\n",
+       "      <defs>\n",
+       "       <path id=\"m5295b56288\" d=\"M 0 0 \n",
+       "L 0 6 \n",
+       "\" style=\"stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </defs>\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m5295b56288\" x=\"93.928237\" y=\"173.52\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_1\">\n",
+       "      <!-- 0 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(90.56387 191.680953) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-30\" d=\"M 266 2259 \n",
+       "Q 266 3072 433 3567 \n",
+       "Q 600 4063 929 4331 \n",
+       "Q 1259 4600 1759 4600 \n",
+       "Q 2128 4600 2406 4451 \n",
+       "Q 2684 4303 2865 4023 \n",
+       "Q 3047 3744 3150 3342 \n",
+       "Q 3253 2941 3253 2259 \n",
+       "Q 3253 1453 3087 958 \n",
+       "Q 2922 463 2592 192 \n",
+       "Q 2263 -78 1759 -78 \n",
+       "Q 1097 -78 719 397 \n",
+       "Q 266 969 266 2259 \n",
+       "z\n",
+       "M 844 2259 \n",
+       "Q 844 1131 1108 757 \n",
+       "Q 1372 384 1759 384 \n",
+       "Q 2147 384 2411 759 \n",
+       "Q 2675 1134 2675 2259 \n",
+       "Q 2675 3391 2411 3762 \n",
+       "Q 2147 4134 1753 4134 \n",
+       "Q 1366 4134 1134 3806 \n",
+       "Q 844 3388 844 2259 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-30\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_2\">\n",
+       "     <g id=\"line2d_2\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m5295b56288\" x=\"184.110055\" y=\"173.52\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_2\">\n",
+       "      <!-- 20 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(177.381321 191.680953) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-32\" d=\"M 3222 541 \n",
+       "L 3222 0 \n",
+       "L 194 0 \n",
+       "Q 188 203 259 391 \n",
+       "Q 375 700 629 1000 \n",
+       "Q 884 1300 1366 1694 \n",
+       "Q 2113 2306 2375 2664 \n",
+       "Q 2638 3022 2638 3341 \n",
+       "Q 2638 3675 2398 3904 \n",
+       "Q 2159 4134 1775 4134 \n",
+       "Q 1369 4134 1125 3890 \n",
+       "Q 881 3647 878 3216 \n",
+       "L 300 3275 \n",
+       "Q 359 3922 746 4261 \n",
+       "Q 1134 4600 1788 4600 \n",
+       "Q 2447 4600 2831 4234 \n",
+       "Q 3216 3869 3216 3328 \n",
+       "Q 3216 3053 3103 2787 \n",
+       "Q 2991 2522 2730 2228 \n",
+       "Q 2469 1934 1863 1422 \n",
+       "Q 1356 997 1212 845 \n",
+       "Q 1069 694 975 541 \n",
+       "L 3222 541 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-32\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_3\">\n",
+       "     <g id=\"line2d_3\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m5295b56288\" x=\"274.291874\" y=\"173.52\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_3\">\n",
+       "      <!-- 40 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(267.563139 191.680953) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-34\" d=\"M 2069 0 \n",
+       "L 2069 1097 \n",
+       "L 81 1097 \n",
+       "L 81 1613 \n",
+       "L 2172 4581 \n",
+       "L 2631 4581 \n",
+       "L 2631 1613 \n",
+       "L 3250 1613 \n",
+       "L 3250 1097 \n",
+       "L 2631 1097 \n",
+       "L 2631 0 \n",
+       "L 2069 0 \n",
+       "z\n",
+       "M 2069 1613 \n",
+       "L 2069 3678 \n",
+       "L 634 1613 \n",
+       "L 2069 1613 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-34\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_4\">\n",
+       "     <g id=\"line2d_4\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m5295b56288\" x=\"364.473692\" y=\"173.52\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_4\">\n",
+       "      <!-- 60 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(357.744957 191.680953) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-36\" d=\"M 3184 3459 \n",
+       "L 2625 3416 \n",
+       "Q 2550 3747 2413 3897 \n",
+       "Q 2184 4138 1850 4138 \n",
+       "Q 1581 4138 1378 3988 \n",
+       "Q 1113 3794 959 3422 \n",
+       "Q 806 3050 800 2363 \n",
+       "Q 1003 2672 1297 2822 \n",
+       "Q 1591 2972 1913 2972 \n",
+       "Q 2475 2972 2870 2558 \n",
+       "Q 3266 2144 3266 1488 \n",
+       "Q 3266 1056 3080 686 \n",
+       "Q 2894 316 2569 119 \n",
+       "Q 2244 -78 1831 -78 \n",
+       "Q 1128 -78 684 439 \n",
+       "Q 241 956 241 2144 \n",
+       "Q 241 3472 731 4075 \n",
+       "Q 1159 4600 1884 4600 \n",
+       "Q 2425 4600 2770 4297 \n",
+       "Q 3116 3994 3184 3459 \n",
+       "z\n",
+       "M 888 1484 \n",
+       "Q 888 1194 1011 928 \n",
+       "Q 1134 663 1356 523 \n",
+       "Q 1578 384 1822 384 \n",
+       "Q 2178 384 2434 671 \n",
+       "Q 2691 959 2691 1453 \n",
+       "Q 2691 1928 2437 2201 \n",
+       "Q 2184 2475 1800 2475 \n",
+       "Q 1419 2475 1153 2201 \n",
+       "Q 888 1928 888 1484 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-36\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_5\">\n",
+       "     <g id=\"line2d_5\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m5295b56288\" x=\"454.65551\" y=\"173.52\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_5\">\n",
+       "      <!-- 80 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(447.926776 191.680953) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-38\" d=\"M 1131 2484 \n",
+       "Q 781 2613 612 2850 \n",
+       "Q 444 3088 444 3419 \n",
+       "Q 444 3919 803 4259 \n",
+       "Q 1163 4600 1759 4600 \n",
+       "Q 2359 4600 2725 4251 \n",
+       "Q 3091 3903 3091 3403 \n",
+       "Q 3091 3084 2923 2848 \n",
+       "Q 2756 2613 2416 2484 \n",
+       "Q 2838 2347 3058 2040 \n",
+       "Q 3278 1734 3278 1309 \n",
+       "Q 3278 722 2862 322 \n",
+       "Q 2447 -78 1769 -78 \n",
+       "Q 1091 -78 675 323 \n",
+       "Q 259 725 259 1325 \n",
+       "Q 259 1772 486 2073 \n",
+       "Q 713 2375 1131 2484 \n",
+       "z\n",
+       "M 1019 3438 \n",
+       "Q 1019 3113 1228 2906 \n",
+       "Q 1438 2700 1772 2700 \n",
+       "Q 2097 2700 2305 2904 \n",
+       "Q 2513 3109 2513 3406 \n",
+       "Q 2513 3716 2298 3927 \n",
+       "Q 2084 4138 1766 4138 \n",
+       "Q 1444 4138 1231 3931 \n",
+       "Q 1019 3725 1019 3438 \n",
+       "z\n",
+       "M 838 1322 \n",
+       "Q 838 1081 952 856 \n",
+       "Q 1066 631 1291 507 \n",
+       "Q 1516 384 1775 384 \n",
+       "Q 2178 384 2440 643 \n",
+       "Q 2703 903 2703 1303 \n",
+       "Q 2703 1709 2433 1975 \n",
+       "Q 2163 2241 1756 2241 \n",
+       "Q 1359 2241 1098 1978 \n",
+       "Q 838 1716 838 1322 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-38\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"text_6\">\n",
+       "     <!-- Time step -->\n",
+       "     <g style=\"fill: #262626\" transform=\"translate(268.111859 207.534141) scale(0.132 -0.132)\">\n",
+       "      <defs>\n",
+       "       <path id=\"ArialMT-54\" d=\"M 1659 0 \n",
+       "L 1659 4041 \n",
+       "L 150 4041 \n",
+       "L 150 4581 \n",
+       "L 3781 4581 \n",
+       "L 3781 4041 \n",
+       "L 2266 4041 \n",
+       "L 2266 0 \n",
+       "L 1659 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-69\" d=\"M 425 3934 \n",
+       "L 425 4581 \n",
+       "L 988 4581 \n",
+       "L 988 3934 \n",
+       "L 425 3934 \n",
+       "z\n",
+       "M 425 0 \n",
+       "L 425 3319 \n",
+       "L 988 3319 \n",
+       "L 988 0 \n",
+       "L 425 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-6d\" d=\"M 422 0 \n",
+       "L 422 3319 \n",
+       "L 925 3319 \n",
+       "L 925 2853 \n",
+       "Q 1081 3097 1340 3245 \n",
+       "Q 1600 3394 1931 3394 \n",
+       "Q 2300 3394 2536 3241 \n",
+       "Q 2772 3088 2869 2813 \n",
+       "Q 3263 3394 3894 3394 \n",
+       "Q 4388 3394 4653 3120 \n",
+       "Q 4919 2847 4919 2278 \n",
+       "L 4919 0 \n",
+       "L 4359 0 \n",
+       "L 4359 2091 \n",
+       "Q 4359 2428 4304 2576 \n",
+       "Q 4250 2725 4106 2815 \n",
+       "Q 3963 2906 3769 2906 \n",
+       "Q 3419 2906 3187 2673 \n",
+       "Q 2956 2441 2956 1928 \n",
+       "L 2956 0 \n",
+       "L 2394 0 \n",
+       "L 2394 2156 \n",
+       "Q 2394 2531 2256 2718 \n",
+       "Q 2119 2906 1806 2906 \n",
+       "Q 1569 2906 1367 2781 \n",
+       "Q 1166 2656 1075 2415 \n",
+       "Q 984 2175 984 1722 \n",
+       "L 984 0 \n",
+       "L 422 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-65\" d=\"M 2694 1069 \n",
+       "L 3275 997 \n",
+       "Q 3138 488 2766 206 \n",
+       "Q 2394 -75 1816 -75 \n",
+       "Q 1088 -75 661 373 \n",
+       "Q 234 822 234 1631 \n",
+       "Q 234 2469 665 2931 \n",
+       "Q 1097 3394 1784 3394 \n",
+       "Q 2450 3394 2872 2941 \n",
+       "Q 3294 2488 3294 1666 \n",
+       "Q 3294 1616 3291 1516 \n",
+       "L 816 1516 \n",
+       "Q 847 969 1125 678 \n",
+       "Q 1403 388 1819 388 \n",
+       "Q 2128 388 2347 550 \n",
+       "Q 2566 713 2694 1069 \n",
+       "z\n",
+       "M 847 1978 \n",
+       "L 2700 1978 \n",
+       "Q 2663 2397 2488 2606 \n",
+       "Q 2219 2931 1791 2931 \n",
+       "Q 1403 2931 1139 2672 \n",
+       "Q 875 2413 847 1978 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-20\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-73\" d=\"M 197 991 \n",
+       "L 753 1078 \n",
+       "Q 800 744 1014 566 \n",
+       "Q 1228 388 1613 388 \n",
+       "Q 2000 388 2187 545 \n",
+       "Q 2375 703 2375 916 \n",
+       "Q 2375 1106 2209 1216 \n",
+       "Q 2094 1291 1634 1406 \n",
+       "Q 1016 1563 777 1677 \n",
+       "Q 538 1791 414 1992 \n",
+       "Q 291 2194 291 2438 \n",
+       "Q 291 2659 392 2848 \n",
+       "Q 494 3038 669 3163 \n",
+       "Q 800 3259 1026 3326 \n",
+       "Q 1253 3394 1513 3394 \n",
+       "Q 1903 3394 2198 3281 \n",
+       "Q 2494 3169 2634 2976 \n",
+       "Q 2775 2784 2828 2463 \n",
+       "L 2278 2388 \n",
+       "Q 2241 2644 2061 2787 \n",
+       "Q 1881 2931 1553 2931 \n",
+       "Q 1166 2931 1000 2803 \n",
+       "Q 834 2675 834 2503 \n",
+       "Q 834 2394 903 2306 \n",
+       "Q 972 2216 1119 2156 \n",
+       "Q 1203 2125 1616 2013 \n",
+       "Q 2213 1853 2448 1751 \n",
+       "Q 2684 1650 2818 1456 \n",
+       "Q 2953 1263 2953 975 \n",
+       "Q 2953 694 2789 445 \n",
+       "Q 2625 197 2315 61 \n",
+       "Q 2006 -75 1616 -75 \n",
+       "Q 969 -75 630 194 \n",
+       "Q 291 463 197 991 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-74\" d=\"M 1650 503 \n",
+       "L 1731 6 \n",
+       "Q 1494 -44 1306 -44 \n",
+       "Q 1000 -44 831 53 \n",
+       "Q 663 150 594 308 \n",
+       "Q 525 466 525 972 \n",
+       "L 525 2881 \n",
+       "L 113 2881 \n",
+       "L 113 3319 \n",
+       "L 525 3319 \n",
+       "L 525 4141 \n",
+       "L 1084 4478 \n",
+       "L 1084 3319 \n",
+       "L 1650 3319 \n",
+       "L 1650 2881 \n",
+       "L 1084 2881 \n",
+       "L 1084 941 \n",
+       "Q 1084 700 1114 631 \n",
+       "Q 1144 563 1211 522 \n",
+       "Q 1278 481 1403 481 \n",
+       "Q 1497 481 1650 503 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-70\" d=\"M 422 -1272 \n",
+       "L 422 3319 \n",
+       "L 934 3319 \n",
+       "L 934 2888 \n",
+       "Q 1116 3141 1344 3267 \n",
+       "Q 1572 3394 1897 3394 \n",
+       "Q 2322 3394 2647 3175 \n",
+       "Q 2972 2956 3137 2557 \n",
+       "Q 3303 2159 3303 1684 \n",
+       "Q 3303 1175 3120 767 \n",
+       "Q 2938 359 2589 142 \n",
+       "Q 2241 -75 1856 -75 \n",
+       "Q 1575 -75 1351 44 \n",
+       "Q 1128 163 984 344 \n",
+       "L 984 -1272 \n",
+       "L 422 -1272 \n",
+       "z\n",
+       "M 931 1641 \n",
+       "Q 931 1000 1190 694 \n",
+       "Q 1450 388 1819 388 \n",
+       "Q 2194 388 2461 705 \n",
+       "Q 2728 1022 2728 1688 \n",
+       "Q 2728 2322 2467 2637 \n",
+       "Q 2206 2953 1844 2953 \n",
+       "Q 1484 2953 1207 2617 \n",
+       "Q 931 2281 931 1641 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      </defs>\n",
+       "      <use xlink:href=\"#ArialMT-54\"/>\n",
+       "      <use xlink:href=\"#ArialMT-69\" x=\"57.333984\"/>\n",
+       "      <use xlink:href=\"#ArialMT-6d\" x=\"79.550781\"/>\n",
+       "      <use xlink:href=\"#ArialMT-65\" x=\"162.851562\"/>\n",
+       "      <use xlink:href=\"#ArialMT-20\" x=\"218.466797\"/>\n",
+       "      <use xlink:href=\"#ArialMT-73\" x=\"246.25\"/>\n",
+       "      <use xlink:href=\"#ArialMT-74\" x=\"296.25\"/>\n",
+       "      <use xlink:href=\"#ArialMT-65\" x=\"324.033203\"/>\n",
+       "      <use xlink:href=\"#ArialMT-70\" x=\"379.648438\"/>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "   </g>\n",
+       "   <g id=\"matplotlib.axis_2\">\n",
+       "    <g id=\"ytick_1\">\n",
+       "     <g id=\"line2d_6\">\n",
+       "      <defs>\n",
+       "       <path id=\"m1813069e95\" d=\"M 0 0 \n",
+       "L -6 0 \n",
+       "\" style=\"stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </defs>\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m1813069e95\" x=\"73.637328\" y=\"165.96\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_7\">\n",
+       "      <!-- −10000 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(23.4265 170.290477) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-2212\" d=\"M 3381 1997 \n",
+       "L 356 1997 \n",
+       "L 356 2522 \n",
+       "L 3381 2522 \n",
+       "L 3381 1997 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "        <path id=\"ArialMT-31\" d=\"M 2384 0 \n",
+       "L 1822 0 \n",
+       "L 1822 3584 \n",
+       "Q 1619 3391 1289 3197 \n",
+       "Q 959 3003 697 2906 \n",
+       "L 697 3450 \n",
+       "Q 1169 3672 1522 3987 \n",
+       "Q 1875 4303 2022 4600 \n",
+       "L 2384 4600 \n",
+       "L 2384 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-2212\"/>\n",
+       "       <use xlink:href=\"#ArialMT-31\" x=\"58.398438\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"114.013672\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"169.628906\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"225.244141\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"280.859375\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_2\">\n",
+       "     <g id=\"line2d_7\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m1813069e95\" x=\"73.637328\" y=\"135.89293\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_8\">\n",
+       "      <!-- −8000 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(30.155234 140.223407) scale(0.121 -0.121)\">\n",
+       "       <use xlink:href=\"#ArialMT-2212\"/>\n",
+       "       <use xlink:href=\"#ArialMT-38\" x=\"58.398438\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"114.013672\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"169.628906\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"225.244141\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_3\">\n",
+       "     <g id=\"line2d_8\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m1813069e95\" x=\"73.637328\" y=\"105.82586\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_9\">\n",
+       "      <!-- −6000 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(30.155234 110.156337) scale(0.121 -0.121)\">\n",
+       "       <use xlink:href=\"#ArialMT-2212\"/>\n",
+       "       <use xlink:href=\"#ArialMT-36\" x=\"58.398438\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"114.013672\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"169.628906\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"225.244141\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_4\">\n",
+       "     <g id=\"line2d_9\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m1813069e95\" x=\"73.637328\" y=\"75.758791\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_10\">\n",
+       "      <!-- −4000 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(30.155234 80.089267) scale(0.121 -0.121)\">\n",
+       "       <use xlink:href=\"#ArialMT-2212\"/>\n",
+       "       <use xlink:href=\"#ArialMT-34\" x=\"58.398438\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"114.013672\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"169.628906\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"225.244141\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_5\">\n",
+       "     <g id=\"line2d_10\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m1813069e95\" x=\"73.637328\" y=\"45.691721\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_11\">\n",
+       "      <!-- −2000 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(30.155234 50.022197) scale(0.121 -0.121)\">\n",
+       "       <use xlink:href=\"#ArialMT-2212\"/>\n",
+       "       <use xlink:href=\"#ArialMT-32\" x=\"58.398438\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"114.013672\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"169.628906\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"225.244141\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_6\">\n",
+       "     <g id=\"line2d_11\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m1813069e95\" x=\"73.637328\" y=\"15.624651\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_12\">\n",
+       "      <!-- 0 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(57.408594 19.955128) scale(0.121 -0.121)\">\n",
+       "       <use xlink:href=\"#ArialMT-30\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"text_13\">\n",
+       "     <!-- Heading -->\n",
+       "     <g style=\"fill: #262626\" transform=\"translate(16.648313 114.943969) rotate(-90) scale(0.132 -0.132)\">\n",
+       "      <defs>\n",
+       "       <path id=\"ArialMT-48\" d=\"M 513 0 \n",
+       "L 513 4581 \n",
+       "L 1119 4581 \n",
+       "L 1119 2700 \n",
+       "L 3500 2700 \n",
+       "L 3500 4581 \n",
+       "L 4106 4581 \n",
+       "L 4106 0 \n",
+       "L 3500 0 \n",
+       "L 3500 2159 \n",
+       "L 1119 2159 \n",
+       "L 1119 0 \n",
+       "L 513 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-61\" d=\"M 2588 409 \n",
+       "Q 2275 144 1986 34 \n",
+       "Q 1697 -75 1366 -75 \n",
+       "Q 819 -75 525 192 \n",
+       "Q 231 459 231 875 \n",
+       "Q 231 1119 342 1320 \n",
+       "Q 453 1522 633 1644 \n",
+       "Q 813 1766 1038 1828 \n",
+       "Q 1203 1872 1538 1913 \n",
+       "Q 2219 1994 2541 2106 \n",
+       "Q 2544 2222 2544 2253 \n",
+       "Q 2544 2597 2384 2738 \n",
+       "Q 2169 2928 1744 2928 \n",
+       "Q 1347 2928 1158 2789 \n",
+       "Q 969 2650 878 2297 \n",
+       "L 328 2372 \n",
+       "Q 403 2725 575 2942 \n",
+       "Q 747 3159 1072 3276 \n",
+       "Q 1397 3394 1825 3394 \n",
+       "Q 2250 3394 2515 3294 \n",
+       "Q 2781 3194 2906 3042 \n",
+       "Q 3031 2891 3081 2659 \n",
+       "Q 3109 2516 3109 2141 \n",
+       "L 3109 1391 \n",
+       "Q 3109 606 3145 398 \n",
+       "Q 3181 191 3288 0 \n",
+       "L 2700 0 \n",
+       "Q 2613 175 2588 409 \n",
+       "z\n",
+       "M 2541 1666 \n",
+       "Q 2234 1541 1622 1453 \n",
+       "Q 1275 1403 1131 1340 \n",
+       "Q 988 1278 909 1158 \n",
+       "Q 831 1038 831 891 \n",
+       "Q 831 666 1001 516 \n",
+       "Q 1172 366 1500 366 \n",
+       "Q 1825 366 2078 508 \n",
+       "Q 2331 650 2450 897 \n",
+       "Q 2541 1088 2541 1459 \n",
+       "L 2541 1666 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-64\" d=\"M 2575 0 \n",
+       "L 2575 419 \n",
+       "Q 2259 -75 1647 -75 \n",
+       "Q 1250 -75 917 144 \n",
+       "Q 584 363 401 755 \n",
+       "Q 219 1147 219 1656 \n",
+       "Q 219 2153 384 2558 \n",
+       "Q 550 2963 881 3178 \n",
+       "Q 1213 3394 1622 3394 \n",
+       "Q 1922 3394 2156 3267 \n",
+       "Q 2391 3141 2538 2938 \n",
+       "L 2538 4581 \n",
+       "L 3097 4581 \n",
+       "L 3097 0 \n",
+       "L 2575 0 \n",
+       "z\n",
+       "M 797 1656 \n",
+       "Q 797 1019 1065 703 \n",
+       "Q 1334 388 1700 388 \n",
+       "Q 2069 388 2326 689 \n",
+       "Q 2584 991 2584 1609 \n",
+       "Q 2584 2291 2321 2609 \n",
+       "Q 2059 2928 1675 2928 \n",
+       "Q 1300 2928 1048 2622 \n",
+       "Q 797 2316 797 1656 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-6e\" d=\"M 422 0 \n",
+       "L 422 3319 \n",
+       "L 928 3319 \n",
+       "L 928 2847 \n",
+       "Q 1294 3394 1984 3394 \n",
+       "Q 2284 3394 2536 3286 \n",
+       "Q 2788 3178 2913 3003 \n",
+       "Q 3038 2828 3088 2588 \n",
+       "Q 3119 2431 3119 2041 \n",
+       "L 3119 0 \n",
+       "L 2556 0 \n",
+       "L 2556 2019 \n",
+       "Q 2556 2363 2490 2533 \n",
+       "Q 2425 2703 2258 2804 \n",
+       "Q 2091 2906 1866 2906 \n",
+       "Q 1506 2906 1245 2678 \n",
+       "Q 984 2450 984 1813 \n",
+       "L 984 0 \n",
+       "L 422 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-67\" d=\"M 319 -275 \n",
+       "L 866 -356 \n",
+       "Q 900 -609 1056 -725 \n",
+       "Q 1266 -881 1628 -881 \n",
+       "Q 2019 -881 2231 -725 \n",
+       "Q 2444 -569 2519 -288 \n",
+       "Q 2563 -116 2559 434 \n",
+       "Q 2191 0 1641 0 \n",
+       "Q 956 0 581 494 \n",
+       "Q 206 988 206 1678 \n",
+       "Q 206 2153 378 2554 \n",
+       "Q 550 2956 876 3175 \n",
+       "Q 1203 3394 1644 3394 \n",
+       "Q 2231 3394 2613 2919 \n",
+       "L 2613 3319 \n",
+       "L 3131 3319 \n",
+       "L 3131 450 \n",
+       "Q 3131 -325 2973 -648 \n",
+       "Q 2816 -972 2473 -1159 \n",
+       "Q 2131 -1347 1631 -1347 \n",
+       "Q 1038 -1347 672 -1080 \n",
+       "Q 306 -813 319 -275 \n",
+       "z\n",
+       "M 784 1719 \n",
+       "Q 784 1066 1043 766 \n",
+       "Q 1303 466 1694 466 \n",
+       "Q 2081 466 2343 764 \n",
+       "Q 2606 1063 2606 1700 \n",
+       "Q 2606 2309 2336 2618 \n",
+       "Q 2066 2928 1684 2928 \n",
+       "Q 1309 2928 1046 2623 \n",
+       "Q 784 2319 784 1719 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      </defs>\n",
+       "      <use xlink:href=\"#ArialMT-48\"/>\n",
+       "      <use xlink:href=\"#ArialMT-65\" x=\"72.216797\"/>\n",
+       "      <use xlink:href=\"#ArialMT-61\" x=\"127.832031\"/>\n",
+       "      <use xlink:href=\"#ArialMT-64\" x=\"183.447266\"/>\n",
+       "      <use xlink:href=\"#ArialMT-69\" x=\"239.0625\"/>\n",
+       "      <use xlink:href=\"#ArialMT-6e\" x=\"261.279297\"/>\n",
+       "      <use xlink:href=\"#ArialMT-67\" x=\"316.894531\"/>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "   </g>\n",
+       "   <g id=\"line2d_12\">\n",
+       "    <path d=\"M 93.928237 20.291222 \n",
+       "L 98.437328 20.295311 \n",
+       "L 102.946419 20.295055 \n",
+       "L 107.45551 20.295746 \n",
+       "L 111.964601 20.301475 \n",
+       "L 116.473692 20.30664 \n",
+       "L 120.982783 20.316978 \n",
+       "L 125.491874 20.343817 \n",
+       "L 130.000964 20.365106 \n",
+       "L 134.510055 20.371958 \n",
+       "L 139.019146 20.373267 \n",
+       "L 143.528237 20.377304 \n",
+       "L 148.037328 20.355323 \n",
+       "L 152.546419 20.356218 \n",
+       "L 157.05551 20.348428 \n",
+       "L 161.564601 20.354478 \n",
+       "L 166.073692 20.377712 \n",
+       "L 170.582783 14.76 \n",
+       "L 175.091874 14.786416 \n",
+       "L 179.600964 20.359611 \n",
+       "L 184.110055 165.96 \n",
+       "L 188.619146 165.96 \n",
+       "L 193.128237 165.96 \n",
+       "L 197.637328 165.96 \n",
+       "L 202.146419 165.96 \n",
+       "L 206.65551 165.96 \n",
+       "L 211.164601 165.96 \n",
+       "L 215.673692 165.96 \n",
+       "L 220.182783 165.96 \n",
+       "L 224.691874 165.96 \n",
+       "L 229.200964 165.96 \n",
+       "L 233.710055 165.96 \n",
+       "L 238.219146 165.96 \n",
+       "L 242.728237 20.299395 \n",
+       "L 247.237328 165.96 \n",
+       "L 251.746419 20.318917 \n",
+       "L 256.25551 20.314916 \n",
+       "L 260.764601 20.285419 \n",
+       "L 265.273692 20.294863 \n",
+       "L 269.782783 165.96 \n",
+       "L 274.291874 165.96 \n",
+       "L 278.800964 165.96 \n",
+       "L 283.310055 165.96 \n",
+       "L 287.819146 165.96 \n",
+       "L 292.328237 165.96 \n",
+       "L 296.837328 20.298438 \n",
+       "L 301.346419 20.314007 \n",
+       "L 305.85551 20.314005 \n",
+       "L 310.364601 20.300105 \n",
+       "L 314.873692 165.96 \n",
+       "L 319.382783 20.318716 \n",
+       "L 323.891874 165.96 \n",
+       "L 328.400964 20.335939 \n",
+       "L 332.910055 165.96 \n",
+       "L 337.419146 20.357045 \n",
+       "L 341.928237 165.96 \n",
+       "L 346.437328 20.369409 \n",
+       "L 350.946419 20.388468 \n",
+       "L 355.45551 20.411764 \n",
+       "L 359.964601 165.96 \n",
+       "L 364.473692 165.96 \n",
+       "L 368.982783 165.96 \n",
+       "L 373.491874 165.96 \n",
+       "L 378.000964 165.96 \n",
+       "L 382.510055 165.96 \n",
+       "L 387.019146 165.96 \n",
+       "L 391.528237 165.96 \n",
+       "L 396.037328 165.96 \n",
+       "L 400.546419 165.96 \n",
+       "L 405.05551 165.96 \n",
+       "L 409.564601 165.96 \n",
+       "L 414.073692 165.96 \n",
+       "L 418.582783 165.96 \n",
+       "L 423.091874 165.96 \n",
+       "L 427.600964 165.96 \n",
+       "L 432.110055 165.96 \n",
+       "L 436.619146 165.96 \n",
+       "L 441.128237 165.96 \n",
+       "L 445.637328 165.96 \n",
+       "L 450.146419 165.96 \n",
+       "L 454.65551 165.96 \n",
+       "L 459.164601 165.96 \n",
+       "L 463.673692 165.96 \n",
+       "L 468.182783 165.96 \n",
+       "L 472.691874 165.96 \n",
+       "L 477.200964 165.96 \n",
+       "L 481.710055 165.96 \n",
+       "L 486.219146 165.96 \n",
+       "L 490.728237 165.96 \n",
+       "L 495.237328 165.96 \n",
+       "L 499.746419 165.96 \n",
+       "\" clip-path=\"url(#p2733a9d8e5)\" style=\"fill: none; stroke: #4c72b0; stroke-width: 1.5; stroke-linecap: round\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_3\">\n",
+       "    <path d=\"M 73.637328 173.52 \n",
+       "L 73.637328 7.2 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_4\">\n",
+       "    <path d=\"M 520.037328 173.52 \n",
+       "L 520.037328 7.2 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_5\">\n",
+       "    <path d=\"M 73.637328 173.52 \n",
+       "L 520.037328 173.52 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_6\">\n",
+       "    <path d=\"M 73.637328 7.2 \n",
+       "L 520.037328 7.2 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "  </g>\n",
+       " </g>\n",
+       " <defs>\n",
+       "  <clipPath id=\"p2733a9d8e5\">\n",
+       "   <rect x=\"73.637328\" y=\"7.2\" width=\"446.4\" height=\"166.32\"/>\n",
+       "  </clipPath>\n",
+       " </defs>\n",
+       "</svg>\n"
+      ],
+      "text/plain": [
+       "<Figure size 800x300 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Heading is the direction in which the vehicle is pointing\n",
+    "plt.plot(traffic_scene['objects'][idx]['heading']);\n",
+    "plt.xlabel('Time step')\n",
+    "plt.ylabel('Heading')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[\n",
+      "    {\n",
+      "        \"x\": 0.634765625,\n",
+      "        \"y\": 0.72265625\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 0.46875,\n",
+      "        \"y\": 0.67138671875\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 0.615234375,\n",
+      "        \"y\": 0.89111328125\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 0.693359375,\n",
+      "        \"y\": 0.712890625\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 0.56640625,\n",
+      "        \"y\": 0.76171875\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 0.712890625,\n",
+      "        \"y\": 0.7568359375\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 0.810546875,\n",
+      "        \"y\": 0.9130859375\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 0.947265625,\n",
+      "        \"y\": 0.72265625\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 0.83984375,\n",
+      "        \"y\": 0.86181640625\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 0.771484375,\n",
+      "        \"y\": 0.70068359375\n",
+      "    }\n",
+      "]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Velocity holds the x- and y-components of the object's velocity (first 10 entries printed)\n",
+    "print(json.dumps(traffic_scene['objects'][idx]['velocity'][:10], indent=4))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/svg+xml": [
+       "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n",
+       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
+       "  \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
+       "<svg xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"503.190812pt\" height=\"217.357641pt\" viewBox=\"0 0 503.190812 217.357641\" xmlns=\"http://www.w3.org/2000/svg\" version=\"1.1\">\n",
+       " <metadata>\n",
+       "  <rdf:RDF xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n",
+       "   <cc:Work>\n",
+       "    <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n",
+       "    <dc:date>2023-10-03T10:23:29.389521</dc:date>\n",
+       "    <dc:format>image/svg+xml</dc:format>\n",
+       "    <dc:creator>\n",
+       "     <cc:Agent>\n",
+       "      <dc:title>Matplotlib v3.8.0, https://matplotlib.org/</dc:title>\n",
+       "     </cc:Agent>\n",
+       "    </dc:creator>\n",
+       "   </cc:Work>\n",
+       "  </rdf:RDF>\n",
+       " </metadata>\n",
+       " <defs>\n",
+       "  <style type=\"text/css\">*{stroke-linejoin: round; stroke-linecap: butt}</style>\n",
+       " </defs>\n",
+       " <g id=\"figure_1\">\n",
+       "  <g id=\"patch_1\">\n",
+       "   <path d=\"M 0 217.357641 \n",
+       "L 503.190812 217.357641 \n",
+       "L 503.190812 0 \n",
+       "L 0 0 \n",
+       "L 0 217.357641 \n",
+       "z\n",
+       "\" style=\"fill: none\"/>\n",
+       "  </g>\n",
+       "  <g id=\"axes_1\">\n",
+       "   <g id=\"patch_2\">\n",
+       "    <path d=\"M 49.590813 173.52 \n",
+       "L 495.990813 173.52 \n",
+       "L 495.990813 7.2 \n",
+       "L 49.590813 7.2 \n",
+       "L 49.590813 173.52 \n",
+       "z\n",
+       "\" style=\"fill: none\"/>\n",
+       "   </g>\n",
+       "   <g id=\"matplotlib.axis_1\">\n",
+       "    <g id=\"xtick_1\">\n",
+       "     <g id=\"line2d_1\">\n",
+       "      <defs>\n",
+       "       <path id=\"m57012d854e\" d=\"M 0 0 \n",
+       "L 0 6 \n",
+       "\" style=\"stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </defs>\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m57012d854e\" x=\"69.881722\" y=\"173.52\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_1\">\n",
+       "      <!-- 0 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(66.517354 191.680953) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-30\" d=\"M 266 2259 \n",
+       "Q 266 3072 433 3567 \n",
+       "Q 600 4063 929 4331 \n",
+       "Q 1259 4600 1759 4600 \n",
+       "Q 2128 4600 2406 4451 \n",
+       "Q 2684 4303 2865 4023 \n",
+       "Q 3047 3744 3150 3342 \n",
+       "Q 3253 2941 3253 2259 \n",
+       "Q 3253 1453 3087 958 \n",
+       "Q 2922 463 2592 192 \n",
+       "Q 2263 -78 1759 -78 \n",
+       "Q 1097 -78 719 397 \n",
+       "Q 266 969 266 2259 \n",
+       "z\n",
+       "M 844 2259 \n",
+       "Q 844 1131 1108 757 \n",
+       "Q 1372 384 1759 384 \n",
+       "Q 2147 384 2411 759 \n",
+       "Q 2675 1134 2675 2259 \n",
+       "Q 2675 3391 2411 3762 \n",
+       "Q 2147 4134 1753 4134 \n",
+       "Q 1366 4134 1134 3806 \n",
+       "Q 844 3388 844 2259 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-30\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_2\">\n",
+       "     <g id=\"line2d_2\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m57012d854e\" x=\"160.06354\" y=\"173.52\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_2\">\n",
+       "      <!-- 20 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(153.334805 191.680953) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-32\" d=\"M 3222 541 \n",
+       "L 3222 0 \n",
+       "L 194 0 \n",
+       "Q 188 203 259 391 \n",
+       "Q 375 700 629 1000 \n",
+       "Q 884 1300 1366 1694 \n",
+       "Q 2113 2306 2375 2664 \n",
+       "Q 2638 3022 2638 3341 \n",
+       "Q 2638 3675 2398 3904 \n",
+       "Q 2159 4134 1775 4134 \n",
+       "Q 1369 4134 1125 3890 \n",
+       "Q 881 3647 878 3216 \n",
+       "L 300 3275 \n",
+       "Q 359 3922 746 4261 \n",
+       "Q 1134 4600 1788 4600 \n",
+       "Q 2447 4600 2831 4234 \n",
+       "Q 3216 3869 3216 3328 \n",
+       "Q 3216 3053 3103 2787 \n",
+       "Q 2991 2522 2730 2228 \n",
+       "Q 2469 1934 1863 1422 \n",
+       "Q 1356 997 1212 845 \n",
+       "Q 1069 694 975 541 \n",
+       "L 3222 541 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-32\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_3\">\n",
+       "     <g id=\"line2d_3\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m57012d854e\" x=\"250.245358\" y=\"173.52\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_3\">\n",
+       "      <!-- 40 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(243.516624 191.680953) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-34\" d=\"M 2069 0 \n",
+       "L 2069 1097 \n",
+       "L 81 1097 \n",
+       "L 81 1613 \n",
+       "L 2172 4581 \n",
+       "L 2631 4581 \n",
+       "L 2631 1613 \n",
+       "L 3250 1613 \n",
+       "L 3250 1097 \n",
+       "L 2631 1097 \n",
+       "L 2631 0 \n",
+       "L 2069 0 \n",
+       "z\n",
+       "M 2069 1613 \n",
+       "L 2069 3678 \n",
+       "L 634 1613 \n",
+       "L 2069 1613 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-34\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_4\">\n",
+       "     <g id=\"line2d_4\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m57012d854e\" x=\"340.427176\" y=\"173.52\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_4\">\n",
+       "      <!-- 60 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(333.698442 191.680953) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-36\" d=\"M 3184 3459 \n",
+       "L 2625 3416 \n",
+       "Q 2550 3747 2413 3897 \n",
+       "Q 2184 4138 1850 4138 \n",
+       "Q 1581 4138 1378 3988 \n",
+       "Q 1113 3794 959 3422 \n",
+       "Q 806 3050 800 2363 \n",
+       "Q 1003 2672 1297 2822 \n",
+       "Q 1591 2972 1913 2972 \n",
+       "Q 2475 2972 2870 2558 \n",
+       "Q 3266 2144 3266 1488 \n",
+       "Q 3266 1056 3080 686 \n",
+       "Q 2894 316 2569 119 \n",
+       "Q 2244 -78 1831 -78 \n",
+       "Q 1128 -78 684 439 \n",
+       "Q 241 956 241 2144 \n",
+       "Q 241 3472 731 4075 \n",
+       "Q 1159 4600 1884 4600 \n",
+       "Q 2425 4600 2770 4297 \n",
+       "Q 3116 3994 3184 3459 \n",
+       "z\n",
+       "M 888 1484 \n",
+       "Q 888 1194 1011 928 \n",
+       "Q 1134 663 1356 523 \n",
+       "Q 1578 384 1822 384 \n",
+       "Q 2178 384 2434 671 \n",
+       "Q 2691 959 2691 1453 \n",
+       "Q 2691 1928 2437 2201 \n",
+       "Q 2184 2475 1800 2475 \n",
+       "Q 1419 2475 1153 2201 \n",
+       "Q 888 1928 888 1484 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-36\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"xtick_5\">\n",
+       "     <g id=\"line2d_5\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#m57012d854e\" x=\"430.608994\" y=\"173.52\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_5\">\n",
+       "      <!-- 80 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(423.88026 191.680953) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-38\" d=\"M 1131 2484 \n",
+       "Q 781 2613 612 2850 \n",
+       "Q 444 3088 444 3419 \n",
+       "Q 444 3919 803 4259 \n",
+       "Q 1163 4600 1759 4600 \n",
+       "Q 2359 4600 2725 4251 \n",
+       "Q 3091 3903 3091 3403 \n",
+       "Q 3091 3084 2923 2848 \n",
+       "Q 2756 2613 2416 2484 \n",
+       "Q 2838 2347 3058 2040 \n",
+       "Q 3278 1734 3278 1309 \n",
+       "Q 3278 722 2862 322 \n",
+       "Q 2447 -78 1769 -78 \n",
+       "Q 1091 -78 675 323 \n",
+       "Q 259 725 259 1325 \n",
+       "Q 259 1772 486 2073 \n",
+       "Q 713 2375 1131 2484 \n",
+       "z\n",
+       "M 1019 3438 \n",
+       "Q 1019 3113 1228 2906 \n",
+       "Q 1438 2700 1772 2700 \n",
+       "Q 2097 2700 2305 2904 \n",
+       "Q 2513 3109 2513 3406 \n",
+       "Q 2513 3716 2298 3927 \n",
+       "Q 2084 4138 1766 4138 \n",
+       "Q 1444 4138 1231 3931 \n",
+       "Q 1019 3725 1019 3438 \n",
+       "z\n",
+       "M 838 1322 \n",
+       "Q 838 1081 952 856 \n",
+       "Q 1066 631 1291 507 \n",
+       "Q 1516 384 1775 384 \n",
+       "Q 2178 384 2440 643 \n",
+       "Q 2703 903 2703 1303 \n",
+       "Q 2703 1709 2433 1975 \n",
+       "Q 2163 2241 1756 2241 \n",
+       "Q 1359 2241 1098 1978 \n",
+       "Q 838 1716 838 1322 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-38\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"55.615234\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"text_6\">\n",
+       "     <!-- Time step -->\n",
+       "     <g style=\"fill: #262626\" transform=\"translate(244.065344 207.534141) scale(0.132 -0.132)\">\n",
+       "      <defs>\n",
+       "       <path id=\"ArialMT-54\" d=\"M 1659 0 \n",
+       "L 1659 4041 \n",
+       "L 150 4041 \n",
+       "L 150 4581 \n",
+       "L 3781 4581 \n",
+       "L 3781 4041 \n",
+       "L 2266 4041 \n",
+       "L 2266 0 \n",
+       "L 1659 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-69\" d=\"M 425 3934 \n",
+       "L 425 4581 \n",
+       "L 988 4581 \n",
+       "L 988 3934 \n",
+       "L 425 3934 \n",
+       "z\n",
+       "M 425 0 \n",
+       "L 425 3319 \n",
+       "L 988 3319 \n",
+       "L 988 0 \n",
+       "L 425 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-6d\" d=\"M 422 0 \n",
+       "L 422 3319 \n",
+       "L 925 3319 \n",
+       "L 925 2853 \n",
+       "Q 1081 3097 1340 3245 \n",
+       "Q 1600 3394 1931 3394 \n",
+       "Q 2300 3394 2536 3241 \n",
+       "Q 2772 3088 2869 2813 \n",
+       "Q 3263 3394 3894 3394 \n",
+       "Q 4388 3394 4653 3120 \n",
+       "Q 4919 2847 4919 2278 \n",
+       "L 4919 0 \n",
+       "L 4359 0 \n",
+       "L 4359 2091 \n",
+       "Q 4359 2428 4304 2576 \n",
+       "Q 4250 2725 4106 2815 \n",
+       "Q 3963 2906 3769 2906 \n",
+       "Q 3419 2906 3187 2673 \n",
+       "Q 2956 2441 2956 1928 \n",
+       "L 2956 0 \n",
+       "L 2394 0 \n",
+       "L 2394 2156 \n",
+       "Q 2394 2531 2256 2718 \n",
+       "Q 2119 2906 1806 2906 \n",
+       "Q 1569 2906 1367 2781 \n",
+       "Q 1166 2656 1075 2415 \n",
+       "Q 984 2175 984 1722 \n",
+       "L 984 0 \n",
+       "L 422 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-65\" d=\"M 2694 1069 \n",
+       "L 3275 997 \n",
+       "Q 3138 488 2766 206 \n",
+       "Q 2394 -75 1816 -75 \n",
+       "Q 1088 -75 661 373 \n",
+       "Q 234 822 234 1631 \n",
+       "Q 234 2469 665 2931 \n",
+       "Q 1097 3394 1784 3394 \n",
+       "Q 2450 3394 2872 2941 \n",
+       "Q 3294 2488 3294 1666 \n",
+       "Q 3294 1616 3291 1516 \n",
+       "L 816 1516 \n",
+       "Q 847 969 1125 678 \n",
+       "Q 1403 388 1819 388 \n",
+       "Q 2128 388 2347 550 \n",
+       "Q 2566 713 2694 1069 \n",
+       "z\n",
+       "M 847 1978 \n",
+       "L 2700 1978 \n",
+       "Q 2663 2397 2488 2606 \n",
+       "Q 2219 2931 1791 2931 \n",
+       "Q 1403 2931 1139 2672 \n",
+       "Q 875 2413 847 1978 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-20\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-73\" d=\"M 197 991 \n",
+       "L 753 1078 \n",
+       "Q 800 744 1014 566 \n",
+       "Q 1228 388 1613 388 \n",
+       "Q 2000 388 2187 545 \n",
+       "Q 2375 703 2375 916 \n",
+       "Q 2375 1106 2209 1216 \n",
+       "Q 2094 1291 1634 1406 \n",
+       "Q 1016 1563 777 1677 \n",
+       "Q 538 1791 414 1992 \n",
+       "Q 291 2194 291 2438 \n",
+       "Q 291 2659 392 2848 \n",
+       "Q 494 3038 669 3163 \n",
+       "Q 800 3259 1026 3326 \n",
+       "Q 1253 3394 1513 3394 \n",
+       "Q 1903 3394 2198 3281 \n",
+       "Q 2494 3169 2634 2976 \n",
+       "Q 2775 2784 2828 2463 \n",
+       "L 2278 2388 \n",
+       "Q 2241 2644 2061 2787 \n",
+       "Q 1881 2931 1553 2931 \n",
+       "Q 1166 2931 1000 2803 \n",
+       "Q 834 2675 834 2503 \n",
+       "Q 834 2394 903 2306 \n",
+       "Q 972 2216 1119 2156 \n",
+       "Q 1203 2125 1616 2013 \n",
+       "Q 2213 1853 2448 1751 \n",
+       "Q 2684 1650 2818 1456 \n",
+       "Q 2953 1263 2953 975 \n",
+       "Q 2953 694 2789 445 \n",
+       "Q 2625 197 2315 61 \n",
+       "Q 2006 -75 1616 -75 \n",
+       "Q 969 -75 630 194 \n",
+       "Q 291 463 197 991 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-74\" d=\"M 1650 503 \n",
+       "L 1731 6 \n",
+       "Q 1494 -44 1306 -44 \n",
+       "Q 1000 -44 831 53 \n",
+       "Q 663 150 594 308 \n",
+       "Q 525 466 525 972 \n",
+       "L 525 2881 \n",
+       "L 113 2881 \n",
+       "L 113 3319 \n",
+       "L 525 3319 \n",
+       "L 525 4141 \n",
+       "L 1084 4478 \n",
+       "L 1084 3319 \n",
+       "L 1650 3319 \n",
+       "L 1650 2881 \n",
+       "L 1084 2881 \n",
+       "L 1084 941 \n",
+       "Q 1084 700 1114 631 \n",
+       "Q 1144 563 1211 522 \n",
+       "Q 1278 481 1403 481 \n",
+       "Q 1497 481 1650 503 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-70\" d=\"M 422 -1272 \n",
+       "L 422 3319 \n",
+       "L 934 3319 \n",
+       "L 934 2888 \n",
+       "Q 1116 3141 1344 3267 \n",
+       "Q 1572 3394 1897 3394 \n",
+       "Q 2322 3394 2647 3175 \n",
+       "Q 2972 2956 3137 2557 \n",
+       "Q 3303 2159 3303 1684 \n",
+       "Q 3303 1175 3120 767 \n",
+       "Q 2938 359 2589 142 \n",
+       "Q 2241 -75 1856 -75 \n",
+       "Q 1575 -75 1351 44 \n",
+       "Q 1128 163 984 344 \n",
+       "L 984 -1272 \n",
+       "L 422 -1272 \n",
+       "z\n",
+       "M 931 1641 \n",
+       "Q 931 1000 1190 694 \n",
+       "Q 1450 388 1819 388 \n",
+       "Q 2194 388 2461 705 \n",
+       "Q 2728 1022 2728 1688 \n",
+       "Q 2728 2322 2467 2637 \n",
+       "Q 2206 2953 1844 2953 \n",
+       "Q 1484 2953 1207 2617 \n",
+       "Q 931 2281 931 1641 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      </defs>\n",
+       "      <use xlink:href=\"#ArialMT-54\"/>\n",
+       "      <use xlink:href=\"#ArialMT-69\" x=\"57.333984\"/>\n",
+       "      <use xlink:href=\"#ArialMT-6d\" x=\"79.550781\"/>\n",
+       "      <use xlink:href=\"#ArialMT-65\" x=\"162.851562\"/>\n",
+       "      <use xlink:href=\"#ArialMT-20\" x=\"218.466797\"/>\n",
+       "      <use xlink:href=\"#ArialMT-73\" x=\"246.25\"/>\n",
+       "      <use xlink:href=\"#ArialMT-74\" x=\"296.25\"/>\n",
+       "      <use xlink:href=\"#ArialMT-65\" x=\"324.033203\"/>\n",
+       "      <use xlink:href=\"#ArialMT-70\" x=\"379.648438\"/>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "   </g>\n",
+       "   <g id=\"matplotlib.axis_2\">\n",
+       "    <g id=\"ytick_1\">\n",
+       "     <g id=\"line2d_6\">\n",
+       "      <defs>\n",
+       "       <path id=\"ma6cf721824\" d=\"M 0 0 \n",
+       "L -6 0 \n",
+       "\" style=\"stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </defs>\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#ma6cf721824\" x=\"49.590813\" y=\"165.96\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_7\">\n",
+       "      <!-- 0.0 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(23.271813 170.290477) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-2e\" d=\"M 581 0 \n",
+       "L 581 641 \n",
+       "L 1222 641 \n",
+       "L 1222 0 \n",
+       "L 581 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-30\"/>\n",
+       "       <use xlink:href=\"#ArialMT-2e\" x=\"55.615234\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"83.398438\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_2\">\n",
+       "     <g id=\"line2d_7\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#ma6cf721824\" x=\"49.590813\" y=\"135.72\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_8\">\n",
+       "      <!-- 0.2 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(23.271813 140.050477) scale(0.121 -0.121)\">\n",
+       "       <use xlink:href=\"#ArialMT-30\"/>\n",
+       "       <use xlink:href=\"#ArialMT-2e\" x=\"55.615234\"/>\n",
+       "       <use xlink:href=\"#ArialMT-32\" x=\"83.398438\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_3\">\n",
+       "     <g id=\"line2d_8\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#ma6cf721824\" x=\"49.590813\" y=\"105.48\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_9\">\n",
+       "      <!-- 0.4 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(23.271813 109.810477) scale(0.121 -0.121)\">\n",
+       "       <use xlink:href=\"#ArialMT-30\"/>\n",
+       "       <use xlink:href=\"#ArialMT-2e\" x=\"55.615234\"/>\n",
+       "       <use xlink:href=\"#ArialMT-34\" x=\"83.398438\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_4\">\n",
+       "     <g id=\"line2d_9\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#ma6cf721824\" x=\"49.590813\" y=\"75.24\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_10\">\n",
+       "      <!-- 0.6 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(23.271813 79.570477) scale(0.121 -0.121)\">\n",
+       "       <use xlink:href=\"#ArialMT-30\"/>\n",
+       "       <use xlink:href=\"#ArialMT-2e\" x=\"55.615234\"/>\n",
+       "       <use xlink:href=\"#ArialMT-36\" x=\"83.398438\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_5\">\n",
+       "     <g id=\"line2d_10\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#ma6cf721824\" x=\"49.590813\" y=\"45\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_11\">\n",
+       "      <!-- 0.8 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(23.271813 49.330477) scale(0.121 -0.121)\">\n",
+       "       <use xlink:href=\"#ArialMT-30\"/>\n",
+       "       <use xlink:href=\"#ArialMT-2e\" x=\"55.615234\"/>\n",
+       "       <use xlink:href=\"#ArialMT-38\" x=\"83.398438\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"ytick_6\">\n",
+       "     <g id=\"line2d_11\">\n",
+       "      <g>\n",
+       "       <use xlink:href=\"#ma6cf721824\" x=\"49.590813\" y=\"14.76\" style=\"fill: #262626; stroke: #262626; stroke-width: 1.25\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "     <g id=\"text_12\">\n",
+       "      <!-- 1.0 -->\n",
+       "      <g style=\"fill: #262626\" transform=\"translate(23.271813 19.090477) scale(0.121 -0.121)\">\n",
+       "       <defs>\n",
+       "        <path id=\"ArialMT-31\" d=\"M 2384 0 \n",
+       "L 1822 0 \n",
+       "L 1822 3584 \n",
+       "Q 1619 3391 1289 3197 \n",
+       "Q 959 3003 697 2906 \n",
+       "L 697 3450 \n",
+       "Q 1169 3672 1522 3987 \n",
+       "Q 1875 4303 2022 4600 \n",
+       "L 2384 4600 \n",
+       "L 2384 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       </defs>\n",
+       "       <use xlink:href=\"#ArialMT-31\"/>\n",
+       "       <use xlink:href=\"#ArialMT-2e\" x=\"55.615234\"/>\n",
+       "       <use xlink:href=\"#ArialMT-30\" x=\"83.398438\"/>\n",
+       "      </g>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "    <g id=\"text_13\">\n",
+       "     <!-- IS VALID -->\n",
+       "     <g style=\"fill: #262626\" transform=\"translate(16.648313 117.017813) rotate(-90) scale(0.132 -0.132)\">\n",
+       "      <defs>\n",
+       "       <path id=\"ArialMT-49\" d=\"M 597 0 \n",
+       "L 597 4581 \n",
+       "L 1203 4581 \n",
+       "L 1203 0 \n",
+       "L 597 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-53\" d=\"M 288 1472 \n",
+       "L 859 1522 \n",
+       "Q 900 1178 1048 958 \n",
+       "Q 1197 738 1509 602 \n",
+       "Q 1822 466 2213 466 \n",
+       "Q 2559 466 2825 569 \n",
+       "Q 3091 672 3220 851 \n",
+       "Q 3350 1031 3350 1244 \n",
+       "Q 3350 1459 3225 1620 \n",
+       "Q 3100 1781 2813 1891 \n",
+       "Q 2628 1963 1997 2114 \n",
+       "Q 1366 2266 1113 2400 \n",
+       "Q 784 2572 623 2826 \n",
+       "Q 463 3081 463 3397 \n",
+       "Q 463 3744 659 4045 \n",
+       "Q 856 4347 1234 4503 \n",
+       "Q 1613 4659 2075 4659 \n",
+       "Q 2584 4659 2973 4495 \n",
+       "Q 3363 4331 3572 4012 \n",
+       "Q 3781 3694 3797 3291 \n",
+       "L 3216 3247 \n",
+       "Q 3169 3681 2898 3903 \n",
+       "Q 2628 4125 2100 4125 \n",
+       "Q 1550 4125 1298 3923 \n",
+       "Q 1047 3722 1047 3438 \n",
+       "Q 1047 3191 1225 3031 \n",
+       "Q 1400 2872 2139 2705 \n",
+       "Q 2878 2538 3153 2413 \n",
+       "Q 3553 2228 3743 1945 \n",
+       "Q 3934 1663 3934 1294 \n",
+       "Q 3934 928 3725 604 \n",
+       "Q 3516 281 3123 101 \n",
+       "Q 2731 -78 2241 -78 \n",
+       "Q 1619 -78 1198 103 \n",
+       "Q 778 284 539 648 \n",
+       "Q 300 1013 288 1472 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-56\" d=\"M 1803 0 \n",
+       "L 28 4581 \n",
+       "L 684 4581 \n",
+       "L 1875 1253 \n",
+       "Q 2019 853 2116 503 \n",
+       "Q 2222 878 2363 1253 \n",
+       "L 3600 4581 \n",
+       "L 4219 4581 \n",
+       "L 2425 0 \n",
+       "L 1803 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-41\" d=\"M -9 0 \n",
+       "L 1750 4581 \n",
+       "L 2403 4581 \n",
+       "L 4278 0 \n",
+       "L 3588 0 \n",
+       "L 3053 1388 \n",
+       "L 1138 1388 \n",
+       "L 634 0 \n",
+       "L -9 0 \n",
+       "z\n",
+       "M 1313 1881 \n",
+       "L 2866 1881 \n",
+       "L 2388 3150 \n",
+       "Q 2169 3728 2063 4100 \n",
+       "Q 1975 3659 1816 3225 \n",
+       "L 1313 1881 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-4c\" d=\"M 469 0 \n",
+       "L 469 4581 \n",
+       "L 1075 4581 \n",
+       "L 1075 541 \n",
+       "L 3331 541 \n",
+       "L 3331 0 \n",
+       "L 469 0 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "       <path id=\"ArialMT-44\" d=\"M 494 0 \n",
+       "L 494 4581 \n",
+       "L 2072 4581 \n",
+       "Q 2606 4581 2888 4516 \n",
+       "Q 3281 4425 3559 4188 \n",
+       "Q 3922 3881 4101 3404 \n",
+       "Q 4281 2928 4281 2316 \n",
+       "Q 4281 1794 4159 1391 \n",
+       "Q 4038 988 3847 723 \n",
+       "Q 3656 459 3429 307 \n",
+       "Q 3203 156 2883 78 \n",
+       "Q 2563 0 2147 0 \n",
+       "L 494 0 \n",
+       "z\n",
+       "M 1100 541 \n",
+       "L 2078 541 \n",
+       "Q 2531 541 2789 625 \n",
+       "Q 3047 709 3200 863 \n",
+       "Q 3416 1078 3536 1442 \n",
+       "Q 3656 1806 3656 2325 \n",
+       "Q 3656 3044 3420 3430 \n",
+       "Q 3184 3816 2847 3947 \n",
+       "Q 2603 4041 2063 4041 \n",
+       "L 1100 4041 \n",
+       "L 1100 541 \n",
+       "z\n",
+       "\" transform=\"scale(0.015625)\"/>\n",
+       "      </defs>\n",
+       "      <use xlink:href=\"#ArialMT-49\"/>\n",
+       "      <use xlink:href=\"#ArialMT-53\" x=\"27.783203\"/>\n",
+       "      <use xlink:href=\"#ArialMT-20\" x=\"94.482422\"/>\n",
+       "      <use xlink:href=\"#ArialMT-56\" x=\"122.265625\"/>\n",
+       "      <use xlink:href=\"#ArialMT-41\" x=\"181.589844\"/>\n",
+       "      <use xlink:href=\"#ArialMT-4c\" x=\"248.289062\"/>\n",
+       "      <use xlink:href=\"#ArialMT-49\" x=\"303.904297\"/>\n",
+       "      <use xlink:href=\"#ArialMT-44\" x=\"331.6875\"/>\n",
+       "     </g>\n",
+       "    </g>\n",
+       "   </g>\n",
+       "   <g id=\"line2d_12\">\n",
+       "    <defs>\n",
+       "     <path id=\"m26ba1eb1b2\" d=\"M 3 0 \n",
+       "L -3 -0 \n",
+       "\" style=\"stroke: #4c72b0\"/>\n",
+       "    </defs>\n",
+       "    <g clip-path=\"url(#p576785bda3)\">\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"69.881722\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"74.390813\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"78.899903\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"83.408994\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"87.918085\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"92.427176\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"96.936267\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"101.445358\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"105.954449\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"110.46354\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"114.972631\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"119.481722\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"123.990813\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"128.499903\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"133.008994\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"137.518085\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"142.027176\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"146.536267\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"151.045358\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"155.554449\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"160.06354\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"164.572631\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"169.081722\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"173.590812\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"178.099903\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"182.608994\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"187.118085\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"191.627176\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"196.136267\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"200.645358\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"205.154449\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"209.66354\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"214.172631\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"218.681722\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"223.190812\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"227.699903\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"232.208994\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"236.718085\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"241.227176\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"245.736267\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"250.245358\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"254.754449\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"259.26354\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"263.772631\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"268.281722\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"272.790813\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"277.299903\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"281.808994\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"286.318085\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"290.827176\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"295.336267\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"299.845358\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"304.354449\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"308.86354\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"313.372631\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"317.881722\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"322.390812\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"326.899903\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"331.408994\" y=\"14.76\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"335.918085\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"340.427176\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"344.936267\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"349.445358\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"353.954449\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"358.46354\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"362.972631\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"367.481722\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"371.990812\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"376.499903\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"381.008994\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"385.518085\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"390.027176\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"394.536267\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"399.045358\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"403.554449\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"408.06354\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"412.572631\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"417.081722\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"421.590812\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"426.099903\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"430.608994\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"435.118085\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"439.627176\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"444.136267\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"448.645358\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"453.154449\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"457.66354\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"462.172631\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"466.681722\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"471.190812\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "     <use xlink:href=\"#m26ba1eb1b2\" x=\"475.699903\" y=\"165.96\" style=\"fill: #4c72b0; stroke: #4c72b0\"/>\n",
+       "    </g>\n",
+       "   </g>\n",
+       "   <g id=\"patch_3\">\n",
+       "    <path d=\"M 49.590813 173.52 \n",
+       "L 49.590813 7.2 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_4\">\n",
+       "    <path d=\"M 495.990813 173.52 \n",
+       "L 495.990813 7.2 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_5\">\n",
+       "    <path d=\"M 49.590813 173.52 \n",
+       "L 495.990812 173.52 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "   <g id=\"patch_6\">\n",
+       "    <path d=\"M 49.590813 7.2 \n",
+       "L 495.990812 7.2 \n",
+       "\" style=\"fill: none; stroke: #262626; stroke-width: 1.25; stroke-linejoin: miter; stroke-linecap: square\"/>\n",
+       "   </g>\n",
+       "  </g>\n",
+       " </g>\n",
+       " <defs>\n",
+       "  <clipPath id=\"p576785bda3\">\n",
+       "   <rect x=\"49.590813\" y=\"7.2\" width=\"446.4\" height=\"166.32\"/>\n",
+       "  </clipPath>\n",
+       " </defs>\n",
+       "</svg>\n"
+      ],
+      "text/plain": [
+       "<Figure size 800x300 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Valid indicates if the state of the vehicle was observed for each timepoint\n",
+    "plt.xlabel('Time step')\n",
+    "plt.ylabel('IS VALID');\n",
+    "plt.plot(traffic_scene['objects'][idx]['valid'], '_', lw=5)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'x': 9041.1259765625, 'y': -2716.647216796875}"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Each object has a goalPosition, an (x, y) position within the scene\n",
+    "traffic_scene['objects'][idx]['goalPosition']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'pedestrian'"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Finally, we have the type of the object (here, a pedestrian)\n",
+    "traffic_scene['objects'][idx]['type']"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### In-Depth: Road Points\n",
+    "\n",
+    "Road points are static objects in the scene."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['geometry', 'type'])"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "traffic_scene['roads'][idx].keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'road_edge'"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# This point represents the edge of a road\n",
+    "traffic_scene['roads'][idx]['type']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[\n",
+      "    {\n",
+      "        \"x\": 8922.911733810946,\n",
+      "        \"y\": -2849.426741530589\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 8923.216436260553,\n",
+      "        \"y\": -2849.038518766975\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 8923.50673911804,\n",
+      "        \"y\": -2848.63941352788\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 8923.782254084921,\n",
+      "        \"y\": -2848.2299596442986\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 8924.042612639492,\n",
+      "        \"y\": -2847.8107047886665\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 8924.287466537296,\n",
+      "        \"y\": -2847.382209743547\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 8924.516488266596,\n",
+      "        \"y\": -2846.945047650609\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 8924.729371495881,\n",
+      "        \"y\": -2846.49980324385\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 8924.91688626026,\n",
+      "        \"y\": -2846.067714357487\n",
+      "    },\n",
+      "    {\n",
+      "        \"x\": 8925.087545312272,\n",
+      "        \"y\": -2845.6286986979553\n",
+      "    }\n",
+      "]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Geometry contains the (x, y) position(s) for a road point\n",
+    "# Note that this will be a list for road lanes and edges but a single (x, y) tuple for stop signs and the like\n",
+    "print(json.dumps(traffic_scene['roads'][idx]['geometry'][:10], indent=4));"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "nocturne-research",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/02_nocturne_concepts.ipynb b/examples/02_nocturne_concepts.ipynb
new file mode 100644
index 00000000..0863e19b
--- /dev/null
+++ b/examples/02_nocturne_concepts.ipynb
@@ -0,0 +1,785 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Nocturne concepts\n",
+    "\n",
+    "This page introduces the most basic elements of Nocturne. You can find further information about these [in Section 3 of the Nocturne paper](https://arxiv.org/abs/2206.09889).\n",
+    "\n",
+    "_Last update: 10/2023_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "import os\n",
+    "os.chdir('..')\n",
+    "\n",
+    "data_path = './data/example_scenario.json'"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Summary\n",
+    "\n",
+    "- Nocturne simulations are **discretized traffic scenarios**. A scenario is a constructed snapshot of a traffic situation at a particular timepoint.\n",
+    "- The state of the vehicle of focus is referred to as the **ego state**. Each vehicle has its **own partial view of the traffic scene**; a visible state is constructed by parameterizing the view distance, head angle and cone radius of the driver. The action for each vehicle is a `(1, 3)` tuple with the acceleration, steering and head angle of the vehicle.\n",
+    "- The **step method advances the simulation** with a desired step size. By default, the dynamics of vehicles are driven by a kinematic bicycle model. If a vehicle is set to expert-controlled mode, its position, heading, and speed will be updated according to a trajectory recorded from a human driver."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Simulation\n",
+    "\n",
+    "In Nocturne, a simulation discretizes an existing traffic scenario. At the moment, Nocturne supports traffic scenarios from the Waymo Open Dataset, but can be further extended to work with other driving datasets. \n",
+    "\n",
+    "<figure>\n",
+    "<center>\n",
+    "<img src='https://drive.google.com/uc?id=1nv5Rbyf7ZfdqTdaUduXvEI7ncdkLpDjc' width='650'/>\n",
+    "<figcaption>An example of a set of traffic scenarios in Nocturne. Upon initialization, a start time is chosen. After each iteration we take a step in the simulation, which gets us to the next scenario. This is done until we reach the end of the simulation.</figcaption></center>\n",
+    "</figure>\n",
+    "\n",
+    "We show an example of this using `example_scenario.json`, where our traffic data is extracted from the Waymo open motion dataset:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from nocturne import Simulation\n",
+    "\n",
+    "scenario_config = {\n",
+    "    'start_time': 0, # When to start the simulation\n",
+    "    'allow_non_vehicles': True, # Whether to include cyclists and pedestrians \n",
+    "    'max_visible_road_points': 10, # Maximum number of road points for a vehicle\n",
+    "    'max_visible_objects': 10, # Maximum number of road objects for a vehicle\n",
+    "    'max_visible_traffic_lights': 10, # Maximum number of traffic lights in constructed view\n",
+    "    'max_visible_stop_signs': 10, # Maximum number of stop signs in constructed view\n",
+    "}\n",
+    "\n",
+    "# Create simulation\n",
+    "sim = Simulation(data_path, scenario_config)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Scenario\n",
+    "\n",
+    "A simulation consists of a set of scenarios. A scenario is a snapshot of the traffic scene at a particular timepoint. \n",
+    "\n",
+    "Here is how to create a scenario object:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get traffic scenario at timepoint\n",
+    "scenario = sim.getScenario()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `scenario` object holds the information we are interested in. Here are a couple of examples:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "33"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# The number of road objects in the scene\n",
+    "len(scenario.getObjects())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total # moving objects: 15\n",
+      "\n",
+      "Object IDs of moving vehicles: \n",
+      " [0, 1, 2, 3, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32] \n"
+     ]
+    }
+   ],
+   "source": [
+    "# The road objects that moved at a particular timepoint\n",
+    "objects_that_moved = scenario.getObjectsThatMoved()\n",
+    "\n",
+    "print(f'Total # moving objects: {len(objects_that_moved)}\\n')\n",
+    "print(f'Object IDs of moving vehicles: \\n {[obj.getID() for obj in objects_that_moved]} ')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "128"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Number of road lines\n",
+    "len(scenario.road_lines())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[<nocturne_cpp.Vehicle at 0x114731130>,\n",
+       " <nocturne_cpp.Vehicle at 0x1133374f0>,\n",
+       " <nocturne_cpp.Vehicle at 0x114756530>,\n",
+       " <nocturne_cpp.Vehicle at 0x114756630>,\n",
+       " <nocturne_cpp.Vehicle at 0x114755730>]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "scenario.getVehicles()[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# No cyclists in this scene\n",
+    "scenario.getCyclists()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 2 moving vehicles in scene: [3, 32]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Select all vehicles that moved\n",
+    "moving_vehicles = [obj for obj in scenario.getVehicles() if obj in objects_that_moved]\n",
+    "\n",
+    "print(f'Found {len(moving_vehicles)} moving vehicles in scene: {[vehicle.getID() for vehicle in moving_vehicles]}')"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Ego state\n",
+    "\n",
+    "The **ego state** is an array with features that describe the current vehicle. This array holds the following information: \n",
+    "- 0: length of ego vehicle\n",
+    "- 1: width of ego vehicle\n",
+    "- 2: speed of ego vehicle\n",
+    "- 3: distance to the goal position of ego vehicle\n",
+    "- 4: angle to the goal (target azimuth) \n",
+    "- 5: desired heading at goal position\n",
+    "- 6: desired speed at goal position\n",
+    "- 7: current acceleration\n",
+    "- 8: current steering position\n",
+    "- 9: current head angle"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Selected vehicle # 3\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "array([ 4.4936213 ,  1.9770377 ,  0.07662283,  4.24219   , -0.05617166,\n",
+       "       -0.05909407,  1.6792779 ,  0.        ,  0.        ,  0.        ],\n",
+       "      dtype=float32)"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Select an arbitrary vehicle\n",
+    "ego_vehicle = moving_vehicles[0]\n",
+    "\n",
+    "print(f'Selected vehicle # {ego_vehicle.getID()}')\n",
+    "\n",
+    "# Get the state for ego vehicle\n",
+    "scenario.ego_state(ego_vehicle)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Visible state\n",
+    "\n",
+    "We use the ego vehicle state, together with a view distance (how far the vehicle can see) and a view angle to construct the **visible state**. The figure below shows this procedure for a simplified traffic scene. \n",
+    "\n",
+    "Calling `scenario.visible_state()` returns a dictionary with four matrices:\n",
+    "- `stop_signs`: The visible stop signs.\n",
+    "- `traffic_lights`: The states for the traffic lights from the perspective of the ego driver (red, yellow, green).\n",
+    "- `road_points`: The observable road points (static elements in the scene).\n",
+    "- `objects`: The observable road objects (vehicles, pedestrians and cyclists).\n",
+    "\n",
+    "<figure>\n",
+    "<center>\n",
+    "<img src='https://drive.google.com/uc?id=1fG43NvPCzaimmW99asRdB73qY-F4u-q0' width='700'/>\n",
+    "<figcaption>To investigate coordination under partial observability, agents in Nocturne can only see an obstructed view of their environment. In this simplified traffic scene, we construct the state for the red ego driver. Note that Nocturne assumes that stop signs can be viewed, even if they are behind another driver. </figcaption></center>\n",
+    "</figure>\n",
+    "\n",
+    "\\begin{align*}\n",
+    "\\end{align*}\n",
+    "\n",
+    "<figure>\n",
+    "<center>\n",
+    "<img src='https://drive.google.com/uc?id=1egNkFArE-n4cp6KbeoQyWeePiQ28jYYE' width='300'/>\n",
+    "<figcaption>The same scene, this time showing the view of the yellow car.</figcaption></center>\n",
+    "</figure>"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The shape of the visible state is a function of the maximum number of visible objects defined at initialization (traffic lights, stop signs, road objects, and road points) and whether we add padding. If `padding = True`, an array of size `(max visible objects, # features)` is always constructed, even if there are no visible objects. Otherwise, if `padding = False`, new entries are only created when objects are visible.\n",
+    "\n",
+    "For example, say a vehicle does not observe any stop signs at a given timepoint. If we set `padding=False`, and run `visible_state['stop_signs']`, we'll get back an empty array with the shape `(0, 3)`, where 3 is the number of features per stop sign. However, if the vehicle observes two stop signs using the same setting, then `visible_state['stop_signs']` will return an array with the shape `(2, 3)`.\n",
+    "\n",
+    "On the other hand, if we set `padding=True`, the resulting array will always have a shape of `(max visible stop signs, 3)`, irrespective of how many stop signs the vehicle actually observes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['stop_signs', 'traffic_lights', 'road_points', 'objects'])"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Define viewing distance, radius and head angle\n",
+    "view_distance = 80 \n",
+    "view_angle = np.radians(120) \n",
+    "head_angle = 0\n",
+    "padding = True \n",
+    "\n",
+    "# Construct the visible state for ego vehicle\n",
+    "visible_state = scenario.visible_state(\n",
+    "    ego_vehicle, \n",
+    "    view_dist=view_distance, \n",
+    "    view_angle=view_angle,\n",
+    "    head_angle=head_angle,\n",
+    "    padding=padding,\n",
+    ")\n",
+    "\n",
+    "visible_state.keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# There are no visible stop signs at this point\n",
+    "visible_state['stop_signs'].T"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Traffic light states are filtered out in this version of Nocturne\n",
+    "visible_state['traffic_lights']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(10, 13)"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Max visible road points x 13 features\n",
+    "visible_state['road_points'].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(10, 13)"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Number of visible road objects x 13 features \n",
+    "visible_state['objects'].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dimension flattened visible state: 410\n"
+     ]
+    }
+   ],
+   "source": [
+    "visible_state_dim = sum([val.flatten().shape[0] for key, val in visible_state.items()])\n",
+    "\n",
+    "print(f'Dimension flattened visible state: {visible_state_dim}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(410,)"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# We can also flatten the visible state\n",
+    "# flattened has padding: if we miss an object --> zeros\n",
+    "visible_state_flat = scenario.flattened_visible_state(\n",
+    "        ego_vehicle, \n",
+    "        view_dist=view_distance, \n",
+    "        view_angle=view_angle, \n",
+    "        head_angle=head_angle,    \n",
+    ")\n",
+    "\n",
+    "visible_state_flat.shape"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that `.flattened_visible_state()` adds padding by default."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step \n",
+    "\n",
+    "`step(dt)` is a method call on an instance of the Simulation class, where `dt` is a scalar that represents the length of each simulation timestep in seconds. It advances the simulation by one timestep, which can result in changes to the state of the simulation (for example, new positions of objects, updated velocities, etc.) based on the physical laws and rules defined in the simulation.\n",
+    "\n",
+    "In the Waymo dataset, the length of the expert data is 9 seconds, and a step size of 0.1 seconds is used to discretize each traffic scene. The first second is used as a warm-start, leaving the remaining 8 seconds (80 steps) for the simulation (details in Section 3.3 of the paper)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dt = 0.1\n",
+    "\n",
+    "# Step the simulation\n",
+    "sim.step(dt)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Vehicle control\n",
+    "\n",
+    "By default, vehicles in Nocturne are driven by a **kinematic bicycle model**. This means that calling the `step(dt)` method evolves the dynamics of a vehicle according to the following set of equations (Appendix D in the paper):\n",
+    "\n",
+    "\\begin{align*}\n",
+    "    \\textbf{position: } x_{t+1} &= x_t + \\dot{x} \\, \\Delta t \\\\\n",
+    "    y_{t+1} &= y_t + \\dot{y} \\, \\Delta t \\\\\n",
+    "    \\textbf{heading: } \\theta_{t+1} &= \\theta_t + \\dot{\\theta} \\, \\Delta t \\\\ \n",
+    "    \\textbf{speed: } v_{t+1} &= \\text{clip}(v_t + \\dot{v} \\, \\Delta t, -v_{\\text{max}}, v_{\\text{max}}) \\\\\n",
+    "\\end{align*}\n",
+    "\n",
+    "with\n",
+    "\n",
+    "\\begin{align*}\n",
+    "    \\dot{v} &= a \\\\ \n",
+    "    \\bar{v} &= \\text{clip}(v_t + 0.5 \\, \\dot{v} \\, \\Delta t, -v_{\\text{max}}, v_{\\text{max}}) \\\\\n",
+    "    \\beta &= \\tan^{-1} \\left( \\frac{l_r \\tan (\\delta)}{L}  \\right) \\\\\n",
+    "          &= \\tan^{-1} (0.5 \\tan(\\delta)) \\\\\n",
+    "    \\dot{x} &= \\bar{v} \\cos (\\theta + \\beta) \\\\\n",
+    "    \\dot{y} &= \\bar{v} \\sin (\\theta + \\beta) \\\\\n",
+    "    \\dot{\\theta} &= \\frac{\\bar{v} \\cos (\\beta)\\tan(\\delta)}{L}\n",
+    "\\end{align*}\n",
+    "\n",
+    "where $(x_t, y_t)$ is the position of a vehicle at time $t$, $\\theta_t$ is the vehicle's heading angle, $a$ is the acceleration and $\\delta$ is the steering angle. Finally, $L$ is the length of the car and $l_r = 0.5L$ is the distance to the rear axle of the car.\n",
+    "\n",
+    "If we set a vehicle to be **expert-controlled** instead, it will follow the same path as the respective human driver. This means that when we call the `step(dt)` function, the vehicle's position, heading, and speed will be updated to match the next point in the recorded human trajectory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "False"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# By default, no vehicle is expert controlled\n",
+    "ego_vehicle.expert_control"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set a vehicle to be expert controlled:\n",
+    "ego_vehicle.expert_control = True"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "> **Pseudocode**: How `step(dt)` advances the simulation for every vehicle. Full code is implemented in [scenario.cc](https://github.com/facebookresearch/nocturne/blob/ae0a4e361457caf6b7e397675cc86f46161405ed/nocturne/cpp/src/scenario.cc#L264)\n",
+    "\n",
+    "---\n",
+    "\n",
+    "```Python\n",
+    "for vehicle in vehicles:\n",
+    "\n",
+    "    if vehicle is not expert controlled:\n",
+    "        step vehicle dynamics following the kinematic bicycle model\n",
+    "    \n",
+    "    if vehicle is expert controlled:\n",
+    "        get current time & vehicle idx\n",
+    "        vehicle position = expert trajectories[vehicle_idx, time]\n",
+    "        vehicle heading = expert headings[vehicle_idx, time]\n",
+    "        vehicle speed = expert speeds[vehicle_idx, time]\n",
+    "```"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Action space\n",
+    "\n",
+    "The action set for a vehicle consists of three components: acceleration, steering and the head angle. Actions are discretized based on a provided upper and lower bound.\n",
+    "\n",
+    "The experiments in the paper use:\n",
+    "- 6 discrete actions for **acceleration** uniformly split between $[-3, 2] \\, \\frac{m}{s^2}$\n",
+    "- 21 discrete actions for **steering** between $[-0.7, 0.7]$ radians \n",
+    "- 5 discrete actions for **head tilt** between $[-1.6, 1.6]$ radians\n",
+    "\n",
+    "This is how you can access an expert action for a vehicle in Nocturne:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{acceleration: -0.224648, steering: -0.360994, head_angle: 0.000000}"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Choose an arbitrary timepoint\n",
+    "time = 5\n",
+    "\n",
+    "# Show expert action at timepoint\n",
+    "scenario.expert_action(ego_vehicle, time)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expert_action = scenario.expert_action(ego_vehicle, time)\n",
+    "\n",
+    "expert_action = expert_action.numpy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "acceleration = expert_action[0]\n",
+    "steering = expert_action[1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "nocturne_cpp.Action"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "type(scenario.expert_action(ego_vehicle, time))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(-0.005859, 0.004639)"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# How did the vehicle's position change after taking this action?\n",
+    "scenario.expert_pos_shift(ego_vehicle, time)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "-0.0007097125053405762"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# How did the vehicle's heading change after taking this action?\n",
+    "scenario.expert_heading_shift(ego_vehicle, time)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "nocturne-research",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/03_basic_rl_usage.ipynb b/examples/03_basic_rl_usage.ipynb
new file mode 100644
index 00000000..0e4baa7f
--- /dev/null
+++ b/examples/03_basic_rl_usage.ipynb
@@ -0,0 +1,262 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Basic RL usage"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Initializing environments\n",
+    "\n",
+    "\n",
+    "#### **Environment settings**\n",
+    "\n",
+    "- Initializing an environment is done with the `BaseEnv` class. The `BaseEnv` class leverages the `nocturne` simulator to create a basic RL interface, based on the provided traffic scenario(s). \n",
+    "\n",
+    "---\n",
+    "> 📝 The `env_config.yaml` file defines our environment settings, such as the action space, observation space and traffic scenarios to use.\n",
+    "---\n",
+    "\n",
+    "Check out `configs/env_config` for all the details!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import yaml\n",
+    "from nocturne.envs.base_env import BaseEnv\n",
+    "\n",
+    "# Load environment settings\n",
+    "with open(f\"../configs/env_config.yaml\", \"r\") as stream:\n",
+    "    env_config = yaml.safe_load(stream)\n",
+    "\n",
+    "# Initialize environment\n",
+    "env = BaseEnv(config=env_config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "controlling agents # [32, 3]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f'controlling agents # {[agent.id for agent in env.controlled_vehicles]}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### **Data**\n",
+    "\n",
+    "- Within `env_config.yaml`, we specify the path to the folder containing the traffic scenarios to use as follows:\n",
+    "\n",
+    "```yaml\n",
+    "# Path to folder with traffic scene(s) from which to create an environment\n",
+    "data_path: ../data\n",
+    "```\n",
+    "\n",
+    "- [Here](https://github.com/facebookresearch/nocturne/tree/main#downloading-the-dataset) are the instructions to access the complete dataset of traffic scenes. \n",
+    "\n",
+    "- The data folder also has a file named `valid_files.json`. This file lists the names of all the valid traffic scenarios along with the ids of the vehicles that are not valid. These vehicles are excluded from our experiment.\n",
+    "\n",
+    "For simplicity, we currently added a single traffic scenario that includes two vehicles in our data folder. Both vehicles can be used, so our `valid_files.json` looks like this:\n",
+    "\n",
+    "```json\n",
+    "{\n",
+    "    \"example_scenario.json\": []\n",
+    "}\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Interacting with the environment\n",
+    "\n",
+    "The classic agent-environment loop of reinforcement learning is implemented as follows:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done after 80 steps -- total return in episode: {3: 1.7939045316631903, 32: 1.76165686989652}\n",
+      "Done after 80 steps -- total return in episode: {3: 2.155318604202339, 32: 1.481429297893312}\n",
+      "Done after 80 steps -- total return in episode: {3: 9.465985459353316, 32: 2.2998212474249136}\n",
+      "Done after 80 steps -- total return in episode: {3: 1.7018343298612015, 32: 1.3247767658493788}\n",
+      "Done after 80 steps -- total return in episode: {3: 1.8384227205755483, 32: 10.332866871900634}\n",
+      "Done after 80 steps -- total return in episode: {3: 1.1086751511448438, 32: 2.2523170773066994}\n",
+      "Done after 68 steps -- total return in episode: {3: 9.61291282706631, 32: 1.7437541483099983}\n",
+      "Done after 80 steps -- total return in episode: {3: 1.3500529425191474, 32: 1.4489859636190936}\n",
+      "Done after 80 steps -- total return in episode: {3: 0.2037829695907602, 32: 1.79063201755183}\n",
+      "Done after 80 steps -- total return in episode: {3: 0.5679890269139611, 32: 1.1160696685449862}\n",
+      "Done after 80 steps -- total return in episode: {3: 1.2231784562099877, 32: 10.2609964920322}\n",
+      "Done after 80 steps -- total return in episode: {3: 0.1683594772814569, 32: 1.8316186898723619}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Reset\n",
+    "obs_dict = env.reset()\n",
+    "\n",
+    "# Get info\n",
+    "agent_ids = [agent_id for agent_id in obs_dict.keys()]\n",
+    "dead_agent_ids = []\n",
+    "num_agents = len(agent_ids)\n",
+    "rewards = {agent_id: 0 for agent_id in agent_ids}\n",
+    "\n",
+    "for step in range(1000):\n",
+    "\n",
+    "    # Sample actions\n",
+    "    action_dict = {\n",
+    "        agent_id: env.action_space.sample() \n",
+    "        for agent_id in agent_ids\n",
+    "        if agent_id not in dead_agent_ids\n",
+    "    }\n",
+    "    \n",
+    "    # Step in env\n",
+    "    obs_dict, rew_dict, done_dict, info_dict = env.step(action_dict)\n",
+    "\n",
+    "    for agent_id in action_dict.keys():\n",
+    "        rewards[agent_id] += rew_dict[agent_id]\n",
+    "\n",
+    "    # Update dead agents\n",
+    "    for agent_id, is_done in done_dict.items():\n",
+    "        if is_done and agent_id not in dead_agent_ids:\n",
+    "            dead_agent_ids.append(agent_id)\n",
+    "\n",
+    "    # Reset if all agents are done\n",
+    "    if done_dict[\"__all__\"]:\n",
+    "        print(f'Done after {env.step_num} steps -- total return in episode: {rewards}')\n",
+    "        obs_dict = env.reset()\n",
+    "        dead_agent_ids = []\n",
+    "        rewards = {agent_id: 0 for agent_id in agent_ids}\n",
+    "\n",
+    "# Close environment\n",
+    "env.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Accessing information about the environment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Box(-inf, inf, (10,), float32)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# The observation space \n",
+    "env.observation_space\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Discrete(9)"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# The size of the joint action space \n",
+    "env.action_space\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[<nocturne_cpp.Vehicle at 0x135413430>, <nocturne_cpp.Vehicle at 0x135413eb0>]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Which agents are controlled?\n",
+    "env.controlled_vehicles"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### \n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/04_ppo_with_sb3.ipynb b/examples/04_ppo_with_sb3.ipynb
new file mode 100644
index 00000000..e1cbbc64
--- /dev/null
+++ b/examples/04_ppo_with_sb3.ipynb
@@ -0,0 +1,231 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## PPO with single-agent control\n",
+    "\n",
+    "In this notebook, we show how to use Proximal Policy Optimization (PPO) with Nocturne and [Stable Baselines 3 (SB3)](https://stable-baselines3.readthedocs.io/en/master/index.html). SB3 is a library that has implementations of various well-known RL algorithms."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Wrappers\n",
+    "\n",
+    "The Nocturne `BaseEnv` returns output as dictionaries, but the SB3 `PPO` class expects numpy arrays. To make our environment compatible with SB3, we create a wrapper class. Wrappers modify an environment without altering code directly, which reduces boilerplate and increases modularity."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import yaml\n",
+    "import wandb\n",
+    "# Import base environment and wrapper\n",
+    "from nocturne.envs.base_env import BaseEnv\n",
+    "from nocturne.wrappers.sb3_wrappers import NocturneToSB3\n",
+    "\n",
+    "# import os\n",
+    "# os.chdir('..')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load environment settings\n",
+    "with open(f\"../configs/env_config.yaml\", \"r\") as stream:\n",
+    "    env_config = yaml.safe_load(stream)\n",
+    "\n",
+    "# Make sure to only control a single agent at a time. This is achieved by setting max_num_vehicles = 1\n",
+    "env_config[\"max_num_vehicles\"] = 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize env and wrap it with SB3 wrapper\n",
+    "env = NocturneToSB3(BaseEnv(env_config))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### PPO\n",
+    "\n",
+    "Now all we have to do is initialize the SB3 `PPO` class and we're ready to learn! We use Weights & Biases (`wandb`) to take care of the logging. If you prefer not to use `wandb`, set `LOGGING = False` and `verbose=1`. \n",
+    "\n",
+    "\n",
+    "---\n",
+    "\n",
+    "> 🔦 More info on PPO and settings can be found in the [SB3 docs](https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html).\n",
+    "\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from stable_baselines3 import PPO"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "LOGGING = True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mdaphnecor\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.12"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Run data is saved locally in <code>/Users/Daphne/git_repos/nocturne_lab/examples/wandb/run-20231004_215340-rmy7acy1</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/daphnecor/single_agent_control_sb3_ppo/runs/rmy7acy1' target=\"_blank\">blooming-eon-12</a></strong> to <a href='https://wandb.ai/daphnecor/single_agent_control_sb3_ppo' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View project at <a href='https://wandb.ai/daphnecor/single_agent_control_sb3_ppo' target=\"_blank\">https://wandb.ai/daphnecor/single_agent_control_sb3_ppo</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run at <a href='https://wandb.ai/daphnecor/single_agent_control_sb3_ppo/runs/rmy7acy1' target=\"_blank\">https://wandb.ai/daphnecor/single_agent_control_sb3_ppo/runs/rmy7acy1</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "if LOGGING:\n",
+    "    wandb.login()\n",
+    "    run = wandb.init(\n",
+    "        project=\"single_agent_control_sb3_ppo\",\n",
+    "        sync_tensorboard=True,\n",
+    "    )\n",
+    "    run_id = run.id\n",
+    "else:\n",
+    "    run_id = None\n",
+    "\n",
+    "# Init PPO algorithm\n",
+    "model = PPO(      \n",
+    "    policy=\"MlpPolicy\",  # Policy type\n",
+    "    n_steps=4096, # Number of steps per rollout\n",
+    "    batch_size=128, # Minibatch size\n",
+    "    env=env, # Our wrapped environment\n",
+    "    seed=42, # Always seed for reproducibility\n",
+    "    verbose=0,\n",
+    "    tensorboard_log=f\"runs/{run_id}\" if run_id is not None else None, # Sync with wandb\n",
+    ")\n",
+    "\n",
+    "# Learn\n",
+    "model.learn(total_timesteps=200_000)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 🤔 How good is your policy?\n",
+    "\n",
+    "Hooray! You have just trained your first PPO agent in Nocturne! 🏁 \n",
+    "\n",
+    "Now take a look at the information you've logged during training; did we learn? (If you want to compare, [this is what my run looks like](https://api.wandb.ai/links/daphnecor/iarufxw9).)\n",
+    "\n",
+    "One important metric for assessing the effectiveness of your policy is the average cumulative reward per episode. In our case, the **maximum** achievable return per episode is approximately between 9 and 10 (it varies per traffic scene and per agent). With the configurations above, your policy should approach this value in 150,000 steps. Here, steps (the `global_step`) represent the total number of **frames** our policy network has seen; you can think of it as the accumulated experience."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "nocturne_lab",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/create_env.py b/examples/create_env.py
deleted file mode 100644
index 7b9355f5..00000000
--- a/examples/create_env.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Test step and rendering functions."""
-import hydra
-
-from cfgs.config import set_display_window
-from nocturne import Action
-from nocturne.envs.wrappers import create_env
-
-
-@hydra.main(config_path="../cfgs/", config_name="config")
-def create_rl_env(cfg):
-    """Test step and rendering functions."""
-    set_display_window()
-    env = create_env(cfg)
-    _ = env.reset()
-    # quick check that rendering works
-    _ = env.scenario.getConeImage(
-        env.scenario.getVehicles()[0],
-        # how far the agent can see
-        view_dist=cfg['subscriber']['view_dist'],
-        # the angle formed by the view cone
-        view_angle=cfg['subscriber']['view_angle'],
-        # the agent's head angle
-        head_angle=0.0,
-        # whether to draw the goal position in the image
-        draw_target_position=False)
-    for _ in range(80):
-        # grab the list of vehicles that actually need to
-        # move some distance to get to their goal
-        moving_vehs = env.scenario.getObjectsThatMoved()
-        # obs, rew, done, info
-        # each of these objects is a dict keyed by the vehicle ID
-        # info[veh_id] contains the following useful keys:
-        # 'collided': did the agent collide with a road object or edge
-        # 'veh_veh_collision': did the agent collide with a vehicle
-        # 'veh_edge_collision': did the agent collide with a road edge
-        # 'goal_achieved': did we get to our target
-        _, _, _, _ = env.step({
-            veh.id: Action(acceleration=2.0, steering=1.0, head_angle=0.5)
-            for veh in moving_vehs
-        })
-
-
-if __name__ == '__main__':
-    create_rl_env()
diff --git a/examples/imitation_learning/filters.py b/examples/imitation_learning/filters.py
deleted file mode 100644
index fdab3118..00000000
--- a/examples/imitation_learning/filters.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""A streaming mean-std filter used to whiten inputs."""
-import torch
-from torch import nn
-
-
-class MeanStdFilter(nn.Module):
-    """adapted from https://www.johndcook.com/blog/standard_deviation/."""
-
-    def __init__(self, input_shape, eps=1e-05):
-        super().__init__()
-        self.input_shape = input_shape
-        self.eps = eps
-        self.track_running_states = True
-        self.counter = 0
-        self._M = nn.Parameter(torch.zeros(input_shape), requires_grad=False)
-        self._S = nn.Parameter(torch.zeros(input_shape), requires_grad=False)
-        self._n = 0
-
-    def train(self, mode):
-        """Turn on updates to mean and standard deviation."""
-        self.track_running_states = True
-
-    def eval(self):
-        """Turn off updates to mean and standard deviation."""
-        self.track_running_states = False
-
-    def forward(self, x):
-        """Whiten and optionally update."""
-        if self.track_running_states:
-            for i in range(x.shape[0]):
-                self.push(x[i])
-        x = x - self.mean
-        x = x / (self.std + self.eps)
-        return x
-
-    def push(self, x):
-        """Unvectorized update of the running statistics."""
-        if x.shape != self._M.shape:
-            raise ValueError(
-                "Unexpected input shape {}, expected {}, value = {}".format(
-                    x.shape, self._M.shape, x))
-        n1 = self._n
-        self._n += 1
-        if self._n == 1:
-            self._M[...] = x
-        else:
-            delta = x - self._M
-            self._M[...] += delta / self._n
-            self._S[...] += delta * delta * n1 / self._n
-
-    @property
-    def n(self):
-        """Return the number of samples."""
-        return self._n
-
-    @property
-    def mean(self):
-        """Return the mean."""
-        return self._M
-
-    @property
-    def var(self):
-        """Compute the variance."""
-        return self._S / (self._n - 1) if self._n > 1 else torch.square(
-            self._M)
-
-    @property
-    def std(self):
-        """Compute the standard deviation."""
-        return torch.sqrt(self.var)
-
-    @property
-    def shape(self):
-        """Get the means shape."""
-        return self._M.shape
diff --git a/examples/imitation_learning/model.py b/examples/imitation_learning/model.py
deleted file mode 100644
index d3030f17..00000000
--- a/examples/imitation_learning/model.py
+++ /dev/null
@@ -1,157 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Model for an imitation learning agent."""
-import torch
-from torch import nn
-from torch.distributions.multivariate_normal import MultivariateNormal
-from torch.distributions.categorical import Categorical
-
-from examples.imitation_learning.filters import MeanStdFilter
-
-
-class ImitationAgent(nn.Module):
-    """Pytorch Module for imitation. Output is a Multivariable Gaussian."""
-
-    def __init__(self, cfg):
-        """Initialize."""
-        super(ImitationAgent, self).__init__()
-
-        self.n_states = cfg['n_inputs']
-        self.hidden_layers = cfg.get('hidden_layers', [256, 256])
-
-        self.discrete = cfg['discrete']
-
-        if self.discrete:
-            self.actions_discretizations = cfg['actions_discretizations']
-            self.actions_bounds = cfg['actions_bounds']
-            self.actions_grids = [
-                torch.linspace(a_min, a_max, a_count,
-                               requires_grad=False).to(cfg['device'])
-                for (a_min, a_max), a_count in zip(
-                    self.actions_bounds, self.actions_discretizations)
-            ]
-        else:
-            # neural network outputs between -1 and 1 (tanh filter)
-            # then output is sampled from a Gaussian distribution
-            # N(nn output * mean_scalings, std_devs)
-            self.mean_scalings = torch.tensor(cfg['mean_scalings'])
-            self.std_devs = torch.tensor(cfg['std_devs'])
-            self.covariance_matrix = torch.diag_embed(self.std_devs)
-
-        self._build_model()
-
-    def _build_model(self):
-        """Build agent MLP that outputs an action mean and variance from a state input."""
-        if self.hidden_layers is None or len(self.hidden_layers) == 0:
-            self.nn = nn.Identity()
-            pre_head_size = self.n_states
-        else:
-            self.nn = nn.Sequential(
-                MeanStdFilter(self.n_states),
-                nn.Linear(self.n_states, self.hidden_layers[0]),
-                nn.Tanh(),
-                *[
-                    nn.Sequential(
-                        nn.Linear(self.hidden_layers[i],
-                                  self.hidden_layers[i + 1]),
-                        nn.Tanh(),
-                    ) for i in range(len(self.hidden_layers) - 1)
-                ],
-            )
-            pre_head_size = self.hidden_layers[-1]
-
-        if self.discrete:
-            self.heads = nn.ModuleList([
-                nn.Linear(pre_head_size, discretization)
-                for discretization in self.actions_discretizations
-            ])
-        else:
-            self.head = nn.Sequential(
-                nn.Linear(pre_head_size, len(self.mean_scalings)), nn.Tanh())
-
-    def dist(self, state):
-        """Construct a distribution from tensor input."""
-        x_out = self.nn(state)
-        if self.discrete:
-            return [Categorical(logits=head(x_out)) for head in self.heads]
-        else:
-            return MultivariateNormal(
-                self.head(x_out) * self.mean_scalings, self.covariance_matrix)
-
-    def forward(self, state, deterministic=False, return_indexes=False):
-        """Generate an output from tensor input."""
-        dists = self.dist(state)
-        if self.discrete:
-            actions_idx = [
-                d.logits.argmax(axis=-1) if deterministic else d.sample()
-                for d in dists
-            ]
-            actions = [
-                action_grid[action_idx] for action_grid, action_idx in zip(
-                    self.actions_grids, actions_idx)
-            ]
-            return (actions, actions_idx) if return_indexes else actions
-        else:
-            return [dist.argmax(axis=-1) for dist in dists
-                    ] if deterministic else [dist.sample() for dist in dists]
-
-    def log_prob(self, state, ground_truth_action, return_indexes=False):
-        """Compute the log prob of the expert action for a given input tensor."""
-        dist = self.dist(state)
-        if self.discrete:
-            # find indexes in actions grids whose values are the closest to the ground truth actions
-            actions_idx = self.action_to_grid_idx(ground_truth_action)
-            # sum log probs of actions indexes wrt. Categorial variables for each action dimension
-            log_prob = sum(
-                [d.log_prob(actions_idx[:, i]) for i, d in enumerate(dist)])
-            return (log_prob, actions_idx) if return_indexes else log_prob
-        else:
-            return dist.log_prob(ground_truth_action)
-
-    def action_to_grid_idx(self, action):
-        """Convert a batch of actions to a batch of action indexes (for discrete actions only)."""
-        # action is of shape (batch_size, n_actions)
-        # we want to transform it into an array of same shape, but with indexes instead of actions
-        # credits https://stackoverflow.com/a/46184652/16207351
-        output = torch.zeros_like(action)
-        for i, action_grid in enumerate(self.actions_grids):
-            actions = action[:, i]
-
-            # get indexes where actions would be inserted in action_grid to keep it sorted
-            idxs = torch.searchsorted(action_grid, actions)
-
-            # if it would be inserted at the end, we're looking at the last action
-            idxs[idxs == len(action_grid)] -= 1
-
-            # find indexes where previous index is closer (simple grid has constant sampling intervals)
-            idxs[action_grid[idxs] - actions > torch.diff(action_grid).mean() *
-                 0.5] -= 1
-
-            # write indexes in output
-            output[:, i] = idxs
-        return output
-
-
-if __name__ == '__main__':
-    model_cfg = {
-        'n_inputs': 100,
-        'hidden_layers': [256, 256],
-        'discrete': False,
-        'mean_scalings': [1, 10, 10000],
-        'std_devs': [1.0, 1.0, 1.0],
-    }
-    if True:
-        model_cfg.update({
-            'discrete': True,
-            'actions_discretizations': [5, 10],
-            'actions_bounds': [[-3, 3], [0, 10]],
-        })
-
-    model = ImitationAgent(model_cfg)
-
-    sample_states = torch.rand(3, model_cfg['n_inputs'])
-    actions = model(sample_states)
-    print(actions)
-    print(model.log_prob(sample_states, actions))
diff --git a/examples/imitation_learning/replay_video.py b/examples/imitation_learning/replay_video.py
deleted file mode 100644
index 9221e431..00000000
--- a/examples/imitation_learning/replay_video.py
+++ /dev/null
@@ -1,186 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Replay a video of a trained controller."""
-from collections import defaultdict
-import json
-from pathlib import Path
-import sys
-
-import imageio
-import numpy as np
-import subprocess
-import torch
-
-from cfgs.config import PROCESSED_TRAIN_NO_TL, PROJECT_PATH, set_display_window
-from nocturne import Simulation, Vector2D
-
-OUTPUT_PATH = str(PROJECT_PATH / 'vids')
-
-MODEL_PATH = Path(
-    '/checkpoint/eugenevinitsky/nocturne/test/2022.06.05/test/14.23.17/\
-        ++device=cuda,++file_limit=1000/train_logs/2022_06_05_14_23_23/model_220.pth'
-)
-CONFIG_PATH = MODEL_PATH.parent / 'configs.json'
-GOAL_TOLERANCE = 1.0
-
-if __name__ == '__main__':
-    set_display_window()
-    output_dir = Path(OUTPUT_PATH)
-    output_dir.mkdir(exist_ok=True)
-
-    with open(CONFIG_PATH, 'r') as f:
-        configs = json.load(f)
-
-    data_path = PROCESSED_TRAIN_NO_TL
-    files = [
-        file for file in Path(data_path).iterdir() if 'tfrecord' in file.stem
-    ]
-    scenario_config = configs['scenario_cfg']
-    dataloader_config = configs['dataloader_cfg']
-    files = files[:600]
-    np.random.shuffle(files)
-    model = torch.load(MODEL_PATH).to('cpu')
-    model.eval()
-    for traj_path in files:
-        sim = Simulation(str(traj_path), scenario_config)
-        output_str = traj_path.stem.split('.')[0].split('/')[-1]
-
-        def policy(state):
-            """Get model output."""
-            state = torch.as_tensor(np.array([state]), dtype=torch.float32)
-            return model.forward(state,
-                                 deterministic=True,
-                                 return_indexes=False)
-
-        with torch.no_grad():
-            for expert_control_vehicles, mp4_name in [
-                (False, f'{output_str}_policy_rollout.mp4'),
-                (True, f'{output_str}_true_rollout.mp4')
-            ]:
-                frames = []
-                sim.reset()
-                scenario = sim.getScenario()
-
-                objects_of_interest = [
-                    obj for obj in scenario.getVehicles()
-                    if obj in scenario.getObjectsThatMoved()
-                ]
-
-                for obj in objects_of_interest:
-                    obj.expert_control = True
-
-                relevant_obj_ids = [
-                    obj.getID() for obj in objects_of_interest[0:2]
-                ]
-
-                view_dist = configs['dataloader_cfg']['view_dist']
-                view_angle = configs['dataloader_cfg']['view_angle']
-                state_normalization = configs['dataloader_cfg'][
-                    'state_normalization']
-                dt = configs['dataloader_cfg']['dt']
-
-                n_stacked_states = configs['dataloader_cfg'][
-                    'n_stacked_states']
-                state_size = configs['model_cfg'][
-                    'n_inputs'] // n_stacked_states
-                state_dict = defaultdict(
-                    lambda: np.zeros(state_size * n_stacked_states))
-                for i in range(n_stacked_states):
-                    for veh in objects_of_interest:
-                        ego_state = scenario.ego_state(veh)
-                        visible_state = scenario.flattened_visible_state(
-                            veh, view_dist=view_dist, view_angle=view_angle)
-                        state = np.concatenate(
-                            (ego_state, visible_state)) / state_normalization
-                        state_dict[veh.getID()] = np.roll(
-                            state_dict[veh.getID()], len(state))
-                        state_dict[veh.getID()][:len(state)] = state
-
-                    sim.step(dt)
-
-                for obj in scenario.getObjectsThatMoved():
-                    obj.expert_control = True
-                # we only actually want to take control once the vehicle
-                # has been placed into the network
-                for veh in objects_of_interest:
-                    if np.isclose(veh.position.x, -10000.0):
-                        veh.expert_control = True
-                    else:
-                        if veh.getID() in relevant_obj_ids:
-                            veh.expert_control = expert_control_vehicles
-                            veh.highlight = True
-
-                for i in range(90 - n_stacked_states):
-                    # we only actually want to take control once the vehicle
-                    # has been placed into the network
-                    # so vehicles that should be controlled by our agent
-                    # are overriden to be expert controlled
-                    # until they are actually spawned in the scene
-                    for veh in objects_of_interest:
-                        if np.isclose(veh.position.x, -10000.0):
-                            veh.expert_control = True
-                        else:
-                            if veh.getID() in relevant_obj_ids:
-                                veh.expert_control = expert_control_vehicles
-                                veh.highlight = True
-                    print(
-                        f'...{i+1}/{90 - n_stacked_states} ({traj_path} ; {mp4_name})'
-                    )
-                    img = scenario.getImage(
-                        img_width=1600,
-                        img_height=1600,
-                        draw_target_positions=True,
-                        padding=50.0,
-                    )
-                    frames.append(img)
-                    for veh in objects_of_interest:
-                        veh_state = np.concatenate(
-                            (np.array(scenario.ego_state(veh), copy=False),
-                             np.array(scenario.flattened_visible_state(
-                                 veh,
-                                 view_dist=view_dist,
-                                 view_angle=view_angle),
-                                      copy=False)))
-                        ego_state = scenario.ego_state(veh)
-                        visible_state = scenario.flattened_visible_state(
-                            veh, view_dist=view_dist, view_angle=view_angle)
-                        state = np.concatenate(
-                            (ego_state, visible_state)) / state_normalization
-                        state_dict[veh.getID()] = np.roll(
-                            state_dict[veh.getID()], len(state))
-                        state_dict[veh.getID()][:len(state)] = state
-                        action = policy(state_dict[veh.getID()])
-                        if dataloader_config['expert_position']:
-                            if configs['model_cfg']['discrete']:
-                                pos_diff = np.array([
-                                    pos.cpu().numpy()[0] for pos in action[0:2]
-                                ])
-                                heading = action[2:3][0].cpu().numpy()[0]
-                            else:
-                                pos_diff = action[0:2]
-                                heading = action[2:3]
-                            veh.position = Vector2D.from_numpy(
-                                pos_diff + veh.position.numpy())
-                            veh.heading += heading
-                        else:
-                            veh.acceleration = action[0].cpu().numpy()
-                            veh.steering = action[1].cpu().numpy()
-                    sim.step(dt)
-                    for veh in scenario.getObjectsThatMoved():
-                        if (veh.position -
-                                veh.target_position).norm() < GOAL_TOLERANCE:
-                            scenario.removeVehicle(veh)
-                imageio.mimsave(mp4_name, np.stack(frames, axis=0), fps=30)
-                print(f'> {mp4_name}')
-
-        # stack the movies side by side
-        output_name = traj_path.stem.split('.')[0].split('/')[-1]
-        output_path = f'{output_name}_output.mp4'
-        ffmpeg_command = f'ffmpeg -y -i {output_str}_true_rollout.mp4 ' \
-            f'-i {output_str}_policy_rollout.mp4 -filter_complex hstack {output_path}'
-        print(ffmpeg_command)
-        subprocess.call(ffmpeg_command.split(' '))
-        print(f'> {output_path}')
-        sys.exit()
diff --git a/examples/imitation_learning/train.py b/examples/imitation_learning/train.py
deleted file mode 100644
index 7f072162..00000000
--- a/examples/imitation_learning/train.py
+++ /dev/null
@@ -1,260 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Imitation learning training script (behavioral cloning)."""
-from datetime import datetime
-from pathlib import Path
-import pickle
-import random
-import json
-
-import hydra
-import numpy as np
-import torch
-from torch.utils.tensorboard import SummaryWriter
-from torch.optim import Adam
-from torch.utils.data import DataLoader
-from tqdm import tqdm
-import wandb
-
-from examples.imitation_learning.model import ImitationAgent
-from examples.imitation_learning.waymo_data_loader import WaymoDataset
-
-
-def set_seed_everywhere(seed):
-    """Ensure determinism."""
-    torch.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed_all(seed)
-    np.random.seed(seed)
-    random.seed(seed)
-
-
-@hydra.main(config_path="../../cfgs/imitation", config_name="config")
-def main(args):
-    """Train an IL model."""
-    set_seed_everywhere(args.seed)
-    # create dataset and dataloader
-    if args.actions_are_positions:
-        expert_bounds = [[-0.5, 3], [-3, 3], [-0.07, 0.07]]
-        actions_discretizations = [21, 21, 21]
-        actions_bounds = [[-0.5, 3], [-3, 3], [-0.07, 0.07]]
-        mean_scalings = [3, 3, 0.07]
-        std_devs = [0.1, 0.1, 0.02]
-    else:
-        expert_bounds = [[-6, 6], [-0.7, 0.7]]
-        actions_bounds = expert_bounds
-        actions_discretizations = [15, 43]
-        mean_scalings = [3, 0.7]
-        std_devs = [0.1, 0.02]
-
-    dataloader_cfg = {
-        'tmin': 0,
-        'tmax': 90,
-        'view_dist': args.view_dist,
-        'view_angle': args.view_angle,
-        'dt': 0.1,
-        'expert_action_bounds': expert_bounds,
-        'expert_position': args.actions_are_positions,
-        'state_normalization': 100,
-        'n_stacked_states': args.n_stacked_states,
-    }
-    scenario_cfg = {
-        'start_time': 0,
-        'allow_non_vehicles': True,
-        'spawn_invalid_objects': True,
-        'max_visible_road_points': args.max_visible_road_points,
-        'sample_every_n': 1,
-        'road_edge_first': False,
-    }
-    dataset = WaymoDataset(
-        data_path=args.path,
-        file_limit=args.num_files,
-        dataloader_config=dataloader_cfg,
-        scenario_config=scenario_cfg,
-    )
-    data_loader = iter(
-        DataLoader(
-            dataset,
-            batch_size=args.batch_size,
-            num_workers=args.n_cpus,
-            pin_memory=True,
-        ))
-
-    # create model
-    sample_state, _ = next(data_loader)
-    n_states = sample_state.shape[-1]
-
-    model_cfg = {
-        'n_inputs': n_states,
-        'hidden_layers': [1024, 256, 128],
-        'discrete': args.discrete,
-        'mean_scalings': mean_scalings,
-        'std_devs': std_devs,
-        'actions_discretizations': actions_discretizations,
-        'actions_bounds': actions_bounds,
-        'device': args.device
-    }
-
-    model = ImitationAgent(model_cfg).to(args.device)
-    model.train()
-    print(model)
-
-    # create optimizer
-    optimizer = Adam(model.parameters(), lr=args.lr)
-
-    # create exp dir
-    time_str = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
-    exp_dir = Path.cwd() / Path('train_logs') / time_str
-    exp_dir.mkdir(parents=True, exist_ok=True)
-
-    # save configs
-    configs_path = exp_dir / 'configs.json'
-    configs = {
-        'scenario_cfg': scenario_cfg,
-        'dataloader_cfg': dataloader_cfg,
-        'model_cfg': model_cfg,
-    }
-    with open(configs_path, 'w') as fp:
-        json.dump(configs, fp, sort_keys=True, indent=4)
-    print('Wrote configs at', configs_path)
-
-    # tensorboard writer
-    if args.write_to_tensorboard:
-        writer = SummaryWriter(log_dir=str(exp_dir))
-    # wandb logging
-    if args.wandb:
-        wandb_mode = "online"
-        wandb.init(config=args,
-                   project=args.wandb_project,
-                   name=args.experiment,
-                   group=args.experiment,
-                   resume="allow",
-                   settings=wandb.Settings(start_method="fork"),
-                   mode=wandb_mode)
-
-    # train loop
-    print('Exp dir created at', exp_dir)
-    print(f'`tensorboard --logdir={exp_dir}`\n')
-    for epoch in range(args.epochs):
-        print(f'\nepoch {epoch+1}/{args.epochs}')
-        n_samples = epoch * args.batch_size * (args.samples_per_epoch //
-                                               args.batch_size)
-
-        for i in tqdm(range(args.samples_per_epoch // args.batch_size),
-                      unit='batch'):
-            # get states and expert actions
-            states, expert_actions = next(data_loader)
-            states = states.to(args.device)
-            expert_actions = expert_actions.to(args.device)
-
-            # compute loss
-            if args.discrete:
-                log_prob, expert_idxs = model.log_prob(states,
-                                                       expert_actions,
-                                                       return_indexes=True)
-            else:
-                dist = model.dist(states)
-                log_prob = dist.log_prob(expert_actions.float())
-            loss = -log_prob.mean()
-
-            metrics_dict = {}
-
-            # optim step
-            optimizer.zero_grad()
-            loss.backward()
-
-            # grad clipping
-            total_norm = 0
-            for p in model.parameters():
-                if p.grad is not None:
-                    param_norm = p.grad.detach().data.norm(2)
-                    total_norm += param_norm.item()**2
-            total_norm = total_norm**0.5
-            metrics_dict['train/grad_norm'] = total_norm
-            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
-            total_norm = 0
-            for p in model.parameters():
-                if p.grad is not None:
-                    param_norm = p.grad.detach().data.norm(2)
-                    total_norm += param_norm.item()**2
-            total_norm = total_norm**0.5
-            metrics_dict['train/post_clip_grad_norm'] = total_norm
-            optimizer.step()
-
-            # tensorboard logging
-            metrics_dict['train/loss'] = loss.item()
-
-            if args.actions_are_positions:
-                metrics_dict['train/x_logprob'] = log_prob[0]
-                metrics_dict['train/y_logprob'] = log_prob[1]
-                metrics_dict['train/steer_logprob'] = log_prob[2]
-            else:
-                metrics_dict['train/accel_logprob'] = log_prob[0]
-                metrics_dict['train/steer_logprob'] = log_prob[1]
-
-            if not model_cfg['discrete']:
-                diff_actions = torch.mean(torch.abs(dist.mean -
-                                                    expert_actions),
-                                          axis=0)
-                metrics_dict['train/accel_diff'] = diff_actions[0]
-                metrics_dict['train/steer_diff'] = diff_actions[1]
-                metrics_dict['train/l2_dist'] = torch.norm(
-                    dist.mean - expert_actions.float())
-
-            if model_cfg['discrete']:
-                with torch.no_grad():
-                    model_actions, model_idxs = model(states,
-                                                      deterministic=True,
-                                                      return_indexes=True)
-                accuracy = [
-                    (model_idx == expert_idx).float().mean(axis=0)
-                    for model_idx, expert_idx in zip(model_idxs, expert_idxs.T)
-                ]
-                if args.actions_are_positions:
-                    metrics_dict['train/x_pos_acc'] = accuracy[0]
-                    metrics_dict['train/y_pos_acc'] = accuracy[1]
-                    metrics_dict['train/heading_acc'] = accuracy[2]
-                else:
-                    metrics_dict['train/accel_acc'] = accuracy[0]
-                    metrics_dict['train/steer_acc'] = accuracy[1]
-
-            for key, val in metrics_dict.items():
-                if args.write_to_tensorboard:
-                    writer.add_scalar(key, val, n_samples)
-            if args.wandb:
-                wandb.log(metrics_dict, step=n_samples)
-        # save model checkpoint
-        if (epoch + 1) % 10 == 0 or epoch == args.epochs - 1:
-            model_path = exp_dir / f'model_{epoch+1}.pth'
-            torch.save(model, str(model_path))
-            pickle.dump(filter, open(exp_dir / f"filter_{epoch+1}.pth", "wb"))
-            print(f'\nSaved model at {model_path}')
-        if args.discrete:
-            if args.actions_are_positions:
-                print('xpos')
-                print('model: ', model_idxs[0][0:10])
-                print('expert: ', expert_idxs[0:10, 0])
-                print('ypos')
-                print('model: ', model_idxs[1][0:10])
-                print('expert: ', expert_idxs[0:10, 1])
-                print('steer')
-                print('model: ', model_idxs[2][0:10])
-                print('expert: ', expert_idxs[0:10, 2])
-            else:
-                print('accel')
-                print('model: ', model_idxs[0][0:10])
-                print('expert: ', expert_idxs[0:10, 0])
-                print('steer')
-                print('model: ', model_idxs[1][0:10])
-                print('expert: ', expert_idxs[0:10, 1])
-
-    print('Done, exp dir is', exp_dir)
-
-    writer.flush()
-    writer.close()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/examples/imitation_learning/waymo_data_loader.py b/examples/imitation_learning/waymo_data_loader.py
deleted file mode 100644
index 8f1fc606..00000000
--- a/examples/imitation_learning/waymo_data_loader.py
+++ /dev/null
@@ -1,201 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Dataloader for imitation learning in Nocturne."""
-from collections import defaultdict
-import random
-
-import torch
-from pathlib import Path
-import numpy as np
-
-from cfgs.config import ERR_VAL
-from nocturne import Simulation
-
-
-def _get_waymo_iterator(paths, dataloader_config, scenario_config):
-    # if worker has no paths, return an empty iterator
-    if len(paths) == 0:
-        return
-
-    # load dataloader config
-    tmin = dataloader_config.get('tmin', 0)
-    tmax = dataloader_config.get('tmax', 90)
-    view_dist = dataloader_config.get('view_dist', 80)
-    view_angle = dataloader_config.get('view_angle', np.radians(120))
-    dt = dataloader_config.get('dt', 0.1)
-    expert_action_bounds = dataloader_config.get('expert_action_bounds',
-                                                 [[-3, 3], [-0.7, 0.7]])
-    expert_position = dataloader_config.get('expert_position', True)
-    state_normalization = dataloader_config.get('state_normalization', 100)
-    n_stacked_states = dataloader_config.get('n_stacked_states', 5)
-
-    while True:
-        # select a random scenario path
-        scenario_path = np.random.choice(paths)
-
-        # create simulation
-        sim = Simulation(str(scenario_path), scenario_config)
-        scenario = sim.getScenario()
-
-        # set objects to be expert-controlled
-        for obj in scenario.getObjects():
-            obj.expert_control = True
-
-        # we are interested in imitating vehicles that moved
-        objects_that_moved = scenario.getObjectsThatMoved()
-        objects_of_interest = [
-            obj for obj in scenario.getVehicles() if obj in objects_that_moved
-        ]
-
-        # initialize values if stacking states
-        stacked_state = defaultdict(lambda: None)
-        initial_warmup = n_stacked_states - 1
-
-        state_list = []
-        action_list = []
-
-        # iterate over timesteps and objects of interest
-        for time in range(tmin, tmax):
-            for obj in objects_of_interest:
-                # get state
-                ego_state = scenario.ego_state(obj)
-                visible_state = scenario.flattened_visible_state(
-                    obj, view_dist=view_dist, view_angle=view_angle)
-                state = np.concatenate((ego_state, visible_state))
-
-                # normalize state
-                state /= state_normalization
-
-                # stack state
-                if n_stacked_states > 1:
-                    if stacked_state[obj.getID()] is None:
-                        stacked_state[obj.getID()] = np.zeros(
-                            len(state) * n_stacked_states, dtype=state.dtype)
-                    stacked_state[obj.getID()] = np.roll(
-                        stacked_state[obj.getID()], len(state))
-                    stacked_state[obj.getID()][:len(state)] = state
-
-                if np.isclose(obj.position.x, ERR_VAL):
-                    continue
-
-                if not expert_position:
-                    # get expert action
-                    expert_action = scenario.expert_action(obj, time)
-                    # check for invalid action (because no value available for taking derivative)
-                    # or because the vehicle is at an invalid state
-                    if expert_action is None:
-                        continue
-                    expert_action = expert_action.numpy()
-                    # now find the corresponding expert actions in the grids
-
-                    # throw out actions containing NaN or out-of-bound values
-                    if np.isnan(expert_action).any() \
-                            or expert_action[0] < expert_action_bounds[0][0] \
-                            or expert_action[0] > expert_action_bounds[0][1] \
-                            or expert_action[1] < expert_action_bounds[1][0] \
-                            or expert_action[1] > expert_action_bounds[1][1]:
-                        continue
-                else:
-                    expert_pos_shift = scenario.expert_pos_shift(obj, time)
-                    if expert_pos_shift is None:
-                        continue
-                    expert_pos_shift = expert_pos_shift.numpy()
-                    expert_heading_shift = scenario.expert_heading_shift(
-                        obj, time)
-                    if expert_heading_shift is None \
-                            or expert_pos_shift[0] < expert_action_bounds[0][0] \
-                            or expert_pos_shift[0] > expert_action_bounds[0][1] \
-                            or expert_pos_shift[1] < expert_action_bounds[1][0] \
-                            or expert_pos_shift[1] > expert_action_bounds[1][1] \
-                            or expert_heading_shift < expert_action_bounds[2][0] \
-                            or expert_heading_shift > expert_action_bounds[2][1]:
-                        continue
-                    expert_action = np.concatenate(
-                        (expert_pos_shift, [expert_heading_shift]))
-
-                # yield state and expert action
-                if stacked_state[obj.getID()] is not None:
-                    if initial_warmup <= 0:  # warmup to wait for stacked state to be filled up
-                        state_list.append(stacked_state[obj.getID()])
-                        action_list.append(expert_action)
-                else:
-                    state_list.append(state)
-                    action_list.append(expert_action)
-
-            # step the simulation
-            sim.step(dt)
-            if initial_warmup > 0:
-                initial_warmup -= 1
-
-        if len(state_list) > 0:
-            temp = list(zip(state_list, action_list))
-            random.shuffle(temp)
-            state_list, action_list = zip(*temp)
-            for state_return, action_return in zip(state_list, action_list):
-                yield (state_return, action_return)
-
-
-class WaymoDataset(torch.utils.data.IterableDataset):
-    """Waymo dataset loader."""
-
-    def __init__(self,
-                 data_path,
-                 dataloader_config={},
-                 scenario_config={},
-                 file_limit=None):
-        super(WaymoDataset).__init__()
-
-        # save configs
-        self.dataloader_config = dataloader_config
-        self.scenario_config = scenario_config
-
-        # get paths of dataset files (up to file_limit paths)
-        self.file_paths = list(
-            Path(data_path).glob('tfrecord*.json'))[:file_limit]
-        print(f'WaymoDataset: loading {len(self.file_paths)} files.')
-
-        # sort the paths for reproducibility if testing on a small set of files
-        self.file_paths.sort()
-
-    def __iter__(self):
-        """Partition files for each worker and return an (state, expert_action) iterable."""
-        # get info on current worker process
-        worker_info = torch.utils.data.get_worker_info()
-
-        if worker_info is None:
-            # single-process data loading, return the whole set of files
-            return _get_waymo_iterator(self.file_paths, self.dataloader_config,
-                                       self.scenario_config)
-
-        # distribute a unique set of file paths to each worker process
-        worker_file_paths = np.array_split(
-            self.file_paths, worker_info.num_workers)[worker_info.id]
-        return _get_waymo_iterator(list(worker_file_paths),
-                                   self.dataloader_config,
-                                   self.scenario_config)
-
-
-if __name__ == '__main__':
-    dataset = WaymoDataset(data_path='dataset/tf_records',
-                           file_limit=20,
-                           dataloader_config={
-                               'view_dist': 80,
-                               'n_stacked_states': 3,
-                           },
-                           scenario_config={
-                               'start_time': 0,
-                               'allow_non_vehicles': True,
-                               'spawn_invalid_objects': True,
-                           })
-
-    data_loader = torch.utils.data.DataLoader(
-        dataset,
-        batch_size=32,
-        num_workers=4,
-        pin_memory=True,
-    )
-
-    for i, x in zip(range(100), data_loader):
-        print(i, x[0].shape, x[1].shape)
diff --git a/examples/nocturne_functions.py b/examples/nocturne_functions.py
deleted file mode 100644
index 93f37c52..00000000
--- a/examples/nocturne_functions.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Example of how to make movies of Nocturne scenarios."""
-import os
-
-import hydra
-import matplotlib.pyplot as plt
-import numpy as np
-
-from cfgs.config import PROJECT_PATH, get_scenario_dict, set_display_window
-from nocturne import Simulation, Action
-
-
-def save_image(img, output_path='./img.png'):
-    """Make a single image from the scenario."""
-    dpi = 100
-    height, width, depth = img.shape
-    figsize = width / float(dpi), height / float(dpi)
-    plt.figure(figsize=figsize, dpi=dpi)
-    plt.axis('off')
-    plt.imshow(img)
-    plt.savefig(output_path)
-    print('>', output_path)
-
-
-@hydra.main(config_path="../cfgs/", config_name="config")
-def main(cfg):
-    """Initialize the scenario."""
-    set_display_window()
-    if not os.path.exists(PROJECT_PATH / 'examples/rendering'):
-        os.makedirs(PROJECT_PATH / 'examples/rendering')
-    # load scenario. by default this won't have pedestrians or cyclists
-    sim = Simulation(scenario_path=str(PROJECT_PATH / 'examples' /
-                                       'example_scenario.json'),
-                     config=get_scenario_dict(cfg))
-    scenario = sim.getScenario()
-    img = scenario.getImage(
-        img_width=2000,
-        img_height=2000,
-        padding=50.0,
-        draw_target_positions=True,
-    )
-    save_image(img,
-               PROJECT_PATH / 'examples/rendering' / 'scene_with_no_peds.png')
-    # grab all the vehicles
-    vehs = scenario.getVehicles()
-    # grab all the vehicles that moved and show some things
-    # we can do with them
-    vehs = scenario.getObjectsThatMoved()
-    vehs[0].highlight = True  # draw a circle around it on the rendered image
-    # setting a vehicle to expert_control will cause
-    # this agent will replay expert data starting frmo
-    # the current time in the simulation
-    vehs[0].expert_control = True
-    print(f'width is {vehs[0].width}, length is {vehs[0].length}')
-    print(f'speed is {vehs[0].speed}, heading is {vehs[0].heading}')
-    print(f'position is {vehs[0].width}, length is {vehs[0].length}')
-    # for efficiency, we return position as a custom Vector2D object
-    # this object can be converted to and from numpy and comes with
-    # support for a variety of algebraic operations
-    print(f'position is {vehs[0].position}')
-    print(f'position as numpy array is {vehs[0].position.numpy()}')
-    print(f'norm of position is {vehs[0].position.norm()}')
-    print(f'angle in a world-centered frame {vehs[0].position.angle()}')
-    print(f'rotated position is {vehs[0].position.rotate(np.pi).numpy()}')
-    # we can set vehicle accel, steering, head angle directly
-    vehs[0].acceleration = -1
-    vehs[0].steering = 1
-    vehs[0].head_angle = np.pi
-    # we can also set them all directly using an action object
-    vehs[0].apply_action(Action(acceleration=-1, steering=1, head_angle=np.pi))
-    # we can grab the state for this vehicle in two way:
-    # 1) a flattened vector corresponding to the set of visible objects
-    # concatenated according to [visible objects, visible road points,
-    #                           visible stop signs, visible traffic lights]
-    # note that since we want to make a fixed length vector, for each of these
-    # types the config, under the scenario key has the following items
-    # max_visible_objects: 16
-    # max_visible_road_points: 1000
-    # max_visible_traffic_lights: 20
-    # max_visible_stop_signs: 4
-    # we grab all the visible items for each type, sort them by distance from
-    # the vehicle and return the closest. If we have fewer than the maximum
-    # we pad with 0s.
-    flattened_vector = scenario.flattened_visible_state(object=vehs[0],
-                                                        view_dist=80,
-                                                        view_angle=120 *
-                                                        (np.pi / 180),
-                                                        head_angle=0.0)
-    # we can also grab a dict of all of the objects
-    # if padding is true we will add extra objects to the dict
-    # to ensure we hit the maximum number of objects for each type
-    visible_dict = scenario.visible_state(object=vehs[0],
-                                          view_dist=80,
-                                          view_angle=120 * (np.pi / 180),
-                                          padding=False)
-    # step the scenario. By default we step at 0.1s.
-    # you can use any step you want, but, if you do so make sure
-    # not to make any vehicle an expert as the expert positions / speeds / headings
-    # are only available in increments of 0.1 seconds
-    sim.step(cfg['dt'])
-
-    # load scenario, this time with pedestrians and cyclists
-    cfg['scenario']['allow_non_vehicles'] = True
-    sim = Simulation(scenario_path=str(PROJECT_PATH / 'examples' /
-                                       'example_scenario.json'),
-                     config=get_scenario_dict(cfg))
-    scenario = sim.getScenario()
-    img = scenario.getImage(
-        img_width=2000,
-        img_height=2000,
-        padding=50.0,
-        draw_target_positions=True,
-    )
-    save_image(img,
-               PROJECT_PATH / 'examples/rendering' / 'scene_with_peds.png')
-    # now we need to be slightly more careful about how we select objects
-    # since getMovingObjects will return pedestrians and cyclists
-    # and getVehicles will return vehicles that don't necessarily need to move
-    objects_that_moved = scenario.getObjectsThatMoved()
-    objects_of_interest = [
-        obj for obj in scenario.getVehicles() if obj in objects_that_moved
-    ]  # noqa: 841
-    vehicles = scenario.getVehicles()
-    cyclists = scenario.getCyclists()
-    pedestrians = scenario.getPedestrians()
-    all_objects = scenario.getObjects()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/examples/on_policy_files/nocturne_runner.py b/examples/on_policy_files/nocturne_runner.py
deleted file mode 100644
index 8f988ad5..00000000
--- a/examples/on_policy_files/nocturne_runner.py
+++ /dev/null
@@ -1,562 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-# Code modified from https://github.com/marlbenchmark/on-policy
-"""Runner for PPO from https://github.com/marlbenchmark/on-policy."""
-from pathlib import Path
-import os
-import time
-
-import hydra
-from cfgs.config import set_display_window
-import imageio
-import numpy as np
-import setproctitle
-import torch
-import wandb
-
-from algos.ppo.base_runner import Runner
-from algos.ppo.env_wrappers import SubprocVecEnv, DummyVecEnv
-
-from nocturne.envs.wrappers import create_ppo_env
-
-
-def _t2n(x):
-    """Convert torch tensor to a numpy array."""
-    return x.detach().cpu().numpy()
-
-
-def make_train_env(cfg):
-    """Construct a training environment."""
-
-    def get_env_fn(rank):
-
-        def init_env():
-            env = create_ppo_env(cfg, rank)
-            # TODO(eugenevinitsky) implement this
-            env.seed(cfg.seed + rank * 1000)
-            return env
-
-        return init_env
-
-    if cfg.algorithm.n_rollout_threads == 1:
-        return DummyVecEnv([get_env_fn(0)])
-    else:
-        return SubprocVecEnv(
-            [get_env_fn(i) for i in range(cfg.algorithm.n_rollout_threads)])
-
-
-def make_eval_env(cfg):
-    """Construct an eval environment."""
-
-    def get_env_fn(rank):
-
-        def init_env():
-            env = create_ppo_env(cfg)
-            # TODO(eugenevinitsky) implement this
-            env.seed(cfg.seed + rank * 1000)
-            return env
-
-        return init_env
-
-    if cfg.algorithm.n_eval_rollout_threads == 1:
-        return DummyVecEnv([get_env_fn(0)])
-    else:
-        return SubprocVecEnv(
-            [get_env_fn(i) for i in range(cfg.algorithm.n_eval_rollout_threads)])
-
-
-def make_render_env(cfg):
-    """Construct a rendering environment."""
-
-    def get_env_fn(rank):
-
-        def init_env():
-            env = create_ppo_env(cfg)
-            # TODO(eugenevinitsky) implement this
-            env.seed(cfg.seed + rank * 1000)
-            return env
-
-        return init_env
-
-    return DummyVecEnv([get_env_fn(0)])
-
-
-class NocturneSharedRunner(Runner):
-    """
-    Runner class to perform training, evaluation and data collection for the Nocturne envs.
-
-    WARNING: Assumes a shared policy.
-    """
-
-    def __init__(self, config):
-        """Initialize."""
-        super(NocturneSharedRunner, self).__init__(config)
-        self.cfg = config['cfg.algo']
-        self.render_envs = config['render_envs']
-
-    def run(self):
-        """Run the training code."""
-        self.warmup()
-
-        start = time.time()
-        episodes = int(self.num_env_steps
-                       ) // self.episode_length // self.n_rollout_threads
-
-        for episode in range(episodes):
-            if self.use_linear_lr_decay:
-                self.trainer.policy.lr_decay(episode, episodes)
-
-            for step in range(self.episode_length):
-                # Sample actions
-                values, actions, action_log_probs, rnn_states, rnn_states_critic, actions_env = self.collect(
-                    step)
-
-                # Obser reward and next obs
-                obs, rewards, dones, infos = self.envs.step(actions_env)
-
-                data = obs, rewards, dones, infos, values, actions, action_log_probs, rnn_states, rnn_states_critic
-
-                # insert data into buffer
-                self.insert(data)
-
-            # compute return and update network
-            self.compute()
-            train_infos = self.train()
-
-            # post process
-            total_num_steps = (
-                episode + 1) * self.episode_length * self.n_rollout_threads
-
-            # save model
-            if (episode % self.save_interval == 0 or episode == episodes - 1):
-                self.save()
-
-            # log information
-            if episode % self.log_interval == 0:
-                end = time.time()
-                print(
-                    "\n Algo {} Exp {} updates {}/{} episodes, total num timesteps {}/{}, FPS {}.\n"
-                    .format(self.algorithm_name, self.experiment_name,
-                            episode * self.n_rollout_threads,
-                            episodes * self.n_rollout_threads, total_num_steps,
-                            self.num_env_steps,
-                            int(total_num_steps / (end - start))))
-
-                if self.use_wandb:
-                    wandb.log({'fps': int(total_num_steps / (end - start))},
-                              step=total_num_steps)
-                env_infos = {}
-                for agent_id in range(self.num_agents):
-                    idv_rews = []
-                    for info in infos:
-                        if 'individual_reward' in info[agent_id].keys():
-                            idv_rews.append(
-                                info[agent_id]['individual_reward'])
-                    agent_k = 'agent%i/individual_rewards' % agent_id
-                    env_infos[agent_k] = idv_rews
-
-                # TODO(eugenevinitsky) this does not correctly account for the fact that there could be
-                # two episodes in the buffer
-                train_infos["average_episode_rewards"] = np.mean(
-                    self.buffer.rewards) * self.episode_length
-                print("average episode rewards is {}".format(
-                    train_infos["average_episode_rewards"]))
-                print(
-                    f"maximum per step reward is {np.max(self.buffer.rewards)}"
-                )
-                self.log_train(train_infos, total_num_steps)
-                self.log_env(env_infos, total_num_steps)
-
-            # eval
-            if episode % self.eval_interval == 0 and self.use_eval:
-                self.eval(total_num_steps)
-
-            # save videos
-            if episode % self.cfg.render_interval == 0:
-                self.render(total_num_steps)
-
-    def warmup(self):
-        """Initialize the buffers."""
-        # reset env
-        obs = self.envs.reset()
-
-        # replay buffer
-        if self.use_centralized_V:
-            share_obs = obs.reshape(self.n_rollout_threads, -1)
-            share_obs = np.expand_dims(share_obs, 1).repeat(self.num_agents,
-                                                            axis=1)
-        else:
-            share_obs = obs
-
-        self.buffer.share_obs[0] = share_obs.copy()
-        self.buffer.obs[0] = obs.copy()
-
-    @torch.no_grad()
-    def collect(self, step):
-        """Collect rollout data."""
-        self.trainer.prep_rollout()
-        value, action, action_log_prob, rnn_states, rnn_states_critic \
-            = self.trainer.policy.get_actions(np.concatenate(self.buffer.share_obs[step]),
-                                              np.concatenate(self.buffer.obs[step]),
-                                              np.concatenate(self.buffer.rnn_states[step]),
-                                              np.concatenate(self.buffer.rnn_states_critic[step]),
-                                              np.concatenate(self.buffer.masks[step]))
-        # [self.envs, agents, dim]
-        values = np.array(np.split(_t2n(value), self.n_rollout_threads))
-        actions = np.array(np.split(_t2n(action), self.n_rollout_threads))
-        action_log_probs = np.array(
-            np.split(_t2n(action_log_prob), self.n_rollout_threads))
-        rnn_states = np.array(
-            np.split(_t2n(rnn_states), self.n_rollout_threads))
-        rnn_states_critic = np.array(
-            np.split(_t2n(rnn_states_critic), self.n_rollout_threads))
-        # rearrange action
-        if self.envs.action_space[0].__class__.__name__ == 'MultiDiscrete':
-            for i in range(self.envs.action_space[0].shape):
-                uc_actions_env = np.eye(self.envs.action_space[0].high[i] +
-                                        1)[actions[:, :, i]]
-                if i == 0:
-                    actions_env = uc_actions_env
-                else:
-                    actions_env = np.concatenate((actions_env, uc_actions_env),
-                                                 axis=2)
-        elif self.envs.action_space[0].__class__.__name__ == 'Discrete':
-            actions_env = np.squeeze(
-                np.eye(self.envs.action_space[0].n)[actions], 2)
-        else:
-            raise NotImplementedError
-
-        return values, actions, action_log_probs, rnn_states, rnn_states_critic, actions_env
-
-    def insert(self, data):
-        """Store the data in the buffers."""
-        obs, rewards, dones, _, values, actions, action_log_probs, rnn_states, rnn_states_critic = data
-
-        dones_env = np.all(dones, axis=1)
-
-        rnn_states[dones_env] = np.zeros(((dones_env).sum(), self.num_agents,
-                                          self.recurrent_N, self.hidden_size),
-                                         dtype=np.float32)
-        rnn_states_critic[dones_env] = np.zeros(
-            ((dones_env).sum(), self.num_agents,
-             *self.buffer.rnn_states_critic.shape[3:]),
-            dtype=np.float32)
-
-        masks = np.ones((self.n_rollout_threads, self.num_agents, 1),
-                        dtype=np.float32)
-        masks[dones_env] = np.zeros(((dones_env).sum(), self.num_agents, 1),
-                                    dtype=np.float32)
-
-        active_masks = np.ones((self.n_rollout_threads, self.num_agents, 1),
-                               dtype=np.float32)
-        active_masks[dones] = np.zeros(((dones).sum(), 1), dtype=np.float32)
-        active_masks[dones_env] = np.ones(
-            ((dones_env).sum(), self.num_agents, 1), dtype=np.float32)
-
-        if self.use_centralized_V:
-            share_obs = obs.reshape(self.n_rollout_threads, -1)
-            share_obs = np.expand_dims(share_obs, 1).repeat(self.num_agents,
-                                                            axis=1)
-        else:
-            share_obs = obs
-
-        self.buffer.insert(share_obs,
-                           obs,
-                           rnn_states,
-                           rnn_states_critic,
-                           actions,
-                           action_log_probs,
-                           values,
-                           rewards,
-                           masks,
-                           active_masks=active_masks)
-
-    @torch.no_grad()
-    def eval(self, total_num_steps):
-        """Get the policy returns in deterministic mode."""
-        eval_episode = 0
-
-        eval_episode_rewards = []
-        one_episode_rewards = [[] for _ in range(self.n_eval_rollout_threads)]
-        num_achieved_goals = 0
-        num_collisions = 0
-
-        i = 0
-        eval_obs = self.eval_envs.reset()
-
-        eval_rnn_states = np.zeros(
-            (self.n_eval_rollout_threads, self.num_agents, self.recurrent_N,
-             self.hidden_size),
-            dtype=np.float32)
-        eval_masks = np.ones((self.n_eval_rollout_threads, self.num_agents, 1),
-                             dtype=np.float32)
-
-        while eval_episode < self.cfg.eval_episodes:
-            i += 1
-            self.trainer.prep_rollout()
-            eval_actions, eval_rnn_states = \
-                self.trainer.policy.act(np.concatenate(eval_obs),
-                                        np.concatenate(eval_rnn_states),
-                                        np.concatenate(eval_masks),
-                                        deterministic=True)
-            eval_actions = np.array(
-                np.split(_t2n(eval_actions), self.n_eval_rollout_threads))
-            eval_rnn_states = np.array(
-                np.split(_t2n(eval_rnn_states), self.n_eval_rollout_threads))
-
-            # Observed reward and next obs
-            eval_obs, eval_rewards, eval_dones, eval_infos = self.eval_envs.step(
-                eval_actions)
-            for info_arr in eval_infos:
-                for agent_info_arr in info_arr:
-                    if 'goal_achieved' in agent_info_arr and agent_info_arr[
-                            'goal_achieved']:
-                        num_achieved_goals += 1
-                    if 'collided' in agent_info_arr and agent_info_arr[
-                            'collided']:
-                        num_collisions += 1
-
-            for i in range(self.n_eval_rollout_threads):
-                one_episode_rewards[i].append(eval_rewards[i])
-
-            eval_dones_env = np.all(eval_dones, axis=1)
-
-            eval_rnn_states[eval_dones_env] = np.zeros(
-                ((eval_dones_env).sum(), self.num_agents, self.recurrent_N,
-                 self.hidden_size),
-                dtype=np.float32)
-
-            eval_masks = np.ones(
-                (self.n_eval_rollout_threads, self.num_agents, 1),
-                dtype=np.float32)
-            eval_masks[eval_dones_env] = np.zeros(
-                ((eval_dones_env).sum(), self.num_agents, 1), dtype=np.float32)
-
-            for eval_i in range(self.n_eval_rollout_threads):
-                if eval_dones_env[eval_i]:
-                    eval_episode += 1
-                    eval_episode_rewards.append(
-                        np.sum(one_episode_rewards[eval_i], axis=0).mean())
-                    one_episode_rewards[eval_i] = []
-
-        eval_episode_rewards = np.array(eval_episode_rewards)
-        eval_episode_rewards = np.mean(eval_episode_rewards)
-        if self.use_wandb:
-            wandb.log({'eval_episode_rewards': eval_episode_rewards},
-                      step=total_num_steps)
-            wandb.log(
-                {
-                    'avg_eval_goals_achieved':
-                    num_achieved_goals / self.num_agents /
-                    self.cfg.eval_episodes
-                },
-                step=total_num_steps)
-            wandb.log(
-                {
-                    'avg_eval_num_collisions':
-                    num_collisions / self.num_agents / self.cfg.eval_episodes
-                },
-                step=total_num_steps)
-
-    @torch.no_grad()
-    def render(self, total_num_steps):
-        """Visualize the env."""
-        envs = self.render_envs
-
-        all_frames = []
-        for episode in range(self.cfg.render_episodes):
-            obs = envs.reset()
-            if self.cfg.save_gifs:
-                image = envs.envs[0].render('rgb_array')
-                all_frames.append(image)
-            else:
-                envs.render('human')
-
-            rnn_states = np.zeros(
-                (1, self.num_agents, self.recurrent_N, self.hidden_size),
-                dtype=np.float32)
-            masks = np.ones((1, self.num_agents, 1), dtype=np.float32)
-
-            episode_rewards = []
-
-            self.trainer.prep_rollout()
-            for step in range(self.episode_length):
-                calc_start = time.time()
-
-                action, rnn_states = self.trainer.policy.act(
-                    np.concatenate(obs),
-                    np.concatenate(rnn_states),
-                    np.concatenate(masks),
-                    deterministic=True)
-                actions = np.array(np.split(_t2n(action), 1))
-                rnn_states = np.array(np.split(_t2n(rnn_states), 1))
-
-                if envs.action_space[0].__class__.__name__ == 'MultiDiscrete':
-                    for i in range(envs.action_space[0].shape):
-                        uc_actions_env = np.eye(envs.action_space[0].high[i] +
-                                                1)[actions[:, :, i]]
-                        if i == 0:
-                            actions_env = uc_actions_env
-                        else:
-                            actions_env = np.concatenate(
-                                (actions_env, uc_actions_env), axis=2)
-                elif envs.action_space[0].__class__.__name__ == 'Discrete':
-                    actions_env = np.squeeze(
-                        np.eye(envs.action_space[0].n)[actions], 2)
-                else:
-                    raise NotImplementedError
-
-                # Obser reward and next obs
-                obs, rewards, dones, infos = envs.step(actions_env)
-                episode_rewards.append(rewards)
-
-                rnn_states[dones] = np.zeros(
-                    ((dones).sum(), self.recurrent_N, self.hidden_size),
-                    dtype=np.float32)
-                masks = np.ones((1, self.num_agents, 1), dtype=np.float32)
-                masks[dones] = np.zeros(((dones).sum(), 1), dtype=np.float32)
-
-                if self.cfg.save_gifs:
-                    image = envs.envs[0].render('rgb_array')
-                    all_frames.append(image)
-                    calc_end = time.time()
-                    elapsed = calc_end - calc_start
-                    if elapsed < self.cfg.ifi:
-                        time.sleep(self.cfg.ifi - elapsed)
-                else:
-                    envs.render('human')
-
-                if np.all(dones[0]):
-                    break
-
-            # note, every rendered episode is exactly the same since there's no randomness in the env and our actions
-            # are deterministic
-            # TODO(eugenevinitsky) why is this lower than the non-render reward?
-            render_val = np.mean(np.sum(np.array(episode_rewards), axis=0))
-            print("episode reward of rendered episode is: " + str(render_val))
-            if self.use_wandb:
-                wandb.log({'render_rew': render_val}, step=total_num_steps)
-
-        if self.cfg.save_gifs:
-            if self.use_wandb:
-                np_arr = np.stack(all_frames).transpose((0, 3, 1, 2))
-                wandb.log({"video": wandb.Video(np_arr, fps=4, format="gif")},
-                          step=total_num_steps)
-            # else:
-            imageio.mimsave(os.getcwd() + '/render.gif',
-                            all_frames,
-                            duration=self.cfg.ifi)
-
-
-@hydra.main(config_path='../../cfgs/', config_name='config')
-def main(cfg):
-    """Run the on-policy code."""
-    set_display_window()
-    logdir = Path(os.getcwd())
-    if cfg.wandb_id is not None:
-        wandb_id = cfg.wandb_id
-    else:
-        wandb_id = wandb.util.generate_id()
-        # with open(os.path.join(logdir, 'wandb_id.txt'), 'w+') as f:
-        #     f.write(wandb_id)
-    wandb_mode = "disabled" if (cfg.debug or not cfg.wandb) else "online"
-
-    if cfg.wandb:
-        run = wandb.init(config=cfg,
-                         project=cfg.wandb_name,
-                         name=wandb_id,
-                         group='ppov2_' + cfg.experiment,
-                         resume="allow",
-                         settings=wandb.Settings(start_method="fork"),
-                         mode=wandb_mode)
-    else:
-        if not logdir.exists():
-            curr_run = 'run1'
-        else:
-            exst_run_nums = [
-                int(str(folder.name).split('run')[1])
-                for folder in logdir.iterdir()
-                if str(folder.name).startswith('run')
-            ]
-            if len(exst_run_nums) == 0:
-                curr_run = 'run1'
-            else:
-                curr_run = 'run%i' % (max(exst_run_nums) + 1)
-        logdir = logdir / curr_run
-        if not logdir.exists():
-            os.makedirs(str(logdir))
-
-    if cfg.algorithm.algorithm_name == "rmappo":
-        assert (cfg.algorithm.use_recurrent_policy
-                or cfg.algorithm.use_naive_recurrent_policy), (
-                    "check recurrent policy!")
-    elif cfg.algorithm.algorithm_name == "mappo":
-        assert (not cfg.algorithm.use_recurrent_policy
-                and not cfg.algorithm.use_naive_recurrent_policy), (
-                    "check recurrent policy!")
-    else:
-        raise NotImplementedError
-
-    # cuda
-    if 'cpu' not in cfg.algorithm.device and torch.cuda.is_available():
-        print("choose to use gpu...")
-        device = torch.device(cfg.algorithm.device)
-        torch.set_num_threads(cfg.algorithm.n_training_threads)
-        # if cfg.algorithm.cuda_deterministic:
-        #     import torch.backends.cudnn as cudnn
-        #     cudnn.benchmark = False
-        #     cudnn.deterministic = True
-    else:
-        print("choose to use cpu...")
-        device = torch.device("cpu")
-        torch.set_num_threads(cfg.algorithm.n_training_threads)
-
-    setproctitle.setproctitle(
-        str(cfg.algorithm.algorithm_name) + "-" + str(cfg.experiment))
-
-    # seed
-    torch.manual_seed(cfg.algorithm.seed)
-    torch.cuda.manual_seed_all(cfg.algorithm.seed)
-    np.random.seed(cfg.algorithm.seed)
-
-    # env init
-    # TODO(eugenevinitsky) this code requires a fixed number of agents but this
-    # should be done by overriding in the hydra config rather than here
-    cfg.subscriber.keep_inactive_agents = True
-    envs = make_train_env(cfg)
-    eval_envs = make_eval_env(cfg)
-    render_envs = make_render_env(cfg)
-    # TODO(eugenevinitsky) hacky
-    num_agents = envs.reset().shape[1]
-
-    config = {
-        "cfg.algo": cfg.algorithm,
-        "envs": envs,
-        "eval_envs": eval_envs,
-        "render_envs": render_envs,
-        "num_agents": num_agents,
-        "device": device,
-        "logdir": logdir
-    }
-
-    # run experiments
-    runner = NocturneSharedRunner(config)
-    runner.run()
-
-    # post process
-    envs.close()
-    if cfg.algorithm.use_eval and eval_envs is not envs:
-        eval_envs.close()
-
-    if cfg.wandb:
-        run.finish()
-    else:
-        runner.writter.export_scalars_to_json(
-            str(runner.log_dir + '/summary.json'))
-        runner.writter.close()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/examples/rendering.py b/examples/rendering.py
deleted file mode 100644
index 46050f57..00000000
--- a/examples/rendering.py
+++ /dev/null
@@ -1,204 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Example of how to make movies of Nocturne scenarios."""
-import os
-
-import hydra
-import imageio
-import matplotlib.pyplot as plt
-import numpy as np
-
-from cfgs.config import PROJECT_PATH, get_scenario_dict, set_display_window
-from nocturne import Simulation
-
-
-def get_sim(cfg):
-    """Initialize the scenario."""
-    # load scenario, set vehicles to be expert-controlled
-    sim = Simulation(scenario_path=str(PROJECT_PATH / 'examples' /
-                                       'example_scenario.json'),
-                     config=get_scenario_dict(cfg))
-    for obj in sim.getScenario().getObjectsThatMoved():
-        obj.expert_control = True
-    return sim
-
-
-def make_movie(cfg,
-               scenario_fn,
-               output_path='./vid.mp4',
-               dt=0.1,
-               steps=90,
-               fps=10):
-    """Make a movie from the scenario."""
-    sim = get_sim(cfg)
-    scenario = sim.getScenario()
-    movie_frames = []
-    timestep = 0
-    movie_frames.append(scenario_fn(scenario, timestep))
-    for i in range(steps):
-        sim.step(dt)
-        timestep += 1
-        movie_frames.append(scenario_fn(scenario, timestep))
-    movie_frames = np.array(movie_frames)
-    imageio.mimwrite(output_path, movie_frames, fps=fps)
-    print('>', output_path)
-    del sim
-    del movie_frames
-
-
-def make_image(cfg, scenario_fn, output_path='./img.png'):
-    """Make a single image from the scenario."""
-    sim = get_sim(cfg)
-    scenario = sim.getScenario()
-    img = scenario_fn(scenario)
-    dpi = 100
-    height, width, depth = img.shape
-    figsize = width / float(dpi), height / float(dpi)
-    plt.figure(figsize=figsize, dpi=dpi)
-    plt.axis('off')
-    plt.imshow(img)
-    plt.savefig(output_path)
-    print('>', output_path)
-
-
-@hydra.main(config_path="../cfgs/", config_name="config")
-def main(cfg):
-    """See file docstring."""
-    # NOTE: don't run this file all at once since the memory usage for
-    # rendering all the videos will be dozens of gigabytes
-    set_display_window()
-
-    if not os.path.exists(PROJECT_PATH / 'examples/rendering'):
-        os.makedirs(PROJECT_PATH / 'examples/rendering')
-
-    # movie of whole scenario
-    make_movie(
-        cfg,
-        scenario_fn=lambda scenario, _: scenario.getImage(
-            img_width=1600,
-            img_height=1600,
-            draw_target_positions=True,
-            padding=50.0,
-        ),
-        output_path=PROJECT_PATH / 'examples/rendering' /
-        'movie_whole_scenario.mp4',
-    )
-
-    # movie around a vehicle
-    make_movie(
-        cfg,
-        scenario_fn=lambda scenario, _: scenario.getImage(
-            img_width=1600,
-            img_height=1600,
-            draw_target_positions=True,
-            padding=50.0,
-            source=scenario.getVehicles()[3],
-            view_width=120,
-            view_height=120,
-            rotate_with_source=True,
-        ),
-        output_path=PROJECT_PATH / 'examples/rendering' /
-        'movie_around_vehicle.mp4',
-    )
-
-    # movie around a vehicle (without rotating with source)
-    make_movie(
-        cfg,
-        scenario_fn=lambda scenario, _: scenario.getImage(
-            img_width=1600,
-            img_height=1600,
-            draw_target_positions=True,
-            padding=50.0,
-            source=scenario.getObjectsThatMoved()[0],
-            view_width=120,
-            view_height=120,
-            rotate_with_source=False,
-        ),
-        output_path=PROJECT_PATH / 'examples/rendering' /
-        'movie_around_vehicle_stable.mp4',
-    )
-
-    # movie of cone around vehicle
-    make_movie(
-        cfg,
-        scenario_fn=lambda scenario, _: scenario.getConeImage(
-            source=scenario.getObjectsThatMoved()[0],
-            view_dist=80,
-            view_angle=np.pi * (120 / 180),
-            head_angle=0.0,
-            img_width=1600,
-            img_height=1600,
-            padding=50.0,
-            draw_target_position=True,
-        ),
-        output_path=PROJECT_PATH / 'examples/rendering' / 'movie_cone.mp4',
-    )
-
-    # movie of cone around vehicle with varying head angle
-    make_movie(
-        cfg,
-        scenario_fn=lambda scenario, timestep: scenario.getConeImage(
-            source=scenario.getVehicles()[6],
-            view_dist=80.0,
-            view_angle=np.pi * (120 / 180),
-            head_angle=0.8 * np.sin(timestep / 10),
-            img_width=1600,
-            img_height=1600,
-            padding=50.0,
-            draw_target_position=True,
-        ),
-        output_path=PROJECT_PATH / 'examples/rendering' /
-        'movie_cone_head_angle.mp4',
-    )
-
-    # image of whole scenario
-    make_image(
-        cfg,
-        scenario_fn=lambda scenario: scenario.getImage(
-            img_width=2000,
-            img_height=2000,
-            padding=50.0,
-            draw_target_positions=True,
-        ),
-        output_path=PROJECT_PATH / 'examples/rendering' / 'img_scenario.png',
-    )
-
-    # image of cone
-    make_image(
-        cfg,
-        scenario_fn=lambda scenario: scenario.getConeImage(
-            source=scenario.getVehicles()[9],
-            view_dist=80,
-            view_angle=np.pi * (120 / 180),
-            head_angle=np.pi / 8.0,
-            img_width=2000,
-            img_height=2000,
-            padding=50.0,
-            draw_target_position=True,
-        ),
-        output_path=PROJECT_PATH / 'examples/rendering' /
-        'img_cone_tilted.png',
-    )
-
-    # image of visible state
-    make_image(
-        cfg,
-        scenario_fn=lambda scenario: scenario.getFeaturesImage(
-            source=scenario.getVehicles()[9],
-            view_dist=80,
-            view_angle=np.pi * (120 / 180),
-            head_angle=np.pi / 8.0,
-            img_width=2000,
-            img_height=2000,
-            padding=50.0,
-            draw_target_position=True,
-        ),
-        output_path=PROJECT_PATH / 'examples/rendering' /
-        'img_features_tilted.png',
-    )
-
-
-if __name__ == '__main__':
-    main()
diff --git a/examples/rllib_files/run_rllib.py b/examples/rllib_files/run_rllib.py
deleted file mode 100644
index fb019d00..00000000
--- a/examples/rllib_files/run_rllib.py
+++ /dev/null
@@ -1,173 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Example run script for RLlib."""
-import os
-
-import hydra
-from omegaconf import OmegaConf
-from cfgs.config import set_display_window
-import ray
-from ray import tune
-from ray.tune.registry import register_env
-from ray.rllib.env.multi_agent_env import MultiAgentEnv
-
-from nocturne.envs.wrappers import create_env
-
-
-class RLlibWrapperEnv(MultiAgentEnv):
-    """Thin wrapper making our env look like a MultiAgentEnv."""
-
-    metadata = {
-        "render.modes": ["rgb_array"],
-    }
-
-    def __init__(self, env):
-        """See wrapped env class."""
-        self._skip_env_checking = True  # temporary fix for rllib env checking issue
-        super().__init__()
-        self._env = env
-
-    def step(self, actions):
-        """See wrapped env class."""
-        next_obs, rew, done, info = self._env.step(actions)
-        return next_obs, rew, done, info
-
-    def reset(self):
-        """See wrapped env class."""
-        obses = self._env.reset()
-        return obses
-
-    @property
-    def observation_space(self):
-        """See wrapped env class."""
-        return self._env.observation_space
-
-    @property
-    def action_space(self):
-        """See wrapped env class."""
-        return self._env.action_space
-
-    def render(self, mode=None):
-        """See wrapped env class."""
-        return self._env.render()
-
-    def seed(self, seed=None):
-        """Set seed on the wrapped env."""
-        self._env.seed(seed)
-
-    def __getattr__(self, name):
-        """Return attributes from the wrapped env."""
-        return getattr(self._env, name)
-
-
-def create_rllib_env(cfg):
-    """Return an MultiAgentEnv wrapped environment."""
-    return RLlibWrapperEnv(create_env(cfg))
-
-
-@hydra.main(config_path="../../cfgs/", config_name="config")
-def main(cfg):
-    """Run RLlib example."""
-    set_display_window()
-    cfg = OmegaConf.to_container(cfg, resolve=True)
-    # TODO(eugenevinitsky) move these into a config
-    if cfg['debug']:
-        ray.init(local_mode=True)
-        num_workers = 0
-        num_envs_per_worker = 1
-        num_gpus = 0
-        use_lstm = False
-    else:
-        num_workers = 15
-        num_envs_per_worker = 5
-        num_gpus = 1
-        use_lstm = True
-
-    register_env("nocturne", lambda cfg: create_rllib_env(cfg))
-
-    username = os.environ["USER"]
-    tune.run(
-        "PPO",
-        # TODO(eugenevinitsky) move into config
-        local_dir=f"/checkpoint/{username}/nocturne/ray_results",
-        stop={"episodes_total": 60000},
-        checkpoint_freq=1000,
-        config={
-            # Enviroment specific.
-            "env":
-            "nocturne",
-            "env_config":
-            cfg,
-            # General
-            "framework":
-            "torch",
-            "num_gpus":
-            num_gpus,
-            "num_workers":
-            num_workers,
-            "num_envs_per_worker":
-            num_envs_per_worker,
-            "observation_filter":
-            "MeanStdFilter",
-            # Method specific.
-            "entropy_coeff":
-            0.0,
-            "num_sgd_iter":
-            5,
-            "train_batch_size":
-            max(100 * num_workers * num_envs_per_worker, 512),
-            "rollout_fragment_length":
-            20,
-            "sgd_minibatch_size":
-            max(int(100 * num_workers * num_envs_per_worker / 4), 512),
-            "multiagent": {
-                # We only have one policy (calling it "shared").
-                # Class, obs/act-spaces, and config will be derived
-                # automatically.
-                "policies": {"shared_policy"},
-                # Always use "shared" policy.
-                "policy_mapping_fn":
-                (lambda agent_id, episode, **kwargs: "shared_policy"),
-                # each agent step is counted towards train_batch_size
-                # rather than environment steps
-                "count_steps_by":
-                "agent_steps",
-            },
-            "model": {
-                "use_lstm": use_lstm
-            },
-            # Evaluation stuff
-            "evaluation_interval":
-            50,
-            # Run evaluation on (at least) one episodes
-            "evaluation_duration":
-            1,
-            # ... using one evaluation worker (setting this to 0 will cause
-            # evaluation to run on the local evaluation worker, blocking
-            # training until evaluation is done).
-            # TODO: if this is not 0, it seems to error out
-            "evaluation_num_workers":
-            0,
-            # Special evaluation config. Keys specified here will override
-            # the same keys in the main config, but only for evaluation.
-            "evaluation_config": {
-                # Store videos in this relative directory here inside
-                # the default output dir (~/ray_results/...).
-                # Alternatively, you can specify an absolute path.
-                # Set to True for using the default output dir (~/ray_results/...).
-                # Set to False for not recording anything.
-                "record_env": "videos_test",
-                # "record_env": "/Users/xyz/my_videos/",
-                # Render the env while evaluating.
-                # Note that this will always only render the 1st RolloutWorker's
-                # env and only the 1st sub-env in a vectorized env.
-                "render_env": True,
-            },
-        },
-    )
-
-
-if __name__ == "__main__":
-    main()
diff --git a/examples/sample_factory_files/results/plot_successes.py b/examples/sample_factory_files/results/plot_successes.py
deleted file mode 100644
index fda57827..00000000
--- a/examples/sample_factory_files/results/plot_successes.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Util for plotting eval_sample_factory.py output."""
-import matplotlib.pyplot as plt
-import numpy as np
-
-if __name__ == '__main__':
-    plt.figure()
-    num_arr = np.load('success_by_veh_number.npy')
-    for i in range(num_arr.shape[0]):
-        veh_num_arr = num_arr[i, i]
-        plt.figure()
-        plt.plot(list(range(len(veh_num_arr))), veh_num_arr[:, 0])
-        plt.plot(list(range(len(veh_num_arr))), veh_num_arr[:, 1])
-        plt.plot(list(range(len(veh_num_arr))),
-                 veh_num_arr[:, 1] + veh_num_arr[:, 0])
-        plt.xlabel('num vehicles')
-        plt.ylabel('rate')
-        plt.legend(['goal rate', 'collide rate', 'sum'])
-        plt.title('goal rate as function of number of vehicles')
-        plt.savefig(f'{i}_goal_func_num.png')
-        plt.close()
-    num_arr = np.load('success_by_dist.npy')
-    for i in range(num_arr.shape[0]):
-        dist_arr = num_arr[i, i]
-        plt.figure()
-        plt.plot(10 * np.array(list(range(len(dist_arr)))), dist_arr[:, 0])
-        plt.plot(10 * np.array(list(range(len(dist_arr)))), dist_arr[:, 1])
-        plt.plot(10 * np.array(list(range(len(dist_arr)))),
-                 dist_arr[:, 1] + dist_arr[:, 0])
-        plt.xlabel('distance')
-        plt.ylabel('rate')
-        plt.legend(['goal rate', 'collide rate', 'sum'])
-        plt.title('goal rate as function of start distance')
-        plt.savefig(f'{i}_goal_func_dist.png')
-        plt.close()
diff --git a/examples/sample_factory_files/results/success_by_dist.npy b/examples/sample_factory_files/results/success_by_dist.npy
deleted file mode 100644
index 88bb3470..00000000
Binary files a/examples/sample_factory_files/results/success_by_dist.npy and /dev/null differ
diff --git a/examples/sample_factory_files/results/success_by_veh_number.npy b/examples/sample_factory_files/results/success_by_veh_number.npy
deleted file mode 100644
index e2ccc8c0..00000000
Binary files a/examples/sample_factory_files/results/success_by_veh_number.npy and /dev/null differ
diff --git a/examples/sample_factory_files/results/zsc_collision.txt b/examples/sample_factory_files/results/zsc_collision.txt
deleted file mode 100644
index fab1b513..00000000
--- a/examples/sample_factory_files/results/zsc_collision.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-3.073423876390209974e-01,2.998212611906500014e-01
-2.892664684601605751e-01,3.056283516749848106e-01
diff --git a/examples/sample_factory_files/results/zsc_goal.txt b/examples/sample_factory_files/results/zsc_goal.txt
deleted file mode 100644
index da893d8f..00000000
--- a/examples/sample_factory_files/results/zsc_goal.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-6.806898396366272141e-01,6.894188181744292931e-01
-6.908729456388121859e-01,6.775820683229745178e-01
diff --git a/examples/sample_factory_files/run_sample_factory.py b/examples/sample_factory_files/run_sample_factory.py
deleted file mode 100644
index e39582da..00000000
--- a/examples/sample_factory_files/run_sample_factory.py
+++ /dev/null
@@ -1,352 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""
-Runner script for sample factory.
-
-To run in single agent mode on one file for testing.
-python -m run_sample_factory algorithm=APPO ++algorithm.train_in_background_thread=True \
-    ++algorithm.num_workers=10 ++algorithm.experiment=EXPERIMENT_NAME \
-    ++max_num_vehicles=1 ++num_files=1
-
-To run in multiagent mode on one file for testing
-python -m run_sample_factory algorithm=APPO ++algorithm.train_in_background_thread=True \
-    ++algorithm.num_workers=10 ++algorithm.experiment=EXPERIMENT_NAME \
-    ++num_files=1
-
-To run on all files set ++num_files=-1
-
-For debugging
-python -m run_sample_factory algorithm=APPO ++algorithm.train_in_background_thread=False \
-    ++algorithm.num_workers=1 ++force_envs_single_thread=False
-After training for a desired period of time, evaluate the policy by running:
-python -m sample_factory_examples.enjoy_custom_multi_env --algo=APPO \
-    --env=my_custom_multi_env_v1 --experiment=example
-"""
-import os
-import sys
-
-import hydra
-import numpy as np
-from omegaconf import OmegaConf
-from sample_factory.envs.env_registry import global_env_registry
-from sample_factory.run_algorithm import run_algorithm
-from sample_factory_examples.train_custom_env_custom_model import override_default_params_func
-from sample_factory.algorithms.appo.model_utils import get_obs_shape, EncoderBase, nonlinearity, register_custom_encoder
-from torch import nn
-
-from nocturne.envs.wrappers import create_env
-
-
-class SampleFactoryEnv():
-    """Wrapper environment that converts between our dicts and Sample Factory format."""
-
-    def __init__(self, env):
-        """Initialize wrapper.
-
-        Args
-        ----
-            env (BaseEnv): Base environment that we are wrapping.
-        """
-        self.env = env
-        self.num_agents = self.env.cfg['max_num_vehicles']
-        self.agent_ids = [i for i in range(self.num_agents)]
-        self.is_multiagent = True
-        _ = self.env.reset()
-        # used to track which agents are done
-        self.already_done = [False for _ in self.agent_ids]
-        self.episode_rewards = np.zeros(self.num_agents)
-
-    def step(self, actions):
-        """Convert between environment dicts and sample factory lists.
-
-        Important to note:
-        1) Items in info['episode_extra_stats'] will be logged by sample factory.
-        2) sample factory does not reset the environment for you
-           so we reset it if the env returns __all__ in its done dict
-
-        Args:
-            actions ({str: numpy array}): agent actions
-
-        Returns
-        -------
-            obs_n ([np.array]): N length list of agent observations
-            rew_n ([float]): N length list of agent rewards
-            info_n ([{str: float}]): N length list of info dicts
-            done_n ([bool]): N length list of whether agents are done
-
-        """
-        agent_actions = {}
-        for action, agent_id, already_done in zip(actions, self.agent_ids,
-                                                  self.already_done):
-            if already_done:
-                continue
-            agent_actions[self.agent_id_to_env_id_map[agent_id]] = action
-        next_obses, rew, done, info = self.env.step(agent_actions)
-        rew_n = []
-        done_n = []
-        info_n = []
-
-        for agent_id in self.agent_ids:
-            # first check that the agent_id ever had a corresponding vehicle
-            # and then check that there's actually an observation for it i.e. it's not done
-            if agent_id in self.agent_id_to_env_id_map.keys(
-            ) and self.agent_id_to_env_id_map[agent_id] in next_obses.keys():
-                map_key = self.agent_id_to_env_id_map[agent_id]
-                # since the environment may have just reset, we don't actually have
-                # reward objects yet
-                rew_n.append(rew.get(map_key, 0))
-                agent_info = info.get(map_key, {})
-                # track the per-agent reward for later logging
-                self.episode_rewards[agent_id] += rew.get(map_key, 0)
-                self.num_steps[agent_id] += 1
-                self.goal_achieved[agent_id] = self.goal_achieved[
-                    agent_id] or agent_info['goal_achieved']
-                self.collided[agent_id] = self.collided[
-                    agent_id] or agent_info['collided']
-                self.veh_edge_collided[agent_id] = self.veh_edge_collided[
-                    agent_id] or agent_info['veh_edge_collision']
-                self.veh_veh_collided[agent_id] = self.veh_veh_collided[
-                    agent_id] or agent_info['veh_veh_collision']
-            else:
-                rew_n.append(0)
-                agent_info = {}
-            if self.already_done[agent_id]:
-                agent_info['is_active'] = False
-            else:
-                agent_info['is_active'] = True
-            info_n.append(agent_info)
-        # now stick in some extra state information if needed
-        # anything in episode_extra_stats is logged at the end of the episode
-        if done['__all__']:
-            # log any extra info that you need
-            avg_rew = np.mean(self.episode_rewards[self.valid_indices])
-            avg_len = np.mean(self.num_steps[self.valid_indices])
-            avg_goal_achieved = np.mean(self.goal_achieved[self.valid_indices])
-            avg_collided = np.mean(self.collided[self.valid_indices])
-            avg_veh_edge_collided = np.mean(
-                self.veh_edge_collided[self.valid_indices])
-            avg_veh_veh_collided = np.mean(
-                self.veh_veh_collided[self.valid_indices])
-            for info in info_n:
-                info['episode_extra_stats'] = {}
-                info['episode_extra_stats']['avg_rew'] = avg_rew
-                info['episode_extra_stats']['avg_agent_len'] = avg_len
-                info['episode_extra_stats'][
-                    'goal_achieved'] = avg_goal_achieved
-                info['episode_extra_stats']['collided'] = avg_collided
-                info['episode_extra_stats'][
-                    'veh_edge_collision'] = avg_veh_edge_collided
-                info['episode_extra_stats'][
-                    'veh_veh_collision'] = avg_veh_veh_collided
-
-        # update the dones so we know if we need to reset
-        # sample factory does not call reset for you
-        for env_id, done_val in done.items():
-            # handle the __all__ signal that's just in there for
-            # telling when the environment should stop
-            if env_id == '__all__':
-                continue
-            if done_val:
-                agent_id = self.env_id_to_agent_id_map[env_id]
-                self.already_done[agent_id] = True
-
-        # okay, now if all the agents are done set done to True for all of them
-        # otherwise, False. Sample factory uses info['is_active'] to track if agents
-        # are done, not the done signal
-        # also, convert the obs_dict into the right format
-        if done['__all__']:
-            done_n = [True] * self.num_agents
-            obs_n = self.reset()
-        else:
-            done_n = [False] * self.num_agents
-            obs_n = self.obs_dict_to_list(next_obses)
-        return obs_n, rew_n, done_n, info_n
-
-    def obs_dict_to_list(self, obs_dict):
-        """Convert the dictionary returned by the environment into a fixed size list of arrays.
-
-        Args:
-            obs_dict ({agent id in environment: observation}): dict mapping ID to observation
-
-        Returns
-        -------
-            [np.array]: List of arrays ordered by which agent ID they correspond to.
-        """
-        obs_n = []
-        for agent_id in self.agent_ids:
-            # first check that the agent_id ever had a corresponding vehicle
-            # and then check that there's actually an observation for it i.e. it's not done
-            if agent_id in self.agent_id_to_env_id_map.keys(
-            ) and self.agent_id_to_env_id_map[agent_id] in obs_dict.keys():
-                map_key = self.agent_id_to_env_id_map[agent_id]
-                obs_n.append(obs_dict[map_key])
-            else:
-                obs_n.append(self.dead_feat)
-        return obs_n
-
-    def reset(self):
-        """Reset the environment.
-
-        Key things done here:
-        1) build a map between the agent IDs in the environment (which are not necessarily 0-N)
-           and the agent IDs for sample factory which are from 0 to the maximum number of agents
-        2) sample factory (until some bugs are fixed) requires a fixed number of agents. Some of these
-           agents will be dummy agents that do not act in the environment. So, here we build valid
-           indices which can be used to figure out which agent IDs correspond
-
-        Returns
-        -------
-            [np.array]: List of numpy arrays, one for each agent.
-        """
-        # track the agent_ids that actually take an action during the episode
-        self.valid_indices = []
-        self.episode_rewards = np.zeros(self.num_agents)
-        self.num_steps = np.zeros(self.num_agents)
-        self.goal_achieved = np.zeros(self.num_agents)
-        self.collided = np.zeros(self.num_agents)
-        self.veh_veh_collided = np.zeros(self.num_agents)
-        self.veh_edge_collided = np.zeros(self.num_agents)
-        self.already_done = [False for _ in self.agent_ids]
-        next_obses = self.env.reset()
-        env_keys = sorted(list(next_obses.keys()))
-        # agent ids is a list going from 0 to (num_agents - 1)
-        # however, the vehicle IDs might go from 0 to anything
-        # we want to initialize a mapping that is maintained through the episode and always
-        # uniquely convert the vehicle ID to an agent id
-        self.agent_id_to_env_id_map = {
-            agent_id: env_id
-            for agent_id, env_id in zip(self.agent_ids, env_keys)
-        }
-        self.env_id_to_agent_id_map = {
-            env_id: agent_id
-            for agent_id, env_id in zip(self.agent_ids, env_keys)
-        }
-        # if there isn't a mapping from an agent id to a vehicle id, that agent should be
-        # set to permanently inactive
-        for agent_id in self.agent_ids:
-            if agent_id not in self.agent_id_to_env_id_map.keys():
-                self.already_done[agent_id] = True
-            else:
-                # check that this isn't actually a fake padding agent used
-                # when keep_inactive_agents is True
-                if agent_id in self.agent_id_to_env_id_map.keys(
-                ) and self.agent_id_to_env_id_map[
-                        agent_id] not in self.env.dead_agent_ids:
-                    self.valid_indices.append(agent_id)
-        obs_n = self.obs_dict_to_list(next_obses)
-        return obs_n
-
-    @property
-    def observation_space(self):
-        """See superclass."""
-        return self.env.observation_space
-
-    @property
-    def action_space(self):
-        """See superclass."""
-        return self.env.action_space
-
-    def render(self, mode=None):
-        """See superclass."""
-        return self.env.render(mode)
-
-    def seed(self, seed=None):
-        """Pass the seed to the environment."""
-        self.env.seed(seed)
-
-    def __getattr__(self, name):
-        """Pass attributes directly through to the wrapped env. TODO(remove)."""
-        return getattr(self.env, name)
-
-
-class CustomEncoder(EncoderBase):
-    """Encoder for the input."""
-
-    def __init__(self, cfg, obs_space, timing):
-        super().__init__(cfg, timing)
-
-        obs_shape = get_obs_shape(obs_space)
-        assert len(obs_shape.obs) == 1
-
-        fc_encoder_layer = cfg.encoder_hidden_size
-        encoder_layers = [
-            nn.Linear(obs_shape.obs[0], fc_encoder_layer),
-            nonlinearity(cfg),
-            nn.Linear(fc_encoder_layer, fc_encoder_layer),
-            nonlinearity(cfg),
-        ]
-
-        self.mlp_head = nn.Sequential(*encoder_layers)
-        self.init_fc_blocks(fc_encoder_layer)
-
-    def forward(self, obs_dict):
-        """See superclass."""
-        x = self.mlp_head(obs_dict['obs'])
-        x = self.forward_fc_blocks(x)
-        return x
-
-
-def make_custom_multi_env_func(full_env_name, cfg, env_config=None):
-    """Return a wrapped base environment.
-
-    Args:
-        full_env_name (str): Unused.
-        cfg (dict): Dict needed to configure the environment.
-        env_config (dict, optional): Deprecated. Will be removed from SampleFactory later.
-
-    Returns
-    -------
-        SampleFactoryEnv: Wrapped environment.
-    """
-    env = create_env(cfg)
-    return SampleFactoryEnv(env)
-
-
-def register_custom_components():
-    """Register needed constructors for custom environments."""
-    global_env_registry().register_env(
-        env_name_prefix='my_custom_multi_env_',
-        make_env_func=make_custom_multi_env_func,
-        override_default_params_func=override_default_params_func,
-    )
-    register_custom_encoder('custom_env_encoder', CustomEncoder)
-
-
-@hydra.main(config_path="../../cfgs/", config_name="config")
-def main(cfg):
-    """Script entry point."""
-    register_custom_components()
-    # cfg = parse_args()
-    # TODO(ev) hacky renaming and restructuring, better to do this cleanly
-    cfg_dict = OmegaConf.to_container(cfg, resolve=True)
-    # copy algo keys into the main keys
-    for key, value in cfg_dict['algorithm'].items():
-        cfg_dict[key] = value
-    # we didn't set a train directory so use the hydra one
-    if cfg_dict['train_dir'] is None:
-        cfg_dict['train_dir'] = os.getcwd()
-        print(f'storing the results in {os.getcwd()}')
-    else:
-        output_dir = cfg_dict['train_dir']
-        print(f'storing results in {output_dir}')
-
-    # recommendation from Aleksei to keep horizon length fixed
-    # and number of agents fixed and just pad missing / exited
-    # agents with a vector of -1s
-    cfg_dict['subscriber']['keep_inactive_agents'] = True
-
-    # put it into a namespace so sample factory code runs correctly
-    class Bunch(object):
-
-        def __init__(self, adict):
-            self.__dict__.update(adict)
-
-    cfg = Bunch(cfg_dict)
-    status = run_algorithm(cfg)
-    return status
-
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/examples/sample_factory_files/success_by_veh_number b/examples/sample_factory_files/success_by_veh_number
deleted file mode 100644
index 3aa29c4e..00000000
Binary files a/examples/sample_factory_files/success_by_veh_number and /dev/null differ
diff --git a/examples/sample_factory_files/visualize_sample_factory.py b/examples/sample_factory_files/visualize_sample_factory.py
deleted file mode 100644
index a9676b0f..00000000
--- a/examples/sample_factory_files/visualize_sample_factory.py
+++ /dev/null
@@ -1,272 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Use to create movies of trained policies."""
-import argparse
-from collections import deque
-import json
-import sys
-import time
-import os
-
-import imageio
-import matplotlib.pyplot as plt
-import numpy as np
-import torch
-
-from sample_factory.algorithms.appo.actor_worker import transform_dict_observations
-from sample_factory.algorithms.appo.learner import LearnerWorker
-from sample_factory.algorithms.appo.model import create_actor_critic
-from sample_factory.algorithms.appo.model_utils import get_hidden_size
-from sample_factory.algorithms.utils.action_distributions import ContinuousActionDistribution, \
-     CategoricalActionDistribution
-from sample_factory.algorithms.utils.arguments import load_from_checkpoint
-from sample_factory.algorithms.utils.multi_agent_wrapper import MultiAgentWrapper, is_multiagent_env
-from sample_factory.envs.create_env import create_env
-from sample_factory.utils.utils import log, AttrDict
-
-from run_sample_factory import register_custom_components
-
-from cfgs.config import PROCESSED_TRAIN_NO_TL, PROCESSED_VALID_NO_TL, PROJECT_PATH, set_display_window  # noqa: F401
-
-
-def run_eval(cfg_dict, max_num_frames=1e9):
-    """Run evaluation over a single file. Exits when one episode finishes.
-
-    Args:
-        cfg (dict): configuration file for instantiating the agents and environment.
-        max_num_frames (int, optional): Deprecated. Should be removed.
-
-    Returns
-    -------
-        None: None
-
-    """
-    cfg = load_from_checkpoint(cfg_dict)
-
-    render_action_repeat = cfg.render_action_repeat if cfg.render_action_repeat is not None else cfg.env_frameskip
-    if render_action_repeat is None:
-        log.warning('Not using action repeat!')
-        render_action_repeat = 1
-    log.debug('Using action repeat %d during evaluation', render_action_repeat)
-
-    cfg.env_frameskip = 1  # for evaluation
-    cfg.num_envs = 1
-    cfg.seed = np.random.randint(10000)
-    cfg.scenario_path = cfg_dict.scenario_path
-
-    def make_env_func(env_config):
-        return create_env(cfg.env, cfg=cfg, env_config=env_config)
-
-    env = make_env_func(AttrDict({'worker_index': 0, 'vector_index': 0}))
-
-    is_multiagent = is_multiagent_env(env)
-    if not is_multiagent:
-        env = MultiAgentWrapper(env)
-
-    if hasattr(env.unwrapped, 'reset_on_init'):
-        # reset call ruins the demo recording for VizDoom
-        env.unwrapped.reset_on_init = False
-
-    actor_critic = create_actor_critic(cfg, env.observation_space,
-                                       env.action_space)
-
-    device = torch.device('cpu' if cfg.device == 'cpu' else 'cuda')
-    actor_critic.model_to_device(device)
-
-    policy_id = cfg.policy_index
-    checkpoints = LearnerWorker.get_checkpoints(
-        LearnerWorker.checkpoint_dir(cfg, policy_id))
-    checkpoint_dict = LearnerWorker.load_checkpoint(checkpoints, device)
-    actor_critic.load_state_dict(checkpoint_dict['model'])
-
-    episode_rewards = [deque([], maxlen=100) for _ in range(env.num_agents)]
-    true_rewards = [deque([], maxlen=100) for _ in range(env.num_agents)]
-    num_frames = 0
-
-    last_render_start = time.time()
-
-    def max_frames_reached(frames):
-        return max_num_frames is not None and frames > max_num_frames
-
-    obs = env.reset()
-    print(os.path.join(env.cfg['scenario_path'], env.unwrapped.file))
-    rnn_states = torch.zeros(
-        [env.num_agents, get_hidden_size(cfg)],
-        dtype=torch.float32,
-        device=device)
-    episode_reward = np.zeros(env.num_agents)
-    finished_episode = [False] * env.num_agents
-
-    if not cfg.no_render:
-        fig = plt.figure()
-        frames = []
-        ego_frames = []
-        feature_frames = []
-
-    with torch.no_grad():
-        while not max_frames_reached(num_frames):
-            obs_torch = AttrDict(transform_dict_observations(obs))
-            for key, x in obs_torch.items():
-                obs_torch[key] = torch.from_numpy(x).to(device).float()
-
-            policy_outputs = actor_critic(obs_torch,
-                                          rnn_states,
-                                          with_action_distribution=True)
-
-            # sample actions from the distribution by default
-            actions = policy_outputs.actions
-
-            action_distribution = policy_outputs.action_distribution
-            if isinstance(action_distribution, ContinuousActionDistribution):
-                if not cfg.continuous_actions_sample:  # TODO: add similar option for discrete actions
-                    actions = action_distribution.means
-            if isinstance(action_distribution, CategoricalActionDistribution):
-                if not cfg.discrete_actions_sample:
-                    actions = policy_outputs['action_logits'].argmax(axis=1)
-
-            actions = actions.cpu().numpy()
-
-            rnn_states = policy_outputs.rnn_states
-
-            for _ in range(render_action_repeat):
-                if not cfg.no_render:
-                    target_delay = 1.0 / cfg.fps if cfg.fps > 0 else 0
-                    current_delay = time.time() - last_render_start
-                    time_wait = target_delay - current_delay
-
-                    if time_wait > 0:
-                        # log.info('Wait time %.3f', time_wait)
-                        time.sleep(time_wait)
-
-                    last_render_start = time.time()
-                    img = env.render()
-                    frames.append(img)
-                    ego_img = env.render_ego()
-                    if ego_img is not None:
-                        ego_frames.append(ego_img)
-                    feature_img = env.render_features()
-                    if feature_img is not None:
-                        feature_frames.append(feature_img)
-
-                obs, rew, done, infos = env.step(actions)
-
-                episode_reward += rew
-                num_frames += 1
-
-                for agent_i, done_flag in enumerate(done):
-                    if done_flag:
-                        finished_episode[agent_i] = True
-                        episode_rewards[agent_i].append(
-                            episode_reward[agent_i])
-                        true_rewards[agent_i].append(infos[agent_i].get(
-                            'true_reward', episode_reward[agent_i]))
-                        log.info(
-                            'Episode finished for agent %d at %d frames. Reward: %.3f, true_reward: %.3f',
-                            agent_i, num_frames, episode_reward[agent_i],
-                            true_rewards[agent_i][-1])
-                        rnn_states[agent_i] = torch.zeros(
-                            [get_hidden_size(cfg)],
-                            dtype=torch.float32,
-                            device=device)
-                        episode_reward[agent_i] = 0
-
-                # if episode terminated synchronously for all agents, pause a bit before starting a new one
-                if all(done):
-                    if not cfg.no_render:
-                        imageio.mimsave(os.path.join(PROJECT_PATH,
-                                                     'animation.mp4'),
-                                        np.array(frames),
-                                        fps=30)
-                        plt.close(fig)
-                        imageio.mimsave(os.path.join(PROJECT_PATH,
-                                                     'animation_ego.mp4'),
-                                        np.array(ego_frames),
-                                        fps=30)
-                        plt.close(fig)
-                        imageio.mimsave(os.path.join(PROJECT_PATH,
-                                                     'animation_feature.mp4'),
-                                        np.array(feature_frames),
-                                        fps=30)
-                        plt.close(fig)
-                    if not cfg.no_render:
-                        env.render()
-                    time.sleep(0.05)
-
-                if all(finished_episode):
-                    finished_episode = [False] * env.num_agents
-                    avg_episode_rewards_str, avg_true_reward_str = '', ''
-                    for agent_i in range(env.num_agents):
-                        avg_rew = np.mean(episode_rewards[agent_i])
-                        avg_true_rew = np.mean(true_rewards[agent_i])
-                        if not np.isnan(avg_rew):
-                            if avg_episode_rewards_str:
-                                avg_episode_rewards_str += ', '
-                            avg_episode_rewards_str += f'#{agent_i}: {avg_rew:.3f}'
-                        if not np.isnan(avg_true_rew):
-                            if avg_true_reward_str:
-                                avg_true_reward_str += ', '
-                            avg_true_reward_str += f'#{agent_i}: {avg_true_rew:.3f}'
-                    avg_goal = infos[0]['episode_extra_stats']['goal_achieved']
-                    avg_collisions = infos[0]['episode_extra_stats'][
-                        'collided']
-                    log.info(f'Avg goal achieved, {avg_goal}')
-                    log.info(f'Avg num collisions, {avg_collisions}')
-                    log.info('Avg episode rewards: %s, true rewards: %s',
-                             avg_episode_rewards_str, avg_true_reward_str)
-                    log.info(
-                        'Avg episode reward: %.3f, avg true_reward: %.3f',
-                        np.mean([
-                            np.mean(episode_rewards[i])
-                            for i in range(env.num_agents)
-                        ]),
-                        np.mean([
-                            np.mean(true_rewards[i])
-                            for i in range(env.num_agents)
-                        ]))
-                    return avg_goal
-    env.close()
-
-
-def main():
-    """Script entry point."""
-    set_display_window()
-    register_custom_components()
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument('cfg_path', type=str)
-    args = parser.parse_args()
-
-    file_path = os.path.join(args.cfg_path, 'cfg.json')
-    with open(file_path, 'r') as file:
-        cfg_dict = json.load(file)
-
-    cfg_dict['cli_args'] = {}
-    cfg_dict['fps'] = 0
-    cfg_dict['render_action_repeat'] = None
-    cfg_dict['no_render'] = False
-    cfg_dict['policy_index'] = 0
-    cfg_dict['record_to'] = os.path.join(os.getcwd(), '..', 'recs')
-    cfg_dict['continuous_actions_sample'] = True
-    cfg_dict['discrete_actions_sample'] = False
-    cfg_dict['remove_at_collide'] = True
-    cfg_dict['remove_at_goal'] = True
-    cfg_dict['scenario_path'] = PROCESSED_VALID_NO_TL
-
-    class Bunch(object):
-
-        def __init__(self, adict):
-            self.__dict__.update(adict)
-
-    cfg = Bunch(cfg_dict)
-    avg_goals = []
-    for _ in range(1):
-        avg_goal = run_eval(cfg)
-        avg_goals.append(avg_goal)
-    print(avg_goals)
-    print('the total average goal achieved is {}'.format(np.mean(avg_goals)))
-
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/nocturne/__init__.py b/nocturne/__init__.py
index 9aeaf2bd..c4db0c62 100644
--- a/nocturne/__init__.py
+++ b/nocturne/__init__.py
@@ -22,10 +22,3 @@
     "Cyclist",
     "envs",
 ]
-
-import os
-from cfgs.config import PROCESSED_TRAIN_NO_TL, PROCESSED_VALID_NO_TL, PROJECT_PATH
-
-os.environ["PROCESSED_TRAIN_NO_TL"] = str(PROCESSED_TRAIN_NO_TL)
-os.environ["PROCESSED_VALID_NO_TL"] = str(PROCESSED_VALID_NO_TL)
-os.environ["NOCTURNE_LOG_DIR"] = str(os.path.join(PROJECT_PATH, 'logs'))
diff --git a/nocturne/envs/base_env.py b/nocturne/envs/base_env.py
index 85ec0684..22dbbe80 100644
--- a/nocturne/envs/base_env.py
+++ b/nocturne/envs/base_env.py
@@ -2,339 +2,315 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
-"""Default environment for Nocturne."""
-from typing import Any, Dict, Sequence, Union
+"""Default Nocturne env with minor adaptations."""
 
-from collections import defaultdict, deque
-from itertools import islice
 import json
-import os
+import logging
+from collections import defaultdict, deque
+from enum import Enum
+from itertools import islice, product
+from pathlib import Path
+from typing import Any, Dict, Optional, Tuple, TypeVar, Union
 
-from gym import Env
-from gym.spaces import Box, Discrete
 import numpy as np
 import torch
+import yaml
+from box import Box as ConfigBox
+from gym import Env
+from gym.spaces import Box, Discrete
+
+from nocturne import Action, Simulation, Vector2D, Vehicle
+
+_MAX_NUM_TRIES_TO_FIND_VALID_VEHICLE = 1_000
+
+logging.getLogger(__name__)
+
+ActType = TypeVar("ActType")  # pylint: disable=invalid-name
+ObsType = TypeVar("ObsType")  # pylint: disable=invalid-name
+RenderType = TypeVar("RenderType")  # pylint: disable=invalid-name
+
 
-from cfgs.config import ERR_VAL as INVALID_POSITION, get_scenario_dict
-from nocturne import Action, Simulation
+class CollisionType(Enum):
+    """Enum for collision types."""
 
+    NONE = 0
+    VEHICLE_VEHICLE = 1
+    VEHICLE_EDGE = 2
 
-class BaseEnv(Env):
-    """Default environment for Nocturne."""
 
-    def __init__(self, cfg: Dict[str, Any], rank: int = 0) -> None:
-        """Initialize the environment.
+class BaseEnv(Env):  # pylint: disable=too-many-instance-attributes
+    """Nocturne base Gym environment."""
+
+    def __init__(  # pylint: disable=too-many-arguments
+        self,
+        config: Dict[str, Any],
+        *,
+        img_width=1600,
+        img_height=1600,
+        draw_target_positions=True,
+        padding=50.0,
+    ) -> None:
+        """Initialize a Nocturne environment.
 
         Args
         ----
-            cfg (dict): configuration file describing the experiment
-            rank (int, optional): [description]. Defaults to 0.
+            config (dict): configuration file for the environment.
+
+        Optional Args
+        -------------
+            img_width (int): width of the image to render.
+            img_height (int): height of the image to render.
+            draw_target_positions (bool): whether to draw the target positions.
+            padding (float): padding to add to the image.
         """
         super().__init__()
-        self.cfg = cfg
-        with open(os.path.join(cfg['scenario_path'],
-                               'valid_files.json')) as file:
+        self.config = ConfigBox(config)
+        self.config.data_path = Path(self.config.data_path)
+        self._render_settings = {
+            "img_width": img_width,
+            "img_height": img_height,
+            "draw_target_positions": draw_target_positions,
+            "padding": padding,
+        }
+
+        self.seed(self.config.seed)
+
+        # Load the list of valid files
+        with open(self.config.data_path / "valid_files.json", encoding="utf-8") as file:
             self.valid_veh_dict = json.load(file)
-            self.files = list(self.valid_veh_dict.keys())
-            # sort the files so that we have a consistent order
-            self.files = sorted(self.files)
-        if cfg['num_files'] != -1:
-            self.files = self.files[0:cfg['num_files']]
-        self.file = self.files[np.random.randint(len(self.files))]
-        self.simulation = Simulation(os.path.join(cfg['scenario_path'],
-                                                  self.file),
-                                     config=get_scenario_dict(cfg))
-
-        self.scenario = self.simulation.getScenario()
-        self.controlled_vehicles = self.scenario.getObjectsThatMoved()
-        self.cfg = cfg
-        self.n_frames_stacked = self.cfg['subscriber'].get(
-            'n_frames_stacked', 1)
-        if self.n_frames_stacked > 1:
-            print(
-                'WARNING: you are frame stacking and may want to turn off recurrence if it is enabled\
-                  in your agent as frame-stacking may not be needed when using recurrent policies.'
-            )
-        self.single_agent_mode = cfg['single_agent_mode']
-        self.seed(cfg['seed'])
-        self.episode_length = cfg['episode_length']
-        self.t = 0
-        self.step_num = 0
-        self.rank = rank
-        self.seed(cfg['seed'])
+            self.files = sorted(self.valid_veh_dict)  # always bound, even when num_files == -1 (all files)
+            if self.config.num_files != -1:
+                self.files = self.files[: self.config.num_files]
+        if not self.files:
+            raise ValueError("Data path does not contain scenes.")
+
         obs_dict = self.reset()
-        self.observation_space = Box(low=-np.infty,
-                                     high=np.infty,
-                                     shape=(obs_dict[list(
-                                         obs_dict.keys())[0]].shape[0], ))
-        if self.cfg['discretize_actions']:
-            self.accel_discretization = self.cfg['accel_discretization']
-            self.steering_discretization = self.cfg['steering_discretization']
-            self.head_angle_discretization = self.cfg[
-                'head_angle_discretization']
-            self.action_space = Discrete(self.accel_discretization *
-                                         self.steering_discretization *
-                                         self.head_angle_discretization)
-            self.accel_grid = np.linspace(
-                -np.abs(self.cfg['accel_lower_bound']),
-                self.cfg['accel_upper_bound'], self.accel_discretization)
-            self.steering_grid = np.linspace(
-                -np.abs(self.cfg['steering_lower_bound']),
-                self.cfg['steering_upper_bound'], self.steering_discretization)
-            self.head_angle_grid = np.linspace(
-                -np.abs(self.cfg['head_angle_lower_bound']),
-                self.cfg['head_angle_upper_bound'],
-                self.head_angle_discretization)
-            # compute the indexing only once
-            self.idx_to_actions = {}
-            i = 0
-            for accel in self.accel_grid:
-                for steer in self.steering_grid:
-                    for head_angle in self.head_angle_grid:
-                        self.idx_to_actions[i] = [accel, steer, head_angle]
-                        i += 1
+
+        # Set observation space
+        self.observation_space = Box(low=-np.inf, high=np.inf, shape=(obs_dict[list(obs_dict.keys())[0]].shape[0],))
+
+        # Set action space
+        if self.config.discretize_actions:
+            self._set_discrete_action_space()
         else:
-            self.action_space = Box(
-                low=-np.array([
-                    np.abs(self.cfg['accel_lower_bound']),
-                    self.cfg['steering_lower_bound'],
-                    self.cfg['head_angle_lower_bound']
-                ]),
-                high=np.array([
-                    np.abs(self.cfg['accel_upper_bound']),
-                    self.cfg['steering_upper_bound'],
-                    self.cfg['head_angle_upper_bound']
-                ]),
-            )
+            self._set_continuous_action_space()
 
-    def apply_actions(
-        self, action_dict: Dict[int, Union[Action, np.ndarray, Sequence[float],
-                                           int]]
-    ) -> None:
-        """Apply a dict of actions to the vehicle objects."""
+    def apply_actions(self, action_dict: Dict[int, ActType]) -> None:
+        """Apply a dict of actions to the vehicle objects.
+
+        Args
+        ----
+            action_dict (Dict[int, ActType]): Dictionary of actions to apply to the vehicles.
+        """
         for veh_obj in self.scenario.getObjectsThatMoved():
             action = action_dict.get(veh_obj.id, None)
             if action is None:
                 continue
+            _apply_action_to_vehicle(veh_obj, action, idx_to_actions=self.idx_to_actions)
 
-            # TODO: Make this a util function.
-            if isinstance(action, Action):
-                veh_obj.apply_action(action)
-            elif isinstance(action, np.ndarray):
-                veh_obj.apply_action(Action.from_numpy(action))
-            elif isinstance(action, (tuple, list)):
-                veh_obj.acceleration = action[0]
-                veh_obj.steering = action[1]
-                veh_obj.head_angle = action[2]
-            else:
-                accel, steer, head_angle = self.idx_to_actions[action]
-                veh_obj.acceleration = accel
-                veh_obj.steering = steer
-                veh_obj.head_angle = head_angle
-
-    def step(
-        self, action_dict: Dict[int, Union[Action, np.ndarray, Sequence[float],
-                                           int]]
-    ) -> None:
-        """See superclass."""
+    def step(  # pylint: disable=arguments-renamed,too-many-locals,too-many-branches,too-many-statements
+        self, action_dict: Dict[int, ActType]
+    ) -> Tuple[Dict[int, ObsType], Dict[int, float], Dict[int, bool], Dict[int, Dict[str, Union[bool, str]]]]:
+        """Run one timestep of the environment's dynamics.
+
+        Args
+        ----
+            action_dict (Dict[int, ActType]): Dictionary of actions to apply to the vehicles.
+
+        Raises
+        ------
+            ValueError: If the action is not of a supported type or if the vehicle collision type is unknown.
+
+
+        Returns
+        -------
+            Dict[int, ObsType]: Dictionary with observation for each vehicle.
+            Dict[int, float]: Dictionary with reward for each vehicle.
+            Dict[Union[int, str], bool]: Done flag for each vehicle, plus an aggregate "__all__" entry.
+            Dict[int, Dict[str, Union[bool, str]]]: Dictionary with info for each vehicle.
+        """
         obs_dict = {}
         rew_dict = {}
         done_dict = {}
         info_dict = defaultdict(dict)
-        rew_cfg = self.cfg['rew_cfg']
+
+        rew_cfg = self.config.rew_cfg
+
         self.apply_actions(action_dict)
-        self.simulation.step(self.cfg['dt'])
-        self.t += self.cfg['dt']
+        self.simulation.step(self.config.dt)
+        self.t += self.config.dt
         self.step_num += 1
-        objs_to_remove = []
+
         for veh_obj in self.controlled_vehicles:
             veh_id = veh_obj.getID()
             if veh_id in self.done_ids:
                 continue
             self.context_dict[veh_id].append(self.get_observation(veh_obj))
-            if self.n_frames_stacked > 1:
+            if self.config.subscriber.n_frames_stacked > 1:
                 veh_deque = self.context_dict[veh_id]
                 context_list = list(
-                    islice(veh_deque,
-                           len(veh_deque) - self.n_frames_stacked,
-                           len(veh_deque)))
+                    islice(
+                        veh_deque,
+                        len(veh_deque) - self.config.subscriber.n_frames_stacked,
+                        len(veh_deque),
+                    )
+                )
                 obs_dict[veh_id] = np.concatenate(context_list)
             else:
                 obs_dict[veh_id] = self.context_dict[veh_id][-1]
             rew_dict[veh_id] = 0
             done_dict[veh_id] = False
-            info_dict[veh_id]['goal_achieved'] = False
-            info_dict[veh_id]['collided'] = False
-            info_dict[veh_id]['veh_veh_collision'] = False
-            info_dict[veh_id]['veh_edge_collision'] = False
+            info_dict[veh_id]["goal_achieved"] = False
+            info_dict[veh_id]["collided"] = False
+            info_dict[veh_id]["veh_veh_collision"] = False
+            info_dict[veh_id]["veh_edge_collision"] = False
             obj_pos = veh_obj.position
             goal_pos = veh_obj.target_position
-            '''############################################
-                            Compute rewards
-               ############################################'''
+            ############################################
+            #   Compute rewards
+            ############################################
             position_target_achieved = True
             speed_target_achieved = True
             heading_target_achieved = True
-            if rew_cfg['position_target']:
-                position_target_achieved = (
-                    goal_pos -
-                    obj_pos).norm() < rew_cfg['position_target_tolerance']
-            if rew_cfg['speed_target']:
-                speed_target_achieved = np.abs(
-                    veh_obj.speed -
-                    veh_obj.target_speed) < rew_cfg['speed_target_tolerance']
-            if rew_cfg['heading_target']:
-                heading_target_achieved = np.abs(
-                    self.angle_sub(veh_obj.heading, veh_obj.target_heading)
-                ) < rew_cfg['heading_target_tolerance']
+            if rew_cfg.position_target:
+                position_target_achieved = (goal_pos - obj_pos).norm() < rew_cfg.position_target_tolerance
+            if rew_cfg.speed_target:
+                speed_target_achieved = np.abs(veh_obj.speed - veh_obj.target_speed) < rew_cfg.speed_target_tolerance
+            if rew_cfg.heading_target:
+                heading_target_achieved = (
+                    np.abs(_angle_sub(veh_obj.heading, veh_obj.target_heading)) < rew_cfg.heading_target_tolerance
+                )
             if position_target_achieved and speed_target_achieved and heading_target_achieved:
-                info_dict[veh_id]['goal_achieved'] = True
-                rew_dict[veh_id] += rew_cfg['goal_achieved_bonus'] / rew_cfg[
-                    'reward_scaling']
-            if rew_cfg['shaped_goal_distance'] and rew_cfg['position_target']:
+                info_dict[veh_id]["goal_achieved"] = True
+                rew_dict[veh_id] += rew_cfg.goal_achieved_bonus / rew_cfg.reward_scaling
+            if rew_cfg.shaped_goal_distance and rew_cfg.position_target:
                 # penalize the agent for its distance from goal
-                # we scale by goal_dist_normalizers to ensure that this value is always less than the penalty for
-                # collision
-                if rew_cfg['goal_distance_penalty']:
-                    rew_dict[veh_id] -= rew_cfg.get(
-                        'shaped_goal_distance_scaling', 1.0) * (
-                            (goal_pos - obj_pos).norm() /
-                            self.goal_dist_normalizers[veh_id]
-                        ) / rew_cfg['reward_scaling']
+                # we scale by goal_dist_normalizers to ensure that this value is always
+                # less than the penalty for collision
+                if rew_cfg.goal_distance_penalty:
+                    rew_dict[veh_id] -= (
+                        rew_cfg.shaped_goal_distance_scaling
+                        * ((goal_pos - obj_pos).norm() / self.goal_dist_normalizers[veh_id])
+                        / rew_cfg.reward_scaling
+                    )
                 else:
                     # the minus one is to ensure that it's not beneficial to collide
                     # we divide by goal_achieved_bonus / episode_length to ensure that
-                    # acquiring the maximum "get-close-to-goal" reward at every time-step is
-                    # always less than just acquiring the goal reward once
-                    # we also assume that vehicles are never more than 400 meters from their goal
-                    # which makes sense as the episodes are 9 seconds long i.e. we'd have to go more than
-                    # 40 m/s to get there
-                    rew_dict[veh_id] += rew_cfg.get(
-                        'shaped_goal_distance_scaling',
-                        1.0) * (1 - (goal_pos - obj_pos).norm() /
-                                self.goal_dist_normalizers[veh_id]
-                                ) / rew_cfg['reward_scaling']
+                    # acquiring the maximum "get-close-to-goal" reward at every
+                    # time-step is always less than just acquiring the goal reward once
+                    rew_dict[veh_id] += (
+                        rew_cfg.shaped_goal_distance_scaling
+                        * (1 - (goal_pos - obj_pos).norm() / self.goal_dist_normalizers[veh_id])
+                        / rew_cfg.reward_scaling
+                    )
                 # repeat the same thing for speed and heading
-                if rew_cfg['shaped_goal_distance'] and rew_cfg['speed_target']:
-                    if rew_cfg['goal_distance_penalty']:
-                        rew_dict[veh_id] -= rew_cfg.get(
-                            'shaped_goal_distance_scaling', 1.0) * (
-                                np.abs(veh_obj.speed - veh_obj.target_speed) /
-                                40.0) / rew_cfg['reward_scaling']
+                if rew_cfg.shaped_goal_distance and rew_cfg.speed_target:
+                    if rew_cfg.goal_distance_penalty:
+                        rew_dict[veh_id] -= (
+                            rew_cfg.shaped_goal_distance_scaling
+                            * (np.abs(veh_obj.speed - veh_obj.target_speed) / rew_cfg.goal_speed_scaling)
+                            / rew_cfg.reward_scaling
+                        )
                     else:
-                        rew_dict[veh_id] += rew_cfg.get(
-                            'shaped_goal_distance_scaling', 1.0
-                        ) * (1 - np.abs(veh_obj.speed - veh_obj.target_speed) /
-                             40.0) / rew_cfg['reward_scaling']
-                if rew_cfg['shaped_goal_distance'] and rew_cfg[
-                        'heading_target']:
-                    if rew_cfg['goal_distance_penalty']:
-                        rew_dict[veh_id] -= rew_cfg.get(
-                            'shaped_goal_distance_scaling',
-                            1.0) * (np.abs(
-                                self.angle_sub(veh_obj.heading,
-                                               veh_obj.target_heading)) /
-                                    (2 * np.pi)) / rew_cfg['reward_scaling']
+                        rew_dict[veh_id] += (
+                            rew_cfg.shaped_goal_distance_scaling
+                            * (1 - np.abs(veh_obj.speed - veh_obj.target_speed) / rew_cfg.goal_speed_scaling)
+                            / rew_cfg.reward_scaling
+                        )
+                if rew_cfg.shaped_goal_distance and rew_cfg.heading_target:
+                    if rew_cfg.goal_distance_penalty:
+                        rew_dict[veh_id] -= (
+                            rew_cfg.shaped_goal_distance_scaling
+                            * (np.abs(_angle_sub(veh_obj.heading, veh_obj.target_heading)) / (2 * np.pi))
+                            / rew_cfg.reward_scaling
+                        )
                     else:
-                        rew_dict[veh_id] += rew_cfg.get(
-                            'shaped_goal_distance_scaling',
-                            1.0) * (1 - np.abs(
-                                self.angle_sub(veh_obj.heading,
-                                               veh_obj.target_heading)) /
-                                    (2 * np.pi)) / rew_cfg['reward_scaling']
-            '''############################################
-                    Handle potential done conditions
-            ############################################'''
+                        rew_dict[veh_id] += (
+                            rew_cfg.shaped_goal_distance_scaling
+                            * (1 - np.abs(_angle_sub(veh_obj.heading, veh_obj.target_heading)) / (2 * np.pi))
+                            / rew_cfg.reward_scaling
+                        )
+            ############################################
+            #   Handle potential done conditions
+            ############################################
             # achieved our goal
-            if info_dict[veh_id]['goal_achieved'] and self.cfg.get(
-                    'remove_at_goal', True):
+            if info_dict[veh_id]["goal_achieved"] and self.config.get("remove_at_goal", True):
                 done_dict[veh_id] = True
             if veh_obj.getCollided():
-                info_dict[veh_id]['collided'] = True
-                if int(veh_obj.collision_type) == 1:
-                    info_dict[veh_id]['veh_veh_collision'] = True
-                if int(veh_obj.collision_type) == 2:
-                    info_dict[veh_id]['veh_edge_collision'] = True
-                rew_dict[veh_id] -= np.abs(
-                    rew_cfg['collision_penalty']) / rew_cfg['reward_scaling']
-                if self.cfg.get('remove_at_collide', True):
+                info_dict[veh_id]["collided"] = True
+                if int(veh_obj.collision_type) == CollisionType.VEHICLE_VEHICLE.value:
+                    info_dict[veh_id]["veh_veh_collision"] = True
+                elif int(veh_obj.collision_type) == CollisionType.VEHICLE_EDGE.value:
+                    info_dict[veh_id]["veh_edge_collision"] = True
+                elif int(veh_obj.collision_type) != CollisionType.NONE.value:
+                    raise ValueError(f"Unknown collision type: {veh_obj.collision_type}.")
+                rew_dict[veh_id] -= np.abs(rew_cfg.collision_penalty) / rew_cfg.reward_scaling
+                if self.config.get("remove_at_collide", True):
                     done_dict[veh_id] = True
-            # remove the vehicle so that its trajectory doesn't continue. This is important
-            # in the multi-agent setting.
+            # remove the vehicle so that its trajectory doesn't continue. This is
+            # important in the multi-agent setting.
             if done_dict[veh_id]:
                 self.done_ids.append(veh_id)
-                if (info_dict[veh_id]['goal_achieved']
-                        and self.cfg.get('remove_at_goal', True)) or (
-                            info_dict[veh_id]['collided']
-                            and self.cfg.get('remove_at_collide', True)):
-                    objs_to_remove.append(veh_obj)
-
-        for veh_obj in objs_to_remove:
-            self.scenario.removeVehicle(veh_obj)
-
-        if self.cfg['rew_cfg']['shared_reward']:
-            total_reward = np.sum([rew_dict[key] for key in rew_dict.keys()])
-            rew_dict = {key: total_reward for key in rew_dict.keys()}
-
-        # fill in the missing observations if we should be doing so
-        if self.cfg['subscriber']['keep_inactive_agents']:
-            # force all vehicles done to be false since they should persist through the episode
-            done_dict = {key: False for key in self.all_vehicle_ids}
-            for key in self.all_vehicle_ids:
-                if key not in obs_dict.keys():
-                    obs_dict[key] = self.dead_feat
-                    rew_dict[key] = 0.0
-                    info_dict[key]['goal_achieved'] = False
-                    info_dict[key]['collided'] = False
-                    info_dict[key]['veh_veh_collision'] = False
-                    info_dict[key]['veh_edge_collision'] = False
-
-        if self.step_num >= self.episode_length:
-            done_dict = {key: True for key in done_dict.keys()}
-
-        all_done = True
-        for value in done_dict.values():
-            all_done *= value
-        done_dict['__all__'] = all_done
+                if (info_dict[veh_id]["goal_achieved"] and self.config.get("remove_at_goal", True)) or (
+                    info_dict[veh_id]["collided"] and self.config.get("remove_at_collide", True)
+                ):
+                    self.scenario.removeVehicle(veh_obj)
+
+        if self.config.rew_cfg.shared_reward:
+            total_reward = np.sum(list(rew_dict.values()))  # np.sum cannot reduce a dict view directly
+            rew_dict = {key: total_reward for key in rew_dict}
+
+        if self.step_num >= self.config.episode_length:
+            done_dict = {key: True for key in done_dict}
+
+        done_dict["__all__"] = all(done_dict.values())
 
         return obs_dict, rew_dict, done_dict, info_dict
 
-    def reset(self):
-        """See superclass."""
+    def reset(  # pylint: disable=arguments-differ,too-many-locals,too-many-branches,too-many-statements
+        self,
+    ) -> Dict[int, ObsType]:
+        """Reset the environment.
+
+        Returns
+        -------
+            Dict[int, ObsType]: Dictionary of observations for each vehicle.
+        """
         self.t = 0
         self.step_num = 0
 
-        enough_vehicles = False
         # we don't want to initialize scenes with 0 actors after satisfying
         # all the conditions on a scene that we have
-        while not enough_vehicles:
-            self.file = self.files[np.random.randint(len(self.files))]
-            self.simulation = Simulation(os.path.join(
-                self.cfg['scenario_path'], self.file),
-                                         config=get_scenario_dict(self.cfg))
+        for _ in range(_MAX_NUM_TRIES_TO_FIND_VALID_VEHICLE):
+            self.file = np.random.choice(self.files)
+            self.simulation = Simulation(str(self.config.data_path / self.file), config=self.config.scenario)
             self.scenario = self.simulation.getScenario()
-            '''##################################################################
-                Construct context dictionary of observations that can be used to
-                warm up policies by stepping all vehicles as experts.
-            #####################################################################'''
-            dead_obs = self.get_observation(self.scenario.getVehicles()[0])
-            self.dead_feat = -np.ones(
-                dead_obs.shape[0] * self.n_frames_stacked)
-            # step all the vehicles forward by one second and record their observations as context
-            context_len = max(10, self.n_frames_stacked)
+
+            #####################################################################
+            #   Construct context dictionary of observations that can be used to
+            #   warm up policies by stepping all vehicles as experts.
+            #####################################################################
+            dead_feat = -np.ones(
+                self.get_observation(self.scenario.getVehicles()[0]).shape[0] * self.config.subscriber.n_frames_stacked
+            )
+            # step all the vehicles forward by one second and record their observations
+            # as context
+            self.config.scenario.context_length = max(
+                self.config.scenario.context_length, self.config.subscriber.n_frames_stacked
+            )  # Note: Consider raising an error if context_length < n_frames_stacked.
             self.context_dict = {
-                veh.getID():
-                deque([self.dead_feat for _ in range(context_len)],
-                      maxlen=context_len)
+                veh.getID(): deque(
+                    [dead_feat for _ in range(self.config.scenario.context_length)],
+                    maxlen=self.config.scenario.context_length,
+                )
                 for veh in self.scenario.getObjectsThatMoved()
             }
             for veh in self.scenario.getObjectsThatMoved():
                 veh.expert_control = True
-            for _ in range(10):
+            for _ in range(self.config.scenario.context_length):
                 for veh in self.scenario.getObjectsThatMoved():
-                    self.context_dict[veh.getID()].append(
-                        self.get_observation(veh))
-                self.simulation.step(self.cfg['dt'])
+                    self.context_dict[veh.getID()].append(self.get_observation(veh))
+                self.simulation.step(self.config.dt)
             # now hand back control to our actual controllers
             for veh in self.scenario.getObjectsThatMoved():
                 veh.expert_control = False
@@ -342,99 +318,75 @@ def reset(self):
             # remove all the objects that are in collision or are already in goal dist
             # additionally set the objects that have infeasible goals to be experts
             for veh_obj in self.simulation.getScenario().getObjectsThatMoved():
-                obj_pos = veh_obj.getPosition()
-                obj_pos = np.array([obj_pos.x, obj_pos.y])
-                goal_pos = veh_obj.getGoalPosition()
-                goal_pos = np.array([goal_pos.x, goal_pos.y])
-                '''############################################
-                    Remove vehicles at goal
-                ############################################'''
+                obj_pos = _position_as_array(veh_obj.getPosition())
+                goal_pos = _position_as_array(veh_obj.getGoalPosition())
+                ############################################
+                #    Remove vehicles at goal
+                ############################################
                 norm = np.linalg.norm(goal_pos - obj_pos)
-                if norm < self.cfg['rew_cfg'][
-                        'goal_tolerance'] or veh_obj.getCollided():
+                if norm < self.config.rew_cfg.goal_tolerance or veh_obj.getCollided():
                     self.scenario.removeVehicle(veh_obj)
-                '''############################################
-                    Set all vehicles with unachievable goals to be experts
-                ############################################'''
-                if self.file in self.valid_veh_dict and veh_obj.getID(
-                ) in self.valid_veh_dict[self.file]:
+                ############################################
+                #    Set all vehicles with unachievable goals to be experts
+                ############################################
+                if self.file in self.valid_veh_dict and veh_obj.getID() in self.valid_veh_dict[self.file]:
                     veh_obj.expert_control = True
-            '''############################################
-                Pick out the vehicles that we are controlling
-            ############################################'''
-            # ensure that we have no more than max_num_vehicles are controlled
-            temp_vehicles = self.scenario.getObjectsThatMoved()
-            np.random.shuffle(temp_vehicles)
+            ############################################
+            #    Pick out the vehicles that we are controlling
+            ############################################
+            # Ensure that no more than max_num_vehicles are controlled
+            temp_vehicles = np.random.permutation(self.scenario.getObjectsThatMoved())
             curr_index = 0
             self.controlled_vehicles = []
-            self.expert_controlled_vehicles = []
-            self.vehicles_to_delete = []
             for vehicle in temp_vehicles:
-                # this vehicle was invalid at the end of the 1 second context
-                # step so we need to remove it.
-                if np.isclose(vehicle.position.x, INVALID_POSITION):
-                    self.vehicles_to_delete.append(vehicle)
-                # we don't want to include vehicles that had unachievable goals
+                # This vehicle was invalid at the end of the context warm-up
+                # steps, so we need to remove it
+                if np.isclose(vehicle.position.x, self.config.scenario.invalid_position):
+                    self.scenario.removeVehicle(vehicle)
+                # We don't want to include vehicles that had unachievable goals
                 # as controlled vehicles
-                elif not vehicle.expert_control and curr_index < self.cfg[
-                        'max_num_vehicles']:
+                elif not vehicle.expert_control and curr_index < self.config.max_num_vehicles:
                     self.controlled_vehicles.append(vehicle)
                     curr_index += 1
                 else:
-                    self.expert_controlled_vehicles.append(vehicle)
-            self.all_vehicle_ids = [
-                veh.getID() for veh in self.controlled_vehicles
-            ]
-            # make all the vehicles that are in excess of max_num_vehicles controlled by an expert
-            for veh in self.expert_controlled_vehicles:
-                veh.expert_control = True
-            # remove vehicles that are currently at an invalid position
-            for veh in self.vehicles_to_delete:
-                self.scenario.removeVehicle(veh)
+                    vehicle.expert_control = True
+
+            self.all_vehicle_ids = [veh.getID() for veh in self.controlled_vehicles]
 
             # check that we have at least one vehicle or if we have just one file, exit anyways
             # or else we might be stuck in an infinite loop
-            if len(self.all_vehicle_ids) > 0 or len(self.files) == 1:
-                enough_vehicles = True
-
-        # for one reason or another (probably we had a file where all the agents achieved their goals)
-        # we have no controlled vehicles
-        # just grab a vehicle even if it hasn't moved so that we have something
-        # to return obs for even if it's not controlled
-        # NOTE: this case only occurs during our eval procedure where we set the
-        # self.files list to be length 1. Otherwise, the while loop above will repeat
-        # until a file is found.
-        if len(self.all_vehicle_ids) == 0:
-            self.controlled_vehicles = [self.scenario.getVehicles()[0]]
-            self.all_vehicle_ids = [
-                veh.getID() for veh in self.controlled_vehicles
-            ]
+            if len(self.all_vehicle_ids) > 0:
+                break
+        else:  # No break in for-loop, i.e., no valid vehicle found in any of the files.
+            raise ValueError(f"No controllable vehicles in any of the {len(self.files)} scenes.")
 
         # construct the observations and goal normalizers
         obs_dict = {}
         self.goal_dist_normalizers = {}
-        max_goal_dist = -100
+        max_goal_dist = -np.inf
         for veh_obj in self.controlled_vehicles:
             veh_id = veh_obj.getID()
             # store normalizers for each vehicle
-            obj_pos = veh_obj.getPosition()
-            obj_pos = np.array([obj_pos.x, obj_pos.y])
-            goal_pos = veh_obj.getGoalPosition()
-            goal_pos = np.array([goal_pos.x, goal_pos.y])
+            obj_pos = _position_as_array(veh_obj.getPosition())
+            goal_pos = _position_as_array(veh_obj.getGoalPosition())
             dist = np.linalg.norm(obj_pos - goal_pos)
             self.goal_dist_normalizers[veh_id] = dist
             # compute the obs
             self.context_dict[veh_id].append(self.get_observation(veh_obj))
-            if self.n_frames_stacked > 1:
+            if self.config.subscriber.n_frames_stacked > 1:
                 veh_deque = self.context_dict[veh_id]
                 context_list = list(
-                    islice(veh_deque,
-                           len(veh_deque) - self.n_frames_stacked,
-                           len(veh_deque)))
+                    islice(
+                        veh_deque,
+                        len(veh_deque) - self.config.subscriber.n_frames_stacked,
+                        len(veh_deque),
+                    )
+                )
                 obs_dict[veh_id] = np.concatenate(context_list)
             else:
                 obs_dict[veh_id] = self.context_dict[veh_id][-1]
-            # pick the vehicle that has to travel the furthest distance and use it for rendering
+            # pick the vehicle that has to travel the furthest distance and use it for
+            # rendering
             if dist > max_goal_dist:
                 # this attribute is just used for rendering of the view
                 # from the ego frame
@@ -442,114 +394,222 @@ def reset(self):
                 max_goal_dist = dist
 
         self.done_ids = []
-        # we should return obs for the missing agents
-        if self.cfg['subscriber']['keep_inactive_agents']:
-            max_id = max([int(key) for key in obs_dict.keys()])
-            num_missing_agents = max(
-                0, self.cfg['max_num_vehicles'] - len(obs_dict))
-            for i in range(num_missing_agents):
-                obs_dict[max_id + i + 1] = self.dead_feat
-            self.dead_agent_ids = [
-                max_id + i + 1 for i in range(num_missing_agents)
-            ]
-            self.all_vehicle_ids = list(obs_dict.keys())
-        else:
-            self.dead_agent_ids = []
+
+        logging.debug("Scene: %s | Controlling vehicles: %s", self.file, [veh.id for veh in self.controlled_vehicles])
+
         return obs_dict
 
-    def get_observation(self, veh_obj):
-        """Return the observation for a particular vehicle."""
-        ego_obs = self.scenario.ego_state(veh_obj)
-        if self.cfg['subscriber']['use_ego_state'] and self.cfg['subscriber'][
-                'use_observations']:
-            obs = np.concatenate(
-                (ego_obs,
-                 self.scenario.flattened_visible_state(
-                     veh_obj,
-                     view_dist=self.cfg['subscriber']['view_dist'],
-                     view_angle=self.cfg['subscriber']['view_angle'],
-                     head_angle=veh_obj.head_angle)))
-        elif self.cfg['subscriber']['use_ego_state'] and not self.cfg[
-                'subscriber']['use_observations']:
-            obs = ego_obs
-        else:
-            obs = self.scenario.flattened_visible_state(
-                veh_obj,
-                view_dist=self.cfg['subscriber']['view_dist'],
-                view_angle=self.cfg['subscriber']['view_angle'],
-                head_angle=veh_obj.head_angle)
+    def get_observation(self, veh_obj: Vehicle) -> np.ndarray:
+        """Return the observation for a particular vehicle.
+
+        Args
+        ----
+            veh_obj (Vehicle): Vehicle object to get the observation for.
+
+        Returns
+        -------
+            np.ndarray: Observation for the vehicle.
+        """
+        cur_position = _position_as_array(veh_obj.getPosition())
+        obs = np.concatenate(
+            (
+                self.scenario.ego_state(veh_obj) if self.config.subscriber.use_ego_state else [],
+                cur_position if self.config.subscriber.use_current_position else [],
+                self.scenario.flattened_visible_state(
+                    veh_obj, self.config.subscriber.view_dist, self.config.subscriber.view_angle
+                )
+                if self.config.subscriber.use_observations
+                else [],
+            )
+        )
         return obs
 
-    def make_all_vehicles_experts(self):
+    def make_all_vehicles_experts(self) -> None:
         """Force all vehicles to be experts."""
         for veh in self.scenario.getVehicles():
             veh.expert_control = True
 
-    def get_vehicles(self):
-        """Return the vehicles."""
-        return self.scenario.getVehicles()
-
-    def get_objects_that_moved(self):
-        """Return the objects that moved."""
-        return self.scenario.getObjectsThatMoved()
-
-    def render(self, mode=None):
-        """See superclass."""
-        return self.scenario.getImage(
-            img_width=1600,
-            img_height=1600,
-            draw_target_positions=True,
-            padding=50.0,
-        )
+    def render(self, mode: Optional[bool] = None) -> Optional[RenderType]:  # pylint: disable=unused-argument
+        """Render the environment.
+
+        Args
+        ----
+            mode (Optional[bool]): Render mode.
+
+        Returns
+        -------
+            Optional[RenderType]: Rendered image.
+        """
+        return self.scenario.getImage(**self._render_settings)
+
+    def render_ego(self, mode: Optional[bool] = None) -> Optional[RenderType]:  # pylint: disable=unused-argument
+        """Render the ego vehicles.
+
+        Args
+        ----
+            mode (Optional[bool]): Render mode.
 
-    def render_ego(self, mode=None):
-        """See superclass."""
+        Returns
+        -------
+            Optional[RenderType]: Rendered image.
+        """
         if self.render_vehicle.getID() in self.done_ids:
             return None
-        else:
-            return self.scenario.getConeImage(
-                source=self.render_vehicle,
-                view_dist=self.cfg['subscriber']['view_dist'],
-                view_angle=self.cfg['subscriber']['view_angle'],
-                head_angle=self.render_vehicle.head_angle,
-                img_width=1600,
-                img_height=1600,
-                padding=50.0,
-                draw_target_position=True,
-            )
+        return self.scenario.getConeImage(
+            source=self.render_vehicle,
+            view_dist=self.config.subscriber.view_dist,
+            view_angle=self.config.subscriber.view_angle,
+            head_angle=self.render_vehicle.head_angle,
+            **self._render_settings,
+        )
 
-    def render_features(self, mode=None):
-        """See superclass."""
+    def render_features(self, mode: Optional[bool] = None) -> Optional[RenderType]:  # pylint: disable=unused-argument
+        """Render the features.
+
+        Args
+        ----
+            mode (Optional[bool]): Render mode.
+
+        Returns
+        -------
+            Optional[RenderType]: Rendered image.
+        """
         if self.render_vehicle.getID() in self.done_ids:
             return None
-        else:
-            return self.scenario.getFeaturesImage(
-                source=self.render_vehicle,
-                view_dist=self.cfg['subscriber']['view_dist'],
-                view_angle=self.cfg['subscriber']['view_angle'],
-                head_angle=self.render_vehicle.head_angle,
-                img_width=1600,
-                img_height=1600,
-                padding=50.0,
-                draw_target_position=True,
-            )
+        return self.scenario.getFeaturesImage(
+            source=self.render_vehicle,
+            view_dist=self.config.subscriber.view_dist,
+            view_angle=self.config.subscriber.view_angle,
+            head_angle=self.render_vehicle.head_angle,
+            **self._render_settings,
+        )
 
-    def seed(self, seed=None):
-        """Ensure determinism."""
-        if seed is None:
-            np.random.seed(1)
-        else:
+    def seed(self, seed: Optional[int] = None) -> None:
+        """Seed the environment.
+
+        Args
+        ----
+            seed (Optional[int]): Seed to use.
+        """
+        if seed is not None:
             np.random.seed(seed)
             torch.manual_seed(seed)
             torch.cuda.manual_seed_all(seed)
 
-    def angle_sub(self, current_angle, target_angle) -> int:
-        """Subtract two angles to find the minimum angle between them."""
-        # Subtract the angles, constraining the value to [0, 2 * np.pi)
-        diff = (target_angle - current_angle) % (2 * np.pi)
+    def _set_discrete_action_space(self) -> None:
+        """Set the discrete action space."""
+        self.action_space = Discrete(self.config.accel_discretization * self.config.steering_discretization)
+        self.accel_grid = np.linspace(
+            -np.abs(self.config.accel_lower_bound),
+            self.config.accel_upper_bound,
+            self.config.accel_discretization,
+        )
+        self.steering_grid = np.linspace(
+            -np.abs(self.config.steering_lower_bound),
+            self.config.steering_upper_bound,
+            self.config.steering_discretization,
+        )
 
-        # If we are more than np.pi we're taking the long way around.
-        # Let's instead go in the shorter, negative direction
-        if diff > np.pi:
-            diff = -(2 * np.pi - diff)
-        return diff
+        self.idx_to_actions = {}
+        for i, (accel, steer) in enumerate(product(self.accel_grid, self.steering_grid)):
+            self.idx_to_actions[i] = [accel, steer]
+
+    def _set_continuous_action_space(self) -> None:
+        """Set the continuous (acceleration, steering) action space.
+
+        Both lower bounds are sign-normalized with ``-abs(...)``, matching
+        ``_set_discrete_action_space``, so a lower bound configured as a negative
+        number is not flipped to a positive one. No index-to-action lookup is
+        needed here, so ``idx_to_actions`` is reset to ``None``.
+        """
+        cfg = self.config
+        # Lower corner: [-|accel_lower_bound|, -|steering_lower_bound|].
+        lower = -np.abs(np.array([cfg.accel_lower_bound, cfg.steering_lower_bound]))
+        # Upper corner keeps the original form: only the acceleration bound
+        # is passed through abs(); the steering bound is used as configured.
+        upper = np.array([np.abs(cfg.accel_upper_bound), cfg.steering_upper_bound])
+
+        self.action_space = Box(low=lower, high=upper)
+        self.idx_to_actions = None
+
+
+def _angle_sub(current_angle: float, target_angle: float) -> float:
+    """Return the signed shortest rotation from ``current_angle`` to ``target_angle``.
+
+    Args
+    ----
+        current_angle (float): Angle to rotate from, in radians.
+        target_angle (float): Angle to rotate to, in radians.
+
+    Returns
+    -------
+        float: Angular difference wrapped into (-np.pi, np.pi].
+    """
+    full_turn = 2 * np.pi
+    # Wrap the raw difference into [0, 2 * np.pi).
+    wrapped = (target_angle - current_angle) % full_turn
+    if wrapped <= np.pi:
+        # Half a turn or less: this is already the shortest rotation.
+        return wrapped
+    # More than half a turn: express it as the equivalent negative rotation.
+    return -(full_turn - wrapped)
+
+
+def _apply_action_to_vehicle(
+    veh_obj: Vehicle, action: ActType, *, idx_to_actions: Optional[Dict[int, Tuple[float, float]]] = None
+) -> None:
+    """Apply an action to a vehicle.
+
+    ``Action`` objects and numpy arrays are passed to ``veh_obj.apply_action``; a tuple/list is read as
+    ``(acceleration, steering)``; an integer (Python or numpy) is an index into ``idx_to_actions``.
+
+    Args
+    ----
+        veh_obj (Vehicle): Vehicle object to apply the action to.
+        action (ActType): Action to apply to the vehicle.
+
+    Optional Args
+    -------------
+        idx_to_actions (Optional[Dict[int, Tuple[float, float]]]): Maps a discrete action index to its
+            ``(acceleration, steering)`` pair. Required when ``action`` is an integer index.
+
+    Raises
+    ------
+        NotImplementedError: If the action type is not supported, or if an integer action is given
+            without ``idx_to_actions``.
+    """
+    if isinstance(action, Action):
+        veh_obj.apply_action(action)
+    elif isinstance(action, np.ndarray):
+        veh_obj.apply_action(Action.from_numpy(action))
+    elif isinstance(action, (tuple, list)):
+        veh_obj.acceleration = action[0]
+        veh_obj.steering = action[1]
+    elif isinstance(action, (int, np.integer)) and idx_to_actions is not None:
+        # Any integer flavour (not just np.int64) is an index into the discrete action table.
+        veh_obj.acceleration, veh_obj.steering = idx_to_actions[int(action)]
+    else:
+        raise NotImplementedError(f"Action type '{type(action)}' not supported.")
+
+
+def _position_as_array(position: Vector2D) -> np.ndarray:
+    """Pack the ``x`` and ``y`` attributes of a position into a numpy array.
+
+    Args
+    ----
+        position (Vector2D): Position whose ``x`` and ``y`` attributes are read.
+
+    Returns
+    -------
+        np.ndarray: One-dimensional array ``[x, y]``.
+    """
+    return np.array((position.x, position.y))
+
+
+if __name__ == "__main__":
+    # Load environment settings
+    with open("./configs/env_config.yaml", "r") as stream:
+        env_config = yaml.safe_load(stream)
+
+    # Initialize environment
+    env = BaseEnv(config=env_config)
diff --git a/nocturne/envs/wrappers.py b/nocturne/envs/wrappers.py
deleted file mode 100644
index da846671..00000000
--- a/nocturne/envs/wrappers.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Wrappers and env constructors for the environments."""
-from gym.spaces import Box
-import numpy as np
-
-from nocturne.envs import BaseEnv
-
-
-class OnPolicyPPOWrapper(object):
-    """Wrapper to make env compatible with On-Policy code."""
-
-    def __init__(self, env, use_images=False):
-        """Wrap with appropriate observation spaces and make fixed length.
-
-        Args
-        ----
-            env ([type]): [description]
-            no_img_concat (bool, optional): If true, we don't concat images into the 'state' key
-        """
-        self._env = env
-        self.use_images = use_images
-
-        self.n = self.cfg.max_num_vehicles
-        obs_dict = self.reset()
-        # tracker used to match observations to actions
-        self.agent_ids = []
-        self.feature_shape = obs_dict[0].shape
-        self.share_observation_space = [
-            Box(low=-np.inf,
-                high=+np.inf,
-                shape=self.feature_shape,
-                dtype=np.float32) for _ in range(self.n)
-        ]
-
-    @property
-    def observation_space(self):
-        """See superclass."""
-        return [self._env.observation_space for _ in range(self.n)]
-
-    @property
-    def action_space(self):
-        """See superclass."""
-        return [self._env.action_space for _ in range(self.n)]
-
-    def step(self, actions):
-        """Convert returned dicts to lists."""
-        agent_actions = {}
-        for action_vec, agent_id in zip(actions, self.agent_ids):
-            agent_actions[agent_id] = action_vec
-        next_obses, rew, done, info = self._env.step(agent_actions)
-        obs_n = []
-        rew_n = []
-        done_n = []
-        info_n = []
-        for key in self.agent_ids:
-            if isinstance(next_obses[key], dict):
-                obs_n.append(next_obses[key]['features'])
-            else:
-                obs_n.append(next_obses[key])
-            rew_n.append([rew[key]])
-            done_n.append(done[key])
-            agent_info = info[key]
-            agent_info['individual_reward'] = rew[key]
-            info_n.append(agent_info)
-        return obs_n, rew_n, done_n, info_n
-
-    def reset(self):
-        """Convert observation dict to list."""
-        obses = self._env.reset()
-        obs_n = []
-        self.agent_ids = []
-        for key in obses.keys():
-            self.agent_ids.append(key)
-            if not hasattr(self, 'agent_key'):
-                self.agent_key = key
-            if isinstance(obses[key], dict):
-                obs_n.append(obses[key]['features'])
-            else:
-                obs_n.append(obses[key])
-        return obs_n
-
-    def render(self, mode=None):
-        """See superclass."""
-        return self._env.render(mode)
-
-    def seed(self, seed=None):
-        """See superclass."""
-        self._env.seed(seed)
-
-    def __getattr__(self, name):
-        """See superclass."""
-        return getattr(self._env, name)
-
-
-def create_env(cfg):
-    """Return the base environment."""
-    env = BaseEnv(cfg)
-    return env
-
-
-def create_ppo_env(cfg, rank=0):
-    """Return a PPO wrapped environment."""
-    env = BaseEnv(cfg, rank=rank)
-    return OnPolicyPPOWrapper(env, use_images=cfg.img_as_state)
diff --git a/nocturne/utils/eval/average_displacement.py b/nocturne/utils/eval/average_displacement.py
deleted file mode 100644
index 4d6502ad..00000000
--- a/nocturne/utils/eval/average_displacement.py
+++ /dev/null
@@ -1,226 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Average displacement error computation."""
-from collections import defaultdict
-from itertools import repeat
-import json
-from multiprocessing import Pool
-import os
-import random
-
-import numpy as np
-import torch
-
-from cfgs.config import PROCESSED_VALID_NO_TL, ERR_VAL
-from nocturne import Simulation
-
-SIM_N_STEPS = 90  # number of steps per trajectory
-GOAL_TOLERANCE = 0.5
-
-
-def _average_displacement_impl(arg):
-    trajectory_path, model, configs = arg
-    print(trajectory_path)
-
-    scenario_config = configs['scenario_cfg']
-
-    view_dist = configs['dataloader_cfg']['view_dist']
-    view_angle = configs['dataloader_cfg']['view_angle']
-    state_normalization = configs['dataloader_cfg']['state_normalization']
-    dt = configs['dataloader_cfg']['dt']
-
-    n_stacked_states = configs['dataloader_cfg']['n_stacked_states']
-    state_size = configs['model_cfg']['n_inputs'] // n_stacked_states
-    state_dict = defaultdict(lambda: np.zeros(state_size * n_stacked_states))
-
-    # create expert simulation
-    sim_expert = Simulation(str(trajectory_path), scenario_config)
-    scenario_expert = sim_expert.getScenario()
-    vehicles_expert = scenario_expert.getVehicles()
-    objects_expert = scenario_expert.getObjectsThatMoved()
-    id2veh_expert = {veh.id: veh for veh in vehicles_expert}
-
-    # create model simulation
-    sim_model = Simulation(str(trajectory_path), scenario_config)
-    scenario_model = sim_model.getScenario()
-    vehicles_model = scenario_model.getVehicles()
-    objects_model = scenario_model.getObjectsThatMoved()
-
-    # set all objects to be expert-controlled
-    for obj in objects_expert:
-        obj.expert_control = True
-    for obj in objects_model:
-        obj.expert_control = True
-
-    # in model sim, model will control vehicles that moved
-    controlled_vehicles = [
-        veh for veh in vehicles_model if veh in objects_model
-    ]
-    random.shuffle(controlled_vehicles)
-    # controlled_vehicles = controlled_vehicles[:2]
-
-    # warmup to build up state stacking
-    for i in range(n_stacked_states - 1):
-        for veh in controlled_vehicles:
-            ego_state = scenario_model.ego_state(veh)
-            visible_state = scenario_model.flattened_visible_state(
-                veh, view_dist=view_dist, view_angle=view_angle)
-            state = np.concatenate(
-                (ego_state, visible_state)) / state_normalization
-            state_dict[veh.getID()] = np.roll(state_dict[veh.getID()],
-                                              len(state))
-            state_dict[veh.getID()][:len(state)] = state
-        sim_model.step(dt)
-        sim_expert.step(dt)
-
-    for veh in controlled_vehicles:
-        veh.expert_control = False
-
-    avg_displacements = []
-    final_displacements = [0 for _ in controlled_vehicles]
-    collisions = [False for _ in controlled_vehicles]
-    goal_achieved = [False for _ in controlled_vehicles]
-    for i in range(SIM_N_STEPS - n_stacked_states):
-        for veh in controlled_vehicles:
-            if np.isclose(veh.position.x, ERR_VAL):
-                veh.expert_control = True
-            else:
-                veh.expert_control = False
-        # set model actions
-        all_states = []
-        for veh in controlled_vehicles:
-            # get vehicle state
-            state = np.concatenate(
-                (scenario_model.ego_state(veh),
-                 scenario_model.flattened_visible_state(
-                     veh, view_dist=view_dist,
-                     view_angle=view_angle))) / state_normalization
-            # stack state
-            state_dict[veh.getID()] = np.roll(state_dict[veh.getID()],
-                                              len(state))
-            state_dict[veh.getID()][:len(state)] = state
-            all_states.append(state_dict[veh.getID()])
-        all_states = torch.as_tensor(np.array(all_states), dtype=torch.float32)
-
-        # compute vehicle actions
-        all_actions = model(all_states, deterministic=True
-                            )  # /!\ this returns an array (2,n) and not (n,2)
-        accel_actions = all_actions[0].cpu().numpy()
-        steering_actions = all_actions[1].cpu().numpy()
-        # set vehicles actions
-        for veh, accel_action, steering_action in zip(controlled_vehicles,
-                                                      accel_actions,
-                                                      steering_actions):
-            veh.acceleration = accel_action
-            veh.steering = steering_action
-
-        # step simulations
-        sim_expert.step(dt)
-        sim_model.step(dt)
-
-        # compute displacements over non-collided vehicles
-        displacements = []
-        for i, veh in enumerate(controlled_vehicles):
-            # get corresponding vehicle in expert simulation
-            expert_veh = id2veh_expert[veh.id]
-            # make sure it is valid
-            if np.isclose(expert_veh.position.x,
-                          ERR_VAL) or expert_veh.collided:
-                continue
-            # print(expert_veh.position, veh.position)
-            # compute displacement
-            expert_pos = id2veh_expert[veh.id].position
-            model_pos = veh.position
-            pos_diff = (model_pos - expert_pos).norm()
-            displacements.append(pos_diff)
-            final_displacements[i] = pos_diff
-            if veh.collided:
-                collisions[i] = True
-            if (veh.position - veh.target_position).norm() < GOAL_TOLERANCE:
-                goal_achieved[i] = True
-
-        # average displacements over all vehicles
-        if len(displacements) > 0:
-            avg_displacements.append(np.mean(displacements))
-            # print(displacements, np.mean(displacements))
-
-    # average displacements over all time steps
-    avg_displacement = np.mean(
-        avg_displacements) if len(avg_displacements) > 0 else np.nan
-    final_displacement = np.mean(
-        final_displacements) if len(final_displacements) > 0 else np.nan
-    avg_collisions = np.mean(collisions) if len(collisions) > 0 else np.nan
-    avg_goals = np.mean(goal_achieved) if len(goal_achieved) > 0 else np.nan
-    print('displacements', avg_displacement)
-    print('final_displacement', final_displacement)
-    print('collisions', avg_collisions)
-    print('goal_rate', avg_goals)
-    return avg_displacement, final_displacement, avg_collisions, avg_goals
-
-
-def compute_average_displacement(trajectories_dir, model, configs):
-    """Compute average displacement error between a model and the ground truth."""
-    NUM_FILES = 200
-    # get trajectories paths
-    with open(os.path.join(trajectories_dir, 'valid_files.json')) as file:
-        valid_veh_dict = json.load(file)
-        files = list(valid_veh_dict.keys())
-        # sort the files so that we have a consistent order
-        np.random.seed(0)
-        np.random.shuffle(files)
-    # compute average displacement over each individual trajectory file
-    trajectories_paths = files[:NUM_FILES]
-    for i, trajectory in enumerate(trajectories_paths):
-        trajectories_paths[i] = os.path.join(trajectories_dir, trajectory)
-    with Pool(processes=14) as pool:
-        result = list(
-            pool.map(_average_displacement_impl,
-                     zip(trajectories_paths, repeat(model), repeat(configs))))
-        average_displacements = np.array(result)[:, 0]
-        final_displacements = np.array(result)[:, 1]
-        average_collisions = np.array(result)[:, 2]
-        average_goals = np.array(result)[:, 3]
-        print(average_displacements, final_displacements, average_collisions,
-              average_goals)
-
-    return [
-        np.mean(average_displacements[~np.isnan(average_displacements)]),
-        np.std(average_displacements[~np.isnan(average_displacements)])
-    ], [
-        np.mean(final_displacements[~np.isnan(final_displacements)]),
-        np.std(final_displacements[~np.isnan(final_displacements)])
-    ], [
-        np.mean(average_collisions[~np.isnan(average_collisions)]),
-        np.std(average_collisions[~np.isnan(average_displacements)])
-    ], [
-        np.mean(average_goals[~np.isnan(average_goals)]),
-        np.std(average_goals[~np.isnan(average_goals)])
-    ]
-
-
-if __name__ == '__main__':
-    from examples.imitation_learning.model import ImitationAgent  # noqa: F401
-    model = torch.load(
-        '/checkpoint/eugenevinitsky/nocturne/test/2022.06.05/test/14.23.17/\
-            ++device=cuda,++file_limit=1000/train_logs/2022_06_05_14_23_23/model_600.pth'
-    ).to('cpu')
-    model.actions_grids = [x.to('cpu') for x in model.actions_grids]
-    model.eval()
-    model.nn[0].eval()
-    with open(
-            '/checkpoint/eugenevinitsky/nocturne/test/2022.06.05/test/14.23.17/\
-                ++device=cuda,++file_limit=1000/train_logs/2022_06_05_14_23_23/configs.json',
-            'r') as fp:
-        configs = json.load(fp)
-        configs['device'] = 'cpu'
-    with torch.no_grad():
-        ade, fde, collisions, goals = compute_average_displacement(
-            PROCESSED_VALID_NO_TL, model=model, configs=configs)
-    print(f'Average Displacement Error: {ade[0]:.3f} ± {ade[1]:.3f} meters')
-    print(f'Final Displacement Error: {fde[0]:.3f} ± {fde[1]:.3f} meters')
-    print(f'Average Collisions: {collisions[0]:.3f} ± {collisions[1]:.3f}%')
-    print(
-        f'Average Success at getting to goal: {goals[0]:.3f} ± {goals[1]:.3f}%'
-    )
diff --git a/nocturne/utils/eval/collision_rate.py b/nocturne/utils/eval/collision_rate.py
deleted file mode 100644
index 38e29755..00000000
--- a/nocturne/utils/eval/collision_rate.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Collision rate computation."""
-from pathlib import Path
-import numpy as np
-import torch
-
-from nocturne import Simulation
-from cfgs.config import ERR_VAL as INVALID_POSITION
-
-
-SIM_N_STEPS = 90  # number of steps per trajectory
-SIM_STEP_TIME = 0.1  # dt (in seconds)
-
-
-def _collision_rate_impl(trajectory_path, model=None, sim_allow_non_vehicles=True, check_vehicles_only=True):
-    # create expert simulation
-    sim = Simulation(scenario_path=str(trajectory_path), start_time=0, allow_non_vehicles=sim_allow_non_vehicles)
-    scenario = sim.getScenario()
-    vehicles = scenario.getVehicles()
-    objects_that_moved = scenario.getObjectsThatMoved()
-    vehicles_that_moved = [veh for veh in vehicles if veh in objects_that_moved]
-
-    # set all objects to be expert-controlled
-    for obj in objects_that_moved:
-        obj.expert_control = True
-    for obj in vehicles:
-        obj.expert_control = True
-
-    # if a model is given, model will control vehicles that moved
-    if model is not None:
-        controlled_vehicles = vehicles_that_moved
-        for veh in controlled_vehicles:
-            veh.expert_control = False
-    else:
-        controlled_vehicles = []
-
-    # vehicles to check for collisions on
-    objects_to_check = [
-        obj for obj in (vehicles_that_moved if check_vehicles_only else objects_that_moved)
-        if (obj.target_position - obj.position).norm() > 0.5
-    ]
-
-    # step sim until the end and check for collisions
-    collided_with_vehicle = {obj.id: False for obj in objects_to_check}
-    collided_with_edge = {obj.id: False for obj in objects_to_check}
-    for i in range(SIM_N_STEPS):
-        # set model actions
-        for veh in controlled_vehicles:
-            # get vehicle state
-            state = torch.as_tensor(np.expand_dims(np.concatenate(
-                (scenario.ego_state(veh),
-                 scenario.flattened_visible_state(veh, view_dist=120, view_angle=3.14))
-            ), axis=0), dtype=torch.float32)
-            # compute vehicle action
-            action = model(state)[0]
-            # set vehicle action
-            veh.acceleration = action[0]
-            veh.steering = action[1]
-
-        # step simulation
-        sim.step(SIM_STEP_TIME)
-
-        # check for collisions
-        for obj in objects_to_check:
-            if not np.isclose(obj.position.x, INVALID_POSITION) and obj.collided:
-                if int(obj.collision_type) == 1:
-                    collided_with_vehicle[obj.id] = True
-                if int(obj.collision_type) == 2:
-                    collided_with_edge[obj.id] = True
-
-    # compute collision rate
-    collisions_with_vehicles = list(collided_with_vehicle.values())
-    collisions_with_edges = list(collided_with_edge.values())
-    collision_rate_vehicles = collisions_with_vehicles.count(True) / len(collisions_with_vehicles)
-    collision_rate_edges = collisions_with_edges.count(True) / len(collisions_with_edges)
-
-    return collision_rate_vehicles, collision_rate_edges
-
-
-def compute_average_collision_rate(trajectories_dir, model=None, **kwargs):
-    """Compute average collision rate for a model."""
-    # get trajectories paths
-    if isinstance(trajectories_dir, str):
-        # if trajectories_dir is a string, treat it as the path to a directory of trajectories
-        trajectories_dir = Path(trajectories_dir)
-        trajectories_paths = list(trajectories_dir.glob('*tfrecord*.json'))
-    elif isinstance(trajectories_dir, list):
-        # if trajectories_dir is a list, treat it as a list of paths to trajectory files
-        trajectories_paths = [Path(path) for path in trajectories_dir]
-    # compute average collision rate over each individual trajectory file
-    average_collision_rates = np.array(list(map(
-        lambda path: _collision_rate_impl(path, model, **kwargs),
-        trajectories_paths
-    )))
-
-    return np.mean(average_collision_rates, axis=0)
-
-
-if __name__ == '__main__':
-    from nocturne.utils.imitation_learning.waymo_data_loader import ImitationAgent  # noqa: F401
-    model = torch.load('model.pth')
-    collisions_with_vehicles, collisions_with_road_lines = \
-        compute_average_collision_rate('dataset/json_files', model=None)
-    print(f'Average Collision Rate: {100*collisions_with_vehicles:.2f}% with vehicles, '
-          f'{100*collisions_with_road_lines:.2f}% with road lines')
diff --git a/nocturne/utils/eval/goal_by_intersection.py b/nocturne/utils/eval/goal_by_intersection.py
deleted file mode 100644
index e3d36f20..00000000
--- a/nocturne/utils/eval/goal_by_intersection.py
+++ /dev/null
@@ -1,259 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Goal reaching rate and collision rate computation as a function of number of intersections in expert trajectory."""
-from pathlib import Path
-import numpy as np
-import torch
-from collections import defaultdict
-import random
-import json
-
-from nocturne import Simulation
-from cfgs.config import ERR_VAL as INVALID_POSITION
-from multiprocessing import Pool
-from itertools import repeat, combinations
-
-SIM_N_STEPS = 90  # number of steps per trajectory
-GOAL_TOLERANCE = 0.5
-
-
-def _compute_expert_intersections(trajectory_path):
-    with open(trajectory_path, 'r') as fp:
-        data = json.load(fp)
-
-    segments = defaultdict(list)
-    for veh_id, veh in enumerate(data['objects']):
-        # note: i checked and veh_id is consistent with how it's loaded in simulation
-
-        for i in range(len(veh['position']) - 1):
-            # compute polyline (might not be continuous since we have invalid positions)
-            segment = np.array([
-                [veh['position'][i]['x'], veh['position'][i]['y']],
-                [veh['position'][i + 1]['x'], veh['position'][i + 1]['y']],
-            ])
-
-            # if segment doesnt contain an invalid position, append to trajectory
-            if np.isclose(segment, INVALID_POSITION).any():
-                continue
-            segments[veh_id].append(segment)
-
-    # go over pair of vehicles and check if their segments intersect
-    n_collisions = defaultdict(int)
-    for veh1, veh2 in combinations(segments.keys(), 2):
-        # get corresponding segments
-        segments1 = np.array(segments[veh1])
-        segments2 = np.array(segments[veh2])
-
-        # check bounding rectangle intersection - O(n)
-        xmin1, ymin1 = np.min(np.min(segments1, axis=0), axis=0)
-        xmax1, ymax1 = np.max(np.max(segments1, axis=0), axis=0)
-        xmin2, ymin2 = np.min(np.min(segments2, axis=0), axis=0)
-        xmax2, ymax2 = np.max(np.max(segments2, axis=0), axis=0)
-
-        if xmax1 <= xmin2 or xmax2 <= xmin1 or ymax1 <= ymin2 or ymax2 <= ymin1:
-            # segments can't intersect since their bounding rectangle don't intersect
-            continue
-
-        # check intersection over pairs of segments - O(n^2)
-
-        # construct numpy array of shape (N = len(segments1) * len(segments2), 4, 2)
-        # where each element contain 4 points ABCD (segment AB of segments1 and segment CD of segments2)
-        idx1 = np.repeat(
-            np.arange(len(segments1)),
-            len(segments2))  # build indexes 1 1 1 2 2 2 3 3 3 4 4 4
-        idx2 = np.tile(np.arange(len(segments2)),
-                       len(segments1))  # build indexes 1 2 3 1 2 3 1 2 3 1 2 3
-        segment_pairs = np.concatenate(
-            (segments1[idx1], segments2[idx2]),
-            axis=1)  # concatenate to create all pairs
-
-        # now we need to check if at least one element ABCD contains an intersection between segment AB and segment CD
-        def ccw(A, B, C):
-            return (C[:, 1] - A[:, 1]) * (B[:, 0] - A[:, 0]) > (
-                B[:, 1] - A[:, 1]) * (C[:, 0] - A[:, 0])
-
-        # ABCD are each arrays of N points (shape (N, 2))
-        A = segment_pairs[:, 0]
-        B = segment_pairs[:, 1]
-        C = segment_pairs[:, 2]
-        D = segment_pairs[:, 3]
-        if np.logical_and(
-                ccw(A, C, D) != ccw(B, C, D),
-                ccw(A, B, C) != ccw(A, B, D)).any():
-            n_collisions[veh1] += 1
-            n_collisions[veh2] += 1
-
-    return n_collisions
-
-
-def _intesection_metrics_impl(trajectory_path, model, configs):
-    print(trajectory_path)
-
-    scenario_config = configs['scenario_cfg']
-
-    view_dist = configs['dataloader_cfg']['view_dist']
-    view_angle = configs['dataloader_cfg']['view_angle']
-    state_normalization = configs['dataloader_cfg']['state_normalization']
-    dt = configs['dataloader_cfg']['dt']
-
-    n_stacked_states = configs['dataloader_cfg']['n_stacked_states']
-    state_size = configs['model_cfg']['n_inputs'] // n_stacked_states
-    state_dict = defaultdict(lambda: np.zeros(state_size * n_stacked_states))
-
-    # create model simulation
-    sim = Simulation(str(trajectory_path), scenario_config)
-    scenario = sim.getScenario()
-    vehicles = scenario.getVehicles()
-    objects = scenario.getObjectsThatMoved()
-
-    # set all objects to be expert-controlled
-    for obj in objects:
-        obj.expert_control = True
-
-    # in model sim, model will control vehicles that moved
-    controlled_vehicles = [veh for veh in vehicles if veh in objects]
-
-    # only control 2 vehicles at random
-    random.shuffle(controlled_vehicles)
-    # controlled_vehicles = controlled_vehicles[:2]
-
-    # warmup to build up state stacking
-    for i in range(n_stacked_states - 1):
-        for veh in controlled_vehicles:
-            ego_state = scenario.ego_state(veh)
-            visible_state = scenario.flattened_visible_state(
-                veh, view_dist=view_dist, view_angle=view_angle)
-            state = np.concatenate(
-                (ego_state, visible_state)) / state_normalization
-            state_dict[veh.getID()] = np.roll(state_dict[veh.getID()],
-                                              len(state))
-            state_dict[veh.getID()][:len(state)] = state
-        sim.step(dt)
-
-    for veh in controlled_vehicles:
-        veh.expert_control = False
-
-    collisions = [False] * len(controlled_vehicles)
-    goal_achieved = [False] * len(controlled_vehicles)
-    for i in range(SIM_N_STEPS - n_stacked_states):
-        for veh in controlled_vehicles:
-            if np.isclose(veh.position.x, INVALID_POSITION):
-                veh.expert_control = True
-            else:
-                veh.expert_control = False
-        # set model actions
-        # get all actions at once
-        all_states = []
-        for veh in controlled_vehicles:
-            # get vehicle state
-            state = np.concatenate(
-                (scenario.ego_state(veh),
-                 scenario.flattened_visible_state(
-                     veh, view_dist=view_dist,
-                     view_angle=view_angle))) / state_normalization
-            # stack state
-            state_dict[veh.getID()] = np.roll(state_dict[veh.getID()],
-                                              len(state))
-            state_dict[veh.getID()][:len(state)] = state
-            all_states.append(state_dict[veh.getID()])
-        all_states = torch.as_tensor(np.array(all_states), dtype=torch.float32)
-
-        # compute vehicle actions
-        all_actions = model(all_states, deterministic=True
-                            )  # /!\ this returns an array (2,n) and not (n,2)
-        accel_actions = all_actions[0].cpu().numpy()
-        steering_actions = all_actions[1].cpu().numpy()
-        # set vehicles actions
-        for veh, accel_action, steering_action in zip(controlled_vehicles,
-                                                      accel_actions,
-                                                      steering_actions):
-            veh.acceleration = accel_action
-            veh.steering = steering_action
-
-        # step simulation
-        sim.step(dt)
-
-        # compute displacements over non-collided vehicles
-        for i, veh in enumerate(controlled_vehicles):
-            # make sure it is valid
-            if np.isclose(veh.position.x, INVALID_POSITION):
-                continue
-
-            # a collision with another a vehicle
-            if veh.collided and int(veh.collision_type) == 1:
-                collisions[i] = True
-            if (veh.position - veh.target_position).norm() < GOAL_TOLERANCE:
-                goal_achieved[i] = True
-
-    # compute expert intersections for all vehicles (mapping veh_id -> nb of intersections in expert traj)
-    intersection_data = _compute_expert_intersections(trajectory_path)
-
-    # compute metrics as a function of number of intersections
-
-    collision_rates = np.zeros(4)
-    goal_rates = np.zeros(4)
-    counts = np.zeros(4)
-    for i, veh in enumerate(controlled_vehicles):
-        n_intersections = min(intersection_data[veh.getID()], 3)
-        counts[n_intersections] += 1
-        if collisions[i]:
-            collision_rates[n_intersections] += 1
-        if goal_achieved[i]:
-            goal_rates[n_intersections] += 1
-    collision_rates /= counts
-    goal_rates /= counts
-    # note: returned values can contain NaN
-
-    return collision_rates, goal_rates
-
-
-def compute_metrics_by_intersection(trajectories_dir, model, configs):
-    """Compute metrics as a function of number of intesections in a vehicle's expert trajectory."""
-    NUM_FILES = 200
-    NUM_CPUS = 14
-
-    # get trajectories paths
-    trajectories_dir = Path(trajectories_dir)
-    trajectories_paths = list(trajectories_dir.glob('*tfrecord*.json'))
-    trajectories_paths.sort()
-    trajectories_paths = trajectories_paths[:NUM_FILES]
-
-    # parallel metric computation
-    with Pool(processes=NUM_CPUS) as pool:
-        result = np.array(
-            list(
-                pool.starmap(
-                    _intesection_metrics_impl,
-                    zip(trajectories_paths, repeat(model), repeat(configs)))))
-        assert result.shape == (len(trajectories_paths), 2, 4
-                                )  # collision rates, goal rates (in 4 bins)
-        avg_result = np.nanmean(result, axis=0)  # nanmean ignores NaN values
-        print(avg_result)
-        return avg_result
-
-
-if __name__ == '__main__':
-    from examples.imitation_learning.model import ImitationAgent  # noqa: F401
-    model = torch.load(
-        '/checkpoint/eugenevinitsky/nocturne/test/2022.06.05/test/14.23.17/\
-            ++device=cuda,++file_limit=1000/train_logs/2022_06_05_14_23_23/model_600.pth'
-    ).to('cpu')
-    model.actions_grids = [x.to('cpu') for x in model.actions_grids]
-    model.eval()
-    model.nn[0].eval()
-    with open(
-            '/checkpoint/eugenevinitsky/nocturne/test/2022.06.05/test/14.23.17\
-                /++device=cuda,++file_limit=1000/train_logs/2022_06_05_14_23_23/configs.json',
-            'r') as fp:
-        configs = json.load(fp)
-        configs['device'] = 'cpu'
-    with torch.no_grad():
-        result = compute_metrics_by_intersection(
-            '/checkpoint/eugenevinitsky/waymo_open/motion_v1p1/\
-                uncompressed/scenario/formatted_json_v2_no_tl_valid',
-            model=model,
-            configs=configs)
-        print('collision rates', result[0])
-        print('goal rates', result[1])
diff --git a/nocturne/utils/eval/goal_reaching_rate.py b/nocturne/utils/eval/goal_reaching_rate.py
deleted file mode 100644
index e0ccfee3..00000000
--- a/nocturne/utils/eval/goal_reaching_rate.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Goal reaching rate computation."""
-from pathlib import Path
-import numpy as np
-import torch
-
-from nocturne import Simulation
-
-SIM_N_STEPS = 90  # number of steps per trajectory
-SIM_STEP_TIME = 0.1  # dt (in seconds)
-
-
-def _goal_reaching_rate_impl(trajectory_path,
-                             model=None,
-                             sim_allow_non_vehicles=True,
-                             check_vehicles_only=True):
-    # create expert simulation
-    sim = Simulation(scenario_path=str(trajectory_path),
-                     start_time=0,
-                     allow_non_vehicles=sim_allow_non_vehicles)
-    scenario = sim.getScenario()
-    vehicles = scenario.getVehicles()
-    objects_that_moved = scenario.getObjectsThatMoved()
-    vehicles_that_moved = [
-        veh for veh in vehicles if veh in objects_that_moved
-    ]
-
-    # set all objects to be expert-controlled
-    for obj in objects_that_moved:
-        obj.expert_control = True
-    for obj in vehicles:
-        obj.expert_control = True
-
-    # if a model is given, model will control vehicles that moved
-    if model is not None:
-        controlled_vehicles = vehicles_that_moved
-        for veh in controlled_vehicles:
-            veh.expert_control = False
-    else:
-        controlled_vehicles = []
-
-    # vehicles to check for collisions on
-    objects_to_check = vehicles_that_moved if check_vehicles_only else objects_that_moved
-
-    # step sim until the end and check for collisions
-    reached_goal = {obj.id: False for obj in objects_to_check}
-    for i in range(SIM_N_STEPS):
-        # set model actions
-        for veh in controlled_vehicles:
-            # get vehicle state
-            state = torch.as_tensor(np.expand_dims(np.concatenate(
-                (scenario.ego_state(veh),
-                 scenario.flattened_visible_state(veh,
-                                                  view_dist=120,
-                                                  view_angle=3.14))),
-                                                   axis=0),
-                                    dtype=torch.float32)
-            # compute vehicle action
-            action = model(state)[0]
-            # set vehicle action
-            veh.acceleration = action[0]
-            veh.steering = action[1]
-
-        # step simulation
-        sim.step(SIM_STEP_TIME)
-
-        # check for collisions
-        for obj in objects_to_check:
-            if (obj.target_position - obj.position).norm() < 0.5:
-                reached_goal[obj.id] = True
-
-    # compute collision rate
-    reached_goal_values = list(reached_goal.values())
-    reached_goal_rate = reached_goal_values.count(True) / len(
-        reached_goal_values)
-
-    return reached_goal_rate
-
-
-def compute_average_goal_reaching_rate(trajectories_dir, model=None, **kwargs):
-    """Compute average goal reaching rate for a model."""
-    # get trajectories paths
-    if isinstance(trajectories_dir, str):
-        # if trajectories_dir is a string, treat it as the path to a directory of trajectories
-        trajectories_dir = Path(trajectories_dir)
-        trajectories_paths = list(trajectories_dir.glob('*tfrecord*.json'))
-    elif isinstance(trajectories_dir, list):
-        # if trajectories_dir is a list, treat it as a list of paths to trajectory files
-        trajectories_paths = [Path(path) for path in trajectories_dir]
-    # compute average collision rate over each individual trajectory file
-    average_goal_reaching_rates = np.array(
-        list(
-            map(lambda path: _goal_reaching_rate_impl(path, model, **kwargs),
-                trajectories_paths)))
-
-    return np.mean(average_goal_reaching_rates)
-
-
-if __name__ == '__main__':
-    from nocturne.utils.imitation_learning.waymo_data_loader import ImitationAgent  # noqa: F401
-    model = torch.load('model.pth')
-    goal_reaching_rate = compute_average_goal_reaching_rate(
-        'dataset/json_files', model=None)
-    print(f'Average Goal Reaching Rate: {100*goal_reaching_rate:.2f}%')
diff --git a/nocturne/wrappers/sb3_wrappers.py b/nocturne/wrappers/sb3_wrappers.py
new file mode 100644
index 00000000..ec944b6f
--- /dev/null
+++ b/nocturne/wrappers/sb3_wrappers.py
@@ -0,0 +1,111 @@
+"""Stable-Baselines3 compatibility wrapper for single-agent Nocturne environments."""
+import gym
+import gymnasium
+import numpy as np
+
+
+class NocturneToSB3(gymnasium.Env):
+    """Makes Nocturne env compatible with SB3.
+
+    ! NOTE: Controlling a single agent.
+
+    The action and observation spaces are stored on the wrapped env, so
+    constructing this wrapper replaces the wrapped env's own space attributes.
+    """
+
+    def __init__(self, nocturne_env: gym.Env):
+        self.env = nocturne_env
+        self.action_space = gymnasium.spaces.Discrete(self.env.action_space.n)
+        self.observation_space = gymnasium.spaces.Box(
+            -np.inf, np.inf, self.env.observation_space.shape, np.float32
+        )
+
+    def step(self, action):
+        """Step the wrapped env and unpack the controlled vehicle's entries.
+
+        Args:
+            action: Action for the single controlled vehicle.
+
+        Returns:
+            observation, reward, terminated, truncated, info. ``truncated`` is always False.
+        """
+        next_obs_dict, rewards_dict, dones_dict, info_dict = self.env.step(
+            action_dict={self.controlled_vehicle: action}
+        )
+
+        return (
+            next_obs_dict[self.controlled_vehicle],
+            rewards_dict[self.controlled_vehicle],
+            dones_dict[self.controlled_vehicle],
+            False,
+            info_dict[self.controlled_vehicle],
+        )
+
+    def reset(self, seed=None, options=None):
+        """Reset the wrapped env and record which vehicle is controlled.
+
+        Args:
+            seed: Accepted for gymnasium API compatibility; not forwarded to the wrapped env.
+            options: Accepted for gymnasium API compatibility; not forwarded to the wrapped env.
+
+        Returns:
+            The controlled vehicle's observation and an empty info dict.
+        """
+        obs_dict = self.env.reset()
+        assert (
+            len(self.env.controlled_vehicles) == 1
+        ), "This wrapper does not support multi-agent control."
+
+        self.controlled_vehicle = self.env.controlled_vehicles[0].id
+        return obs_dict[self.controlled_vehicle], {}
+
+    @property
+    def action_space(self):
+        """Action space, stored on the wrapped env."""
+        return self.env.action_space
+
+    @action_space.setter
+    def action_space(self, action_space):
+        self.env.action_space = action_space
+
+    @property
+    def observation_space(self):
+        """Observation space, stored on the wrapped env."""
+        return self.env.observation_space
+
+    @observation_space.setter
+    def observation_space(self, observation_space):
+        self.env.observation_space = observation_space
+
+    def render(self):
+        """Do nothing; rendering is not implemented by this wrapper."""
+
+    def close(self):
+        """Do nothing; the wrapped env is not closed."""
+
+    @property
+    def seed(self):
+        """Always None; the setter ignores the assigned value."""
+        return None
+
+    @seed.setter
+    def seed(self, seed=None):
+        pass
+
+    def __getattr__(self, name):
+        """Delegate attributes missing on the wrapper to the wrapped env."""
+        # Read ``env`` from the instance dict: going through ``self.env`` before it is
+        # set would re-enter ``__getattr__`` and recurse without bound.
+        try:
+            env = self.__dict__["env"]
+        except KeyError:
+            raise AttributeError(name) from None
+        return getattr(env, name)
+
+    def get_attr(self, attr_name: str):
+        """Return attribute ``attr_name`` of the wrapped env."""
+        return getattr(self.env, attr_name)
+
+    def set_attr(self, attr_name: str, value):
+        """Set attribute ``attr_name`` of the wrapped env to ``value``."""
+        setattr(self.env, attr_name, value)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..e62c4129
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,89 @@
+[tool.poetry]
+name = "nocturne"
+version = "0.0.1"
+description = "A data-driven, fast driving simulator for multi-agent coordination under partial observability."
+authors = [
+  "Nathan Lichtlé <nathan.lichtle@gmail.com>",
+  "Eugene Vinitsky <vinitsky.eugene@gmail.com>",
+  "Xiaomeng Yang <bit.yangxm@gmail.com>"
+]
+maintainers = [
+  "Daphne Cornelisse <cornelisse.daphne@nyu.edu>",
+  "Eugene Vinitsky <vinitsky.eugene@gmail.com>"
+]
+homepage = "https://github.com/Emerge-Lab/nocturne"
+repository = "https://github.com/Emerge-Lab/nocturne"
+documentation = "https://nocturne.readthedocs.io/"
+license = "MIT"
+readme = "README.md"
+keywords = ["Driving", "Simulation", "Autonomous Vehicles", "Waymo", "Reinforcement Learning"]
+classifiers = [
+  "Development Status :: 5 - Production/Stable",
+  "Intended Audience :: Developers",
+  "Topic :: Software Development :: Libraries :: Python Modules",
+  "Topic :: Utilities",
+  "Programming Language :: C++",
+  "Programming Language :: Python :: 3 :: Only",
+  "Programming Language :: Python :: 3.10",
+  "License :: OSI Approved :: MIT License",
+  "Programming Language :: Python :: Implementation :: PyPy",
+  "Programming Language :: Python :: Implementation :: CPython",
+  "Programming Language :: C++",
+  "Topic :: Software Development :: Libraries :: Python Modules"
+]
+
+[tool.poetry.urls]
+"Bug Tracker" = "https://github.com/Emerge-Lab/nocturne/issues"
+"Discussions" = "https://github.com/Emerge-Lab/nocturne/discussions"
+"Changelog" = "https://nocturne.readthedocs.io/en/latest/changelog.html"
+"Chat" = "https://gitter.im/nocturne/Lobby"
+
+[tool.poetry.dependencies]
+python = ">=3.10,<3.13"
+numpy = "^1.26.0"
+torch = "^2.0.1"
+gym = "^0.26.2"
+pybind11 = "^2.11.1"
+python-box = "^7.1.1"
+gymnasium = "^0.29.1"
+
+[tool.poetry.group.research.dependencies]
+ipykernel = "^6.25.2"
+matplotlib = "^3.8.0"
+seaborn = "^0.13.0"
+pandas = "^2.1.1"
+wandb = "^0.15.12"
+tensorboard = "^2.14.1"
+
+[tool.poetry.group.dev.dependencies]
+pre-commit = "^3.4.0"
+flake8 = "^6.1.0"
+black = "^23.9.1"
+isort = "^5.12.0"
+pylint = "^3.0.0"
+tomli = "^2.0.1"
+
+[tool.poetry.build]
+script = "build.py"
+generate-setup-file = true
+
+[tool.black]
+line-length = 120
+
+[tool.flake8]
+max-line-length = 120
+extend-ignore = "E203"
+
+[tool.pydocstyle]
+convention = "google"
+
+[tool.pylint]
+max-line-length = 120
+disable = "W1514"
+
+[tool.isort]
+profile = "black"
+
+[build-system]
+requires = ["poetry-core", "pybind11>=2.11.1", "setuptools>=68.2.2"]
+build-backend = "poetry.core.masonry.api"
diff --git a/requirements.dev.txt b/requirements.dev.txt
new file mode 100644
index 00000000..be8f9e62
--- /dev/null
+++ b/requirements.dev.txt
@@ -0,0 +1,27 @@
+astroid==3.0.0 ; python_version >= "3.10" and python_version < "3.13"
+black==23.9.1 ; python_version >= "3.10" and python_version < "3.13"
+cfgv==3.4.0 ; python_version >= "3.10" and python_version < "3.13"
+click==8.1.7 ; python_version >= "3.10" and python_version < "3.13"
+colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
+dill==0.3.7 ; python_version >= "3.10" and python_version < "3.13"
+distlib==0.3.7 ; python_version >= "3.10" and python_version < "3.13"
+filelock==3.12.4 ; python_version >= "3.10" and python_version < "3.13"
+flake8==6.1.0 ; python_version >= "3.10" and python_version < "3.13"
+identify==2.5.30 ; python_version >= "3.10" and python_version < "3.13"
+isort==5.12.0 ; python_version >= "3.10" and python_version < "3.13"
+mccabe==0.7.0 ; python_version >= "3.10" and python_version < "3.13"
+mypy-extensions==1.0.0 ; python_version >= "3.10" and python_version < "3.13"
+nodeenv==1.8.0 ; python_version >= "3.10" and python_version < "3.13"
+packaging==23.2 ; python_version >= "3.10" and python_version < "3.13"
+pathspec==0.11.2 ; python_version >= "3.10" and python_version < "3.13"
+platformdirs==3.11.0 ; python_version >= "3.10" and python_version < "3.13"
+pre-commit==3.4.0 ; python_version >= "3.10" and python_version < "3.13"
+pycodestyle==2.11.0 ; python_version >= "3.10" and python_version < "3.13"
+pyflakes==3.1.0 ; python_version >= "3.10" and python_version < "3.13"
+pylint==3.0.0 ; python_version >= "3.10" and python_version < "3.13"
+pyyaml==6.0.1 ; python_version >= "3.10" and python_version < "3.13"
+setuptools==68.2.2 ; python_version >= "3.10" and python_version < "3.13"
+tomli==2.0.1 ; python_version >= "3.10" and python_version < "3.13"
+tomlkit==0.12.1 ; python_version >= "3.10" and python_version < "3.13"
+typing-extensions==4.8.0 ; python_version >= "3.10" and python_version < "3.11"
+virtualenv==20.24.5 ; python_version >= "3.10" and python_version < "3.13"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..ab0a0465
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,98 @@
+absl-py==2.0.0 ; python_version >= "3.10" and python_version < "3.13"
+appdirs==1.4.4 ; python_version >= "3.10" and python_version < "3.13"
+appnope==0.1.3 ; python_version >= "3.10" and python_version < "3.13" and (platform_system == "Darwin" or sys_platform == "darwin")
+asttokens==2.4.0 ; python_version >= "3.10" and python_version < "3.13"
+backcall==0.2.0 ; python_version >= "3.10" and python_version < "3.13"
+cachetools==5.3.1 ; python_version >= "3.10" and python_version < "3.13"
+certifi==2023.7.22 ; python_version >= "3.10" and python_version < "3.13"
+cffi==1.16.0 ; python_version >= "3.10" and python_version < "3.13" and implementation_name == "pypy"
+charset-normalizer==3.3.0 ; python_version >= "3.10" and python_version < "3.13"
+click==8.1.7 ; python_version >= "3.10" and python_version < "3.13"
+cloudpickle==2.2.1 ; python_version >= "3.10" and python_version < "3.13"
+colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
+comm==0.1.4 ; python_version >= "3.10" and python_version < "3.13"
+contourpy==1.1.1 ; python_version >= "3.10" and python_version < "3.13"
+cycler==0.12.0 ; python_version >= "3.10" and python_version < "3.13"
+debugpy==1.8.0 ; python_version >= "3.10" and python_version < "3.13"
+decorator==5.1.1 ; python_version >= "3.10" and python_version < "3.13"
+docker-pycreds==0.4.0 ; python_version >= "3.10" and python_version < "3.13"
+exceptiongroup==1.1.3 ; python_version >= "3.10" and python_version < "3.11"
+executing==2.0.0 ; python_version >= "3.10" and python_version < "3.13"
+farama-notifications==0.0.4 ; python_version >= "3.10" and python_version < "3.13"
+filelock==3.12.4 ; python_version >= "3.10" and python_version < "3.13"
+fonttools==4.43.0 ; python_version >= "3.10" and python_version < "3.13"
+fsspec==2023.9.2 ; python_version >= "3.10" and python_version < "3.13"
+gitdb==4.0.10 ; python_version >= "3.10" and python_version < "3.13"
+gitpython==3.1.37 ; python_version >= "3.10" and python_version < "3.13"
+google-auth-oauthlib==1.0.0 ; python_version >= "3.10" and python_version < "3.13"
+google-auth==2.23.2 ; python_version >= "3.10" and python_version < "3.13"
+grpcio==1.59.0 ; python_version >= "3.10" and python_version < "3.13"
+gym-notices==0.0.8 ; python_version >= "3.10" and python_version < "3.13"
+gym==0.26.2 ; python_version >= "3.10" and python_version < "3.13"
+gymnasium==0.29.1 ; python_version >= "3.10" and python_version < "3.13"
+idna==3.4 ; python_version >= "3.10" and python_version < "3.13"
+ipykernel==6.25.2 ; python_version >= "3.10" and python_version < "3.13"
+ipython==8.16.1 ; python_version >= "3.10" and python_version < "3.13"
+jedi==0.19.1 ; python_version >= "3.10" and python_version < "3.13"
+jinja2==3.1.2 ; python_version >= "3.10" and python_version < "3.13"
+jupyter-client==8.3.1 ; python_version >= "3.10" and python_version < "3.13"
+jupyter-core==5.3.2 ; python_version >= "3.10" and python_version < "3.13"
+kiwisolver==1.4.5 ; python_version >= "3.10" and python_version < "3.13"
+markdown==3.4.4 ; python_version >= "3.10" and python_version < "3.13"
+markupsafe==2.1.3 ; python_version >= "3.10" and python_version < "3.13"
+matplotlib-inline==0.1.6 ; python_version >= "3.10" and python_version < "3.13"
+matplotlib==3.8.0 ; python_version >= "3.10" and python_version < "3.13"
+mpmath==1.3.0 ; python_version >= "3.10" and python_version < "3.13"
+nest-asyncio==1.5.8 ; python_version >= "3.10" and python_version < "3.13"
+networkx==3.1 ; python_version >= "3.10" and python_version < "3.13"
+numpy==1.26.0 ; python_version >= "3.10" and python_version < "3.13"
+oauthlib==3.2.2 ; python_version >= "3.10" and python_version < "3.13"
+packaging==23.2 ; python_version >= "3.10" and python_version < "3.13"
+pandas==2.1.1 ; python_version >= "3.10" and python_version < "3.13"
+parso==0.8.3 ; python_version >= "3.10" and python_version < "3.13"
+pathtools==0.1.2 ; python_version >= "3.10" and python_version < "3.13"
+pexpect==4.8.0 ; python_version >= "3.10" and python_version < "3.13" and sys_platform != "win32"
+pickleshare==0.7.5 ; python_version >= "3.10" and python_version < "3.13"
+pillow==10.0.1 ; python_version >= "3.10" and python_version < "3.13"
+platformdirs==3.11.0 ; python_version >= "3.10" and python_version < "3.13"
+prompt-toolkit==3.0.39 ; python_version >= "3.10" and python_version < "3.13"
+protobuf==4.24.4 ; python_version >= "3.10" and python_version < "3.13"
+psutil==5.9.5 ; python_version >= "3.10" and python_version < "3.13"
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version < "3.13" and sys_platform != "win32"
+pure-eval==0.2.2 ; python_version >= "3.10" and python_version < "3.13"
+pyasn1-modules==0.3.0 ; python_version >= "3.10" and python_version < "3.13"
+pyasn1==0.5.0 ; python_version >= "3.10" and python_version < "3.13"
+pybind11==2.11.1 ; python_version >= "3.10" and python_version < "3.13"
+pycparser==2.21 ; python_version >= "3.10" and python_version < "3.13" and implementation_name == "pypy"
+pygments==2.16.1 ; python_version >= "3.10" and python_version < "3.13"
+pyparsing==3.1.1 ; python_version >= "3.10" and python_version < "3.13"
+python-box==7.1.1 ; python_version >= "3.10" and python_version < "3.13"
+python-dateutil==2.8.2 ; python_version >= "3.10" and python_version < "3.13"
+pytz==2023.3.post1 ; python_version >= "3.10" and python_version < "3.13"
+pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.10" and python_version < "3.13"
+pyyaml==6.0.1 ; python_version >= "3.10" and python_version < "3.13"
+pyzmq==25.1.1 ; python_version >= "3.10" and python_version < "3.13"
+requests-oauthlib==1.3.1 ; python_version >= "3.10" and python_version < "3.13"
+requests==2.31.0 ; python_version >= "3.10" and python_version < "3.13"
+rsa==4.9 ; python_version >= "3.10" and python_version < "3.13"
+seaborn==0.13.0 ; python_version >= "3.10" and python_version < "3.13"
+sentry-sdk==1.31.0 ; python_version >= "3.10" and python_version < "3.13"
+setproctitle==1.3.3 ; python_version >= "3.10" and python_version < "3.13"
+setuptools-scm==8.0.4 ; python_version >= "3.10" and python_version < "3.13"
+setuptools==68.2.2 ; python_version >= "3.10" and python_version < "3.13"
+six==1.16.0 ; python_version >= "3.10" and python_version < "3.13"
+smmap==5.0.1 ; python_version >= "3.10" and python_version < "3.13"
+stack-data==0.6.3 ; python_version >= "3.10" and python_version < "3.13"
+sympy==1.12 ; python_version >= "3.10" and python_version < "3.13"
+tensorboard-data-server==0.7.1 ; python_version >= "3.10" and python_version < "3.13"
+tensorboard==2.14.1 ; python_version >= "3.10" and python_version < "3.13"
+tomli==2.0.1 ; python_version >= "3.10" and python_version < "3.11"
+torch==2.1.0 ; python_version >= "3.10" and python_version < "3.13"
+tornado==6.3.3 ; python_version >= "3.10" and python_version < "3.13"
+traitlets==5.11.2 ; python_version >= "3.10" and python_version < "3.13"
+typing-extensions==4.8.0 ; python_version >= "3.10" and python_version < "3.13"
+tzdata==2023.3 ; python_version >= "3.10" and python_version < "3.13"
+urllib3==2.0.6 ; python_version >= "3.10" and python_version < "3.13"
+wandb==0.15.12 ; python_version >= "3.10" and python_version < "3.13"
+wcwidth==0.2.8 ; python_version >= "3.10" and python_version < "3.13"
+werkzeug==3.0.0 ; python_version >= "3.10" and python_version < "3.13"
diff --git a/scripts/cluster_scripts/run_imitation_cluster.py b/scripts/cluster_scripts/run_imitation_cluster.py
deleted file mode 100644
index be1263c0..00000000
--- a/scripts/cluster_scripts/run_imitation_cluster.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-
-"""Run sample factory experiments on a SLURM cluster."""
-import argparse
-import os
-import pathlib
-import shutil
-from datetime import datetime
-from subprocess import Popen
-
-from cfgs.config import PROJECT_PATH
-from scripts.cluster_scripts.utils import Overrides
-
-
-def make_code_snap(experiment, code_path, str_time):
-    """Copy code to directory to ensure that the run launches with correct commit.
-
-    Args:
-        experiment (str): Name of experiment
-        code_path (str): Path to where we are saving the code.
-        str_time (str): Unique time identifier used to distinguish
-                        experiments with same name.
-
-    Returns
-    -------
-        snap_dir (str): path to where the code has been copied.
-    """
-    if len(code_path) > 0:
-        snap_dir = pathlib.Path(code_path)
-    else:
-        snap_dir = pathlib.Path.cwd()
-    snap_dir /= str_time
-    snap_dir /= f'{experiment}'
-    snap_dir.mkdir(exist_ok=True, parents=True)
-
-    def copy_dir(dir, pat):
-        dst_dir = snap_dir / 'code' / dir
-        dst_dir.mkdir(exist_ok=True, parents=True)
-        for f in (src_dir / dir).glob(pat):
-            shutil.copy(f, dst_dir / f.name)
-
-    dirs_to_copy = [
-        '.', './cfgs/', './cfgs/algorithm', './cfgs/imitation',
-        './nocturne/envs/', './nocturne/pybind11',
-        '.examples/imitation_learning', './build'
-    ]
-    src_dir = pathlib.Path(PROJECT_PATH)
-    for dir in dirs_to_copy:
-        copy_dir(dir, '*.py')
-        copy_dir(dir, '*.yaml')
-
-    return snap_dir
-
-
-def main():
-    """Launch experiments on SLURM cluster by overriding Hydra config."""
-    username = os.environ["USER"]
-    parser = argparse.ArgumentParser()
-    parser.add_argument('experiment', type=str)
-    parser.add_argument('--code_path',
-                        default=f'/checkpoint/{username}/nocturne/il_runs')
-    parser.add_argument('--dry', action='store_true')
-    args = parser.parse_args()
-
-    now = datetime.now()
-    str_time = now.strftime('%Y.%m.%d_%H%M%S')
-    snap_dir = make_code_snap(args.experiment, args.code_path, str_time)
-    overrides = Overrides()
-    overrides.add('hydra/launcher', ['submitit_slurm'])
-    overrides.add('hydra.launcher.partition', ['learnlab'])
-    overrides.add('experiment', [args.experiment])
-    overrides.add('num_files', [1000])
-    overrides.add('epochs', [1400])
-    overrides.add('seed', [0, 1, 2, 3, 4])
-
-    cmd = [
-        'python',
-        str(snap_dir / 'code' / 'nocturne' / 'utils' / 'imitation_learning' /
-            'train.py'), '-m'
-    ]
-    print(cmd)
-    cmd += overrides.cmd()
-
-    if args.dry:
-        print(' '.join(cmd))
-    else:
-        env = os.environ.copy()
-        env['PYTHONPATH'] = str(snap_dir / 'code')
-        p = Popen(cmd, env=env)
-        p.communicate()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/scripts/cluster_scripts/run_ppo_cluster.py b/scripts/cluster_scripts/run_ppo_cluster.py
deleted file mode 100644
index fa9da6ab..00000000
--- a/scripts/cluster_scripts/run_ppo_cluster.py
+++ /dev/null
@@ -1,103 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Run on-policy PPO experiments on a SLURM cluster."""
-import argparse
-import os
-import pathlib
-import shutil
-from datetime import datetime
-from subprocess import Popen
-
-from cfgs.config import PROJECT_PATH
-from scripts.cluster_scripts.utils import Overrides
-
-
-def make_code_snap(experiment, code_path, slurm_dir='exp'):
-    """Copy code to directory to ensure that the run launches with correct commit.
-
-    Args:
-        experiment (str): Name of experiment
-        code_path (str): Path to where we are saving the code.
-        str_time (str): Unique time identifier used to distinguish
-                        experiments with same name.
-
-    Returns
-    -------
-        snap_dir (str): path to where the code has been copied.
-    """
-    now = datetime.now()
-    if len(code_path) > 0:
-        snap_dir = pathlib.Path(code_path) / slurm_dir
-    else:
-        snap_dir = pathlib.Path.cwd() / slurm_dir
-    snap_dir /= now.strftime('%Y.%m.%d')
-    snap_dir /= now.strftime('%H%M%S') + f'_{experiment}'
-    snap_dir.mkdir(exist_ok=True, parents=True)
-
-    def copy_dir(dir, pat):
-        dst_dir = snap_dir / 'code' / dir
-        dst_dir.mkdir(exist_ok=True, parents=True)
-        for f in (src_dir / dir).glob(pat):
-            shutil.copy(f, dst_dir / f.name)
-
-    dirs_to_copy = [
-        '.', './cfgs/', './cfgs/algo', './algos/', './algos/ppo/',
-        './algos/ppo/ppo_utils', './algos/ppo/r_mappo',
-        './algos/ppo/r_mappo/algorithm', './algos/ppo/utils',
-        '.nocturne/envs/', './nocturne_utils/', '.nocturne/python/', './build'
-    ]
-    src_dir = pathlib.Path(os.path.dirname(os.getcwd()))
-    for dir in dirs_to_copy:
-        copy_dir(dir, '*.py')
-        copy_dir(dir, '*.yaml')
-
-    return snap_dir
-
-
-def main():
-    """Launch experiments on SLURM cluster by overriding Hydra config."""
-    parser = argparse.ArgumentParser()
-    parser.add_argument('experiment', type=str)
-    parser.add_argument('--code_path',
-                        default='/checkpoint/eugenevinitsky/nocturne')
-    parser.add_argument('--dry', action='store_true')
-    args = parser.parse_args()
-
-    snap_dir = make_code_snap(args.experiment, args.code_path)
-    print(str(snap_dir))
-    overrides = Overrides()
-    overrides.add('hydra/launcher', ['submitit_slurm'])
-    overrides.add('hydra.launcher.partition', ['learnlab'])
-    overrides.add('experiment', [args.experiment])
-    # experiment parameters
-    overrides.add('episode_length', [200])
-    # algo
-    overrides.add('algo', ['ppo'])
-    overrides.add('algo.entropy_coef', [-0.001, 0.0, 0.001])
-    overrides.add('algo.n_rollout_threads', [128])
-    # rewards
-    overrides.add('rew_cfg.goal_achieved_bonus', [10, 50])
-    # misc
-    overrides.add('scenario_path',
-                  [PROJECT_PATH / 'scenarios/twenty_car_intersection.json'])
-
-    cmd = [
-        'python',
-        str(snap_dir / 'code' / 'algos' / 'ppo' / 'nocturne_runner.py'), '-m'
-    ]
-    print(cmd)
-    cmd += overrides.cmd()
-
-    if args.dry:
-        print(' '.join(cmd))
-    else:
-        env = os.environ.copy()
-        env['PYTHONPATH'] = str(snap_dir / 'code')
-        p = Popen(cmd, env=env)
-        p.communicate()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/scripts/cluster_scripts/run_rllib_cluster.py b/scripts/cluster_scripts/run_rllib_cluster.py
deleted file mode 100644
index c97961dc..00000000
--- a/scripts/cluster_scripts/run_rllib_cluster.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Run rllib experiments on a SLURM cluster."""
-import argparse
-import os
-import pathlib
-import shutil
-from datetime import datetime
-from subprocess import Popen
-
-from cfgs.config import PROJECT_PATH
-from scripts.utils import Overrides
-
-
-def make_code_snap(experiment, code_path, str_time):
-    """Copy code to directory to ensure that the run launches with correct commit.
-
-    Args:
-        experiment (str): Name of experiment
-        code_path (str): Path to where we are saving the code.
-        str_time (str): Unique time identifier used to distinguish
-                        experiments with same name.
-
-    Returns
-    -------
-        snap_dir (str): path to where the code has been copied.
-    """
-    if len(code_path) > 0:
-        snap_dir = pathlib.Path(code_path)
-    else:
-        snap_dir = pathlib.Path.cwd()
-    snap_dir /= str_time
-    snap_dir /= f'{experiment}'
-    snap_dir.mkdir(exist_ok=True, parents=True)
-
-    def copy_dir(dir, pat):
-        dst_dir = snap_dir / 'code' / dir
-        dst_dir.mkdir(exist_ok=True, parents=True)
-        for f in (src_dir / dir).glob(pat):
-            shutil.copy(f, dst_dir / f.name)
-
-    dirs_to_copy = [
-        '.', './cfgs/', './examples/', './cfgs/algorithm', './envs/',
-        './nocturne_utils/', './python/', './scenarios/', './build'
-    ]
-    src_dir = pathlib.Path(PROJECT_PATH)
-    for dir in dirs_to_copy:
-        copy_dir(dir, '*.py')
-        copy_dir(dir, '*.yaml')
-
-    return snap_dir
-
-
-def main():
-    """Launch experiments on SLURM cluster by overriding Hydra config."""
-    username = os.environ["USER"]
-    parser = argparse.ArgumentParser()
-    parser.add_argument('experiment', type=str)
-    parser.add_argument(
-        '--code_path',
-        default=f'/checkpoint/{username}/nocturne/sample_factory_runs')
-    parser.add_argument('--dry', action='store_true')
-    args = parser.parse_args()
-
-    now = datetime.now()
-    str_time = now.strftime('%Y.%m.%d_%H%M%S')
-    snap_dir = make_code_snap(args.experiment, args.code_path, str_time)
-    overrides = Overrides()
-    overrides.add('hydra/launcher', ['ray'])
-    overrides.add('hydra.launcher.partition', ['learnlab'])
-
-    cmd = [
-        'python',
-        str(snap_dir / 'code' / 'examples' / 'run_rllib.py'), '-m'
-    ]
-    cmd += overrides.cmd()
-    print(cmd)
-
-    if args.dry:
-        print(' '.join(cmd))
-    else:
-        env = os.environ.copy()
-        env['PYTHONPATH'] = str(snap_dir / 'code')
-        p = Popen(cmd, env=env)
-        p.communicate()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/scripts/cluster_scripts/run_sample_factory_cluster.py b/scripts/cluster_scripts/run_sample_factory_cluster.py
deleted file mode 100644
index a313be12..00000000
--- a/scripts/cluster_scripts/run_sample_factory_cluster.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Run sample factory experiments on a SLURM cluster."""
-import argparse
-import os
-import pathlib
-import shutil
-from datetime import datetime
-from subprocess import Popen
-
-from cfgs.config import PROJECT_PATH
-from scripts.cluster_scripts.utils import Overrides
-
-
-def make_code_snap(experiment, code_path, str_time):
-    """Copy code to directory to ensure that the run launches with correct commit.
-
-    Args:
-        experiment (str): Name of experiment
-        code_path (str): Path to where we are saving the code.
-        str_time (str): Unique time identifier used to distinguish
-                        experiments with same name.
-
-    Returns
-    -------
-        snap_dir (str): path to where the code has been copied.
-    """
-    if len(code_path) > 0:
-        snap_dir = pathlib.Path(code_path)
-    else:
-        snap_dir = pathlib.Path.cwd()
-    snap_dir /= str_time
-    snap_dir /= f'{experiment}'
-    snap_dir.mkdir(exist_ok=True, parents=True)
-
-    def copy_dir(dir, pat):
-        dst_dir = snap_dir / 'code' / dir
-        dst_dir.mkdir(exist_ok=True, parents=True)
-        for f in (src_dir / dir).glob(pat):
-            shutil.copy(f, dst_dir / f.name)
-
-    dirs_to_copy = [
-        '.', './cfgs/', './examples/', './examples/sample_factory_files',
-        './cfgs/algorithm', './nocturne/envs/', './nocturne_utils/',
-        './nocturne/python/', './scenarios/', './build'
-    ]
-    src_dir = pathlib.Path(PROJECT_PATH)
-    for dir in dirs_to_copy:
-        copy_dir(dir, '*.py')
-        copy_dir(dir, '*.yaml')
-
-    return snap_dir
-
-
-def main():
-    """Launch experiments on SLURM cluster by overriding Hydra config."""
-    parser = argparse.ArgumentParser()
-    parser.add_argument('experiment', type=str)
-    parser.add_argument(
-        '--code_path',
-        default='/checkpoint/eugenevinitsky/nocturne/sample_factory_runs')
-    parser.add_argument('--dry', action='store_true')
-    args = parser.parse_args()
-
-    now = datetime.now()
-    str_time = now.strftime('%Y.%m.%d_%H%M%S')
-    snap_dir = make_code_snap(args.experiment, args.code_path, str_time)
-    overrides = Overrides()
-    overrides.add('hydra/launcher', ['submitit_slurm'])
-    overrides.add('hydra.launcher.partition', ['learnlab'])
-    overrides.add('experiment', [args.experiment])
-    overrides.add('num_files', [10000])
-    overrides.add('seed', [0, 1, 2, 3, 4])
-    overrides.add('scenario.max_visible_road_points', [500])
-    overrides.add('rew_cfg.collision_penalty', [0, -80.0])
-
-    cmd = [
-        'python',
-        str(snap_dir / 'code' / 'examples' / 'sample_factory_files' /
-            'run_sample_factory.py'), '-m', 'algorithm=APPO'
-    ]
-    print(cmd)
-    cmd += overrides.cmd()
-
-    if args.dry:
-        print(' '.join(cmd))
-    else:
-        env = os.environ.copy()
-        env['PYTHONPATH'] = str(snap_dir / 'code')
-        p = Popen(cmd, env=env)
-        p.communicate()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/scripts/cluster_scripts/utils.py b/scripts/cluster_scripts/utils.py
deleted file mode 100644
index 21be3246..00000000
--- a/scripts/cluster_scripts/utils.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Storage for SLURM running utilities."""
-
-
-class Overrides(object):
-    """Utility class used to convert commands into a bash runnable string."""
-
-    def __init__(self):
-        """Initialize class."""
-        self.kvs = dict()
-
-    def add(self, key, values):
-        """Add each of the desired key value pairs into a dict."""
-        value = ','.join(str(v) for v in values)
-        assert key not in self.kvs
-        self.kvs[key] = value
-
-    def cmd(self):
-        """Append the keys together into a command that can be run."""
-        cmd = []
-        for k, v in self.kvs.items():
-            cmd.append(f'{k}={v}')
-        return cmd
diff --git a/scripts/data_analysis/corner_case_search.py b/scripts/data_analysis/corner_case_search.py
deleted file mode 100644
index f181d6b5..00000000
--- a/scripts/data_analysis/corner_case_search.py
+++ /dev/null
@@ -1,140 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Run through the data to look for cases where there are undesirable corner cases.
-
-The cases we currently check for are:
-1) is a vehicle initialized in a colliding state with another vehicle
-2) is a vehicle initialized in a colliding state with a road edge?
-"""
-from copy import deepcopy
-from pathlib import Path
-import os
-import sys
-
-import hydra
-import imageio
-import matplotlib.pyplot as plt
-import numpy as np
-
-from cfgs.config import PROCESSED_TRAIN_NO_TL, PROJECT_PATH, \
-    get_scenario_dict, set_display_window
-from nocturne import Simulation
-
-
-@hydra.main(config_path="../../cfgs/", config_name="config")
-def main(cfg):
-    """See file docstring."""
-    set_display_window()
-    SAVE_IMAGES = False
-    MAKE_MOVIES = False
-    output_folder = 'corner_case_vis'
-    output_path = Path(PROJECT_PATH) / f'nocturne_utils/{output_folder}'
-    output_path.mkdir(exist_ok=True)
-    files = list(os.listdir(PROCESSED_TRAIN_NO_TL))
-    files = [file for file in files if 'tfrecord' in file]
-    # track the number of collisions at each time-step
-    collide_counter = np.zeros((2, 90))
-    file_has_veh_collision_counter = 0
-    file_has_edge_collision_counter = 0
-    total_edge_collision_counter = 0
-    total_veh_collision_counter = 0
-    initialized_collision_counter = 0
-    total_veh_counter = 0
-
-    start_cfg = deepcopy(cfg)
-    start_cfg['scenario']['start_time'] = 0
-    start_cfg['scenario']['allow_non_vehicles'] = False
-    for file_idx, file in enumerate(files):
-        found_collision = False
-        edge_collision = False
-        sim = Simulation(os.path.join(PROCESSED_TRAIN_NO_TL, file),
-                         get_scenario_dict(cfg))
-        vehs = sim.getScenario().getObjectsThatMoved()
-        # this checks if the vehicles has actually moved any distance at all
-        valid_vehs = []
-        for veh in vehs:
-            veh.expert_control = True
-            obj_pos = veh.getPosition()
-            obj_pos = np.array([obj_pos.x, obj_pos.y])
-            goal_pos = veh.getGoalPosition()
-            goal_pos = np.array([goal_pos.x, goal_pos.y])
-            if np.linalg.norm(obj_pos - goal_pos) > 0.5:
-                valid_vehs.append(veh)
-        veh_edge_collided = [False for _ in vehs]
-        veh_veh_collided = [False for _ in vehs]
-        initialized_collided = [False for _ in vehs]
-        for time_index in range(90):
-            for veh_index, veh in enumerate(valid_vehs):
-                collided = veh.getCollided()
-                if collided and not np.isclose(veh.getPosition().x, -10000.0):
-                    collide_counter[int(veh.collision_type) - 1,
-                                    time_index] += 1
-                    if int(veh.collision_type) == 2:
-                        veh_edge_collided[veh_index] = True
-                    if int(veh.collision_type) == 1:
-                        veh_veh_collided[veh_index] = True
-                    if time_index == 0:
-                        initialized_collided[veh_index] = True
-                if np.isclose(veh.getPosition().x, -10000.0):
-                    collided = False
-                if time_index == 0 and not found_collision and collided and SAVE_IMAGES:
-                    img = sim.getScenario().getImage(
-                        img_width=1600,
-                        img_height=1600,
-                        draw_target_positions=True,
-                        padding=50.0,
-                    )
-                    fig = plt.figure()
-                    plt.imshow(img)
-                    plt.savefig(f'{output_folder}/{file}.png')
-                    plt.close(fig)
-                if not found_collision and collided:
-                    found_collision = True
-                    if int(veh.collision_type) == 1:
-                        file_has_veh_collision_counter += 1
-                    else:
-                        file_has_edge_collision_counter += 1
-                        edge_collision = True
-            sim.step(0.1)
-        total_veh_counter += len(valid_vehs)
-        total_edge_collision_counter += np.sum(veh_edge_collided)
-        total_veh_collision_counter += np.sum(veh_veh_collided)
-        initialized_collision_counter += np.sum(initialized_collided)
-        print(f'at file {file_idx} we have {collide_counter} collisions for a\
-                 ratio of {collide_counter / (file_idx + 1)}')
-        print(f'the number of files that have a veh collision at all is\
-                 {file_has_veh_collision_counter / (file_idx + 1)}')
-        print(f'the number of files that have a edge collision at all is\
-                 {file_has_edge_collision_counter / (file_idx + 1)}')
-        print(f'the fraction of vehicles that have had an edge collision\
-                is {total_edge_collision_counter / total_veh_counter}')
-        print(f'the fraction of vehicles that have had a collision at all\
-                is {(total_edge_collision_counter + total_veh_collision_counter) / total_veh_counter}'
-              )
-        print(
-            f'the fraction of vehicles that are initialized in collision are \
-                {initialized_collision_counter / total_veh_counter}')
-        if found_collision and edge_collision and MAKE_MOVIES:
-            movie_frames = []
-            fig = plt.figure()
-            sim = Simulation(os.path.join(PROCESSED_TRAIN_NO_TL, file),
-                             get_scenario_dict(start_cfg))
-            vehs = sim.getScenario().getObjectsThatMoved()
-            for veh in vehs:
-                veh.expert_control = True
-            for time_index in range(89):
-                movie_frames.append(sim.getScenario().getImage(
-                    img_width=1600, img_height=1600))
-                sim.step(0.1)
-            movie_frames = np.array(movie_frames)
-            imageio.mimwrite(f'{output_path}/{os.path.basename(file)}.mp4',
-                             movie_frames,
-                             fps=10)
-            if file_has_edge_collision_counter + file_has_veh_collision_counter > 10:
-                sys.exit()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/scripts/data_analysis/data_analysis.py b/scripts/data_analysis/data_analysis.py
deleted file mode 100644
index aab91bf1..00000000
--- a/scripts/data_analysis/data_analysis.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Utils that we use to understand the datasets we are working with."""
-import os
-
-import hydra
-import matplotlib.pyplot as plt
-import numpy as np
-
-from cfgs.config import PROCESSED_TRAIN_NO_TL, PROJECT_PATH, get_scenario_dict
-from nocturne import Simulation
-
-
-def run_analysis(cfg, files):
-    """Compute the expert accelerations and number of vehicles across the dataset.
-
-    Args:
-        files ([str]): List of files to analyze
-
-    Returns
-    -------
-        [np.float], [np.float]: List of expert accels, list of number
-                                of moving vehicles in file
-    """
-    observed_accels = []
-    num_vehicles = []
-    cfg['start_time'] = 0
-    cfg['allow_non_vehicles'] = False
-    for file_idx, file in enumerate(files):
-        sim = Simulation(os.path.join(PROCESSED_TRAIN_NO_TL, file),
-                         get_scenario_dict(cfg))
-        vehs = sim.scenario().getObjectsThatMoved()
-        # this checks if the vehicles has actually moved any distance at all
-        valid_vehs = []
-        prev_speeds = []
-        for veh in vehs:
-            veh.expert_control = True
-            obj_pos = veh.position
-            goal_pos = veh.target_position
-            if (obj_pos - goal_pos).norm() > 0.5:
-                valid_vehs.append(veh)
-            if veh in valid_vehs:
-                veh_speed = sim.scenario().getExpertSpeeds(0, veh.id)
-                veh_speed = np.linalg.norm([veh_speed.x, veh_speed.y])
-                if not np.isclose(veh.position.x, -10000.0):
-                    prev_speeds.append(
-                        (veh_speed, True, [veh.position.x, veh.position.y], 0))
-                else:
-                    prev_speeds.append(
-                        (veh_speed, False, [veh.position.x,
-                                            veh.position.y], 0))
-        num_vehicles.append(len(valid_vehs))
-        sim.step(0.1)
-        for i in range(1, 90):
-            for veh_index, veh in enumerate(valid_vehs):
-                # check if the vehicle is actually valid
-                veh_speed = sim.scenario().getExpertSpeeds(i, veh.id)
-                veh_speed = veh_speed.norm()
-                if np.isclose(veh.position.x, -10000.0):
-                    prev_speeds[veh_index] = (veh_speed, False,
-                                              [veh.position.x,
-                                               veh.position.y], i)
-                else:
-                    # approximate the accel using an euler step but only
-                    # if the prior step was a step where the agent
-                    # was valid
-                    if prev_speeds[veh_index][1]:
-                        accel = (veh_speed - prev_speeds[veh_index][0]) / 0.1
-                        observed_accels.append(accel)
-                    prev_speeds[veh_index] = (veh_speed, True,
-                                              [veh.position.x,
-                                               veh.position.y], i)
-            sim.step(0.1)
-
-        if file_idx > 300:
-            break
-    return observed_accels, num_vehicles
-
-
-@hydra.main(config_path="../../cfgs/", config_name="config")
-def analyze_accels(cfg):
-    """Plot the expert accels and number of observed moving vehicles."""
-    f_path = PROCESSED_TRAIN_NO_TL
-    with open(os.path.join(f_path, 'valid_files.txt')) as file:
-        files = [line.strip() for line in file]
-    observed_accels_valid, num_vehicles_valid = run_analysis(cfg, files)
-    with open(os.path.join(f_path, 'invalid_files.txt')) as file:
-        files = [line.strip() for line in file]
-    _, num_vehicles_invalid = run_analysis(cfg, files)
-
-    output_path = os.path.join(PROJECT_PATH, 'nocturne_utils/data_analysis')
-    if not os.path.exists(output_path):
-        os.makedirs(output_path)
-    observed_accels = np.array(observed_accels_valid)
-    print(np.max(observed_accels))
-    print(np.min(observed_accels))
-    observed_accels = observed_accels[np.abs(observed_accels) < 5]
-    plt.figure()
-    plt.hist(observed_accels)
-    plt.savefig(os.path.join(output_path, 'observed_accels.png'))
-    plt.figure()
-    plt.hist(
-        num_vehicles_valid,
-        bins=30,
-        density=True,
-        histtype='step',
-        cumulative=True,
-    )
-    plt.hist(
-        num_vehicles_invalid,
-        bins=30,
-        density=True,
-        histtype='step',
-        cumulative=True,
-    )
-    plt.legend(['valid', 'invalid'])
-    plt.savefig(os.path.join(output_path, 'num_vehs_cdf.png'))
-    plt.figure()
-    plt.hist(num_vehicles_valid, bins=30, alpha=0.5, color='b')
-    plt.axvline(np.mean(num_vehicles_valid), color='b', label='_nolegend_')
-    plt.hist(num_vehicles_invalid, bins=30, alpha=0.5, color='r')
-    plt.axvline(np.mean(num_vehicles_invalid), color='r', label='_nolegend_')
-    plt.legend(['valid', 'invalid'])
-    plt.savefig(os.path.join(output_path, 'num_vehs_hist.png'))
-
-
-if __name__ == '__main__':
-    analyze_accels()
diff --git a/scripts/data_analysis/speed_test.py b/scripts/data_analysis/speed_test.py
deleted file mode 100644
index 6b51383c..00000000
--- a/scripts/data_analysis/speed_test.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Utils that we use to understand the datasets we are working with."""
-import json
-import os
-import time
-
-import hydra
-import numpy as np
-
-from cfgs.config import PROCESSED_TRAIN_NO_TL, get_scenario_dict, set_display_window
-from nocturne import Simulation, Action
-
-
-def run_speed_test(files, cfg):
-    """Compute the expert accelerations and number of vehicles across the dataset.
-
-    Args:
-        files ([str]): List of files to analyze
-
-    Returns
-    -------
-        [np.float], [np.float]: List of expert accels, list of number
-                                of moving vehicles in file
-    """
-    times_list = []
-    for file in files:
-        sim = Simulation(os.path.join(PROCESSED_TRAIN_NO_TL, file),
-                         get_scenario_dict(cfg))
-        vehs = sim.scenario().getObjectsThatMoved()
-        scenario = sim.getScenario()
-        veh = vehs[np.random.randint(len(vehs))]
-        t = time.perf_counter()
-        _ = scenario.flattened_visible_state(veh, 80, (180 / 180) * np.pi)
-        veh.apply_action(Action(1.0, 1.0, 1.0))
-        sim.step(0.1)
-        times_list.append(time.perf_counter() - t)
-    print('avg, std. time to get obs is {}, {}'.format(np.mean(times_list),
-                                                       np.std(times_list)))
-
-
-@hydra.main(config_path="../../cfgs/", config_name="config")
-def analyze_accels(cfg):
-    """Plot the expert accels and number of observed moving vehicles."""
-    f_path = PROCESSED_TRAIN_NO_TL
-    with open(os.path.join(f_path, 'valid_files.json')) as file:
-        valid_veh_dict = json.load(file)
-        files = list(valid_veh_dict.keys())
-    run_speed_test(files[0:10], cfg)
-
-
-if __name__ == '__main__':
-    set_display_window()
-    analyze_accels()
diff --git a/scripts/json_generation/make_solvable_files.py b/scripts/json_generation/make_solvable_files.py
deleted file mode 100644
index 97fcf52c..00000000
--- a/scripts/json_generation/make_solvable_files.py
+++ /dev/null
@@ -1,166 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Find all cases where collisions are required to achieve the goal.
-
-Due to errors in Waymo labeling, some space that is crossable is mistakenly
-labeled as a road edge. This file finds most of those cases.
-"""
-import argparse
-import json
-import multiprocessing
-from multiprocessing import Process, Lock
-import os
-
-import numpy as np
-
-from cfgs.config import PROCESSED_TRAIN_NO_TL, PROCESSED_VALID_NO_TL, \
-    get_default_scenario_dict, set_display_window
-from nocturne import Simulation
-
-
-def is_file_valid(file_list, output_file, output_file_invalid, lock=None):
-    """Test if file requires an agent to collide with a road edge to get to goal.
-
-    We test for this by making the agent have very thin width. If an agent
-    is in collision with a road edge despite this thin width, it was crossing
-    that road edge because that road edge was on the way to its goal. We also
-    shrink the length to avoid the cases where the vehicle is initialized
-    in collision with a road edge.
-
-    If a file has more than 80% of the agents need to collide with a road edge to get
-    to goal, we store it in output_file_invalid instead.
-
-    Args
-    ----
-        file_list ([str]): list of file paths.
-        output_file (str): file to store valid json names.
-        output_file_invalid (_type_): file to store invalid json names.
-        lock (Lock, optional): Lock used for safe file writing.
-    """
-    file_valid_dict = {}
-    file_invalid_dict = {}
-    cfg = get_default_scenario_dict()
-    cfg['start_time'] = 0
-    cfg['allow_non_vehicles'] = False
-    for i, file in enumerate(file_list):
-        sim = Simulation(str(file), cfg)
-        vehs = sim.scenario().getObjectsThatMoved()
-        for veh in vehs:
-            # we shrink the vehicle width and length to tiny values.
-            # then, if a vehicle collides with a road edge, we know it had to
-            # cross that road edge to actually get to its goal
-            veh._scale_shape(length_scale=0.3, width_scale=0.1)
-            veh.expert_control = True
-        # dict tracking which vehicles were forced to collide with
-        # an edge on their way to goal
-        veh_edge_collided = {veh.id: False for veh in vehs}
-        for _ in range(90):
-            for veh in vehs:
-                collided = veh.collided
-                # the second conditions check whether the
-                # the vehicle has "collided", but only because
-                # it was invalid at the same time as another
-                # vehicle was invalid
-                if collided and not np.isclose(veh.position.x, -10000.0):
-                    if int(veh.collision_type) == 2:
-                        veh_edge_collided[veh.id] = True
-            sim.step(0.1)
-        # write all the vehicle ids that had a collision to a file
-        # so that we know which vehicles should be set to be experts
-        # if more than 80% of the vehicles are experts, we throw the file
-        # away
-        if np.sum(list(
-                veh_edge_collided.values())) / len(veh_edge_collided) < 0.8:
-            storage = file_valid_dict
-        else:
-            storage = file_invalid_dict
-        storage[str(file).split('/')[-1]] = [
-            key for key, val in veh_edge_collided.items() if val
-        ]
-
-    for file, return_dict in zip([output_file, output_file_invalid],
-                                 [file_valid_dict, file_invalid_dict]):
-        if lock is not None:
-            lock.acquire()
-        with open(file, 'r') as fp:
-            temp_dict = json.load(fp)
-        with open(file, 'w') as fp:
-            temp_dict.update(return_dict)
-            json.dump(temp_dict, fp, indent=4)
-        if lock is not None:
-            lock.release()
-
-
-def main():
-    """See file docstring."""
-    set_display_window()
-    parser = argparse.ArgumentParser(
-        description="Load and show waymo scenario data.")
-    parser.add_argument(
-        "--parallel",
-        action='store_true',
-        help="If true, split the conversion up over multiple processes")
-    parser.add_argument(
-        "--n_processes",
-        type=int,
-        default=40,
-        help="Number of processes over which to split file generation")
-    parser.add_argument("--datatype",
-                        default='train',
-                        type=str,
-                        choices=['train', 'valid'],
-                        nargs='+',
-                        help="Whether to convert, train or valid data")
-
-    args = parser.parse_args()
-    # TODO(eugenevinitsky) this currently assumes that we have
-    # constructed the scenes without traffic lights and not
-    # other scenes
-    folders_to_convert = []
-    if 'train' in args.datatype:
-        folders_to_convert.append(PROCESSED_TRAIN_NO_TL)
-    if 'valid' in args.datatype:
-        folders_to_convert.append(PROCESSED_VALID_NO_TL)
-
-    lock = Lock()
-    for folder_path in folders_to_convert:
-        files = os.listdir(folder_path)
-        files = [
-            os.path.join(folder_path, file) for file in files
-            if 'tfrecord' in file
-        ]
-
-        output_file = os.path.join(folder_path, 'valid_files.json')
-        with open(output_file, 'w') as fp:
-            json.dump({}, fp)
-
-        output_file_invalid = os.path.join(folder_path, 'invalid_files.json')
-        with open(output_file_invalid, 'w') as fp:
-            json.dump({}, fp)
-
-        if args.parallel:
-            # leave some cpus free but have at least one and don't use more than n_processes
-            num_cpus = min(max(multiprocessing.cpu_count() - 2, 1),
-                           args.n_processes)
-            num_files = len(files)
-            process_list = []
-            for i in range(num_cpus):
-                p = Process(target=is_file_valid,
-                            args=[
-                                files[i * num_files // num_cpus:(i + 1) *
-                                      num_files // num_cpus], output_file,
-                                output_file_invalid, lock
-                            ])
-                p.start()
-                process_list.append(p)
-
-            for process in process_list:
-                process.join()
-        else:
-            is_file_valid(files, output_file, output_file_invalid, lock=None)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/scripts/json_generation/run_waymo_constructor.py b/scripts/json_generation/run_waymo_constructor.py
deleted file mode 100644
index 1f0579fd..00000000
--- a/scripts/json_generation/run_waymo_constructor.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Utils for converting TFRecords into Nocturne compatible JSON."""
-import argparse
-from pathlib import Path
-import os
-import multiprocessing
-
-from cfgs.config import TRAIN_DATA_PATH, VALID_DATA_PATH, PROCESSED_TRAIN_NO_TL, \
-    PROCESSED_VALID_NO_TL, PROCESSED_TRAIN, PROCESSED_VALID
-import waymo_scenario_construction as waymo
-
-
-def convert_files(args, files, output_dir, rank):
-    """Convert the list of files into nocturne compatible JSON.
-
-    Args
-    ----
-        args (NameSpace): args from the argument parser.
-        files ([str]): list of file paths for TFRecords that we should convert
-        output_dir (str): output path in which we should store the JSON
-        rank (int): rank of the process.
-    """
-    cnt = 0
-    for file in files:
-        inner_count = 0
-        for data in waymo.load_protobuf(str(file)):
-            file_name = os.path.basename(file).split(
-                '.')[1] + f'_{inner_count}.json'
-            # this file is useful for debugging
-            if args.output_txt and cnt == 0 and rank == 0:
-                with open(os.path.basename(file).split('.')[1] + '.txt',
-                          'w') as f:
-                    f.write(str(data))
-            waymo.waymo_to_scenario(os.path.join(output_dir, file_name), data,
-                                    args.no_tl)
-            inner_count += 1
-            cnt += 1
-            if cnt >= args.num and not args.all_files:
-                break
-        print(inner_count)
-
-
-def main():
-    """Run the json generators."""
-    parser = argparse.ArgumentParser(
-        description="Load and show waymo scenario data.")
-    parser.add_argument("--file",
-                        type=str,
-                        default=os.path.join(
-                            TRAIN_DATA_PATH,
-                            'training.tfrecord-00995-of-01000'))
-    parser.add_argument("--num", type=int, default=1)
-    parser.add_argument("--output_txt",
-                        action='store_true',
-                        help='output a txt version of one of the protobufs')
-    parser.add_argument("--all_files",
-                        action='store_true',
-                        help='If true, iterate through the whole dataset')
-    parser.add_argument("--no_tl",
-                        action='store_true',
-                        help="If true, do not generate JSON files\
-             that have a traffic light in them")
-    parser.add_argument(
-        "--parallel",
-        action='store_true',
-        help="If true, split the conversion up over multiple processes")
-    parser.add_argument("--datatype",
-                        default='train',
-                        type=str,
-                        choices=['train', 'valid'],
-                        nargs='+',
-                        help="Whether to convert, train or valid data")
-
-    args = parser.parse_args()
-    folders_to_convert = []
-    if 'train' in args.datatype:
-        folders_to_convert.append(
-            (TRAIN_DATA_PATH,
-             PROCESSED_TRAIN_NO_TL if args.no_tl else PROCESSED_TRAIN))
-    if 'valid' in args.datatype:
-        folders_to_convert.append(
-            (VALID_DATA_PATH,
-             PROCESSED_VALID_NO_TL if args.no_tl else PROCESSED_VALID))
-
-    for folder_path, output_dir in folders_to_convert:
-        if args.num > 1 or args.all_files:
-            files = list(Path(folder_path).glob('*tfrecord*'))
-            if not os.path.exists(output_dir):
-                os.makedirs(output_dir)
-            if not args.all_files:
-                files = files[0:args.num]
-
-        else:
-            output_dir = os.getcwd()
-            files = [args.file]
-
-        if args.parallel:
-            # leave some cpus free but have at least one and don't use more than 40
-            num_cpus = min(max(multiprocessing.cpu_count() - 2, 1), 40)
-            num_files = len(files)
-            process_list = []
-            for i in range(num_cpus):
-                p = multiprocessing.Process(
-                    target=convert_files,
-                    args=[
-                        args, files[i * num_files // num_cpus:(i + 1) *
-                                    num_files // num_cpus], output_dir, i
-                    ])
-                p.start()
-                process_list.append(p)
-
-            for process in process_list:
-                process.join()
-        else:
-            convert_files(args, files, output_dir, rank=0)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/json_generation/waymo_scenario_construction.py b/scripts/json_generation/waymo_scenario_construction.py
deleted file mode 100644
index 29406b44..00000000
--- a/scripts/json_generation/waymo_scenario_construction.py
+++ /dev/null
@@ -1,207 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Construct a scenarios.json file from a waymos protobuf."""
-
-from collections import defaultdict
-import math
-import json
-from typing import Any, Dict, Iterator, Optional
-
-import tensorflow as tf
-from waymo_open_dataset.protos import map_pb2, scenario_pb2
-
-from cfgs.config import ERR_VAL
-
-_WAYMO_OBJECT_STR = {
-    scenario_pb2.Track.TYPE_UNSET: "unset",
-    scenario_pb2.Track.TYPE_VEHICLE: "vehicle",
-    scenario_pb2.Track.TYPE_PEDESTRIAN: "pedestrian",
-    scenario_pb2.Track.TYPE_CYCLIST: "cyclist",
-    scenario_pb2.Track.TYPE_OTHER: "other",
-}
-
-_WAYMO_ROAD_STR = {
-    map_pb2.TrafficSignalLaneState.LANE_STATE_UNKNOWN: "unknown",
-    map_pb2.TrafficSignalLaneState.LANE_STATE_ARROW_STOP: "arrow_stop",
-    map_pb2.TrafficSignalLaneState.LANE_STATE_ARROW_CAUTION: "arrow_caution",
-    map_pb2.TrafficSignalLaneState.LANE_STATE_ARROW_GO: "arrow_go",
-    map_pb2.TrafficSignalLaneState.LANE_STATE_STOP: "stop",
-    map_pb2.TrafficSignalLaneState.LANE_STATE_CAUTION: "caution",
-    map_pb2.TrafficSignalLaneState.LANE_STATE_GO: "go",
-    map_pb2.TrafficSignalLaneState.LANE_STATE_FLASHING_STOP: "flashing_stop",
-    map_pb2.TrafficSignalLaneState.LANE_STATE_FLASHING_CAUTION:
-    "flashing_caution",
-}
-
-
-def _parse_object_state(
-        states: scenario_pb2.ObjectState,
-        final_state: scenario_pb2.ObjectState) -> Dict[str, Any]:
-    """Construct a dict representing the trajectory and goals of an object.
-
-    Args:
-        states (scenario_pb2.ObjectState): Protobuf of object state
-        final_state (scenario_pb2.ObjectState): Protobuf of last valid object state.
-
-    Returns
-    -------
-        Dict[str, Any]: Dict representing an object.
-    """
-    return {
-        "position": [{
-            "x": state.center_x,
-            "y": state.center_y
-        } if state.valid else {
-            "x": ERR_VAL,
-            "y": ERR_VAL
-        } for state in states],
-        "width":
-        final_state.width,
-        "length":
-        final_state.length,
-        "heading": [
-            math.degrees(state.heading) if state.valid else ERR_VAL
-            for state in states
-        ],  # Use rad here?
-        "velocity": [{
-            "x": state.velocity_x,
-            "y": state.velocity_y
-        } if state.valid else {
-            "x": ERR_VAL,
-            "y": ERR_VAL
-        } for state in states],
-        "valid": [state.valid for state in states],
-        "goalPosition": {
-            "x": final_state.center_x,
-            "y": final_state.center_y
-        }
-    }
-
-
-def _init_tl_object(track):
-    """Construct a dict representing the traffic light states."""
-    returned_dict = {}
-    for lane_state in track.lane_states:
-        returned_dict[lane_state.lane] = {
-            'state': _WAYMO_ROAD_STR[lane_state.state],
-            'x': lane_state.stop_point.x,
-            'y': lane_state.stop_point.y
-        }
-    return returned_dict
-
-
-def _init_object(track: scenario_pb2.Track) -> Optional[Dict[str, Any]]:
-    """Construct a dict representing the state of the object (vehicle, cyclist, pedestrian).
-
-    Args:
-        track (scenario_pb2.Track): protobuf representing the scenario
-
-    Returns
-    -------
-        Optional[Dict[str, Any]]: dict representing the trajectory and velocity of an object.
-    """
-    final_valid_index = 0
-    for i, state in enumerate(track.states):
-        if state.valid:
-            final_valid_index = i
-
-    obj = _parse_object_state(track.states, track.states[final_valid_index])
-    obj["type"] = _WAYMO_OBJECT_STR[track.object_type]
-    return obj
-
-
-def _init_road(map_feature: map_pb2.MapFeature) -> Optional[Dict[str, Any]]:
-    """Convert an element of the map protobuf to a dict representing its coordinates and type."""
-    feature = map_feature.WhichOneof("feature_data")
-    if feature == 'stop_sign':
-        p = getattr(map_feature,
-                    map_feature.WhichOneof("feature_data")).position
-        geometry = [{"x": p.x, "y": p.y}]
-    elif feature != 'crosswalk' and feature != 'speed_bump':
-        geometry = [{
-            "x": p.x,
-            "y": p.y
-        } for p in getattr(map_feature, map_feature.WhichOneof(
-            "feature_data")).polyline]
-    else:
-        geometry = [{
-            "x": p.x,
-            "y": p.y
-        } for p in getattr(map_feature, map_feature.WhichOneof(
-            "feature_data")).polygon]
-    return {
-        "geometry": geometry,
-        "type": map_feature.WhichOneof("feature_data"),
-    }
-
-
-def load_protobuf(protobuf_path: str) -> Iterator[scenario_pb2.Scenario]:
-    """Yield the sharded protobufs from the TFRecord."""
-    dataset = tf.data.TFRecordDataset(protobuf_path, compression_type="")
-    for data in dataset:
-        scenario = scenario_pb2.Scenario()
-        scenario.ParseFromString(bytearray(data.numpy()))
-        yield scenario
-
-
-def waymo_to_scenario(scenario_path: str,
-                      protobuf: scenario_pb2.Scenario,
-                      no_tl: bool = False) -> None:
-    """Dump a JSON File containing the protobuf parsed into the right format.
-
-    Args
-    ----
-        scenario_path (str): path to dump the json file
-        protobuf (scenario_pb2.Scenario): the protobuf we are converting
-        no_tl (bool, optional): If true, environments with traffic lights are not dumped.
-    """
-    # read the protobuf file to get the right state
-
-    # write the json file
-    # construct the road geometries
-    # place the initial position of the vehicles
-
-    # Construct the traffic light states
-    tl_dict = defaultdict(lambda: {
-        'state': [],
-        'x': [],
-        'y': [],
-        'time_index': []
-    })
-    all_keys = ['state', 'x', 'y']
-    i = 0
-    for dynamic_map_state in protobuf.dynamic_map_states:
-        traffic_light_dict = _init_tl_object(dynamic_map_state)
-        # there is a traffic light but we don't want traffic light scenes so just return
-        if (no_tl and len(traffic_light_dict) > 0):
-            return
-        for id, value in traffic_light_dict.items():
-            for state_key in all_keys:
-                tl_dict[id][state_key].append(value[state_key])
-            tl_dict[id]['time_index'].append(i)
-        i += 1
-
-    # Construct the object states
-    objects = []
-    for track in protobuf.tracks:
-        obj = _init_object(track)
-        if obj is not None:
-            objects.append(obj)
-
-    # Construct the map states
-    roads = []
-    for map_feature in protobuf.map_features:
-        road = _init_road(map_feature)
-        if road is not None:
-            roads.append(road)
-
-    scenario = {
-        "name": scenario_path.split('/')[-1],
-        "objects": objects,
-        "roads": roads,
-        "tl_states": tl_dict
-    }
-    with open(scenario_path, "w") as f:
-        json.dump(scenario, f)
diff --git a/scripts/paper_plots/README.md b/scripts/paper_plots/README.md
deleted file mode 100644
index 2787e62b..00000000
--- a/scripts/paper_plots/README.md
+++ /dev/null
@@ -1 +0,0 @@
-This folder is used to reproduce all the plots from paper TO BE TITLED.
\ No newline at end of file
diff --git a/scripts/paper_plots/create_zsc_plot.py b/scripts/paper_plots/create_zsc_plot.py
deleted file mode 100644
index 5c3df20e..00000000
--- a/scripts/paper_plots/create_zsc_plot.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Utilities for plotting ZSC results."""
-import os
-
-import matplotlib.pyplot as plt
-import numpy as np
-
-
-def create_heat_map(file, title, save_path, white_switch):
-    """Construct a heatmap of the ZSC results.
-
-    Args:
-    ----
-        file (str): file path to zsc results
-        title (str): title of the plot
-        save_path (str): path to save it at
-        white_switch (float): if the value is greater than white_switch
-            we write the cell text as black. This is just to make
-            the plots more readable.
-    """
-    np_arr = np.load(os.path.join(zsc_path, file))
-    np_arr_mean = np.mean(np_arr, axis=-1)
-
-    agent_indices = [f'Agent {i}' for i in range(np_arr.shape[0])]
-
-    fig, ax = plt.subplots()
-    ax.imshow(np_arr_mean)
-
-    # Show all ticks and label them with the respective list entries
-    ax.set_xticks(np.arange(len(agent_indices)), labels=agent_indices)
-    ax.set_yticks(np.arange(len(agent_indices)), labels=agent_indices)
-
-    # Rotate the tick labels and set their alignment.
-    plt.setp(ax.get_xticklabels(),
-             rotation=45,
-             ha="right",
-             rotation_mode="anchor")
-
-    # Loop over data dimensions and create text annotations.
-    for i in range(len(agent_indices)):
-        for j in range(len(agent_indices)):
-            if np_arr_mean[i, j] > white_switch:
-                color = 'black'
-            else:
-                color = 'w'
-            ax.text(j,
-                    i,
-                    f'{np.round(np_arr_mean[i, j], decimals=2)}',
-                    ha="center",
-                    va="center",
-                    color=color)
-
-    ax.set_title(title)
-    fig.tight_layout()
-    plt.savefig(save_path)
-
-
-def compute_average_change(file):
-    """Compare cross play to self play."""
-    np_arr = np.load(os.path.join(zsc_path, file))
-    np_arr_mean = np.mean(np_arr, axis=-1)
-    self_play = np.mean(np.diag(np_arr_mean))
-    cross_play = np.mean(
-        np_arr_mean[np.where(~np.eye(np_arr_mean.shape[0], dtype=bool))])
-    self_play_std = np.std(np.diag(np_arr_mean)) / np.sqrt(
-        np_arr_mean.shape[0])
-    cross_play_std = np.std(
-        np_arr_mean[np.where(~np.eye(np_arr_mean.shape[0], dtype=bool))]
-    ) / np.sqrt(np_arr_mean.shape[0]**2 - np_arr_mean.shape[0])
-    print(
-        f'self play: {self_play} ± {self_play_std}, cross play: {cross_play} ± {cross_play_std}'
-    )
-
-
-if __name__ == '__main__':
-    # zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.23/srt_v10/17.02.40/23/srt_v10'
-    # zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.28/srt_12/16.43.16/4/srt_12'
-    # zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.28/srt_12/16.43.16/4/srt_12'
-    # zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.28/srt_12/16.43.16/4/srt_12'
-    # 10000 on valid
-    # zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.28/srt_12/16.43.16/4/srt_12'
-    # 10000 on train
-    # zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.28/srt_12/16.43.16/4/srt_12'
-    zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.06.01/srt_v27/17.35.33/123/srt_v27'
-    create_heat_map('train_zsc_goal.npy',
-                    "Cross-play Goal Rate",
-                    'cross_play_heat_map.png',
-                    white_switch=.8)
-    create_heat_map('train_zsc_collision.npy',
-                    "Cross-play Collision Rate",
-                    'cross_play_collision_map.png',
-                    white_switch=0.18)
-    compute_average_change('train_zsc_goal.npy')
-    compute_average_change('train_zsc_collision.npy')
diff --git a/scripts/paper_plots/eval_il_agents.py b/scripts/paper_plots/eval_il_agents.py
deleted file mode 100644
index 9f79ee26..00000000
--- a/scripts/paper_plots/eval_il_agents.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Run script that generates summary statistics for a folder of IL agents."""
-import json
-import os
-
-import numpy as np
-import torch
-
-from nocturne.utils.eval.average_displacement import compute_average_displacement
-from cfgs.config import PROCESSED_VALID_NO_TL, PROJECT_PATH
-
-if __name__ == '__main__':
-    outer_model_folder = '/checkpoint/eugenevinitsky/nocturne/sweep/imitation/2022.06.13/arxiv_il_v4_1kf/18.49.39'
-    models = []
-    cfg_dicts = []
-    for (dirpath, dirnames, filenames) in os.walk(outer_model_folder):
-        if 'configs.json' in filenames:
-            with open(os.path.join(dirpath, 'configs.json'), 'r') as file:
-                cfg_dict = json.load(file)
-            # now snag the model with the largest checkpoint
-            max_val = -100
-            cur_model_name = None
-            for file in filenames:
-                if '.pth' in file:
-                    checkpoint_val = int(file.split('.')[0].split('_')[-1])
-                    if checkpoint_val > max_val:
-                        max_val = checkpoint_val
-                        cur_model_name = file
-            cfg_dicts.append(cfg_dict)
-            model = torch.load(os.path.join(dirpath, cur_model_name)).to('cpu')
-            model.actions_grids = [x.to('cpu') for x in model.actions_grids]
-            model.eval()
-            model.nn[0].eval()
-            models.append(model)
-    results = np.zeros((len(cfg_dicts), 8))
-    for i, (cfg_dict, model) in enumerate(zip(cfg_dicts, models)):
-        ade, fde, collisions, goals = compute_average_displacement(
-            PROCESSED_VALID_NO_TL, model=model, configs=cfg_dict)
-        results[i, 0] = ade[0]
-        results[i, 1] = ade[1]
-        results[i, 2] = fde[0]
-        results[i, 3] = fde[1]
-        results[i, 4] = collisions[0]
-        results[i, 5] = collisions[1]
-        results[i, 6] = goals[0]
-        results[i, 7] = goals[1]
-    np.save(os.path.join(PROJECT_PATH, 'scripts/paper_plots/il_results.npy'),
-            results)
-    print(
-        f'ade {np.mean(results[:, 0])} ± {np.std(results[:, 0]) / np.sqrt(results[:, 0].shape[0])}'
-    )
-    print(
-        f'fde {np.mean(results[:, 2])} ± {np.std(results[:, 2]) / np.sqrt(results[:, 0].shape[0])}'
-    )
-    print(
-        f'collisions {np.mean(results[:, 4])} ± {np.std(results[:, 4]) / np.sqrt(results[:, 0].shape[0])}'
-    )
-    print(
-        f'goals {np.mean(results[:, 6])} ± {np.std(results[:, 6]) / np.sqrt(results[:, 0].shape[0])}'
-    )
diff --git a/scripts/paper_plots/eval_sample_factory.py b/scripts/paper_plots/eval_sample_factory.py
deleted file mode 100644
index 601b4a71..00000000
--- a/scripts/paper_plots/eval_sample_factory.py
+++ /dev/null
@@ -1,1317 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Run a policy over the entire train set.
-
-TODO(ev) refactor, this is wildly similar to visualize_sample_factory
-"""
-
-from copy import deepcopy
-from collections import deque, defaultdict
-import itertools
-from itertools import repeat
-import json
-import multiprocessing as mp
-import os
-import sys
-
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import torch
-
-from sample_factory.algorithms.appo.actor_worker import transform_dict_observations
-from sample_factory.algorithms.appo.learner import LearnerWorker
-from sample_factory.algorithms.appo.model import create_actor_critic
-from sample_factory.algorithms.appo.model_utils import get_hidden_size
-from sample_factory.algorithms.utils.action_distributions import ContinuousActionDistribution, \
-     CategoricalActionDistribution
-from sample_factory.algorithms.utils.arguments import load_from_checkpoint
-from sample_factory.algorithms.utils.multi_agent_wrapper import MultiAgentWrapper, is_multiagent_env
-from sample_factory.envs.create_env import create_env
-from sample_factory.utils.utils import log, AttrDict
-from examples.sample_factory_files.run_sample_factory import register_custom_components
-
-from cfgs.config import PROCESSED_VALID_NO_TL, PROCESSED_TRAIN_NO_TL, \
-    ERR_VAL, set_display_window
-
-CB_color_cycle = [
-    '#377eb8', '#ff7f00', '#4daf4a', '#f781bf', '#a65628', '#984ea3',
-    '#999999', '#e41a1c', '#dede00'
-]
-
-
-class Bunch(object):
-    """Converts a dict into an object with the keys as attributes."""
-
-    def __init__(self, adict):
-        self.__dict__.update(adict)
-
-
-def ccw(A, B, C):
-    """Blah."""
-    return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0])
-
-
-def intersect(A, B, C, D):
-    """Check if two line segments AB and CD intersect."""
-    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)
-
-
-def poly_intersection(poly1, poly2):
-    """Compute if two polylines intersect."""
-    for i, p1_first_point in enumerate(poly1[:-1]):
-        p1_second_point = poly1[i + 1]
-
-        for j, p2_first_point in enumerate(poly2[:-1]):
-            p2_second_point = poly2[j + 1]
-
-            if intersect(p1_first_point, p1_second_point, p2_first_point,
-                         p2_second_point):
-                return True
-
-    return False
-
-
-def run_rollouts(env,
-                 cfg,
-                 device,
-                 expert_trajectory_dict,
-                 distance_bins,
-                 intersection_bins,
-                 veh_intersection_dict,
-                 actor_1,
-                 actor_2=None):
-    """Run a single rollout.
-
-    Args:
-        env (_type_): Env we are running.
-        cfg (dict): dictionary configuring the environment.
-        device (str): device you want to run the model on
-        expert_trajectory_dict (dict[str]: np.array): expert trajectories
-            keyed by ID
-        distance_bins (np.array): bins used to compute the goal
-            rate as a function of the starting distance from goal
-        intersection_bins (np.array): bins used to compute the
-            goal rate as a function of the number of intersections
-            between paths in the expert trajectories
-        veh_intersection_dict (dict[str]: np.array): dict mapping
-            a vehicle ID to the number of intersections it
-            experienced
-        actor_1: SampleFactory agent
-        actor_2: SampleFactory agent. Will be none unless we're testing for
-                ZSC
-
-    Returns
-    -------
-        avg_goal: average goal rate of agents
-        avg_collisions: average collision rate of agents
-        avg_veh_edge_collisions: average veh-edge collision rate
-        avg_veh_veh_collisions: average veh-veh collision rate
-        success_rate_by_distance: np.array(number of distance bins, 4)
-            where the row indexes how far the vehicle was from goal
-            at initialization and where the column index is
-            [goal rate, collision rate, veh-veh collision rate, counter of
-                            number of vehicles in this bin]
-        success_rate_by_num_agents: np.array(maximum number of vehicles, 4)
-            where the row index is how many vehicles were in this episode
-            where the column index is [goal rate, collision rate,
-                            veh-veh collision rate, counter of
-                            number of vehicles in this bin]
-        success_rate_by_intersections: np.array(number of intersections, 4)
-            where the row index is how many intersections that vehicle
-            had and where the column index is [goal rate, collision rate,
-                            veh-veh collision rate, counter of
-                            number of vehicles in this bin]
-        np.mean(ades): mean average displacement error of all vehicles in the
-                       episode
-        np.mean(fdes): mean final displacement error of all vehicles in the
-                       episode
-        veh_counter(int): how many vehicles were in that episode
-    """
-    episode_rewards = [deque([], maxlen=100) for _ in range(env.num_agents)]
-    true_rewards = [deque([], maxlen=100) for _ in range(env.num_agents)]
-    obs = env.reset()
-    rollout_traj_dict = defaultdict(lambda: np.zeros((80, 2)))
-    # some key information for tracking statistics
-    goal_dist = env.goal_dist_normalizers
-    valid_indices = env.valid_indices
-    agent_id_to_env_id_map = env.agent_id_to_env_id_map
-    env_id_to_agent_id_map = env.env_id_to_agent_id_map
-
-    success_rate_by_num_agents = np.zeros((cfg.max_num_vehicles, 4))
-    success_rate_by_distance = np.zeros((distance_bins.shape[-1], 4))
-    success_rate_by_intersections = np.zeros((intersection_bins.shape[-1], 4))
-    if actor_2 is not None:
-        # pick which valid indices go to which policy
-        val = np.random.uniform()
-        if val < 0.5:
-            num_choice = int(np.floor(len(valid_indices) / 2.0))
-        else:
-            num_choice = int(np.ceil(len(valid_indices) / 2.0))
-        indices_1 = list(
-            np.random.choice(valid_indices, num_choice, replace=False))
-        indices_2 = [val for val in valid_indices if val not in indices_1]
-        rnn_states = torch.zeros(
-            [env.num_agents, get_hidden_size(cfg)],
-            dtype=torch.float32,
-            device=device)
-        rnn_states_2 = torch.zeros(
-            [env.num_agents, get_hidden_size(cfg)],
-            dtype=torch.float32,
-            device=device)
-    else:
-        rnn_states = torch.zeros(
-            [env.num_agents, get_hidden_size(cfg)],
-            dtype=torch.float32,
-            device=device)
-    episode_reward = np.zeros(env.num_agents)
-    finished_episode = [False] * env.num_agents
-    goal_achieved = [False] * len(valid_indices)
-    collision_observed = [False] * len(valid_indices)
-    veh_veh_collision_observed = [False] * len(valid_indices)
-    veh_counter = 0
-
-    while not all(finished_episode):
-        with torch.no_grad():
-            obs_torch = AttrDict(transform_dict_observations(obs))
-            for key, x in obs_torch.items():
-                obs_torch[key] = torch.from_numpy(x).to(device).float()
-
-            # we have to make a copy before doing the pass
-            # because (for some reason), sample factory is making
-            # some changes to the obs in the forwards pass
-            # TBD what it is
-            if actor_2 is not None:
-                obs_torch_2 = deepcopy(obs_torch)
-                policy_outputs_2 = actor_2(obs_torch_2,
-                                           rnn_states_2,
-                                           with_action_distribution=True)
-
-            policy_outputs = actor_1(obs_torch,
-                                     rnn_states,
-                                     with_action_distribution=True)
-
-            # sample actions from the distribution by default
-            # also update the indices that should be drawn from the second policy
-            # with its outputs
-            actions = policy_outputs.actions
-            if actor_2 is not None:
-                actions[indices_2] = policy_outputs_2.actions[indices_2]
-
-            action_distribution = policy_outputs.action_distribution
-            if isinstance(action_distribution, ContinuousActionDistribution):
-                if not cfg.continuous_actions_sample:  # TODO: add similar option for discrete actions
-                    actions = action_distribution.means
-                    if actor_2 is not None:
-                        actions[
-                            indices_2] = policy_outputs_2.action_distribution.means[
-                                indices_2]
-            if isinstance(action_distribution, CategoricalActionDistribution):
-                if not cfg.discrete_actions_sample:
-                    actions = policy_outputs['action_logits'].argmax(axis=1)
-                    if actor_2 is not None:
-                        actions[indices_2] = policy_outputs_2[
-                            'action_logits'].argmax(axis=1)[indices_2]
-
-            actions = actions.cpu().numpy()
-
-            for veh in env.unwrapped.get_objects_that_moved():
-                # only check vehicles we are actually controlling
-                if veh.expert_control is False:
-                    rollout_traj_dict[veh.id][
-                        env.step_num] = veh.position.numpy()
-                if int(veh.collision_type) == 1:
-                    if veh.getID() in env_id_to_agent_id_map.keys():
-                        agent_id = env_id_to_agent_id_map[veh.getID()]
-                        idx = valid_indices.index(agent_id)
-                        veh_veh_collision_observed[idx] = 1
-
-            rnn_states = policy_outputs.rnn_states
-            if actor_2 is not None:
-                rnn_states_2 = policy_outputs_2.rnn_states
-
-            obs, rew, done, infos = env.step(actions)
-            episode_reward += rew
-
-            for i, index in enumerate(valid_indices):
-                goal_achieved[
-                    i] = infos[index]['goal_achieved'] or goal_achieved[i]
-                collision_observed[
-                    i] = infos[index]['collided'] or collision_observed[i]
-
-            for agent_i, done_flag in enumerate(done):
-                if done_flag:
-                    finished_episode[agent_i] = True
-                    episode_rewards[agent_i].append(episode_reward[agent_i])
-                    true_rewards[agent_i].append(infos[agent_i].get(
-                        'true_reward', episode_reward[agent_i]))
-                    log.info(
-                        'Episode finished for agent %d. Reward: %.3f, true_reward: %.3f',
-                        agent_i, episode_reward[agent_i],
-                        true_rewards[agent_i][-1])
-                    rnn_states[agent_i] = torch.zeros([get_hidden_size(cfg)],
-                                                      dtype=torch.float32,
-                                                      device=device)
-                    episode_reward[agent_i] = 0
-
-            if all(finished_episode):
-                avg_episode_rewards_str, avg_true_reward_str = '', ''
-                for agent_i in range(env.num_agents):
-                    avg_rew = np.mean(episode_rewards[agent_i])
-                    avg_true_rew = np.mean(true_rewards[agent_i])
-                    if not np.isnan(avg_rew):
-                        if avg_episode_rewards_str:
-                            avg_episode_rewards_str += ', '
-                        avg_episode_rewards_str += f'#{agent_i}: {avg_rew:.3f}'
-                    if not np.isnan(avg_true_rew):
-                        if avg_true_reward_str:
-                            avg_true_reward_str += ', '
-                        avg_true_reward_str += f'#{agent_i}: {avg_true_rew:.3f}'
-                avg_goal = infos[0]['episode_extra_stats']['goal_achieved']
-                avg_collisions = infos[0]['episode_extra_stats']['collided']
-                avg_veh_edge_collisions = infos[0]['episode_extra_stats'][
-                    'veh_edge_collision']
-                avg_veh_veh_collisions = infos[0]['episode_extra_stats'][
-                    'veh_veh_collision']
-                success_rate_by_num_agents[len(valid_indices) - 1,
-                                           0] += avg_goal
-                success_rate_by_num_agents[len(valid_indices) - 1,
-                                           1] += avg_collisions
-                success_rate_by_num_agents[len(valid_indices) - 1,
-                                           2] += np.mean(
-                                               veh_veh_collision_observed)
-                success_rate_by_num_agents[len(valid_indices) - 1, 3] += 1
-                # track how well we do as a function of distance
-                for i, index in enumerate(valid_indices):
-                    env_id = agent_id_to_env_id_map[index]
-                    bin = np.searchsorted(distance_bins, goal_dist[env_id])
-                    success_rate_by_distance[bin - 1, :] += [
-                        goal_achieved[i], collision_observed[i],
-                        veh_veh_collision_observed[i], 1
-                    ]
-                # track how well we do as number of intersections
-                for i, index in enumerate(valid_indices):
-                    env_id = agent_id_to_env_id_map[index]
-                    bin = min(veh_intersection_dict[env_id],
-                              distance_bins.shape[-1] - 1)
-                    success_rate_by_intersections[bin, :] += [
-                        goal_achieved[i], collision_observed[i],
-                        veh_veh_collision_observed[i], 1
-                    ]
-                # compute ADE and FDE
-                ades = []
-                fdes = []
-                for agent_id, traj in rollout_traj_dict.items():
-                    masking_arr = traj.sum(axis=1)
-                    mask = (masking_arr != 0.0) * (masking_arr !=
-                                                   traj.shape[1] * ERR_VAL)
-                    expert_mask_arr = expert_trajectory_dict[agent_id].sum(
-                        axis=1)
-                    expert_mask = (expert_mask_arr != 0.0) * (
-                        expert_mask_arr != traj.shape[1] * ERR_VAL)
-                    ade = np.linalg.norm(traj -
-                                         expert_trajectory_dict[agent_id],
-                                         axis=-1)[mask * expert_mask]
-                    ades.append(ade.mean())
-                    fde = np.linalg.norm(
-                        traj - expert_trajectory_dict[agent_id],
-                        axis=-1)[np.max(np.argwhere(mask * expert_mask))]
-                    fdes.append(fde)
-                    veh_counter += 1
-
-                log.info('Avg episode rewards: %s, true rewards: %s',
-                         avg_episode_rewards_str, avg_true_reward_str)
-                log.info(
-                    'Avg episode reward: %.3f, avg true_reward: %.3f',
-                    np.mean([
-                        np.mean(episode_rewards[i])
-                        for i in range(env.num_agents)
-                    ]),
-                    np.mean([
-                        np.mean(true_rewards[i]) for i in range(env.num_agents)
-                    ]))
-
-                return (avg_goal, avg_collisions, avg_veh_edge_collisions,
-                        avg_veh_veh_collisions, success_rate_by_distance,
-                        success_rate_by_num_agents,
-                        success_rate_by_intersections, np.mean(ades),
-                        np.mean(fdes), veh_counter)
-
-
-def run_eval(cfgs,
-             test_zsc,
-             output_path,
-             scenario_dir,
-             files,
-             file_type,
-             device='cuda'):
-    """Eval a stored agent over all files in validation set.
-
-    Args:
-        cfg (dict): configuration file for instantiating the agents and environment.
-        test_zsc (bool): if true, we play all agents against all agents
-        num_file_loops (int): how many times to loop over the file set
-
-    Returns
-    -------
-        None: None
-    """
-    actor_critics = []
-    if not isinstance(cfgs, list):
-        cfgs = [cfgs]
-    for i, cfg in enumerate(cfgs):
-        if not isinstance(cfg, Bunch):
-            cfg = Bunch(cfg)
-        cfg = load_from_checkpoint(cfg)
-
-        render_action_repeat = cfg.render_action_repeat if cfg.render_action_repeat is not None else cfg.env_frameskip
-        if render_action_repeat is None:
-            log.warning('Not using action repeat!')
-            render_action_repeat = 1
-        log.debug('Using action repeat %d during evaluation',
-                  render_action_repeat)
-
-        cfg.env_frameskip = 1  # for evaluation
-        cfg.num_envs = 1
-        # this config is used for computing displacement errors
-        ade_cfg = deepcopy(cfg)
-        ade_cfg['remove_at_goal'] = False
-        ade_cfg['remove_at_collide'] = False
-
-        def make_env_func(env_config):
-            return create_env(cfg.env, cfg=cfg, env_config=env_config)
-
-        env = make_env_func(AttrDict({'worker_index': 0, 'vector_index': 0}))
-        env.seed(0)
-
-        is_multiagent = is_multiagent_env(env)
-        if not is_multiagent:
-            env = MultiAgentWrapper(env)
-
-        if hasattr(env.unwrapped, 'reset_on_init'):
-            # reset call ruins the demo recording for VizDoom
-            env.unwrapped.reset_on_init = False
-
-        actor_critic = create_actor_critic(cfg, env.observation_space,
-                                           env.action_space)
-
-        device = torch.device(device)
-        actor_critic.model_to_device(device)
-
-        policy_id = cfg.policy_index
-        checkpoints = LearnerWorker.get_checkpoints(
-            LearnerWorker.checkpoint_dir(cfg, policy_id))
-        checkpoint_dict = LearnerWorker.load_checkpoint(checkpoints, device)
-        actor_critic.load_state_dict(checkpoint_dict['model'])
-        actor_critics.append([i, actor_critic])
-
-    # we bin the success rate into bins of 10 meters between 0 and 400
-    # the second dimension is the counts
-    distance_bins = np.linspace(0, 400, 40)
-    intersections_bins = np.linspace(0, 7, 7)
-    num_files = cfg['num_eval_files']
-    num_file_loops = cfg['num_file_loops']
-    # TODO(eugenevinitsky) horrifying copy and paste
-    if test_zsc:
-        goal_array = np.zeros((len(actor_critics), len(actor_critics),
-                               num_file_loops * num_files))
-        collision_array = np.zeros((len(actor_critics), len(actor_critics),
-                                    num_files * num_file_loops))
-        success_rate_by_num_agents = np.zeros(
-            (len(actor_critics), len(actor_critics), cfg.max_num_vehicles, 4))
-        success_rate_by_distance = np.zeros(
-            (len(actor_critics), len(actor_critics), distance_bins.shape[-1],
-             4))
-        success_rate_by_intersections = np.zeros(
-            (len(actor_critics), len(actor_critics),
-             intersections_bins.shape[-1], 4))
-        ade_array = np.zeros((len(actor_critics), len(actor_critics),
-                              num_file_loops * num_files))
-        fde_array = np.zeros((len(actor_critics), len(actor_critics),
-                              num_file_loops * num_files))
-        veh_veh_collision_array = np.zeros(
-            (len(actor_critics), len(actor_critics),
-             num_file_loops * num_files))
-        veh_edge_collision_array = np.zeros(
-            (len(actor_critics), len(actor_critics),
-             num_file_loops * num_files))
-    else:
-        goal_array = np.zeros((len(actor_critics), num_file_loops * num_files))
-        collision_array = np.zeros(
-            (len(actor_critics), num_file_loops * num_files))
-        veh_veh_collision_array = np.zeros(
-            (len(actor_critics), num_file_loops * num_files))
-        veh_edge_collision_array = np.zeros(
-            (len(actor_critics), num_file_loops * num_files))
-        success_rate_by_num_agents = np.zeros(
-            (len(actor_critics), cfg.max_num_vehicles, 4))
-        success_rate_by_distance = np.zeros(
-            (len(actor_critics), distance_bins.shape[-1], 4))
-        success_rate_by_intersections = np.zeros(
-            (len(actor_critics), intersections_bins.shape[-1], 4))
-        ade_array = np.zeros((len(actor_critics), num_file_loops * num_files))
-        fde_array = np.zeros((len(actor_critics), num_file_loops * num_files))
-
-    if test_zsc:
-        output_generator = itertools.product(actor_critics, actor_critics)
-    else:
-        output_generator = actor_critics
-
-    for output in output_generator:
-        if test_zsc:
-            (index_1, actor_1), (index_2, actor_2) = output
-        else:
-            (index_1, actor_1) = output
-        goal_frac = []
-        collision_frac = []
-        veh_veh_collision_frac = []
-        veh_edge_collision_frac = []
-        average_displacement_error = []
-        final_displacement_error = []
-        veh_counter = 0
-        for loop_num in range(num_file_loops):
-            for file_num, file in enumerate(files[0:cfg['num_eval_files']]):
-                print(loop_num * cfg['num_eval_files'] + file_num)
-                print('file is {}'.format(os.path.join(scenario_dir, file)))
-
-                env.unwrapped.files = [os.path.join(scenario_dir, file)]
-
-                # step the env to its conclusion to generate the expert trajectories we compare against
-                env.cfg = ade_cfg
-                env.reset()
-                expert_trajectory_dict = defaultdict(lambda: np.zeros((80, 2)))
-                env.unwrapped.make_all_vehicles_experts()
-                for i in range(80):
-                    for veh in env.unwrapped.get_objects_that_moved():
-                        expert_trajectory_dict[
-                            veh.id][i] = veh.position.numpy()
-                    env.unwrapped.simulation.step(0.1)
-
-                # compute the number of expert trajectories that intersect
-                # while filtering out the bits of the trajectory
-                # that were invalid
-                vehs_with_intersecting_ids = defaultdict(int)
-                for veh_id in expert_trajectory_dict.keys():
-                    for veh_id2 in expert_trajectory_dict.keys():
-                        if veh_id == veh_id2:
-                            continue
-                        trajectory = expert_trajectory_dict[veh_id]
-                        trajectory2 = expert_trajectory_dict[veh_id2]
-                        expert_mask_arr = trajectory.sum(axis=1)
-                        expert_mask = (expert_mask_arr != 0.0) * (
-                            expert_mask_arr != trajectory.shape[1] * ERR_VAL)
-                        trajectory = trajectory[expert_mask]
-                        expert_mask_arr = trajectory2.sum(axis=1)
-                        expert_mask = (expert_mask_arr != 0.0) * (
-                            expert_mask_arr != trajectory2.shape[1] * ERR_VAL)
-                        trajectory2 = trajectory2[expert_mask]
-                        if poly_intersection(trajectory, trajectory2):
-                            vehs_with_intersecting_ids[
-                                veh_id] += poly_intersection(
-                                    trajectory, trajectory2)
-
-                env.cfg = cfg
-                if test_zsc:
-                    output = run_rollouts(env, cfg, device,
-                                          expert_trajectory_dict,
-                                          distance_bins, intersections_bins,
-                                          vehs_with_intersecting_ids, actor_1,
-                                          actor_2)
-                else:
-                    output = run_rollouts(env, cfg, device,
-                                          expert_trajectory_dict,
-                                          distance_bins, intersections_bins,
-                                          vehs_with_intersecting_ids, actor_1)
-
-                avg_goal, avg_collisions, avg_veh_edge_collisions, avg_veh_veh_collisions, \
-                    success_rate_by_distance_return, success_rate_by_num_agents_return, \
-                    success_rate_by_intersections_return, \
-                    _, _, _ = output
-                # TODO(eugenevinitsky) hideous copy and pasting
-                goal_frac.append(avg_goal)
-                collision_frac.append(avg_collisions)
-                veh_veh_collision_frac.append(avg_veh_veh_collisions)
-                veh_edge_collision_frac.append(avg_veh_edge_collisions)
-                if test_zsc:
-                    success_rate_by_distance[
-                        index_1, index_2] += success_rate_by_distance_return
-                    success_rate_by_num_agents[
-                        index_1, index_2] += success_rate_by_num_agents_return
-                    success_rate_by_intersections[
-                        index_1,
-                        index_2] += success_rate_by_intersections_return
-                else:
-                    success_rate_by_distance[
-                        index_1] += success_rate_by_distance_return
-                    success_rate_by_num_agents[
-                        index_1] += success_rate_by_num_agents_return
-                    success_rate_by_intersections[
-                        index_1] += success_rate_by_intersections_return
-                # do some logging
-                log.info(
-                    f'Avg goal achieved {np.mean(goal_frac)}±{np.std(goal_frac) / len(goal_frac)}'
-                )
-                log.info(
-                    f'Avg veh-veh collisions {np.mean(veh_veh_collision_frac)}±\
-                        {np.std(veh_veh_collision_frac) / np.sqrt(len(veh_veh_collision_frac))}'
-                )
-                log.info(
-                    f'Avg veh-edge collisions {np.mean(veh_edge_collision_frac)}±\
-                        {np.std(veh_edge_collision_frac) / np.sqrt(len(veh_edge_collision_frac))}'
-                )
-                log.info(f'Avg num collisions {np.mean(collision_frac)}±\
-                        {np.std(collision_frac) / len(collision_frac)}')
-
-                env.cfg = ade_cfg
-                # okay, now run the rollout one more time but this time set
-                # remove_at_goal and remove_at_collide to be false so we can do the ADE computations
-                if test_zsc:
-                    output = run_rollouts(env, cfg, device,
-                                          expert_trajectory_dict,
-                                          distance_bins, intersections_bins,
-                                          vehs_with_intersecting_ids, actor_1,
-                                          actor_2)
-                else:
-                    output = run_rollouts(env, cfg, device,
-                                          expert_trajectory_dict,
-                                          distance_bins, intersections_bins,
-                                          vehs_with_intersecting_ids, actor_1)
-
-                _, _, _, _, _, _, _, ade, fde, veh_counter = output
-                average_displacement_error.append(ade)
-                final_displacement_error.append(fde)
-                log.info(f'Avg ADE {np.mean(average_displacement_error)}±\
-                        {np.std(average_displacement_error) / np.sqrt(len(average_displacement_error))}'
-                         )
-                log.info(f'Avg FDE {np.mean(final_displacement_error)}±\
-                        {np.std(final_displacement_error) / np.sqrt(len(final_displacement_error))}'
-                         )
-
-        if test_zsc:
-            goal_array[index_1, index_2] = goal_frac
-            collision_array[index_1, index_2] = collision_frac
-            veh_veh_collision_array[index_1, index_2] = veh_veh_collision_frac
-            veh_edge_collision_array[index_1,
-                                     index_2] = veh_edge_collision_frac
-            ade_array[index_1, index_2] = average_displacement_error
-            fde_array[index_1, index_2] = final_displacement_error
-        else:
-            goal_array[index_1] = goal_frac
-            collision_array[index_1] = collision_frac
-            veh_veh_collision_array[index_1] = veh_veh_collision_frac
-            veh_edge_collision_array[index_1] = veh_edge_collision_frac
-            ade_array[index_1] = average_displacement_error
-            fde_array[index_1] = final_displacement_error
-
-    if test_zsc:
-        file_type += '_zsc'
-    np.save(os.path.join(output_path, '{}_goal.npy'.format(file_type)),
-            goal_array)
-    np.save(os.path.join(output_path, '{}_collision.npy'.format(file_type)),
-            collision_array)
-    np.save(
-        os.path.join(output_path,
-                     '{}_veh_veh_collision.npy'.format(file_type)),
-        veh_veh_collision_array)
-    np.save(
-        os.path.join(output_path,
-                     '{}_veh_edge_collision.npy'.format(file_type)),
-        veh_edge_collision_array)
-    np.save(os.path.join(output_path, '{}_ade.npy'.format(file_type)),
-            ade_array)
-    np.save(os.path.join(output_path, '{}_fde.npy'.format(file_type)),
-            fde_array)
-    with open(
-            os.path.join(output_path,
-                         '{}_success_by_veh_number.npy'.format(file_type)),
-            'wb') as f:
-        np.save(f, success_rate_by_num_agents)
-    with open(
-            os.path.join(output_path,
-                         '{}_success_by_dist.npy'.format(file_type)),
-            'wb') as f:
-        np.save(f, success_rate_by_distance)
-    with open(
-            os.path.join(
-                output_path,
-                '{}_success_by_num_intersections.npy'.format(file_type)),
-            'wb') as f:
-        np.save(f, success_rate_by_intersections)
-
-    env.close()
-
-    return
-
-
-def load_wandb(experiment_name, cfg_filter, force_reload=False):
-    """Pull the results from the wandb server.
-
-    Args:
-    ----
-        experiment_name (str): name of the wandb group.
-        cfg_filter (function): use the config dict to filter
-                               which runs are actually kept
-        force_reload (bool, optional): if true we overwrite
-                                       the wandb csv
-                                       even if it exists.
-    """
-    if not os.path.exists(
-            'wandb_{}.csv'.format(experiment_name)) or force_reload:
-        import wandb
-
-        api = wandb.Api()
-        entity, project = "eugenevinitsky", "nocturne4"  # set to your entity and project
-        runs = api.runs(entity + "/" + project)
-
-        history_list = []
-        for run in runs:
-            if run.name == experiment_name:
-
-                # # .config contains the hyperparameters.
-                # #  We remove special values that start with _.
-                config = {
-                    k: v
-                    for k, v in run.config.items() if not k.startswith('_')
-                }
-                if cfg_filter(config):
-                    history_df = run.history()
-                    history_df['seed'] = config['seed']
-                    history_df['num_files'] = config['num_files']
-                    history_list.append(history_df)
-
-        runs_df = pd.concat(history_list)
-        runs_df.to_csv('wandb_{}.csv'.format(experiment_name))
-
-
-def plot_goal_achieved(experiment_name, global_step_cutoff=3e9):
-    """Use the WANDB CSV to plot number of train steps v. goal achieved."""
-    plt.figure(dpi=300)
-    df = pd.read_csv("wandb_{}.csv".format(experiment_name))
-    df["timestamp"] = pd.to_datetime(df["_timestamp"] * 1e9)
-
-    # technically not correct if the number of seeds varies by num_files
-    # but in this case we're alright
-    num_seeds = len(np.unique(df.seed.values))
-
-    values_num_files = np.unique(df.num_files.values)
-    column = "0_aux/avg_goal_achieved"
-    dfs = []
-    stdevs = []
-    for num_files in values_num_files:
-        if num_files == 1:
-            continue
-
-        df_n = df[(df.num_files == num_files)
-                  & (df.global_step < global_step_cutoff)].set_index(
-                      'global_step').sort_index()
-        if num_files == -1:
-            col_name = 134453
-        else:
-            col_name = num_files
-        dfs.append((df_n[column] * 100).ewm(
-            halflife=500,
-            min_periods=10).mean().rename(f"num_files={col_name}"))
-        stdevs.append((df_n[column] * 100).ewm(halflife=500,
-                                               min_periods=10).std())
-
-    values_num_files = [
-        val if val != -1 else 134453 for val in values_num_files
-    ]
-    temp = list(zip(values_num_files, dfs, stdevs))
-    temp = sorted(temp, key=lambda x: x[0])
-    values_num_files, dfs, stdevs = zip(*temp)
-    ax = plt.gca()
-    for i in range(len(dfs)):
-        x = dfs[i].index.values
-        y = dfs[i].values
-        yerr = stdevs[i].replace(np.nan, 0) / np.sqrt(num_seeds)
-        ax.plot(x,
-                y,
-                label=f'Training Files: {values_num_files[i]}',
-                color=CB_color_cycle[i])
-        ax.fill_between(x,
-                        y - 2 * yerr,
-                        y + 2 * yerr,
-                        color=CB_color_cycle[i],
-                        alpha=0.3)
-    plt.grid(ls='--', color='#ccc')
-    plt.legend()
-    plt.xlabel("Environment step")
-    plt.ylabel("% Goals Achieved")
-    plt.savefig('goal_achieved_v_step', bbox_inches='tight', pad_inches=0.1)
-
-
-def eval_generalization(output_folder,
-                        num_eval_files,
-                        files,
-                        file_type,
-                        scenario_dir,
-                        num_file_loops,
-                        test_zsc=False,
-                        cfg_filter=None):
-    """Evaluate generalization for all agent checkpoints in output_folder.
-
-    Args:
-    ----
-        output_folder (str): path to folder containing agent checkpoints
-        num_eval_files (int): how many files to use for eval
-        files (list[str]): list of scenario files to use for eval
-        file_type (str): 'train' or 'test' used to indicate if we are
-                         testing in or out of distribution
-        scenario_dir (str): path to directory where `files` are stored
-        num_file_loops (int): how many times to iterate over the files.
-                              Used for in-distribution testing if
-                              in-distribution we trained on M files
-                              but we want to test over N files where
-                              N > M.
-        test_zsc (bool, optional): If true we pair up ever
-                                   agent in the folder and compute
-                                   all the cross-play scores. Defaults to False.
-        cfg_filter (_type_, optional): function used to filter over
-                                       whether eval should actually be done on that
-                                       agent. Filters using the agent config dict.
-    """
-    file_paths = []
-    cfg_dicts = []
-    for (dirpath, dirnames, filenames) in os.walk(output_folder):
-        if 'cfg.json' in filenames:
-            with open(os.path.join(dirpath, 'cfg.json'), 'r') as file:
-                cfg_dict = json.load(file)
-
-            if cfg_filter is not None and not cfg_filter(cfg_dict):
-                continue
-            file_paths.append(dirpath)
-            cfg_dict['cli_args'] = {}
-            cfg_dict['fps'] = 0
-            cfg_dict['render_action_repeat'] = None
-            cfg_dict['no_render'] = None
-            cfg_dict['policy_index'] = 0
-            cfg_dict['record_to'] = os.path.join(os.getcwd(), '..', 'recs')
-            cfg_dict['continuous_actions_sample'] = False
-            cfg_dict['discrete_actions_sample'] = False
-            # for the train set, we don't want to loop over
-            # files we didn't train on
-            # also watch out for -1 which means "train on all files"
-            if cfg_dict[
-                    'num_files'] < num_eval_files and 'train' in file_type and cfg_dict[
-                        'num_files'] != -1:
-                cfg_dict['num_eval_files'] = cfg_dict['num_files']
-                cfg_dict['num_file_loops'] = num_file_loops * int(
-                    max(num_eval_files // cfg_dict['num_files'], 1))
-            else:
-                cfg_dict['num_eval_files'] = num_eval_files
-                cfg_dict['num_file_loops'] = num_file_loops
-            cfg_dicts.append(cfg_dict)
-    if test_zsc:
-        # TODO(eugenevinitsky) we're currently storing the ZSC result in a random
-        # folder which seems bad.
-        run_eval([Bunch(cfg_dict) for cfg_dict in cfg_dicts],
-                 test_zsc=test_zsc,
-                 output_path=file_paths[0],
-                 scenario_dir=scenario_dir,
-                 files=files,
-                 file_type=file_type)
-        print('stored ZSC result in {}'.format(file_paths[0]))
-    else:
-        # why 13? because a 16 GB GPU can do a forwards pass on 13 copies of the model
-        # for 20 vehicles at once. More than that and you'll run out of memory
-        num_cpus = min(13, mp.cpu_count() - 2)
-        device = 'cuda'
-        # if torch.cuda.is_available():
-        #     device = 'cuda'
-        # else:
-        #     device = 'cpu'
-        with mp.Pool(processes=num_cpus) as pool:
-            list(
-                pool.starmap(
-                    run_eval,
-                    zip(cfg_dicts, repeat(test_zsc), file_paths,
-                        repeat(scenario_dir), repeat(files), repeat(file_type),
-                        repeat(device))))
-    print(file_paths)
-
-
-def main():
-    """Script entry point."""
-    set_display_window()
-    register_custom_components()
-    RUN_EVAL = False
-    TEST_ZSC = False
-    PLOT_RESULTS = True
-    RELOAD_WANDB = False
-    VERSION = 5
-    NUM_EVAL_FILES = 200
-    NUM_FILE_LOOPS = 1  # the number of times to loop over a fixed set of files
-    experiment_names = ['srt_v27']
-    # output_folder = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.20/new_road_sample/18.32.35'
-    # output_folder = [
-    #     '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.23/srt_v10/17.02.40/'
-    # ]
-    # 10 files
-    # output_folder = [
-    #     '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.28/srt_12/16.43.16/'
-    # ]
-    # SRT submission results
-    output_folder = [
-        '/checkpoint/eugenevinitsky/nocturne/sweep/2022.06.01/srt_v27/17.35.33'
-    ]
-    generalization_dfs = []
-
-    cfg_filter = None
-
-    if TEST_ZSC:
-
-        def cfg_filter(cfg_dict):
-            if cfg_dict['scenario']['road_edge_first'] is False and cfg_dict[
-                    'scenario']['max_visible_road_points'] == 500 and cfg_dict[
-                        'algorithm']['encoder_hidden_size'] == 256 and cfg_dict[
-                            'num_files'] == 10000:
-                return True
-            else:
-                return False
-    else:
-
-        def cfg_filter(cfg_dict):
-            if cfg_dict['scenario']['road_edge_first'] is False and cfg_dict[
-                    'scenario']['max_visible_road_points'] == 500 and cfg_dict[
-                        'algorithm']['encoder_hidden_size'] == 256:
-                return True
-            else:
-                return False
-
-    '''
-    ###############################################################################
-    #########           Build the generalization dataframes ######################
-    ##############################################################################
-    '''
-
-    if RUN_EVAL:
-        if TEST_ZSC:
-            output_generator = [(PROCESSED_VALID_NO_TL,
-                                 'test_{}'.format(VERSION))]
-        else:
-            output_generator = [
-                (PROCESSED_TRAIN_NO_TL, 'train_{}'.format(VERSION)),
-                (PROCESSED_VALID_NO_TL, 'test_{}'.format(VERSION))
-            ]
-
-        for file_path, file_type in output_generator:
-            with open(os.path.join(file_path, 'valid_files.json')) as file:
-                valid_veh_dict = json.load(file)
-                files = list(valid_veh_dict.keys())
-                if file_type == 'test_{}'.format(VERSION):
-                    # sort the files so that we have a consistent order
-                    np.random.seed(0)
-                    np.random.shuffle(files)
-                if file_type == 'train_{}'.format(VERSION):
-                    # for train make sure we use the same ordering
-                    # that is used in base_env
-                    # TODO(eugenevinitsky) this is dangerous and could
-                    # break easily
-                    files = sorted(files)
-            for folder in output_folder:
-                eval_generalization(folder,
-                                    NUM_EVAL_FILES,
-                                    files,
-                                    file_type=file_type,
-                                    scenario_dir=file_path,
-                                    num_file_loops=NUM_FILE_LOOPS,
-                                    test_zsc=TEST_ZSC,
-                                    cfg_filter=cfg_filter)
-
-    if PLOT_RESULTS:
-        # okay, now build a pandas dataframe of the results that we will use for plotting
-        # the generalization results
-        for folder in output_folder:
-            for file_type in [
-                    'train_{}'.format(VERSION), 'test_{}'.format(VERSION)
-                    # 'train',
-                    # 'test'
-            ]:
-                file_paths = []
-                data_dicts = []
-                for (dirpath, dirnames, filenames) in os.walk(folder):
-                    if 'cfg.json' in filenames:
-                        file_paths.append(dirpath)
-                        with open(os.path.join(dirpath, 'cfg.json'),
-                                  'r') as file:
-                            cfg_dict = json.load(file)
-                        if cfg_filter(cfg_dict):
-                            # TODO(eugenevinitsky) why do they not all have this?
-                            goal = np.mean(
-                                np.load(
-                                    os.path.join(
-                                        dirpath,
-                                        '{}_goal.npy'.format(file_type))))
-                            collide = np.mean(
-                                np.load(
-                                    os.path.join(
-                                        dirpath,
-                                        '{}_collision.npy'.format(file_type))))
-                            ade = np.mean(
-                                np.load(
-                                    os.path.join(
-                                        dirpath,
-                                        '{}_ade.npy'.format(file_type))))
-                            fde = np.mean(
-                                np.load(
-                                    os.path.join(
-                                        dirpath,
-                                        '{}_fde.npy'.format(file_type))))
-                            veh_veh_collision = np.mean(
-                                np.load(
-                                    os.path.join(
-                                        dirpath,
-                                        '{}_veh_veh_collision.npy'.format(
-                                            file_type))))
-                            veh_edge_collision = np.mean(
-                                np.load(
-                                    os.path.join(
-                                        dirpath,
-                                        '{}_veh_edge_collision.npy'.format(
-                                            file_type))))
-                            success_by_num_intersections = np.load(
-                                os.path.join(
-                                    dirpath,
-                                    '{}_success_by_num_intersections.npy'.
-                                    format(file_type)))
-                            # there aren't a lot of data points past 3
-                            # so just bundle them in
-                            success_by_num_intersections[:,
-                                                         3, :] = success_by_num_intersections[:, 3:, :].sum(
-                                                             axis=1)
-                            success_by_num_intersections = success_by_num_intersections[:,
-                                                                                        0:
-                                                                                        4, :]
-                            success_by_veh_num = np.load(
-                                os.path.join(
-                                    dirpath,
-                                    '{}_success_by_veh_number.npy'.format(
-                                        file_type)))
-                            success_by_distance = np.load(
-                                os.path.join(
-                                    dirpath, '{}_success_by_dist.npy'.format(
-                                        file_type)))
-                            num_files = cfg_dict['num_files']
-                            if int(num_files) == -1:
-                                num_files = 134453
-                            if int(num_files) == 1:
-                                continue
-                            data_dicts.append({
-                                'num_files':
-                                num_files,
-                                'goal_rate':
-                                goal * 100,
-                                'collide_rate':
-                                collide * 100,
-                                'ade':
-                                ade,
-                                'fde':
-                                fde,
-                                'veh_veh_collision':
-                                veh_veh_collision,
-                                'veh_edge_collision':
-                                veh_edge_collision,
-                                'goal_by_intersections':
-                                np.nan_to_num(
-                                    success_by_num_intersections[0, :, 0] /
-                                    success_by_num_intersections[0, :, 3]),
-                                'collide_by_intersections':
-                                np.nan_to_num(
-                                    success_by_num_intersections[0, :, 1] /
-                                    success_by_num_intersections[0, :, 3]),
-                                'goal_by_vehicle_num':
-                                np.nan_to_num(success_by_veh_num[0, :, 0] /
-                                              success_by_veh_num[0, :, 3]),
-                                'collide_by_vehicle_num':
-                                np.nan_to_num(success_by_veh_num[0, :, 1] /
-                                              success_by_veh_num[0, :, 3]),
-                                'goal_by_distance':
-                                np.nan_to_num(success_by_distance[0, :, 0] /
-                                              success_by_distance[0, :, 3]),
-                                'collide_by_distance':
-                                np.nan_to_num(success_by_distance[0, :, 1] /
-                                              success_by_distance[0, :, 3]),
-                            })
-                            if cfg_dict['num_files'] == 10000:
-                                print('goal ',
-                                      success_by_num_intersections[0, :, 0])
-                                print('num vehicles in bin',
-                                      success_by_num_intersections[0, :, 3])
-                df = pd.DataFrame(data_dicts)
-                new_dict = {}
-                for key in data_dicts[0].keys():
-                    if key == 'num_files':
-                        continue
-                    new_dict[key] = df.groupby(['num_files'
-                                                ])[key].mean().reset_index()
-                    try:
-                        new_dict[key + '_std'] = df.groupby(
-                            ['num_files'])[key].std().reset_index().rename(
-                                columns={key: key + '_std'})
-                    except ValueError:
-                        # TODO(eugenevinitsky) learn to use pandas dawg
-                        # what even is this
-                        temp_dict = {}
-                        for name, group in df.groupby(['num_files'])[key]:
-                            temp = []
-                            for arr in group:
-                                temp.append(arr)
-                            np_arr = np.vstack(temp)
-                            std_err = np.std(np_arr, axis=0) / np.sqrt(
-                                np_arr.shape[0])
-                            temp_dict[name] = std_err
-                        new_dict[key + '_stderr'] = pd.Series(
-                            data=temp_dict).reset_index().rename(
-                                columns={
-                                    'index': 'num_files',
-                                    0: key + '_stderr'
-                                })
-                first_elem_key = 'goal_rate'
-                first_elem = new_dict[first_elem_key]
-                for key, value in new_dict.items():
-                    if key == first_elem_key:
-                        continue
-                    first_elem = first_elem.merge(value,
-                                                  how='inner',
-                                                  on='num_files')
-                generalization_dfs.append(first_elem)
-            '''
-        ###############################################################################
-        #########  load the training dataframes from wandb ######################
-        ##############################################################################
-        '''
-        global_step_cutoff = 3e9
-        training_dfs = []
-        for experiment_name in experiment_names:
-            load_wandb(experiment_name, cfg_filter, force_reload=RELOAD_WANDB)
-            training_dfs.append(
-                pd.read_csv('wandb_{}.csv'.format(experiment_name)))
-
-        num_seeds = len(np.unique(training_dfs[0].seed))
-        # create the goal plot
-        plt.figure(dpi=300)
-        for i, (df, file_type) in enumerate(
-                zip(generalization_dfs, ['Train', 'Test'])):
-            plt.plot(np.log10(df.num_files),
-                     df.goal_rate,
-                     color=CB_color_cycle[i],
-                     label=file_type)
-            ax = plt.gca()
-            yerr = df.goal_rate_std.replace(np.nan, 0) / np.sqrt(num_seeds)
-            ax.fill_between(np.log10(df.num_files),
-                            df.goal_rate - 2 * yerr,
-                            df.goal_rate + 2 * yerr,
-                            color=CB_color_cycle[i],
-                            alpha=0.3)
-            print(f'{file_type} goal rate', df.goal_rate, yerr)
-        plt.ylim([0, 100])
-        plt.xlabel(' Number of Training Files (Logarithmic Scale)')
-        plt.ylabel('% Goals Achieved')
-        plt.legend()
-        plt.savefig('goal_achieved.png', bbox_inches='tight', pad_inches=0.1)
-
-        # create the collide plot
-        plt.figure(dpi=300)
-        for i, (df, file_type) in enumerate(
-                zip(generalization_dfs, ['Train', 'Test'])):
-            plt.plot(np.log10(df.num_files),
-                     df.collide_rate,
-                     color=CB_color_cycle[i],
-                     label=file_type)
-            ax = plt.gca()
-            yerr = df.collide_rate_std.replace(np.nan, 0) / np.sqrt(num_seeds)
-            ax.fill_between(np.log10(df.num_files),
-                            df.collide_rate - 2 * yerr,
-                            df.collide_rate + 2 * yerr,
-                            color=CB_color_cycle[i],
-                            alpha=0.3)
-            print(f'{file_type} collide rate', df.collide_rate, yerr)
-        plt.ylim([0, 50])
-        plt.xlabel(' Number of Training Files (Logarithmic Scale)')
-        plt.ylabel('% Vehicles Collided')
-        plt.legend()
-        plt.savefig('collide_rate.png', bbox_inches='tight', pad_inches=0.1)
-
-        # create ADE and FDE plots
-
-        plt.figure(dpi=300)
-        for i, (df, file_type) in enumerate(
-                zip(generalization_dfs, ['Train', 'Test'])):
-            yerr = df.ade_std.replace(np.nan, 0) / np.sqrt(num_seeds)
-            plt.plot(np.log10(df.num_files),
-                     df.ade,
-                     label=file_type,
-                     color=CB_color_cycle[i])
-            ax = plt.gca()
-            ax.fill_between(np.log10(df.num_files),
-                            df.ade - 2 * yerr,
-                            df.ade + 2 * yerr,
-                            color=CB_color_cycle[i],
-                            alpha=0.3)
-            print(f'{file_type} ade', df.ade, yerr)
-        plt.xlabel(' Number of Training Files (Logarithmic Scale)')
-        plt.ylabel('Average Displacement Error (m)')
-        plt.ylim([0, 5])
-        plt.legend()
-        plt.savefig('ade.png', bbox_inches='tight', pad_inches=0.1)
-
-        plt.figure(dpi=300)
-        for i, (df, file_type) in enumerate(
-                zip(generalization_dfs, ['Train', 'Test'])):
-            yerr = df.fde_std.replace(np.nan, 0) / np.sqrt(num_seeds)
-            plt.plot(np.log10(df.num_files),
-                     df.fde,
-                     label=file_type,
-                     color=CB_color_cycle[i])
-            ax = plt.gca()
-            ax.fill_between(np.log10(df.num_files),
-                            df.fde - 2 * yerr,
-                            df.fde + 2 * yerr,
-                            color=CB_color_cycle[i],
-                            alpha=0.3)
-            print(f'{file_type} fde', df.fde, yerr)
-        plt.ylim([4, 10])
-        plt.xlabel(' Number of Training Files (Logarithmic Scale)')
-        plt.ylabel('Final Displacement Error (m)')
-        plt.legend()
-        plt.savefig('fde.png', bbox_inches='tight', pad_inches=0.1)
-        plot_goal_achieved(experiment_names[0], global_step_cutoff)
-
-        # create error by number of expert intersections plots
-        plt.figure(dpi=300)
-        for i, (df, file_type) in enumerate(
-                zip(generalization_dfs, ['Train', 'Test'])):
-            values_num_files = np.unique(df.num_files.values)
-            print(values_num_files)
-            for value in values_num_files:
-                if value != 10000:
-                    continue
-                numpy_arr = df[df.num_files ==
-                               value]['goal_by_intersections'].to_numpy()[0]
-                temp_df = pd.DataFrame(numpy_arr).melt()
-                plt.plot(temp_df.index,
-                         temp_df.value * 100,
-                         label=file_type,
-                         color=CB_color_cycle[i])
-                numpy_arr = df[df.num_files == value][
-                    'goal_by_intersections_stderr'].to_numpy()[0]
-                std_err_df = pd.DataFrame(numpy_arr).melt()
-                ax = plt.gca()
-                ax.fill_between(temp_df.index,
-                                100 * (temp_df.value - 2 * std_err_df.value),
-                                100 * (temp_df.value + 2 * std_err_df.value),
-                                color=CB_color_cycle[i],
-                                alpha=0.3)
-
-        plt.xlabel('Number of intersecting paths')
-        plt.ylabel('Percent Goals Achieved')
-        ax.set_xticks([i for i in range(numpy_arr.shape[-1])])
-        plt.legend()
-        plt.savefig('goal_v_intersection.png',
-                    bbox_inches='tight',
-                    pad_inches=0.1)
-
-        # create error by number of expert intersections plots
-        plt.figure(dpi=300)
-        for i, (df, file_type) in enumerate(
-                zip(generalization_dfs, ['Train', 'Test'])):
-            values_num_files = np.unique(df.num_files.values)
-            for value in values_num_files:
-                if value != 10000:
-                    continue
-                numpy_arr = df[df.num_files ==
-                               value]['collide_by_intersections'].to_numpy()[0]
-                temp_df = pd.DataFrame(numpy_arr).melt()
-                plt.plot(temp_df.index,
-                         temp_df.value * 100,
-                         color=CB_color_cycle[i],
-                         label=file_type)
-                numpy_arr = df[df.num_files == value][
-                    'collide_by_intersections_stderr'].to_numpy()[0]
-                std_err_df = pd.DataFrame(numpy_arr).melt()
-                ax = plt.gca()
-                ax.fill_between(temp_df.index,
-                                100 * (temp_df.value - 2 * std_err_df.value),
-                                100 * (temp_df.value + 2 * std_err_df.value),
-                                color=CB_color_cycle[i],
-                                alpha=0.3)
-        plt.xlabel('Number of Intersecting Paths')
-        plt.ylabel('Percent Collisions')
-        ax.set_xticks([i for i in range(numpy_arr.shape[-1])])
-        plt.legend()
-        plt.savefig('collide_v_intersection.png',
-                    bbox_inches='tight',
-                    pad_inches=0.1)
-
-        # create error by number of vehicles plots
-        plt.figure(dpi=300)
-        for i, (df, file_type) in enumerate(
-                zip(generalization_dfs, ['Train', 'Test'])):
-            values_num_files = np.unique(df.num_files.values)
-            print(values_num_files)
-            for value in values_num_files:
-                if value != 10000:
-                    continue
-                numpy_arr = df[df.num_files ==
-                               value]['goal_by_vehicle_num'].to_numpy()[0]
-                temp_df = pd.DataFrame(numpy_arr).melt()
-                plt.plot(temp_df.index,
-                         temp_df.value * 100,
-                         label=file_type,
-                         color=CB_color_cycle[i])
-                numpy_arr = df[df.num_files == value][
-                    'goal_by_vehicle_num_stderr'].to_numpy()[0]
-                std_err_df = pd.DataFrame(numpy_arr).melt()
-                ax = plt.gca()
-                ax.fill_between(temp_df.index,
-                                100 * (temp_df.value - 2 * std_err_df.value),
-                                100 * (temp_df.value + 2 * std_err_df.value),
-                                color=CB_color_cycle[i],
-                                alpha=0.3)
-                # sns.lineplot(x=temp_df.index, y=temp_df.value * 100)
-        plt.xlabel('Number of Controlled Vehicles')
-        plt.ylabel('Percent Goals Achieved')
-        ax.set_xticks([i for i in range(numpy_arr.shape[-1])])
-        plt.legend()
-        plt.savefig('goal_v_vehicle_num.png',
-                    bbox_inches='tight',
-                    pad_inches=0.1)
-
-        # create error by distance plots
-        plt.figure(dpi=300)
-        for i, (df, file_type) in enumerate(
-                zip(generalization_dfs, ['Train', 'Test'])):
-            values_num_files = np.unique(df.num_files.values)
-            print(values_num_files)
-            for value in values_num_files:
-                if value != 10000:
-                    continue
-                numpy_arr = df[df.num_files ==
-                               value]['goal_by_distance'].to_numpy()[0]
-                temp_df = pd.DataFrame(numpy_arr).melt()
-                plt.plot(temp_df.index,
-                         temp_df.value * 100,
-                         label=file_type,
-                         color=CB_color_cycle[i])
-                numpy_arr = df[df.num_files ==
-                               value]['goal_by_distance_stderr'].to_numpy()[0]
-                std_err_df = pd.DataFrame(numpy_arr).melt()
-                ax = plt.gca()
-                ax.fill_between(temp_df.index,
-                                100 * (temp_df.value - 2 * std_err_df.value),
-                                100 * (temp_df.value + 2 * std_err_df.value),
-                                color=CB_color_cycle[i],
-                                alpha=0.3)
-                # sns.lineplot(x=temp_df.index, y=temp_df.value * 100)
-        plt.xlabel('Starting Distance to Goal')
-        plt.ylabel('Percent Goals Achieved')
-        ax.set_xticks([i for i in range(numpy_arr.shape[-1])])
-        plt.legend()
-        plt.savefig('goal_v_distance.png', bbox_inches='tight', pad_inches=0.1)
-
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/scripts/paper_plots/generate_scenes.py b/scripts/paper_plots/generate_scenes.py
deleted file mode 100644
index 985942ea..00000000
--- a/scripts/paper_plots/generate_scenes.py
+++ /dev/null
@@ -1,152 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Example of how to make movies of Nocturne scenarios."""
-import hydra
-import imageio
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-
-from cfgs.config import PROCESSED_TRAIN_NO_TL, PROJECT_PATH, \
-    get_scenario_dict, set_display_window
-from nocturne import Simulation
-
-
-def get_sim(scenario_file, cfg):
-    """Initialize the scenario."""
-    # load scenario, set vehicles to be expert-controlled
-    cfg['scenario']['allow_non_vehicles'] = False
-    sim = Simulation(scenario_path=str(scenario_file),
-                     config=get_scenario_dict(cfg))
-    for obj in sim.getScenario().getObjectsThatMoved():
-        obj.expert_control = True
-    return sim
-
-
-def make_movie(sim,
-               scenario_fn,
-               output_path='./vid.mp4',
-               dt=0.1,
-               steps=90,
-               fps=10):
-    """Make a movie from the scenario."""
-    scenario = sim.getScenario()
-    movie_frames = []
-    timestep = 0
-    movie_frames.append(scenario_fn(scenario, timestep))
-    for i in range(steps):
-        sim.step(dt)
-        timestep += 1
-        movie_frames.append(scenario_fn(scenario, timestep))
-    movie_frames = np.stack(movie_frames, axis=0)
-    imageio.mimwrite(output_path, movie_frames, fps=fps)
-    print('>', output_path)
-    del sim
-    del movie_frames
-
-
-def make_image(sim, scenario_file, scenario_fn, output_path='./img.png'):
-    """Make a single image from the scenario."""
-    scenario = sim.getScenario()
-    img = scenario_fn(scenario)
-    dpi = 100
-    height, width, depth = img.shape
-    figsize = width / dpi, height / dpi
-    plt.figure(figsize=figsize, dpi=dpi)
-    plt.axis('off')
-    plt.imshow(img)
-    plt.savefig(output_path, bbox_inches='tight', pad_inches=0)
-    print('>', output_path)
-
-
-@hydra.main(config_path="../../cfgs/", config_name="config")
-def main(cfg):
-    """See file docstring."""
-    set_display_window()
-
-    # files = ['tfrecord-00358-of-01000_{}.json'.format(i) for i in range(500)]
-
-    files = [
-        'tfrecord-00358-of-01000_60.json',  # unprotected turn
-        'tfrecord-00358-of-01000_72.json',  # four way stop
-        'tfrecord-00358-of-01000_257.json',  # crowded four way stop
-        'tfrecord-00358-of-01000_332.json',  # crowded merge road
-        'tfrecord-00358-of-01000_79.json',  # crowded parking lot
-    ]
-    for file in files:
-        file = os.path.join(PROCESSED_TRAIN_NO_TL, file)
-        sim = get_sim(file, cfg)
-        if os.path.exists(file):
-            # image of whole scenario
-            # make_image(
-            #     sim,
-            #     file,
-            #     scenario_fn=lambda scenario: scenario.getImage(
-            #         img_width=2000,
-            #         img_height=2000,
-            #         padding=50.0,
-            #         draw_target_positions=True,
-            #     ),
-            #     output_path=PROJECT_PATH /
-            #     'scripts/paper_plots/figs/scene_{}.png'.format(
-            #         os.path.basename(file)),
-            # )
-
-            veh_index = -3
-            make_image(
-                sim,
-                file,
-                scenario_fn=lambda scenario: scenario.getImage(
-                    img_height=1600,
-                    img_width=1600,
-                    draw_target_positions=True,
-                    padding=0.0,
-                    source=scenario.getVehicles()[veh_index],
-                    view_height=80,
-                    view_width=80,
-                    rotate_with_source=True,
-                ),
-                output_path=PROJECT_PATH /
-                'scripts/paper_plots/figs/cone_original_{}.png'.format(
-                    os.path.basename(file)),
-            )
-            make_image(
-                sim,
-                file,
-                scenario_fn=lambda scenario: scenario.getConeImage(
-                    source=scenario.getVehicles()[veh_index],
-                    view_dist=cfg['subscriber']['view_dist'],
-                    view_angle=cfg['subscriber']['view_angle'],
-                    head_angle=0.0,
-                    img_height=1600,
-                    img_width=1600,
-                    padding=0.0,
-                    draw_target_position=True,
-                ),
-                output_path=PROJECT_PATH /
-                'scripts/paper_plots/figs/cone_{}.png'.format(
-                    os.path.basename(file)),
-            )
-            make_image(
-                sim,
-                file,
-                scenario_fn=lambda scenario: scenario.getFeaturesImage(
-                    source=scenario.getVehicles()[veh_index],
-                    view_dist=cfg['subscriber']['view_dist'],
-                    view_angle=cfg['subscriber']['view_angle'],
-                    head_angle=0.0,
-                    img_height=1600,
-                    img_width=1600,
-                    padding=0.0,
-                    draw_target_position=True,
-                ),
-                output_path=PROJECT_PATH /
-                'scripts/paper_plots/figs/feature_{}.png'.format(
-                    os.path.basename(file)),
-            )
-
-
-if __name__ == '__main__':
-    main()
diff --git a/scripts/utils.py b/scripts/utils.py
deleted file mode 100644
index 21be3246..00000000
--- a/scripts/utils.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Storage for SLURM running utilities."""
-
-
-class Overrides(object):
-    """Utility class used to convert commands into a bash runnable string."""
-
-    def __init__(self):
-        """Initialize class."""
-        self.kvs = dict()
-
-    def add(self, key, values):
-        """Add each of the desired key value pairs into a dict."""
-        value = ','.join(str(v) for v in values)
-        assert key not in self.kvs
-        self.kvs[key] = value
-
-    def cmd(self):
-        """Append the keys together into a command that can be run."""
-        cmd = []
-        for k, v in self.kvs.items():
-            cmd.append(f'{k}={v}')
-        return cmd
diff --git a/scripts/visualization/visualize_waymo_map.py b/scripts/visualization/visualize_waymo_map.py
deleted file mode 100644
index 07f6c191..00000000
--- a/scripts/visualization/visualize_waymo_map.py
+++ /dev/null
@@ -1,155 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Plot the text file representation of a protobuf."""
-import matplotlib.patches as mpatches
-import matplotlib.pyplot as plt
-import pprint
-
-pp = pprint.PrettyPrinter()
-
-data = {}
-
-current = data
-file = 'output.txt'
-show_tracks = True
-parent_keys = []
-with open(file, 'r') as f:
-    lines = f.read().split('\n')
-    for line in lines:
-        # print(line)
-        if ":" in line:
-            k, v = [x.strip() for x in line.split(':')]
-            if k in current:
-                current[k].append(v)
-            else:
-                current[k] = [v]
-        elif "{" in line:
-            k = line[:-1].strip()
-            if k not in current:
-                current[k] = []
-            parent_keys.append(k)
-            current[k].append({})
-            current = current[k][-1]
-        elif "}" in line:
-            current = data
-            for k in parent_keys[:-1]:
-                current = current[k][-1]
-            parent_keys = parent_keys[:-1]
-        else:
-            pass
-
-# message Scenario:
-# https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/protos/scenario.proto
-print('\nScenario')
-print(data.keys())
-
-# message Track, message ObjectState:
-# https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/protos/scenario.proto
-print('\nObjects (vehicles, pedestrians, cyclists..)')
-print(len(data['tracks']))
-print(data['tracks'][0].keys())
-print(len(data['tracks'][0]['states']))
-print(data['tracks'][0]['states'][0].keys())
-
-# message MapFeature:
-# https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/protos/map.proto
-print('\nMap (roads, lanes..)')
-print(len(data['map_features']))
-print(data['map_features'][0].keys())
-
-# supported values are '-', '--', '-.', ':', 'None', ' ', '', 'solid', 'dashed', 'dashdot', 'dotted'
-fig = plt.figure(figsize=(20, 20))
-
-for mf in data['map_features']:
-    k = list(mf.keys())[1]
-    assert len(mf[k]) == 1
-    v = mf[k][0]
-
-    if k == 'lane':
-        xs = []
-        ys = []
-        for pt in v['polyline']:
-            xs.append(float(pt['x'][0]))
-            ys.append(float(pt['y'][0]))
-        plt.plot(xs, ys, color='cyan', linewidth=1)
-
-    elif k == 'road_line':
-        edge_type = v['type'][0]
-        # linestyle = 'solid' if edge_type == 'TYPE_ROAD_EDGE_BOUNDARY' else 'dashdot'
-        # print(edge_type)
-
-        xs = []
-        ys = []
-        for pt in v['polyline']:
-            xs.append(float(pt['x'][0]))
-            ys.append(float(pt['y'][0]))
-        plt.plot(xs, ys, color='orange')
-
-    elif k == 'road_edge':
-        edge_type = v['type'][0]
-        linestyle = 'solid' if edge_type == 'TYPE_ROAD_EDGE_BOUNDARY' else 'dashdot'
-
-        xs = []
-        ys = []
-        for pt in v['polyline']:
-            xs.append(float(pt['x'][0]))
-            ys.append(float(pt['y'][0]))
-        plt.plot(xs, ys, color='black', linestyle=linestyle)
-
-    elif k == 'stop_sign':
-        pos = v['position'][0]
-        plt.plot(float(pos['x'][0]), float(pos['y'][0]), 'ro')
-
-    elif k == 'crosswalk':
-        xs = []
-        ys = []
-        for pt in v['polygon']:
-            xs.append(float(pt['x'][0]))
-            ys.append(float(pt['y'][0]))
-        plt.plot(xs, ys, color='purple', linestyle=linestyle)
-
-    elif k == 'speed_bump':
-        xs = []
-        ys = []
-        for pt in v['polygon']:
-            xs.append(float(pt['x'][0]))
-            ys.append(float(pt['y'][0]))
-        plt.plot(xs, ys, color='green', linestyle=linestyle)
-
-    else:
-        print('Error with key', k)
-
-if show_tracks:
-    img_arr = []
-
-    from celluloid import Camera
-    camera = Camera(plt.gcf())
-    ax = plt.gca()
-    # in range(len(data['tracks'][0]['states'])):
-    for i in range(20):
-        for object in data['tracks']:
-            if object['states'][i]['valid'][0] != 'false':
-                plt.scatter(float(object['states'][i]['center_x'][0]),
-                            float(object['states'][i]['center_y'][0]),
-                            c='blue',
-                            s=40)
-        # TODO(eugenevinitsky) this is a horrible way of copying over the figure
-        lines = list(ax.get_lines())
-        for obj in lines:
-            plt.plot(obj.get_data()[0], obj.get_data()[1])
-        camera.snap()
-    animation = camera.animate()
-    animation.save('animation.mp4')
-
-patches = []
-patches.append(mpatches.Patch(color='cyan', label='lane_center'))
-patches.append(mpatches.Patch(color='orange', label='road_line'))
-patches.append(mpatches.Patch(color='black', label='road_edge'))
-patches.append(mpatches.Patch(color='red', label='stop_sign'))
-patches.append(mpatches.Patch(color='purple', label='crosswalk'))
-patches.append(mpatches.Patch(color='green', label='speedbump'))
-plt.legend(handles=patches)
-
-plt.savefig(file.split('.')[0] + '.png')
diff --git a/scripts/visualization/waymo_movie.py b/scripts/visualization/waymo_movie.py
deleted file mode 100644
index c20ac6c1..00000000
--- a/scripts/visualization/waymo_movie.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Make a movie from a random file."""
-import os
-
-import hydra
-import imageio
-import matplotlib.pyplot as plt
-import numpy as np
-
-from cfgs.config import PROCESSED_TRAIN_NO_TL, get_scenario_dict, set_display_window
-from nocturne import Simulation
-
-
-@hydra.main(config_path="../../cfgs/", config_name="config")
-def main(cfg):
-    """See file docstring."""
-    set_display_window()
-    _ = plt.figure()
-    files = os.listdir(PROCESSED_TRAIN_NO_TL)
-    file = os.path.join(PROCESSED_TRAIN_NO_TL,
-                        files[np.random.randint(len(files))])
-    sim = Simulation(file, get_scenario_dict(cfg))
-    frames = []
-    scenario = sim.getScenario()
-    for veh in scenario.getVehicles():
-        veh.expert_control = True
-    for i in range(90):
-        img = scenario.getImage(
-            img_width=1600,
-            img_height=1600,
-            draw_target_positions=False,
-            padding=50.0,
-        )
-        frames.append(img)
-        sim.step(0.1)
-
-    movie_frames = np.array(frames)
-    output_path = f'{os.path.basename(file)}.mp4'
-    imageio.mimwrite(output_path, movie_frames, fps=30)
-    print('>', output_path)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index d8306bef..00000000
--- a/setup.cfg
+++ /dev/null
@@ -1,63 +0,0 @@
-[metadata]
-name = nocturne
-version = 0.0.1
-description = A data-driven, fast driving simulator for multi-agent coordination under partial observability.
-long_description = file: README.rst # todo
-author = Nathan Lichtle, Eugene Vinitsky, and Xiaomeng Yang
-author_email = nathan.lichtle@berkeley, ... # todo
-url = https://github.com/facebookresearch/nocturne/
-license = MIT
-
-classifiers = # todo
-    Development Status :: 5 - Production/Stable
-    Intended Audience :: Developers
-    Topic :: Software Development :: Libraries :: Python Modules
-    Topic :: Utilities
-    Programming Language :: C++
-    Programming Language :: Python :: 3 :: Only
-    Programming Language :: Python :: 3.6
-    Programming Language :: Python :: 3.7
-    Programming Language :: Python :: 3.8
-    Programming Language :: Python :: 3.9
-    Programming Language :: Python :: 3.10
-    License :: OSI Approved :: BSD License
-    Programming Language :: Python :: Implementation :: PyPy
-    Programming Language :: Python :: Implementation :: CPython
-    Programming Language :: C++
-    Topic :: Software Development :: Libraries :: Python Modules
-
-keywords = 
-    Driving
-    Simulation
-    Autonomous Vehicles
-    Waymo
-    Reinforcement Learning
-
-# todo
-project_urls =
-    Documentation = https://nocturne.readthedocs.io/
-    Bug Tracker = https://github.com/fb/nocturne/issues
-    Discussions = https://github.com/fb/nocturne/discussions
-    Changelog = https://nocturne.readthedocs.io/en/latest/changelog.html
-    Chat = https://gitter.im/nocturne/Lobby
-
-[options]
-zip_safe = False
-python_requires = >=3.8
-
-[tool:pytest]
-minversion = 6.0
-addopts = -ra -q
-testpaths =
-    tests
-
-[flake8]
-max-line-length = 120
-show_source = True
-exclude = .git, __pycache__, build, docs, _deps, third_party, algos, nocturne/envs/__init__.py, examples/nocturne_functions.py
-
-[pydocstyle]
-inherit = false
-match = .*\.py
-match_dir = ^(?!.git|__pycache__|build|docs|_deps|third_party|algos).*
-convention = numpy

- - - -	- - - -	- - - -
- - Eugene Vinitsky - -	- - Nathan Lichtlé - -	- - Xiaomeng Yang - -