diff --git a/.gitignore b/.gitignore index c6335cf4..1529576e 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,12 @@ dataset/ # configs configs.json + +# pyenv +.python-version + +# poetry +poetry.lock + +# wandb +wandb diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..d1117cb4 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,66 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: sort-simple-yaml + - id: check-json + - id: check-merge-conflict + - id: check-symlinks + - id: debug-statements + - id: check-added-large-files +- repo: https://github.com/python-poetry/poetry + rev: 1.6.0 + hooks: + - id: poetry-check + - id: poetry-lock + # - id: poetry-publish +- repo: https://github.com/psf/black + rev: 23.9.1 + hooks: + - id: black + args: [--line-length, '120'] +- repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort +- repo: https://github.com/PyCQA/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + args: [--max-line-length=120, --extend-ignore=E203] +- repo: https://github.com/PyCQA/pydocstyle + rev: 6.3.0 + hooks: + - id: pydocstyle + args: [--convention=numpy] + additional_dependencies: [tomli] +- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks + rev: v2.10.0 + hooks: + - id: pretty-format-toml + args: [--autofix, --no-sort] + - id: pretty-format-yaml + args: [--autofix] +- repo: local + hooks: + - id: pylint + name: pylint + entry: poetry run pylint + language: system + types: [python] + - id: poetry-export-requirements + name: poetry-export-requirements + entry: poetry export --without-hashes --with=main,research -f requirements.txt -o requirements.txt + language: system + types: [python] + pass_filenames: false + - id: poetry-export-requirements-dev + 
name: poetry-export-requirements-dev + entry: poetry export --without-hashes --only dev -f requirements.txt -o requirements.dev.txt + language: system + types: [python] + pass_filenames: false diff --git a/README.md b/README.md index cafb3bff..7f00d26b 100644 --- a/README.md +++ b/README.md @@ -1,235 +1,156 @@ -# Nocturne +# `nocturne_lab`: fast driving simulator 🧪 + 🚗 -Nocturne is a 2D, partially observed, driving simulator, built in C++ for speed and exported as a Python library. +`nocturne_lab` is a maintained fork of [Nocturne](https://github.com/facebookresearch/nocturne), a 2D, partially observed, driving simulator built in C++. Currently, `nocturne_lab` is used internally at the Emerge lab. You can get started with the intro examples 🏎️💨 [here](https://github.com/Emerge-Lab/nocturne_lab/tree/feature/nocturne_fork_cleanup/examples). -It is currently designed to handle traffic scenarios from the [Waymo Open Dataset](https://github.com/waymo-research/waymo-open-dataset), and with some work could be extended to support different driving datasets. Using the Python library `nocturne`, one is able to train controllers for AVs to solve various tasks from the Waymo dataset, which we provide as a benchmark, then use the tools we offer to evaluate the designed controllers. +## Basic usage -Using this rich data source, Nocturne contains a wide range of scenarios whose solution requires the formation of complex coordination, theory of mind, and handling of partial observability. Below we show replays of the expert data, centered on the light blue agent, with the corresponding view of the agent on the right. - -![Intersection Scene with Obscured View](./docs/readme_files/git_intersection_combined.gif) +```python +from nocturne.envs.base_env import BaseEnv -Nocturne features a rich variety of scenes, ranging from parking lots, to merges, to roundabouts, to unsignalized intersections. 
+# Initialize an environment +env = BaseEnv(config=env_config) -![Intersection Scene with Obscured View](./docs/readme_files/nocturne_3_by_3_scenes.gif) +# Reset +obs_dict = env.reset() -More videos can be found [here](https://www.nathanlct.com/research/nocturne). +# Get info +agent_ids = [agent_id for agent_id in obs_dict.keys()] +dead_agent_ids = [] -The corresponding paper is available at: [https://arxiv.org/abs/2206.09889](https://arxiv.org/abs/2206.09889). Please cite the paper and not the GitHub repository, using the following citation: +for step in range(1000): -```bibtex -@article{nocturne2022, - author = {Vinitsky, Eugene and Lichtlé, Nathan and Yang, Xiaomeng and Amos, Brandon and Foerster, Jakob}, - journal = {arXiv preprint arXiv:2206.09889}, - title = {{Nocturne: a scalable driving benchmark for bringing multi-agent learning one step closer to the real world}}, - url = {https://arxiv.org/abs/2206.09889}, - year = {2022} -} -``` + # Sample actions + action_dict = { + agent_id: env.action_space.sample() + for agent_id in agent_ids + if agent_id not in dead_agent_ids + } + + # Step in env + obs_dict, rew_dict, done_dict, info_dict = env.step(action_dict) -# Installation + # Update dead agents + for agent_id, is_done in done_dict.items(): + if is_done and agent_id not in dead_agent_ids: + dead_agent_ids.append(agent_id) -**Feel free to [open an issue](https://github.com/facebookresearch/nocturne/issues/new/choose) at any time if you encounter a problem, need some help with installing or using Nocturne, want to ask us any related question, or even propose a new feature. We will be happy to help!** + # Reset if all agents are done + if done_dict["__all__"]: + obs_dict = env.reset() + dead_agent_ids = [] -## Dependencies +# Close environment +env.close() +``` -[CMake](https://cmake.org/) is required to compile the C++ library. +## Implemented algorithms -Run `cmake --version` to see whether CMake is already installed in your environment. 
If not, refer to the CMake website instructions for installation, or you can use: +| Algorithm | Reference | Code | Compatible with | Notes | +| -------------------------------------- | ---------------------------------------------------------- | ----- | ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PPO **single-agent** control | [Schulman et al., 2017](https://arxiv.org/pdf/1707.06347.pdf) | [ppo_with_sb3.ipynb](https://github.com/Emerge-Lab/nocturne_lab/blob/feature/nocturne_fork_cleanup/examples/04_ppo_with_sb3.ipynb) | Stable baselines 3 | | +| PPO **multi-agent** control | [Schulman et al., 2017](https://arxiv.org/pdf/1707.06347.pdf) | `#TODO` | Stable baselines 3 | SB3 doesn't support multi-agent environments. We use the `VecEnv` class to treat observations from multiple agents as a set of vectorized single-agent environments. | +| | | | | | +| | | | | | -- `sudo apt-get -y install cmake` (Linux) -- `brew install cmake` (MacOS) +## Installation -### All machines besides OS with Mac M1 chip follow instructions below -Nocturne uses [SFML](https://github.com/SFML/SFML) for drawing and visualization, as well as on [pybind11](https://pybind11.readthedocs.io/en/latest/) for compiling the C++ code as a Python library. +### Requirements -To install SFML: +* Python (>=3.10) -- `sudo apt-get install libsfml-dev` (Linux) -- `brew install sfml` (MacOS) +### Virtual environment +Different options for setting up a virtual environment are described below. Any of them works, although `pyenv` is recommended. -pybind11 is included as a submodule and will be installed in the next step. +> _Note:_ The virtual environment needs to be **activated each time** before you start working. 
-### Machines with a Mac M1 chip -Unfortunately if you have a Mac M1 chip you need to ensure that your SFML version is x86_64 instead of arm64; by default brew will install the arm64 variant. The following instructions will help you do this. +#### Option 1: `pyenv` +Create a virtual environment by running: -1. Make sure you have rosetta2 installed. You can do this by running `softwareupdate --install-rosetta` from the command line. -2. Build an x86_64 version of brew (which you alias to brow) using the instructions here: [stackoverflow](https://stackoverflow.com/questions/64951024/how-can-i-run-two-isolated-installations-of-homebrew). -3. Now, run `brow install sfml` -then everything will compile fine. +```shell +pyenv virtualenv 3.10.12 nocturne_lab +``` -## Installing Nocturne +The virtual environment should be activated every time you start a new shell session before running subsequent commands: -Start by cloning the repo: +```shell +pyenv shell nocturne_lab +``` -```bash -git clone https://github.com/facebookresearch/nocturne.git -cd nocturne +Fortunately, `pyenv` provides a way to assign a virtual environment to a directory. To set it for this project, run: +```shell +pyenv local nocturne_lab ``` -Then run the following to install git submodules: +#### Option 2: `conda` +Create a conda environment by running: -```bash -git submodule sync -git submodule update --init --recursive +```shell +conda env create -f ./environment.yml ``` -If you are using [Conda](https://docs.conda.io/en/latest/) (recommended), you can instantiate an environment and install Nocturne into it with the following: - -```bash -# create the environment and install the dependencies -conda env create -f environment.yml +This creates a conda environment using Python 3.10 called `nocturne_lab`. 
-# activate the environment where the Python library should be installed -conda activate nocturne +To activate the virtual environment, run: -# run the C++ build and install Nocturne into the simulation environment -python setup.py develop +```shell +conda activate nocturne_lab ``` -If you are not using Conda, simply run the last command to build and install Nocturne at your default Python path. +#### Option 3: `venv` +Create a virtual environment by running: -You should then be all set to use the library. To find an example of constructing a Gym environment, using a basic Simulation, or rendering scenes, go to -```examples``` and run respectively, ```create_env.py```, ```nocturne_functions.py``` or ```rendering.py```. +```shell +python -m venv .venv +``` -Python tests can be run with `pytest`. +The virtual environment should be activated every time you start a new shell session before running the subsequent command: -
-Click here for a list of common installation errors +```shell +source .venv/bin/activate +``` -### pybind11 installation errors +### Dependencies -If you are getting errors with pybind11, install it directly in your conda environment (eg. `conda install -c conda-forge pybind11` or `pip install pybind11`, cf. https://pybind11.readthedocs.io/en/latest/installing.html for more info). -
+`poetry` is used to manage the project and its dependencies. Start by installing `poetry` in your virtual environment: -## Dataset +```shell +pip install poetry +``` + +Before installing the package, you first need to synchronise and update the git submodules by running: -### Downloading the dataset -Two versions of the dataset are available: -- a mini-one that is about 1 GB and consists of 1000 training files and 100 validation / test files at: [Dropbox Link](https://www.dropbox.com/sh/8mxue9rdoizen3h/AADGRrHYBb86pZvDnHplDGvXa?dl=0). -- the full dataset (150 GB) and consists of 134453 training files and 12205 validation / test files: [Dropbox Link](https://www.dropbox.com/sh/wv75pjd8phxizj3/AABfNPWfjQdoTWvdVxsAjUL_a?dl=0) +```shell +# Synchronise and update git submodules +git submodule sync +git submodule update --init --recursive +``` -Place the dataset in a folder of your choosing, unzip the folders inside of it, and change the DATA_FOLDER in ```cfgs/config.py``` to point to where you have -downloaded it. +Now install the package by running: -### (Optional) Rebuilding the Dataset -**Warning** this step is not necessary, the dataset has already been downloaded in the prior step. This is only needed if you want to rebuild the dataset from scratch. +```shell +poetry install +``` -First, go to [Waymo Open](https://github.com/waymo-research/waymo-open-dataset/blob/master/tutorial/tutorial.ipynb) and follow the instructions to install the required packages. This may require additional steps if you are not on a Linux machine. +> _Note:_ Under the hood the `nocturne` package uses the `nocturne_cpp` Python package that wraps the Nocturne C++ code base and provides bindings for Python to interact with the C++ code using `pybind11`. -If you do want to rebuild the dataset, download the Waymo Motion version 1.1 files. 
-- Open ```cfgs/config.py``` and change ```DATA_FOLDER``` to be the path to your Waymo motion files -- Run ```python scripts/json_generation/run_waymo_constructor.py --parallel --no_tl --all_files --datatype train valid```. This will construct, in parallel, a dataset of all the train and validation files in the waymo motion data. It should take on the order of 5 minutes with 20 CPUs. If you want to include traffic lights scenes, remove the ```--no_tl``` flag. -- To ensure that only files that have a guaranteed solution are included (for example, that there are no files where the agent goal is across an apparently uncrossable road edge), run ```python scripts/json_generation/make_solvable_files.py --datatype train valid```. -## C++ build instructions +### Development setup +To configure the development setup, run: +```shell +# Install poetry dev dependencies +poetry install --only=dev -If you want to build the C++ library independently of the Python one, run the following: +# Install pre-commit (for flake8, isort, black, etc.) +pre-commit install -```bash -cd nocturne/cpp -mkdir build -cd build -cmake .. -make -make install +# Optional: Install poetry docs dependencies +poetry install --only=docs ``` -Subsequently, the C++ tests can be ran with `./tests/nocturne_test` from within the `nocturne/cpp/build` directory. - -# Usage - -To get a sense of available functionality in Nocturne, we have provided a few examples in the `examples` folder of how to construct the env (`create_env.py`), how to construct particular observations (`nocturne_functions.py`), and how to render results (`rendering.py`). - -**Note**: by default, Nocturne will log to ```$NOCTURNE_LOG_DIR``` which is set in ```nocturne/__init__.py``` and defaults to ```/logs```. If you'd like to log somewhere else, go to ```nocturne/__init__.py``` and change ```$NOCTURNE_LOG_DIR``` to a different path. - -The following goes over how to use training algorithms using the Nocturne environment. 
- -## Running the RL algorithms -Nocturne comes shipped with a default Gym environment in ```nocturne/envs/base_env.py```. Atop this, we build integration for a few popular RL libraries. - -Nocturne by default comes with support for three versions of Proximal Policy Optimization: -1. Sample Factory, a high throughput asynchronous PPO implementation (https://github.com/alex-petrenko/sample-factory) -2. RLlib's PPO (https://github.com/ray-project/ray/tree/master/rllib) -3. Multi-Agent PPO from (https://github.com/marlbenchmark/on-policy) -Each algorithm is in its corresponding folder in examples and has a corresponding config file in cfgs/ - -**Warning:** only the Sample Factory code has been extensively swept and tested. The default hyperparameters in there -should work for training the agents from the corresponding paper. The other versions are provided for convenience -but are not guaranteed to train a performant agent with the current hyperparameter settings. - -### Important hyperparameters to be aware of -There are a few key hyperparameters that we expect users to care quite a bit about. Each of these can be toggled by adding -```++=``` to the run command. -- ```num_files```: this controls how many training scenarios are used. Set to -1 to use all of them. -- ```max_num_vehicles```: this controls the maximum number of controllable agents in a scenario. If there are more than ```max_num_vehicles``` controllable agents in the scene, we sample ```max_num_vehicles``` randomly from them and set the remainder to be experts. If you want to ensure that all agents are controllable, simply pick a large number like 100. - -### Running Sample Factory -Files from Sample Factory can be run from examples/sample_factory_files and should work by default by running -```python examples/sample_factory_files/run_sample_factory.py algorithm=APPO``` -Additional config options for hyperparameters can be found in the config file. 
- -Once you have a trained checkpoint, you can visualize the results and make a movie of them by running ```python examples/sample_factory_files/visualize_sample_factory.py ```. - -*Warning*: because of how the algorithm is configured, Sample Factory works best with a fixed number of agents -operating on a fixed horizon. To enable this, we use the config parameter ```max_num_vehicles``` which initializes the environment with only scenes that have fewer controllable agents than ```max_num_vehicles```. Additionally, if there are fewer than ```max_num_vehicles``` in the scene we add dummy agents that receive a vector of -1 at all timesteps. When a vehicle exits the scene we continue providing it a vector of -1 as an observation and a reward of 0. - -### Running RLlib -Files from RLlib examples can be run from examples/rllib_files and should work by default by running -```python examples/rllib_files/run_rllib.py``` - -### Running on-policy PPO -Files from [MAPPO](https://github.com/marlbenchmark/on-policy) examples can be run from examples/rllib_files and should work by default by running -```python examples/on_policy_files/nocturne_runner.py algorithm=ppo``` - -## Running the IL Algorithms -Nocturne comes with a baseline implementation of behavioral cloning and a corresponding -DataLoader. This can be run via ```python examples/imitation_learning/train.py```. - -# Contributors - - - - - - - - - - - - - - -
- - Eugene Vinitsky - - - - Nathan Lichtlé - - - - Xiaomeng Yang - -
- - Eugene Vinitsky - - - - Nathan Lichtlé - - - - Xiaomeng Yang - -
- -# License - -The majority of Nocturne is licensed under the MIT license, however portions of the project are available under separate license terms. The Waymo Motion Dataset License can be found at https://waymo.com/open/terms/. +## Ongoing work + +Here is a list of features that we are developing: + +- @Daphne: Support for SB3's PPO algorithm with multi-agent control +- @Alex: Logging and unit testing +- @Tiyas: Random resets diff --git a/algos/ppo/__init__.py b/algos/ppo/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/algos/ppo/base_runner.py b/algos/ppo/base_runner.py deleted file mode 100644 index e4656b04..00000000 --- a/algos/ppo/base_runner.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -import wandb -import os -import numpy as np -import torch -from tensorboardX import SummaryWriter -from algos.ppo.utils.shared_buffer import SharedReplayBuffer - - -def _t2n(x): - """Convert torch tensor to a numpy array.""" - return x.detach().cpu().numpy() - - -class Runner(object): - """ - Base class for training recurrent policies. - :param config: (dict) Config dictionary containing parameters for training. 
- """ - - def __init__(self, config): - - self.all_args = config['cfg.algo'] - self.envs = config['envs'] - self.eval_envs = config['eval_envs'] - self.device = config['device'] - self.num_agents = config['num_agents'] - if config.__contains__("render_envs"): - self.render_envs = config['render_envs'] - - # parameters - # self.env_name = self.all_args.env_name - self.algorithm_name = self.all_args.algorithm_name - self.experiment_name = self.all_args.experiment - self.use_centralized_V = self.all_args.use_centralized_V - self.use_obs_instead_of_state = self.all_args.use_obs_instead_of_state - self.num_env_steps = self.all_args.num_env_steps - self.episode_length = self.all_args.episode_length - # self.episodes_per_thread = self.all_args.episodes_per_thread - self.n_rollout_threads = self.all_args.n_rollout_threads - self.n_eval_rollout_threads = self.all_args.n_eval_rollout_threads - self.n_render_rollout_threads = self.all_args.n_render_rollout_threads - self.use_linear_lr_decay = self.all_args.use_linear_lr_decay - self.hidden_size = self.all_args.hidden_size - self.use_wandb = self.all_args.wandb - self.use_render = self.all_args.use_render - self.recurrent_N = self.all_args.recurrent_N - - # interval - self.save_interval = self.all_args.save_interval - self.use_eval = self.all_args.use_eval - self.eval_interval = self.all_args.eval_interval - self.log_interval = self.all_args.log_interval - - # dir - self.model_dir = self.all_args.model_dir - - if self.use_wandb: - self.save_dir = str(wandb.run.dir) - self.run_dir = str(wandb.run.dir) - else: - self.run_dir = config["logdir"] - self.log_dir = str(self.run_dir / 'logs') - if not os.path.exists(self.log_dir): - os.makedirs(self.log_dir) - self.writter = SummaryWriter(self.log_dir) - self.save_dir = str(self.run_dir / 'models') - if not os.path.exists(self.save_dir): - os.makedirs(self.save_dir) - - from algos.ppo.r_mappo.r_mappo import R_MAPPO as TrainAlgo - from algos.ppo.r_mappo.algorithm.rMAPPOPolicy import 
R_MAPPOPolicy as Policy - share_observation_space = self.envs.share_observation_space[ - 0] if self.use_centralized_V else self.envs.observation_space[0] - - # policy network - self.policy = Policy(self.all_args, - self.envs.observation_space[0], - share_observation_space, - self.envs.action_space[0], - device=self.device) - - if self.model_dir is not None: - self.restore() - - # algorithm - self.trainer = TrainAlgo(self.all_args, - self.policy, - device=self.device) - - # buffer - self.buffer = SharedReplayBuffer(self.all_args, self.num_agents, - self.envs.observation_space[0], - share_observation_space, - self.envs.action_space[0]) - - def run(self): - """Collect training data, perform training updates, and evaluate policy.""" - raise NotImplementedError - - def warmup(self): - """Collect warmup pre-training data.""" - raise NotImplementedError - - def collect(self, step): - """Collect rollouts for training.""" - raise NotImplementedError - - def insert(self, data): - """ - Insert data into buffer. - :param data: (Tuple) data to insert into training buffer. - """ - raise NotImplementedError - - @torch.no_grad() - def compute(self): - """Calculate returns for the collected data.""" - self.trainer.prep_rollout() - next_values = self.trainer.policy.get_values( - np.concatenate(self.buffer.share_obs[-1]), - np.concatenate(self.buffer.rnn_states_critic[-1]), - np.concatenate(self.buffer.masks[-1])) - next_values = np.array( - np.split(_t2n(next_values), self.n_rollout_threads)) - self.buffer.compute_returns(next_values, self.trainer.value_normalizer) - - def train(self): - """Train policies with data in buffer. 
""" - self.trainer.prep_training() - train_infos = self.trainer.train(self.buffer) - self.buffer.after_update() - return train_infos - - def save(self): - """Save policy's actor and critic networks.""" - policy_actor = self.trainer.policy.actor - torch.save(policy_actor.state_dict(), str(self.save_dir) + "/actor.pt") - policy_critic = self.trainer.policy.critic - torch.save(policy_critic.state_dict(), - str(self.save_dir) + "/critic.pt") - - def restore(self): - """Restore policy's networks from a saved model.""" - policy_actor_state_dict = torch.load(str(self.model_dir) + '/actor.pt') - self.policy.actor.load_state_dict(policy_actor_state_dict) - if not self.all_args.use_render: - policy_critic_state_dict = torch.load( - str(self.model_dir) + '/critic.pt') - self.policy.critic.load_state_dict(policy_critic_state_dict) - - def log_train(self, train_infos, total_num_steps): - """ - Log training info. - :param train_infos: (dict) information about training update. - :param total_num_steps: (int) total number of training env steps. - """ - for k, v in train_infos.items(): - if self.use_wandb: - wandb.log({k: v}, step=total_num_steps) - else: - self.writter.add_scalars(k, {k: v}, total_num_steps) - - def log_env(self, env_infos, total_num_steps): - """ - Log env info. - :param env_infos: (dict) information about env state. - :param total_num_steps: (int) total number of training env steps. - """ - for k, v in env_infos.items(): - if len(v) > 0: - if self.use_wandb: - wandb.log({k: np.mean(v)}, step=total_num_steps) - else: - self.writter.add_scalars(k, {k: np.mean(v)}, - total_num_steps) diff --git a/algos/ppo/env_wrappers.py b/algos/ppo/env_wrappers.py deleted file mode 100644 index eb0191d8..00000000 --- a/algos/ppo/env_wrappers.py +++ /dev/null @@ -1,867 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-# Code modified from https://github.com/marlbenchmark/on-policy -""" -Modified from OpenAI Baselines code to work with multi-agent envs -""" -import numpy as np -import torch -from multiprocessing import Process, Pipe -from abc import ABC, abstractmethod -from algos.ppo.utils.util import tile_images - - -class CloudpickleWrapper(object): - """ - Uses cloudpickle to serialize contents (otherwise multiprocessing tries to use pickle) - """ - - def __init__(self, x): - self.x = x - - def __getstate__(self): - import cloudpickle - return cloudpickle.dumps(self.x) - - def __setstate__(self, ob): - import pickle - self.x = pickle.loads(ob) - - -class ShareVecEnv(ABC): - """ - An abstract asynchronous, vectorized environment. - Used to batch data from multiple copies of an environment, so that - each observation becomes an batch of observations, and expected action is a batch of actions to - be applied per-environment. - """ - closed = False - viewer = None - - metadata = {'render.modes': ['human', 'rgb_array']} - - def __init__(self, num_envs, observation_space, share_observation_space, - action_space): - self.num_envs = num_envs - self.observation_space = observation_space - self.share_observation_space = share_observation_space - self.action_space = action_space - - @abstractmethod - def reset(self): - """ - Reset all the environments and return an array of - observations, or a dict of observation arrays. - If step_async is still doing work, that work will - be cancelled and step_wait() should not be called - until step_async() is invoked again. - """ - pass - - @abstractmethod - def step_async(self, actions): - """ - Tell all the environments to start taking a step - with the given actions. - Call step_wait() to get the results of the step. - You should not call this if a step_async run is - already pending. - """ - pass - - @abstractmethod - def step_wait(self): - """ - Wait for the step taken with step_async(). 
- Returns (obs, rews, dones, infos): - - obs: an array of observations, or a dict of - arrays of observations. - - rews: an array of rewards - - dones: an array of "episode done" booleans - - infos: a sequence of info objects - """ - pass - - def close_extras(self): - """ - Clean up the extra resources, beyond what's in this base class. - Only runs when not self.closed. - """ - pass - - def close(self): - if self.closed: - return - if self.viewer is not None: - self.viewer.close() - self.close_extras() - self.closed = True - - def step(self, actions): - """ - Step the environments synchronously. - This is available for backwards compatibility. - """ - self.step_async(actions) - return self.step_wait() - - def render(self, mode='human'): - imgs = self.get_images() - bigimg = tile_images(imgs) - if mode == 'human': - self.get_viewer().imshow(bigimg) - return self.get_viewer().isopen - elif mode == 'rgb_array': - return bigimg - else: - raise NotImplementedError - - def get_images(self): - """ - Return RGB images from each environment - """ - raise NotImplementedError - - @property - def unwrapped(self): - if isinstance(self, VecEnvWrapper): - return self.venv.unwrapped - else: - return self - - def get_viewer(self): - if self.viewer is None: - from gym.envs.classic_control import rendering - self.viewer = rendering.SimpleImageViewer() - return self.viewer - - -def worker(remote, parent_remote, env_fn_wrapper): - parent_remote.close() - env = env_fn_wrapper.x() - while True: - cmd, data = remote.recv() - if cmd == 'step': - ob, reward, done, info = env.step(data) - if 'bool' in done.__class__.__name__: - if done: - ob = env.reset() - else: - if np.all(done): - ob = env.reset() - - remote.send((ob, reward, done, info)) - elif cmd == 'reset': - ob = env.reset() - remote.send((ob)) - elif cmd == 'render': - if data == "rgb_array": - fr = env.render(mode=data) - remote.send(fr) - elif data == "human": - env.render(mode=data) - elif cmd == 'reset_task': - ob = 
env.reset_task() - remote.send(ob) - elif cmd == 'close': - env.close() - remote.close() - break - elif cmd == 'get_spaces': - remote.send((env.observation_space, env.share_observation_space, - env.action_space)) - else: - raise NotImplementedError - - -class GuardSubprocVecEnv(ShareVecEnv): - - def __init__(self, env_fns, spaces=None): - """ - envs: list of gym environments to run in subprocesses - """ - self.waiting = False - self.closed = False - nenvs = len(env_fns) - self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) - self.ps = [ - Process(target=worker, - args=(work_remote, remote, CloudpickleWrapper(env_fn))) - for (work_remote, remote, - env_fn) in zip(self.work_remotes, self.remotes, env_fns) - ] - for p in self.ps: - p.daemon = False # could cause zombie process - p.start() - for remote in self.work_remotes: - remote.close() - - self.remotes[0].send(('get_spaces', None)) - observation_space, share_observation_space, action_space = self.remotes[ - 0].recv() - ShareVecEnv.__init__(self, len(env_fns), observation_space, - share_observation_space, action_space) - - def step_async(self, actions): - - for remote, action in zip(self.remotes, actions): - remote.send(('step', action)) - self.waiting = True - - def step_wait(self): - results = [remote.recv() for remote in self.remotes] - self.waiting = False - obs, rews, dones, infos = zip(*results) - return np.stack(obs), np.stack(rews), np.stack(dones), infos - - def reset(self): - for remote in self.remotes: - remote.send(('reset', None)) - obs = [remote.recv() for remote in self.remotes] - return np.stack(obs) - - def reset_task(self): - for remote in self.remotes: - remote.send(('reset_task', None)) - return np.stack([remote.recv() for remote in self.remotes]) - - def close(self): - if self.closed: - return - if self.waiting: - for remote in self.remotes: - remote.recv() - for remote in self.remotes: - remote.send(('close', None)) - for p in self.ps: - p.join() - self.closed = True - - 
-class SubprocVecEnv(ShareVecEnv): - - def __init__(self, env_fns, spaces=None): - """ - envs: list of gym environments to run in subprocesses - """ - self.waiting = False - self.closed = False - nenvs = len(env_fns) - self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) - self.ps = [ - Process(target=worker, - args=(work_remote, remote, CloudpickleWrapper(env_fn))) - for (work_remote, remote, - env_fn) in zip(self.work_remotes, self.remotes, env_fns) - ] - for p in self.ps: - p.daemon = True # if the main process crashes, we should not cause things to hang - p.start() - for remote in self.work_remotes: - remote.close() - - self.remotes[0].send(('get_spaces', None)) - observation_space, share_observation_space, action_space = self.remotes[ - 0].recv() - ShareVecEnv.__init__(self, len(env_fns), observation_space, - share_observation_space, action_space) - - def step_async(self, actions): - for remote, action in zip(self.remotes, actions): - remote.send(('step', action)) - self.waiting = True - - def step_wait(self): - results = [remote.recv() for remote in self.remotes] - self.waiting = False - obs, rews, dones, infos = zip(*results) - return np.stack(obs), np.stack(rews), np.stack(dones), infos - - def reset(self): - for remote in self.remotes: - remote.send(('reset', None)) - obs = [remote.recv() for remote in self.remotes] - return np.stack(obs) - - def reset_task(self): - for remote in self.remotes: - remote.send(('reset_task', None)) - return np.stack([remote.recv() for remote in self.remotes]) - - def close(self): - if self.closed: - return - if self.waiting: - for remote in self.remotes: - remote.recv() - for remote in self.remotes: - remote.send(('close', None)) - for p in self.ps: - p.join() - self.closed = True - - def render(self, mode="rgb_array"): - for remote in self.remotes: - remote.send(('render', mode)) - if mode == "rgb_array": - frame = [remote.recv() for remote in self.remotes] - return np.stack(frame) - - -def 
shareworker(remote, parent_remote, env_fn_wrapper): - parent_remote.close() - env = env_fn_wrapper.x() - while True: - cmd, data = remote.recv() - if cmd == 'step': - ob, s_ob, reward, done, info, available_actions = env.step(data) - if 'bool' in done.__class__.__name__: - if done: - ob, s_ob, available_actions = env.reset() - else: - if np.all(done): - ob, s_ob, available_actions = env.reset() - - remote.send((ob, s_ob, reward, done, info, available_actions)) - elif cmd == 'reset': - ob, s_ob, available_actions = env.reset() - remote.send((ob, s_ob, available_actions)) - elif cmd == 'reset_task': - ob = env.reset_task() - remote.send(ob) - elif cmd == 'render': - if data == "rgb_array": - fr = env.render(mode=data) - remote.send(fr) - elif data == "human": - env.render(mode=data) - elif cmd == 'close': - env.close() - remote.close() - break - elif cmd == 'get_spaces': - remote.send((env.observation_space, env.share_observation_space, - env.action_space)) - elif cmd == 'render_vulnerability': - fr = env.render_vulnerability(data) - remote.send((fr)) - else: - raise NotImplementedError - - -class ShareSubprocVecEnv(ShareVecEnv): - - def __init__(self, env_fns, spaces=None): - """ - envs: list of gym environments to run in subprocesses - """ - self.waiting = False - self.closed = False - nenvs = len(env_fns) - self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) - self.ps = [ - Process(target=shareworker, - args=(work_remote, remote, CloudpickleWrapper(env_fn))) - for (work_remote, remote, - env_fn) in zip(self.work_remotes, self.remotes, env_fns) - ] - for p in self.ps: - p.daemon = True # if the main process crashes, we should not cause things to hang - p.start() - for remote in self.work_remotes: - remote.close() - self.remotes[0].send(('get_spaces', None)) - observation_space, share_observation_space, action_space = self.remotes[ - 0].recv() - ShareVecEnv.__init__(self, len(env_fns), observation_space, - share_observation_space, action_space) - 
- def step_async(self, actions): - for remote, action in zip(self.remotes, actions): - remote.send(('step', action)) - self.waiting = True - - def step_wait(self): - results = [remote.recv() for remote in self.remotes] - self.waiting = False - obs, share_obs, rews, dones, infos, available_actions = zip(*results) - return np.stack(obs), np.stack(share_obs), np.stack(rews), np.stack( - dones), infos, np.stack(available_actions) - - def reset(self): - for remote in self.remotes: - remote.send(('reset', None)) - results = [remote.recv() for remote in self.remotes] - obs, share_obs, available_actions = zip(*results) - return np.stack(obs), np.stack(share_obs), np.stack(available_actions) - - def reset_task(self): - for remote in self.remotes: - remote.send(('reset_task', None)) - return np.stack([remote.recv() for remote in self.remotes]) - - def close(self): - if self.closed: - return - if self.waiting: - for remote in self.remotes: - remote.recv() - for remote in self.remotes: - remote.send(('close', None)) - for p in self.ps: - p.join() - self.closed = True - - -def choosesimpleworker(remote, parent_remote, env_fn_wrapper): - parent_remote.close() - env = env_fn_wrapper.x() - while True: - cmd, data = remote.recv() - if cmd == 'step': - ob, reward, done, info = env.step(data) - remote.send((ob, reward, done, info)) - elif cmd == 'reset': - ob = env.reset(data) - remote.send((ob)) - elif cmd == 'reset_task': - ob = env.reset_task() - remote.send(ob) - elif cmd == 'close': - env.close() - remote.close() - break - elif cmd == 'render': - if data == "rgb_array": - fr = env.render(mode=data) - remote.send(fr) - elif data == "human": - env.render(mode=data) - elif cmd == 'get_spaces': - remote.send((env.observation_space, env.share_observation_space, - env.action_space)) - else: - raise NotImplementedError - - -class ChooseSimpleSubprocVecEnv(ShareVecEnv): - - def __init__(self, env_fns, spaces=None): - """ - envs: list of gym environments to run in subprocesses - """ - 
self.waiting = False - self.closed = False - nenvs = len(env_fns) - self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) - self.ps = [ - Process(target=choosesimpleworker, - args=(work_remote, remote, CloudpickleWrapper(env_fn))) - for (work_remote, remote, - env_fn) in zip(self.work_remotes, self.remotes, env_fns) - ] - for p in self.ps: - p.daemon = True # if the main process crashes, we should not cause things to hang - p.start() - for remote in self.work_remotes: - remote.close() - self.remotes[0].send(('get_spaces', None)) - observation_space, share_observation_space, action_space = self.remotes[ - 0].recv() - ShareVecEnv.__init__(self, len(env_fns), observation_space, - share_observation_space, action_space) - - def step_async(self, actions): - for remote, action in zip(self.remotes, actions): - remote.send(('step', action)) - self.waiting = True - - def step_wait(self): - results = [remote.recv() for remote in self.remotes] - self.waiting = False - obs, rews, dones, infos = zip(*results) - return np.stack(obs), np.stack(rews), np.stack(dones), infos - - def reset(self, reset_choose): - for remote, choose in zip(self.remotes, reset_choose): - remote.send(('reset', choose)) - obs = [remote.recv() for remote in self.remotes] - return np.stack(obs) - - def render(self, mode="rgb_array"): - for remote in self.remotes: - remote.send(('render', mode)) - if mode == "rgb_array": - frame = [remote.recv() for remote in self.remotes] - return np.stack(frame) - - def reset_task(self): - for remote in self.remotes: - remote.send(('reset_task', None)) - return np.stack([remote.recv() for remote in self.remotes]) - - def close(self): - if self.closed: - return - if self.waiting: - for remote in self.remotes: - remote.recv() - for remote in self.remotes: - remote.send(('close', None)) - for p in self.ps: - p.join() - self.closed = True - - -def chooseworker(remote, parent_remote, env_fn_wrapper): - parent_remote.close() - env = env_fn_wrapper.x() - while 
True: - cmd, data = remote.recv() - if cmd == 'step': - ob, s_ob, reward, done, info, available_actions = env.step(data) - remote.send((ob, s_ob, reward, done, info, available_actions)) - elif cmd == 'reset': - ob, s_ob, available_actions = env.reset(data) - remote.send((ob, s_ob, available_actions)) - elif cmd == 'reset_task': - ob = env.reset_task() - remote.send(ob) - elif cmd == 'close': - env.close() - remote.close() - break - elif cmd == 'render': - remote.send(env.render(mode='rgb_array')) - elif cmd == 'get_spaces': - remote.send((env.observation_space, env.share_observation_space, - env.action_space)) - else: - raise NotImplementedError - - -class ChooseSubprocVecEnv(ShareVecEnv): - - def __init__(self, env_fns, spaces=None): - """ - envs: list of gym environments to run in subprocesses - """ - self.waiting = False - self.closed = False - nenvs = len(env_fns) - self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) - self.ps = [ - Process(target=chooseworker, - args=(work_remote, remote, CloudpickleWrapper(env_fn))) - for (work_remote, remote, - env_fn) in zip(self.work_remotes, self.remotes, env_fns) - ] - for p in self.ps: - p.daemon = True # if the main process crashes, we should not cause things to hang - p.start() - for remote in self.work_remotes: - remote.close() - self.remotes[0].send(('get_spaces', None)) - observation_space, share_observation_space, action_space = self.remotes[ - 0].recv() - ShareVecEnv.__init__(self, len(env_fns), observation_space, - share_observation_space, action_space) - - def step_async(self, actions): - for remote, action in zip(self.remotes, actions): - remote.send(('step', action)) - self.waiting = True - - def step_wait(self): - results = [remote.recv() for remote in self.remotes] - self.waiting = False - obs, share_obs, rews, dones, infos, available_actions = zip(*results) - return np.stack(obs), np.stack(share_obs), np.stack(rews), np.stack( - dones), infos, np.stack(available_actions) - - def 
reset(self, reset_choose): - for remote, choose in zip(self.remotes, reset_choose): - remote.send(('reset', choose)) - results = [remote.recv() for remote in self.remotes] - obs, share_obs, available_actions = zip(*results) - return np.stack(obs), np.stack(share_obs), np.stack(available_actions) - - def reset_task(self): - for remote in self.remotes: - remote.send(('reset_task', None)) - return np.stack([remote.recv() for remote in self.remotes]) - - def close(self): - if self.closed: - return - if self.waiting: - for remote in self.remotes: - remote.recv() - for remote in self.remotes: - remote.send(('close', None)) - for p in self.ps: - p.join() - self.closed = True - - -def chooseguardworker(remote, parent_remote, env_fn_wrapper): - parent_remote.close() - env = env_fn_wrapper.x() - while True: - cmd, data = remote.recv() - if cmd == 'step': - ob, reward, done, info = env.step(data) - remote.send((ob, reward, done, info)) - elif cmd == 'reset': - ob = env.reset(data) - remote.send((ob)) - elif cmd == 'reset_task': - ob = env.reset_task() - remote.send(ob) - elif cmd == 'close': - env.close() - remote.close() - break - elif cmd == 'get_spaces': - remote.send((env.observation_space, env.share_observation_space, - env.action_space)) - else: - raise NotImplementedError - - -class ChooseGuardSubprocVecEnv(ShareVecEnv): - - def __init__(self, env_fns, spaces=None): - """ - envs: list of gym environments to run in subprocesses - """ - self.waiting = False - self.closed = False - nenvs = len(env_fns) - self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) - self.ps = [ - Process(target=chooseguardworker, - args=(work_remote, remote, CloudpickleWrapper(env_fn))) - for (work_remote, remote, - env_fn) in zip(self.work_remotes, self.remotes, env_fns) - ] - for p in self.ps: - p.daemon = False # if the main process crashes, we should not cause things to hang - p.start() - for remote in self.work_remotes: - remote.close() - self.remotes[0].send(('get_spaces', 
None)) - observation_space, share_observation_space, action_space = self.remotes[ - 0].recv() - ShareVecEnv.__init__(self, len(env_fns), observation_space, - share_observation_space, action_space) - - def step_async(self, actions): - for remote, action in zip(self.remotes, actions): - remote.send(('step', action)) - self.waiting = True - - def step_wait(self): - results = [remote.recv() for remote in self.remotes] - self.waiting = False - obs, rews, dones, infos = zip(*results) - return np.stack(obs), np.stack(rews), np.stack(dones), infos - - def reset(self, reset_choose): - for remote, choose in zip(self.remotes, reset_choose): - remote.send(('reset', choose)) - obs = [remote.recv() for remote in self.remotes] - return np.stack(obs) - - def reset_task(self): - for remote in self.remotes: - remote.send(('reset_task', None)) - return np.stack([remote.recv() for remote in self.remotes]) - - def close(self): - if self.closed: - return - if self.waiting: - for remote in self.remotes: - remote.recv() - for remote in self.remotes: - remote.send(('close', None)) - for p in self.ps: - p.join() - self.closed = True - - -# single env -class DummyVecEnv(ShareVecEnv): - - def __init__(self, env_fns): - self.envs = [fn() for fn in env_fns] - env = self.envs[0] - ShareVecEnv.__init__(self, len(env_fns), env.observation_space, - env.share_observation_space, env.action_space) - self.actions = None - - def step_async(self, actions): - self.actions = actions - - def step_wait(self): - results = [env.step(a) for (a, env) in zip(self.actions, self.envs)] - # TODO(eugenevinitsky) remove this - obs, rews, dones, infos = map(np.array, zip(*results)) - - for (i, done) in enumerate(dones): - if 'bool' in done.__class__.__name__: - if done: - obs[i] = self.envs[i].reset() - else: - if np.all(done): - obs[i] = self.envs[i].reset() - - self.actions = None - return obs, rews, dones, infos - - def reset(self): - obs = [env.reset() for env in self.envs] - return np.array(obs) - - def 
close(self): - for env in self.envs: - env.close() - - def render(self, mode="human"): - if mode == "rgb_array": - return np.array([env.render(mode=mode) for env in self.envs]) - elif mode == "human": - for env in self.envs: - env.render(mode=mode) - else: - raise NotImplementedError - - -class ShareDummyVecEnv(ShareVecEnv): - - def __init__(self, env_fns): - self.envs = [fn() for fn in env_fns] - env = self.envs[0] - ShareVecEnv.__init__(self, len(env_fns), env.observation_space, - env.share_observation_space, env.action_space) - self.actions = None - - def step_async(self, actions): - self.actions = actions - - def step_wait(self): - results = [env.step(a) for (a, env) in zip(self.actions, self.envs)] - obs, share_obs, rews, dones, infos, available_actions = map( - np.array, zip(*results)) - - for (i, done) in enumerate(dones): - if 'bool' in done.__class__.__name__: - if done: - obs[i], share_obs[i], available_actions[i] = self.envs[ - i].reset() - else: - if np.all(done): - obs[i], share_obs[i], available_actions[i] = self.envs[ - i].reset() - self.actions = None - - return obs, share_obs, rews, dones, infos, available_actions - - def reset(self): - results = [env.reset() for env in self.envs] - obs, share_obs, available_actions = map(np.array, zip(*results)) - return obs, share_obs, available_actions - - def close(self): - for env in self.envs: - env.close() - - def render(self, mode="human"): - if mode == "rgb_array": - return np.array([env.render(mode=mode) for env in self.envs]) - elif mode == "human": - for env in self.envs: - env.render(mode=mode) - else: - raise NotImplementedError - - -class ChooseDummyVecEnv(ShareVecEnv): - - def __init__(self, env_fns): - self.envs = [fn() for fn in env_fns] - env = self.envs[0] - ShareVecEnv.__init__(self, len(env_fns), env.observation_space, - env.share_observation_space, env.action_space) - self.actions = None - - def step_async(self, actions): - self.actions = actions - - def step_wait(self): - results = 
[env.step(a) for (a, env) in zip(self.actions, self.envs)] - obs, share_obs, rews, dones, infos, available_actions = map( - np.array, zip(*results)) - self.actions = None - return obs, share_obs, rews, dones, infos, available_actions - - def reset(self, reset_choose): - results = [ - env.reset(choose) for (env, choose) in zip(self.envs, reset_choose) - ] - obs, share_obs, available_actions = map(np.array, zip(*results)) - return obs, share_obs, available_actions - - def close(self): - for env in self.envs: - env.close() - - def render(self, mode="human"): - if mode == "rgb_array": - return np.array([env.render(mode=mode) for env in self.envs]) - elif mode == "human": - for env in self.envs: - env.render(mode=mode) - else: - raise NotImplementedError - - -class ChooseSimpleDummyVecEnv(ShareVecEnv): - - def __init__(self, env_fns): - self.envs = [fn() for fn in env_fns] - env = self.envs[0] - ShareVecEnv.__init__(self, len(env_fns), env.observation_space, - env.share_observation_space, env.action_space) - self.actions = None - - def step_async(self, actions): - self.actions = actions - - def step_wait(self): - results = [env.step(a) for (a, env) in zip(self.actions, self.envs)] - obs, rews, dones, infos = map(np.array, zip(*results)) - self.actions = None - return obs, rews, dones, infos - - def reset(self, reset_choose): - obs = [ - env.reset(choose) for (env, choose) in zip(self.envs, reset_choose) - ] - return np.array(obs) - - def close(self): - for env in self.envs: - env.close() - - def render(self, mode="human"): - if mode == "rgb_array": - return np.array([env.render(mode=mode) for env in self.envs]) - elif mode == "human": - for env in self.envs: - env.render(mode=mode) - else: - raise NotImplementedError diff --git a/algos/ppo/ppo_utils/act.py b/algos/ppo/ppo_utils/act.py deleted file mode 100644 index 387c9b3e..00000000 --- a/algos/ppo/ppo_utils/act.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -from .distributions import Bernoulli, Categorical, DiagGaussian -import torch -import torch.nn as nn - - -class ACTLayer(nn.Module): - """ - MLP Module to compute actions. - :param action_space: (gym.Space) action space. - :param inputs_dim: (int) dimension of network input. - :param use_orthogonal: (bool) whether to use orthogonal initialization. - :param gain: (float) gain of the output layer of the network. - """ - - def __init__(self, action_space, inputs_dim, use_orthogonal, gain, device): - super(ACTLayer, self).__init__() - self.mixed_action = False - self.multi_discrete = False - - if action_space.__class__.__name__ == "Discrete": - action_dim = action_space.n - self.action_out = Categorical(inputs_dim, action_dim, - use_orthogonal, gain) - elif action_space.__class__.__name__ == "Box": - action_dim = action_space.shape[0] - self.action_out = DiagGaussian(inputs_dim, action_dim, - use_orthogonal, gain, device) - elif action_space.__class__.__name__ == "MultiBinary": - action_dim = action_space.shape[0] - self.action_out = Bernoulli(inputs_dim, action_dim, use_orthogonal, - gain) - elif action_space.__class__.__name__ == "MultiDiscrete": - self.multi_discrete = True - action_dims = action_space.high - action_space.low + 1 - self.action_outs = [] - for action_dim in action_dims: - self.action_outs.append( - Categorical(inputs_dim, action_dim, use_orthogonal, gain)) - self.action_outs = nn.ModuleList(self.action_outs) - else: # discrete + continous - self.mixed_action = True - continous_dim = action_space[0].shape[0] - discrete_dim = action_space[1].n - self.action_outs = nn.ModuleList([ - DiagGaussian(inputs_dim, continous_dim, use_orthogonal, gain), - Categorical(inputs_dim, discrete_dim, use_orthogonal, gain) - ]) - - self.to(device) - - def forward(self, x, 
available_actions=None, deterministic=False): - """ - Compute actions and action logprobs from given input. - :param x: (torch.Tensor) input to network. - :param available_actions: (torch.Tensor) denotes which actions are available to agent - (if None, all actions available) - :param deterministic: (bool) whether to sample from action distribution or return the mode. - - :return actions: (torch.Tensor) actions to take. - :return action_log_probs: (torch.Tensor) log probabilities of taken actions. - """ - if self.mixed_action: - actions = [] - action_log_probs = [] - for action_out in self.action_outs: - action_logit = action_out(x) - action = action_logit.mode( - ) if deterministic else action_logit.sample() - action_log_prob = action_logit.log_probs(action) - actions.append(action.float()) - action_log_probs.append(action_log_prob) - - actions = torch.cat(actions, -1) - action_log_probs = torch.sum(torch.cat(action_log_probs, -1), - -1, - keepdim=True) - - elif self.multi_discrete: - actions = [] - action_log_probs = [] - for action_out in self.action_outs: - action_logit = action_out(x) - action = action_logit.mode( - ) if deterministic else action_logit.sample() - action_log_prob = action_logit.log_probs(action) - actions.append(action) - action_log_probs.append(action_log_prob) - - actions = torch.cat(actions, -1) - action_log_probs = torch.cat(action_log_probs, -1) - - else: - action_logits = self.action_out(x) - actions = action_logits.mode( - ) if deterministic else action_logits.sample() - action_log_probs = action_logits.log_probs(actions) - - return actions, action_log_probs - - def get_probs(self, x, available_actions=None): - """ - Compute action probabilities from inputs. - :param x: (torch.Tensor) input to network. 
- :param available_actions: (torch.Tensor) denotes which actions are available to agent - (if None, all actions available) - - :return action_probs: (torch.Tensor) - """ - if self.mixed_action or self.multi_discrete: - action_probs = [] - for action_out in self.action_outs: - action_logit = action_out(x) - action_prob = action_logit.probs - action_probs.append(action_prob) - action_probs = torch.cat(action_probs, -1) - else: - action_logits = self.action_out(x, available_actions) - action_probs = action_logits.probs - - return action_probs - - def evaluate_actions(self, - x, - action, - available_actions=None, - active_masks=None): - """ - Compute log probability and entropy of given actions. - :param x: (torch.Tensor) input to network. - :param action: (torch.Tensor) actions whose entropy and log probability to evaluate. - :param available_actions: (torch.Tensor) denotes which actions are available to agent - (if None, all actions available) - :param active_masks: (torch.Tensor) denotes whether an agent is active or dead. - - :return action_log_probs: (torch.Tensor) log probabilities of the input actions. - :return dist_entropy: (torch.Tensor) action distribution entropy for the given inputs. 
- """ - if self.mixed_action: - a, b = action.split((2, 1), -1) - b = b.long() - action = [a, b] - action_log_probs = [] - dist_entropy = [] - for action_out, act in zip(self.action_outs, action): - action_logit = action_out(x) - action_log_probs.append(action_logit.log_probs(act)) - if active_masks is not None: - if len(action_logit.entropy().shape) == len( - active_masks.shape): - dist_entropy.append( - (action_logit.entropy() * active_masks).sum() / - active_masks.sum()) - else: - dist_entropy.append((action_logit.entropy() * - active_masks.squeeze(-1)).sum() / - active_masks.sum()) - else: - dist_entropy.append(action_logit.entropy().mean()) - - action_log_probs = torch.sum(torch.cat(action_log_probs, -1), - -1, - keepdim=True) - dist_entropy = dist_entropy[0] / 2.0 + dist_entropy[ - 1] / 0.98 #! dosen't make sense - - elif self.multi_discrete: - action = torch.transpose(action, 0, 1) - action_log_probs = [] - dist_entropy = [] - for action_out, act in zip(self.action_outs, action): - action_logit = action_out(x) - action_log_probs.append(action_logit.log_probs(act)) - if active_masks is not None: - dist_entropy.append( - (action_logit.entropy() * - active_masks.squeeze(-1)).sum() / active_masks.sum()) - else: - dist_entropy.append(action_logit.entropy().mean()) - - action_log_probs = torch.cat(action_log_probs, - -1) # ! could be wrong - dist_entropy = torch.tensor(dist_entropy).mean() - - else: - action_logits = self.action_out(x, available_actions) - action_log_probs = action_logits.log_probs(action) - if active_masks is not None: - dist_entropy = ( - action_logits.entropy() * - active_masks.squeeze(-1)).sum() / active_masks.sum() - else: - dist_entropy = action_logits.entropy().mean() - - return action_log_probs, dist_entropy diff --git a/algos/ppo/ppo_utils/cnn.py b/algos/ppo/ppo_utils/cnn.py deleted file mode 100644 index 95fb8218..00000000 --- a/algos/ppo/ppo_utils/cnn.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -from torchvision import transforms -import torch.nn as nn -from .util import init -"""CNN Modules and utils.""" - - -class Flatten(nn.Module): - - def forward(self, x): - return x.view(x.size(0), -1) - - -class CNNLayer(nn.Module): - - def __init__(self, - obs_shape, - hidden_size, - use_orthogonal, - use_ReLU, - kernel_size=3, - stride=1): - super(CNNLayer, self).__init__() - - active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] - init_method = [nn.init.xavier_uniform_, - nn.init.orthogonal_][use_orthogonal] - gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) - - self.resize = transforms.Resize(84) - - def init_(m): - return init(m, - init_method, - lambda x: nn.init.constant_(x, 0), - gain=gain) - - input_channel = obs_shape[0] - input_width = obs_shape[1] - input_height = obs_shape[2] - - self.cnn = nn.Sequential( - init_( - nn.Conv2d(in_channels=input_channel, - out_channels=hidden_size // 2, - kernel_size=kernel_size, - stride=stride)), active_func, Flatten(), - init_( - nn.Linear( - hidden_size // 2 * (input_width - kernel_size + stride) * - (input_height - kernel_size + stride), - hidden_size)), active_func, - init_(nn.Linear(hidden_size, hidden_size)), active_func) - - def forward(self, x): - # TODO(eugenevinitsky) hardcoding is bad - x = self.resize(x) / 255.0 - x = self.cnn(x) - return x - - -class CNNBase(nn.Module): - - def __init__(self, args, obs_shape): - super(CNNBase, self).__init__() - - self._use_orthogonal = args.use_orthogonal - self._use_ReLU = args.use_ReLU - self.hidden_size = args.hidden_size - - self.cnn = CNNLayer(obs_shape, self.hidden_size, self._use_orthogonal, - self._use_ReLU) - - def forward(self, x): - x = self.cnn(x) - return x diff --git a/algos/ppo/ppo_utils/distributions.py b/algos/ppo/ppo_utils/distributions.py 
deleted file mode 100644 index 9249d700..00000000 --- a/algos/ppo/ppo_utils/distributions.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -import torch -import torch.nn as nn -from .util import init -""" -Modify standard PyTorch distributions so they to make compatible with this codebase. -""" - -# -# Standardize distribution interfaces -# - - -# Categorical -class FixedCategorical(torch.distributions.Categorical): - - def sample(self): - return super().sample().unsqueeze(-1) - - def log_probs(self, actions): - return (super().log_prob(actions.squeeze(-1)).view( - actions.size(0), -1).sum(-1).unsqueeze(-1)) - - def mode(self): - return self.probs.argmax(dim=-1, keepdim=True) - - -# Normal -class FixedNormal(torch.distributions.Normal): - - def log_probs(self, actions): - return super().log_prob(actions).sum(-1, keepdim=True) - - def entrop(self): - return super.entropy().sum(-1) - - def mode(self): - return self.mean - - -# Bernoulli -class FixedBernoulli(torch.distributions.Bernoulli): - - def log_probs(self, actions): - return super.log_prob(actions).view(actions.size(0), - -1).sum(-1).unsqueeze(-1) - - def entropy(self): - return super().entropy().sum(-1) - - def mode(self): - return torch.gt(self.probs, 0.5).float() - - -class Categorical(nn.Module): - - def __init__(self, - num_inputs, - num_outputs, - use_orthogonal=True, - gain=0.01): - super(Categorical, self).__init__() - init_method = [nn.init.xavier_uniform_, - nn.init.orthogonal_][use_orthogonal] - - def init_(m): - return init(m, init_method, lambda x: nn.init.constant_(x, 0), - gain) - - self.linear = init_(nn.Linear(num_inputs, num_outputs)) - - def forward(self, x, available_actions=None): - x = self.linear(x) - if available_actions is not None: - 
x[available_actions == 0] = -1e10 - return FixedCategorical(logits=x) - - -class DiagGaussian(nn.Module): - - def __init__(self, - num_inputs, - num_outputs, - use_orthogonal=True, - gain=0.01, - device='cpu'): - super(DiagGaussian, self).__init__() - - init_method = [nn.init.xavier_uniform_, - nn.init.orthogonal_][use_orthogonal] - - def init_(m): - return init(m, init_method, lambda x: nn.init.constant_(x, 0), - gain) - - self.fc_mean = init_(nn.Linear(num_inputs, num_outputs)) - self.logstd = AddBias(torch.zeros(num_outputs)) - self.to(device) - self.device = device - - def forward(self, x): - action_mean = self.fc_mean(x) - - # An ugly hack for my KFAC implementation. - zeros = torch.zeros(action_mean.size()).to(self.device) - # if x.is_cuda: - # zeros = zeros.cuda() - - action_logstd = self.logstd(zeros) - return FixedNormal(action_mean, action_logstd.exp()) - - -class Bernoulli(nn.Module): - - def __init__(self, - num_inputs, - num_outputs, - use_orthogonal=True, - gain=0.01): - super(Bernoulli, self).__init__() - init_method = [nn.init.xavier_uniform_, - nn.init.orthogonal_][use_orthogonal] - - def init_(m): - return init(m, init_method, lambda x: nn.init.constant_(x, 0), - gain) - - self.linear = init_(nn.Linear(num_inputs, num_outputs)) - - def forward(self, x): - x = self.linear(x) - return FixedBernoulli(logits=x) - - -class AddBias(nn.Module): - - def __init__(self, bias): - super(AddBias, self).__init__() - self._bias = nn.Parameter(bias.unsqueeze(1)) - - def forward(self, x): - if x.dim() == 2: - bias = self._bias.t().view(1, -1) - else: - bias = self._bias.t().view(1, -1, 1, 1) - - return x + bias diff --git a/algos/ppo/ppo_utils/encoder.py b/algos/ppo/ppo_utils/encoder.py deleted file mode 100644 index e69de29b..00000000 diff --git a/algos/ppo/ppo_utils/mlp.py b/algos/ppo/ppo_utils/mlp.py deleted file mode 100644 index b066a3d2..00000000 --- a/algos/ppo/ppo_utils/mlp.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -import torch.nn as nn -from .util import init, get_clones -"""MLP modules.""" - - -class MLPLayer(nn.Module): - - def __init__(self, input_dim, hidden_size, layer_N, use_orthogonal, - use_ReLU): - super(MLPLayer, self).__init__() - self._layer_N = layer_N - - active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] - init_method = [nn.init.xavier_uniform_, - nn.init.orthogonal_][use_orthogonal] - gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) - - def init_(m): - return init(m, - init_method, - lambda x: nn.init.constant_(x, 0), - gain=gain) - - self.fc1 = nn.Sequential(init_(nn.Linear(input_dim, hidden_size)), - active_func, nn.LayerNorm(hidden_size)) - self.fc_h = nn.Sequential(init_(nn.Linear(hidden_size, hidden_size)), - active_func, nn.LayerNorm(hidden_size)) - self.fc2 = get_clones(self.fc_h, self._layer_N) - - def forward(self, x): - x = self.fc1(x) - for i in range(self._layer_N): - x = self.fc2[i](x) - return x - - -class MLPBase(nn.Module): - - def __init__(self, args, obs_shape, cat_self=True, attn_internal=False): - super(MLPBase, self).__init__() - - self._use_feature_normalization = args.use_feature_normalization - self._use_orthogonal = args.use_orthogonal - self._use_ReLU = args.use_ReLU - self._stacked_frames = args.stacked_frames - self._layer_N = args.layer_N - self.hidden_size = args.hidden_size - - obs_dim = obs_shape[0] - - if self._use_feature_normalization: - self.feature_norm = nn.LayerNorm(obs_dim) - - self.mlp = MLPLayer(obs_dim, self.hidden_size, self._layer_N, - self._use_orthogonal, self._use_ReLU) - - def forward(self, x): - if self._use_feature_normalization: - x = self.feature_norm(x) - - x = self.mlp(x) - - return x diff --git a/algos/ppo/ppo_utils/popart.py b/algos/ppo/ppo_utils/popart.py deleted 
file mode 100644 index 7dd4be1b..00000000 --- a/algos/ppo/ppo_utils/popart.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -import math -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class PopArt(torch.nn.Module): - - def __init__(self, - input_shape, - output_shape, - norm_axes=1, - beta=0.99999, - epsilon=1e-5, - device=torch.device("cpu")): - - super(PopArt, self).__init__() - - self.beta = beta - self.epsilon = epsilon - self.norm_axes = norm_axes - self.tpdv = dict(dtype=torch.float32, device=device) - - self.input_shape = input_shape - self.output_shape = output_shape - - self.weight = nn.Parameter(torch.Tensor(output_shape, - input_shape)).to(**self.tpdv) - self.bias = nn.Parameter(torch.Tensor(output_shape)).to(**self.tpdv) - - self.stddev = nn.Parameter(torch.ones(output_shape), - requires_grad=False).to(**self.tpdv) - self.mean = nn.Parameter(torch.zeros(output_shape), - requires_grad=False).to(**self.tpdv) - self.mean_sq = nn.Parameter(torch.zeros(output_shape), - requires_grad=False).to(**self.tpdv) - self.debiasing_term = nn.Parameter(torch.tensor(0.0), - requires_grad=False).to(**self.tpdv) - - self.reset_parameters() - - def reset_parameters(self): - torch.nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5)) - if self.bias is not None: - fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out( - self.weight) - bound = 1 / math.sqrt(fan_in) - torch.nn.init.uniform_(self.bias, -bound, bound) - self.mean.zero_() - self.mean_sq.zero_() - self.debiasing_term.zero_() - - def forward(self, input_vector): - if type(input_vector) == np.ndarray: - input_vector = torch.from_numpy(input_vector) - input_vector = input_vector.to(**self.tpdv) - - return 
F.linear(input_vector, self.weight, self.bias) - - @torch.no_grad() - def update(self, input_vector): - if type(input_vector) == np.ndarray: - input_vector = torch.from_numpy(input_vector) - input_vector = input_vector.to(**self.tpdv) - - old_mean, old_var = self.debiased_mean_var() - old_stddev = torch.sqrt(old_var) - - batch_mean = input_vector.mean(dim=tuple(range(self.norm_axes))) - batch_sq_mean = (input_vector**2).mean( - dim=tuple(range(self.norm_axes))) - - self.mean.mul_(self.beta).add_(batch_mean * (1.0 - self.beta)) - self.mean_sq.mul_(self.beta).add_(batch_sq_mean * (1.0 - self.beta)) - self.debiasing_term.mul_(self.beta).add_(1.0 * (1.0 - self.beta)) - - self.stddev = (self.mean_sq - self.mean**2).sqrt().clamp(min=1e-4) - - new_mean, new_var = self.debiased_mean_var() - new_stddev = torch.sqrt(new_var) - - self.weight = self.weight * old_stddev / new_stddev - self.bias = (old_stddev * self.bias + old_mean - new_mean) / new_stddev - - def debiased_mean_var(self): - debiased_mean = self.mean / self.debiasing_term.clamp(min=self.epsilon) - debiased_mean_sq = self.mean_sq / self.debiasing_term.clamp( - min=self.epsilon) - debiased_var = (debiased_mean_sq - debiased_mean**2).clamp(min=1e-2) - return debiased_mean, debiased_var - - def normalize(self, input_vector): - if type(input_vector) == np.ndarray: - input_vector = torch.from_numpy(input_vector) - input_vector = input_vector.to(**self.tpdv) - - mean, var = self.debiased_mean_var() - out = (input_vector - mean[(None, ) * self.norm_axes] - ) / torch.sqrt(var)[(None, ) * self.norm_axes] - - return out - - def denormalize(self, input_vector): - if type(input_vector) == np.ndarray: - input_vector = torch.from_numpy(input_vector) - input_vector = input_vector.to(**self.tpdv) - - mean, var = self.debiased_mean_var() - out = input_vector * torch.sqrt(var)[(None, ) * self.norm_axes] + mean[ - (None, ) * self.norm_axes] - - out = out.cpu().numpy() - - return out diff --git a/algos/ppo/ppo_utils/rnn.py 
b/algos/ppo/ppo_utils/rnn.py deleted file mode 100644 index 2720be9c..00000000 --- a/algos/ppo/ppo_utils/rnn.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -import torch -import torch.nn as nn -"""RNN modules.""" - - -class RNNLayer(nn.Module): - - def __init__(self, inputs_dim, outputs_dim, recurrent_N, use_orthogonal, - device): - super(RNNLayer, self).__init__() - self._recurrent_N = recurrent_N - self._use_orthogonal = use_orthogonal - - self.rnn = nn.GRU(inputs_dim, - outputs_dim, - num_layers=self._recurrent_N) - for name, param in self.rnn.named_parameters(): - if 'bias' in name: - nn.init.constant_(param, 0) - elif 'weight' in name: - if self._use_orthogonal: - nn.init.orthogonal_(param) - else: - nn.init.xavier_uniform_(param) - self.norm = nn.LayerNorm(outputs_dim) - self.to(device) - - def forward(self, x, hxs, masks): - if x.size(0) == hxs.size(0): - x, hxs = self.rnn( - x.unsqueeze(0), - (hxs * - masks.repeat(1, self._recurrent_N).unsqueeze(-1)).transpose( - 0, 1).contiguous()) - x = x.squeeze(0) - hxs = hxs.transpose(0, 1) - else: - # x is a (T, N, -1) tensor that has been flatten to (T * N, -1) - N = hxs.size(0) - T = int(x.size(0) / N) - - # unflatten - x = x.view(T, N, x.size(1)) - - # Same deal with masks - masks = masks.view(T, N) - - # Let's figure out which steps in the sequence have a zero for any agent - # We will always assume t=0 has a zero in it as that makes the logic cleaner - has_zeros = ((masks[1:] == 0.0).any( - dim=-1).nonzero().squeeze().cpu()) - - # +1 to correct the masks[1:] - if has_zeros.dim() == 0: - # Deal with scalar - has_zeros = [has_zeros.item() + 1] - else: - has_zeros = (has_zeros + 1).numpy().tolist() - - # add t=0 and t=T to the list - has_zeros = [0] + has_zeros + [T] 
- - hxs = hxs.transpose(0, 1) - - outputs = [] - for i in range(len(has_zeros) - 1): - # We can now process steps that don't have any zeros in masks together! - # This is much faster - start_idx = has_zeros[i] - end_idx = has_zeros[i + 1] - temp = (hxs * masks[start_idx].view(1, -1, 1).repeat( - self._recurrent_N, 1, 1)).contiguous() - rnn_scores, hxs = self.rnn(x[start_idx:end_idx], temp) - outputs.append(rnn_scores) - - # assert len(outputs) == T - # x is a (T, N, -1) tensor - x = torch.cat(outputs, dim=0) - - # flatten - x = x.reshape(T * N, -1) - hxs = hxs.transpose(0, 1) - - x = self.norm(x) - return x, hxs diff --git a/algos/ppo/ppo_utils/util.py b/algos/ppo/ppo_utils/util.py deleted file mode 100644 index 6f2735cc..00000000 --- a/algos/ppo/ppo_utils/util.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -import copy -import numpy as np - -import torch -import torch.nn as nn - - -def init(module, weight_init, bias_init, gain=1): - weight_init(module.weight.data, gain=gain) - bias_init(module.bias.data) - return module - - -def get_clones(module, N): - return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) - - -def check(input): - output = torch.from_numpy(input) if type(input) == np.ndarray else input - return output diff --git a/algos/ppo/r_mappo/__init__.py b/algos/ppo/r_mappo/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/algos/ppo/r_mappo/algorithm/rMAPPOPolicy.py b/algos/ppo/r_mappo/algorithm/rMAPPOPolicy.py deleted file mode 100644 index c211cdb6..00000000 --- a/algos/ppo/r_mappo/algorithm/rMAPPOPolicy.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -import torch -from algos.ppo.r_mappo.algorithm.r_actor_critic import R_Actor, R_Critic -from algos.ppo.utils.util import update_linear_schedule - - -class R_MAPPOPolicy: - """ - MAPPO Policy class. Wraps actor and critic networks to compute actions and value function predictions. - - :param args: (argparse.Namespace) arguments containing relevant model and policy information. - :param obs_space: (gym.Space) observation space. - :param cent_obs_space: (gym.Space) value function input space (centralized input for MAPPO, decentralized for IPPO). - :param action_space: (gym.Space) action space. - :param device: (torch.device) specifies the device to run on (cpu/gpu). - """ - - def __init__(self, - args, - obs_space, - cent_obs_space, - act_space, - device=torch.device("cpu")): - self.device = device - self.lr = args.lr - self.critic_lr = args.critic_lr - self.opti_eps = args.opti_eps - self.weight_decay = args.weight_decay - - self.obs_space = obs_space - self.share_obs_space = cent_obs_space - self.act_space = act_space - - self.actor = R_Actor(args, self.obs_space, self.act_space, self.device) - self.critic = R_Critic(args, self.share_obs_space, self.device) - - self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), - lr=self.lr, - eps=self.opti_eps, - weight_decay=self.weight_decay) - self.critic_optimizer = torch.optim.Adam( - self.critic.parameters(), - lr=self.critic_lr, - eps=self.opti_eps, - weight_decay=self.weight_decay) - - def lr_decay(self, episode, episodes): - """ - Decay the actor and critic learning rates. - :param episode: (int) current training episode. - :param episodes: (int) total number of training episodes. 
- """ - update_linear_schedule(self.actor_optimizer, episode, episodes, - self.lr) - update_linear_schedule(self.critic_optimizer, episode, episodes, - self.critic_lr) - - def get_actions(self, - cent_obs, - obs, - rnn_states_actor, - rnn_states_critic, - masks, - available_actions=None, - deterministic=False): - """ - Compute actions and value function predictions for the given inputs. - :param cent_obs (np.ndarray): centralized input to the critic. - :param obs (np.ndarray): local agent inputs to the actor. - :param rnn_states_actor: (np.ndarray) if actor is RNN, RNN states for actor. - :param rnn_states_critic: (np.ndarray) if critic is RNN, RNN states for critic. - :param masks: (np.ndarray) denotes points at which RNN states should be reset. - :param available_actions: (np.ndarray) denotes which actions are available to agent - (if None, all actions available) - :param deterministic: (bool) whether the action should be mode of distribution or should be sampled. - - :return values: (torch.Tensor) value function predictions. - :return actions: (torch.Tensor) actions to take. - :return action_log_probs: (torch.Tensor) log probabilities of chosen actions. - :return rnn_states_actor: (torch.Tensor) updated actor network RNN states. - :return rnn_states_critic: (torch.Tensor) updated critic network RNN states. - """ - actions, action_log_probs, rnn_states_actor = self.actor( - obs, rnn_states_actor, masks, available_actions, deterministic) - - values, rnn_states_critic = self.critic(cent_obs, rnn_states_critic, - masks) - return values, actions, action_log_probs, rnn_states_actor, rnn_states_critic - - def get_values(self, cent_obs, rnn_states_critic, masks): - """ - Get value function predictions. - :param cent_obs (np.ndarray): centralized input to the critic. - :param rnn_states_critic: (np.ndarray) if critic is RNN, RNN states for critic. - :param masks: (np.ndarray) denotes points at which RNN states should be reset. 
- - :return values: (torch.Tensor) value function predictions. - """ - values, _ = self.critic(cent_obs, rnn_states_critic, masks) - return values - - def evaluate_actions(self, - cent_obs, - obs, - rnn_states_actor, - rnn_states_critic, - action, - masks, - available_actions=None, - active_masks=None): - """ - Get action logprobs / entropy and value function predictions for actor update. - :param cent_obs (np.ndarray): centralized input to the critic. - :param obs (np.ndarray): local agent inputs to the actor. - :param rnn_states_actor: (np.ndarray) if actor is RNN, RNN states for actor. - :param rnn_states_critic: (np.ndarray) if critic is RNN, RNN states for critic. - :param action: (np.ndarray) actions whose log probabilites and entropy to compute. - :param masks: (np.ndarray) denotes points at which RNN states should be reset. - :param available_actions: (np.ndarray) denotes which actions are available to agent - (if None, all actions available) - :param active_masks: (torch.Tensor) denotes whether an agent is active or dead. - - :return values: (torch.Tensor) value function predictions. - :return action_log_probs: (torch.Tensor) log probabilities of the input actions. - :return dist_entropy: (torch.Tensor) action distribution entropy for the given inputs. - """ - action_log_probs, dist_entropy = self.actor.evaluate_actions( - obs, rnn_states_actor, action, masks, available_actions, - active_masks) - - values, _ = self.critic(cent_obs, rnn_states_critic, masks) - return values, action_log_probs, dist_entropy - - def act(self, - obs, - rnn_states_actor, - masks, - available_actions=None, - deterministic=False): - """ - Compute actions using the given inputs. - :param obs (np.ndarray): local agent inputs to the actor. - :param rnn_states_actor: (np.ndarray) if actor is RNN, RNN states for actor. - :param masks: (np.ndarray) denotes points at which RNN states should be reset. 
- :param available_actions: (np.ndarray) denotes which actions are available to agent - (if None, all actions available) - :param deterministic: (bool) whether the action should be mode of distribution or should be sampled. - """ - actions, _, rnn_states_actor = self.actor(obs, rnn_states_actor, masks, - available_actions, - deterministic) - return actions, rnn_states_actor diff --git a/algos/ppo/r_mappo/algorithm/r_actor_critic.py b/algos/ppo/r_mappo/algorithm/r_actor_critic.py deleted file mode 100644 index ee9dfdf0..00000000 --- a/algos/ppo/r_mappo/algorithm/r_actor_critic.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -import torch -import torch.nn as nn -from algos.ppo.ppo_utils.util import init, check -from algos.ppo.ppo_utils.mlp import MLPBase -from algos.ppo.ppo_utils.rnn import RNNLayer -from algos.ppo.ppo_utils.act import ACTLayer -from algos.ppo.ppo_utils.popart import PopArt -from algos.ppo.utils.util import get_shape_from_obs_space - - -class R_Actor(nn.Module): - """ - Actor network class for MAPPO. Outputs actions given observations. - :param args: (argparse.Namespace) arguments containing relevant model information. - :param obs_space: (gym.Space) observation space. - :param action_space: (gym.Space) action space. - :param device: (torch.device) specifies the device to run on (cpu/gpu). 
- """ - - def __init__(self, - args, - obs_space, - action_space, - device=torch.device("cpu")): - super(R_Actor, self).__init__() - self.hidden_size = args.hidden_size - - self._gain = args.gain - self._use_orthogonal = args.use_orthogonal - self._use_policy_active_masks = args.use_policy_active_masks - self._use_naive_recurrent_policy = args.use_naive_recurrent_policy - self._use_recurrent_policy = args.use_recurrent_policy - self._recurrent_N = args.recurrent_N - self.tpdv = dict(dtype=torch.float32, device=device) - - obs_shape = get_shape_from_obs_space(obs_space) - base = MLPBase - self.base = base(args, obs_shape) - - if self._use_naive_recurrent_policy or self._use_recurrent_policy: - self.rnn = RNNLayer(self.hidden_size, self.hidden_size, - self._recurrent_N, self._use_orthogonal, - device) - - self.act = ACTLayer(action_space, self.hidden_size, - self._use_orthogonal, self._gain, device) - - self.to(device) - - def forward(self, - obs, - rnn_states, - masks, - available_actions=None, - deterministic=False): - """ - Compute actions from the given inputs. - :param obs: (np.ndarray / torch.Tensor) observation inputs into network. - :param rnn_states: (np.ndarray / torch.Tensor) if RNN network, hidden states for RNN. - :param masks: (np.ndarray / torch.Tensor) mask tensor denoting if hidden states should be reinitialized to zeros. - :param available_actions: (np.ndarray / torch.Tensor) denotes which actions are available to agent - (if None, all actions available) - :param deterministic: (bool) whether to sample from action distribution or return the mode. - - :return actions: (torch.Tensor) actions to take. - :return action_log_probs: (torch.Tensor) log probabilities of taken actions. - :return rnn_states: (torch.Tensor) updated RNN hidden states. 
- """ - obs = check(obs).to(**self.tpdv) - rnn_states = check(rnn_states).to(**self.tpdv) - masks = check(masks).to(**self.tpdv) - if available_actions is not None: - available_actions = check(available_actions).to(**self.tpdv) - - actor_features = self.base(obs) - - if self._use_naive_recurrent_policy or self._use_recurrent_policy: - actor_features, rnn_states = self.rnn(actor_features, rnn_states, - masks) - - actions, action_log_probs = self.act(actor_features, available_actions, - deterministic) - - return actions, action_log_probs, rnn_states - - def evaluate_actions(self, - obs, - rnn_states, - action, - masks, - available_actions=None, - active_masks=None): - """ - Compute log probability and entropy of given actions. - :param obs: (torch.Tensor) observation inputs into network. - :param action: (torch.Tensor) actions whose entropy and log probability to evaluate. - :param rnn_states: (torch.Tensor) if RNN network, hidden states for RNN. - :param masks: (torch.Tensor) mask tensor denoting if hidden states should be reinitialized to zeros. - :param available_actions: (torch.Tensor) denotes which actions are available to agent - (if None, all actions available) - :param active_masks: (torch.Tensor) denotes whether an agent is active or dead. - - :return action_log_probs: (torch.Tensor) log probabilities of the input actions. - :return dist_entropy: (torch.Tensor) action distribution entropy for the given inputs. 
- """ - obs = check(obs).to(**self.tpdv) - rnn_states = check(rnn_states).to(**self.tpdv) - action = check(action).to(**self.tpdv) - masks = check(masks).to(**self.tpdv) - if available_actions is not None: - available_actions = check(available_actions).to(**self.tpdv) - - if active_masks is not None: - active_masks = check(active_masks).to(**self.tpdv) - - actor_features = self.base(obs) - - if self._use_naive_recurrent_policy or self._use_recurrent_policy: - actor_features, rnn_states = self.rnn(actor_features, rnn_states, - masks) - - action_log_probs, dist_entropy = self.act.evaluate_actions( - actor_features, - action, - available_actions, - active_masks=active_masks - if self._use_policy_active_masks else None) - - return action_log_probs, dist_entropy - - -class R_Critic(nn.Module): - """ - Critic network class for MAPPO. Outputs value function predictions given centralized input (MAPPO) or - local observations (IPPO). - :param args: (argparse.Namespace) arguments containing relevant model information. - :param cent_obs_space: (gym.Space) (centralized) observation space. - :param device: (torch.device) specifies the device to run on (cpu/gpu). 
- """ - - def __init__(self, args, cent_obs_space, device=torch.device("cpu")): - super(R_Critic, self).__init__() - self.hidden_size = args.hidden_size - self._use_orthogonal = args.use_orthogonal - self._use_naive_recurrent_policy = args.use_naive_recurrent_policy - self._use_recurrent_policy = args.use_recurrent_policy - self._recurrent_N = args.recurrent_N - self._use_popart = args.use_popart - self.tpdv = dict(dtype=torch.float32, device=device) - init_method = [nn.init.xavier_uniform_, - nn.init.orthogonal_][self._use_orthogonal] - - cent_obs_shape = get_shape_from_obs_space(cent_obs_space) - base = MLPBase - self.base = base(args, cent_obs_shape) - - if self._use_naive_recurrent_policy or self._use_recurrent_policy: - self.rnn = RNNLayer(self.hidden_size, self.hidden_size, - self._recurrent_N, self._use_orthogonal, - device) - - def init_(m): - return init(m, init_method, lambda x: nn.init.constant_(x, 0)) - - if self._use_popart: - self.v_out = init_(PopArt(self.hidden_size, 1, device=device)) - else: - self.v_out = init_(nn.Linear(self.hidden_size, 1)) - - self.to(device) - - def forward(self, cent_obs, rnn_states, masks): - """ - Compute actions from the given inputs. - :param cent_obs: (np.ndarray / torch.Tensor) observation inputs into network. - :param rnn_states: (np.ndarray / torch.Tensor) if RNN network, hidden states for RNN. - :param masks: (np.ndarray / torch.Tensor) mask tensor denoting if RNN states should be reinitialized to zeros. - - :return values: (torch.Tensor) value function predictions. - :return rnn_states: (torch.Tensor) updated RNN hidden states. 
- """ - cent_obs = check(cent_obs).to(**self.tpdv) - rnn_states = check(rnn_states).to(**self.tpdv) - masks = check(masks).to(**self.tpdv) - - critic_features = self.base(cent_obs) - if self._use_naive_recurrent_policy or self._use_recurrent_policy: - critic_features, rnn_states = self.rnn(critic_features, rnn_states, - masks) - values = self.v_out(critic_features) - - return values, rnn_states diff --git a/algos/ppo/r_mappo/r_mappo.py b/algos/ppo/r_mappo/r_mappo.py deleted file mode 100644 index 0bae8b24..00000000 --- a/algos/ppo/r_mappo/r_mappo.py +++ /dev/null @@ -1,244 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -import numpy as np -import torch -import torch.nn as nn -from algos.ppo.utils.util import get_gard_norm, huber_loss, mse_loss -from algos.ppo.utils.valuenorm import ValueNorm -from algos.ppo.ppo_utils.util import check - - -class R_MAPPO(): - """ - Trainer class for MAPPO to update policies. - :param args: (argparse.Namespace) arguments containing relevant model, policy, and env information. - :param policy: (R_MAPPO_Policy) policy to update. - :param device: (torch.device) specifies the device to run on (cpu/gpu). 
- """ - - def __init__(self, args, policy, device=torch.device("cpu")): - - self.device = device - self.tpdv = dict(dtype=torch.float32, device=device) - self.policy = policy - - self.clip_param = args.clip_param - self.ppo_epoch = args.ppo_epoch - self.num_mini_batch = args.num_mini_batch - self.data_chunk_length = args.data_chunk_length - self.value_loss_coef = args.value_loss_coef - self.entropy_coef = args.entropy_coef - self.max_grad_norm = args.max_grad_norm - self.huber_delta = args.huber_delta - - self._use_recurrent_policy = args.use_recurrent_policy - self._use_naive_recurrent = args.use_naive_recurrent_policy - self._use_max_grad_norm = args.use_max_grad_norm - self._use_clipped_value_loss = args.use_clipped_value_loss - self._use_huber_loss = args.use_huber_loss - self._use_popart = args.use_popart - self._use_valuenorm = args.use_valuenorm - self._use_value_active_masks = args.use_value_active_masks - self._use_policy_active_masks = args.use_policy_active_masks - - assert (self._use_popart and self._use_valuenorm) == False, ( - "self._use_popart and self._use_valuenorm can not be set True simultaneously" - ) - - if self._use_popart: - self.value_normalizer = self.policy.critic.v_out - elif self._use_valuenorm: - self.value_normalizer = ValueNorm(1, device=self.device) - else: - self.value_normalizer = None - - def cal_value_loss(self, values, value_preds_batch, return_batch, - active_masks_batch): - """ - Calculate value function loss. - :param values: (torch.Tensor) value function predictions. - :param value_preds_batch: (torch.Tensor) "old" value predictions from data batch (used for value clip loss) - :param return_batch: (torch.Tensor) reward to go returns. - :param active_masks_batch: (torch.Tensor) denotes if agent is active or dead at a given timesep. - - :return value_loss: (torch.Tensor) value function loss. 
- """ - value_pred_clipped = value_preds_batch + ( - values - value_preds_batch).clamp(-self.clip_param, - self.clip_param) - if self._use_popart or self._use_valuenorm: - self.value_normalizer.update(return_batch) - error_clipped = self.value_normalizer.normalize( - return_batch) - value_pred_clipped - error_original = self.value_normalizer.normalize( - return_batch) - values - else: - error_clipped = return_batch - value_pred_clipped - error_original = return_batch - values - - if self._use_huber_loss: - value_loss_clipped = huber_loss(error_clipped, self.huber_delta) - value_loss_original = huber_loss(error_original, self.huber_delta) - else: - value_loss_clipped = mse_loss(error_clipped) - value_loss_original = mse_loss(error_original) - - if self._use_clipped_value_loss: - value_loss = torch.max(value_loss_original, value_loss_clipped) - else: - value_loss = value_loss_original - - if self._use_value_active_masks: - value_loss = (value_loss * - active_masks_batch).sum() / active_masks_batch.sum() - else: - value_loss = value_loss.mean() - - return value_loss - - def ppo_update(self, sample, update_actor=True): - """ - Update actor and critic networks. - :param sample: (Tuple) contains data batch with which to update networks. - :update_actor: (bool) whether to update actor network. - - :return value_loss: (torch.Tensor) value function loss. - :return critic_grad_norm: (torch.Tensor) gradient norm from critic up9date. - ;return policy_loss: (torch.Tensor) actor(policy) loss value. - :return dist_entropy: (torch.Tensor) action entropies. - :return actor_grad_norm: (torch.Tensor) gradient norm from actor update. - :return imp_weights: (torch.Tensor) importance sampling weights. 
- """ - share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, \ - value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch, \ - adv_targ, available_actions_batch = sample - - old_action_log_probs_batch = check(old_action_log_probs_batch).to( - **self.tpdv) - adv_targ = check(adv_targ).to(**self.tpdv) - value_preds_batch = check(value_preds_batch).to(**self.tpdv) - return_batch = check(return_batch).to(**self.tpdv) - active_masks_batch = check(active_masks_batch).to(**self.tpdv) - - # Reshape to do in a single forward pass for all steps - values, action_log_probs, dist_entropy = self.policy.evaluate_actions( - share_obs_batch, obs_batch, rnn_states_batch, - rnn_states_critic_batch, actions_batch, masks_batch, - available_actions_batch, active_masks_batch) - # actor update - imp_weights = torch.exp(action_log_probs - old_action_log_probs_batch) - - surr1 = imp_weights * adv_targ - surr2 = torch.clamp(imp_weights, 1.0 - self.clip_param, - 1.0 + self.clip_param) * adv_targ - - if self._use_policy_active_masks: - policy_action_loss = ( - -torch.sum(torch.min(surr1, surr2), dim=-1, keepdim=True) * - active_masks_batch).sum() / active_masks_batch.sum() - else: - policy_action_loss = -torch.sum( - torch.min(surr1, surr2), dim=-1, keepdim=True).mean() - - policy_loss = policy_action_loss - - self.policy.actor_optimizer.zero_grad() - - if update_actor: - (policy_loss - dist_entropy * self.entropy_coef).backward() - - if self._use_max_grad_norm: - actor_grad_norm = nn.utils.clip_grad_norm_( - self.policy.actor.parameters(), self.max_grad_norm) - else: - actor_grad_norm = get_gard_norm(self.policy.actor.parameters()) - - self.policy.actor_optimizer.step() - - # critic update - value_loss = self.cal_value_loss(values, value_preds_batch, - return_batch, active_masks_batch) - - self.policy.critic_optimizer.zero_grad() - - (value_loss * self.value_loss_coef).backward() - - if self._use_max_grad_norm: - 
critic_grad_norm = nn.utils.clip_grad_norm_( - self.policy.critic.parameters(), self.max_grad_norm) - else: - critic_grad_norm = get_gard_norm(self.policy.critic.parameters()) - - self.policy.critic_optimizer.step() - - return value_loss, critic_grad_norm, policy_loss, dist_entropy, actor_grad_norm, imp_weights - - def train(self, buffer, update_actor=True): - """ - Perform a training update using minibatch GD. - :param buffer: (SharedReplayBuffer) buffer containing training data. - :param update_actor: (bool) whether to update actor network. - - :return train_info: (dict) contains information regarding training update (e.g. loss, grad norms, etc). - """ - if self._use_popart or self._use_valuenorm: - advantages = buffer.returns[: - -1] - self.value_normalizer.denormalize( - buffer.value_preds[:-1]) - else: - advantages = buffer.returns[:-1] - buffer.value_preds[:-1] - advantages_copy = advantages.copy() - advantages_copy[buffer.active_masks[:-1] == 0.0] = np.nan - mean_advantages = np.nanmean(advantages_copy) - std_advantages = np.nanstd(advantages_copy) - advantages = (advantages - mean_advantages) / (std_advantages + 1e-5) - - train_info = {} - - train_info['value_loss'] = 0 - train_info['policy_loss'] = 0 - train_info['dist_entropy'] = 0 - train_info['actor_grad_norm'] = 0 - train_info['critic_grad_norm'] = 0 - train_info['ratio'] = 0 - - for _ in range(self.ppo_epoch): - if self._use_recurrent_policy: - data_generator = buffer.recurrent_generator( - advantages, self.num_mini_batch, self.data_chunk_length) - elif self._use_naive_recurrent: - data_generator = buffer.naive_recurrent_generator( - advantages, self.num_mini_batch) - else: - data_generator = buffer.feed_forward_generator( - advantages, self.num_mini_batch) - - for sample in data_generator: - - value_loss, critic_grad_norm, policy_loss, dist_entropy, actor_grad_norm, imp_weights \ - = self.ppo_update(sample, update_actor) - - train_info['value_loss'] += value_loss.item() - train_info['policy_loss'] += 
policy_loss.item() - train_info['dist_entropy'] += dist_entropy.item() - train_info['actor_grad_norm'] += actor_grad_norm - train_info['critic_grad_norm'] += critic_grad_norm - train_info['ratio'] += imp_weights.mean() - - num_updates = self.ppo_epoch * self.num_mini_batch - - for k in train_info.keys(): - train_info[k] /= num_updates - - return train_info - - def prep_training(self): - self.policy.actor.train() - self.policy.critic.train() - - def prep_rollout(self): - self.policy.actor.eval() - self.policy.critic.eval() diff --git a/algos/ppo/utils/__init__.py b/algos/ppo/utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/algos/ppo/utils/multi_discrete.py b/algos/ppo/utils/multi_discrete.py deleted file mode 100644 index 64f106fa..00000000 --- a/algos/ppo/utils/multi_discrete.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -import gym -import numpy as np - - -# An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates) -# (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py) -class MultiDiscrete(gym.Space): - """ - - The multi-discrete action space consists of a series of discrete action spaces with different parameters - - It can be adapted to both a Discrete action space or a continuous (Box) action space - - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space - - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space where the discrete action space can take any integers from `min` to `max` (both inclusive) - Note: A value of 0 always need to represent the NOOP action. - e.g. 
Nintendo Game Controller - - Can be conceptualized as 3 discrete action spaces: - 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4 - 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 - 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 - - Can be initialized as - MultiDiscrete([ [0,4], [0,1], [0,1] ]) - """ - - def __init__(self, array_of_param_array): - self.low = np.array([x[0] for x in array_of_param_array]) - self.high = np.array([x[1] for x in array_of_param_array]) - self.num_discrete_space = self.low.shape[0] - self.n = np.sum(self.high) + 2 - - def sample(self): - """ Returns a array with one sample from each discrete action space """ - # For each row: round(random .* (max - min) + min, 0) - random_array = np.random.rand(self.num_discrete_space) - return [ - int(x) for x in np.floor( - np.multiply((self.high - self.low + 1.), random_array) + - self.low) - ] - - def contains(self, x): - return len(x) == self.num_discrete_space and ( - np.array(x) >= self.low).all() and (np.array(x) <= - self.high).all() - - @property - def shape(self): - return self.num_discrete_space - - def __repr__(self): - return "MultiDiscrete" + str(self.num_discrete_space) - - def __eq__(self, other): - return np.array_equal(self.low, other.low) and np.array_equal( - self.high, other.high) diff --git a/algos/ppo/utils/separated_buffer.py b/algos/ppo/utils/separated_buffer.py deleted file mode 100644 index 342b51ff..00000000 --- a/algos/ppo/utils/separated_buffer.py +++ /dev/null @@ -1,505 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-# Code modified from https://github.com/marlbenchmark/on-policy -import torch -import numpy as np -from collections import defaultdict - -from algos.ppo.utils.util import check, get_shape_from_obs_space, get_shape_from_act_space - - -def _flatten(T, N, x): - return x.reshape(T * N, *x.shape[2:]) - - -def _cast(x): - return x.transpose(1, 0, 2).reshape(-1, *x.shape[2:]) - - -class SeparatedReplayBuffer(object): - - def __init__(self, args, obs_space, share_obs_space, act_space): - self.episode_length = args.episode_length - self.n_rollout_threads = args.n_rollout_threads - self.rnn_hidden_size = args.hidden_size - self.recurrent_N = args.recurrent_N - self.gamma = args.gamma - self.gae_lambda = args.gae_lambda - self._use_gae = args.use_gae - self._use_popart = args.use_popart - self._use_valuenorm = args.use_valuenorm - self._use_proper_time_limits = args.use_proper_time_limits - - obs_shape = get_shape_from_obs_space(obs_space) - share_obs_shape = get_shape_from_obs_space(share_obs_space) - - if type(obs_shape[-1]) == list: - obs_shape = obs_shape[:1] - - if type(share_obs_shape[-1]) == list: - share_obs_shape = share_obs_shape[:1] - - self.share_obs = np.zeros((self.episode_length + 1, - self.n_rollout_threads, *share_obs_shape), - dtype=np.float32) - self.obs = np.zeros( - (self.episode_length + 1, self.n_rollout_threads, *obs_shape), - dtype=np.float32) - - self.rnn_states = np.zeros( - (self.episode_length + 1, self.n_rollout_threads, self.recurrent_N, - self.rnn_hidden_size), - dtype=np.float32) - self.rnn_states_critic = np.zeros_like(self.rnn_states) - - self.value_preds = np.zeros( - (self.episode_length + 1, self.n_rollout_threads, 1), - dtype=np.float32) - self.returns = np.zeros( - (self.episode_length + 1, self.n_rollout_threads, 1), - dtype=np.float32) - - if act_space.__class__.__name__ == 'Discrete': - self.available_actions = np.ones( - (self.episode_length + 1, self.n_rollout_threads, act_space.n), - dtype=np.float32) - else: - 
self.available_actions = None - - act_shape = get_shape_from_act_space(act_space) - - self.actions = np.zeros( - (self.episode_length, self.n_rollout_threads, act_shape), - dtype=np.float32) - self.action_log_probs = np.zeros( - (self.episode_length, self.n_rollout_threads, act_shape), - dtype=np.float32) - self.rewards = np.zeros( - (self.episode_length, self.n_rollout_threads, 1), dtype=np.float32) - - self.masks = np.ones( - (self.episode_length + 1, self.n_rollout_threads, 1), - dtype=np.float32) - self.bad_masks = np.ones_like(self.masks) - self.active_masks = np.ones_like(self.masks) - - self.step = 0 - - def insert(self, - share_obs, - obs, - rnn_states, - rnn_states_critic, - actions, - action_log_probs, - value_preds, - rewards, - masks, - bad_masks=None, - active_masks=None, - available_actions=None): - self.share_obs[self.step + 1] = share_obs.copy() - self.obs[self.step + 1] = obs.copy() - self.rnn_states[self.step + 1] = rnn_states.copy() - self.rnn_states_critic[self.step + 1] = rnn_states_critic.copy() - self.actions[self.step] = actions.copy() - self.action_log_probs[self.step] = action_log_probs.copy() - self.value_preds[self.step] = value_preds.copy() - self.rewards[self.step] = rewards.copy() - self.masks[self.step + 1] = masks.copy() - if bad_masks is not None: - self.bad_masks[self.step + 1] = bad_masks.copy() - if active_masks is not None: - self.active_masks[self.step + 1] = active_masks.copy() - if available_actions is not None: - self.available_actions[self.step + 1] = available_actions.copy() - - self.step = (self.step + 1) % self.episode_length - - def chooseinsert(self, - share_obs, - obs, - rnn_states, - rnn_states_critic, - actions, - action_log_probs, - value_preds, - rewards, - masks, - bad_masks=None, - active_masks=None, - available_actions=None): - self.share_obs[self.step] = share_obs.copy() - self.obs[self.step] = obs.copy() - self.rnn_states[self.step + 1] = rnn_states.copy() - self.rnn_states_critic[self.step + 1] = 
rnn_states_critic.copy() - self.actions[self.step] = actions.copy() - self.action_log_probs[self.step] = action_log_probs.copy() - self.value_preds[self.step] = value_preds.copy() - self.rewards[self.step] = rewards.copy() - self.masks[self.step + 1] = masks.copy() - if bad_masks is not None: - self.bad_masks[self.step + 1] = bad_masks.copy() - if active_masks is not None: - self.active_masks[self.step] = active_masks.copy() - if available_actions is not None: - self.available_actions[self.step] = available_actions.copy() - - self.step = (self.step + 1) % self.episode_length - - def after_update(self): - self.share_obs[0] = self.share_obs[-1].copy() - self.obs[0] = self.obs[-1].copy() - self.rnn_states[0] = self.rnn_states[-1].copy() - self.rnn_states_critic[0] = self.rnn_states_critic[-1].copy() - self.masks[0] = self.masks[-1].copy() - self.bad_masks[0] = self.bad_masks[-1].copy() - self.active_masks[0] = self.active_masks[-1].copy() - if self.available_actions is not None: - self.available_actions[0] = self.available_actions[-1].copy() - - def chooseafter_update(self): - self.rnn_states[0] = self.rnn_states[-1].copy() - self.rnn_states_critic[0] = self.rnn_states_critic[-1].copy() - self.masks[0] = self.masks[-1].copy() - self.bad_masks[0] = self.bad_masks[-1].copy() - - def compute_returns(self, next_value, value_normalizer=None): - if self._use_proper_time_limits: - if self._use_gae: - self.value_preds[-1] = next_value - gae = 0 - for step in reversed(range(self.rewards.shape[0])): - if self._use_popart or self._use_valuenorm: - delta = self.rewards[ - step] + self.gamma * value_normalizer.denormalize( - self.value_preds[step + 1]) * self.masks[ - step + 1] - value_normalizer.denormalize( - self.value_preds[step]) - gae = delta + self.gamma * self.gae_lambda * self.masks[ - step + 1] * gae - gae = gae * self.bad_masks[step + 1] - self.returns[ - step] = gae + value_normalizer.denormalize( - self.value_preds[step]) - else: - delta = self.rewards[ - step] + 
self.gamma * self.value_preds[ - step + 1] * self.masks[ - step + 1] - self.value_preds[step] - gae = delta + self.gamma * self.gae_lambda * self.masks[ - step + 1] * gae - gae = gae * self.bad_masks[step + 1] - self.returns[step] = gae + self.value_preds[step] - else: - self.returns[-1] = next_value - for step in reversed(range(self.rewards.shape[0])): - if self._use_popart: - self.returns[step] = (self.returns[step + 1] * self.gamma * self.masks[step + 1] + self.rewards[step]) * self.bad_masks[step + 1] \ - + (1 - self.bad_masks[step + 1]) * value_normalizer.denormalize(self.value_preds[step]) - else: - self.returns[step] = (self.returns[step + 1] * self.gamma * self.masks[step + 1] + self.rewards[step]) * self.bad_masks[step + 1] \ - + (1 - self.bad_masks[step + 1]) * self.value_preds[step] - else: - if self._use_gae: - self.value_preds[-1] = next_value - gae = 0 - for step in reversed(range(self.rewards.shape[0])): - if self._use_popart or self._use_valuenorm: - delta = self.rewards[ - step] + self.gamma * value_normalizer.denormalize( - self.value_preds[step + 1]) * self.masks[ - step + 1] - value_normalizer.denormalize( - self.value_preds[step]) - gae = delta + self.gamma * self.gae_lambda * self.masks[ - step + 1] * gae - self.returns[ - step] = gae + value_normalizer.denormalize( - self.value_preds[step]) - else: - delta = self.rewards[ - step] + self.gamma * self.value_preds[ - step + 1] * self.masks[ - step + 1] - self.value_preds[step] - gae = delta + self.gamma * self.gae_lambda * self.masks[ - step + 1] * gae - self.returns[step] = gae + self.value_preds[step] - else: - self.returns[-1] = next_value - for step in reversed(range(self.rewards.shape[0])): - self.returns[step] = self.returns[ - step + 1] * self.gamma * self.masks[ - step + 1] + self.rewards[step] - - def feed_forward_generator(self, - advantages, - num_mini_batch=None, - mini_batch_size=None): - episode_length, n_rollout_threads = self.rewards.shape[0:2] - batch_size = n_rollout_threads * 
episode_length - - if mini_batch_size is None: - assert batch_size >= num_mini_batch, ( - "PPO requires the number of processes ({}) " - "* number of steps ({}) = {} " - "to be greater than or equal to the number of PPO mini batches ({})." - "".format(n_rollout_threads, episode_length, - n_rollout_threads * episode_length, num_mini_batch)) - mini_batch_size = batch_size // num_mini_batch - - rand = torch.randperm(batch_size).numpy() - sampler = [ - rand[i * mini_batch_size:(i + 1) * mini_batch_size] - for i in range(num_mini_batch) - ] - - share_obs = self.share_obs[:-1].reshape(-1, *self.share_obs.shape[2:]) - obs = self.obs[:-1].reshape(-1, *self.obs.shape[2:]) - rnn_states = self.rnn_states[:-1].reshape(-1, - *self.rnn_states.shape[2:]) - rnn_states_critic = self.rnn_states_critic[:-1].reshape( - -1, *self.rnn_states_critic.shape[2:]) - actions = self.actions.reshape(-1, self.actions.shape[-1]) - if self.available_actions is not None: - available_actions = self.available_actions[:-1].reshape( - -1, self.available_actions.shape[-1]) - value_preds = self.value_preds[:-1].reshape(-1, 1) - returns = self.returns[:-1].reshape(-1, 1) - masks = self.masks[:-1].reshape(-1, 1) - active_masks = self.active_masks[:-1].reshape(-1, 1) - action_log_probs = self.action_log_probs.reshape( - -1, self.action_log_probs.shape[-1]) - advantages = advantages.reshape(-1, 1) - - for indices in sampler: - # obs size [T+1 N Dim]-->[T N Dim]-->[T*N,Dim]-->[index,Dim] - share_obs_batch = share_obs[indices] - obs_batch = obs[indices] - rnn_states_batch = rnn_states[indices] - rnn_states_critic_batch = rnn_states_critic[indices] - actions_batch = actions[indices] - if self.available_actions is not None: - available_actions_batch = available_actions[indices] - else: - available_actions_batch = None - value_preds_batch = value_preds[indices] - return_batch = returns[indices] - masks_batch = masks[indices] - active_masks_batch = active_masks[indices] - old_action_log_probs_batch = 
action_log_probs[indices] - if advantages is None: - adv_targ = None - else: - adv_targ = advantages[indices] - - yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch, adv_targ, available_actions_batch - - def naive_recurrent_generator(self, advantages, num_mini_batch): - n_rollout_threads = self.rewards.shape[1] - assert n_rollout_threads >= num_mini_batch, ( - "PPO requires the number of processes ({}) " - "to be greater than or equal to the number of " - "PPO mini batches ({}).".format(n_rollout_threads, num_mini_batch)) - num_envs_per_batch = n_rollout_threads // num_mini_batch - perm = torch.randperm(n_rollout_threads).numpy() - for start_ind in range(0, n_rollout_threads, num_envs_per_batch): - share_obs_batch = [] - obs_batch = [] - rnn_states_batch = [] - rnn_states_critic_batch = [] - actions_batch = [] - available_actions_batch = [] - value_preds_batch = [] - return_batch = [] - masks_batch = [] - active_masks_batch = [] - old_action_log_probs_batch = [] - adv_targ = [] - - for offset in range(num_envs_per_batch): - ind = perm[start_ind + offset] - share_obs_batch.append(self.share_obs[:-1, ind]) - obs_batch.append(self.obs[:-1, ind]) - rnn_states_batch.append(self.rnn_states[0:1, ind]) - rnn_states_critic_batch.append(self.rnn_states_critic[0:1, - ind]) - actions_batch.append(self.actions[:, ind]) - if self.available_actions is not None: - available_actions_batch.append(self.available_actions[:-1, - ind]) - value_preds_batch.append(self.value_preds[:-1, ind]) - return_batch.append(self.returns[:-1, ind]) - masks_batch.append(self.masks[:-1, ind]) - active_masks_batch.append(self.active_masks[:-1, ind]) - old_action_log_probs_batch.append(self.action_log_probs[:, - ind]) - adv_targ.append(advantages[:, ind]) - - # [N[T, dim]] - T, N = self.episode_length, num_envs_per_batch - # These are all from_numpys of size (T, N, -1) - 
share_obs_batch = np.stack(share_obs_batch, 1) - obs_batch = np.stack(obs_batch, 1) - actions_batch = np.stack(actions_batch, 1) - if self.available_actions is not None: - available_actions_batch = np.stack(available_actions_batch, 1) - value_preds_batch = np.stack(value_preds_batch, 1) - return_batch = np.stack(return_batch, 1) - masks_batch = np.stack(masks_batch, 1) - active_masks_batch = np.stack(active_masks_batch, 1) - old_action_log_probs_batch = np.stack(old_action_log_probs_batch, - 1) - adv_targ = np.stack(adv_targ, 1) - - # States is just a (N, -1) from_numpy [N[1,dim]] - rnn_states_batch = np.stack(rnn_states_batch, - 1).reshape(N, - *self.rnn_states.shape[2:]) - rnn_states_critic_batch = np.stack( - rnn_states_critic_batch, - 1).reshape(N, *self.rnn_states_critic.shape[2:]) - - # Flatten the (T, N, ...) from_numpys to (T * N, ...) - share_obs_batch = _flatten(T, N, share_obs_batch) - obs_batch = _flatten(T, N, obs_batch) - actions_batch = _flatten(T, N, actions_batch) - if self.available_actions is not None: - available_actions_batch = _flatten(T, N, - available_actions_batch) - else: - available_actions_batch = None - value_preds_batch = _flatten(T, N, value_preds_batch) - return_batch = _flatten(T, N, return_batch) - masks_batch = _flatten(T, N, masks_batch) - active_masks_batch = _flatten(T, N, active_masks_batch) - old_action_log_probs_batch = _flatten(T, N, - old_action_log_probs_batch) - adv_targ = _flatten(T, N, adv_targ) - - yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch, adv_targ, available_actions_batch - - def recurrent_generator(self, advantages, num_mini_batch, - data_chunk_length): - episode_length, n_rollout_threads = self.rewards.shape[0:2] - batch_size = n_rollout_threads * episode_length - data_chunks = batch_size // data_chunk_length # [C=r*T/L] - mini_batch_size = data_chunks // num_mini_batch - - 
assert episode_length * n_rollout_threads >= data_chunk_length, ( - "PPO requires the number of processes ({}) * episode length ({}) " - "to be greater than or equal to the number of " - "data chunk length ({}).".format(n_rollout_threads, episode_length, - data_chunk_length)) - assert data_chunks >= 2, ("need larger batch size") - - rand = torch.randperm(data_chunks).numpy() - sampler = [ - rand[i * mini_batch_size:(i + 1) * mini_batch_size] - for i in range(num_mini_batch) - ] - - if len(self.share_obs.shape) > 3: - share_obs = self.share_obs[:-1].transpose(1, 0, 2, 3, 4).reshape( - -1, *self.share_obs.shape[2:]) - obs = self.obs[:-1].transpose(1, 0, 2, 3, - 4).reshape(-1, *self.obs.shape[2:]) - else: - share_obs = _cast(self.share_obs[:-1]) - obs = _cast(self.obs[:-1]) - - actions = _cast(self.actions) - action_log_probs = _cast(self.action_log_probs) - advantages = _cast(advantages) - value_preds = _cast(self.value_preds[:-1]) - returns = _cast(self.returns[:-1]) - masks = _cast(self.masks[:-1]) - active_masks = _cast(self.active_masks[:-1]) - # rnn_states = _cast(self.rnn_states[:-1]) - # rnn_states_critic = _cast(self.rnn_states_critic[:-1]) - rnn_states = self.rnn_states[:-1].transpose(1, 0, 2, 3).reshape( - -1, *self.rnn_states.shape[2:]) - rnn_states_critic = self.rnn_states_critic[:-1].transpose( - 1, 0, 2, 3).reshape(-1, *self.rnn_states_critic.shape[2:]) - - if self.available_actions is not None: - available_actions = _cast(self.available_actions[:-1]) - - for indices in sampler: - share_obs_batch = [] - obs_batch = [] - rnn_states_batch = [] - rnn_states_critic_batch = [] - actions_batch = [] - available_actions_batch = [] - value_preds_batch = [] - return_batch = [] - masks_batch = [] - active_masks_batch = [] - old_action_log_probs_batch = [] - adv_targ = [] - - for index in indices: - ind = index * data_chunk_length - # size [T+1 N M Dim]-->[T N Dim]-->[N T Dim]-->[T*N,Dim]-->[L,Dim] - share_obs_batch.append(share_obs[ind:ind + data_chunk_length]) - 
obs_batch.append(obs[ind:ind + data_chunk_length]) - actions_batch.append(actions[ind:ind + data_chunk_length]) - if self.available_actions is not None: - available_actions_batch.append( - available_actions[ind:ind + data_chunk_length]) - value_preds_batch.append(value_preds[ind:ind + - data_chunk_length]) - return_batch.append(returns[ind:ind + data_chunk_length]) - masks_batch.append(masks[ind:ind + data_chunk_length]) - active_masks_batch.append(active_masks[ind:ind + - data_chunk_length]) - old_action_log_probs_batch.append( - action_log_probs[ind:ind + data_chunk_length]) - adv_targ.append(advantages[ind:ind + data_chunk_length]) - # size [T+1 N Dim]-->[T N Dim]-->[T*N,Dim]-->[1,Dim] - rnn_states_batch.append(rnn_states[ind]) - rnn_states_critic_batch.append(rnn_states_critic[ind]) - - L, N = data_chunk_length, mini_batch_size - - # These are all from_numpys of size (N, L, Dim) - share_obs_batch = np.stack(share_obs_batch) - obs_batch = np.stack(obs_batch) - - actions_batch = np.stack(actions_batch) - if self.available_actions is not None: - available_actions_batch = np.stack(available_actions_batch) - value_preds_batch = np.stack(value_preds_batch) - return_batch = np.stack(return_batch) - masks_batch = np.stack(masks_batch) - active_masks_batch = np.stack(active_masks_batch) - old_action_log_probs_batch = np.stack(old_action_log_probs_batch) - adv_targ = np.stack(adv_targ) - - # States is just a (N, -1) from_numpy - rnn_states_batch = np.stack(rnn_states_batch).reshape( - N, *self.rnn_states.shape[2:]) - rnn_states_critic_batch = np.stack( - rnn_states_critic_batch).reshape( - N, *self.rnn_states_critic.shape[2:]) - - # Flatten the (L, N, ...) from_numpys to (L * N, ...) 
- share_obs_batch = _flatten(L, N, share_obs_batch) - obs_batch = _flatten(L, N, obs_batch) - actions_batch = _flatten(L, N, actions_batch) - if self.available_actions is not None: - available_actions_batch = _flatten(L, N, - available_actions_batch) - else: - available_actions_batch = None - value_preds_batch = _flatten(L, N, value_preds_batch) - return_batch = _flatten(L, N, return_batch) - masks_batch = _flatten(L, N, masks_batch) - active_masks_batch = _flatten(L, N, active_masks_batch) - old_action_log_probs_batch = _flatten(L, N, - old_action_log_probs_batch) - adv_targ = _flatten(L, N, adv_targ) - - yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch, adv_targ, available_actions_batch diff --git a/algos/ppo/utils/shared_buffer.py b/algos/ppo/utils/shared_buffer.py deleted file mode 100644 index 5bd6c20a..00000000 --- a/algos/ppo/utils/shared_buffer.py +++ /dev/null @@ -1,584 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -import torch -import numpy as np -from algos.ppo.utils.util import get_shape_from_obs_space, get_shape_from_act_space - - -def _flatten(T, N, x): - return x.reshape(T * N, *x.shape[2:]) - - -def _cast(x): - return x.transpose(1, 2, 0, 3).reshape(-1, *x.shape[3:]) - - -class SharedReplayBuffer(object): - """ - Buffer to store training data. - :param args: (argparse.Namespace) arguments containing relevant model, policy, and env information. - :param num_agents: (int) number of agents in the env. - :param obs_space: (gym.Space) observation space of agents. - :param cent_obs_space: (gym.Space) centralized observation space of agents. - :param act_space: (gym.Space) action space for agents. 
- """ - - def __init__(self, args, num_agents, obs_space, cent_obs_space, act_space): - self.episode_length = args.episode_length - self.n_rollout_threads = args.n_rollout_threads - self.hidden_size = args.hidden_size - self.recurrent_N = args.recurrent_N - self.gamma = args.gamma - self.gae_lambda = args.gae_lambda - self._use_gae = args.use_gae - self._use_popart = args.use_popart - self._use_valuenorm = args.use_valuenorm - self._use_proper_time_limits = args.use_proper_time_limits - - obs_shape = get_shape_from_obs_space(obs_space) - share_obs_shape = get_shape_from_obs_space(cent_obs_space) - - if type(obs_shape[-1]) == list: - obs_shape = obs_shape[:1] - - if type(share_obs_shape[-1]) == list: - share_obs_shape = share_obs_shape[:1] - - self.share_obs = np.zeros( - (self.episode_length + 1, self.n_rollout_threads, num_agents, - *share_obs_shape), - dtype=np.float32) - self.obs = np.zeros((self.episode_length + 1, self.n_rollout_threads, - num_agents, *obs_shape), - dtype=np.float32) - - self.rnn_states = np.zeros( - (self.episode_length + 1, self.n_rollout_threads, num_agents, - self.recurrent_N, self.hidden_size), - dtype=np.float32) - self.rnn_states_critic = np.zeros_like(self.rnn_states) - - self.value_preds = np.zeros( - (self.episode_length + 1, self.n_rollout_threads, num_agents, 1), - dtype=np.float32) - self.returns = np.zeros_like(self.value_preds) - - if act_space.__class__.__name__ == 'Discrete': - self.available_actions = np.ones( - (self.episode_length + 1, self.n_rollout_threads, num_agents, - act_space.n), - dtype=np.float32) - else: - self.available_actions = None - - act_shape = get_shape_from_act_space(act_space) - - self.actions = np.zeros((self.episode_length, self.n_rollout_threads, - num_agents, act_shape), - dtype=np.float32) - self.action_log_probs = np.zeros( - (self.episode_length, self.n_rollout_threads, num_agents, - act_shape), - dtype=np.float32) - self.rewards = np.zeros( - (self.episode_length, self.n_rollout_threads, 
num_agents, 1), - dtype=np.float32) - - self.masks = np.ones( - (self.episode_length + 1, self.n_rollout_threads, num_agents, 1), - dtype=np.float32) - self.bad_masks = np.ones_like(self.masks) - self.active_masks = np.ones_like(self.masks) - - self.step = 0 - - def insert(self, - share_obs, - obs, - rnn_states_actor, - rnn_states_critic, - actions, - action_log_probs, - value_preds, - rewards, - masks, - bad_masks=None, - active_masks=None, - available_actions=None): - """ - Insert data into the buffer. - :param share_obs: (argparse.Namespace) arguments containing relevant model, policy, and env information. - :param obs: (np.ndarray) local agent observations. - :param rnn_states_actor: (np.ndarray) RNN states for actor network. - :param rnn_states_critic: (np.ndarray) RNN states for critic network. - :param actions:(np.ndarray) actions taken by agents. - :param action_log_probs:(np.ndarray) log probs of actions taken by agents - :param value_preds: (np.ndarray) value function prediction at each step. - :param rewards: (np.ndarray) reward collected at each step. - :param masks: (np.ndarray) denotes whether the environment has terminated or not. - :param bad_masks: (np.ndarray) action space for agents. - :param active_masks: (np.ndarray) denotes whether an agent is active or dead in the env. - :param available_actions: (np.ndarray) actions available to each agent. If None, all actions are available. 
- """ - self.share_obs[self.step + 1] = share_obs.copy() - self.obs[self.step + 1] = obs.copy() - self.rnn_states[self.step + 1] = rnn_states_actor.copy() - self.rnn_states_critic[self.step + 1] = rnn_states_critic.copy() - self.actions[self.step] = actions.copy() - self.action_log_probs[self.step] = action_log_probs.copy() - self.value_preds[self.step] = value_preds.copy() - self.rewards[self.step] = rewards.copy() - self.masks[self.step + 1] = masks.copy() - if bad_masks is not None: - self.bad_masks[self.step + 1] = bad_masks.copy() - if active_masks is not None: - self.active_masks[self.step + 1] = active_masks.copy() - if available_actions is not None: - self.available_actions[self.step + 1] = available_actions.copy() - - self.step = (self.step + 1) % self.episode_length - - def chooseinsert(self, - share_obs, - obs, - rnn_states, - rnn_states_critic, - actions, - action_log_probs, - value_preds, - rewards, - masks, - bad_masks=None, - active_masks=None, - available_actions=None): - """ - Insert data into the buffer. This insert function is used specifically for Hanabi, which is turn based. - :param share_obs: (argparse.Namespace) arguments containing relevant model, policy, and env information. - :param obs: (np.ndarray) local agent observations. - :param rnn_states_actor: (np.ndarray) RNN states for actor network. - :param rnn_states_critic: (np.ndarray) RNN states for critic network. - :param actions:(np.ndarray) actions taken by agents. - :param action_log_probs:(np.ndarray) log probs of actions taken by agents - :param value_preds: (np.ndarray) value function prediction at each step. - :param rewards: (np.ndarray) reward collected at each step. - :param masks: (np.ndarray) denotes whether the environment has terminated or not. - :param bad_masks: (np.ndarray) denotes indicate whether whether true terminal state or due to episode limit - :param active_masks: (np.ndarray) denotes whether an agent is active or dead in the env. 
- :param available_actions: (np.ndarray) actions available to each agent. If None, all actions are available. - """ - self.share_obs[self.step] = share_obs.copy() - self.obs[self.step] = obs.copy() - self.rnn_states[self.step + 1] = rnn_states.copy() - self.rnn_states_critic[self.step + 1] = rnn_states_critic.copy() - self.actions[self.step] = actions.copy() - self.action_log_probs[self.step] = action_log_probs.copy() - self.value_preds[self.step] = value_preds.copy() - self.rewards[self.step] = rewards.copy() - self.masks[self.step + 1] = masks.copy() - if bad_masks is not None: - self.bad_masks[self.step + 1] = bad_masks.copy() - if active_masks is not None: - self.active_masks[self.step] = active_masks.copy() - if available_actions is not None: - self.available_actions[self.step] = available_actions.copy() - - self.step = (self.step + 1) % self.episode_length - - def after_update(self): - """Copy last timestep data to first index. Called after update to model.""" - self.share_obs[0] = self.share_obs[-1].copy() - self.obs[0] = self.obs[-1].copy() - self.rnn_states[0] = self.rnn_states[-1].copy() - self.rnn_states_critic[0] = self.rnn_states_critic[-1].copy() - self.masks[0] = self.masks[-1].copy() - self.bad_masks[0] = self.bad_masks[-1].copy() - self.active_masks[0] = self.active_masks[-1].copy() - if self.available_actions is not None: - self.available_actions[0] = self.available_actions[-1].copy() - - def chooseafter_update(self): - """Copy last timestep data to first index. This method is used for Hanabi.""" - self.rnn_states[0] = self.rnn_states[-1].copy() - self.rnn_states_critic[0] = self.rnn_states_critic[-1].copy() - self.masks[0] = self.masks[-1].copy() - self.bad_masks[0] = self.bad_masks[-1].copy() - - def compute_returns(self, next_value, value_normalizer=None): - """ - Compute returns either as discounted sum of rewards, or using GAE. - :param next_value: (np.ndarray) value predictions for the step after the last episode step. 
- :param value_normalizer: (PopArt) If not None, PopArt value normalizer instance. - """ - if self._use_proper_time_limits: - if self._use_gae: - self.value_preds[-1] = next_value - gae = 0 - for step in reversed(range(self.rewards.shape[0])): - if self._use_popart or self._use_valuenorm: - # step + 1 - delta = self.rewards[step] + self.gamma * value_normalizer.denormalize( - self.value_preds[step + 1]) * self.masks[step + 1] \ - - value_normalizer.denormalize(self.value_preds[step]) - gae = delta + self.gamma * self.gae_lambda * gae * self.masks[ - step + 1] - gae = gae * self.bad_masks[step + 1] - self.returns[ - step] = gae + value_normalizer.denormalize( - self.value_preds[step]) - else: - delta = self.rewards[step] + self.gamma * self.value_preds[step + 1] * self.masks[step + 1] - \ - self.value_preds[step] - gae = delta + self.gamma * self.gae_lambda * self.masks[ - step + 1] * gae - gae = gae * self.bad_masks[step + 1] - self.returns[step] = gae + self.value_preds[step] - else: - self.returns[-1] = next_value - for step in reversed(range(self.rewards.shape[0])): - if self._use_popart or self._use_valuenorm: - self.returns[step] = (self.returns[step + 1] * self.gamma * self.masks[step + 1] + self.rewards[ - step]) * self.bad_masks[step + 1] \ - + (1 - self.bad_masks[step + 1]) * value_normalizer.denormalize( - self.value_preds[step]) - else: - self.returns[step] = (self.returns[step + 1] * self.gamma * self.masks[step + 1] + self.rewards[ - step]) * self.bad_masks[step + 1] \ - + (1 - self.bad_masks[step + 1]) * self.value_preds[step] - else: - if self._use_gae: - self.value_preds[-1] = next_value - gae = 0 - for step in reversed(range(self.rewards.shape[0])): - if self._use_popart or self._use_valuenorm: - delta = self.rewards[step] + self.gamma * value_normalizer.denormalize( - self.value_preds[step + 1]) * self.masks[step + 1] \ - - value_normalizer.denormalize(self.value_preds[step]) - gae = delta + self.gamma * self.gae_lambda * self.masks[ - step + 1] * 
gae - self.returns[ - step] = gae + value_normalizer.denormalize( - self.value_preds[step]) - else: - delta = self.rewards[step] + self.gamma * self.value_preds[step + 1] * self.masks[step + 1] - \ - self.value_preds[step] - gae = delta + self.gamma * self.gae_lambda * self.masks[ - step + 1] * gae - self.returns[step] = gae + self.value_preds[step] - else: - self.returns[-1] = next_value - for step in reversed(range(self.rewards.shape[0])): - self.returns[step] = self.returns[ - step + 1] * self.gamma * self.masks[ - step + 1] + self.rewards[step] - - def feed_forward_generator(self, - advantages, - num_mini_batch=None, - mini_batch_size=None): - """ - Yield training data for MLP policies. - :param advantages: (np.ndarray) advantage estimates. - :param num_mini_batch: (int) number of minibatches to split the batch into. - :param mini_batch_size: (int) number of samples in each minibatch. - """ - episode_length, n_rollout_threads, num_agents = self.rewards.shape[0:3] - batch_size = n_rollout_threads * episode_length * num_agents - - if mini_batch_size is None: - assert batch_size >= num_mini_batch, ( - "PPO requires the number of processes ({}) " - "* number of steps ({}) * number of agents ({}) = {} " - "to be greater than or equal to the number of PPO mini batches ({})." 
- "".format(n_rollout_threads, episode_length, num_agents, - n_rollout_threads * episode_length * num_agents, - num_mini_batch)) - mini_batch_size = batch_size // num_mini_batch - - rand = torch.randperm(batch_size).numpy() - sampler = [ - rand[i * mini_batch_size:(i + 1) * mini_batch_size] - for i in range(num_mini_batch) - ] - - share_obs = self.share_obs[:-1].reshape(-1, *self.share_obs.shape[3:]) - obs = self.obs[:-1].reshape(-1, *self.obs.shape[3:]) - rnn_states = self.rnn_states[:-1].reshape(-1, - *self.rnn_states.shape[3:]) - rnn_states_critic = self.rnn_states_critic[:-1].reshape( - -1, *self.rnn_states_critic.shape[3:]) - actions = self.actions.reshape(-1, self.actions.shape[-1]) - if self.available_actions is not None: - available_actions = self.available_actions[:-1].reshape( - -1, self.available_actions.shape[-1]) - value_preds = self.value_preds[:-1].reshape(-1, 1) - returns = self.returns[:-1].reshape(-1, 1) - masks = self.masks[:-1].reshape(-1, 1) - active_masks = self.active_masks[:-1].reshape(-1, 1) - action_log_probs = self.action_log_probs.reshape( - -1, self.action_log_probs.shape[-1]) - advantages = advantages.reshape(-1, 1) - - for indices in sampler: - # obs size [T+1 N M Dim]-->[T N M Dim]-->[T*N*M,Dim]-->[index,Dim] - share_obs_batch = share_obs[indices] - obs_batch = obs[indices] - rnn_states_batch = rnn_states[indices] - rnn_states_critic_batch = rnn_states_critic[indices] - actions_batch = actions[indices] - if self.available_actions is not None: - available_actions_batch = available_actions[indices] - else: - available_actions_batch = None - value_preds_batch = value_preds[indices] - return_batch = returns[indices] - masks_batch = masks[indices] - active_masks_batch = active_masks[indices] - old_action_log_probs_batch = action_log_probs[indices] - if advantages is None: - adv_targ = None - else: - adv_targ = advantages[indices] - - yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch,\ - 
value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch,\ - adv_targ, available_actions_batch - - def naive_recurrent_generator(self, advantages, num_mini_batch): - """ - Yield training data for non-chunked RNN training. - :param advantages: (np.ndarray) advantage estimates. - :param num_mini_batch: (int) number of minibatches to split the batch into. - """ - episode_length, n_rollout_threads, num_agents = self.rewards.shape[0:3] - batch_size = n_rollout_threads * num_agents - assert n_rollout_threads * num_agents >= num_mini_batch, ( - "PPO requires the number of processes ({})* number of agents ({}) " - "to be greater than or equal to the number of " - "PPO mini batches ({}).".format(n_rollout_threads, num_agents, - num_mini_batch)) - num_envs_per_batch = batch_size // num_mini_batch - perm = torch.randperm(batch_size).numpy() - - share_obs = self.share_obs.reshape(-1, batch_size, - *self.share_obs.shape[3:]) - obs = self.obs.reshape(-1, batch_size, *self.obs.shape[3:]) - rnn_states = self.rnn_states.reshape(-1, batch_size, - *self.rnn_states.shape[3:]) - rnn_states_critic = self.rnn_states_critic.reshape( - -1, batch_size, *self.rnn_states_critic.shape[3:]) - actions = self.actions.reshape(-1, batch_size, self.actions.shape[-1]) - if self.available_actions is not None: - available_actions = self.available_actions.reshape( - -1, batch_size, self.available_actions.shape[-1]) - value_preds = self.value_preds.reshape(-1, batch_size, 1) - returns = self.returns.reshape(-1, batch_size, 1) - masks = self.masks.reshape(-1, batch_size, 1) - active_masks = self.active_masks.reshape(-1, batch_size, 1) - action_log_probs = self.action_log_probs.reshape( - -1, batch_size, self.action_log_probs.shape[-1]) - advantages = advantages.reshape(-1, batch_size, 1) - - for start_ind in range(0, batch_size, num_envs_per_batch): - share_obs_batch = [] - obs_batch = [] - rnn_states_batch = [] - rnn_states_critic_batch = [] - actions_batch = [] - 
available_actions_batch = [] - value_preds_batch = [] - return_batch = [] - masks_batch = [] - active_masks_batch = [] - old_action_log_probs_batch = [] - adv_targ = [] - - for offset in range(num_envs_per_batch): - ind = perm[start_ind + offset] - share_obs_batch.append(share_obs[:-1, ind]) - obs_batch.append(obs[:-1, ind]) - rnn_states_batch.append(rnn_states[0:1, ind]) - rnn_states_critic_batch.append(rnn_states_critic[0:1, ind]) - actions_batch.append(actions[:, ind]) - if self.available_actions is not None: - available_actions_batch.append(available_actions[:-1, ind]) - value_preds_batch.append(value_preds[:-1, ind]) - return_batch.append(returns[:-1, ind]) - masks_batch.append(masks[:-1, ind]) - active_masks_batch.append(active_masks[:-1, ind]) - old_action_log_probs_batch.append(action_log_probs[:, ind]) - adv_targ.append(advantages[:, ind]) - - # [N[T, dim]] - T, N = self.episode_length, num_envs_per_batch - # These are all from_numpys of size (T, N, -1) - share_obs_batch = np.stack(share_obs_batch, 1) - obs_batch = np.stack(obs_batch, 1) - actions_batch = np.stack(actions_batch, 1) - if self.available_actions is not None: - available_actions_batch = np.stack(available_actions_batch, 1) - value_preds_batch = np.stack(value_preds_batch, 1) - return_batch = np.stack(return_batch, 1) - masks_batch = np.stack(masks_batch, 1) - active_masks_batch = np.stack(active_masks_batch, 1) - old_action_log_probs_batch = np.stack(old_action_log_probs_batch, - 1) - adv_targ = np.stack(adv_targ, 1) - - # States is just a (N, dim) from_numpy [N[1,dim]] - rnn_states_batch = np.stack(rnn_states_batch).reshape( - N, *self.rnn_states.shape[3:]) - rnn_states_critic_batch = np.stack( - rnn_states_critic_batch).reshape( - N, *self.rnn_states_critic.shape[3:]) - - # Flatten the (T, N, ...) from_numpys to (T * N, ...) 
- share_obs_batch = _flatten(T, N, share_obs_batch) - obs_batch = _flatten(T, N, obs_batch) - actions_batch = _flatten(T, N, actions_batch) - if self.available_actions is not None: - available_actions_batch = _flatten(T, N, - available_actions_batch) - else: - available_actions_batch = None - value_preds_batch = _flatten(T, N, value_preds_batch) - return_batch = _flatten(T, N, return_batch) - masks_batch = _flatten(T, N, masks_batch) - active_masks_batch = _flatten(T, N, active_masks_batch) - old_action_log_probs_batch = _flatten(T, N, - old_action_log_probs_batch) - adv_targ = _flatten(T, N, adv_targ) - - yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch,\ - value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch,\ - adv_targ, available_actions_batch - - def recurrent_generator(self, advantages, num_mini_batch, - data_chunk_length): - """ - Yield training data for chunked RNN training. - :param advantages: (np.ndarray) advantage estimates. - :param num_mini_batch: (int) number of minibatches to split the batch into. - :param data_chunk_length: (int) length of sequence chunks with which to train RNN. 
- """ - episode_length, n_rollout_threads, num_agents = self.rewards.shape[0:3] - batch_size = n_rollout_threads * episode_length * num_agents - data_chunks = batch_size // data_chunk_length # [C=r*T*M/L] - mini_batch_size = data_chunks // num_mini_batch - - rand = torch.randperm(data_chunks).numpy() - sampler = [ - rand[i * mini_batch_size:(i + 1) * mini_batch_size] - for i in range(num_mini_batch) - ] - - if len(self.share_obs.shape) > 4: - share_obs = self.share_obs[:-1].transpose( - 1, 2, 0, 3, 4, 5).reshape(-1, *self.share_obs.shape[3:]) - obs = self.obs[:-1].transpose(1, 2, 0, 3, 4, - 5).reshape(-1, *self.obs.shape[3:]) - else: - share_obs = _cast(self.share_obs[:-1]) - obs = _cast(self.obs[:-1]) - - actions = _cast(self.actions) - action_log_probs = _cast(self.action_log_probs) - advantages = _cast(advantages) - value_preds = _cast(self.value_preds[:-1]) - returns = _cast(self.returns[:-1]) - masks = _cast(self.masks[:-1]) - active_masks = _cast(self.active_masks[:-1]) - # rnn_states = _cast(self.rnn_states[:-1]) - # rnn_states_critic = _cast(self.rnn_states_critic[:-1]) - rnn_states = self.rnn_states[:-1].transpose(1, 2, 0, 3, 4).reshape( - -1, *self.rnn_states.shape[3:]) - rnn_states_critic = self.rnn_states_critic[:-1].transpose( - 1, 2, 0, 3, 4).reshape(-1, *self.rnn_states_critic.shape[3:]) - - if self.available_actions is not None: - available_actions = _cast(self.available_actions[:-1]) - - for indices in sampler: - share_obs_batch = [] - obs_batch = [] - rnn_states_batch = [] - rnn_states_critic_batch = [] - actions_batch = [] - available_actions_batch = [] - value_preds_batch = [] - return_batch = [] - masks_batch = [] - active_masks_batch = [] - old_action_log_probs_batch = [] - adv_targ = [] - - for index in indices: - - ind = index * data_chunk_length - # size [T+1 N M Dim]-->[T N M Dim]-->[N,M,T,Dim]-->[N*M*T,Dim]-->[L,Dim] - share_obs_batch.append(share_obs[ind:ind + data_chunk_length]) - obs_batch.append(obs[ind:ind + data_chunk_length]) - 
actions_batch.append(actions[ind:ind + data_chunk_length]) - if self.available_actions is not None: - available_actions_batch.append( - available_actions[ind:ind + data_chunk_length]) - value_preds_batch.append(value_preds[ind:ind + - data_chunk_length]) - return_batch.append(returns[ind:ind + data_chunk_length]) - masks_batch.append(masks[ind:ind + data_chunk_length]) - active_masks_batch.append(active_masks[ind:ind + - data_chunk_length]) - old_action_log_probs_batch.append( - action_log_probs[ind:ind + data_chunk_length]) - adv_targ.append(advantages[ind:ind + data_chunk_length]) - # size [T+1 N M Dim]-->[T N M Dim]-->[N M T Dim]-->[N*M*T,Dim]-->[1,Dim] - rnn_states_batch.append(rnn_states[ind]) - rnn_states_critic_batch.append(rnn_states_critic[ind]) - - L, N = data_chunk_length, mini_batch_size - - # These are all from_numpys of size (L, N, Dim) - share_obs_batch = np.stack(share_obs_batch, axis=1) - obs_batch = np.stack(obs_batch, axis=1) - - actions_batch = np.stack(actions_batch, axis=1) - if self.available_actions is not None: - available_actions_batch = np.stack(available_actions_batch, - axis=1) - value_preds_batch = np.stack(value_preds_batch, axis=1) - return_batch = np.stack(return_batch, axis=1) - masks_batch = np.stack(masks_batch, axis=1) - active_masks_batch = np.stack(active_masks_batch, axis=1) - old_action_log_probs_batch = np.stack(old_action_log_probs_batch, - axis=1) - adv_targ = np.stack(adv_targ, axis=1) - - # States is just a (N, -1) from_numpy - rnn_states_batch = np.stack(rnn_states_batch).reshape( - N, *self.rnn_states.shape[3:]) - rnn_states_critic_batch = np.stack( - rnn_states_critic_batch).reshape( - N, *self.rnn_states_critic.shape[3:]) - - # Flatten the (L, N, ...) from_numpys to (L * N, ...) 
- share_obs_batch = _flatten(L, N, share_obs_batch) - obs_batch = _flatten(L, N, obs_batch) - actions_batch = _flatten(L, N, actions_batch) - if self.available_actions is not None: - available_actions_batch = _flatten(L, N, - available_actions_batch) - else: - available_actions_batch = None - value_preds_batch = _flatten(L, N, value_preds_batch) - return_batch = _flatten(L, N, return_batch) - masks_batch = _flatten(L, N, masks_batch) - active_masks_batch = _flatten(L, N, active_masks_batch) - old_action_log_probs_batch = _flatten(L, N, - old_action_log_probs_batch) - adv_targ = _flatten(L, N, adv_targ) - - yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch,\ - value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch,\ - adv_targ, available_actions_batch diff --git a/algos/ppo/utils/util.py b/algos/ppo/utils/util.py deleted file mode 100644 index 7e23b9ea..00000000 --- a/algos/ppo/utils/util.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-# Code modified from https://github.com/marlbenchmark/on-policy -import numpy as np -import math -import torch - - -def check(input): - if type(input) == np.ndarray: - return torch.from_numpy(input) - - -def get_gard_norm(it): - sum_grad = 0 - for x in it: - if x.grad is None: - continue - sum_grad += x.grad.norm()**2 - return math.sqrt(sum_grad) - - -def update_linear_schedule(optimizer, epoch, total_num_epochs, initial_lr): - """Decreases the learning rate linearly""" - lr = initial_lr - (initial_lr * (epoch / float(total_num_epochs))) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def huber_loss(e, d): - a = (abs(e) <= d).float() - b = (e > d).float() - return a * e**2 / 2 + b * d * (abs(e) - d / 2) - - -def mse_loss(e): - return e**2 / 2 - - -def get_shape_from_obs_space(obs_space): - if obs_space.__class__.__name__ == 'Box': - obs_shape = obs_space.shape - elif obs_space.__class__.__name__ == 'list': - obs_shape = obs_space - else: - raise NotImplementedError - return obs_shape - - -def get_shape_from_act_space(act_space): - if act_space.__class__.__name__ == 'Discrete': - act_shape = 1 - elif act_space.__class__.__name__ == "MultiDiscrete": - act_shape = act_space.shape - elif act_space.__class__.__name__ == "Box": - act_shape = act_space.shape[0] - elif act_space.__class__.__name__ == "MultiBinary": - act_shape = act_space.shape[0] - else: # agar - act_shape = act_space[0].shape[0] + 1 - return act_shape - - -def tile_images(img_nhwc): - """ - Tile N images into one big PxQ image - (P,Q) are chosen to be as close as possible, and if N - is square, then P=Q. 
- input: img_nhwc, list or array of images, ndim=4 once turned into array - n = batch index, h = height, w = width, c = channel - returns: - bigim_HWc, ndarray with ndim=3 - """ - img_nhwc = np.asarray(img_nhwc) - N, h, w, c = img_nhwc.shape - H = int(np.ceil(np.sqrt(N))) - W = int(np.ceil(float(N) / H)) - img_nhwc = np.array( - list(img_nhwc) + [img_nhwc[0] * 0 for _ in range(N, H * W)]) - img_HWhwc = img_nhwc.reshape(H, W, h, w, c) - img_HhWwc = img_HWhwc.transpose(0, 2, 1, 3, 4) - img_Hh_Ww_c = img_HhWwc.reshape(H * h, W * w, c) - return img_Hh_Ww_c diff --git a/algos/ppo/utils/valuenorm.py b/algos/ppo/utils/valuenorm.py deleted file mode 100644 index 76df255d..00000000 --- a/algos/ppo/utils/valuenorm.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -import numpy as np - -import torch -import torch.nn as nn - - -class ValueNorm(nn.Module): - """ Normalize a vector of observations - across the first norm_axes dimensions""" - - def __init__(self, - input_shape, - norm_axes=1, - beta=0.99999, - per_element_update=False, - epsilon=1e-5, - device=torch.device("cpu")): - super(ValueNorm, self).__init__() - - self.input_shape = input_shape - self.norm_axes = norm_axes - self.epsilon = epsilon - self.beta = beta - self.per_element_update = per_element_update - self.tpdv = dict(dtype=torch.float32, device=device) - - self.running_mean = nn.Parameter(torch.zeros(input_shape), - requires_grad=False).to(**self.tpdv) - self.running_mean_sq = nn.Parameter( - torch.zeros(input_shape), requires_grad=False).to(**self.tpdv) - self.debiasing_term = nn.Parameter(torch.tensor(0.0), - requires_grad=False).to(**self.tpdv) - - self.reset_parameters() - - def reset_parameters(self): - self.running_mean.zero_() - 
self.running_mean_sq.zero_() - self.debiasing_term.zero_() - - def running_mean_var(self): - debiased_mean = self.running_mean / self.debiasing_term.clamp( - min=self.epsilon) - debiased_mean_sq = self.running_mean_sq / self.debiasing_term.clamp( - min=self.epsilon) - debiased_var = (debiased_mean_sq - debiased_mean**2).clamp(min=1e-2) - return debiased_mean, debiased_var - - @torch.no_grad() - def update(self, input_vector): - if type(input_vector) == np.ndarray: - input_vector = torch.from_numpy(input_vector) - input_vector = input_vector.to(**self.tpdv) - - batch_mean = input_vector.mean(dim=tuple(range(self.norm_axes))) - batch_sq_mean = (input_vector**2).mean( - dim=tuple(range(self.norm_axes))) - - if self.per_element_update: - batch_size = np.prod(input_vector.size()[:self.norm_axes]) - weight = self.beta**batch_size - else: - weight = self.beta - - self.running_mean.mul_(weight).add_(batch_mean * (1.0 - weight)) - self.running_mean_sq.mul_(weight).add_(batch_sq_mean * (1.0 - weight)) - self.debiasing_term.mul_(weight).add_(1.0 * (1.0 - weight)) - - def normalize(self, input_vector): - # Make sure input is float32 - if type(input_vector) == np.ndarray: - input_vector = torch.from_numpy(input_vector) - input_vector = input_vector.to(**self.tpdv) - - mean, var = self.running_mean_var() - out = (input_vector - mean[(None, ) * self.norm_axes] - ) / torch.sqrt(var)[(None, ) * self.norm_axes] - - return out - - def denormalize(self, input_vector): - """ Transform normalized data back into original distribution """ - if type(input_vector) == np.ndarray: - input_vector = torch.from_numpy(input_vector) - input_vector = input_vector.to(**self.tpdv) - - mean, var = self.running_mean_var() - out = input_vector * torch.sqrt(var)[(None, ) * self.norm_axes] + mean[ - (None, ) * self.norm_axes] - - out = out.cpu().numpy() - - return out diff --git a/setup.py b/build.py similarity index 74% rename from setup.py rename to build.py index 4863ae61..43af0ae5 100644 --- 
a/setup.py +++ b/build.py @@ -1,9 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -"""Run via ```python setup.py develop``` to install Nocturne in your environment.""" +from pybind11.setup_helpers import build_ext, Pybind11Extension import logging import multiprocessing import os @@ -12,18 +7,14 @@ import sys from distutils.version import LooseVersion -from setuptools import Extension, setup -from setuptools.command.build_ext import build_ext - -# Reference: -# https://www.benjack.io/2017/06/12/python-cpp-tests.html +logging.basicConfig(level=logging.INFO) -class CMakeExtension(Extension): +class CMakeExtension(Pybind11Extension): """Use CMake to construct the Nocturne extension.""" def __init__(self, name, src_dir=""): - Extension.__init__(self, name, sources=[]) + Pybind11Extension.__init__(self, name, sources=[]) self.src_dir = os.path.abspath(src_dir) @@ -87,15 +78,9 @@ def build_extension(self, ext): print() # Add an empty line for cleaner output -def main(): - """Build the C++ code.""" - # with open("./requirements.txt", "r") as f: - # requires = f.read().splitlines() - setup( - ext_modules=[CMakeExtension("nocturne", "./nocturne")], - cmdclass=dict(build_ext=CMakeBuild), - ) - - -if __name__ == "__main__": - main() +def build(setup_kwargs): + setup_kwargs.update({ + "ext_modules": [CMakeExtension("nocturne", "./nocturne")], + "cmdclass": {"build_ext": CMakeBuild}, + "zip_safe": False, + }) diff --git a/cfgs/algorithm/APPO.yaml b/cfgs/algorithm/APPO.yaml deleted file mode 100644 index 5c83b6e5..00000000 --- a/cfgs/algorithm/APPO.yaml +++ /dev/null @@ -1,208 +0,0 @@ -algo: APPO -experiments_root: null - # If not None, store experiment data in the specified subfolder of train_dir. Useful for groups of experiments (e.g. 
gridsearch) (default: None) -train_dir: null - # Root for all experiments (default: /private/home/eugenevinitsky/Code/nocturne/examples/train_dir) - # if null use the hydra default position -device: gpu # CPU training is only recommended for smaller e.g. MLP policies (default: gpu) -save_every_sec: 120 # Checkpointing rate (default: 120) -keep_checkpoints: 3 #Number of model checkpoints to keep (default: 3) -save_milestones_sec: -1 #Save intermediate checkpoints in a separate folder for later evaluation (default=never) (default: -1) -stats_avg: 100 #How many episodes to average to measure performance (avg. reward etc) (default: 100) -learning_rate: 0.0001 # LR (default: 0.0001) -train_for_env_steps: 3000000000 # Stop after all policies are trained for this many env steps (default: 10000000000) -train_for_seconds: 10000000000 #Stop training after this many seconds (default: 10000000000) -lr_schedule: constant #Learning rate schedule to use. Constant keeps constant learning rate throughout training. - # kl_adaptive* schedulers look at --lr_schedule_kl_threshold and if KL-divergence with behavior policy' - # after the last minibatch/epoch significantly deviates from this threshold, lr is apropriately' - # increased or decreased - # options are 'constant', 'kl_adaptive_minibatch', 'kl_adaptive_epoch' -lr_schedule_kl_threshold: 0.008 #Used with kl_adaptive_* schedulers -obs_subtract_mean: 0.0 # Observation preprocessing, mean value to subtract from observation (e.g. 128.0 for 8-bit RGB) (default: 0.0) -obs_scale: 10.0 # Observation preprocessing, divide observation tensors by this scalar (e.g. 128.0 for 8-bit RGB) (default: 1.0) -gamma: 0.99 # Discount factor (default: 0.99) -reward_scale: 1.0 - # Multiply all rewards by this factor before feeding into RL algorithm.Sometimes the overall scale of rewards is too high which makes value estimation a - # harder regression task.Loss values become too high which requires a smaller learning rate, etc. 
(default: 1.0) -reward_clip: 10.0 # Clip rewards between [-c, c]. Default [-10, 10] virtually means no clipping for most envs (default: 10.0) -encoder_type: mlp # Type of the encoder. Supported: conv, mlp, resnet (feel free to define more) (default: conv) -encoder_subtype: mlp_mujoco # Specific encoder design (see model.py) (default: convnet_simple) -encoder_custom: custom_env_encoder # Use custom encoder class from the registry (see model_utils.py) (default: null, options {null, custom_env_encoder}) -encoder_extra_fc_layers: 1 # Number of fully-connected layers of size "hidden size" to add after the basic encoder (e.g. convolutional) (default: 1) -encoder_hidden_size: 256 -hidden_size: 256 # Size of hidden layer in the model, or the size of RNN hidden state in recurrent model (e.g. GRU) (default: 128) -nonlinearity: tanh # {elu,relu,tanh} - # Type of nonlinearity to use (default: elu) -policy_initialization: orthogonal # {orthogonal,xavier_uniform} - # NN weight initialization (default: orthogonal) -policy_init_gain: 1.0 # Gain parameter of PyTorch initialization schemas (i.e. Xavier) (default: 1.0) -actor_critic_share_weights: True # Whether to share the weights between policy and value function (default: True) -use_spectral_norm: False # Use spectral normalization to smoothen the gradients and stabilize training. Only supports fully connected layers (default: False) -adaptive_stddev: True # Only for continuous action distributions, whether stddev is state-dependent or just a single learned parameter (default: True) -initial_stddev: 1.0 # Initial value for non-adaptive stddev. Only makes sense for continuous action spaces (default: 1.0) -experiment_summaries_interval: 20 # How often in seconds we write avg. statistics about the experiment (reward, episode length, extra stats...) 
(default: 20) -adam_eps: 1e-06 # Adam epsilon parameter (1e-8 to 1e-5 seem to reliably work okay, 1e-3 and up does not work) (default: 1e-06) -adam_beta1: 0.9 # Adam momentum decay coefficient (default: 0.9) -adam_beta2: 0.999 # Adam second momentum decay coefficient (default: 0.999) -gae_lambda: 0.95 # Generalized Advantage Estimation discounting (only used when V-trace is False (default: 0.95) -rollout: 20 -# Length of the rollout from each environment in timesteps.Once we collect this many timesteps on actor worker, we send this trajectory to the learner.The -# length of the rollout will determine how many timesteps are used to calculate bootstrappedMonte-Carlo estimates of discounted rewards, advantages, GAE, -# or V-trace targets. Shorter rolloutsreduce variance, but the estimates are less precise (bias vs variance tradeoff).For RNN policies, this should be a -# multiple of --recurrence, so every rollout will be splitinto (n = rollout / recurrence) segments for backpropagation. V-trace algorithm currently -# requires thatrollout == recurrence, which what you want most of the time anyway.Rollout length is independent from the episode length. Episode length -# can be both shorter or longer thanrollout, although for PBT training it is currently recommended that rollout << episode_len(see function -# finalize_trajectory in actor_worker.py) (default: 32) -num_workers: 80 # Number of parallel environment workers. Should be less than num_envs and should divide num_envs (default: 80) -recurrence: 20 # Trajectory length for backpropagation through time. If recurrence=1 there is no backpropagation through time, and experience is shuffled completely - # randomlyFor V-trace recurrence should be equal to rollout length. 
(default: 32) -use_rnn: True # Whether to use RNN core in a policy or not (default: True) -rnn_type: gru # {gru,lstm} - # Type of RNN cell to use if use_rnn is True (default: gru) -rnn_num_layers: 1 # Number of RNN layers to use if use_rnn is True (default: 1) -ppo_clip_ratio: 0.1 # We use unbiased clip(x, 1+e, 1/(1+e)) instead of clip(x, 1+e, 1-e) in the paper (default: 0.1) -ppo_clip_value: 1.0 # Maximum absolute change in value estimate until it is clipped. Sensitive to value magnitude (default: 1.0) -batch_size: 7180 # Minibatch size for SGD (default: 1024) -num_batches_per_iteration: 1 -# How many minibatches we collect before training on the collected experience. It is generally recommended to set this to 1 for most experiments, because -# any higher value will increase the policy lag.But in some specific circumstances it can be beneficial to have a larger macro-batch in order to shuffle -# and decorrelate the minibatches.Here and throughout the codebase: macro batch is the portion of experience that learner processes per iteration -# (consisting of 1 or several minibatches) (default: 1) -ppo_epochs: 1 # Number of training epochs before a new batch of experience is collected (default: 1) -num_minibatches_to_accumulate: -1 -# This parameter governs the maximum number of minibatches the learner can accumulate before further experience collection is stopped.The default value -# (-1) will set this to 2 * num_batches_per_iteration, so if the experience collection is faster than the training,the learner will accumulate enough -# minibatches for 2 iterations of training (but no more). This is a good balance between policy-lag and throughput.When the limit is reached, the learner -# will notify the actor workers that they ought to stop the experience collection until accumulated minibatchesare processed. 
Set this parameter to 1 * -# num_batches_per_iteration to further reduce policy-lag.If the experience collection is very non-uniform, increasing this parameter can increase overall -# throughput, at the cost of increased policy-lag.A value of 0 is treated specially. This means the experience accumulation is turned off, and all -# experience collection will be halted during training.This is the regime with potentially lowest policy-lag.When this parameter is 0 and num_workers * -# num_envs_per_worker * rollout == num_batches_per_iteration * batch_size, the algorithm is similar toregular synchronous PPO. (default: -1) -max_grad_norm: 4.0 # Max L2 norm of the gradient vector (default: 4.0) -exploration_loss_coeff: 0.001 # Coefficient for the exploration component of the loss function. (default: 0.001) -value_loss_coeff: 0.5 # Coefficient for the critic loss (default: 0.5) -kl_loss_coeff: 0.0 #Coefficient for fixed KL loss (as used by Schulman et al. in https://arxiv.org/pdf/1707.06347.pdf). Highly recommended for environments with continuous - # action spaces. (default: 0.0) -exploration_loss: entropy - # {entropy,symmetric_kl} - # Usually the exploration loss is based on maximizing the entropy of the probability distribution. Note that mathematically maximizing entropy of the - # categorical probability distribution is exactly the same as minimizing the (regular) KL-divergence between this distribution and a uniform prior. The - # downside of using the entropy term (or regular asymmetric KL-divergence) is the fact that penalty does not increase as probabilities of some actions - # approach zero. I.e. numerically, there is almost no difference between an action distribution with a probability epsilon > 0 for some action and an - # action distribution with a probability = zero for this action. 
For many tasks the first (epsilon) distribution is preferrable because we keep some - # (albeit small) amount of exploration, while the second distribution will never explore this action ever again.Unlike the entropy term, symmetric KL - # divergence between the action distribution and a uniform prior approaches infinity when entropy of the distribution approaches zero, so it can prevent - # the pathological situations where the agent stops exploring. Empirically, symmetric KL-divergence yielded slightly better results on some problems. - # (default: entropy) -max_entropy_coeff: 0.0, # Coefficient for max entropy term added directly to rewards. 0 means no max entropy term to env rewards. ' - # Note that this is different from exploration loss (see https://arxiv.org/abs/1805.00909)' -num_envs_per_worker: 2 - # Number of envs on a single CPU actor, in high-throughput configurations this should be in 10-30 range for Atari/VizDoomMust be even for double-buffered - # sampling! (default: 2) -worker_num_splits: 2 - # Typically we split a vector of envs into two parts for "double buffered" experience collectionSet this to 1 to disable double buffering. Set this to 3 - # for triple buffering! (default: 2) -num_policies: 1 - # Number of policies to train jointly (default: 1) -policy_workers_per_policy: 1 - # Number of policy workers that compute forward pass (per policy) (default: 1) -max_policy_lag: 10000 - # Max policy lag in policy versions. Discard all experience that is older than this. This should be increased for configurations with multiple epochs of - # SGD because naturallypolicy-lag may exceed this value. (default: 10000) -traj_buffers_excess_ratio: 1.3 - # Increase this value to make sure the system always has enough free trajectory buffers (can be useful when i.e. a lot of inactive agents in multi-agent - # envs)Decrease this to 1.0 to save as much RAM as possible. 
(default: 1.3) -decorrelate_experience_max_seconds: 10 - # Decorrelating experience serves two benefits. First: this is better for learning because samples from workers come from random moments in the episode, - # becoming more "i.i.d".Second, and more important one: this is good for environments with highly non-uniform one-step times, including long and expensive - # episode resets. If experience is not decorrelatedthen training batches will come in bursts e.g. after a bunch of environments finished resets and many - # iterations on the learner might be required,which will increase the policy-lag of the new experience collected. The performance of the Sample Factory is - # best when experience is generated as more-or-lessuniform stream. Try increasing this to 100-200 seconds to smoothen the experience distribution in time - # right from the beginning (it will eventually spread out and settle anyway) (default: 10) -decorrelate_envs_on_one_worker: True - # In addition to temporal decorrelation of worker processes, also decorrelate envs within one worker processFor environments with a fixed episode length - # it can prevent the reset from happening in the same rollout for all envs simultaneously, which makes experience collection more uniform. (default: True) -with_vtrace: True - # Enables V-trace off-policy correction. If this is True, then GAE is not used (default: True) -vtrace_rho: 1.0 - # rho_hat clipping parameter of the V-trace algorithm (importance sampling truncation) (default: 1.0) -vtrace_c: 1.0 - # c_hat clipping parameter of the V-trace algorithm. Low values for c_hat can reduce variance of the advantage estimates (similar to GAE lambda < 1) - # (default: 1.0) -set_workers_cpu_affinity: True - # Whether to assign workers to specific CPU cores or not. The logic is beneficial for most workloads because prevents a lot of context switching.However - # for some environments it can be better to disable it, to allow one worker to use all cores some of the time. 
This can be the case for some DMLab - # environments with very expensive episode resetthat can use parallel CPU cores for level generation. (default: True) -force_envs_single_thread: True - # Some environments may themselves use parallel libraries such as OpenMP or MKL. Since we parallelize environments on the level of workers, there is no - # need to keep this parallel semantic.This flag uses threadpoolctl to force libraries such as OpenMP and MKL to use only a single thread within the - # environment.Default value (True) is recommended unless you are running fewer workers than CPU cores. (default: True) -reset_timeout_seconds: 120 - # Fail worker on initialization if not a single environment was reset in this time (worker probably got stuck) (default: 120) -default_niceness: 0 - # Niceness of the highest priority process (the learner). Values below zero require elevated privileges. (default: 0) -train_in_background_thread: True - # Using background thread for training is faster and allows preparing the next batch while training is in progress.Unfortunately debugging can become very - # tricky in this case. So there is an option to use only a single thread on the learner to simplify the debugging. (default: True) -learner_main_loop_num_cores: 1 - # When batching on the learner is the bottleneck, increasing the number of cores PyTorch uses can improve the performance (default: 1) -actor_worker_gpus: [] - # [ACTOR_WORKER_GPUS [ACTOR_WORKER_GPUS ...]] - # By default, actor workers only use CPUs. Changes this if e.g. you need GPU-based rendering on the actors (default: []) -with_pbt: False # Enables population-based training basic features (default: False) -pbt_mix_policies_in_one_env: True - # For multi-agent envs, whether we mix different policies in one env. 
(default: True) -pbt_period_env_steps: 5000000 - # Periodically replace the worst policies with the best ones and perturb the hyperparameters (default: 5000000) -pbt_start_mutation: 20000000 - # Allow initial diversification, start PBT after this many env steps (default: 20000000) -pbt_replace_fraction: 0.3 - # A portion of policies performing worst to be replace by better policies (rounded up) (default: 0.3) -pbt_mutation_rate: 0.15 - # Probability that a parameter mutates (default: 0.15) -pbt_replace_reward_gap: 0.1 - # Relative gap in true reward when replacing weights of the policy with a better performing one (default: 0.1) -pbt_replace_reward_gap_absolute: 1e-06 - # Absolute gap in true reward when replacing weights of the policy with a better performing one (default: 1e-06) -pbt_optimize_batch_size: False - # Whether to optimize batch size or not (experimental) (default: False) -pbt_optimize_gamma: False - # Whether to optimize gamma, discount factor, or not (experimental) (default: False) -pbt_target_objective: true_reward - # Policy stat to optimize with PBT. true_reward (default) is equal to raw env reward if not specified, but can also be any other per-policy stat.For - # DMlab-30 use value "dmlab_target_objective" (which is capped human normalized score) (default: true_reward) -pbt_perturb_min: 1.05 - # When PBT mutates a float hyperparam, it samples the change magnitude randomly from the uniform distribution [pbt_perturb_min, pbt_perturb_max] (default: - # 1.05) -pbt_perturb_max: 1.5 - # When PBT mutates a float hyperparam, it samples the change magnitude randomly from the uniform distribution [pbt_perturb_min, pbt_perturb_max] (default: - # 1.5) -use_cpc: False # Use CPC|A as an auxiliary loss durning learning (default: False) -cpc_forward_steps: 8 - # Number of forward prediction steps for CPC (default: 8) -cpc_time_subsample: 6 - # Number of timesteps to sample from each batch. This should be less than recurrence to decorrelate experience. 
(default: 6) -cpc_forward_subsample: 2 - # Number of forward steps to sample for loss computation. This should be less than cpc_forward_steps to decorrelate gradients. (default: 2) -with_wandb: ${wandb} - # Enables Weights and Biases integration (default: False) -wandb_user: null - # WandB username (entity). Must be specified from command line! Also see https://docs.wandb.ai/quickstart#1.-set-up-wandb (default: None) -wandb_project: ${wandb_project} - # WandB "Project" (default: sample_factory) -wandb_group: ${wandb_group} - # WandB "Group" (to group your experiments). By default this is the name of the env. (default: None) -wandb_job_type: SF - # WandB job type (default: SF) -wandb_tags: [] # [WANDB_TAGS [WANDB_TAGS ...]] - # Tags can help with finding experiments in WandB web console (default: []) -benchmark: False - # Benchmark mode (default: False) -sampler_only: False - # Do not send experience to the learner, measuring sampling throughput (default: False) -env_frameskip: null - # Number of frames for action repeat (frame skipping). Default (None) means use default environment value (default: None) -env_framestack: 4 - # Frame stacking (only used in Atari?) 
(default: 4) -pixel_format: CHW - # PyTorch expects CHW by default, Ray & TensorFlow expect HWC (default: CHW) \ No newline at end of file diff --git a/cfgs/algorithm/ppo.yaml b/cfgs/algorithm/ppo.yaml deleted file mode 100644 index 485f53d9..00000000 --- a/cfgs/algorithm/ppo.yaml +++ /dev/null @@ -1,81 +0,0 @@ -algorithm_name: 'rmappo' # choices=["rmappo", "mappo"] -experiment: ${experiment} -seed: ${seed} -device: ${device} -cuda_deterministic: True -n_training_threads: 1 # "Number of torch threads for training" -n_rollout_threads: 1 # Number of parallel envs for training rollouts -n_eval_rollout_threads: 1 # Number of parallel envs for evaluating rollouts -n_render_rollout_threads: 1 # Number of parallel envs for rendering rollouts -num_env_steps: 1e8 # Number of environment steps to train -wandb: ${wandb} -use_obs_instead_of_state: True # Whether to use global state or concatenated obs -episode_length: ${episode_length} # Max length for any episode -share_policy: True # Whether all agents share the same policy -use_centralized_V: False # Whether to use a centralized value function -stacked_frames: 1 # number of stacked observations -use_stacked_frames: True # whether to use stacked frames -hidden_size: 64 # Dimension of hidden layers for actor/critic networks -layer_N: 2 # "Number of layers for actor/critic networks" -use_ReLU: True # Whether to use ReLU activation or Tanh -use_popart: False # Use PopART to normalize rewards -use_valuenorm: True # use running mean and std to normalize rewards -use_feature_normalization: True # Whether to apply layernorm to the inputs -use_orthogonal: True # Whether to use Orthogonal initialization for weights and 0 initialization for biases -gain: 0.01 # The gain # of last action layer -# recurrent parameters -use_naive_recurrent_policy: False # Whether to use a naive recurrent policy by stacking states I believe? 
-use_recurrent_policy: True # Whether to use a recurrent policy -recurrent_N: 1 # The number of recurrent layers -data_chunk_length: 10 # Time length of chunks used to train a recurrent_policy - -# optimizer parameters -lr: 5e-4 # learning rate -critic_lr: 5e-4 # critic LR -opti_eps: 1e-5 # RMSprop optimizer epsilon -weight_decay: 0 - -# ppo parameters -ppo_epoch: 10 # number of PPO epochs -use_clipped_value_loss: True # clip loss value -clip_param: 0.2 # PPO clipping parameter -num_mini_batch: 4 # Number of minibatches of the collected data to use -entropy_coef: 0.00 -value_loss_coef: 0.5 # scaling on the value loss -use_max_grad_norm: True # use max norm of gradients -max_grad_norm: 10.0 # max norm of gradients -use_gae: True # use generalized advantage estimation -gamma: 0.99 # discount factor -gae_lambda: 0.95 -use_proper_time_limits: False # compute returns taking into account time limits -use_huber_loss: True -use_value_active_masks: True # whether to mask useless data in value loss -use_policy_active_masks: True # whether to mask useless data in policy loss -huber_delta: 10.0 # coefficient of huber loss -use_linear_lr_decay: False - -# saving and logging -save_interval: 1 # time duration between contiunous twice models saving -log_interval: 5 # time duration between contiunous twice log printing -use_eval: True -eval_interval: 25 -eval_episodes: 10 -save_gifs: True -render_interval: 25 # how often to render -use_render: False -render_episodes: 1 -ifi: 0.1 # the play interval of each rendered image in saved video -model_dir: null - -# goal env wrapper stuff -density_buffer_size: 100000 -density_optim_samples: 1000 -num_goal_samples: 200 -bandwidth: 0.1 -log_figure: True -kernel: 'gaussian' -quartile_cutoff: 0.0 -normalize_value: 400.0 -log_every_n_episodes: 50 -# if True, all the agents share the same goal buffer for sampling new goals -share_goal_buffer: False \ No newline at end of file diff --git a/cfgs/config.py b/cfgs/config.py deleted file mode 100644 
index f759c9af..00000000 --- a/cfgs/config.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Set path to all the Waymo data and the parsed Waymo files.""" -import os -from pathlib import Path - -from hydra import compose, initialize -from hydra.core.global_hydra import GlobalHydra -from omegaconf import OmegaConf -from pyvirtualdisplay import Display - -VERSION_NUMBER = 2 - -PROJECT_PATH = Path.resolve(Path(__file__).parent.parent) -DATA_FOLDER = '/checkpoint/eugenevinitsky/waymo_open/motion_v1p1/uncompressed/scenario/' -TRAIN_DATA_PATH = os.path.join(DATA_FOLDER, 'training') -VALID_DATA_PATH = os.path.join(DATA_FOLDER, 'validation') -TEST_DATA_PATH = os.path.join(DATA_FOLDER, 'testing') -PROCESSED_TRAIN_NO_TL = os.path.join( - DATA_FOLDER, f'formatted_json_v{VERSION_NUMBER}_no_tl_train') -PROCESSED_VALID_NO_TL = os.path.join( - DATA_FOLDER, f'formatted_json_v{VERSION_NUMBER}_no_tl_valid') -PROCESSED_TRAIN = os.path.join(DATA_FOLDER, - f'formatted_json_v{VERSION_NUMBER}_train') -PROCESSED_VALID = os.path.join(DATA_FOLDER, - f'formatted_json_v{VERSION_NUMBER}_valid') -ERR_VAL = -1e4 - - -def get_scenario_dict(hydra_cfg): - """Convert the `scenario` key in the hydra config to a true dict.""" - if isinstance(hydra_cfg['scenario'], dict): - return hydra_cfg['scenario'] - else: - return OmegaConf.to_container(hydra_cfg['scenario'], resolve=True) - - -def get_default_scenario_dict(): - """Construct the `scenario` dict without w/o hydra decorator.""" - GlobalHydra.instance().clear() - initialize(config_path="./") - cfg = compose(config_name="config") - return get_scenario_dict(cfg) - - -def set_display_window(): - """Set a virtual display for headless machines.""" - if "DISPLAY" not in os.environ: - disp = Display() - disp.start() diff --git a/cfgs/config.yaml b/cfgs/config.yaml 
deleted file mode 100644 index cf123dbf..00000000 --- a/cfgs/config.yaml +++ /dev/null @@ -1,122 +0,0 @@ -defaults: - - algorithm: ppo - - override hydra/launcher: submitit_local - -seed: 0 -device: 'cuda:0' -debug: False -experiment: intersection -env: my_custom_multi_env_v1 # name of the env, hardcoded for now - -# WANDB things -wandb: False -wandb_project: nocturne4 -wandb_id: null -wandb_group: ${experiment} - -# one of the agents will be randomly tagged as the -# agent that we control, the rest of the agents will -# replay trajectories -single_agent_mode: False -# all goals are achievable within 90 steps -episode_length: 80 -# how many files of the total dataset to use. -1 indicates to use all of them -num_files: -1 -scenario_path: ${oc.env:PROCESSED_TRAIN_NO_TL} -dt: 0.1 -sims_per_step: 10 -img_as_state: False -discretize_actions: True -accel_discretization: 6 -accel_lower_bound: -3 -accel_upper_bound: 2 -steering_lower_bound: -0.7 # corresponds to about 40 degrees of max steering angle -steering_upper_bound: 0.7 # corresponds to about 40 degrees of max steering angle -steering_discretization: 21 -head_angle_lower_bound: -1.6 -head_angle_upper_bound: 1.6 -head_angle_discretization: 5 -max_num_vehicles: 20 # we want to upper bound how many agents there can be in the scene - # this is mostly useful because RL libraries expect it -# TODO(eugenevinitsky) actually implement this -randomize_goals: False -scenario: - # initial timestep of the scenario (which ranges from timesteps 0 to 90) - start_time: 0 - # if set to True, non-vehicle objects (eg. cyclists, pedestrians...) 
will be spawned - allow_non_vehicles: False - # for an object to be included into moving_objects - moving_threshold: 0.2 # its goal must be at least this distance from its initial position - speed_threshold: 0.05 # its speed must be superior to this value at some point - # maximum number of each objects visible in the object state - # if there are more objects, the closest ones are prioritized - # if there are less objects, the features vector is padded with zeros - max_visible_objects: 16 - max_visible_road_points: 1000 - max_visible_traffic_lights: 20 - max_visible_stop_signs: 4 - # from the set of road points that comprise each polyline, we take - # every n-th one of these - sample_every_n: 1 - # if true we add all the road-edges (the edges you can collide with) - # to the visible road points first and only add the other points - # (road lines, lane lines) etc. if we have remaining states after - road_edge_first: False - -# these configs are mostly used for aligning displacement error computations -# with the standard way of doing it in other libraries i.e. we keep -# the agent for the whole rollout and compute its distance from the expert -# at all the points that the expert is valid -remove_at_goal: True # if true, remove the agent when it reaches its goal -remove_at_collide: True # if true, remove the agent when it collides - -rew_cfg: - shared_reward: False # agents get the collective reward instead of individual rewards - goal_tolerance: 0.5 - reward_scaling: 10.0 # rescale all the rewards by this value. 
This can help w/ some learning algorithms - collision_penalty: 0 - shaped_goal_distance_scaling: 0.2 - shaped_goal_distance: True - goal_distance_penalty: False # if shaped_goal_distance is true, then when this is True the goal distance - # is a penalty for being far from - # goal instead of a reward for being close - goal_achieved_bonus: ${episode_length} - # goal is only achieved if you're within this tolerance on distance from goal - position_target: True - position_target_tolerance: 1.0 - # goal is only achieved if you're within this tolerance on final agent speed at goal position - speed_target: True - speed_target_tolerance: 1.0 - # goal is only achieved if you're within this tolerance on final agent heading at goal position - heading_target: True - heading_target_tolerance: 0.3 -subscriber: - view_angle: 2.1 - # the distance which the cone extends before agents are not visible - # TODO(eugenevinitsky) pick the right number - view_dist: 80 - use_ego_state: True - use_observations: True - # if true, we return an observation for agents that have exited the system - # as well as returning an observation for the extra agents if the number of - # agents in the system is less than max_num_vehicles - keep_inactive_agents: False - # for values greater than 1, we will stack inputs together - n_frames_stacked: 1 - -results_dir: ${oc.env:NOCTURNE_LOG_DIR} - -hydra: - run: - dir: ${results_dir}/test/${now:%Y.%m.%d}/${experiment}/${now:%H.%M.%S}/${hydra.job.override_dirname} - sweep: - dir: ${results_dir}/${oc.env:USER}/nocturne/sweep/${now:%Y.%m.%d}/${experiment}/${now:%H.%M.%S} - subdir: ${hydra.job.num} - launcher: - timeout_min: 2880 - cpus_per_task: 10 - gpus_per_node: 1 - tasks_per_node: 1 - mem_gb: 160 - nodes: 1 - submitit_folder: ${results_dir}/sweep/${now:%Y.%m.%d}/${now:%H%M}_${experiment}/.slurm diff --git a/cfgs/cpp_ b/cfgs/cpp_ deleted file mode 100644 index e69de29b..00000000 diff --git a/cfgs/imitation/config.yaml b/cfgs/imitation/config.yaml deleted file 
mode 100644 index 6cf72fc1..00000000 --- a/cfgs/imitation/config.yaml +++ /dev/null @@ -1,42 +0,0 @@ -defaults: - - override hydra/launcher: submitit_local - -experiment: test -path: ${oc.env:PROCESSED_TRAIN_NO_TL} -num_files: 1000 -n_cpus: 9 -lr: 3e-4 -samples_per_epoch: 50000 -max_visible_road_points: 500 -batch_size: 512 -epochs: 700 -device: cuda -n_stacked_states: 5 -view_dist: 80 -view_angle: 3.14 -actions_are_positions: False -discrete: True -seed: 0 - -# WANDB things -wandb: True -wandb_project: nocturne -wandb_group: ${experiment} - -# tensorboard logs -write_to_tensorboard: True - -hydra: - run: - dir: /checkpoint/${oc.env:USER}/nocturne/test/${now:%Y.%m.%d}/${experiment}/${now:%H.%M.%S}/${hydra.job.override_dirname} - sweep: - dir: /checkpoint/${oc.env:USER}/nocturne/sweep/imitation/${now:%Y.%m.%d}/${experiment}/${now:%H.%M.%S} - subdir: ${hydra.job.num} - launcher: - timeout_min: 2880 - cpus_per_task: 80 - gpus_per_node: 1 - tasks_per_node: 1 - mem_gb: 160 - nodes: 1 - submitit_folder: /checkpoint/${oc.env:USER}/nocturne/sweep/imitation/${now:%Y.%m.%d}/${experiment}/${now:%H.%M.%S}/.slurm diff --git a/configs/env_config.yaml b/configs/env_config.yaml new file mode 100644 index 00000000..adf7a237 --- /dev/null +++ b/configs/env_config.yaml @@ -0,0 +1,93 @@ +seed: 0 +device: cuda:0 +debug: false +experiment: intersection +env: my_custom_multi_env_v1 # name of the env, hardcoded for now + +# all goals are achievable within 90 steps +episode_length: 80 +# how many files of the total dataset to use. 
-1 indicates to use all of them +num_files: 5 +dt: 0.1 +sims_per_step: 10 +img_as_state: false +discretize_actions: true +include_head_angle: false # Whether to include the head tilt/angle as part of a vehicle's action +accel_discretization: 3 +accel_lower_bound: -2 +accel_upper_bound: 2 +steering_lower_bound: -0.25 # corresponds to about 14 degrees of max steering angle +steering_upper_bound: 0.25 # corresponds to about 14 degrees of max steering angle +steering_discretization: 3 +max_num_vehicles: 20 +randomize_goals: false +scenario: + # initial timestep of the scenario (which ranges from timesteps 0 to 90) + start_time: 0 + # if set to True, non-vehicle objects (eg. cyclists, pedestrians...) will be spawned + allow_non_vehicles: false + # for an object to be included into moving_objects + moving_threshold: 0.2 # its goal must be at least this distance from its initial position + speed_threshold: 0.05 # its speed must be superior to this value at some point + # maximum number of each objects visible in the object state + # if there are more objects, the closest ones are prioritized + # if there are less objects, the features vector is padded with zeros + #max_visible_objects: 16 + max_visible_road_points: 500 + #max_visible_traffic_lights: 20 + #max_visible_stop_signs: 4 + # from the set of road points that comprise each polyline, we take + # every n-th one of these + sample_every_n: 1 + # if true we add all the road-edges (the edges you can collide with) + # to the visible road points first and only add the other points + # (road lines, lane lines) etc. if we have remaining states after + road_edge_first: false + invalid_position: -10000.0 + context_length: 10 + +# these configs are mostly used for aligning displacement error computations +# with the standard way of doing it in other libraries i.e.
we keep +# the agent for the whole rollout and compute its distance from the expert +# at all the points that the expert is valid +remove_at_goal: true # if true, remove the agent when it reaches its goal +remove_at_collide: true # if true, remove the agent when it collides + +# Reward settings +rew_cfg: + shared_reward: false # agents get the collective reward instead of individual rewards + goal_tolerance: 0.5 + reward_scaling: 10.0 # rescale all the rewards by this value. This can help w/ some learning algorithms + collision_penalty: 0 + shaped_goal_distance_scaling: 0.2 + shaped_goal_distance: true + goal_distance_penalty: false # if shaped_goal_distance is true, then when this is True the goal distance + # is a penalty for being far from + # goal instead of a reward for being close + goal_achieved_bonus: 80 + position_target: true # If True, goal is only achieved if you're within this tolerance on distance from goal + position_target_tolerance: 1.0 + speed_target: true # If True, goal is only achieved if you're within this tolerance on final agent speed at goal position + speed_target_tolerance: 1.0 + heading_target: false # If True, goal is only achieved if you're within this tolerance on final agent heading at goal position + heading_target_tolerance: 0.3 + # we assume that vehicles are never more than 400 meters from their goal which makes + # sense as the episodes are 9 seconds long, i.e. 
we'd have to go more than 40 m/s to get there + goal_speed_scaling: 40.0 + +# Agent settings +subscriber: + view_angle: 3.14 # the angle of the view cone in radians; set to pi rad to correct for missing head angle + view_dist: 80 # the distance which the cone extends before agents are not visible + use_ego_state: true # if True, add information about the ego state + use_observations: false # if True, add visible field + use_start_position: false # if True, add start (x, y)-position of the agent + use_current_position: false # if True, add current (x, y)-position of the agent + use_target_position: false # if True, add target (x, y)-position of the agent + use_distance_to_target: false # if True, add distance to target (dx, dy) of the agent + + # for values greater than 1, we will stack inputs together (i.e. memory and equivalent of n_stacked_states) + n_frames_stacked: 1 # Agent memory + +# Path to folder with traffic scene(s) from which to create an environment +data_path: ../data diff --git a/examples/example_scenario.json b/data/example_scenario.json similarity index 100% rename from examples/example_scenario.json rename to data/example_scenario.json diff --git a/data/valid_files.json b/data/valid_files.json new file mode 100644 index 00000000..7698869b --- /dev/null +++ b/data/valid_files.json @@ -0,0 +1,3 @@ +{ + "example_scenario.json": [] +} diff --git a/environment.yml b/environment.yml index b9e7ae19..33dc0588 100644 --- a/environment.yml +++ b/environment.yml @@ -2,29 +2,4 @@ name: nocturne channels: - defaults dependencies: - - python=3.8 - - pip=21.1.3 - - numpy=1.19.2 - - jupyterlab=3.0.14 - - pip: - - hydra-core==1.1.0 - - hydra-submitit-launcher==1.1.5 - - ipdb==0.13.9 - - seaborn - - imageio==2.10.1 - - moviepy==1.0.3 - - opencv-python==4.5.5.64 - - gym==0.20.0 - - wandb==0.12.15 - - imageio==2.10.1 - - setproctitle==1.2.3 - - tensorboardX==2.5 - - pytest==7.1.1 - - flake8==4.0.1 - - pydocstyle==6.1.1 - - pyvirtualdisplay - - ray==1.11.0 - - dm-tree - - tabulate - - torch -
sample-factory==1.123.0 \ No newline at end of file + - python=3.10 diff --git a/examples/01_data_structure.ipynb b/examples/01_data_structure.ipynb new file mode 100644 index 00000000..d79bf973 --- /dev/null +++ b/examples/01_data_structure.ipynb @@ -0,0 +1,4235 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data format of a traffic scene\n", + "\n", + "This notebook dives into the data format used to create simulations in Nocturne.\n", + "\n", + "_Last update: 10/2023_" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "\n", + "import os\n", + "os.chdir('..')\n", + "\n", + "cmap = ['r', 'g', 'b', 'y', 'c'] \n", + "%config InlineBackend.figure_format = 'svg'\n", + "sns.set('notebook', font_scale=1.1, rc={'figure.figsize': (8, 3)})\n", + "sns.set_style('ticks', rc={'figure.facecolor': 'none', 'axes.facecolor': 'none'})" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Traffic scenes are constructed by utilizing the [Waymo Open Motion dataset](https://waymo.com/open/). Though every scene is unique, they all have the same basic data structure. 
\n", + "\n", + "To load a traffic scene:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['name', 'objects', 'roads', 'tl_states'])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Take an example scene\n", + "data_path = './data/example_scenario.json'\n", + "\n", + "with open(data_path) as file:\n", + " traffic_scene = json.load(file)\n", + "\n", + "traffic_scene.keys()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Global Overview \n", + "A traffic scene consists of:\n", + "- `name`: the name of the traffic scenario.\n", + "- `objects`: the road objects or moving vehicles in the scene.\n", + "- `roads`: the road points in the scene, these are all the stationary objects.\n", + "- `tl_states`: the states of the traffic lights, which are filtered out for now. " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "traffic_scene['tl_states']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'tfrecord-00358-of-01000_65.json'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "traffic_scene['name']" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2023-10-03T10:23:25.972593\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.8.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pd.Series(\n", + " [\n", + " traffic_scene['objects'][idx]['type']\n", + " for idx in range(len(traffic_scene['objects']))\n", + " ]\n", + ").value_counts().plot(kind='bar', rot=45, color=cmap);\n", + "plt.title(f'Distribution of road objects in traffic scene. Total # objects: {len(traffic_scene[\"objects\"])}')\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This traffic scenario only contains vehicles and pedestrians, some scenes have cyclists as well." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2023-10-03T10:23:26.839616\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.8.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + 
], + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pd.Series(\n", + " [\n", + " traffic_scene['roads'][idx]['type']\n", + " for idx in range(len(traffic_scene['roads']))\n", + " ]\n", + ").value_counts().plot(kind='bar', rot=45, color=cmap);\n", + "plt.title(f'Distribution of road points in traffic scene. Total # points: {len(traffic_scene[\"roads\"])}')\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### In-Depth: Road Objects\n", + "\n", + "This is a list of different road objects in the traffic scene. For each road object, we have information about its position, velocity, size, in which direction it's heading, whether it's a valid object, the type, and the final position of the vehicle." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['position', 'width', 'length', 'heading', 'velocity', 'valid', 'goalPosition', 'type'])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Take the first object\n", + "idx = 0\n", + "\n", + "# For each object, we have this information:\n", + "traffic_scene['objects'][idx].keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " {\n", + " \"x\": 9037.7138671875,\n", + " \"y\": -2720.373779296875\n", + " },\n", + " {\n", + " \"x\": 9037.7607421875,\n", + " \"y\": -2720.306640625\n", + " },\n", + " {\n", + " \"x\": 9037.822265625,\n", + " \"y\": -2720.217529296875\n", + " },\n", + " {\n", + " \"x\": 9037.8916015625,\n", + " \"y\": -2720.146240234375\n", + " },\n", + " {\n", + " \"x\": 9037.9482421875,\n", + " \"y\": -2720.070068359375\n", + " },\n", + " {\n", + " \"x\": 9038.01953125,\n", + " \"y\": -2719.994384765625\n", + " 
},\n", + " {\n", + " \"x\": 9038.1005859375,\n", + " \"y\": -2719.903076171875\n", + " },\n", + " {\n", + " \"x\": 9038.1953125,\n", + " \"y\": -2719.830810546875\n", + " },\n", + " {\n", + " \"x\": 9038.279296875,\n", + " \"y\": -2719.74462890625\n", + " },\n", + " {\n", + " \"x\": 9038.3564453125,\n", + " \"y\": -2719.674560546875\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "# Position contains the (x, y) coordinates for the vehicle at every time step\n", + "print(json.dumps(traffic_scene['objects'][idx]['position'][:10], indent=4))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.6877052187919617, 0.6777269244194031)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Width and length together make the size of the object, and is used to see if there is a collision \n", + "traffic_scene['objects'][idx]['width'], traffic_scene['objects'][idx]['length'] " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "An object's heading refers to the direction it is pointing or moving in. The default coordinate system in Nocturne is right-handed, where the positive x and y axes point to the right and downwards, respectively. In a right-handed coordinate system, 0 degrees is located on the x-axis and the angle increases counter-clockwise.\n", + "\n", + "Because the scene is created from the viewpoint of an ego driver, there may be instances where the heading of certain vehicles is not available. These cases are represented by the value `-10_000`, to indicate that these steps should be filtered out or are invalid." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2023-10-03T10:23:28.800884\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.8.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Heading is the direction in which the vehicle is pointing \n", + "plt.plot(traffic_scene['objects'][idx]['heading']);\n", + "plt.xlabel('Time step')\n", + "plt.ylabel('Heading')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " {\n", + " \"x\": 0.634765625,\n", + " \"y\": 0.72265625\n", + " },\n", + " {\n", + " \"x\": 0.46875,\n", + " \"y\": 0.67138671875\n", + " },\n", + " {\n", + " \"x\": 0.615234375,\n", + " \"y\": 0.89111328125\n", + " },\n", + " {\n", + " \"x\": 0.693359375,\n", + " \"y\": 0.712890625\n", + " },\n", + " {\n", + " \"x\": 0.56640625,\n", + " \"y\": 0.76171875\n", + " },\n", + " {\n", + " \"x\": 0.712890625,\n", + " \"y\": 0.7568359375\n", + " },\n", + " {\n", + " \"x\": 0.810546875,\n", + " \"y\": 0.9130859375\n", + " },\n", + " {\n", + " \"x\": 0.947265625,\n", + " \"y\": 0.72265625\n", + " },\n", + " {\n", + " \"x\": 0.83984375,\n", + " \"y\": 0.86181640625\n", + " },\n", + " {\n", + " \"x\": 0.771484375,\n", + " \"y\": 0.70068359375\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "# Velocity shows the velocity in the x- and y- directions\n", + "print(json.dumps(traffic_scene['objects'][idx]['velocity'][:10], indent=4))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2023-10-03T10:23:29.389521\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.8.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Valid indicates if the state of the vehicle was observed for each timepoint\n", + "plt.xlabel('Time step')\n", + "plt.ylabel('IS VALID');\n", + "plt.plot(traffic_scene['objects'][idx]['valid'], '_', lw=5)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'x': 9041.1259765625, 'y': -2716.647216796875}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Each object has a goalPosition, an (x, y) position within the scene\n", + "traffic_scene['objects'][idx]['goalPosition']" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'pedestrian'" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Finally, we have the type of the vehicle\n", + "traffic_scene['objects'][idx]['type']" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### In-Depth: Road Points\n", + "\n", + "Road points are static objects in the scene." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['geometry', 'type'])" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "traffic_scene['roads'][idx].keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'road_edge'" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This point represents the edge of a road\n", + "traffic_scene['roads'][idx]['type']" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " {\n", + " \"x\": 8922.911733810946,\n", + " \"y\": -2849.426741530589\n", + " },\n", + " {\n", + " \"x\": 8923.216436260553,\n", + " \"y\": -2849.038518766975\n", + " },\n", + " {\n", + " \"x\": 8923.50673911804,\n", + " \"y\": -2848.63941352788\n", + " },\n", + " {\n", + " \"x\": 8923.782254084921,\n", + " \"y\": -2848.2299596442986\n", + " },\n", + " {\n", + " \"x\": 8924.042612639492,\n", + " \"y\": -2847.8107047886665\n", + " },\n", + " {\n", + " \"x\": 8924.287466537296,\n", + " \"y\": -2847.382209743547\n", + " },\n", + " {\n", + " \"x\": 8924.516488266596,\n", + " \"y\": -2846.945047650609\n", + " },\n", + " {\n", + " \"x\": 8924.729371495881,\n", + " \"y\": -2846.49980324385\n", + " },\n", + " {\n", + " \"x\": 8924.91688626026,\n", + " \"y\": -2846.067714357487\n", + " },\n", + " {\n", + " \"x\": 8925.087545312272,\n", + " \"y\": -2845.6286986979553\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "# Geometry contains the (x, y) position(s) for a road point\n", + "# Note that this will be a list for road lanes and edges but a single (x, y) tuple for stop signs and alike\n", + "print(json.dumps(traffic_scene['roads'][idx]['geometry'][:10], 
indent=4));" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nocturne-research", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/02_nocturne_concepts.ipynb b/examples/02_nocturne_concepts.ipynb new file mode 100644 index 00000000..0863e19b --- /dev/null +++ b/examples/02_nocturne_concepts.ipynb @@ -0,0 +1,785 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nocturne concepts\n", + "\n", + "This page introduces the most basic elements of nocturne. You can find further information about these [in Section 3 of the Nocturne paper](https://arxiv.org/abs/2206.09889).\n", + "\n", + "_Last update: 10/2023_" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import os\n", + "os.chdir('..')\n", + "\n", + "data_path = './data/example_scenario.json'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Summary\n", + "\n", + "- Nocturne simulations are **discretized traffic scenarios**. A scenario is a constructed snapshot of traffic situation at a particular timepoint.\n", + "- The state of the vehicle of focus is referred to as the **ego state**. Each vehicle has their **own partial view of the traffic scene**; and a visible state is constructed by parameterizing the view distance, head angle and cone radius of the driver. The action for each vehicle is a `(1, 3)` tuple with the acceleration, steering and head angle of the vehicle. \n", + "- The **step method advances the simulation** with a desired step size. 
By default, the dynamics of vehicles are driven by a kinematic bicycle model. If a vehicle is set to expert-controlled mode, its position, heading, and speed will be updated according to a trajectory recorded from a human driver." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Simulation\n", + "\n", + "In Nocturne, a simulation discretizes an existing traffic scenario. At the moment, Nocturne supports traffic scenarios from the Waymo Open Dataset, but can be further extended to work with other driving datasets. \n", + "\n", + "
\n", + "
\n", + "\n", + "
An example of a set of traffic scenarios in Nocturne. Upon initialization, a start time is chosen. After each iteration, we take a step in the simulation, which brings us to the next scenario. This continues until we reach the end of the simulation.
\n", + "
\n", + "\n", + "We show an example of this using `example_scenario.json`, where our traffic data is extracted from the Waymo Open Motion Dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from nocturne import Simulation\n", + "\n", + "scenario_config = {\n", + " 'start_time': 0, # When to start the simulation\n", + " 'allow_non_vehicles': True, # Whether to include cyclists and pedestrians \n", + " 'max_visible_road_points': 10, # Maximum number of road points for a vehicle\n", + " 'max_visible_objects': 10, # Maximum number of road objects for a vehicle\n", + " 'max_visible_traffic_lights': 10, # Maximum number of traffic lights in constructed view\n", + " 'max_visible_stop_signs': 10, # Maximum number of stop signs in constructed view\n", + "}\n", + "\n", + "# Create simulation\n", + "sim = Simulation(data_path, scenario_config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scenario\n", + "\n", + "A simulation consists of a set of scenarios. A scenario is a snapshot of the traffic scene at a particular timepoint. \n", + "\n", + "Here is how to create a scenario object:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Get traffic scenario at timepoint\n", + "scenario = sim.getScenario()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `scenario` object holds the information we are interested in. 
Here are a couple of examples:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "33" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The number of road objects in the scene\n", + "len(scenario.getObjects())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total # moving objects: 15\n", + "\n", + "Object IDs of moving vehicles: \n", + " [0, 1, 2, 3, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32] \n" + ] + } + ], + "source": [ + "# The road objects that moved at a particular timepoint\n", + "objects_that_moved = scenario.getObjectsThatMoved()\n", + "\n", + "print(f'Total # moving objects: {len(objects_that_moved)}\\n')\n", + "print(f'Object IDs of moving vehicles: \\n {[obj.getID() for obj in objects_that_moved]} ')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "128" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Number of road lines\n", + "len(scenario.road_lines())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[,\n", + " ,\n", + " ,\n", + " ,\n", + " ]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scenario.getVehicles()[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# No cyclists in this scene\n", + "scenario.getCyclists()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { 
+ "name": "stdout", + "output_type": "stream", + "text": [ + "Found 2 moving vehicles in scene: [3, 32]\n" + ] + } + ], + "source": [ + "# Select all moving vehicles that move \n", + "moving_vehicles = [obj for obj in scenario.getVehicles() if obj in objects_that_moved]\n", + "\n", + "print(f'Found {len(moving_vehicles)} moving vehicles in scene: {[vehicle.getID() for vehicle in moving_vehicles]}')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Ego state\n", + "\n", + "The **ego state** is an array with features that describe the current vehicle. This array holds the following information: \n", + "- 0: length of ego vehicle\n", + "- 1: width of ego vehicle\n", + "- 2: speed of ego vehicle\n", + "- 3: distance to the goal position of ego vehicle\n", + "- 4: angle to the goal (target azimuth) \n", + "- 5: desired heading at goal position\n", + "- 6: desired speed at goal position\n", + "- 7: current acceleration\n", + "- 8: current steering position\n", + "- 9: current head angle" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Selected vehicle # 3\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 4.4936213 , 1.9770377 , 0.07662283, 4.24219 , -0.05617166,\n", + " -0.05909407, 1.6792779 , 0. , 0. , 0. 
],\n", + " dtype=float32)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Select an arbitrary vehicle\n", + "ego_vehicle = moving_vehicles[0]\n", + "\n", + "print(f'Selected vehicle # {ego_vehicle.getID()}')\n", + "\n", + "# Get the state for ego vehicle\n", + "scenario.ego_state(ego_vehicle)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Visible state\n", + "\n", + "We use the ego vehicle state, together with a view distance (how far the vehicle can see) and a view angle to construct the **visible state**. The figure below shows this procedure for a simplified traffic scene. \n", + "\n", + "Calling `scenario.visible_state()` returns a dictionary with four matrices:\n", + "- `stop_signs`: The visible stop signs \n", + "- `traffic_lights`: The states for the traffic lights from the perspective of the ego driver(red, yellow, green).\n", + "- `road_points`: The observable road points (static elements in the scene).\n", + "- `objects`: The observable road objects (vehicles, pedestrians and cyclists).\n", + "\n", + "
\n", + "
\n", + "\n", + "
To investigate coordination under partial observability, agents in Nocturne can only see an obstructed view of their environment. In this simplified traffic scene, we construct the state for the red ego driver. Note that Nocturne assumes that stop signs can be viewed, even if they are behind another driver.
\n", + "
\n", + "\n", + "\\begin{align*}\n", + "\\end{align*}\n", + "\n", + "
\n", + "
\n", + "\n", + "
The same scene, this time showing the view of the yellow car.
\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The shape of the visible state is a function of the maximum number of visible objects defined at initialization (traffic lights, stop signs, road objects, and road points) and whether we add padding. If `padding = True`, an array is of size `(max visible objects, # features)` is always constructed, even if there are no visible objects. Otherwise, if `padding = False` new entries are only created when objects are visible. \n", + "\n", + "For example, say a vehicle does not observe any stop signs at a given timepoint. If we set `padding=False`, and run `visible_state['stop_signs']`, we'll get back an empty array with the shape `(0, 3)`, where 3 is the number of features per stop sign. However, if the vehicle observes two stop signs using the same setting, then `visible_state['stop_signs']` will return an array with the shape `(2, 3)`.\n", + "\n", + "On the other hand, if we set `padding=True`, the resulting array will always have a shape of `(max visible stop signs, 3)`, irrespective of how many stop signs the vehicle actually observes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['stop_signs', 'traffic_lights', 'road_points', 'objects'])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Define viewing distance, radius and head angle\n", + "view_distance = 80 \n", + "view_angle = np.radians(120) \n", + "head_angle = 0\n", + "padding = True \n", + "\n", + "# Construct the visible state for ego vehicle\n", + "visible_state = scenario.visible_state(\n", + " ego_vehicle, \n", + " view_dist=view_distance, \n", + " view_angle=view_angle,\n", + " head_angle=head_angle,\n", + " padding=padding,\n", + ")\n", + "\n", + "visible_state.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# There are no visible stop signs at this point\n", + "visible_state['stop_signs'].T" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)" + ] + }, 
+ "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Traffic light states are filtered out in this version of Nocturne\n", + "visible_state['traffic_lights']" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10, 13)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Max visible road points x 13 features\n", + "visible_state['road_points'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10, 13)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Number of visible road objects x 13 features \n", + "visible_state['objects'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dimension flattened visible state: 410\n" + ] + } + ], + "source": [ + "visible_state_dim = sum([val.flatten().shape[0] for key, val in visible_state.items()])\n", + "\n", + "print(f'Dimension flattened visible state: {visible_state_dim}')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(410,)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We can also flatten the visible state\n", + "# flattened has padding: if we miss an object --> zeros\n", + "visible_state_flat = scenario.flattened_visible_state(\n", + " ego_vehicle, \n", + " view_dist=view_distance, \n", + " view_angle=view_angle, \n", + " head_angle=head_angle, \n", + ")\n", + "\n", + "visible_state_flat.shape" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that 
`.flattened_visible_state()` adds padding by default." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step \n", + "\n", + "`step(dt)` is a method call on an instance of the Simulation class, where `dt` is a scalar that represents the length of each simulation timestep in seconds. It advances the simulation by one timestep, which can result in changes to the state of the simulation (for example, new positions of objects, updated velocities, etc.) based on the physical laws and rules defined in the simulation.\n", + "\n", + "In the Waymo dataset, the length of the expert data is 9 seconds, and a step size of 0.1 seconds is used to discretize each traffic scene. The first second is used as a warm-start, leaving the remaining 8 seconds (80 steps) for the simulation (see Section 3.3 of the paper for details)." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "dt = 0.1\n", + "\n", + "# Step the simulation\n", + "sim.step(dt)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Vehicle control\n", + "\n", + "By default, vehicles in Nocturne are driven by a **kinematic bicycle model**. 
This means that calling the `step(dt)` method evolves the dynamics of a vehicle according to the following set of equations (Appendix D in the paper):\n", + "\n", + "\\begin{align*}\n", + " \\textbf{position: } x_{t+1} &= x_t + \\dot{x} \\, \\Delta t \\\\\n", + " y_{t+1} &= y_t + \\dot{y} \\, \\Delta t \\\\\n", + " \\textbf{heading: } \\theta_{t+1} &= \\theta_t + \\dot{\\theta} \\, \\Delta t \\\\ \n", + " \\textbf{speed: } v_{t+1} &= \\text{clip}(v_t + \\dot{v} \\, \\Delta t, -v_{\\text{max}}, v_{\\text{max}}) \\\\\n", + "\\end{align*}\n", + "\n", + "with\n", + "\n", + "\\begin{align*}\n", + " \\dot{v} &= a \\\\ \n", + " \\bar{v} &= \\text{clip}(v_t + 0.5 \\, \\dot{v} \\, \\Delta t, -v_{\\text{max}}, v_{\\text{max}}) \\\\\n", + " \\beta &= \\tan^{-1} \\left( \\frac{l_r \\tan (\\delta)}{L} \\right) \\\\\n", + " &= \\tan^{-1} (0.5 \\tan(\\delta)) \\\\\n", + " \\dot{x} &= \\bar{v} \\cos (\\theta + \\beta) \\\\\n", + " \\dot{y} &= \\bar{v} \\sin (\\theta + \\beta) \\\\\n", + " \\dot{\\theta} &= \\frac{\\bar{v} \\cos (\\beta)\\tan(\\delta)}{L}\n", + "\\end{align*}\n", + "\n", + "where $(x_t, y_t)$ is the position of a vehicle at time $t$, $\\theta_t$ is the vehicle's heading angle, $a$ is the acceleration and $\\delta$ is the steering angle. Finally, $L$ is the length of the car and $l_r = 0.5L$ is the distance to the rear axle of the car.\n", + "\n", + "If we set a vehicle to be **expert-controlled** instead, it will follow the same path as the respective human driver. This means that when we call the `step(dt)` function, the vehicle's position, heading, and speed will be updated to match the next point in the recorded human trajectory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# By default, all vehicles are not expert controlled\n", + "ego_vehicle.expert_control" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# Set a vehicle to be expert controlled:\n", + "ego_vehicle.expert_control = True" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "> **Pseudocode**: How `step(dt)` advances the simulation for every vehicle. Full code is implemented in [scenario.cc](https://github.com/facebookresearch/nocturne/blob/ae0a4e361457caf6b7e397675cc86f46161405ed/nocturne/cpp/src/scenario.cc#L264)\n", + "\n", + "---\n", + "\n", + "```Python\n", + "for vehicle in vehicles:\n", + "\n", + " if object is not expert controlled:\n", + " step vehicle dynamics following the kinematic bicycle model\n", + " \n", + " if vehicle is expert controlled:\n", + " get current time & vehicle idx\n", + " vehicle position = expert trajectories[vehicle_idx, time]\n", + " vehicle heading = expert headings[vehicle_idx, time]\n", + " vehicle speed = expert speeds[vehicle_idx, time]\n", + "```" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Action space\n", + "\n", + "The action set for a vehicle consists of three components: acceleration, steering and the head angle. 
Actions are discretized based on a provided upper and lower bound.\n", + "\n", + "The experiments in the paper use:\n", + "- 6 discrete actions for **acceleration** uniformly split between $[-3, 2] \\, \\frac{m}{s^2}$\n", + "- 21 discrete actions for **steering** between $[-0.7, 0.7]$ radians \n", + "- 5 discrete actions for **head tilt** between $[-1.6, 1.6]$ radians\n", + "\n", + "This is how you can access an expert action for a vehicle in Nocturne:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{acceleration: -0.224648, steering: -0.360994, head_angle: 0.000000}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Choose an arbitrary timepoint\n", + "time = 5\n", + "\n", + "# Show expert action at timepoint\n", + "scenario.expert_action(ego_vehicle, time)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "expert_action = scenario.expert_action(ego_vehicle, time)\n", + "\n", + "expert_action = expert_action.numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "acceleration = expert_action[0]\n", + "steering = expert_action[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "nocturne_cpp.Action" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(scenario.expert_action(ego_vehicle, time))" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(-0.005859, 0.004639)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# How did the vehicle's position change after taking this action?\n", + 
"scenario.expert_pos_shift(ego_vehicle, time)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-0.0007097125053405762" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# How did the head angle change?\n", + "scenario.expert_heading_shift(ego_vehicle, time)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nocturne-research", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/03_basic_rl_usage.ipynb b/examples/03_basic_rl_usage.ipynb new file mode 100644 index 00000000..0e4baa7f --- /dev/null +++ b/examples/03_basic_rl_usage.ipynb @@ -0,0 +1,262 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basic RL usage" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initializing environments\n", + "\n", + "\n", + "#### **Environment settings**\n", + "\n", + "- Initializing an environment is done with the `BaseEnv` class. The `BaseEnv` class leverages the `nocturne` simulator to create a basic RL interface, based on the provided traffic scenario(s). \n", + "\n", + "---\n", + "> 📝 The `env_config.yaml` file defines our environment settings, such as the action space, observation space and traffic scenarios to use.\n", + "---\n", + "\n", + "Check out `configs/env_config` for all the details!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import yaml\n", + "from nocturne.envs.base_env import BaseEnv\n", + "\n", + "# Load environment settings\n", + "with open(f\"../configs/env_config.yaml\", \"r\") as stream:\n", + " env_config = yaml.safe_load(stream)\n", + "\n", + "# Initialize environment\n", + "env = BaseEnv(config=env_config)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "controlling agents # [32, 3]\n" + ] + } + ], + "source": [ + "print(f'controlling agents # {[agent.id for agent in env.controlled_vehicles]}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### **Data**\n", + "\n", + "- Within `env_config.yaml`, we specify the path to the folder containing the traffic scenarios to use as follows:\n", + "\n", + "```yaml\n", + "# Path to folder with traffic scene(s) from which to create an environment\n", + "data_path: ../data\n", + "```\n", + "\n", + "- [Here](https://github.com/facebookresearch/nocturne/tree/main#downloading-the-dataset) are the instructions to access the complete dataset of traffic scenes. \n", + "\n", + "- The data folder also has a file named `valid_files.json`. This file lists the names of all the valid traffic scenarios along with the ids of the vehicles that are not valid. These vehicles are excluded from our experiment.\n", + "\n", + "For simplicity, we currently added a single traffic scenario that includes two vehicles in our data folder. 
Both vehicles can be used, so our `valid_files.json` looks like this:\n", + "\n", + "```yaml\n", + "{\n", + " \"example_scenario.json\": []\n", + "}\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Interacting with the environment\n", + "\n", + "The classic agent-environment loop of reinforcement learning is implemented as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done after 80 steps -- total return in episode: {3: 1.7939045316631903, 32: 1.76165686989652}\n", + "Done after 80 steps -- total return in episode: {3: 2.155318604202339, 32: 1.481429297893312}\n", + "Done after 80 steps -- total return in episode: {3: 9.465985459353316, 32: 2.2998212474249136}\n", + "Done after 80 steps -- total return in episode: {3: 1.7018343298612015, 32: 1.3247767658493788}\n", + "Done after 80 steps -- total return in episode: {3: 1.8384227205755483, 32: 10.332866871900634}\n", + "Done after 80 steps -- total return in episode: {3: 1.1086751511448438, 32: 2.2523170773066994}\n", + "Done after 68 steps -- total return in episode: {3: 9.61291282706631, 32: 1.7437541483099983}\n", + "Done after 80 steps -- total return in episode: {3: 1.3500529425191474, 32: 1.4489859636190936}\n", + "Done after 80 steps -- total return in episode: {3: 0.2037829695907602, 32: 1.79063201755183}\n", + "Done after 80 steps -- total return in episode: {3: 0.5679890269139611, 32: 1.1160696685449862}\n", + "Done after 80 steps -- total return in episode: {3: 1.2231784562099877, 32: 10.2609964920322}\n", + "Done after 80 steps -- total return in episode: {3: 0.1683594772814569, 32: 1.8316186898723619}\n" + ] + } + ], + "source": [ + "# Reset\n", + "obs_dict = env.reset()\n", + "\n", + "# Get info\n", + "agent_ids = [agent_id for agent_id in obs_dict.keys()]\n", + "dead_agent_ids = []\n", + "num_agents = len(agent_ids)\n", + "rewards = 
{agent_id: 0 for agent_id in agent_ids}\n", + "\n", + "for step in range(1000):\n", + "\n", + " # Sample actions\n", + " action_dict = {\n", + " agent_id: env.action_space.sample() \n", + " for agent_id in agent_ids\n", + " if agent_id not in dead_agent_ids\n", + " }\n", + " \n", + " # Step in env\n", + " obs_dict, rew_dict, done_dict, info_dict = env.step(action_dict)\n", + "\n", + " for agent_id in action_dict.keys():\n", + " rewards[agent_id] += rew_dict[agent_id]\n", + "\n", + " # Update dead agents\n", + " for agent_id, is_done in done_dict.items():\n", + " if is_done and agent_id not in dead_agent_ids:\n", + " dead_agent_ids.append(agent_id)\n", + "\n", + " # Reset if all agents are done\n", + " if done_dict[\"__all__\"]:\n", + " print(f'Done after {env.step_num} steps -- total return in episode: {rewards}')\n", + " obs_dict = env.reset()\n", + " dead_agent_ids = []\n", + " rewards = {agent_id: 0 for agent_id in agent_ids}\n", + "\n", + "# Close environment\n", + "env.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Accessing information about the environment" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Box(-inf, inf, (10,), float32)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The observation space \n", + "env.observation_space\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Discrete(9)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The size of the joint action space \n", + "env.action_space\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[, ]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + 
], + "source": [ + "# Which agents are controlled?\n", + "env.controlled_vehicles" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### \n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/04_ppo_with_sb3.ipynb b/examples/04_ppo_with_sb3.ipynb new file mode 100644 index 00000000..e1cbbc64 --- /dev/null +++ b/examples/04_ppo_with_sb3.ipynb @@ -0,0 +1,231 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## PPO with single-agent control\n", + "\n", + "In this notebook, we show how to use Proximal Policy Optimization (PPO) with Nocturne and [Stable Baselines 3 (SB3)](https://stable-baselines3.readthedocs.io/en/master/index.html). SB3 is a library that has implementations of various well-known RL algorithms." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Wrappers\n", + "\n", + "The Nocturne `BaseEnv` returns output as dictionaries, but the SB3 `PPO` class expects numpy arrays. To make our environment compatible with SB3, we create a wrapper class. Wrappers modify an environment without altering code directly, which reduces boilerplate and increases modularity." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import yaml\n", + "import wandb\n", + "# Import base environment and wrapper\n", + "from nocturne.envs.base_env import BaseEnv\n", + "from nocturne.wrappers.sb3_wrappers import NocturneToSB3\n", + "\n", + "# import os\n", + "# os.chdir('..')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Load environment settings\n", + "with open(f\"../configs/env_config.yaml\", \"r\") as stream:\n", + " env_config = yaml.safe_load(stream)\n", + "\n", + "# Make sure to only control a single agent at a time. This is achieved by setting max_num_vehicles = 1\n", + "env_config[\"max_num_vehicles\"] = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize env and wrap it with SB3 wrapper\n", + "env = NocturneToSB3(BaseEnv(env_config))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PPO\n", + "\n", + "Now all we have to do is initialize the SB3 `PPO` class and we're ready to learn! We use Weights & Biases (`wandb`) to take care of the logging. If you prefer not to use `wandb`, set `LOGGING = False` and `verbose=1`. 
\n", + "\n", + "\n", + "---\n", + "\n", + "> 🔦 More info on PPO and settings can be found in the [SB3 docs](https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html).\n", + "\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from stable_baselines3 import PPO" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "LOGGING = True" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mdaphnecor\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + ] + }, + { + "data": { + "text/html": [ + "Tracking run with wandb version 0.15.12" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in /Users/Daphne/git_repos/nocturne_lab/examples/wandb/run-20231004_215340-rmy7acy1" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run blooming-eon-12 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View project at https://wandb.ai/daphnecor/single_agent_control_sb3_ppo" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run at https://wandb.ai/daphnecor/single_agent_control_sb3_ppo/runs/rmy7acy1" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "if LOGGING:\n", + "    wandb.login()\n", + "    run = wandb.init(\n", + "        project=\"single_agent_control_sb3_ppo\",\n", + "        sync_tensorboard=True,\n", + "    )\n", + "    run_id = run.id\n", + "else:\n", + "    run_id = None\n", + "\n", + "# Init PPO algorithm\n", + "model = PPO( \n", + "    policy=\"MlpPolicy\", # Policy type\n", + "    n_steps=4096, # Number of steps per rollout\n", + "    batch_size=128, # Minibatch size\n", + "    env=env, # Our wrapped environment\n", + "    seed=42, # Always seed for reproducibility\n", + "    verbose=0,\n", + "    tensorboard_log=f\"runs/{run_id}\" if run_id is not None else None, # Sync with wandb\n", + ")\n", + "\n", + "# Learn\n", + "model.learn(total_timesteps=200_000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 🤔 How good is your policy?\n", + "\n", + "Hooray! You have just trained your first PPO agent in Nocturne! 🏁 \n", + "\n", + "Now take a look at information you've logged over training; did we learn? (if you want to compare, [this is what my run looks like](https://api.wandb.ai/links/daphnecor/iarufxw9))\n", + "\n", + "One important metric for assessing the effectiveness of your policy is the average cumulative reward per episode. In our case, the **maximum** achievable return per episode is approximately between 9 and 10 (it varies per traffic scene and per agent). With the configurations above, your policy should approach this value in 150,000 steps. 
Here, steps (the `global_step`) represents the total number of **frames** our policy network has seen, you can think of it as the accumulated experience." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nocturne_lab", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/create_env.py b/examples/create_env.py deleted file mode 100644 index 7b9355f5..00000000 --- a/examples/create_env.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Test step and rendering functions.""" -import hydra - -from cfgs.config import set_display_window -from nocturne import Action -from nocturne.envs.wrappers import create_env - - -@hydra.main(config_path="../cfgs/", config_name="config") -def create_rl_env(cfg): - """Test step and rendering functions.""" - set_display_window() - env = create_env(cfg) - _ = env.reset() - # quick check that rendering works - _ = env.scenario.getConeImage( - env.scenario.getVehicles()[0], - # how far the agent can see - view_dist=cfg['subscriber']['view_dist'], - # the angle formed by the view cone - view_angle=cfg['subscriber']['view_angle'], - # the agent's head angle - head_angle=0.0, - # whether to draw the goal position in the image - draw_target_position=False) - for _ in range(80): - # grab the list of vehicles that actually need to - # move some distance to get to their goal - moving_vehs = env.scenario.getObjectsThatMoved() - # obs, rew, done, info - # each of these objects is a dict keyed by the vehicle ID - 
# info[veh_id] contains the following useful keys: - # 'collided': did the agent collide with a road object or edge - # 'veh_veh_collision': did the agent collide with a vehicle - # 'veh_edge_collision': did the agent collide with a road edge - # 'goal_achieved': did we get to our target - _, _, _, _ = env.step({ - veh.id: Action(acceleration=2.0, steering=1.0, head_angle=0.5) - for veh in moving_vehs - }) - - -if __name__ == '__main__': - create_rl_env() diff --git a/examples/imitation_learning/filters.py b/examples/imitation_learning/filters.py deleted file mode 100644 index fdab3118..00000000 --- a/examples/imitation_learning/filters.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""A streaming mean-std filter used to whiten inputs.""" -import torch -from torch import nn - - -class MeanStdFilter(nn.Module): - """adapted from https://www.johndcook.com/blog/standard_deviation/.""" - - def __init__(self, input_shape, eps=1e-05): - super().__init__() - self.input_shape = input_shape - self.eps = eps - self.track_running_states = True - self.counter = 0 - self._M = nn.Parameter(torch.zeros(input_shape), requires_grad=False) - self._S = nn.Parameter(torch.zeros(input_shape), requires_grad=False) - self._n = 0 - - def train(self, mode): - """Turn on updates to mean and standard deviation.""" - self.track_running_states = True - - def eval(self): - """Turn off updates to mean and standard deviation.""" - self.track_running_states = False - - def forward(self, x): - """Whiten and optionally update.""" - if self.track_running_states: - for i in range(x.shape[0]): - self.push(x[i]) - x = x - self.mean - x = x / (self.std + self.eps) - return x - - def push(self, x): - """Unvectorized update of the running statistics.""" - if x.shape != self._M.shape: - raise ValueError( - "Unexpected 
input shape {}, expected {}, value = {}".format( - x.shape, self._M.shape, x)) - n1 = self._n - self._n += 1 - if self._n == 1: - self._M[...] = x - else: - delta = x - self._M - self._M[...] += delta / self._n - self._S[...] += delta * delta * n1 / self._n - - @property - def n(self): - """Return the number of samples.""" - return self._n - - @property - def mean(self): - """Return the mean.""" - return self._M - - @property - def var(self): - """Compute the variance.""" - return self._S / (self._n - 1) if self._n > 1 else torch.square( - self._M) - - @property - def std(self): - """Compute the standard deviation.""" - return torch.sqrt(self.var) - - @property - def shape(self): - """Get the means shape.""" - return self._M.shape diff --git a/examples/imitation_learning/model.py b/examples/imitation_learning/model.py deleted file mode 100644 index d3030f17..00000000 --- a/examples/imitation_learning/model.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Model for an imitation learning agent.""" -import torch -from torch import nn -from torch.distributions.multivariate_normal import MultivariateNormal -from torch.distributions.categorical import Categorical - -from examples.imitation_learning.filters import MeanStdFilter - - -class ImitationAgent(nn.Module): - """Pytorch Module for imitation. 
Output is a Multivariable Gaussian.""" - - def __init__(self, cfg): - """Initialize.""" - super(ImitationAgent, self).__init__() - - self.n_states = cfg['n_inputs'] - self.hidden_layers = cfg.get('hidden_layers', [256, 256]) - - self.discrete = cfg['discrete'] - - if self.discrete: - self.actions_discretizations = cfg['actions_discretizations'] - self.actions_bounds = cfg['actions_bounds'] - self.actions_grids = [ - torch.linspace(a_min, a_max, a_count, - requires_grad=False).to(cfg['device']) - for (a_min, a_max), a_count in zip( - self.actions_bounds, self.actions_discretizations) - ] - else: - # neural network outputs between -1 and 1 (tanh filter) - # then output is sampled from a Gaussian distribution - # N(nn output * mean_scalings, std_devs) - self.mean_scalings = torch.tensor(cfg['mean_scalings']) - self.std_devs = torch.tensor(cfg['std_devs']) - self.covariance_matrix = torch.diag_embed(self.std_devs) - - self._build_model() - - def _build_model(self): - """Build agent MLP that outputs an action mean and variance from a state input.""" - if self.hidden_layers is None or len(self.hidden_layers) == 0: - self.nn = nn.Identity() - pre_head_size = self.n_states - else: - self.nn = nn.Sequential( - MeanStdFilter(self.n_states), - nn.Linear(self.n_states, self.hidden_layers[0]), - nn.Tanh(), - *[ - nn.Sequential( - nn.Linear(self.hidden_layers[i], - self.hidden_layers[i + 1]), - nn.Tanh(), - ) for i in range(len(self.hidden_layers) - 1) - ], - ) - pre_head_size = self.hidden_layers[-1] - - if self.discrete: - self.heads = nn.ModuleList([ - nn.Linear(pre_head_size, discretization) - for discretization in self.actions_discretizations - ]) - else: - self.head = nn.Sequential( - nn.Linear(pre_head_size, len(self.mean_scalings)), nn.Tanh()) - - def dist(self, state): - """Construct a distribution from tensor input.""" - x_out = self.nn(state) - if self.discrete: - return [Categorical(logits=head(x_out)) for head in self.heads] - else: - return MultivariateNormal( - 
self.head(x_out) * self.mean_scalings, self.covariance_matrix) - - def forward(self, state, deterministic=False, return_indexes=False): - """Generate an output from tensor input.""" - dists = self.dist(state) - if self.discrete: - actions_idx = [ - d.logits.argmax(axis=-1) if deterministic else d.sample() - for d in dists - ] - actions = [ - action_grid[action_idx] for action_grid, action_idx in zip( - self.actions_grids, actions_idx) - ] - return (actions, actions_idx) if return_indexes else actions - else: - return [dist.argmax(axis=-1) for dist in dists - ] if deterministic else [dist.sample() for dist in dists] - - def log_prob(self, state, ground_truth_action, return_indexes=False): - """Compute the log prob of the expert action for a given input tensor.""" - dist = self.dist(state) - if self.discrete: - # find indexes in actions grids whose values are the closest to the ground truth actions - actions_idx = self.action_to_grid_idx(ground_truth_action) - # sum log probs of actions indexes wrt. 
Categorial variables for each action dimension - log_prob = sum( - [d.log_prob(actions_idx[:, i]) for i, d in enumerate(dist)]) - return (log_prob, actions_idx) if return_indexes else log_prob - else: - return dist.log_prob(ground_truth_action) - - def action_to_grid_idx(self, action): - """Convert a batch of actions to a batch of action indexes (for discrete actions only).""" - # action is of shape (batch_size, n_actions) - # we want to transform it into an array of same shape, but with indexes instead of actions - # credits https://stackoverflow.com/a/46184652/16207351 - output = torch.zeros_like(action) - for i, action_grid in enumerate(self.actions_grids): - actions = action[:, i] - - # get indexes where actions would be inserted in action_grid to keep it sorted - idxs = torch.searchsorted(action_grid, actions) - - # if it would be inserted at the end, we're looking at the last action - idxs[idxs == len(action_grid)] -= 1 - - # find indexes where previous index is closer (simple grid has constant sampling intervals) - idxs[action_grid[idxs] - actions > torch.diff(action_grid).mean() * - 0.5] -= 1 - - # write indexes in output - output[:, i] = idxs - return output - - -if __name__ == '__main__': - model_cfg = { - 'n_inputs': 100, - 'hidden_layers': [256, 256], - 'discrete': False, - 'mean_scalings': [1, 10, 10000], - 'std_devs': [1.0, 1.0, 1.0], - } - if True: - model_cfg.update({ - 'discrete': True, - 'actions_discretizations': [5, 10], - 'actions_bounds': [[-3, 3], [0, 10]], - }) - - model = ImitationAgent(model_cfg) - - sample_states = torch.rand(3, model_cfg['n_inputs']) - actions = model(sample_states) - print(actions) - print(model.log_prob(sample_states, actions)) diff --git a/examples/imitation_learning/replay_video.py b/examples/imitation_learning/replay_video.py deleted file mode 100644 index 9221e431..00000000 --- a/examples/imitation_learning/replay_video.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Replay a video of a trained controller.""" -from collections import defaultdict -import json -from pathlib import Path -import sys - -import imageio -import numpy as np -import subprocess -import torch - -from cfgs.config import PROCESSED_TRAIN_NO_TL, PROJECT_PATH, set_display_window -from nocturne import Simulation, Vector2D - -OUTPUT_PATH = str(PROJECT_PATH / 'vids') - -MODEL_PATH = Path( - '/checkpoint/eugenevinitsky/nocturne/test/2022.06.05/test/14.23.17/\ - ++device=cuda,++file_limit=1000/train_logs/2022_06_05_14_23_23/model_220.pth' -) -CONFIG_PATH = MODEL_PATH.parent / 'configs.json' -GOAL_TOLERANCE = 1.0 - -if __name__ == '__main__': - set_display_window() - output_dir = Path(OUTPUT_PATH) - output_dir.mkdir(exist_ok=True) - - with open(CONFIG_PATH, 'r') as f: - configs = json.load(f) - - data_path = PROCESSED_TRAIN_NO_TL - files = [ - file for file in Path(data_path).iterdir() if 'tfrecord' in file.stem - ] - scenario_config = configs['scenario_cfg'] - dataloader_config = configs['dataloader_cfg'] - files = files[:600] - np.random.shuffle(files) - model = torch.load(MODEL_PATH).to('cpu') - model.eval() - for traj_path in files: - sim = Simulation(str(traj_path), scenario_config) - output_str = traj_path.stem.split('.')[0].split('/')[-1] - - def policy(state): - """Get model output.""" - state = torch.as_tensor(np.array([state]), dtype=torch.float32) - return model.forward(state, - deterministic=True, - return_indexes=False) - - with torch.no_grad(): - for expert_control_vehicles, mp4_name in [ - (False, f'{output_str}_policy_rollout.mp4'), - (True, f'{output_str}_true_rollout.mp4') - ]: - frames = [] - sim.reset() - scenario = sim.getScenario() - - objects_of_interest = [ - obj for obj in scenario.getVehicles() - if obj in scenario.getObjectsThatMoved() - ] - - for obj in objects_of_interest: - 
obj.expert_control = True - - relevant_obj_ids = [ - obj.getID() for obj in objects_of_interest[0:2] - ] - - view_dist = configs['dataloader_cfg']['view_dist'] - view_angle = configs['dataloader_cfg']['view_angle'] - state_normalization = configs['dataloader_cfg'][ - 'state_normalization'] - dt = configs['dataloader_cfg']['dt'] - - n_stacked_states = configs['dataloader_cfg'][ - 'n_stacked_states'] - state_size = configs['model_cfg'][ - 'n_inputs'] // n_stacked_states - state_dict = defaultdict( - lambda: np.zeros(state_size * n_stacked_states)) - for i in range(n_stacked_states): - for veh in objects_of_interest: - ego_state = scenario.ego_state(veh) - visible_state = scenario.flattened_visible_state( - veh, view_dist=view_dist, view_angle=view_angle) - state = np.concatenate( - (ego_state, visible_state)) / state_normalization - state_dict[veh.getID()] = np.roll( - state_dict[veh.getID()], len(state)) - state_dict[veh.getID()][:len(state)] = state - - sim.step(dt) - - for obj in scenario.getObjectsThatMoved(): - obj.expert_control = True - # we only actually want to take control once the vehicle - # has been placed into the network - for veh in objects_of_interest: - if np.isclose(veh.position.x, -10000.0): - veh.expert_control = True - else: - if veh.getID() in relevant_obj_ids: - veh.expert_control = expert_control_vehicles - veh.highlight = True - - for i in range(90 - n_stacked_states): - # we only actually want to take control once the vehicle - # has been placed into the network - # so vehicles that should be controlled by our agent - # are overriden to be expert controlled - # until they are actually spawned in the scene - for veh in objects_of_interest: - if np.isclose(veh.position.x, -10000.0): - veh.expert_control = True - else: - if veh.getID() in relevant_obj_ids: - veh.expert_control = expert_control_vehicles - veh.highlight = True - print( - f'...{i+1}/{90 - n_stacked_states} ({traj_path} ; {mp4_name})' - ) - img = scenario.getImage( - 
img_width=1600, - img_height=1600, - draw_target_positions=True, - padding=50.0, - ) - frames.append(img) - for veh in objects_of_interest: - veh_state = np.concatenate( - (np.array(scenario.ego_state(veh), copy=False), - np.array(scenario.flattened_visible_state( - veh, - view_dist=view_dist, - view_angle=view_angle), - copy=False))) - ego_state = scenario.ego_state(veh) - visible_state = scenario.flattened_visible_state( - veh, view_dist=view_dist, view_angle=view_angle) - state = np.concatenate( - (ego_state, visible_state)) / state_normalization - state_dict[veh.getID()] = np.roll( - state_dict[veh.getID()], len(state)) - state_dict[veh.getID()][:len(state)] = state - action = policy(state_dict[veh.getID()]) - if dataloader_config['expert_position']: - if configs['model_cfg']['discrete']: - pos_diff = np.array([ - pos.cpu().numpy()[0] for pos in action[0:2] - ]) - heading = action[2:3][0].cpu().numpy()[0] - else: - pos_diff = action[0:2] - heading = action[2:3] - veh.position = Vector2D.from_numpy( - pos_diff + veh.position.numpy()) - veh.heading += heading - else: - veh.acceleration = action[0].cpu().numpy() - veh.steering = action[1].cpu().numpy() - sim.step(dt) - for veh in scenario.getObjectsThatMoved(): - if (veh.position - - veh.target_position).norm() < GOAL_TOLERANCE: - scenario.removeVehicle(veh) - imageio.mimsave(mp4_name, np.stack(frames, axis=0), fps=30) - print(f'> {mp4_name}') - - # stack the movies side by side - output_name = traj_path.stem.split('.')[0].split('/')[-1] - output_path = f'{output_name}_output.mp4' - ffmpeg_command = f'ffmpeg -y -i {output_str}_true_rollout.mp4 ' \ - f'-i {output_str}_policy_rollout.mp4 -filter_complex hstack {output_path}' - print(ffmpeg_command) - subprocess.call(ffmpeg_command.split(' ')) - print(f'> {output_path}') - sys.exit() diff --git a/examples/imitation_learning/train.py b/examples/imitation_learning/train.py deleted file mode 100644 index 7f072162..00000000 --- a/examples/imitation_learning/train.py +++ 
/dev/null @@ -1,260 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Imitation learning training script (behavioral cloning).""" -from datetime import datetime -from pathlib import Path -import pickle -import random -import json - -import hydra -import numpy as np -import torch -from torch.utils.tensorboard import SummaryWriter -from torch.optim import Adam -from torch.utils.data import DataLoader -from tqdm import tqdm -import wandb - -from examples.imitation_learning.model import ImitationAgent -from examples.imitation_learning.waymo_data_loader import WaymoDataset - - -def set_seed_everywhere(seed): - """Ensure determinism.""" - torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed_all(seed) - np.random.seed(seed) - random.seed(seed) - - -@hydra.main(config_path="../../cfgs/imitation", config_name="config") -def main(args): - """Train an IL model.""" - set_seed_everywhere(args.seed) - # create dataset and dataloader - if args.actions_are_positions: - expert_bounds = [[-0.5, 3], [-3, 3], [-0.07, 0.07]] - actions_discretizations = [21, 21, 21] - actions_bounds = [[-0.5, 3], [-3, 3], [-0.07, 0.07]] - mean_scalings = [3, 3, 0.07] - std_devs = [0.1, 0.1, 0.02] - else: - expert_bounds = [[-6, 6], [-0.7, 0.7]] - actions_bounds = expert_bounds - actions_discretizations = [15, 43] - mean_scalings = [3, 0.7] - std_devs = [0.1, 0.02] - - dataloader_cfg = { - 'tmin': 0, - 'tmax': 90, - 'view_dist': args.view_dist, - 'view_angle': args.view_angle, - 'dt': 0.1, - 'expert_action_bounds': expert_bounds, - 'expert_position': args.actions_are_positions, - 'state_normalization': 100, - 'n_stacked_states': args.n_stacked_states, - } - scenario_cfg = { - 'start_time': 0, - 'allow_non_vehicles': True, - 'spawn_invalid_objects': True, - 'max_visible_road_points': args.max_visible_road_points, - 
'sample_every_n': 1, - 'road_edge_first': False, - } - dataset = WaymoDataset( - data_path=args.path, - file_limit=args.num_files, - dataloader_config=dataloader_cfg, - scenario_config=scenario_cfg, - ) - data_loader = iter( - DataLoader( - dataset, - batch_size=args.batch_size, - num_workers=args.n_cpus, - pin_memory=True, - )) - - # create model - sample_state, _ = next(data_loader) - n_states = sample_state.shape[-1] - - model_cfg = { - 'n_inputs': n_states, - 'hidden_layers': [1024, 256, 128], - 'discrete': args.discrete, - 'mean_scalings': mean_scalings, - 'std_devs': std_devs, - 'actions_discretizations': actions_discretizations, - 'actions_bounds': actions_bounds, - 'device': args.device - } - - model = ImitationAgent(model_cfg).to(args.device) - model.train() - print(model) - - # create optimizer - optimizer = Adam(model.parameters(), lr=args.lr) - - # create exp dir - time_str = datetime.now().strftime('%Y_%m_%d_%H_%M_%S') - exp_dir = Path.cwd() / Path('train_logs') / time_str - exp_dir.mkdir(parents=True, exist_ok=True) - - # save configs - configs_path = exp_dir / 'configs.json' - configs = { - 'scenario_cfg': scenario_cfg, - 'dataloader_cfg': dataloader_cfg, - 'model_cfg': model_cfg, - } - with open(configs_path, 'w') as fp: - json.dump(configs, fp, sort_keys=True, indent=4) - print('Wrote configs at', configs_path) - - # tensorboard writer - if args.write_to_tensorboard: - writer = SummaryWriter(log_dir=str(exp_dir)) - # wandb logging - if args.wandb: - wandb_mode = "online" - wandb.init(config=args, - project=args.wandb_project, - name=args.experiment, - group=args.experiment, - resume="allow", - settings=wandb.Settings(start_method="fork"), - mode=wandb_mode) - - # train loop - print('Exp dir created at', exp_dir) - print(f'`tensorboard --logdir={exp_dir}`\n') - for epoch in range(args.epochs): - print(f'\nepoch {epoch+1}/{args.epochs}') - n_samples = epoch * args.batch_size * (args.samples_per_epoch // - args.batch_size) - - for i in 
tqdm(range(args.samples_per_epoch // args.batch_size), - unit='batch'): - # get states and expert actions - states, expert_actions = next(data_loader) - states = states.to(args.device) - expert_actions = expert_actions.to(args.device) - - # compute loss - if args.discrete: - log_prob, expert_idxs = model.log_prob(states, - expert_actions, - return_indexes=True) - else: - dist = model.dist(states) - log_prob = dist.log_prob(expert_actions.float()) - loss = -log_prob.mean() - - metrics_dict = {} - - # optim step - optimizer.zero_grad() - loss.backward() - - # grad clipping - total_norm = 0 - for p in model.parameters(): - if p.grad is not None: - param_norm = p.grad.detach().data.norm(2) - total_norm += param_norm.item()**2 - total_norm = total_norm**0.5 - metrics_dict['train/grad_norm'] = total_norm - torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) - total_norm = 0 - for p in model.parameters(): - if p.grad is not None: - param_norm = p.grad.detach().data.norm(2) - total_norm += param_norm.item()**2 - total_norm = total_norm**0.5 - metrics_dict['train/post_clip_grad_norm'] = total_norm - optimizer.step() - - # tensorboard logging - metrics_dict['train/loss'] = loss.item() - - if args.actions_are_positions: - metrics_dict['train/x_logprob'] = log_prob[0] - metrics_dict['train/y_logprob'] = log_prob[1] - metrics_dict['train/steer_logprob'] = log_prob[2] - else: - metrics_dict['train/accel_logprob'] = log_prob[0] - metrics_dict['train/steer_logprob'] = log_prob[1] - - if not model_cfg['discrete']: - diff_actions = torch.mean(torch.abs(dist.mean - - expert_actions), - axis=0) - metrics_dict['train/accel_diff'] = diff_actions[0] - metrics_dict['train/steer_diff'] = diff_actions[1] - metrics_dict['train/l2_dist'] = torch.norm( - dist.mean - expert_actions.float()) - - if model_cfg['discrete']: - with torch.no_grad(): - model_actions, model_idxs = model(states, - deterministic=True, - return_indexes=True) - accuracy = [ - (model_idx == 
expert_idx).float().mean(axis=0) - for model_idx, expert_idx in zip(model_idxs, expert_idxs.T) - ] - if args.actions_are_positions: - metrics_dict['train/x_pos_acc'] = accuracy[0] - metrics_dict['train/y_pos_acc'] = accuracy[1] - metrics_dict['train/heading_acc'] = accuracy[2] - else: - metrics_dict['train/accel_acc'] = accuracy[0] - metrics_dict['train/steer_acc'] = accuracy[1] - - for key, val in metrics_dict.items(): - if args.write_to_tensorboard: - writer.add_scalar(key, val, n_samples) - if args.wandb: - wandb.log(metrics_dict, step=n_samples) - # save model checkpoint - if (epoch + 1) % 10 == 0 or epoch == args.epochs - 1: - model_path = exp_dir / f'model_{epoch+1}.pth' - torch.save(model, str(model_path)) - pickle.dump(filter, open(exp_dir / f"filter_{epoch+1}.pth", "wb")) - print(f'\nSaved model at {model_path}') - if args.discrete: - if args.actions_are_positions: - print('xpos') - print('model: ', model_idxs[0][0:10]) - print('expert: ', expert_idxs[0:10, 0]) - print('ypos') - print('model: ', model_idxs[1][0:10]) - print('expert: ', expert_idxs[0:10, 1]) - print('steer') - print('model: ', model_idxs[2][0:10]) - print('expert: ', expert_idxs[0:10, 2]) - else: - print('accel') - print('model: ', model_idxs[0][0:10]) - print('expert: ', expert_idxs[0:10, 0]) - print('steer') - print('model: ', model_idxs[1][0:10]) - print('expert: ', expert_idxs[0:10, 1]) - - print('Done, exp dir is', exp_dir) - - writer.flush() - writer.close() - - -if __name__ == '__main__': - main() diff --git a/examples/imitation_learning/waymo_data_loader.py b/examples/imitation_learning/waymo_data_loader.py deleted file mode 100644 index 8f1fc606..00000000 --- a/examples/imitation_learning/waymo_data_loader.py +++ /dev/null @@ -1,201 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-"""Dataloader for imitation learning in Nocturne.""" -from collections import defaultdict -import random - -import torch -from pathlib import Path -import numpy as np - -from cfgs.config import ERR_VAL -from nocturne import Simulation - - -def _get_waymo_iterator(paths, dataloader_config, scenario_config): - # if worker has no paths, return an empty iterator - if len(paths) == 0: - return - - # load dataloader config - tmin = dataloader_config.get('tmin', 0) - tmax = dataloader_config.get('tmax', 90) - view_dist = dataloader_config.get('view_dist', 80) - view_angle = dataloader_config.get('view_angle', np.radians(120)) - dt = dataloader_config.get('dt', 0.1) - expert_action_bounds = dataloader_config.get('expert_action_bounds', - [[-3, 3], [-0.7, 0.7]]) - expert_position = dataloader_config.get('expert_position', True) - state_normalization = dataloader_config.get('state_normalization', 100) - n_stacked_states = dataloader_config.get('n_stacked_states', 5) - - while True: - # select a random scenario path - scenario_path = np.random.choice(paths) - - # create simulation - sim = Simulation(str(scenario_path), scenario_config) - scenario = sim.getScenario() - - # set objects to be expert-controlled - for obj in scenario.getObjects(): - obj.expert_control = True - - # we are interested in imitating vehicles that moved - objects_that_moved = scenario.getObjectsThatMoved() - objects_of_interest = [ - obj for obj in scenario.getVehicles() if obj in objects_that_moved - ] - - # initialize values if stacking states - stacked_state = defaultdict(lambda: None) - initial_warmup = n_stacked_states - 1 - - state_list = [] - action_list = [] - - # iterate over timesteps and objects of interest - for time in range(tmin, tmax): - for obj in objects_of_interest: - # get state - ego_state = scenario.ego_state(obj) - visible_state = scenario.flattened_visible_state( - obj, view_dist=view_dist, view_angle=view_angle) - state = np.concatenate((ego_state, visible_state)) - - # 
normalize state - state /= state_normalization - - # stack state - if n_stacked_states > 1: - if stacked_state[obj.getID()] is None: - stacked_state[obj.getID()] = np.zeros( - len(state) * n_stacked_states, dtype=state.dtype) - stacked_state[obj.getID()] = np.roll( - stacked_state[obj.getID()], len(state)) - stacked_state[obj.getID()][:len(state)] = state - - if np.isclose(obj.position.x, ERR_VAL): - continue - - if not expert_position: - # get expert action - expert_action = scenario.expert_action(obj, time) - # check for invalid action (because no value available for taking derivative) - # or because the vehicle is at an invalid state - if expert_action is None: - continue - expert_action = expert_action.numpy() - # now find the corresponding expert actions in the grids - - # throw out actions containing NaN or out-of-bound values - if np.isnan(expert_action).any() \ - or expert_action[0] < expert_action_bounds[0][0] \ - or expert_action[0] > expert_action_bounds[0][1] \ - or expert_action[1] < expert_action_bounds[1][0] \ - or expert_action[1] > expert_action_bounds[1][1]: - continue - else: - expert_pos_shift = scenario.expert_pos_shift(obj, time) - if expert_pos_shift is None: - continue - expert_pos_shift = expert_pos_shift.numpy() - expert_heading_shift = scenario.expert_heading_shift( - obj, time) - if expert_heading_shift is None \ - or expert_pos_shift[0] < expert_action_bounds[0][0] \ - or expert_pos_shift[0] > expert_action_bounds[0][1] \ - or expert_pos_shift[1] < expert_action_bounds[1][0] \ - or expert_pos_shift[1] > expert_action_bounds[1][1] \ - or expert_heading_shift < expert_action_bounds[2][0] \ - or expert_heading_shift > expert_action_bounds[2][1]: - continue - expert_action = np.concatenate( - (expert_pos_shift, [expert_heading_shift])) - - # yield state and expert action - if stacked_state[obj.getID()] is not None: - if initial_warmup <= 0: # warmup to wait for stacked state to be filled up - state_list.append(stacked_state[obj.getID()]) - 
action_list.append(expert_action) - else: - state_list.append(state) - action_list.append(expert_action) - - # step the simulation - sim.step(dt) - if initial_warmup > 0: - initial_warmup -= 1 - - if len(state_list) > 0: - temp = list(zip(state_list, action_list)) - random.shuffle(temp) - state_list, action_list = zip(*temp) - for state_return, action_return in zip(state_list, action_list): - yield (state_return, action_return) - - -class WaymoDataset(torch.utils.data.IterableDataset): - """Waymo dataset loader.""" - - def __init__(self, - data_path, - dataloader_config={}, - scenario_config={}, - file_limit=None): - super(WaymoDataset).__init__() - - # save configs - self.dataloader_config = dataloader_config - self.scenario_config = scenario_config - - # get paths of dataset files (up to file_limit paths) - self.file_paths = list( - Path(data_path).glob('tfrecord*.json'))[:file_limit] - print(f'WaymoDataset: loading {len(self.file_paths)} files.') - - # sort the paths for reproducibility if testing on a small set of files - self.file_paths.sort() - - def __iter__(self): - """Partition files for each worker and return an (state, expert_action) iterable.""" - # get info on current worker process - worker_info = torch.utils.data.get_worker_info() - - if worker_info is None: - # single-process data loading, return the whole set of files - return _get_waymo_iterator(self.file_paths, self.dataloader_config, - self.scenario_config) - - # distribute a unique set of file paths to each worker process - worker_file_paths = np.array_split( - self.file_paths, worker_info.num_workers)[worker_info.id] - return _get_waymo_iterator(list(worker_file_paths), - self.dataloader_config, - self.scenario_config) - - -if __name__ == '__main__': - dataset = WaymoDataset(data_path='dataset/tf_records', - file_limit=20, - dataloader_config={ - 'view_dist': 80, - 'n_stacked_states': 3, - }, - scenario_config={ - 'start_time': 0, - 'allow_non_vehicles': True, - 'spawn_invalid_objects': True, 
- }) - - data_loader = torch.utils.data.DataLoader( - dataset, - batch_size=32, - num_workers=4, - pin_memory=True, - ) - - for i, x in zip(range(100), data_loader): - print(i, x[0].shape, x[1].shape) diff --git a/examples/nocturne_functions.py b/examples/nocturne_functions.py deleted file mode 100644 index 93f37c52..00000000 --- a/examples/nocturne_functions.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Example of how to make movies of Nocturne scenarios.""" -import os - -import hydra -import matplotlib.pyplot as plt -import numpy as np - -from cfgs.config import PROJECT_PATH, get_scenario_dict, set_display_window -from nocturne import Simulation, Action - - -def save_image(img, output_path='./img.png'): - """Make a single image from the scenario.""" - dpi = 100 - height, width, depth = img.shape - figsize = width / float(dpi), height / float(dpi) - plt.figure(figsize=figsize, dpi=dpi) - plt.axis('off') - plt.imshow(img) - plt.savefig(output_path) - print('>', output_path) - - -@hydra.main(config_path="../cfgs/", config_name="config") -def main(cfg): - """Initialize the scenario.""" - set_display_window() - if not os.path.exists(PROJECT_PATH / 'examples/rendering'): - os.makedirs(PROJECT_PATH / 'examples/rendering') - # load scenario. 
by default this won't have pedestrians or cyclists - sim = Simulation(scenario_path=str(PROJECT_PATH / 'examples' / - 'example_scenario.json'), - config=get_scenario_dict(cfg)) - scenario = sim.getScenario() - img = scenario.getImage( - img_width=2000, - img_height=2000, - padding=50.0, - draw_target_positions=True, - ) - save_image(img, - PROJECT_PATH / 'examples/rendering' / 'scene_with_no_peds.png') - # grab all the vehicles - vehs = scenario.getVehicles() - # grab all the vehicles that moved and show some things - # we can do with them - vehs = scenario.getObjectsThatMoved() - vehs[0].highlight = True # draw a circle around it on the rendered image - # setting a vehicle to expert_control will cause - # this agent will replay expert data starting frmo - # the current time in the simulation - vehs[0].expert_control = True - print(f'width is {vehs[0].width}, length is {vehs[0].length}') - print(f'speed is {vehs[0].speed}, heading is {vehs[0].heading}') - print(f'position is {vehs[0].width}, length is {vehs[0].length}') - # for efficiency, we return position as a custom Vector2D object - # this object can be converted to and from numpy and comes with - # support for a variety of algebraic operations - print(f'position is {vehs[0].position}') - print(f'position as numpy array is {vehs[0].position.numpy()}') - print(f'norm of position is {vehs[0].position.norm()}') - print(f'angle in a world-centered frame {vehs[0].position.angle()}') - print(f'rotated position is {vehs[0].position.rotate(np.pi).numpy()}') - # we can set vehicle accel, steering, head angle directly - vehs[0].acceleration = -1 - vehs[0].steering = 1 - vehs[0].head_angle = np.pi - # we can also set them all directly using an action object - vehs[0].apply_action(Action(acceleration=-1, steering=1, head_angle=np.pi)) - # we can grab the state for this vehicle in two way: - # 1) a flattened vector corresponding to the set of visible objects - # concatenated according to [visible objects, visible road 
points, - # visible stop signs, visible traffic lights] - # note that since we want to make a fixed length vector, for each of these - # types the config, under the scenario key has the following items - # max_visible_objects: 16 - # max_visible_road_points: 1000 - # max_visible_traffic_lights: 20 - # max_visible_stop_signs: 4 - # we grab all the visible items for each type, sort them by distance from - # the vehicle and return the closest. If we have fewer than the maximum - # we pad with 0s. - flattened_vector = scenario.flattened_visible_state(object=vehs[0], - view_dist=80, - view_angle=120 * - (np.pi / 180), - head_angle=0.0) - # we can also grab a dict of all of the objects - # if padding is true we will add extra objects to the dict - # to ensure we hit the maximum number of objects for each type - visible_dict = scenario.visible_state(object=vehs[0], - view_dist=80, - view_angle=120 * (np.pi / 180), - padding=False) - # step the scenario. By default we step at 0.1s. - # you can use any step you want, but, if you do so make sure - # not to make any vehicle an expert as the expert positions / speeds / headings - # are only available in increments of 0.1 seconds - sim.step(cfg['dt']) - - # load scenario, this time with pedestrians and cyclists - cfg['scenario']['allow_non_vehicles'] = True - sim = Simulation(scenario_path=str(PROJECT_PATH / 'examples' / - 'example_scenario.json'), - config=get_scenario_dict(cfg)) - scenario = sim.getScenario() - img = scenario.getImage( - img_width=2000, - img_height=2000, - padding=50.0, - draw_target_positions=True, - ) - save_image(img, - PROJECT_PATH / 'examples/rendering' / 'scene_with_peds.png') - # now we need to be slightly more careful about how we select objects - # since getMovingObjects will return pedestrians and cyclists - # and getVehicles will return vehicles that don't necessarily need to move - objects_that_moved = scenario.getObjectsThatMoved() - objects_of_interest = [ - obj for obj in 
scenario.getVehicles() if obj in objects_that_moved - ] # noqa: 841 - vehicles = scenario.getVehicles() - cyclists = scenario.getCyclists() - pedestrians = scenario.getPedestrians() - all_objects = scenario.getObjects() - - -if __name__ == '__main__': - main() diff --git a/examples/on_policy_files/nocturne_runner.py b/examples/on_policy_files/nocturne_runner.py deleted file mode 100644 index 8f988ad5..00000000 --- a/examples/on_policy_files/nocturne_runner.py +++ /dev/null @@ -1,562 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# Code modified from https://github.com/marlbenchmark/on-policy -"""Runner for PPO from https://github.com/marlbenchmark/on-policy.""" -from pathlib import Path -import os -import time - -import hydra -from cfgs.config import set_display_window -import imageio -import numpy as np -import setproctitle -import torch -import wandb - -from algos.ppo.base_runner import Runner -from algos.ppo.env_wrappers import SubprocVecEnv, DummyVecEnv - -from nocturne.envs.wrappers import create_ppo_env - - -def _t2n(x): - """Convert torch tensor to a numpy array.""" - return x.detach().cpu().numpy() - - -def make_train_env(cfg): - """Construct a training environment.""" - - def get_env_fn(rank): - - def init_env(): - env = create_ppo_env(cfg, rank) - # TODO(eugenevinitsky) implement this - env.seed(cfg.seed + rank * 1000) - return env - - return init_env - - if cfg.algorithm.n_rollout_threads == 1: - return DummyVecEnv([get_env_fn(0)]) - else: - return SubprocVecEnv( - [get_env_fn(i) for i in range(cfg.algorithm.n_rollout_threads)]) - - -def make_eval_env(cfg): - """Construct an eval environment.""" - - def get_env_fn(rank): - - def init_env(): - env = create_ppo_env(cfg) - # TODO(eugenevinitsky) implement this - env.seed(cfg.seed + rank * 1000) - return env - - return init_env - - if 
cfg.algorithm.n_eval_rollout_threads == 1: - return DummyVecEnv([get_env_fn(0)]) - else: - return SubprocVecEnv( - [get_env_fn(i) for i in range(cfg.algorithm.n_eval_rollout_threads)]) - - -def make_render_env(cfg): - """Construct a rendering environment.""" - - def get_env_fn(rank): - - def init_env(): - env = create_ppo_env(cfg) - # TODO(eugenevinitsky) implement this - env.seed(cfg.seed + rank * 1000) - return env - - return init_env - - return DummyVecEnv([get_env_fn(0)]) - - -class NocturneSharedRunner(Runner): - """ - Runner class to perform training, evaluation and data collection for the Nocturne envs. - - WARNING: Assumes a shared policy. - """ - - def __init__(self, config): - """Initialize.""" - super(NocturneSharedRunner, self).__init__(config) - self.cfg = config['cfg.algo'] - self.render_envs = config['render_envs'] - - def run(self): - """Run the training code.""" - self.warmup() - - start = time.time() - episodes = int(self.num_env_steps - ) // self.episode_length // self.n_rollout_threads - - for episode in range(episodes): - if self.use_linear_lr_decay: - self.trainer.policy.lr_decay(episode, episodes) - - for step in range(self.episode_length): - # Sample actions - values, actions, action_log_probs, rnn_states, rnn_states_critic, actions_env = self.collect( - step) - - # Obser reward and next obs - obs, rewards, dones, infos = self.envs.step(actions_env) - - data = obs, rewards, dones, infos, values, actions, action_log_probs, rnn_states, rnn_states_critic - - # insert data into buffer - self.insert(data) - - # compute return and update network - self.compute() - train_infos = self.train() - - # post process - total_num_steps = ( - episode + 1) * self.episode_length * self.n_rollout_threads - - # save model - if (episode % self.save_interval == 0 or episode == episodes - 1): - self.save() - - # log information - if episode % self.log_interval == 0: - end = time.time() - print( - "\n Algo {} Exp {} updates {}/{} episodes, total num timesteps 
{}/{}, FPS {}.\n" - .format(self.algorithm_name, self.experiment_name, - episode * self.n_rollout_threads, - episodes * self.n_rollout_threads, total_num_steps, - self.num_env_steps, - int(total_num_steps / (end - start)))) - - if self.use_wandb: - wandb.log({'fps': int(total_num_steps / (end - start))}, - step=total_num_steps) - env_infos = {} - for agent_id in range(self.num_agents): - idv_rews = [] - for info in infos: - if 'individual_reward' in info[agent_id].keys(): - idv_rews.append( - info[agent_id]['individual_reward']) - agent_k = 'agent%i/individual_rewards' % agent_id - env_infos[agent_k] = idv_rews - - # TODO(eugenevinitsky) this does not correctly account for the fact that there could be - # two episodes in the buffer - train_infos["average_episode_rewards"] = np.mean( - self.buffer.rewards) * self.episode_length - print("average episode rewards is {}".format( - train_infos["average_episode_rewards"])) - print( - f"maximum per step reward is {np.max(self.buffer.rewards)}" - ) - self.log_train(train_infos, total_num_steps) - self.log_env(env_infos, total_num_steps) - - # eval - if episode % self.eval_interval == 0 and self.use_eval: - self.eval(total_num_steps) - - # save videos - if episode % self.cfg.render_interval == 0: - self.render(total_num_steps) - - def warmup(self): - """Initialize the buffers.""" - # reset env - obs = self.envs.reset() - - # replay buffer - if self.use_centralized_V: - share_obs = obs.reshape(self.n_rollout_threads, -1) - share_obs = np.expand_dims(share_obs, 1).repeat(self.num_agents, - axis=1) - else: - share_obs = obs - - self.buffer.share_obs[0] = share_obs.copy() - self.buffer.obs[0] = obs.copy() - - @torch.no_grad() - def collect(self, step): - """Collect rollout data.""" - self.trainer.prep_rollout() - value, action, action_log_prob, rnn_states, rnn_states_critic \ - = self.trainer.policy.get_actions(np.concatenate(self.buffer.share_obs[step]), - np.concatenate(self.buffer.obs[step]), - 
np.concatenate(self.buffer.rnn_states[step]), - np.concatenate(self.buffer.rnn_states_critic[step]), - np.concatenate(self.buffer.masks[step])) - # [self.envs, agents, dim] - values = np.array(np.split(_t2n(value), self.n_rollout_threads)) - actions = np.array(np.split(_t2n(action), self.n_rollout_threads)) - action_log_probs = np.array( - np.split(_t2n(action_log_prob), self.n_rollout_threads)) - rnn_states = np.array( - np.split(_t2n(rnn_states), self.n_rollout_threads)) - rnn_states_critic = np.array( - np.split(_t2n(rnn_states_critic), self.n_rollout_threads)) - # rearrange action - if self.envs.action_space[0].__class__.__name__ == 'MultiDiscrete': - for i in range(self.envs.action_space[0].shape): - uc_actions_env = np.eye(self.envs.action_space[0].high[i] + - 1)[actions[:, :, i]] - if i == 0: - actions_env = uc_actions_env - else: - actions_env = np.concatenate((actions_env, uc_actions_env), - axis=2) - elif self.envs.action_space[0].__class__.__name__ == 'Discrete': - actions_env = np.squeeze( - np.eye(self.envs.action_space[0].n)[actions], 2) - else: - raise NotImplementedError - - return values, actions, action_log_probs, rnn_states, rnn_states_critic, actions_env - - def insert(self, data): - """Store the data in the buffers.""" - obs, rewards, dones, _, values, actions, action_log_probs, rnn_states, rnn_states_critic = data - - dones_env = np.all(dones, axis=1) - - rnn_states[dones_env] = np.zeros(((dones_env).sum(), self.num_agents, - self.recurrent_N, self.hidden_size), - dtype=np.float32) - rnn_states_critic[dones_env] = np.zeros( - ((dones_env).sum(), self.num_agents, - *self.buffer.rnn_states_critic.shape[3:]), - dtype=np.float32) - - masks = np.ones((self.n_rollout_threads, self.num_agents, 1), - dtype=np.float32) - masks[dones_env] = np.zeros(((dones_env).sum(), self.num_agents, 1), - dtype=np.float32) - - active_masks = np.ones((self.n_rollout_threads, self.num_agents, 1), - dtype=np.float32) - active_masks[dones] = np.zeros(((dones).sum(), 1), 
dtype=np.float32) - active_masks[dones_env] = np.ones( - ((dones_env).sum(), self.num_agents, 1), dtype=np.float32) - - if self.use_centralized_V: - share_obs = obs.reshape(self.n_rollout_threads, -1) - share_obs = np.expand_dims(share_obs, 1).repeat(self.num_agents, - axis=1) - else: - share_obs = obs - - self.buffer.insert(share_obs, - obs, - rnn_states, - rnn_states_critic, - actions, - action_log_probs, - values, - rewards, - masks, - active_masks=active_masks) - - @torch.no_grad() - def eval(self, total_num_steps): - """Get the policy returns in deterministic mode.""" - eval_episode = 0 - - eval_episode_rewards = [] - one_episode_rewards = [[] for _ in range(self.n_eval_rollout_threads)] - num_achieved_goals = 0 - num_collisions = 0 - - i = 0 - eval_obs = self.eval_envs.reset() - - eval_rnn_states = np.zeros( - (self.n_eval_rollout_threads, self.num_agents, self.recurrent_N, - self.hidden_size), - dtype=np.float32) - eval_masks = np.ones((self.n_eval_rollout_threads, self.num_agents, 1), - dtype=np.float32) - - while eval_episode < self.cfg.eval_episodes: - i += 1 - self.trainer.prep_rollout() - eval_actions, eval_rnn_states = \ - self.trainer.policy.act(np.concatenate(eval_obs), - np.concatenate(eval_rnn_states), - np.concatenate(eval_masks), - deterministic=True) - eval_actions = np.array( - np.split(_t2n(eval_actions), self.n_eval_rollout_threads)) - eval_rnn_states = np.array( - np.split(_t2n(eval_rnn_states), self.n_eval_rollout_threads)) - - # Observed reward and next obs - eval_obs, eval_rewards, eval_dones, eval_infos = self.eval_envs.step( - eval_actions) - for info_arr in eval_infos: - for agent_info_arr in info_arr: - if 'goal_achieved' in agent_info_arr and agent_info_arr[ - 'goal_achieved']: - num_achieved_goals += 1 - if 'collided' in agent_info_arr and agent_info_arr[ - 'collided']: - num_collisions += 1 - - for i in range(self.n_eval_rollout_threads): - one_episode_rewards[i].append(eval_rewards[i]) - - eval_dones_env = np.all(eval_dones, 
axis=1) - - eval_rnn_states[eval_dones_env] = np.zeros( - ((eval_dones_env).sum(), self.num_agents, self.recurrent_N, - self.hidden_size), - dtype=np.float32) - - eval_masks = np.ones( - (self.n_eval_rollout_threads, self.num_agents, 1), - dtype=np.float32) - eval_masks[eval_dones_env] = np.zeros( - ((eval_dones_env).sum(), self.num_agents, 1), dtype=np.float32) - - for eval_i in range(self.n_eval_rollout_threads): - if eval_dones_env[eval_i]: - eval_episode += 1 - eval_episode_rewards.append( - np.sum(one_episode_rewards[eval_i], axis=0).mean()) - one_episode_rewards[eval_i] = [] - - eval_episode_rewards = np.array(eval_episode_rewards) - eval_episode_rewards = np.mean(eval_episode_rewards) - if self.use_wandb: - wandb.log({'eval_episode_rewards': eval_episode_rewards}, - step=total_num_steps) - wandb.log( - { - 'avg_eval_goals_achieved': - num_achieved_goals / self.num_agents / - self.cfg.eval_episodes - }, - step=total_num_steps) - wandb.log( - { - 'avg_eval_num_collisions': - num_collisions / self.num_agents / self.cfg.eval_episodes - }, - step=total_num_steps) - - @torch.no_grad() - def render(self, total_num_steps): - """Visualize the env.""" - envs = self.render_envs - - all_frames = [] - for episode in range(self.cfg.render_episodes): - obs = envs.reset() - if self.cfg.save_gifs: - image = envs.envs[0].render('rgb_array') - all_frames.append(image) - else: - envs.render('human') - - rnn_states = np.zeros( - (1, self.num_agents, self.recurrent_N, self.hidden_size), - dtype=np.float32) - masks = np.ones((1, self.num_agents, 1), dtype=np.float32) - - episode_rewards = [] - - self.trainer.prep_rollout() - for step in range(self.episode_length): - calc_start = time.time() - - action, rnn_states = self.trainer.policy.act( - np.concatenate(obs), - np.concatenate(rnn_states), - np.concatenate(masks), - deterministic=True) - actions = np.array(np.split(_t2n(action), 1)) - rnn_states = np.array(np.split(_t2n(rnn_states), 1)) - - if 
envs.action_space[0].__class__.__name__ == 'MultiDiscrete': - for i in range(envs.action_space[0].shape): - uc_actions_env = np.eye(envs.action_space[0].high[i] + - 1)[actions[:, :, i]] - if i == 0: - actions_env = uc_actions_env - else: - actions_env = np.concatenate( - (actions_env, uc_actions_env), axis=2) - elif envs.action_space[0].__class__.__name__ == 'Discrete': - actions_env = np.squeeze( - np.eye(envs.action_space[0].n)[actions], 2) - else: - raise NotImplementedError - - # Obser reward and next obs - obs, rewards, dones, infos = envs.step(actions_env) - episode_rewards.append(rewards) - - rnn_states[dones] = np.zeros( - ((dones).sum(), self.recurrent_N, self.hidden_size), - dtype=np.float32) - masks = np.ones((1, self.num_agents, 1), dtype=np.float32) - masks[dones] = np.zeros(((dones).sum(), 1), dtype=np.float32) - - if self.cfg.save_gifs: - image = envs.envs[0].render('rgb_array') - all_frames.append(image) - calc_end = time.time() - elapsed = calc_end - calc_start - if elapsed < self.cfg.ifi: - time.sleep(self.cfg.ifi - elapsed) - else: - envs.render('human') - - if np.all(dones[0]): - break - - # note, every rendered episode is exactly the same since there's no randomness in the env and our actions - # are deterministic - # TODO(eugenevinitsky) why is this lower than the non-render reward? 
- render_val = np.mean(np.sum(np.array(episode_rewards), axis=0)) - print("episode reward of rendered episode is: " + str(render_val)) - if self.use_wandb: - wandb.log({'render_rew': render_val}, step=total_num_steps) - - if self.cfg.save_gifs: - if self.use_wandb: - np_arr = np.stack(all_frames).transpose((0, 3, 1, 2)) - wandb.log({"video": wandb.Video(np_arr, fps=4, format="gif")}, - step=total_num_steps) - # else: - imageio.mimsave(os.getcwd() + '/render.gif', - all_frames, - duration=self.cfg.ifi) - - -@hydra.main(config_path='../../cfgs/', config_name='config') -def main(cfg): - """Run the on-policy code.""" - set_display_window() - logdir = Path(os.getcwd()) - if cfg.wandb_id is not None: - wandb_id = cfg.wandb_id - else: - wandb_id = wandb.util.generate_id() - # with open(os.path.join(logdir, 'wandb_id.txt'), 'w+') as f: - # f.write(wandb_id) - wandb_mode = "disabled" if (cfg.debug or not cfg.wandb) else "online" - - if cfg.wandb: - run = wandb.init(config=cfg, - project=cfg.wandb_name, - name=wandb_id, - group='ppov2_' + cfg.experiment, - resume="allow", - settings=wandb.Settings(start_method="fork"), - mode=wandb_mode) - else: - if not logdir.exists(): - curr_run = 'run1' - else: - exst_run_nums = [ - int(str(folder.name).split('run')[1]) - for folder in logdir.iterdir() - if str(folder.name).startswith('run') - ] - if len(exst_run_nums) == 0: - curr_run = 'run1' - else: - curr_run = 'run%i' % (max(exst_run_nums) + 1) - logdir = logdir / curr_run - if not logdir.exists(): - os.makedirs(str(logdir)) - - if cfg.algorithm.algorithm_name == "rmappo": - assert (cfg.algorithm.use_recurrent_policy - or cfg.algorithm.use_naive_recurrent_policy), ( - "check recurrent policy!") - elif cfg.algorithm.algorithm_name == "mappo": - assert (not cfg.algorithm.use_recurrent_policy - and not cfg.algorithm.use_naive_recurrent_policy), ( - "check recurrent policy!") - else: - raise NotImplementedError - - # cuda - if 'cpu' not in cfg.algorithm.device and 
torch.cuda.is_available(): - print("choose to use gpu...") - device = torch.device(cfg.algorithm.device) - torch.set_num_threads(cfg.algorithm.n_training_threads) - # if cfg.algorithm.cuda_deterministic: - # import torch.backends.cudnn as cudnn - # cudnn.benchmark = False - # cudnn.deterministic = True - else: - print("choose to use cpu...") - device = torch.device("cpu") - torch.set_num_threads(cfg.algorithm.n_training_threads) - - setproctitle.setproctitle( - str(cfg.algorithm.algorithm_name) + "-" + str(cfg.experiment)) - - # seed - torch.manual_seed(cfg.algorithm.seed) - torch.cuda.manual_seed_all(cfg.algorithm.seed) - np.random.seed(cfg.algorithm.seed) - - # env init - # TODO(eugenevinitsky) this code requires a fixed number of agents but this - # should be done by overriding in the hydra config rather than here - cfg.subscriber.keep_inactive_agents = True - envs = make_train_env(cfg) - eval_envs = make_eval_env(cfg) - render_envs = make_render_env(cfg) - # TODO(eugenevinitsky) hacky - num_agents = envs.reset().shape[1] - - config = { - "cfg.algo": cfg.algorithm, - "envs": envs, - "eval_envs": eval_envs, - "render_envs": render_envs, - "num_agents": num_agents, - "device": device, - "logdir": logdir - } - - # run experiments - runner = NocturneSharedRunner(config) - runner.run() - - # post process - envs.close() - if cfg.algorithm.use_eval and eval_envs is not envs: - eval_envs.close() - - if cfg.wandb: - run.finish() - else: - runner.writter.export_scalars_to_json( - str(runner.log_dir + '/summary.json')) - runner.writter.close() - - -if __name__ == '__main__': - main() diff --git a/examples/rendering.py b/examples/rendering.py deleted file mode 100644 index 46050f57..00000000 --- a/examples/rendering.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-"""Example of how to make movies of Nocturne scenarios.""" -import os - -import hydra -import imageio -import matplotlib.pyplot as plt -import numpy as np - -from cfgs.config import PROJECT_PATH, get_scenario_dict, set_display_window -from nocturne import Simulation - - -def get_sim(cfg): - """Initialize the scenario.""" - # load scenario, set vehicles to be expert-controlled - sim = Simulation(scenario_path=str(PROJECT_PATH / 'examples' / - 'example_scenario.json'), - config=get_scenario_dict(cfg)) - for obj in sim.getScenario().getObjectsThatMoved(): - obj.expert_control = True - return sim - - -def make_movie(cfg, - scenario_fn, - output_path='./vid.mp4', - dt=0.1, - steps=90, - fps=10): - """Make a movie from the scenario.""" - sim = get_sim(cfg) - scenario = sim.getScenario() - movie_frames = [] - timestep = 0 - movie_frames.append(scenario_fn(scenario, timestep)) - for i in range(steps): - sim.step(dt) - timestep += 1 - movie_frames.append(scenario_fn(scenario, timestep)) - movie_frames = np.array(movie_frames) - imageio.mimwrite(output_path, movie_frames, fps=fps) - print('>', output_path) - del sim - del movie_frames - - -def make_image(cfg, scenario_fn, output_path='./img.png'): - """Make a single image from the scenario.""" - sim = get_sim(cfg) - scenario = sim.getScenario() - img = scenario_fn(scenario) - dpi = 100 - height, width, depth = img.shape - figsize = width / float(dpi), height / float(dpi) - plt.figure(figsize=figsize, dpi=dpi) - plt.axis('off') - plt.imshow(img) - plt.savefig(output_path) - print('>', output_path) - - -@hydra.main(config_path="../cfgs/", config_name="config") -def main(cfg): - """See file docstring.""" - # NOTE: don't run this file all at once since the memory usage for - # rendering all the videos will be dozens of gigabytes - set_display_window() - - if not os.path.exists(PROJECT_PATH / 'examples/rendering'): - os.makedirs(PROJECT_PATH / 'examples/rendering') - - # movie of whole scenario - make_movie( - cfg, - 
scenario_fn=lambda scenario, _: scenario.getImage( - img_width=1600, - img_height=1600, - draw_target_positions=True, - padding=50.0, - ), - output_path=PROJECT_PATH / 'examples/rendering' / - 'movie_whole_scenario.mp4', - ) - - # movie around a vehicle - make_movie( - cfg, - scenario_fn=lambda scenario, _: scenario.getImage( - img_width=1600, - img_height=1600, - draw_target_positions=True, - padding=50.0, - source=scenario.getVehicles()[3], - view_width=120, - view_height=120, - rotate_with_source=True, - ), - output_path=PROJECT_PATH / 'examples/rendering' / - 'movie_around_vehicle.mp4', - ) - - # movie around a vehicle (without rotating with source) - make_movie( - cfg, - scenario_fn=lambda scenario, _: scenario.getImage( - img_width=1600, - img_height=1600, - draw_target_positions=True, - padding=50.0, - source=scenario.getObjectsThatMoved()[0], - view_width=120, - view_height=120, - rotate_with_source=False, - ), - output_path=PROJECT_PATH / 'examples/rendering' / - 'movie_around_vehicle_stable.mp4', - ) - - # movie of cone around vehicle - make_movie( - cfg, - scenario_fn=lambda scenario, _: scenario.getConeImage( - source=scenario.getObjectsThatMoved()[0], - view_dist=80, - view_angle=np.pi * (120 / 180), - head_angle=0.0, - img_width=1600, - img_height=1600, - padding=50.0, - draw_target_position=True, - ), - output_path=PROJECT_PATH / 'examples/rendering' / 'movie_cone.mp4', - ) - - # movie of cone around vehicle with varying head angle - make_movie( - cfg, - scenario_fn=lambda scenario, timestep: scenario.getConeImage( - source=scenario.getVehicles()[6], - view_dist=80.0, - view_angle=np.pi * (120 / 180), - head_angle=0.8 * np.sin(timestep / 10), - img_width=1600, - img_height=1600, - padding=50.0, - draw_target_position=True, - ), - output_path=PROJECT_PATH / 'examples/rendering' / - 'movie_cone_head_angle.mp4', - ) - - # image of whole scenario - make_image( - cfg, - scenario_fn=lambda scenario: scenario.getImage( - img_width=2000, - img_height=2000, - 
padding=50.0, - draw_target_positions=True, - ), - output_path=PROJECT_PATH / 'examples/rendering' / 'img_scenario.png', - ) - - # image of cone - make_image( - cfg, - scenario_fn=lambda scenario: scenario.getConeImage( - source=scenario.getVehicles()[9], - view_dist=80, - view_angle=np.pi * (120 / 180), - head_angle=np.pi / 8.0, - img_width=2000, - img_height=2000, - padding=50.0, - draw_target_position=True, - ), - output_path=PROJECT_PATH / 'examples/rendering' / - 'img_cone_tilted.png', - ) - - # image of visible state - make_image( - cfg, - scenario_fn=lambda scenario: scenario.getFeaturesImage( - source=scenario.getVehicles()[9], - view_dist=80, - view_angle=np.pi * (120 / 180), - head_angle=np.pi / 8.0, - img_width=2000, - img_height=2000, - padding=50.0, - draw_target_position=True, - ), - output_path=PROJECT_PATH / 'examples/rendering' / - 'img_features_tilted.png', - ) - - -if __name__ == '__main__': - main() diff --git a/examples/rllib_files/run_rllib.py b/examples/rllib_files/run_rllib.py deleted file mode 100644 index fb019d00..00000000 --- a/examples/rllib_files/run_rllib.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-"""Example run script for RLlib.""" -import os - -import hydra -from omegaconf import OmegaConf -from cfgs.config import set_display_window -import ray -from ray import tune -from ray.tune.registry import register_env -from ray.rllib.env.multi_agent_env import MultiAgentEnv - -from nocturne.envs.wrappers import create_env - - -class RLlibWrapperEnv(MultiAgentEnv): - """Thin wrapper making our env look like a MultiAgentEnv.""" - - metadata = { - "render.modes": ["rgb_array"], - } - - def __init__(self, env): - """See wrapped env class.""" - self._skip_env_checking = True # temporary fix for rllib env checking issue - super().__init__() - self._env = env - - def step(self, actions): - """See wrapped env class.""" - next_obs, rew, done, info = self._env.step(actions) - return next_obs, rew, done, info - - def reset(self): - """See wrapped env class.""" - obses = self._env.reset() - return obses - - @property - def observation_space(self): - """See wrapped env class.""" - return self._env.observation_space - - @property - def action_space(self): - """See wrapped env class.""" - return self._env.action_space - - def render(self, mode=None): - """See wrapped env class.""" - return self._env.render() - - def seed(self, seed=None): - """Set seed on the wrapped env.""" - self._env.seed(seed) - - def __getattr__(self, name): - """Return attributes from the wrapped env.""" - return getattr(self._env, name) - - -def create_rllib_env(cfg): - """Return an MultiAgentEnv wrapped environment.""" - return RLlibWrapperEnv(create_env(cfg)) - - -@hydra.main(config_path="../../cfgs/", config_name="config") -def main(cfg): - """Run RLlib example.""" - set_display_window() - cfg = OmegaConf.to_container(cfg, resolve=True) - # TODO(eugenevinitsky) move these into a config - if cfg['debug']: - ray.init(local_mode=True) - num_workers = 0 - num_envs_per_worker = 1 - num_gpus = 0 - use_lstm = False - else: - num_workers = 15 - num_envs_per_worker = 5 - num_gpus = 1 - use_lstm = True - - 
register_env("nocturne", lambda cfg: create_rllib_env(cfg)) - - username = os.environ["USER"] - tune.run( - "PPO", - # TODO(eugenevinitsky) move into config - local_dir=f"/checkpoint/{username}/nocturne/ray_results", - stop={"episodes_total": 60000}, - checkpoint_freq=1000, - config={ - # Enviroment specific. - "env": - "nocturne", - "env_config": - cfg, - # General - "framework": - "torch", - "num_gpus": - num_gpus, - "num_workers": - num_workers, - "num_envs_per_worker": - num_envs_per_worker, - "observation_filter": - "MeanStdFilter", - # Method specific. - "entropy_coeff": - 0.0, - "num_sgd_iter": - 5, - "train_batch_size": - max(100 * num_workers * num_envs_per_worker, 512), - "rollout_fragment_length": - 20, - "sgd_minibatch_size": - max(int(100 * num_workers * num_envs_per_worker / 4), 512), - "multiagent": { - # We only have one policy (calling it "shared"). - # Class, obs/act-spaces, and config will be derived - # automatically. - "policies": {"shared_policy"}, - # Always use "shared" policy. - "policy_mapping_fn": - (lambda agent_id, episode, **kwargs: "shared_policy"), - # each agent step is counted towards train_batch_size - # rather than environment steps - "count_steps_by": - "agent_steps", - }, - "model": { - "use_lstm": use_lstm - }, - # Evaluation stuff - "evaluation_interval": - 50, - # Run evaluation on (at least) one episodes - "evaluation_duration": - 1, - # ... using one evaluation worker (setting this to 0 will cause - # evaluation to run on the local evaluation worker, blocking - # training until evaluation is done). - # TODO: if this is not 0, it seems to error out - "evaluation_num_workers": - 0, - # Special evaluation config. Keys specified here will override - # the same keys in the main config, but only for evaluation. - "evaluation_config": { - # Store videos in this relative directory here inside - # the default output dir (~/ray_results/...). - # Alternatively, you can specify an absolute path. 
- # Set to True for using the default output dir (~/ray_results/...). - # Set to False for not recording anything. - "record_env": "videos_test", - # "record_env": "/Users/xyz/my_videos/", - # Render the env while evaluating. - # Note that this will always only render the 1st RolloutWorker's - # env and only the 1st sub-env in a vectorized env. - "render_env": True, - }, - }, - ) - - -if __name__ == "__main__": - main() diff --git a/examples/sample_factory_files/results/plot_successes.py b/examples/sample_factory_files/results/plot_successes.py deleted file mode 100644 index fda57827..00000000 --- a/examples/sample_factory_files/results/plot_successes.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Util for plotting eval_sample_factory.py output.""" -import matplotlib.pyplot as plt -import numpy as np - -if __name__ == '__main__': - plt.figure() - num_arr = np.load('success_by_veh_number.npy') - for i in range(num_arr.shape[0]): - veh_num_arr = num_arr[i, i] - plt.figure() - plt.plot(list(range(len(veh_num_arr))), veh_num_arr[:, 0]) - plt.plot(list(range(len(veh_num_arr))), veh_num_arr[:, 1]) - plt.plot(list(range(len(veh_num_arr))), - veh_num_arr[:, 1] + veh_num_arr[:, 0]) - plt.xlabel('num vehicles') - plt.ylabel('rate') - plt.legend(['goal rate', 'collide rate', 'sum']) - plt.title('goal rate as function of number of vehicles') - plt.savefig(f'{i}_goal_func_num.png') - plt.close() - num_arr = np.load('success_by_dist.npy') - for i in range(num_arr.shape[0]): - dist_arr = num_arr[i, i] - plt.figure() - plt.plot(10 * np.array(list(range(len(dist_arr)))), dist_arr[:, 0]) - plt.plot(10 * np.array(list(range(len(dist_arr)))), dist_arr[:, 1]) - plt.plot(10 * np.array(list(range(len(dist_arr)))), - dist_arr[:, 1] + dist_arr[:, 0]) - plt.xlabel('distance') - plt.ylabel('rate') - 
plt.legend(['goal rate', 'collide rate', 'sum']) - plt.title('goal rate as function of start distance') - plt.savefig(f'{i}_goal_func_dist.png') - plt.close() diff --git a/examples/sample_factory_files/results/success_by_dist.npy b/examples/sample_factory_files/results/success_by_dist.npy deleted file mode 100644 index 88bb3470..00000000 Binary files a/examples/sample_factory_files/results/success_by_dist.npy and /dev/null differ diff --git a/examples/sample_factory_files/results/success_by_veh_number.npy b/examples/sample_factory_files/results/success_by_veh_number.npy deleted file mode 100644 index e2ccc8c0..00000000 Binary files a/examples/sample_factory_files/results/success_by_veh_number.npy and /dev/null differ diff --git a/examples/sample_factory_files/results/zsc_collision.txt b/examples/sample_factory_files/results/zsc_collision.txt deleted file mode 100644 index fab1b513..00000000 --- a/examples/sample_factory_files/results/zsc_collision.txt +++ /dev/null @@ -1,2 +0,0 @@ -3.073423876390209974e-01,2.998212611906500014e-01 -2.892664684601605751e-01,3.056283516749848106e-01 diff --git a/examples/sample_factory_files/results/zsc_goal.txt b/examples/sample_factory_files/results/zsc_goal.txt deleted file mode 100644 index da893d8f..00000000 --- a/examples/sample_factory_files/results/zsc_goal.txt +++ /dev/null @@ -1,2 +0,0 @@ -6.806898396366272141e-01,6.894188181744292931e-01 -6.908729456388121859e-01,6.775820683229745178e-01 diff --git a/examples/sample_factory_files/run_sample_factory.py b/examples/sample_factory_files/run_sample_factory.py deleted file mode 100644 index e39582da..00000000 --- a/examples/sample_factory_files/run_sample_factory.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -""" -Runner script for sample factory. 
- -To run in single agent mode on one file for testing. -python -m run_sample_factory algorithm=APPO ++algorithm.train_in_background_thread=True \ - ++algorithm.num_workers=10 ++algorithm.experiment=EXPERIMENT_NAME \ - ++max_num_vehicles=1 ++num_files=1 - -To run in multiagent mode on one file for testing -python -m run_sample_factory algorithm=APPO ++algorithm.train_in_background_thread=True \ - ++algorithm.num_workers=10 ++algorithm.experiment=EXPERIMENT_NAME \ - ++num_files=1 - -To run on all files set ++num_files=-1 - -For debugging -python -m run_sample_factory algorithm=APPO ++algorithm.train_in_background_thread=False \ - ++algorithm.num_workers=1 ++force_envs_single_thread=False -After training for a desired period of time, evaluate the policy by running: -python -m sample_factory_examples.enjoy_custom_multi_env --algo=APPO \ - --env=my_custom_multi_env_v1 --experiment=example -""" -import os -import sys - -import hydra -import numpy as np -from omegaconf import OmegaConf -from sample_factory.envs.env_registry import global_env_registry -from sample_factory.run_algorithm import run_algorithm -from sample_factory_examples.train_custom_env_custom_model import override_default_params_func -from sample_factory.algorithms.appo.model_utils import get_obs_shape, EncoderBase, nonlinearity, register_custom_encoder -from torch import nn - -from nocturne.envs.wrappers import create_env - - -class SampleFactoryEnv(): - """Wrapper environment that converts between our dicts and Sample Factory format.""" - - def __init__(self, env): - """Initialize wrapper. - - Args - ---- - env (BaseEnv): Base environment that we are wrapping. 
- """ - self.env = env - self.num_agents = self.env.cfg['max_num_vehicles'] - self.agent_ids = [i for i in range(self.num_agents)] - self.is_multiagent = True - _ = self.env.reset() - # used to track which agents are done - self.already_done = [False for _ in self.agent_ids] - self.episode_rewards = np.zeros(self.num_agents) - - def step(self, actions): - """Convert between environment dicts and sample factory lists. - - Important to note: - 1) Items in info['episode_extra_stats'] will be logged by sample factory. - 2) sample factory does not reset the environment for you - so we reset it if the env returns __all__ in its done dict - - Args: - actions ({str: numpy array}): agent actions - - Returns - ------- - obs_n ([np.array]): N length list of agent observations - rew_n ([float]): N length list of agent rewards - info_n ([{str: float}]): N length list of info dicts - done_n ([bool]): N length list of whether agents are done - - """ - agent_actions = {} - for action, agent_id, already_done in zip(actions, self.agent_ids, - self.already_done): - if already_done: - continue - agent_actions[self.agent_id_to_env_id_map[agent_id]] = action - next_obses, rew, done, info = self.env.step(agent_actions) - rew_n = [] - done_n = [] - info_n = [] - - for agent_id in self.agent_ids: - # first check that the agent_id ever had a corresponding vehicle - # and then check that there's actually an observation for it i.e. 
it's not done - if agent_id in self.agent_id_to_env_id_map.keys( - ) and self.agent_id_to_env_id_map[agent_id] in next_obses.keys(): - map_key = self.agent_id_to_env_id_map[agent_id] - # since the environment may have just reset, we don't actually have - # reward objects yet - rew_n.append(rew.get(map_key, 0)) - agent_info = info.get(map_key, {}) - # track the per-agent reward for later logging - self.episode_rewards[agent_id] += rew.get(map_key, 0) - self.num_steps[agent_id] += 1 - self.goal_achieved[agent_id] = self.goal_achieved[ - agent_id] or agent_info['goal_achieved'] - self.collided[agent_id] = self.collided[ - agent_id] or agent_info['collided'] - self.veh_edge_collided[agent_id] = self.veh_edge_collided[ - agent_id] or agent_info['veh_edge_collision'] - self.veh_veh_collided[agent_id] = self.veh_veh_collided[ - agent_id] or agent_info['veh_veh_collision'] - else: - rew_n.append(0) - agent_info = {} - if self.already_done[agent_id]: - agent_info['is_active'] = False - else: - agent_info['is_active'] = True - info_n.append(agent_info) - # now stick in some extra state information if needed - # anything in episode_extra_stats is logged at the end of the episode - if done['__all__']: - # log any extra info that you need - avg_rew = np.mean(self.episode_rewards[self.valid_indices]) - avg_len = np.mean(self.num_steps[self.valid_indices]) - avg_goal_achieved = np.mean(self.goal_achieved[self.valid_indices]) - avg_collided = np.mean(self.collided[self.valid_indices]) - avg_veh_edge_collided = np.mean( - self.veh_edge_collided[self.valid_indices]) - avg_veh_veh_collided = np.mean( - self.veh_veh_collided[self.valid_indices]) - for info in info_n: - info['episode_extra_stats'] = {} - info['episode_extra_stats']['avg_rew'] = avg_rew - info['episode_extra_stats']['avg_agent_len'] = avg_len - info['episode_extra_stats'][ - 'goal_achieved'] = avg_goal_achieved - info['episode_extra_stats']['collided'] = avg_collided - info['episode_extra_stats'][ - 
'veh_edge_collision'] = avg_veh_edge_collided - info['episode_extra_stats'][ - 'veh_veh_collision'] = avg_veh_veh_collided - - # update the dones so we know if we need to reset - # sample factory does not call reset for you - for env_id, done_val in done.items(): - # handle the __all__ signal that's just in there for - # telling when the environment should stop - if env_id == '__all__': - continue - if done_val: - agent_id = self.env_id_to_agent_id_map[env_id] - self.already_done[agent_id] = True - - # okay, now if all the agents are done set done to True for all of them - # otherwise, False. Sample factory uses info['is_active'] to track if agents - # are done, not the done signal - # also, convert the obs_dict into the right format - if done['__all__']: - done_n = [True] * self.num_agents - obs_n = self.reset() - else: - done_n = [False] * self.num_agents - obs_n = self.obs_dict_to_list(next_obses) - return obs_n, rew_n, done_n, info_n - - def obs_dict_to_list(self, obs_dict): - """Convert the dictionary returned by the environment into a fixed size list of arrays. - - Args: - obs_dict ({agent id in environment: observation}): dict mapping ID to observation - - Returns - ------- - [np.array]: List of arrays ordered by which agent ID they correspond to. - """ - obs_n = [] - for agent_id in self.agent_ids: - # first check that the agent_id ever had a corresponding vehicle - # and then check that there's actually an observation for it i.e. it's not done - if agent_id in self.agent_id_to_env_id_map.keys( - ) and self.agent_id_to_env_id_map[agent_id] in obs_dict.keys(): - map_key = self.agent_id_to_env_id_map[agent_id] - obs_n.append(obs_dict[map_key]) - else: - obs_n.append(self.dead_feat) - return obs_n - - def reset(self): - """Reset the environment. 
- - Key things done here: - 1) build a map between the agent IDs in the environment (which are not necessarily 0-N) - and the agent IDs for sample factory which are from 0 to the maximum number of agents - 2) sample factory (until some bugs are fixed) requires a fixed number of agents. Some of these - agents will be dummy agents that do not act in the environment. So, here we build valid - indices which can be used to figure out which agent IDs correspond - - Returns - ------- - [np.array]: List of numpy arrays, one for each agent. - """ - # track the agent_ids that actually take an action during the episode - self.valid_indices = [] - self.episode_rewards = np.zeros(self.num_agents) - self.num_steps = np.zeros(self.num_agents) - self.goal_achieved = np.zeros(self.num_agents) - self.collided = np.zeros(self.num_agents) - self.veh_veh_collided = np.zeros(self.num_agents) - self.veh_edge_collided = np.zeros(self.num_agents) - self.already_done = [False for _ in self.agent_ids] - next_obses = self.env.reset() - env_keys = sorted(list(next_obses.keys())) - # agent ids is a list going from 0 to (num_agents - 1) - # however, the vehicle IDs might go from 0 to anything - # we want to initialize a mapping that is maintained through the episode and always - # uniquely convert the vehicle ID to an agent id - self.agent_id_to_env_id_map = { - agent_id: env_id - for agent_id, env_id in zip(self.agent_ids, env_keys) - } - self.env_id_to_agent_id_map = { - env_id: agent_id - for agent_id, env_id in zip(self.agent_ids, env_keys) - } - # if there isn't a mapping from an agent id to a vehicle id, that agent should be - # set to permanently inactive - for agent_id in self.agent_ids: - if agent_id not in self.agent_id_to_env_id_map.keys(): - self.already_done[agent_id] = True - else: - # check that this isn't actually a fake padding agent used - # when keep_inactive_agents is True - if agent_id in self.agent_id_to_env_id_map.keys( - ) and self.agent_id_to_env_id_map[ - agent_id] not 
in self.env.dead_agent_ids: - self.valid_indices.append(agent_id) - obs_n = self.obs_dict_to_list(next_obses) - return obs_n - - @property - def observation_space(self): - """See superclass.""" - return self.env.observation_space - - @property - def action_space(self): - """See superclass.""" - return self.env.action_space - - def render(self, mode=None): - """See superclass.""" - return self.env.render(mode) - - def seed(self, seed=None): - """Pass the seed to the environment.""" - self.env.seed(seed) - - def __getattr__(self, name): - """Pass attributes directly through to the wrapped env. TODO(remove).""" - return getattr(self.env, name) - - -class CustomEncoder(EncoderBase): - """Encoder for the input.""" - - def __init__(self, cfg, obs_space, timing): - super().__init__(cfg, timing) - - obs_shape = get_obs_shape(obs_space) - assert len(obs_shape.obs) == 1 - - fc_encoder_layer = cfg.encoder_hidden_size - encoder_layers = [ - nn.Linear(obs_shape.obs[0], fc_encoder_layer), - nonlinearity(cfg), - nn.Linear(fc_encoder_layer, fc_encoder_layer), - nonlinearity(cfg), - ] - - self.mlp_head = nn.Sequential(*encoder_layers) - self.init_fc_blocks(fc_encoder_layer) - - def forward(self, obs_dict): - """See superclass.""" - x = self.mlp_head(obs_dict['obs']) - x = self.forward_fc_blocks(x) - return x - - -def make_custom_multi_env_func(full_env_name, cfg, env_config=None): - """Return a wrapped base environment. - - Args: - full_env_name (str): Unused. - cfg (dict): Dict needed to configure the environment. - env_config (dict, optional): Deprecated. Will be removed from SampleFactory later. - - Returns - ------- - SampleFactoryEnv: Wrapped environment. 
- """ - env = create_env(cfg) - return SampleFactoryEnv(env) - - -def register_custom_components(): - """Register needed constructors for custom environments.""" - global_env_registry().register_env( - env_name_prefix='my_custom_multi_env_', - make_env_func=make_custom_multi_env_func, - override_default_params_func=override_default_params_func, - ) - register_custom_encoder('custom_env_encoder', CustomEncoder) - - -@hydra.main(config_path="../../cfgs/", config_name="config") -def main(cfg): - """Script entry point.""" - register_custom_components() - # cfg = parse_args() - # TODO(ev) hacky renaming and restructuring, better to do this cleanly - cfg_dict = OmegaConf.to_container(cfg, resolve=True) - # copy algo keys into the main keys - for key, value in cfg_dict['algorithm'].items(): - cfg_dict[key] = value - # we didn't set a train directory so use the hydra one - if cfg_dict['train_dir'] is None: - cfg_dict['train_dir'] = os.getcwd() - print(f'storing the results in {os.getcwd()}') - else: - output_dir = cfg_dict['train_dir'] - print(f'storing results in {output_dir}') - - # recommendation from Aleksei to keep horizon length fixed - # and number of agents fixed and just pad missing / exited - # agents with a vector of -1s - cfg_dict['subscriber']['keep_inactive_agents'] = True - - # put it into a namespace so sample factory code runs correctly - class Bunch(object): - - def __init__(self, adict): - self.__dict__.update(adict) - - cfg = Bunch(cfg_dict) - status = run_algorithm(cfg) - return status - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/examples/sample_factory_files/success_by_veh_number b/examples/sample_factory_files/success_by_veh_number deleted file mode 100644 index 3aa29c4e..00000000 Binary files a/examples/sample_factory_files/success_by_veh_number and /dev/null differ diff --git a/examples/sample_factory_files/visualize_sample_factory.py b/examples/sample_factory_files/visualize_sample_factory.py deleted file mode 100644 index 
a9676b0f..00000000 --- a/examples/sample_factory_files/visualize_sample_factory.py +++ /dev/null @@ -1,272 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Use to create movies of trained policies.""" -import argparse -from collections import deque -import json -import sys -import time -import os - -import imageio -import matplotlib.pyplot as plt -import numpy as np -import torch - -from sample_factory.algorithms.appo.actor_worker import transform_dict_observations -from sample_factory.algorithms.appo.learner import LearnerWorker -from sample_factory.algorithms.appo.model import create_actor_critic -from sample_factory.algorithms.appo.model_utils import get_hidden_size -from sample_factory.algorithms.utils.action_distributions import ContinuousActionDistribution, \ - CategoricalActionDistribution -from sample_factory.algorithms.utils.arguments import load_from_checkpoint -from sample_factory.algorithms.utils.multi_agent_wrapper import MultiAgentWrapper, is_multiagent_env -from sample_factory.envs.create_env import create_env -from sample_factory.utils.utils import log, AttrDict - -from run_sample_factory import register_custom_components - -from cfgs.config import PROCESSED_TRAIN_NO_TL, PROCESSED_VALID_NO_TL, PROJECT_PATH, set_display_window # noqa: F401 - - -def run_eval(cfg_dict, max_num_frames=1e9): - """Run evaluation over a single file. Exits when one episode finishes. - - Args: - cfg (dict): configuration file for instantiating the agents and environment. - max_num_frames (int, optional): Deprecated. Should be removed. 
- - Returns - ------- - None: None - - """ - cfg = load_from_checkpoint(cfg_dict) - - render_action_repeat = cfg.render_action_repeat if cfg.render_action_repeat is not None else cfg.env_frameskip - if render_action_repeat is None: - log.warning('Not using action repeat!') - render_action_repeat = 1 - log.debug('Using action repeat %d during evaluation', render_action_repeat) - - cfg.env_frameskip = 1 # for evaluation - cfg.num_envs = 1 - cfg.seed = np.random.randint(10000) - cfg.scenario_path = cfg_dict.scenario_path - - def make_env_func(env_config): - return create_env(cfg.env, cfg=cfg, env_config=env_config) - - env = make_env_func(AttrDict({'worker_index': 0, 'vector_index': 0})) - - is_multiagent = is_multiagent_env(env) - if not is_multiagent: - env = MultiAgentWrapper(env) - - if hasattr(env.unwrapped, 'reset_on_init'): - # reset call ruins the demo recording for VizDoom - env.unwrapped.reset_on_init = False - - actor_critic = create_actor_critic(cfg, env.observation_space, - env.action_space) - - device = torch.device('cpu' if cfg.device == 'cpu' else 'cuda') - actor_critic.model_to_device(device) - - policy_id = cfg.policy_index - checkpoints = LearnerWorker.get_checkpoints( - LearnerWorker.checkpoint_dir(cfg, policy_id)) - checkpoint_dict = LearnerWorker.load_checkpoint(checkpoints, device) - actor_critic.load_state_dict(checkpoint_dict['model']) - - episode_rewards = [deque([], maxlen=100) for _ in range(env.num_agents)] - true_rewards = [deque([], maxlen=100) for _ in range(env.num_agents)] - num_frames = 0 - - last_render_start = time.time() - - def max_frames_reached(frames): - return max_num_frames is not None and frames > max_num_frames - - obs = env.reset() - print(os.path.join(env.cfg['scenario_path'], env.unwrapped.file)) - rnn_states = torch.zeros( - [env.num_agents, get_hidden_size(cfg)], - dtype=torch.float32, - device=device) - episode_reward = np.zeros(env.num_agents) - finished_episode = [False] * env.num_agents - - if not cfg.no_render: - 
fig = plt.figure() - frames = [] - ego_frames = [] - feature_frames = [] - - with torch.no_grad(): - while not max_frames_reached(num_frames): - obs_torch = AttrDict(transform_dict_observations(obs)) - for key, x in obs_torch.items(): - obs_torch[key] = torch.from_numpy(x).to(device).float() - - policy_outputs = actor_critic(obs_torch, - rnn_states, - with_action_distribution=True) - - # sample actions from the distribution by default - actions = policy_outputs.actions - - action_distribution = policy_outputs.action_distribution - if isinstance(action_distribution, ContinuousActionDistribution): - if not cfg.continuous_actions_sample: # TODO: add similar option for discrete actions - actions = action_distribution.means - if isinstance(action_distribution, CategoricalActionDistribution): - if not cfg.discrete_actions_sample: - actions = policy_outputs['action_logits'].argmax(axis=1) - - actions = actions.cpu().numpy() - - rnn_states = policy_outputs.rnn_states - - for _ in range(render_action_repeat): - if not cfg.no_render: - target_delay = 1.0 / cfg.fps if cfg.fps > 0 else 0 - current_delay = time.time() - last_render_start - time_wait = target_delay - current_delay - - if time_wait > 0: - # log.info('Wait time %.3f', time_wait) - time.sleep(time_wait) - - last_render_start = time.time() - img = env.render() - frames.append(img) - ego_img = env.render_ego() - if ego_img is not None: - ego_frames.append(ego_img) - feature_img = env.render_features() - if feature_img is not None: - feature_frames.append(feature_img) - - obs, rew, done, infos = env.step(actions) - - episode_reward += rew - num_frames += 1 - - for agent_i, done_flag in enumerate(done): - if done_flag: - finished_episode[agent_i] = True - episode_rewards[agent_i].append( - episode_reward[agent_i]) - true_rewards[agent_i].append(infos[agent_i].get( - 'true_reward', episode_reward[agent_i])) - log.info( - 'Episode finished for agent %d at %d frames. 
Reward: %.3f, true_reward: %.3f', - agent_i, num_frames, episode_reward[agent_i], - true_rewards[agent_i][-1]) - rnn_states[agent_i] = torch.zeros( - [get_hidden_size(cfg)], - dtype=torch.float32, - device=device) - episode_reward[agent_i] = 0 - - # if episode terminated synchronously for all agents, pause a bit before starting a new one - if all(done): - if not cfg.no_render: - imageio.mimsave(os.path.join(PROJECT_PATH, - 'animation.mp4'), - np.array(frames), - fps=30) - plt.close(fig) - imageio.mimsave(os.path.join(PROJECT_PATH, - 'animation_ego.mp4'), - np.array(ego_frames), - fps=30) - plt.close(fig) - imageio.mimsave(os.path.join(PROJECT_PATH, - 'animation_feature.mp4'), - np.array(feature_frames), - fps=30) - plt.close(fig) - if not cfg.no_render: - env.render() - time.sleep(0.05) - - if all(finished_episode): - finished_episode = [False] * env.num_agents - avg_episode_rewards_str, avg_true_reward_str = '', '' - for agent_i in range(env.num_agents): - avg_rew = np.mean(episode_rewards[agent_i]) - avg_true_rew = np.mean(true_rewards[agent_i]) - if not np.isnan(avg_rew): - if avg_episode_rewards_str: - avg_episode_rewards_str += ', ' - avg_episode_rewards_str += f'#{agent_i}: {avg_rew:.3f}' - if not np.isnan(avg_true_rew): - if avg_true_reward_str: - avg_true_reward_str += ', ' - avg_true_reward_str += f'#{agent_i}: {avg_true_rew:.3f}' - avg_goal = infos[0]['episode_extra_stats']['goal_achieved'] - avg_collisions = infos[0]['episode_extra_stats'][ - 'collided'] - log.info(f'Avg goal achieved, {avg_goal}') - log.info(f'Avg num collisions, {avg_collisions}') - log.info('Avg episode rewards: %s, true rewards: %s', - avg_episode_rewards_str, avg_true_reward_str) - log.info( - 'Avg episode reward: %.3f, avg true_reward: %.3f', - np.mean([ - np.mean(episode_rewards[i]) - for i in range(env.num_agents) - ]), - np.mean([ - np.mean(true_rewards[i]) - for i in range(env.num_agents) - ])) - return avg_goal - env.close() - - -def main(): - """Script entry point.""" - 
set_display_window() - register_custom_components() - - parser = argparse.ArgumentParser() - parser.add_argument('cfg_path', type=str) - args = parser.parse_args() - - file_path = os.path.join(args.cfg_path, 'cfg.json') - with open(file_path, 'r') as file: - cfg_dict = json.load(file) - - cfg_dict['cli_args'] = {} - cfg_dict['fps'] = 0 - cfg_dict['render_action_repeat'] = None - cfg_dict['no_render'] = False - cfg_dict['policy_index'] = 0 - cfg_dict['record_to'] = os.path.join(os.getcwd(), '..', 'recs') - cfg_dict['continuous_actions_sample'] = True - cfg_dict['discrete_actions_sample'] = False - cfg_dict['remove_at_collide'] = True - cfg_dict['remove_at_goal'] = True - cfg_dict['scenario_path'] = PROCESSED_VALID_NO_TL - - class Bunch(object): - - def __init__(self, adict): - self.__dict__.update(adict) - - cfg = Bunch(cfg_dict) - avg_goals = [] - for _ in range(1): - avg_goal = run_eval(cfg) - avg_goals.append(avg_goal) - print(avg_goals) - print('the total average goal achieved is {}'.format(np.mean(avg_goals))) - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/nocturne/__init__.py b/nocturne/__init__.py index 9aeaf2bd..c4db0c62 100644 --- a/nocturne/__init__.py +++ b/nocturne/__init__.py @@ -22,10 +22,3 @@ "Cyclist", "envs", ] - -import os -from cfgs.config import PROCESSED_TRAIN_NO_TL, PROCESSED_VALID_NO_TL, PROJECT_PATH - -os.environ["PROCESSED_TRAIN_NO_TL"] = str(PROCESSED_TRAIN_NO_TL) -os.environ["PROCESSED_VALID_NO_TL"] = str(PROCESSED_VALID_NO_TL) -os.environ["NOCTURNE_LOG_DIR"] = str(os.path.join(PROJECT_PATH, 'logs')) diff --git a/nocturne/envs/base_env.py b/nocturne/envs/base_env.py index 85ec0684..22dbbe80 100644 --- a/nocturne/envs/base_env.py +++ b/nocturne/envs/base_env.py @@ -2,339 +2,315 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
-"""Default environment for Nocturne.""" -from typing import Any, Dict, Sequence, Union +"""Default Nocturne env with minor adaptations.""" -from collections import defaultdict, deque -from itertools import islice import json -import os +import logging +from collections import defaultdict, deque +from enum import Enum +from itertools import islice, product +from pathlib import Path +from typing import Any, Dict, Optional, Tuple, TypeVar, Union -from gym import Env -from gym.spaces import Box, Discrete import numpy as np import torch +import yaml +from box import Box as ConfigBox +from gym import Env +from gym.spaces import Box, Discrete + +from nocturne import Action, Simulation, Vector2D, Vehicle + +_MAX_NUM_TRIES_TO_FIND_VALID_VEHICLE = 1_000 + +logging.getLogger(__name__) + +ActType = TypeVar("ActType") # pylint: disable=invalid-name +ObsType = TypeVar("ObsType") # pylint: disable=invalid-name +RenderType = TypeVar("RenderType") # pylint: disable=invalid-name + -from cfgs.config import ERR_VAL as INVALID_POSITION, get_scenario_dict -from nocturne import Action, Simulation +class CollisionType(Enum): + """Enum for collision types.""" + NONE = 0 + VEHICLE_VEHICLE = 1 + VEHICLE_EDGE = 2 -class BaseEnv(Env): - """Default environment for Nocturne.""" - def __init__(self, cfg: Dict[str, Any], rank: int = 0) -> None: - """Initialize the environment. +class BaseEnv(Env): # pylint: disable=too-many-instance-attributes + """Nocturne base Gym environment.""" + + def __init__( # pylint: disable=too-many-arguments + self, + config: Dict[str, Any], + *, + img_width=1600, + img_height=1600, + draw_target_positions=True, + padding=50.0, + ) -> None: + """Initialize a Nocturne environment. Args ---- - cfg (dict): configuration file describing the experiment - rank (int, optional): [description]. Defaults to 0. + config (dict): configuration file for the environment. + + Optional Args + ------------- + img_width (int): width of the image to render. 
+ img_height (int): height of the image to render. + draw_target_positions (bool): whether to draw the target positions. + padding (float): padding to add to the image. """ super().__init__() - self.cfg = cfg - with open(os.path.join(cfg['scenario_path'], - 'valid_files.json')) as file: + self.config = ConfigBox(config) + self.config.data_path = Path(self.config.data_path) + self._render_settings = { + "img_width": img_width, + "img_height": img_height, + "draw_target_positions": draw_target_positions, + "padding": padding, + } + + self.seed(self.config.seed) + + # Load the list of valid files + with open(self.config.data_path / "valid_files.json", encoding="utf-8") as file: self.valid_veh_dict = json.load(file) - self.files = list(self.valid_veh_dict.keys()) - # sort the files so that we have a consistent order - self.files = sorted(self.files) - if cfg['num_files'] != -1: - self.files = self.files[0:cfg['num_files']] - self.file = self.files[np.random.randint(len(self.files))] - self.simulation = Simulation(os.path.join(cfg['scenario_path'], - self.file), - config=get_scenario_dict(cfg)) - - self.scenario = self.simulation.getScenario() - self.controlled_vehicles = self.scenario.getObjectsThatMoved() - self.cfg = cfg - self.n_frames_stacked = self.cfg['subscriber'].get( - 'n_frames_stacked', 1) - if self.n_frames_stacked > 1: - print( - 'WARNING: you are frame stacking and may want to turn off recurrence if it is enabled\ - in your agent as frame-stacking may not be needed when using recurrent policies.' 
- ) - self.single_agent_mode = cfg['single_agent_mode'] - self.seed(cfg['seed']) - self.episode_length = cfg['episode_length'] - self.t = 0 - self.step_num = 0 - self.rank = rank - self.seed(cfg['seed']) + files = sorted(list(self.valid_veh_dict.keys())) + if self.config.num_files != -1: + self.files = files[: self.config.num_files] + if len(self.files) == 0: + raise ValueError("Data path does not contain scenes.") + obs_dict = self.reset() - self.observation_space = Box(low=-np.infty, - high=np.infty, - shape=(obs_dict[list( - obs_dict.keys())[0]].shape[0], )) - if self.cfg['discretize_actions']: - self.accel_discretization = self.cfg['accel_discretization'] - self.steering_discretization = self.cfg['steering_discretization'] - self.head_angle_discretization = self.cfg[ - 'head_angle_discretization'] - self.action_space = Discrete(self.accel_discretization * - self.steering_discretization * - self.head_angle_discretization) - self.accel_grid = np.linspace( - -np.abs(self.cfg['accel_lower_bound']), - self.cfg['accel_upper_bound'], self.accel_discretization) - self.steering_grid = np.linspace( - -np.abs(self.cfg['steering_lower_bound']), - self.cfg['steering_upper_bound'], self.steering_discretization) - self.head_angle_grid = np.linspace( - -np.abs(self.cfg['head_angle_lower_bound']), - self.cfg['head_angle_upper_bound'], - self.head_angle_discretization) - # compute the indexing only once - self.idx_to_actions = {} - i = 0 - for accel in self.accel_grid: - for steer in self.steering_grid: - for head_angle in self.head_angle_grid: - self.idx_to_actions[i] = [accel, steer, head_angle] - i += 1 + + # Set observation space + self.observation_space = Box(low=-np.inf, high=np.inf, shape=(obs_dict[list(obs_dict.keys())[0]].shape[0],)) + + # Set action space + if self.config.discretize_actions: + self._set_discrete_action_space() else: - self.action_space = Box( - low=-np.array([ - np.abs(self.cfg['accel_lower_bound']), - self.cfg['steering_lower_bound'], - 
self.cfg['head_angle_lower_bound'] - ]), - high=np.array([ - np.abs(self.cfg['accel_upper_bound']), - self.cfg['steering_upper_bound'], - self.cfg['head_angle_upper_bound'] - ]), - ) + self._set_continuous_action_space() - def apply_actions( - self, action_dict: Dict[int, Union[Action, np.ndarray, Sequence[float], - int]] - ) -> None: - """Apply a dict of actions to the vehicle objects.""" + def apply_actions(self, action_dict: Dict[int, ActType]) -> None: + """Apply a dict of actions to the vehicle objects. + + Args + ---- + action_dict (Dict[int, ActType]): Dictionary of actions to apply to the vehicles. + """ for veh_obj in self.scenario.getObjectsThatMoved(): action = action_dict.get(veh_obj.id, None) if action is None: continue + _apply_action_to_vehicle(veh_obj, action, idx_to_actions=self.idx_to_actions) - # TODO: Make this a util function. - if isinstance(action, Action): - veh_obj.apply_action(action) - elif isinstance(action, np.ndarray): - veh_obj.apply_action(Action.from_numpy(action)) - elif isinstance(action, (tuple, list)): - veh_obj.acceleration = action[0] - veh_obj.steering = action[1] - veh_obj.head_angle = action[2] - else: - accel, steer, head_angle = self.idx_to_actions[action] - veh_obj.acceleration = accel - veh_obj.steering = steer - veh_obj.head_angle = head_angle - - def step( - self, action_dict: Dict[int, Union[Action, np.ndarray, Sequence[float], - int]] - ) -> None: - """See superclass.""" + def step( # pylint: disable=arguments-renamed,too-many-locals,too-many-branches,too-many-statements + self, action_dict: Dict[int, ActType] + ) -> Tuple[Dict[int, ObsType], Dict[int, float], Dict[int, bool], Dict[int, Dict[str, Union[bool, str]]]]: + """Run one timestep of the environment's dynamics. + + Args + ---- + action_dict (Dict[int, ActType]): Dictionary of actions to apply to the vehicles. + + Raises + ------ + ValueError: If the action is not of a supported type or if the vehicle collision type is unknown. 
+ + + Returns + ------- + Dict[int, ObsType]: Dictionary with observation for each vehicle. + Dict[int, float]: Dictionary with reward for each vehicle. + Dict[int, bool]: Dictionary with done flag for each vehicle. + Dict[int, Dict[str, Union[bool, str]]]]: Dictionary with info for each vehicle. + """ obs_dict = {} rew_dict = {} done_dict = {} info_dict = defaultdict(dict) - rew_cfg = self.cfg['rew_cfg'] + + rew_cfg = self.config.rew_cfg + self.apply_actions(action_dict) - self.simulation.step(self.cfg['dt']) - self.t += self.cfg['dt'] + self.simulation.step(self.config.dt) + self.t += self.config.dt self.step_num += 1 - objs_to_remove = [] + for veh_obj in self.controlled_vehicles: veh_id = veh_obj.getID() if veh_id in self.done_ids: continue self.context_dict[veh_id].append(self.get_observation(veh_obj)) - if self.n_frames_stacked > 1: + if self.config.subscriber.n_frames_stacked > 1: veh_deque = self.context_dict[veh_id] context_list = list( - islice(veh_deque, - len(veh_deque) - self.n_frames_stacked, - len(veh_deque))) + islice( + veh_deque, + len(veh_deque) - self.config.subscriber.n_frames_stacked, + len(veh_deque), + ) + ) obs_dict[veh_id] = np.concatenate(context_list) else: obs_dict[veh_id] = self.context_dict[veh_id][-1] rew_dict[veh_id] = 0 done_dict[veh_id] = False - info_dict[veh_id]['goal_achieved'] = False - info_dict[veh_id]['collided'] = False - info_dict[veh_id]['veh_veh_collision'] = False - info_dict[veh_id]['veh_edge_collision'] = False + info_dict[veh_id]["goal_achieved"] = False + info_dict[veh_id]["collided"] = False + info_dict[veh_id]["veh_veh_collision"] = False + info_dict[veh_id]["veh_edge_collision"] = False obj_pos = veh_obj.position goal_pos = veh_obj.target_position - '''############################################ - Compute rewards - ############################################''' + ############################################ + # Compute rewards + ############################################ position_target_achieved = True 
speed_target_achieved = True heading_target_achieved = True - if rew_cfg['position_target']: - position_target_achieved = ( - goal_pos - - obj_pos).norm() < rew_cfg['position_target_tolerance'] - if rew_cfg['speed_target']: - speed_target_achieved = np.abs( - veh_obj.speed - - veh_obj.target_speed) < rew_cfg['speed_target_tolerance'] - if rew_cfg['heading_target']: - heading_target_achieved = np.abs( - self.angle_sub(veh_obj.heading, veh_obj.target_heading) - ) < rew_cfg['heading_target_tolerance'] + if rew_cfg.position_target: + position_target_achieved = (goal_pos - obj_pos).norm() < rew_cfg.position_target_tolerance + if rew_cfg.speed_target: + speed_target_achieved = np.abs(veh_obj.speed - veh_obj.target_speed) < rew_cfg.speed_target_tolerance + if rew_cfg.heading_target: + heading_target_achieved = ( + np.abs(_angle_sub(veh_obj.heading, veh_obj.target_heading)) < rew_cfg.heading_target_tolerance + ) if position_target_achieved and speed_target_achieved and heading_target_achieved: - info_dict[veh_id]['goal_achieved'] = True - rew_dict[veh_id] += rew_cfg['goal_achieved_bonus'] / rew_cfg[ - 'reward_scaling'] - if rew_cfg['shaped_goal_distance'] and rew_cfg['position_target']: + info_dict[veh_id]["goal_achieved"] = True + rew_dict[veh_id] += rew_cfg.goal_achieved_bonus / rew_cfg.reward_scaling + if rew_cfg.shaped_goal_distance and rew_cfg.position_target: # penalize the agent for its distance from goal - # we scale by goal_dist_normalizers to ensure that this value is always less than the penalty for - # collision - if rew_cfg['goal_distance_penalty']: - rew_dict[veh_id] -= rew_cfg.get( - 'shaped_goal_distance_scaling', 1.0) * ( - (goal_pos - obj_pos).norm() / - self.goal_dist_normalizers[veh_id] - ) / rew_cfg['reward_scaling'] + # we scale by goal_dist_normalizers to ensure that this value is always + # less than the penalty for collision + if rew_cfg.goal_distance_penalty: + rew_dict[veh_id] -= ( + rew_cfg.shaped_goal_distance_scaling + * ((goal_pos - 
obj_pos).norm() / self.goal_dist_normalizers[veh_id]) + / rew_cfg.reward_scaling + ) else: # the minus one is to ensure that it's not beneficial to collide # we divide by goal_achieved_bonus / episode_length to ensure that - # acquiring the maximum "get-close-to-goal" reward at every time-step is - # always less than just acquiring the goal reward once - # we also assume that vehicles are never more than 400 meters from their goal - # which makes sense as the episodes are 9 seconds long i.e. we'd have to go more than - # 40 m/s to get there - rew_dict[veh_id] += rew_cfg.get( - 'shaped_goal_distance_scaling', - 1.0) * (1 - (goal_pos - obj_pos).norm() / - self.goal_dist_normalizers[veh_id] - ) / rew_cfg['reward_scaling'] + # acquiring the maximum "get-close-to-goal" reward at every + # time-step is always less than just acquiring the goal reward once + rew_dict[veh_id] += ( + rew_cfg.shaped_goal_distance_scaling + * (1 - (goal_pos - obj_pos).norm() / self.goal_dist_normalizers[veh_id]) + / rew_cfg.reward_scaling + ) # repeat the same thing for speed and heading - if rew_cfg['shaped_goal_distance'] and rew_cfg['speed_target']: - if rew_cfg['goal_distance_penalty']: - rew_dict[veh_id] -= rew_cfg.get( - 'shaped_goal_distance_scaling', 1.0) * ( - np.abs(veh_obj.speed - veh_obj.target_speed) / - 40.0) / rew_cfg['reward_scaling'] + if rew_cfg.shaped_goal_distance and rew_cfg.speed_target: + if rew_cfg.goal_distance_penalty: + rew_dict[veh_id] -= ( + rew_cfg.shaped_goal_distance_scaling + * (np.abs(veh_obj.speed - veh_obj.target_speed) / rew_cfg.goal_speed_scaling) + / rew_cfg.reward_scaling + ) else: - rew_dict[veh_id] += rew_cfg.get( - 'shaped_goal_distance_scaling', 1.0 - ) * (1 - np.abs(veh_obj.speed - veh_obj.target_speed) / - 40.0) / rew_cfg['reward_scaling'] - if rew_cfg['shaped_goal_distance'] and rew_cfg[ - 'heading_target']: - if rew_cfg['goal_distance_penalty']: - rew_dict[veh_id] -= rew_cfg.get( - 'shaped_goal_distance_scaling', - 1.0) * (np.abs( - 
self.angle_sub(veh_obj.heading, - veh_obj.target_heading)) / - (2 * np.pi)) / rew_cfg['reward_scaling'] + rew_dict[veh_id] += ( + rew_cfg.shaped_goal_distance_scaling + * (1 - np.abs(veh_obj.speed - veh_obj.target_speed) / rew_cfg.goal_speed_scaling) + / rew_cfg.reward_scaling + ) + if rew_cfg.shaped_goal_distance and rew_cfg.heading_target: + if rew_cfg.goal_distance_penalty: + rew_dict[veh_id] -= ( + rew_cfg.shaped_goal_distance_scaling + * (np.abs(_angle_sub(veh_obj.heading, veh_obj.target_heading)) / (2 * np.pi)) + / rew_cfg.reward_scaling + ) else: - rew_dict[veh_id] += rew_cfg.get( - 'shaped_goal_distance_scaling', - 1.0) * (1 - np.abs( - self.angle_sub(veh_obj.heading, - veh_obj.target_heading)) / - (2 * np.pi)) / rew_cfg['reward_scaling'] - '''############################################ - Handle potential done conditions - ############################################''' + rew_dict[veh_id] += ( + rew_cfg.shaped_goal_distance_scaling + * (1 - np.abs(_angle_sub(veh_obj.heading, veh_obj.target_heading)) / (2 * np.pi)) + / rew_cfg.reward_scaling + ) + ############################################ + # Handle potential done conditions + ############################################ # achieved our goal - if info_dict[veh_id]['goal_achieved'] and self.cfg.get( - 'remove_at_goal', True): + if info_dict[veh_id]["goal_achieved"] and self.config.get("remove_at_goal", True): done_dict[veh_id] = True if veh_obj.getCollided(): - info_dict[veh_id]['collided'] = True - if int(veh_obj.collision_type) == 1: - info_dict[veh_id]['veh_veh_collision'] = True - if int(veh_obj.collision_type) == 2: - info_dict[veh_id]['veh_edge_collision'] = True - rew_dict[veh_id] -= np.abs( - rew_cfg['collision_penalty']) / rew_cfg['reward_scaling'] - if self.cfg.get('remove_at_collide', True): + info_dict[veh_id]["collided"] = True + if int(veh_obj.collision_type) == CollisionType.VEHICLE_VEHICLE.value: + info_dict[veh_id]["veh_veh_collision"] = True + elif int(veh_obj.collision_type) == 
CollisionType.VEHICLE_EDGE.value: + info_dict[veh_id]["veh_edge_collision"] = True + elif int(veh_obj.collision_type) != CollisionType.NONE.value: + raise ValueError(f"Unknown collision type: {veh_obj.collision_type}.") + rew_dict[veh_id] -= np.abs(rew_cfg.collision_penalty) / rew_cfg.reward_scaling + if self.config.get("remove_at_collide", True): done_dict[veh_id] = True - # remove the vehicle so that its trajectory doesn't continue. This is important - # in the multi-agent setting. + # remove the vehicle so that its trajectory doesn't continue. This is + # important in the multi-agent setting. if done_dict[veh_id]: self.done_ids.append(veh_id) - if (info_dict[veh_id]['goal_achieved'] - and self.cfg.get('remove_at_goal', True)) or ( - info_dict[veh_id]['collided'] - and self.cfg.get('remove_at_collide', True)): - objs_to_remove.append(veh_obj) - - for veh_obj in objs_to_remove: - self.scenario.removeVehicle(veh_obj) - - if self.cfg['rew_cfg']['shared_reward']: - total_reward = np.sum([rew_dict[key] for key in rew_dict.keys()]) - rew_dict = {key: total_reward for key in rew_dict.keys()} - - # fill in the missing observations if we should be doing so - if self.cfg['subscriber']['keep_inactive_agents']: - # force all vehicles done to be false since they should persist through the episode - done_dict = {key: False for key in self.all_vehicle_ids} - for key in self.all_vehicle_ids: - if key not in obs_dict.keys(): - obs_dict[key] = self.dead_feat - rew_dict[key] = 0.0 - info_dict[key]['goal_achieved'] = False - info_dict[key]['collided'] = False - info_dict[key]['veh_veh_collision'] = False - info_dict[key]['veh_edge_collision'] = False - - if self.step_num >= self.episode_length: - done_dict = {key: True for key in done_dict.keys()} - - all_done = True - for value in done_dict.values(): - all_done *= value - done_dict['__all__'] = all_done + if (info_dict[veh_id]["goal_achieved"] and self.config.get("remove_at_goal", True)) or ( + info_dict[veh_id]["collided"] and 
self.config.get("remove_at_collide", True) + ): + self.scenario.removeVehicle(veh_obj) + + if self.config.rew_cfg.shared_reward: + total_reward = np.sum(rew_dict.values()) + rew_dict = {key: total_reward for key in rew_dict} + + if self.step_num >= self.config.episode_length: + done_dict = {key: True for key in done_dict} + + done_dict["__all__"] = all(done_dict.values()) return obs_dict, rew_dict, done_dict, info_dict - def reset(self): - """See superclass.""" + def reset( # pylint: disable=arguments-differ,too-many-locals,too-many-branches,too-many-statements + self, + ) -> Dict[int, ObsType]: + """Reset the environment. + + Returns + ------- + Dict[int, ObsType]: Dictionary of observations for each vehicle. + """ self.t = 0 self.step_num = 0 - enough_vehicles = False # we don't want to initialize scenes with 0 actors after satisfying # all the conditions on a scene that we have - while not enough_vehicles: - self.file = self.files[np.random.randint(len(self.files))] - self.simulation = Simulation(os.path.join( - self.cfg['scenario_path'], self.file), - config=get_scenario_dict(self.cfg)) + for _ in range(_MAX_NUM_TRIES_TO_FIND_VALID_VEHICLE): + self.file = np.random.choice(self.files) + self.simulation = Simulation(str(self.config.data_path / self.file), config=self.config.scenario) self.scenario = self.simulation.getScenario() - '''################################################################## - Construct context dictionary of observations that can be used to - warm up policies by stepping all vehicles as experts. 
- #####################################################################''' - dead_obs = self.get_observation(self.scenario.getVehicles()[0]) - self.dead_feat = -np.ones( - dead_obs.shape[0] * self.n_frames_stacked) - # step all the vehicles forward by one second and record their observations as context - context_len = max(10, self.n_frames_stacked) + + ##################################################################### + # Construct context dictionary of observations that can be used to + # warm up policies by stepping all vehicles as experts. + ##################################################################### + dead_feat = -np.ones( + self.get_observation(self.scenario.getVehicles()[0]).shape[0] * self.config.subscriber.n_frames_stacked + ) + # step all the vehicles forward by one second and record their observations + # as context + self.config.scenario.context_length = max( + self.config.scenario.context_length, self.config.subscriber.n_frames_stacked + ) # Note: Consider raising an error if context_length < n_frames_stacked. 
self.context_dict = { - veh.getID(): - deque([self.dead_feat for _ in range(context_len)], - maxlen=context_len) + veh.getID(): deque( + [dead_feat for _ in range(self.config.scenario.context_length)], + maxlen=self.config.scenario.context_length, + ) for veh in self.scenario.getObjectsThatMoved() } for veh in self.scenario.getObjectsThatMoved(): veh.expert_control = True - for _ in range(10): + for _ in range(self.config.scenario.context_length): for veh in self.scenario.getObjectsThatMoved(): - self.context_dict[veh.getID()].append( - self.get_observation(veh)) - self.simulation.step(self.cfg['dt']) + self.context_dict[veh.getID()].append(self.get_observation(veh)) + self.simulation.step(self.config.dt) # now hand back control to our actual controllers for veh in self.scenario.getObjectsThatMoved(): veh.expert_control = False @@ -342,99 +318,75 @@ def reset(self): # remove all the objects that are in collision or are already in goal dist # additionally set the objects that have infeasible goals to be experts for veh_obj in self.simulation.getScenario().getObjectsThatMoved(): - obj_pos = veh_obj.getPosition() - obj_pos = np.array([obj_pos.x, obj_pos.y]) - goal_pos = veh_obj.getGoalPosition() - goal_pos = np.array([goal_pos.x, goal_pos.y]) - '''############################################ - Remove vehicles at goal - ############################################''' + obj_pos = _position_as_array(veh_obj.getPosition()) + goal_pos = _position_as_array(veh_obj.getGoalPosition()) + ############################################ + # Remove vehicles at goal + ############################################ norm = np.linalg.norm(goal_pos - obj_pos) - if norm < self.cfg['rew_cfg'][ - 'goal_tolerance'] or veh_obj.getCollided(): + if norm < self.config.rew_cfg.goal_tolerance or veh_obj.getCollided(): self.scenario.removeVehicle(veh_obj) - '''############################################ - Set all vehicles with unachievable goals to be experts - 
############################################''' - if self.file in self.valid_veh_dict and veh_obj.getID( - ) in self.valid_veh_dict[self.file]: + ############################################ + # Set all vehicles with unachievable goals to be experts + ############################################ + if self.file in self.valid_veh_dict and veh_obj.getID() in self.valid_veh_dict[self.file]: veh_obj.expert_control = True - '''############################################ - Pick out the vehicles that we are controlling - ############################################''' - # ensure that we have no more than max_num_vehicles are controlled - temp_vehicles = self.scenario.getObjectsThatMoved() - np.random.shuffle(temp_vehicles) + ############################################ + # Pick out the vehicles that we are controlling + ############################################ + # Ensure that no more than max_num_vehicles are controlled + temp_vehicles = np.random.permutation(self.scenario.getObjectsThatMoved()) curr_index = 0 self.controlled_vehicles = [] - self.expert_controlled_vehicles = [] - self.vehicles_to_delete = [] for vehicle in temp_vehicles: - # this vehicle was invalid at the end of the 1 second context - # step so we need to remove it. 
- if np.isclose(vehicle.position.x, INVALID_POSITION): - self.vehicles_to_delete.append(vehicle) - # we don't want to include vehicles that had unachievable goals + # This vehicle was invalid at the end of the 1 second context + # step so we need to remove it + if np.isclose(vehicle.position.x, self.config.scenario.invalid_position): + self.scenario.removeVehicle(vehicle) + # We don't want to include vehicles that had unachievable goals # as controlled vehicles - elif not vehicle.expert_control and curr_index < self.cfg[ - 'max_num_vehicles']: + elif not vehicle.expert_control and curr_index < self.config.max_num_vehicles: self.controlled_vehicles.append(vehicle) curr_index += 1 else: - self.expert_controlled_vehicles.append(vehicle) - self.all_vehicle_ids = [ - veh.getID() for veh in self.controlled_vehicles - ] - # make all the vehicles that are in excess of max_num_vehicles controlled by an expert - for veh in self.expert_controlled_vehicles: - veh.expert_control = True - # remove vehicles that are currently at an invalid position - for veh in self.vehicles_to_delete: - self.scenario.removeVehicle(veh) + vehicle.expert_control = True + + self.all_vehicle_ids = [veh.getID() for veh in self.controlled_vehicles] # check that we have at least one vehicle or if we have just one file, exit anyways # or else we might be stuck in an infinite loop - if len(self.all_vehicle_ids) > 0 or len(self.files) == 1: - enough_vehicles = True - - # for one reason or another (probably we had a file where all the agents achieved their goals) - # we have no controlled vehicles - # just grab a vehicle even if it hasn't moved so that we have something - # to return obs for even if it's not controlled - # NOTE: this case only occurs during our eval procedure where we set the - # self.files list to be length 1. Otherwise, the while loop above will repeat - # until a file is found. 
- if len(self.all_vehicle_ids) == 0: - self.controlled_vehicles = [self.scenario.getVehicles()[0]] - self.all_vehicle_ids = [ - veh.getID() for veh in self.controlled_vehicles - ] + if len(self.all_vehicle_ids) > 0: + break + else: # No break in for-loop, i.e., no valid vehicle found in any of the files. + raise ValueError(f"No controllable vehicles in any of the {len(self.files)} scenes.") # construct the observations and goal normalizers obs_dict = {} self.goal_dist_normalizers = {} - max_goal_dist = -100 + max_goal_dist = -np.inf for veh_obj in self.controlled_vehicles: veh_id = veh_obj.getID() # store normalizers for each vehicle - obj_pos = veh_obj.getPosition() - obj_pos = np.array([obj_pos.x, obj_pos.y]) - goal_pos = veh_obj.getGoalPosition() - goal_pos = np.array([goal_pos.x, goal_pos.y]) + obj_pos = _position_as_array(veh_obj.getPosition()) + goal_pos = _position_as_array(veh_obj.getGoalPosition()) dist = np.linalg.norm(obj_pos - goal_pos) self.goal_dist_normalizers[veh_id] = dist # compute the obs self.context_dict[veh_id].append(self.get_observation(veh_obj)) - if self.n_frames_stacked > 1: + if self.config.subscriber.n_frames_stacked > 1: veh_deque = self.context_dict[veh_id] context_list = list( - islice(veh_deque, - len(veh_deque) - self.n_frames_stacked, - len(veh_deque))) + islice( + veh_deque, + len(veh_deque) - self.config.subscriber.n_frames_stacked, + len(veh_deque), + ) + ) obs_dict[veh_id] = np.concatenate(context_list) else: obs_dict[veh_id] = self.context_dict[veh_id][-1] - # pick the vehicle that has to travel the furthest distance and use it for rendering + # pick the vehicle that has to travel the furthest distance and use it for + # rendering if dist > max_goal_dist: # this attribute is just used for rendering of the view # from the ego frame @@ -442,114 +394,222 @@ def reset(self): max_goal_dist = dist self.done_ids = [] - # we should return obs for the missing agents - if self.cfg['subscriber']['keep_inactive_agents']: - max_id = 
max([int(key) for key in obs_dict.keys()]) - num_missing_agents = max( - 0, self.cfg['max_num_vehicles'] - len(obs_dict)) - for i in range(num_missing_agents): - obs_dict[max_id + i + 1] = self.dead_feat - self.dead_agent_ids = [ - max_id + i + 1 for i in range(num_missing_agents) - ] - self.all_vehicle_ids = list(obs_dict.keys()) - else: - self.dead_agent_ids = [] + + logging.debug("Scene: %s | Controlling vehicles: %s", self.file, [veh.id for veh in self.controlled_vehicles]) + return obs_dict - def get_observation(self, veh_obj): - """Return the observation for a particular vehicle.""" - ego_obs = self.scenario.ego_state(veh_obj) - if self.cfg['subscriber']['use_ego_state'] and self.cfg['subscriber'][ - 'use_observations']: - obs = np.concatenate( - (ego_obs, - self.scenario.flattened_visible_state( - veh_obj, - view_dist=self.cfg['subscriber']['view_dist'], - view_angle=self.cfg['subscriber']['view_angle'], - head_angle=veh_obj.head_angle))) - elif self.cfg['subscriber']['use_ego_state'] and not self.cfg[ - 'subscriber']['use_observations']: - obs = ego_obs - else: - obs = self.scenario.flattened_visible_state( - veh_obj, - view_dist=self.cfg['subscriber']['view_dist'], - view_angle=self.cfg['subscriber']['view_angle'], - head_angle=veh_obj.head_angle) + def get_observation(self, veh_obj: Vehicle) -> np.ndarray: + """Return the observation for a particular vehicle. + + Args + ---- + veh_obj (Vehicle): Vehicle object to get the observation for. + + Returns + ------- + np.ndarray: Observation for the vehicle. 
+ """ + cur_position = _position_as_array(veh_obj.getPosition()) + obs = np.concatenate( + ( + self.scenario.ego_state(veh_obj) if self.config.subscriber.use_ego_state else [], + cur_position if self.config.subscriber.use_current_position else [], + self.scenario.flattened_visible_state( + veh_obj, self.config.subscriber.view_dist, self.config.subscriber.view_angle + ) + if self.config.subscriber.use_observations + else [], + ) + ) return obs - def make_all_vehicles_experts(self): + def make_all_vehicles_experts(self) -> None: """Force all vehicles to be experts.""" for veh in self.scenario.getVehicles(): veh.expert_control = True - def get_vehicles(self): - """Return the vehicles.""" - return self.scenario.getVehicles() - - def get_objects_that_moved(self): - """Return the objects that moved.""" - return self.scenario.getObjectsThatMoved() - - def render(self, mode=None): - """See superclass.""" - return self.scenario.getImage( - img_width=1600, - img_height=1600, - draw_target_positions=True, - padding=50.0, - ) + def render(self, mode: Optional[bool] = None) -> Optional[RenderType]: # pylint: disable=unused-argument + """Render the environment. + + Args + ---- + mode (Optional[bool]): Render mode. + + Returns + ------- + Optional[RenderType]: Rendered image. + """ + return self.scenario.getImage(**self._render_settings) + + def render_ego(self, mode: Optional[bool] = None) -> Optional[RenderType]: # pylint: disable=unused-argument + """Render the ego vehicles. + + Args + ---- + mode (Optional[bool]): Render mode. - def render_ego(self, mode=None): - """See superclass.""" + Returns + ------- + Optional[RenderType]: Rendered image. 
+ """ if self.render_vehicle.getID() in self.done_ids: return None - else: - return self.scenario.getConeImage( - source=self.render_vehicle, - view_dist=self.cfg['subscriber']['view_dist'], - view_angle=self.cfg['subscriber']['view_angle'], - head_angle=self.render_vehicle.head_angle, - img_width=1600, - img_height=1600, - padding=50.0, - draw_target_position=True, - ) + return self.scenario.getConeImage( + source=self.render_vehicle, + view_dist=self.config.subscriber.view_dist, + view_angle=self.config.subscriber.view_angle, + head_angle=self.render_vehicle.head_angle, + **self._render_settings, + ) - def render_features(self, mode=None): - """See superclass.""" + def render_features(self, mode: Optional[bool] = None) -> Optional[RenderType]: # pylint: disable=unused-argument + """Render the features. + + Args + ---- + mode (Optional[bool]): Render mode. + + Returns + ------- + Optional[RenderType]: Rendered image. + """ if self.render_vehicle.getID() in self.done_ids: return None - else: - return self.scenario.getFeaturesImage( - source=self.render_vehicle, - view_dist=self.cfg['subscriber']['view_dist'], - view_angle=self.cfg['subscriber']['view_angle'], - head_angle=self.render_vehicle.head_angle, - img_width=1600, - img_height=1600, - padding=50.0, - draw_target_position=True, - ) + return self.scenario.getFeaturesImage( + source=self.render_vehicle, + view_dist=self.config.subscriber.view_dist, + view_angle=self.config.subscriber.view_angle, + head_angle=self.render_vehicle.head_angle, + **self._render_settings, + ) - def seed(self, seed=None): - """Ensure determinism.""" - if seed is None: - np.random.seed(1) - else: + def seed(self, seed: Optional[int] = None) -> None: + """Seed the environment. + + Args + ---- + seed (Optional[int]): Seed to use. 
+ """ + if seed is not None: np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) - def angle_sub(self, current_angle, target_angle) -> int: - """Subtract two angles to find the minimum angle between them.""" - # Subtract the angles, constraining the value to [0, 2 * np.pi) - diff = (target_angle - current_angle) % (2 * np.pi) + def _set_discrete_action_space(self) -> None: + """Set the discrete action space.""" + self.action_space = Discrete(self.config.accel_discretization * self.config.steering_discretization) + self.accel_grid = np.linspace( + -np.abs(self.config.accel_lower_bound), + self.config.accel_upper_bound, + self.config.accel_discretization, + ) + self.steering_grid = np.linspace( + -np.abs(self.config.steering_lower_bound), + self.config.steering_upper_bound, + self.config.steering_discretization, + ) - # If we are more than np.pi we're taking the long way around. - # Let's instead go in the shorter, negative direction - if diff > np.pi: - diff = -(2 * np.pi - diff) - return diff + self.idx_to_actions = {} + for i, (accel, steer) in enumerate(product(self.accel_grid, self.steering_grid)): + self.idx_to_actions[i] = [accel, steer] + + def _set_continuous_action_space(self) -> None: + """Set the continuous action space.""" + self.action_space = Box( + low=-np.array( + [ + np.abs(self.config.accel_lower_bound), + self.config.steering_lower_bound, + ] + ), + high=np.array( + [ + np.abs(self.config.accel_upper_bound), + self.config.steering_upper_bound, + ] + ), + ) + self.idx_to_actions = None + + +def _angle_sub(current_angle: float, target_angle: float) -> float: + """Subtract two angles to find the minimum angle between them. + + Args + ---- + current_angle (float): Current angle. + target_angle (float): Target angle. + + Returns + ------- + float: Minimum angle between the two angles. 
+ """ + # Subtract the angles, constraining the value to [0, 2 * np.pi) + diff = (target_angle - current_angle) % (2 * np.pi) + + # If we are more than np.pi we're taking the long way around. + # Let's instead go in the shorter, negative direction + if diff > np.pi: + diff = -(2 * np.pi - diff) + return diff + + +def _apply_action_to_vehicle( + veh_obj: Vehicle, action: ActType, *, idx_to_actions: Optional[Dict[int, Tuple[float, float]]] = None +) -> None: + """Apply an action to a vehicle. + + Args + ---- + veh_obj (Vehicle): Vehicle object to apply the action to. + action (ActType): Action to apply to the vehicle. + + Optional Args + ------------- + idx_to_actions (Optional[Dict[int, Tuple[float, float]]]): Dictionary of actions to apply to the vehicle. + + Raises + ------ + NotImplementedError: If the action type is not supported. + """ + if isinstance(action, Action): + veh_obj.apply_action(action) + elif isinstance(action, np.ndarray): + veh_obj.apply_action(Action.from_numpy(action)) + elif isinstance(action, (tuple, list)): + veh_obj.acceleration = action[0] + veh_obj.steering = action[1] + elif isinstance(action, int) and idx_to_actions is not None: + accel, steer = idx_to_actions[action] + veh_obj.acceleration = accel + veh_obj.steering = steer + elif isinstance(action, np.int64): + accel, steer = idx_to_actions[action] + veh_obj.acceleration = accel + veh_obj.steering = steer + else: + raise NotImplementedError(f"Action type '{type(action)}' not supported.") + + +def _position_as_array(position: Vector2D) -> np.ndarray: + """Convert a position to an array. + + Args + ---- + position (Vector2D): Position to convert. + + Returns + ------- + np.ndarray: Position as an array. 
+ """ + return np.array([position.x, position.y]) + + +if __name__ == "__main__": + # Load environment settings + with open("./configs/env_config.yaml", "r") as stream: + env_config = yaml.safe_load(stream) + + # Initialize environment + env = BaseEnv(config=env_config) diff --git a/nocturne/envs/wrappers.py b/nocturne/envs/wrappers.py deleted file mode 100644 index da846671..00000000 --- a/nocturne/envs/wrappers.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Wrappers and env constructors for the environments.""" -from gym.spaces import Box -import numpy as np - -from nocturne.envs import BaseEnv - - -class OnPolicyPPOWrapper(object): - """Wrapper to make env compatible with On-Policy code.""" - - def __init__(self, env, use_images=False): - """Wrap with appropriate observation spaces and make fixed length. 
- - Args - ---- - env ([type]): [description] - no_img_concat (bool, optional): If true, we don't concat images into the 'state' key - """ - self._env = env - self.use_images = use_images - - self.n = self.cfg.max_num_vehicles - obs_dict = self.reset() - # tracker used to match observations to actions - self.agent_ids = [] - self.feature_shape = obs_dict[0].shape - self.share_observation_space = [ - Box(low=-np.inf, - high=+np.inf, - shape=self.feature_shape, - dtype=np.float32) for _ in range(self.n) - ] - - @property - def observation_space(self): - """See superclass.""" - return [self._env.observation_space for _ in range(self.n)] - - @property - def action_space(self): - """See superclass.""" - return [self._env.action_space for _ in range(self.n)] - - def step(self, actions): - """Convert returned dicts to lists.""" - agent_actions = {} - for action_vec, agent_id in zip(actions, self.agent_ids): - agent_actions[agent_id] = action_vec - next_obses, rew, done, info = self._env.step(agent_actions) - obs_n = [] - rew_n = [] - done_n = [] - info_n = [] - for key in self.agent_ids: - if isinstance(next_obses[key], dict): - obs_n.append(next_obses[key]['features']) - else: - obs_n.append(next_obses[key]) - rew_n.append([rew[key]]) - done_n.append(done[key]) - agent_info = info[key] - agent_info['individual_reward'] = rew[key] - info_n.append(agent_info) - return obs_n, rew_n, done_n, info_n - - def reset(self): - """Convert observation dict to list.""" - obses = self._env.reset() - obs_n = [] - self.agent_ids = [] - for key in obses.keys(): - self.agent_ids.append(key) - if not hasattr(self, 'agent_key'): - self.agent_key = key - if isinstance(obses[key], dict): - obs_n.append(obses[key]['features']) - else: - obs_n.append(obses[key]) - return obs_n - - def render(self, mode=None): - """See superclass.""" - return self._env.render(mode) - - def seed(self, seed=None): - """See superclass.""" - self._env.seed(seed) - - def __getattr__(self, name): - """See 
superclass.""" - return getattr(self._env, name) - - -def create_env(cfg): - """Return the base environment.""" - env = BaseEnv(cfg) - return env - - -def create_ppo_env(cfg, rank=0): - """Return a PPO wrapped environment.""" - env = BaseEnv(cfg, rank=rank) - return OnPolicyPPOWrapper(env, use_images=cfg.img_as_state) diff --git a/nocturne/utils/eval/average_displacement.py b/nocturne/utils/eval/average_displacement.py deleted file mode 100644 index 4d6502ad..00000000 --- a/nocturne/utils/eval/average_displacement.py +++ /dev/null @@ -1,226 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Average displacement error computation.""" -from collections import defaultdict -from itertools import repeat -import json -from multiprocessing import Pool -import os -import random - -import numpy as np -import torch - -from cfgs.config import PROCESSED_VALID_NO_TL, ERR_VAL -from nocturne import Simulation - -SIM_N_STEPS = 90 # number of steps per trajectory -GOAL_TOLERANCE = 0.5 - - -def _average_displacement_impl(arg): - trajectory_path, model, configs = arg - print(trajectory_path) - - scenario_config = configs['scenario_cfg'] - - view_dist = configs['dataloader_cfg']['view_dist'] - view_angle = configs['dataloader_cfg']['view_angle'] - state_normalization = configs['dataloader_cfg']['state_normalization'] - dt = configs['dataloader_cfg']['dt'] - - n_stacked_states = configs['dataloader_cfg']['n_stacked_states'] - state_size = configs['model_cfg']['n_inputs'] // n_stacked_states - state_dict = defaultdict(lambda: np.zeros(state_size * n_stacked_states)) - - # create expert simulation - sim_expert = Simulation(str(trajectory_path), scenario_config) - scenario_expert = sim_expert.getScenario() - vehicles_expert = scenario_expert.getVehicles() - objects_expert = scenario_expert.getObjectsThatMoved() - id2veh_expert = 
{veh.id: veh for veh in vehicles_expert} - - # create model simulation - sim_model = Simulation(str(trajectory_path), scenario_config) - scenario_model = sim_model.getScenario() - vehicles_model = scenario_model.getVehicles() - objects_model = scenario_model.getObjectsThatMoved() - - # set all objects to be expert-controlled - for obj in objects_expert: - obj.expert_control = True - for obj in objects_model: - obj.expert_control = True - - # in model sim, model will control vehicles that moved - controlled_vehicles = [ - veh for veh in vehicles_model if veh in objects_model - ] - random.shuffle(controlled_vehicles) - # controlled_vehicles = controlled_vehicles[:2] - - # warmup to build up state stacking - for i in range(n_stacked_states - 1): - for veh in controlled_vehicles: - ego_state = scenario_model.ego_state(veh) - visible_state = scenario_model.flattened_visible_state( - veh, view_dist=view_dist, view_angle=view_angle) - state = np.concatenate( - (ego_state, visible_state)) / state_normalization - state_dict[veh.getID()] = np.roll(state_dict[veh.getID()], - len(state)) - state_dict[veh.getID()][:len(state)] = state - sim_model.step(dt) - sim_expert.step(dt) - - for veh in controlled_vehicles: - veh.expert_control = False - - avg_displacements = [] - final_displacements = [0 for _ in controlled_vehicles] - collisions = [False for _ in controlled_vehicles] - goal_achieved = [False for _ in controlled_vehicles] - for i in range(SIM_N_STEPS - n_stacked_states): - for veh in controlled_vehicles: - if np.isclose(veh.position.x, ERR_VAL): - veh.expert_control = True - else: - veh.expert_control = False - # set model actions - all_states = [] - for veh in controlled_vehicles: - # get vehicle state - state = np.concatenate( - (scenario_model.ego_state(veh), - scenario_model.flattened_visible_state( - veh, view_dist=view_dist, - view_angle=view_angle))) / state_normalization - # stack state - state_dict[veh.getID()] = np.roll(state_dict[veh.getID()], - len(state)) - 
state_dict[veh.getID()][:len(state)] = state - all_states.append(state_dict[veh.getID()]) - all_states = torch.as_tensor(np.array(all_states), dtype=torch.float32) - - # compute vehicle actions - all_actions = model(all_states, deterministic=True - ) # /!\ this returns an array (2,n) and not (n,2) - accel_actions = all_actions[0].cpu().numpy() - steering_actions = all_actions[1].cpu().numpy() - # set vehicles actions - for veh, accel_action, steering_action in zip(controlled_vehicles, - accel_actions, - steering_actions): - veh.acceleration = accel_action - veh.steering = steering_action - - # step simulations - sim_expert.step(dt) - sim_model.step(dt) - - # compute displacements over non-collided vehicles - displacements = [] - for i, veh in enumerate(controlled_vehicles): - # get corresponding vehicle in expert simulation - expert_veh = id2veh_expert[veh.id] - # make sure it is valid - if np.isclose(expert_veh.position.x, - ERR_VAL) or expert_veh.collided: - continue - # print(expert_veh.position, veh.position) - # compute displacement - expert_pos = id2veh_expert[veh.id].position - model_pos = veh.position - pos_diff = (model_pos - expert_pos).norm() - displacements.append(pos_diff) - final_displacements[i] = pos_diff - if veh.collided: - collisions[i] = True - if (veh.position - veh.target_position).norm() < GOAL_TOLERANCE: - goal_achieved[i] = True - - # average displacements over all vehicles - if len(displacements) > 0: - avg_displacements.append(np.mean(displacements)) - # print(displacements, np.mean(displacements)) - - # average displacements over all time steps - avg_displacement = np.mean( - avg_displacements) if len(avg_displacements) > 0 else np.nan - final_displacement = np.mean( - final_displacements) if len(final_displacements) > 0 else np.nan - avg_collisions = np.mean(collisions) if len(collisions) > 0 else np.nan - avg_goals = np.mean(goal_achieved) if len(goal_achieved) > 0 else np.nan - print('displacements', avg_displacement) - 
print('final_displacement', final_displacement) - print('collisions', avg_collisions) - print('goal_rate', avg_goals) - return avg_displacement, final_displacement, avg_collisions, avg_goals - - -def compute_average_displacement(trajectories_dir, model, configs): - """Compute average displacement error between a model and the ground truth.""" - NUM_FILES = 200 - # get trajectories paths - with open(os.path.join(trajectories_dir, 'valid_files.json')) as file: - valid_veh_dict = json.load(file) - files = list(valid_veh_dict.keys()) - # sort the files so that we have a consistent order - np.random.seed(0) - np.random.shuffle(files) - # compute average displacement over each individual trajectory file - trajectories_paths = files[:NUM_FILES] - for i, trajectory in enumerate(trajectories_paths): - trajectories_paths[i] = os.path.join(trajectories_dir, trajectory) - with Pool(processes=14) as pool: - result = list( - pool.map(_average_displacement_impl, - zip(trajectories_paths, repeat(model), repeat(configs)))) - average_displacements = np.array(result)[:, 0] - final_displacements = np.array(result)[:, 1] - average_collisions = np.array(result)[:, 2] - average_goals = np.array(result)[:, 3] - print(average_displacements, final_displacements, average_collisions, - average_goals) - - return [ - np.mean(average_displacements[~np.isnan(average_displacements)]), - np.std(average_displacements[~np.isnan(average_displacements)]) - ], [ - np.mean(final_displacements[~np.isnan(final_displacements)]), - np.std(final_displacements[~np.isnan(final_displacements)]) - ], [ - np.mean(average_collisions[~np.isnan(average_collisions)]), - np.std(average_collisions[~np.isnan(average_displacements)]) - ], [ - np.mean(average_goals[~np.isnan(average_goals)]), - np.std(average_goals[~np.isnan(average_goals)]) - ] - - -if __name__ == '__main__': - from examples.imitation_learning.model import ImitationAgent # noqa: F401 - model = torch.load( - 
'/checkpoint/eugenevinitsky/nocturne/test/2022.06.05/test/14.23.17/\ - ++device=cuda,++file_limit=1000/train_logs/2022_06_05_14_23_23/model_600.pth' - ).to('cpu') - model.actions_grids = [x.to('cpu') for x in model.actions_grids] - model.eval() - model.nn[0].eval() - with open( - '/checkpoint/eugenevinitsky/nocturne/test/2022.06.05/test/14.23.17/\ - ++device=cuda,++file_limit=1000/train_logs/2022_06_05_14_23_23/configs.json', - 'r') as fp: - configs = json.load(fp) - configs['device'] = 'cpu' - with torch.no_grad(): - ade, fde, collisions, goals = compute_average_displacement( - PROCESSED_VALID_NO_TL, model=model, configs=configs) - print(f'Average Displacement Error: {ade[0]:.3f} ± {ade[1]:.3f} meters') - print(f'Final Displacement Error: {fde[0]:.3f} ± {fde[1]:.3f} meters') - print(f'Average Collisions: {collisions[0]:.3f} ± {collisions[1]:.3f}%') - print( - f'Average Success at getting to goal: {goals[0]:.3f} ± {goals[1]:.3f}%' - ) diff --git a/nocturne/utils/eval/collision_rate.py b/nocturne/utils/eval/collision_rate.py deleted file mode 100644 index 38e29755..00000000 --- a/nocturne/utils/eval/collision_rate.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-"""Collision rate computation.""" -from pathlib import Path -import numpy as np -import torch - -from nocturne import Simulation -from cfgs.config import ERR_VAL as INVALID_POSITION - - -SIM_N_STEPS = 90 # number of steps per trajectory -SIM_STEP_TIME = 0.1 # dt (in seconds) - - -def _collision_rate_impl(trajectory_path, model=None, sim_allow_non_vehicles=True, check_vehicles_only=True): - # create expert simulation - sim = Simulation(scenario_path=str(trajectory_path), start_time=0, allow_non_vehicles=sim_allow_non_vehicles) - scenario = sim.getScenario() - vehicles = scenario.getVehicles() - objects_that_moved = scenario.getObjectsThatMoved() - vehicles_that_moved = [veh for veh in vehicles if veh in objects_that_moved] - - # set all objects to be expert-controlled - for obj in objects_that_moved: - obj.expert_control = True - for obj in vehicles: - obj.expert_control = True - - # if a model is given, model will control vehicles that moved - if model is not None: - controlled_vehicles = vehicles_that_moved - for veh in controlled_vehicles: - veh.expert_control = False - else: - controlled_vehicles = [] - - # vehicles to check for collisions on - objects_to_check = [ - obj for obj in (vehicles_that_moved if check_vehicles_only else objects_that_moved) - if (obj.target_position - obj.position).norm() > 0.5 - ] - - # step sim until the end and check for collisions - collided_with_vehicle = {obj.id: False for obj in objects_to_check} - collided_with_edge = {obj.id: False for obj in objects_to_check} - for i in range(SIM_N_STEPS): - # set model actions - for veh in controlled_vehicles: - # get vehicle state - state = torch.as_tensor(np.expand_dims(np.concatenate( - (scenario.ego_state(veh), - scenario.flattened_visible_state(veh, view_dist=120, view_angle=3.14)) - ), axis=0), dtype=torch.float32) - # compute vehicle action - action = model(state)[0] - # set vehicle action - veh.acceleration = action[0] - veh.steering = action[1] - - # step simulation - 
sim.step(SIM_STEP_TIME) - - # check for collisions - for obj in objects_to_check: - if not np.isclose(obj.position.x, INVALID_POSITION) and obj.collided: - if int(obj.collision_type) == 1: - collided_with_vehicle[obj.id] = True - if int(obj.collision_type) == 2: - collided_with_edge[obj.id] = True - - # compute collision rate - collisions_with_vehicles = list(collided_with_vehicle.values()) - collisions_with_edges = list(collided_with_edge.values()) - collision_rate_vehicles = collisions_with_vehicles.count(True) / len(collisions_with_vehicles) - collision_rate_edges = collisions_with_edges.count(True) / len(collisions_with_edges) - - return collision_rate_vehicles, collision_rate_edges - - -def compute_average_collision_rate(trajectories_dir, model=None, **kwargs): - """Compute average collision rate for a model.""" - # get trajectories paths - if isinstance(trajectories_dir, str): - # if trajectories_dir is a string, treat it as the path to a directory of trajectories - trajectories_dir = Path(trajectories_dir) - trajectories_paths = list(trajectories_dir.glob('*tfrecord*.json')) - elif isinstance(trajectories_dir, list): - # if trajectories_dir is a list, treat it as a list of paths to trajectory files - trajectories_paths = [Path(path) for path in trajectories_dir] - # compute average collision rate over each individual trajectory file - average_collision_rates = np.array(list(map( - lambda path: _collision_rate_impl(path, model, **kwargs), - trajectories_paths - ))) - - return np.mean(average_collision_rates, axis=0) - - -if __name__ == '__main__': - from nocturne.utils.imitation_learning.waymo_data_loader import ImitationAgent # noqa: F401 - model = torch.load('model.pth') - collisions_with_vehicles, collisions_with_road_lines = \ - compute_average_collision_rate('dataset/json_files', model=None) - print(f'Average Collision Rate: {100*collisions_with_vehicles:.2f}% with vehicles, ' - f'{100*collisions_with_road_lines:.2f}% with road lines') diff --git 
a/nocturne/utils/eval/goal_by_intersection.py b/nocturne/utils/eval/goal_by_intersection.py deleted file mode 100644 index e3d36f20..00000000 --- a/nocturne/utils/eval/goal_by_intersection.py +++ /dev/null @@ -1,259 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Goal reaching rate and collision rate computation as a function of number of intersections in expert trajectory.""" -from pathlib import Path -import numpy as np -import torch -from collections import defaultdict -import random -import json - -from nocturne import Simulation -from cfgs.config import ERR_VAL as INVALID_POSITION -from multiprocessing import Pool -from itertools import repeat, combinations - -SIM_N_STEPS = 90 # number of steps per trajectory -GOAL_TOLERANCE = 0.5 - - -def _compute_expert_intersections(trajectory_path): - with open(trajectory_path, 'r') as fp: - data = json.load(fp) - - segments = defaultdict(list) - for veh_id, veh in enumerate(data['objects']): - # note: i checked and veh_id is consistent with how it's loaded in simulation - - for i in range(len(veh['position']) - 1): - # compute polyline (might not be continuous since we have invalid positions) - segment = np.array([ - [veh['position'][i]['x'], veh['position'][i]['y']], - [veh['position'][i + 1]['x'], veh['position'][i + 1]['y']], - ]) - - # if segment doesnt contain an invalid position, append to trajectory - if np.isclose(segment, INVALID_POSITION).any(): - continue - segments[veh_id].append(segment) - - # go over pair of vehicles and check if their segments intersect - n_collisions = defaultdict(int) - for veh1, veh2 in combinations(segments.keys(), 2): - # get corresponding segments - segments1 = np.array(segments[veh1]) - segments2 = np.array(segments[veh2]) - - # check bounding rectangle intersection - O(n) - xmin1, ymin1 = np.min(np.min(segments1, 
axis=0), axis=0) - xmax1, ymax1 = np.max(np.max(segments1, axis=0), axis=0) - xmin2, ymin2 = np.min(np.min(segments2, axis=0), axis=0) - xmax2, ymax2 = np.max(np.max(segments2, axis=0), axis=0) - - if xmax1 <= xmin2 or xmax2 <= xmin1 or ymax1 <= ymin2 or ymax2 <= ymin1: - # segments can't intersect since their bounding rectangle don't intersect - continue - - # check intersection over pairs of segments - O(n^2) - - # construct numpy array of shape (N = len(segments1) * len(segments2), 4, 2) - # where each element contain 4 points ABCD (segment AB of segments1 and segment CD of segments2) - idx1 = np.repeat( - np.arange(len(segments1)), - len(segments2)) # build indexes 1 1 1 2 2 2 3 3 3 4 4 4 - idx2 = np.tile(np.arange(len(segments2)), - len(segments1)) # build indexes 1 2 3 1 2 3 1 2 3 1 2 3 - segment_pairs = np.concatenate( - (segments1[idx1], segments2[idx2]), - axis=1) # concatenate to create all pairs - - # now we need to check if at least one element ABCD contains an intersection between segment AB and segment CD - def ccw(A, B, C): - return (C[:, 1] - A[:, 1]) * (B[:, 0] - A[:, 0]) > ( - B[:, 1] - A[:, 1]) * (C[:, 0] - A[:, 0]) - - # ABCD are each arrays of N points (shape (N, 2)) - A = segment_pairs[:, 0] - B = segment_pairs[:, 1] - C = segment_pairs[:, 2] - D = segment_pairs[:, 3] - if np.logical_and( - ccw(A, C, D) != ccw(B, C, D), - ccw(A, B, C) != ccw(A, B, D)).any(): - n_collisions[veh1] += 1 - n_collisions[veh2] += 1 - - return n_collisions - - -def _intesection_metrics_impl(trajectory_path, model, configs): - print(trajectory_path) - - scenario_config = configs['scenario_cfg'] - - view_dist = configs['dataloader_cfg']['view_dist'] - view_angle = configs['dataloader_cfg']['view_angle'] - state_normalization = configs['dataloader_cfg']['state_normalization'] - dt = configs['dataloader_cfg']['dt'] - - n_stacked_states = configs['dataloader_cfg']['n_stacked_states'] - state_size = configs['model_cfg']['n_inputs'] // n_stacked_states - state_dict = 
defaultdict(lambda: np.zeros(state_size * n_stacked_states)) - - # create model simulation - sim = Simulation(str(trajectory_path), scenario_config) - scenario = sim.getScenario() - vehicles = scenario.getVehicles() - objects = scenario.getObjectsThatMoved() - - # set all objects to be expert-controlled - for obj in objects: - obj.expert_control = True - - # in model sim, model will control vehicles that moved - controlled_vehicles = [veh for veh in vehicles if veh in objects] - - # only control 2 vehicles at random - random.shuffle(controlled_vehicles) - # controlled_vehicles = controlled_vehicles[:2] - - # warmup to build up state stacking - for i in range(n_stacked_states - 1): - for veh in controlled_vehicles: - ego_state = scenario.ego_state(veh) - visible_state = scenario.flattened_visible_state( - veh, view_dist=view_dist, view_angle=view_angle) - state = np.concatenate( - (ego_state, visible_state)) / state_normalization - state_dict[veh.getID()] = np.roll(state_dict[veh.getID()], - len(state)) - state_dict[veh.getID()][:len(state)] = state - sim.step(dt) - - for veh in controlled_vehicles: - veh.expert_control = False - - collisions = [False] * len(controlled_vehicles) - goal_achieved = [False] * len(controlled_vehicles) - for i in range(SIM_N_STEPS - n_stacked_states): - for veh in controlled_vehicles: - if np.isclose(veh.position.x, INVALID_POSITION): - veh.expert_control = True - else: - veh.expert_control = False - # set model actions - # get all actions at once - all_states = [] - for veh in controlled_vehicles: - # get vehicle state - state = np.concatenate( - (scenario.ego_state(veh), - scenario.flattened_visible_state( - veh, view_dist=view_dist, - view_angle=view_angle))) / state_normalization - # stack state - state_dict[veh.getID()] = np.roll(state_dict[veh.getID()], - len(state)) - state_dict[veh.getID()][:len(state)] = state - all_states.append(state_dict[veh.getID()]) - all_states = torch.as_tensor(np.array(all_states), dtype=torch.float32) - 
- # compute vehicle actions - all_actions = model(all_states, deterministic=True - ) # /!\ this returns an array (2,n) and not (n,2) - accel_actions = all_actions[0].cpu().numpy() - steering_actions = all_actions[1].cpu().numpy() - # set vehicles actions - for veh, accel_action, steering_action in zip(controlled_vehicles, - accel_actions, - steering_actions): - veh.acceleration = accel_action - veh.steering = steering_action - - # step simulation - sim.step(dt) - - # compute displacements over non-collided vehicles - for i, veh in enumerate(controlled_vehicles): - # make sure it is valid - if np.isclose(veh.position.x, INVALID_POSITION): - continue - - # a collision with another a vehicle - if veh.collided and int(veh.collision_type) == 1: - collisions[i] = True - if (veh.position - veh.target_position).norm() < GOAL_TOLERANCE: - goal_achieved[i] = True - - # compute expert intersections for all vehicles (mapping veh_id -> nb of intersections in expert traj) - intersection_data = _compute_expert_intersections(trajectory_path) - - # compute metrics as a function of number of intersections - - collision_rates = np.zeros(4) - goal_rates = np.zeros(4) - counts = np.zeros(4) - for i, veh in enumerate(controlled_vehicles): - n_intersections = min(intersection_data[veh.getID()], 3) - counts[n_intersections] += 1 - if collisions[i]: - collision_rates[n_intersections] += 1 - if goal_achieved[i]: - goal_rates[n_intersections] += 1 - collision_rates /= counts - goal_rates /= counts - # note: returned values can contain NaN - - return collision_rates, goal_rates - - -def compute_metrics_by_intersection(trajectories_dir, model, configs): - """Compute metrics as a function of number of intesections in a vehicle's expert trajectory.""" - NUM_FILES = 200 - NUM_CPUS = 14 - - # get trajectories paths - trajectories_dir = Path(trajectories_dir) - trajectories_paths = list(trajectories_dir.glob('*tfrecord*.json')) - trajectories_paths.sort() - trajectories_paths = 
trajectories_paths[:NUM_FILES] - - # parallel metric computation - with Pool(processes=NUM_CPUS) as pool: - result = np.array( - list( - pool.starmap( - _intesection_metrics_impl, - zip(trajectories_paths, repeat(model), repeat(configs))))) - assert result.shape == (len(trajectories_paths), 2, 4 - ) # collision rates, goal rates (in 4 bins) - avg_result = np.nanmean(result, axis=0) # nanmean ignores NaN values - print(avg_result) - return avg_result - - -if __name__ == '__main__': - from examples.imitation_learning.model import ImitationAgent # noqa: F401 - model = torch.load( - '/checkpoint/eugenevinitsky/nocturne/test/2022.06.05/test/14.23.17/\ - ++device=cuda,++file_limit=1000/train_logs/2022_06_05_14_23_23/model_600.pth' - ).to('cpu') - model.actions_grids = [x.to('cpu') for x in model.actions_grids] - model.eval() - model.nn[0].eval() - with open( - '/checkpoint/eugenevinitsky/nocturne/test/2022.06.05/test/14.23.17\ - /++device=cuda,++file_limit=1000/train_logs/2022_06_05_14_23_23/configs.json', - 'r') as fp: - configs = json.load(fp) - configs['device'] = 'cpu' - with torch.no_grad(): - result = compute_metrics_by_intersection( - '/checkpoint/eugenevinitsky/waymo_open/motion_v1p1/\ - uncompressed/scenario/formatted_json_v2_no_tl_valid', - model=model, - configs=configs) - print('collision rates', result[0]) - print('goal rates', result[1]) diff --git a/nocturne/utils/eval/goal_reaching_rate.py b/nocturne/utils/eval/goal_reaching_rate.py deleted file mode 100644 index e0ccfee3..00000000 --- a/nocturne/utils/eval/goal_reaching_rate.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-"""Goal reaching rate computation.""" -from pathlib import Path -import numpy as np -import torch - -from nocturne import Simulation - -SIM_N_STEPS = 90 # number of steps per trajectory -SIM_STEP_TIME = 0.1 # dt (in seconds) - - -def _goal_reaching_rate_impl(trajectory_path, - model=None, - sim_allow_non_vehicles=True, - check_vehicles_only=True): - # create expert simulation - sim = Simulation(scenario_path=str(trajectory_path), - start_time=0, - allow_non_vehicles=sim_allow_non_vehicles) - scenario = sim.getScenario() - vehicles = scenario.getVehicles() - objects_that_moved = scenario.getObjectsThatMoved() - vehicles_that_moved = [ - veh for veh in vehicles if veh in objects_that_moved - ] - - # set all objects to be expert-controlled - for obj in objects_that_moved: - obj.expert_control = True - for obj in vehicles: - obj.expert_control = True - - # if a model is given, model will control vehicles that moved - if model is not None: - controlled_vehicles = vehicles_that_moved - for veh in controlled_vehicles: - veh.expert_control = False - else: - controlled_vehicles = [] - - # vehicles to check for collisions on - objects_to_check = vehicles_that_moved if check_vehicles_only else objects_that_moved - - # step sim until the end and check for collisions - reached_goal = {obj.id: False for obj in objects_to_check} - for i in range(SIM_N_STEPS): - # set model actions - for veh in controlled_vehicles: - # get vehicle state - state = torch.as_tensor(np.expand_dims(np.concatenate( - (scenario.ego_state(veh), - scenario.flattened_visible_state(veh, - view_dist=120, - view_angle=3.14))), - axis=0), - dtype=torch.float32) - # compute vehicle action - action = model(state)[0] - # set vehicle action - veh.acceleration = action[0] - veh.steering = action[1] - - # step simulation - sim.step(SIM_STEP_TIME) - - # check for collisions - for obj in objects_to_check: - if (obj.target_position - obj.position).norm() < 0.5: - reached_goal[obj.id] = True - - # compute collision rate 
- reached_goal_values = list(reached_goal.values()) - reached_goal_rate = reached_goal_values.count(True) / len( - reached_goal_values) - - return reached_goal_rate - - -def compute_average_goal_reaching_rate(trajectories_dir, model=None, **kwargs): - """Compute average goal reaching rate for a model.""" - # get trajectories paths - if isinstance(trajectories_dir, str): - # if trajectories_dir is a string, treat it as the path to a directory of trajectories - trajectories_dir = Path(trajectories_dir) - trajectories_paths = list(trajectories_dir.glob('*tfrecord*.json')) - elif isinstance(trajectories_dir, list): - # if trajectories_dir is a list, treat it as a list of paths to trajectory files - trajectories_paths = [Path(path) for path in trajectories_dir] - # compute average collision rate over each individual trajectory file - average_goal_reaching_rates = np.array( - list( - map(lambda path: _goal_reaching_rate_impl(path, model, **kwargs), - trajectories_paths))) - - return np.mean(average_goal_reaching_rates) - - -if __name__ == '__main__': - from nocturne.utils.imitation_learning.waymo_data_loader import ImitationAgent # noqa: F401 - model = torch.load('model.pth') - goal_reaching_rate = compute_average_goal_reaching_rate( - 'dataset/json_files', model=None) - print(f'Average Goal Reaching Rate: {100*goal_reaching_rate:.2f}%') diff --git a/nocturne/wrappers/sb3_wrappers.py b/nocturne/wrappers/sb3_wrappers.py new file mode 100644 index 00000000..ec944b6f --- /dev/null +++ b/nocturne/wrappers/sb3_wrappers.py @@ -0,0 +1,86 @@ + +import gymnasium +import numpy as np +import gym + +class NocturneToSB3(gymnasium.Env): + """Makes Nocturne env compatible with SB3. + ! NOTE: Controlling a single agent. 
+ """ + + def __init__(self, nocturne_env: gym.Env): + self.env = nocturne_env + self.action_space = gymnasium.spaces.Discrete(self.env.action_space.n) + self.observation_space = gymnasium.spaces.Box( + -np.inf, np.inf, self.env.observation_space.shape, np.float32 + ) + + def step(self, action): + """Take a step in the environment, convert dicts to np arrays. + + Args: + action (Dict): Dictionary with a single action for the controlled vehicle. + + Returns: + observation, reward, terminated, truncated, info (np.ndarray, float, bool, bool, dict) + """ + next_obs_dict, rewards_dict, dones_dict, info_dict = self.env.step( + action_dict={self.controlled_vehicle: action} + ) + + return ( + next_obs_dict[self.controlled_vehicle], + rewards_dict[self.controlled_vehicle], + dones_dict[self.controlled_vehicle], + False, + info_dict[self.controlled_vehicle], + ) + + def reset(self, seed=None): + """Reset the environment.""" + obs_dict = self.env.reset() + assert ( + len(self.env.controlled_vehicles) == 1 + ), "This wrapper does not support multi-agent control." 
+ + self.controlled_vehicle = self.env.controlled_vehicles[0].id + return obs_dict[self.controlled_vehicle], {} + + @property + def action_space(self): + return self.env.action_space + + @action_space.setter + def action_space(self, action_space): + self.env.action_space = action_space + + @property + def observation_space(self): + return self.env.observation_space + + @observation_space.setter + def observation_space(self, observation_space): + self.env.observation_space = observation_space + + def render(self): + pass + + def close(self): + pass + + @property + def seed(self, seed=None): + return None + + @seed.setter + def seed(self, seed=None): + pass + + def __getattr__(self, name): + return getattr(self._env, name) + + def get_attr(self, attr_name: str): + return getattr(self._env, attr_name) + + def set_attr(self, attr_name: str): + setattr(self._env, attr_name) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..e62c4129 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,89 @@ +[tool.poetry] +name = "nocturne" +version = "0.0.1" +description = "A data-driven, fast driving simulator for multi-agent coordination under partial observability." 
+authors = [ + "Nathan Lichtlé ", + "Eugene Vinitsky ", + "Xiaomeng Yang " +] +maintainers = [ + "Daphne Cornelisse ", + "Eugene Vinitsky " +] +homepage = "https://github.com/Emerge-Lab/nocturne" +repository = "https://github.com/Emerge-Lab/nocturne" +documentation = "https://nocturne.readthedocs.io/" +license = "MIT" +readme = "README.md" +keywords = ["Driving", "Simulation", "Autonomous Vehicles", "Waymo", "Reinforcement Learning"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Utilities", + "Programming Language :: C++", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: Implementation :: PyPy", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: C++", + "Topic :: Software Development :: Libraries :: Python Modules" +] + +[tool.poetry.urls] +"Bug Tracker" = "https://github.com/Emerge-Lab/nocturne/issues" +"Discussions" = "https://github.com/Emerge-Lab/nocturne/discussions" +"Changelog" = "https://nocturne.readthedocs.io/en/latest/changelog.html" +"Chat" = "https://gitter.im/nocturne/Lobby" + +[tool.poetry.dependencies] +python = ">=3.10,<3.13" +numpy = "^1.26.0" +torch = "^2.0.1" +gym = "^0.26.2" +pybind11 = "^2.11.1" +python-box = "^7.1.1" +gymnasium = "^0.29.1" + +[tool.poetry.group.research.dependencies] +ipykernel = "^6.25.2" +matplotlib = "^3.8.0" +seaborn = "^0.13.0" +pandas = "^2.1.1" +wandb = "^0.15.12" +tensorboard = "^2.14.1" + +[tool.poetry.group.dev.dependencies] +pre-commit = "^3.4.0" +flake8 = "^6.1.0" +black = "^23.9.1" +isort = "^5.12.0" +pylint = "^3.0.0" +tomli = "^2.0.1" + +[tool.poetry.build] +script = "build.py" +generate-setup-file = true + +[tool.black] +line-length = 120 + +[tool.flake8] +max-line-length = 120 +extend-ignore = "E203" + 
+[tool.pydocstyle] +convention = "google" + +[tool.pylint] +max-line-length = 120 +disable = "W1514" + +[tool.isort] +profile = "black" + +[build-system] +requires = ["poetry-core", "pybind11>=2.11.1", "setuptools>=68.2.2"] +build-backend = "poetry.core.masonry.api" diff --git a/requirements.dev.txt b/requirements.dev.txt new file mode 100644 index 00000000..be8f9e62 --- /dev/null +++ b/requirements.dev.txt @@ -0,0 +1,27 @@ +astroid==3.0.0 ; python_version >= "3.10" and python_version < "3.13" +black==23.9.1 ; python_version >= "3.10" and python_version < "3.13" +cfgv==3.4.0 ; python_version >= "3.10" and python_version < "3.13" +click==8.1.7 ; python_version >= "3.10" and python_version < "3.13" +colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows") +dill==0.3.7 ; python_version >= "3.10" and python_version < "3.13" +distlib==0.3.7 ; python_version >= "3.10" and python_version < "3.13" +filelock==3.12.4 ; python_version >= "3.10" and python_version < "3.13" +flake8==6.1.0 ; python_version >= "3.10" and python_version < "3.13" +identify==2.5.30 ; python_version >= "3.10" and python_version < "3.13" +isort==5.12.0 ; python_version >= "3.10" and python_version < "3.13" +mccabe==0.7.0 ; python_version >= "3.10" and python_version < "3.13" +mypy-extensions==1.0.0 ; python_version >= "3.10" and python_version < "3.13" +nodeenv==1.8.0 ; python_version >= "3.10" and python_version < "3.13" +packaging==23.2 ; python_version >= "3.10" and python_version < "3.13" +pathspec==0.11.2 ; python_version >= "3.10" and python_version < "3.13" +platformdirs==3.11.0 ; python_version >= "3.10" and python_version < "3.13" +pre-commit==3.4.0 ; python_version >= "3.10" and python_version < "3.13" +pycodestyle==2.11.0 ; python_version >= "3.10" and python_version < "3.13" +pyflakes==3.1.0 ; python_version >= "3.10" and python_version < "3.13" +pylint==3.0.0 ; python_version >= "3.10" and python_version < "3.13" 
+pyyaml==6.0.1 ; python_version >= "3.10" and python_version < "3.13" +setuptools==68.2.2 ; python_version >= "3.10" and python_version < "3.13" +tomli==2.0.1 ; python_version >= "3.10" and python_version < "3.13" +tomlkit==0.12.1 ; python_version >= "3.10" and python_version < "3.13" +typing-extensions==4.8.0 ; python_version >= "3.10" and python_version < "3.11" +virtualenv==20.24.5 ; python_version >= "3.10" and python_version < "3.13" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..ab0a0465 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,98 @@ +absl-py==2.0.0 ; python_version >= "3.10" and python_version < "3.13" +appdirs==1.4.4 ; python_version >= "3.10" and python_version < "3.13" +appnope==0.1.3 ; python_version >= "3.10" and python_version < "3.13" and (platform_system == "Darwin" or sys_platform == "darwin") +asttokens==2.4.0 ; python_version >= "3.10" and python_version < "3.13" +backcall==0.2.0 ; python_version >= "3.10" and python_version < "3.13" +cachetools==5.3.1 ; python_version >= "3.10" and python_version < "3.13" +certifi==2023.7.22 ; python_version >= "3.10" and python_version < "3.13" +cffi==1.16.0 ; python_version >= "3.10" and python_version < "3.13" and implementation_name == "pypy" +charset-normalizer==3.3.0 ; python_version >= "3.10" and python_version < "3.13" +click==8.1.7 ; python_version >= "3.10" and python_version < "3.13" +cloudpickle==2.2.1 ; python_version >= "3.10" and python_version < "3.13" +colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows") +comm==0.1.4 ; python_version >= "3.10" and python_version < "3.13" +contourpy==1.1.1 ; python_version >= "3.10" and python_version < "3.13" +cycler==0.12.0 ; python_version >= "3.10" and python_version < "3.13" +debugpy==1.8.0 ; python_version >= "3.10" and python_version < "3.13" +decorator==5.1.1 ; python_version >= "3.10" and python_version < "3.13" 
+docker-pycreds==0.4.0 ; python_version >= "3.10" and python_version < "3.13" +exceptiongroup==1.1.3 ; python_version >= "3.10" and python_version < "3.11" +executing==2.0.0 ; python_version >= "3.10" and python_version < "3.13" +farama-notifications==0.0.4 ; python_version >= "3.10" and python_version < "3.13" +filelock==3.12.4 ; python_version >= "3.10" and python_version < "3.13" +fonttools==4.43.0 ; python_version >= "3.10" and python_version < "3.13" +fsspec==2023.9.2 ; python_version >= "3.10" and python_version < "3.13" +gitdb==4.0.10 ; python_version >= "3.10" and python_version < "3.13" +gitpython==3.1.37 ; python_version >= "3.10" and python_version < "3.13" +google-auth-oauthlib==1.0.0 ; python_version >= "3.10" and python_version < "3.13" +google-auth==2.23.2 ; python_version >= "3.10" and python_version < "3.13" +grpcio==1.59.0 ; python_version >= "3.10" and python_version < "3.13" +gym-notices==0.0.8 ; python_version >= "3.10" and python_version < "3.13" +gym==0.26.2 ; python_version >= "3.10" and python_version < "3.13" +gymnasium==0.29.1 ; python_version >= "3.10" and python_version < "3.13" +idna==3.4 ; python_version >= "3.10" and python_version < "3.13" +ipykernel==6.25.2 ; python_version >= "3.10" and python_version < "3.13" +ipython==8.16.1 ; python_version >= "3.10" and python_version < "3.13" +jedi==0.19.1 ; python_version >= "3.10" and python_version < "3.13" +jinja2==3.1.2 ; python_version >= "3.10" and python_version < "3.13" +jupyter-client==8.3.1 ; python_version >= "3.10" and python_version < "3.13" +jupyter-core==5.3.2 ; python_version >= "3.10" and python_version < "3.13" +kiwisolver==1.4.5 ; python_version >= "3.10" and python_version < "3.13" +markdown==3.4.4 ; python_version >= "3.10" and python_version < "3.13" +markupsafe==2.1.3 ; python_version >= "3.10" and python_version < "3.13" +matplotlib-inline==0.1.6 ; python_version >= "3.10" and python_version < "3.13" +matplotlib==3.8.0 ; python_version >= "3.10" and python_version < 
"3.13" +mpmath==1.3.0 ; python_version >= "3.10" and python_version < "3.13" +nest-asyncio==1.5.8 ; python_version >= "3.10" and python_version < "3.13" +networkx==3.1 ; python_version >= "3.10" and python_version < "3.13" +numpy==1.26.0 ; python_version >= "3.10" and python_version < "3.13" +oauthlib==3.2.2 ; python_version >= "3.10" and python_version < "3.13" +packaging==23.2 ; python_version >= "3.10" and python_version < "3.13" +pandas==2.1.1 ; python_version >= "3.10" and python_version < "3.13" +parso==0.8.3 ; python_version >= "3.10" and python_version < "3.13" +pathtools==0.1.2 ; python_version >= "3.10" and python_version < "3.13" +pexpect==4.8.0 ; python_version >= "3.10" and python_version < "3.13" and sys_platform != "win32" +pickleshare==0.7.5 ; python_version >= "3.10" and python_version < "3.13" +pillow==10.0.1 ; python_version >= "3.10" and python_version < "3.13" +platformdirs==3.11.0 ; python_version >= "3.10" and python_version < "3.13" +prompt-toolkit==3.0.39 ; python_version >= "3.10" and python_version < "3.13" +protobuf==4.24.4 ; python_version >= "3.10" and python_version < "3.13" +psutil==5.9.5 ; python_version >= "3.10" and python_version < "3.13" +ptyprocess==0.7.0 ; python_version >= "3.10" and python_version < "3.13" and sys_platform != "win32" +pure-eval==0.2.2 ; python_version >= "3.10" and python_version < "3.13" +pyasn1-modules==0.3.0 ; python_version >= "3.10" and python_version < "3.13" +pyasn1==0.5.0 ; python_version >= "3.10" and python_version < "3.13" +pybind11==2.11.1 ; python_version >= "3.10" and python_version < "3.13" +pycparser==2.21 ; python_version >= "3.10" and python_version < "3.13" and implementation_name == "pypy" +pygments==2.16.1 ; python_version >= "3.10" and python_version < "3.13" +pyparsing==3.1.1 ; python_version >= "3.10" and python_version < "3.13" +python-box==7.1.1 ; python_version >= "3.10" and python_version < "3.13" +python-dateutil==2.8.2 ; python_version >= "3.10" and python_version < "3.13" 
+pytz==2023.3.post1 ; python_version >= "3.10" and python_version < "3.13" +pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.10" and python_version < "3.13" +pyyaml==6.0.1 ; python_version >= "3.10" and python_version < "3.13" +pyzmq==25.1.1 ; python_version >= "3.10" and python_version < "3.13" +requests-oauthlib==1.3.1 ; python_version >= "3.10" and python_version < "3.13" +requests==2.31.0 ; python_version >= "3.10" and python_version < "3.13" +rsa==4.9 ; python_version >= "3.10" and python_version < "3.13" +seaborn==0.13.0 ; python_version >= "3.10" and python_version < "3.13" +sentry-sdk==1.31.0 ; python_version >= "3.10" and python_version < "3.13" +setproctitle==1.3.3 ; python_version >= "3.10" and python_version < "3.13" +setuptools-scm==8.0.4 ; python_version >= "3.10" and python_version < "3.13" +setuptools==68.2.2 ; python_version >= "3.10" and python_version < "3.13" +six==1.16.0 ; python_version >= "3.10" and python_version < "3.13" +smmap==5.0.1 ; python_version >= "3.10" and python_version < "3.13" +stack-data==0.6.3 ; python_version >= "3.10" and python_version < "3.13" +sympy==1.12 ; python_version >= "3.10" and python_version < "3.13" +tensorboard-data-server==0.7.1 ; python_version >= "3.10" and python_version < "3.13" +tensorboard==2.14.1 ; python_version >= "3.10" and python_version < "3.13" +tomli==2.0.1 ; python_version >= "3.10" and python_version < "3.11" +torch==2.1.0 ; python_version >= "3.10" and python_version < "3.13" +tornado==6.3.3 ; python_version >= "3.10" and python_version < "3.13" +traitlets==5.11.2 ; python_version >= "3.10" and python_version < "3.13" +typing-extensions==4.8.0 ; python_version >= "3.10" and python_version < "3.13" +tzdata==2023.3 ; python_version >= "3.10" and python_version < "3.13" +urllib3==2.0.6 ; python_version >= "3.10" and python_version < "3.13" +wandb==0.15.12 ; python_version >= "3.10" and python_version < "3.13" +wcwidth==0.2.8 ; 
python_version >= "3.10" and python_version < "3.13" +werkzeug==3.0.0 ; python_version >= "3.10" and python_version < "3.13" diff --git a/scripts/cluster_scripts/run_imitation_cluster.py b/scripts/cluster_scripts/run_imitation_cluster.py deleted file mode 100644 index be1263c0..00000000 --- a/scripts/cluster_scripts/run_imitation_cluster.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -"""Run sample factory experiments on a SLURM cluster.""" -import argparse -import os -import pathlib -import shutil -from datetime import datetime -from subprocess import Popen - -from cfgs.config import PROJECT_PATH -from scripts.cluster_scripts.utils import Overrides - - -def make_code_snap(experiment, code_path, str_time): - """Copy code to directory to ensure that the run launches with correct commit. - - Args: - experiment (str): Name of experiment - code_path (str): Path to where we are saving the code. - str_time (str): Unique time identifier used to distinguish - experiments with same name. - - Returns - ------- - snap_dir (str): path to where the code has been copied. 
- """ - if len(code_path) > 0: - snap_dir = pathlib.Path(code_path) - else: - snap_dir = pathlib.Path.cwd() - snap_dir /= str_time - snap_dir /= f'{experiment}' - snap_dir.mkdir(exist_ok=True, parents=True) - - def copy_dir(dir, pat): - dst_dir = snap_dir / 'code' / dir - dst_dir.mkdir(exist_ok=True, parents=True) - for f in (src_dir / dir).glob(pat): - shutil.copy(f, dst_dir / f.name) - - dirs_to_copy = [ - '.', './cfgs/', './cfgs/algorithm', './cfgs/imitation', - './nocturne/envs/', './nocturne/pybind11', - '.examples/imitation_learning', './build' - ] - src_dir = pathlib.Path(PROJECT_PATH) - for dir in dirs_to_copy: - copy_dir(dir, '*.py') - copy_dir(dir, '*.yaml') - - return snap_dir - - -def main(): - """Launch experiments on SLURM cluster by overriding Hydra config.""" - username = os.environ["USER"] - parser = argparse.ArgumentParser() - parser.add_argument('experiment', type=str) - parser.add_argument('--code_path', - default=f'/checkpoint/{username}/nocturne/il_runs') - parser.add_argument('--dry', action='store_true') - args = parser.parse_args() - - now = datetime.now() - str_time = now.strftime('%Y.%m.%d_%H%M%S') - snap_dir = make_code_snap(args.experiment, args.code_path, str_time) - overrides = Overrides() - overrides.add('hydra/launcher', ['submitit_slurm']) - overrides.add('hydra.launcher.partition', ['learnlab']) - overrides.add('experiment', [args.experiment]) - overrides.add('num_files', [1000]) - overrides.add('epochs', [1400]) - overrides.add('seed', [0, 1, 2, 3, 4]) - - cmd = [ - 'python', - str(snap_dir / 'code' / 'nocturne' / 'utils' / 'imitation_learning' / - 'train.py'), '-m' - ] - print(cmd) - cmd += overrides.cmd() - - if args.dry: - print(' '.join(cmd)) - else: - env = os.environ.copy() - env['PYTHONPATH'] = str(snap_dir / 'code') - p = Popen(cmd, env=env) - p.communicate() - - -if __name__ == '__main__': - main() diff --git a/scripts/cluster_scripts/run_ppo_cluster.py b/scripts/cluster_scripts/run_ppo_cluster.py deleted file mode 
100644 index fa9da6ab..00000000 --- a/scripts/cluster_scripts/run_ppo_cluster.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Run on-policy PPO experiments on a SLURM cluster.""" -import argparse -import os -import pathlib -import shutil -from datetime import datetime -from subprocess import Popen - -from cfgs.config import PROJECT_PATH -from scripts.cluster_scripts.utils import Overrides - - -def make_code_snap(experiment, code_path, slurm_dir='exp'): - """Copy code to directory to ensure that the run launches with correct commit. - - Args: - experiment (str): Name of experiment - code_path (str): Path to where we are saving the code. - str_time (str): Unique time identifier used to distinguish - experiments with same name. - - Returns - ------- - snap_dir (str): path to where the code has been copied. 
- """ - now = datetime.now() - if len(code_path) > 0: - snap_dir = pathlib.Path(code_path) / slurm_dir - else: - snap_dir = pathlib.Path.cwd() / slurm_dir - snap_dir /= now.strftime('%Y.%m.%d') - snap_dir /= now.strftime('%H%M%S') + f'_{experiment}' - snap_dir.mkdir(exist_ok=True, parents=True) - - def copy_dir(dir, pat): - dst_dir = snap_dir / 'code' / dir - dst_dir.mkdir(exist_ok=True, parents=True) - for f in (src_dir / dir).glob(pat): - shutil.copy(f, dst_dir / f.name) - - dirs_to_copy = [ - '.', './cfgs/', './cfgs/algo', './algos/', './algos/ppo/', - './algos/ppo/ppo_utils', './algos/ppo/r_mappo', - './algos/ppo/r_mappo/algorithm', './algos/ppo/utils', - '.nocturne/envs/', './nocturne_utils/', '.nocturne/python/', './build' - ] - src_dir = pathlib.Path(os.path.dirname(os.getcwd())) - for dir in dirs_to_copy: - copy_dir(dir, '*.py') - copy_dir(dir, '*.yaml') - - return snap_dir - - -def main(): - """Launch experiments on SLURM cluster by overriding Hydra config.""" - parser = argparse.ArgumentParser() - parser.add_argument('experiment', type=str) - parser.add_argument('--code_path', - default='/checkpoint/eugenevinitsky/nocturne') - parser.add_argument('--dry', action='store_true') - args = parser.parse_args() - - snap_dir = make_code_snap(args.experiment, args.code_path) - print(str(snap_dir)) - overrides = Overrides() - overrides.add('hydra/launcher', ['submitit_slurm']) - overrides.add('hydra.launcher.partition', ['learnlab']) - overrides.add('experiment', [args.experiment]) - # experiment parameters - overrides.add('episode_length', [200]) - # algo - overrides.add('algo', ['ppo']) - overrides.add('algo.entropy_coef', [-0.001, 0.0, 0.001]) - overrides.add('algo.n_rollout_threads', [128]) - # rewards - overrides.add('rew_cfg.goal_achieved_bonus', [10, 50]) - # misc - overrides.add('scenario_path', - [PROJECT_PATH / 'scenarios/twenty_car_intersection.json']) - - cmd = [ - 'python', - str(snap_dir / 'code' / 'algos' / 'ppo' / 'nocturne_runner.py'), '-m' - ] - 
print(cmd) - cmd += overrides.cmd() - - if args.dry: - print(' '.join(cmd)) - else: - env = os.environ.copy() - env['PYTHONPATH'] = str(snap_dir / 'code') - p = Popen(cmd, env=env) - p.communicate() - - -if __name__ == '__main__': - main() diff --git a/scripts/cluster_scripts/run_rllib_cluster.py b/scripts/cluster_scripts/run_rllib_cluster.py deleted file mode 100644 index c97961dc..00000000 --- a/scripts/cluster_scripts/run_rllib_cluster.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Run rllib experiments on a SLURM cluster.""" -import argparse -import os -import pathlib -import shutil -from datetime import datetime -from subprocess import Popen - -from cfgs.config import PROJECT_PATH -from scripts.utils import Overrides - - -def make_code_snap(experiment, code_path, str_time): - """Copy code to directory to ensure that the run launches with correct commit. - - Args: - experiment (str): Name of experiment - code_path (str): Path to where we are saving the code. - str_time (str): Unique time identifier used to distinguish - experiments with same name. - - Returns - ------- - snap_dir (str): path to where the code has been copied. 
- """ - if len(code_path) > 0: - snap_dir = pathlib.Path(code_path) - else: - snap_dir = pathlib.Path.cwd() - snap_dir /= str_time - snap_dir /= f'{experiment}' - snap_dir.mkdir(exist_ok=True, parents=True) - - def copy_dir(dir, pat): - dst_dir = snap_dir / 'code' / dir - dst_dir.mkdir(exist_ok=True, parents=True) - for f in (src_dir / dir).glob(pat): - shutil.copy(f, dst_dir / f.name) - - dirs_to_copy = [ - '.', './cfgs/', './examples/', './cfgs/algorithm', './envs/', - './nocturne_utils/', './python/', './scenarios/', './build' - ] - src_dir = pathlib.Path(PROJECT_PATH) - for dir in dirs_to_copy: - copy_dir(dir, '*.py') - copy_dir(dir, '*.yaml') - - return snap_dir - - -def main(): - """Launch experiments on SLURM cluster by overriding Hydra config.""" - username = os.environ["USER"] - parser = argparse.ArgumentParser() - parser.add_argument('experiment', type=str) - parser.add_argument( - '--code_path', - default=f'/checkpoint/{username}/nocturne/sample_factory_runs') - parser.add_argument('--dry', action='store_true') - args = parser.parse_args() - - now = datetime.now() - str_time = now.strftime('%Y.%m.%d_%H%M%S') - snap_dir = make_code_snap(args.experiment, args.code_path, str_time) - overrides = Overrides() - overrides.add('hydra/launcher', ['ray']) - overrides.add('hydra.launcher.partition', ['learnlab']) - - cmd = [ - 'python', - str(snap_dir / 'code' / 'examples' / 'run_rllib.py'), '-m' - ] - cmd += overrides.cmd() - print(cmd) - - if args.dry: - print(' '.join(cmd)) - else: - env = os.environ.copy() - env['PYTHONPATH'] = str(snap_dir / 'code') - p = Popen(cmd, env=env) - p.communicate() - - -if __name__ == '__main__': - main() diff --git a/scripts/cluster_scripts/run_sample_factory_cluster.py b/scripts/cluster_scripts/run_sample_factory_cluster.py deleted file mode 100644 index a313be12..00000000 --- a/scripts/cluster_scripts/run_sample_factory_cluster.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Run sample factory experiments on a SLURM cluster.""" -import argparse -import os -import pathlib -import shutil -from datetime import datetime -from subprocess import Popen - -from cfgs.config import PROJECT_PATH -from scripts.cluster_scripts.utils import Overrides - - -def make_code_snap(experiment, code_path, str_time): - """Copy code to directory to ensure that the run launches with correct commit. - - Args: - experiment (str): Name of experiment - code_path (str): Path to where we are saving the code. - str_time (str): Unique time identifier used to distinguish - experiments with same name. - - Returns - ------- - snap_dir (str): path to where the code has been copied. - """ - if len(code_path) > 0: - snap_dir = pathlib.Path(code_path) - else: - snap_dir = pathlib.Path.cwd() - snap_dir /= str_time - snap_dir /= f'{experiment}' - snap_dir.mkdir(exist_ok=True, parents=True) - - def copy_dir(dir, pat): - dst_dir = snap_dir / 'code' / dir - dst_dir.mkdir(exist_ok=True, parents=True) - for f in (src_dir / dir).glob(pat): - shutil.copy(f, dst_dir / f.name) - - dirs_to_copy = [ - '.', './cfgs/', './examples/', './examples/sample_factory_files', - './cfgs/algorithm', './nocturne/envs/', './nocturne_utils/', - './nocturne/python/', './scenarios/', './build' - ] - src_dir = pathlib.Path(PROJECT_PATH) - for dir in dirs_to_copy: - copy_dir(dir, '*.py') - copy_dir(dir, '*.yaml') - - return snap_dir - - -def main(): - """Launch experiments on SLURM cluster by overriding Hydra config.""" - parser = argparse.ArgumentParser() - parser.add_argument('experiment', type=str) - parser.add_argument( - '--code_path', - default='/checkpoint/eugenevinitsky/nocturne/sample_factory_runs') - parser.add_argument('--dry', action='store_true') - args = parser.parse_args() - - now = datetime.now() - str_time = now.strftime('%Y.%m.%d_%H%M%S') 
- snap_dir = make_code_snap(args.experiment, args.code_path, str_time) - overrides = Overrides() - overrides.add('hydra/launcher', ['submitit_slurm']) - overrides.add('hydra.launcher.partition', ['learnlab']) - overrides.add('experiment', [args.experiment]) - overrides.add('num_files', [10000]) - overrides.add('seed', [0, 1, 2, 3, 4]) - overrides.add('scenario.max_visible_road_points', [500]) - overrides.add('rew_cfg.collision_penalty', [0, -80.0]) - - cmd = [ - 'python', - str(snap_dir / 'code' / 'examples' / 'sample_factory_files' / - 'run_sample_factory.py'), '-m', 'algorithm=APPO' - ] - print(cmd) - cmd += overrides.cmd() - - if args.dry: - print(' '.join(cmd)) - else: - env = os.environ.copy() - env['PYTHONPATH'] = str(snap_dir / 'code') - p = Popen(cmd, env=env) - p.communicate() - - -if __name__ == '__main__': - main() diff --git a/scripts/cluster_scripts/utils.py b/scripts/cluster_scripts/utils.py deleted file mode 100644 index 21be3246..00000000 --- a/scripts/cluster_scripts/utils.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-"""Storage for SLURM running utilities.""" - - -class Overrides(object): - """Utility class used to convert commands into a bash runnable string.""" - - def __init__(self): - """Initialize class.""" - self.kvs = dict() - - def add(self, key, values): - """Add each of the desired key value pairs into a dict.""" - value = ','.join(str(v) for v in values) - assert key not in self.kvs - self.kvs[key] = value - - def cmd(self): - """Append the keys together into a command that can be run.""" - cmd = [] - for k, v in self.kvs.items(): - cmd.append(f'{k}={v}') - return cmd diff --git a/scripts/data_analysis/corner_case_search.py b/scripts/data_analysis/corner_case_search.py deleted file mode 100644 index f181d6b5..00000000 --- a/scripts/data_analysis/corner_case_search.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Run through the data to look for cases where there are undesirable corner cases. - -The cases we currently check for are: -1) is a vehicle initialized in a colliding state with another vehicle -2) is a vehicle initialized in a colliding state with a road edge? 
-""" -from copy import deepcopy -from pathlib import Path -import os -import sys - -import hydra -import imageio -import matplotlib.pyplot as plt -import numpy as np - -from cfgs.config import PROCESSED_TRAIN_NO_TL, PROJECT_PATH, \ - get_scenario_dict, set_display_window -from nocturne import Simulation - - -@hydra.main(config_path="../../cfgs/", config_name="config") -def main(cfg): - """See file docstring.""" - set_display_window() - SAVE_IMAGES = False - MAKE_MOVIES = False - output_folder = 'corner_case_vis' - output_path = Path(PROJECT_PATH) / f'nocturne_utils/{output_folder}' - output_path.mkdir(exist_ok=True) - files = list(os.listdir(PROCESSED_TRAIN_NO_TL)) - files = [file for file in files if 'tfrecord' in file] - # track the number of collisions at each time-step - collide_counter = np.zeros((2, 90)) - file_has_veh_collision_counter = 0 - file_has_edge_collision_counter = 0 - total_edge_collision_counter = 0 - total_veh_collision_counter = 0 - initialized_collision_counter = 0 - total_veh_counter = 0 - - start_cfg = deepcopy(cfg) - start_cfg['scenario']['start_time'] = 0 - start_cfg['scenario']['allow_non_vehicles'] = False - for file_idx, file in enumerate(files): - found_collision = False - edge_collision = False - sim = Simulation(os.path.join(PROCESSED_TRAIN_NO_TL, file), - get_scenario_dict(cfg)) - vehs = sim.getScenario().getObjectsThatMoved() - # this checks if the vehicles has actually moved any distance at all - valid_vehs = [] - for veh in vehs: - veh.expert_control = True - obj_pos = veh.getPosition() - obj_pos = np.array([obj_pos.x, obj_pos.y]) - goal_pos = veh.getGoalPosition() - goal_pos = np.array([goal_pos.x, goal_pos.y]) - if np.linalg.norm(obj_pos - goal_pos) > 0.5: - valid_vehs.append(veh) - veh_edge_collided = [False for _ in vehs] - veh_veh_collided = [False for _ in vehs] - initialized_collided = [False for _ in vehs] - for time_index in range(90): - for veh_index, veh in enumerate(valid_vehs): - collided = veh.getCollided() - if 
collided and not np.isclose(veh.getPosition().x, -10000.0): - collide_counter[int(veh.collision_type) - 1, - time_index] += 1 - if int(veh.collision_type) == 2: - veh_edge_collided[veh_index] = True - if int(veh.collision_type) == 1: - veh_veh_collided[veh_index] = True - if time_index == 0: - initialized_collided[veh_index] = True - if np.isclose(veh.getPosition().x, -10000.0): - collided = False - if time_index == 0 and not found_collision and collided and SAVE_IMAGES: - img = sim.getScenario().getImage( - img_width=1600, - img_height=1600, - draw_target_positions=True, - padding=50.0, - ) - fig = plt.figure() - plt.imshow(img) - plt.savefig(f'{output_folder}/{file}.png') - plt.close(fig) - if not found_collision and collided: - found_collision = True - if int(veh.collision_type) == 1: - file_has_veh_collision_counter += 1 - else: - file_has_edge_collision_counter += 1 - edge_collision = True - sim.step(0.1) - total_veh_counter += len(valid_vehs) - total_edge_collision_counter += np.sum(veh_edge_collided) - total_veh_collision_counter += np.sum(veh_veh_collided) - initialized_collision_counter += np.sum(initialized_collided) - print(f'at file {file_idx} we have {collide_counter} collisions for a\ - ratio of {collide_counter / (file_idx + 1)}') - print(f'the number of files that have a veh collision at all is\ - {file_has_veh_collision_counter / (file_idx + 1)}') - print(f'the number of files that have a edge collision at all is\ - {file_has_edge_collision_counter / (file_idx + 1)}') - print(f'the fraction of vehicles that have had an edge collision\ - is {total_edge_collision_counter / total_veh_counter}') - print(f'the fraction of vehicles that have had a collision at all\ - is {(total_edge_collision_counter + total_veh_collision_counter) / total_veh_counter}' - ) - print( - f'the fraction of vehicles that are initialized in collision are \ - {initialized_collision_counter / total_veh_counter}') - if found_collision and edge_collision and MAKE_MOVIES: - 
movie_frames = [] - fig = plt.figure() - sim = Simulation(os.path.join(PROCESSED_TRAIN_NO_TL, file), - get_scenario_dict(start_cfg)) - vehs = sim.getScenario().getObjectsThatMoved() - for veh in vehs: - veh.expert_control = True - for time_index in range(89): - movie_frames.append(sim.getScenario().getImage( - img_width=1600, img_height=1600)) - sim.step(0.1) - movie_frames = np.array(movie_frames) - imageio.mimwrite(f'{output_path}/{os.path.basename(file)}.mp4', - movie_frames, - fps=10) - if file_has_edge_collision_counter + file_has_veh_collision_counter > 10: - sys.exit() - - -if __name__ == '__main__': - main() diff --git a/scripts/data_analysis/data_analysis.py b/scripts/data_analysis/data_analysis.py deleted file mode 100644 index aab91bf1..00000000 --- a/scripts/data_analysis/data_analysis.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Utils that we use to understand the datasets we are working with.""" -import os - -import hydra -import matplotlib.pyplot as plt -import numpy as np - -from cfgs.config import PROCESSED_TRAIN_NO_TL, PROJECT_PATH, get_scenario_dict -from nocturne import Simulation - - -def run_analysis(cfg, files): - """Compute the expert accelerations and number of vehicles across the dataset. 
- - Args: - files ([str]): List of files to analyze - - Returns - ------- - [np.float], [np.float]: List of expert accels, list of number - of moving vehicles in file - """ - observed_accels = [] - num_vehicles = [] - cfg['start_time'] = 0 - cfg['allow_non_vehicles'] = False - for file_idx, file in enumerate(files): - sim = Simulation(os.path.join(PROCESSED_TRAIN_NO_TL, file), - get_scenario_dict(cfg)) - vehs = sim.scenario().getObjectsThatMoved() - # this checks if the vehicles has actually moved any distance at all - valid_vehs = [] - prev_speeds = [] - for veh in vehs: - veh.expert_control = True - obj_pos = veh.position - goal_pos = veh.target_position - if (obj_pos - goal_pos).norm() > 0.5: - valid_vehs.append(veh) - if veh in valid_vehs: - veh_speed = sim.scenario().getExpertSpeeds(0, veh.id) - veh_speed = np.linalg.norm([veh_speed.x, veh_speed.y]) - if not np.isclose(veh.position.x, -10000.0): - prev_speeds.append( - (veh_speed, True, [veh.position.x, veh.position.y], 0)) - else: - prev_speeds.append( - (veh_speed, False, [veh.position.x, - veh.position.y], 0)) - num_vehicles.append(len(valid_vehs)) - sim.step(0.1) - for i in range(1, 90): - for veh_index, veh in enumerate(valid_vehs): - # check if the vehicle is actually valid - veh_speed = sim.scenario().getExpertSpeeds(i, veh.id) - veh_speed = veh_speed.norm() - if np.isclose(veh.position.x, -10000.0): - prev_speeds[veh_index] = (veh_speed, False, - [veh.position.x, - veh.position.y], i) - else: - # approximate the accel using an euler step but only - # if the prior step was a step where the agent - # was valid - if prev_speeds[veh_index][1]: - accel = (veh_speed - prev_speeds[veh_index][0]) / 0.1 - observed_accels.append(accel) - prev_speeds[veh_index] = (veh_speed, True, - [veh.position.x, - veh.position.y], i) - sim.step(0.1) - - if file_idx > 300: - break - return observed_accels, num_vehicles - - -@hydra.main(config_path="../../cfgs/", config_name="config") -def analyze_accels(cfg): - """Plot the 
expert accels and number of observed moving vehicles.""" - f_path = PROCESSED_TRAIN_NO_TL - with open(os.path.join(f_path, 'valid_files.txt')) as file: - files = [line.strip() for line in file] - observed_accels_valid, num_vehicles_valid = run_analysis(cfg, files) - with open(os.path.join(f_path, 'invalid_files.txt')) as file: - files = [line.strip() for line in file] - _, num_vehicles_invalid = run_analysis(cfg, files) - - output_path = os.path.join(PROJECT_PATH, 'nocturne_utils/data_analysis') - if not os.path.exists(output_path): - os.makedirs(output_path) - observed_accels = np.array(observed_accels_valid) - print(np.max(observed_accels)) - print(np.min(observed_accels)) - observed_accels = observed_accels[np.abs(observed_accels) < 5] - plt.figure() - plt.hist(observed_accels) - plt.savefig(os.path.join(output_path, 'observed_accels.png')) - plt.figure() - plt.hist( - num_vehicles_valid, - bins=30, - density=True, - histtype='step', - cumulative=True, - ) - plt.hist( - num_vehicles_invalid, - bins=30, - density=True, - histtype='step', - cumulative=True, - ) - plt.legend(['valid', 'invalid']) - plt.savefig(os.path.join(output_path, 'num_vehs_cdf.png')) - plt.figure() - plt.hist(num_vehicles_valid, bins=30, alpha=0.5, color='b') - plt.axvline(np.mean(num_vehicles_valid), color='b', label='_nolegend_') - plt.hist(num_vehicles_invalid, bins=30, alpha=0.5, color='r') - plt.axvline(np.mean(num_vehicles_invalid), color='r', label='_nolegend_') - plt.legend(['valid', 'invalid']) - plt.savefig(os.path.join(output_path, 'num_vehs_hist.png')) - - -if __name__ == '__main__': - analyze_accels() diff --git a/scripts/data_analysis/speed_test.py b/scripts/data_analysis/speed_test.py deleted file mode 100644 index 6b51383c..00000000 --- a/scripts/data_analysis/speed_test.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Utils that we use to understand the datasets we are working with.""" -import json -import os -import time - -import hydra -import numpy as np - -from cfgs.config import PROCESSED_TRAIN_NO_TL, get_scenario_dict, set_display_window -from nocturne import Simulation, Action - - -def run_speed_test(files, cfg): - """Compute the expert accelerations and number of vehicles across the dataset. - - Args: - files ([str]): List of files to analyze - - Returns - ------- - [np.float], [np.float]: List of expert accels, list of number - of moving vehicles in file - """ - times_list = [] - for file in files: - sim = Simulation(os.path.join(PROCESSED_TRAIN_NO_TL, file), - get_scenario_dict(cfg)) - vehs = sim.scenario().getObjectsThatMoved() - scenario = sim.getScenario() - veh = vehs[np.random.randint(len(vehs))] - t = time.perf_counter() - _ = scenario.flattened_visible_state(veh, 80, (180 / 180) * np.pi) - veh.apply_action(Action(1.0, 1.0, 1.0)) - sim.step(0.1) - times_list.append(time.perf_counter() - t) - print('avg, std. time to get obs is {}, {}'.format(np.mean(times_list), - np.std(times_list))) - - -@hydra.main(config_path="../../cfgs/", config_name="config") -def analyze_accels(cfg): - """Plot the expert accels and number of observed moving vehicles.""" - f_path = PROCESSED_TRAIN_NO_TL - with open(os.path.join(f_path, 'valid_files.json')) as file: - valid_veh_dict = json.load(file) - files = list(valid_veh_dict.keys()) - run_speed_test(files[0:10], cfg) - - -if __name__ == '__main__': - set_display_window() - analyze_accels() diff --git a/scripts/json_generation/make_solvable_files.py b/scripts/json_generation/make_solvable_files.py deleted file mode 100644 index 97fcf52c..00000000 --- a/scripts/json_generation/make_solvable_files.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Find all cases where collisions are required to achieve the goal. - -Due to errors in Waymo labeling, some space that is crossable is mistakenly -labeled as a road edge. This file finds most of those cases. -""" -import argparse -import json -import multiprocessing -from multiprocessing import Process, Lock -import os - -import numpy as np - -from cfgs.config import PROCESSED_TRAIN_NO_TL, PROCESSED_VALID_NO_TL, \ - get_default_scenario_dict, set_display_window -from nocturne import Simulation - - -def is_file_valid(file_list, output_file, output_file_invalid, lock=None): - """Test if file requires an agent to collide with a road edge to get to goal. - - We test for this by making the agent have very thin width. If an agent - is in collision with a road edge despite this thin width, it was crossing - that road edge because that road edge was on the way to its goal. We also - shrink the length to avoid the cases where the vehicle is initialized - in collision with a road edge. - - If a file has more than 80% of the agents need to collide with a road edge to get - to goal, we store it in output_file_invalid instead. - - Args - ---- - file_list ([str]): list of file paths. - output_file (str): file to store valid json names. - output_file_invalid (_type_): file to store invalid json names. - lock (Lock, optional): Lock used for safe file writing. - """ - file_valid_dict = {} - file_invalid_dict = {} - cfg = get_default_scenario_dict() - cfg['start_time'] = 0 - cfg['allow_non_vehicles'] = False - for i, file in enumerate(file_list): - sim = Simulation(str(file), cfg) - vehs = sim.scenario().getObjectsThatMoved() - for veh in vehs: - # we shrink the vehicle width and length to tiny values. 
- # then, if a vehicle collides with a road edge, we know it had to - # cross that road edge to actually get to its goal - veh._scale_shape(length_scale=0.3, width_scale=0.1) - veh.expert_control = True - # dict tracking which vehicles were forced to collide with - # an edge on their way to goal - veh_edge_collided = {veh.id: False for veh in vehs} - for _ in range(90): - for veh in vehs: - collided = veh.collided - # the second conditions check whether the - # the vehicle has "collided", but only because - # it was invalid at the same time as another - # vehicle was invalid - if collided and not np.isclose(veh.position.x, -10000.0): - if int(veh.collision_type) == 2: - veh_edge_collided[veh.id] = True - sim.step(0.1) - # write all the vehicle ids that had a collision to a file - # so that we know which vehicles should be set to be experts - # if more than 80% of the vehicles are experts, we throw the file - # away - if np.sum(list( - veh_edge_collided.values())) / len(veh_edge_collided) < 0.8: - storage = file_valid_dict - else: - storage = file_invalid_dict - storage[str(file).split('/')[-1]] = [ - key for key, val in veh_edge_collided.items() if val - ] - - for file, return_dict in zip([output_file, output_file_invalid], - [file_valid_dict, file_invalid_dict]): - if lock is not None: - lock.acquire() - with open(file, 'r') as fp: - temp_dict = json.load(fp) - with open(file, 'w') as fp: - temp_dict.update(return_dict) - json.dump(temp_dict, fp, indent=4) - if lock is not None: - lock.release() - - -def main(): - """See file docstring.""" - set_display_window() - parser = argparse.ArgumentParser( - description="Load and show waymo scenario data.") - parser.add_argument( - "--parallel", - action='store_true', - help="If true, split the conversion up over multiple processes") - parser.add_argument( - "--n_processes", - type=int, - default=40, - help="Number of processes over which to split file generation") - parser.add_argument("--datatype", - default='train', - 
type=str, - choices=['train', 'valid'], - nargs='+', - help="Whether to convert, train or valid data") - - args = parser.parse_args() - # TODO(eugenevinitsky) this currently assumes that we have - # constructed the scenes without traffic lights and not - # other scenes - folders_to_convert = [] - if 'train' in args.datatype: - folders_to_convert.append(PROCESSED_TRAIN_NO_TL) - if 'valid' in args.datatype: - folders_to_convert.append(PROCESSED_VALID_NO_TL) - - lock = Lock() - for folder_path in folders_to_convert: - files = os.listdir(folder_path) - files = [ - os.path.join(folder_path, file) for file in files - if 'tfrecord' in file - ] - - output_file = os.path.join(folder_path, 'valid_files.json') - with open(output_file, 'w') as fp: - json.dump({}, fp) - - output_file_invalid = os.path.join(folder_path, 'invalid_files.json') - with open(output_file_invalid, 'w') as fp: - json.dump({}, fp) - - if args.parallel: - # leave some cpus free but have at least one and don't use more than n_processes - num_cpus = min(max(multiprocessing.cpu_count() - 2, 1), - args.n_processes) - num_files = len(files) - process_list = [] - for i in range(num_cpus): - p = Process(target=is_file_valid, - args=[ - files[i * num_files // num_cpus:(i + 1) * - num_files // num_cpus], output_file, - output_file_invalid, lock - ]) - p.start() - process_list.append(p) - - for process in process_list: - process.join() - else: - is_file_valid(files, output_file, output_file_invalid, lock=None) - - -if __name__ == '__main__': - main() diff --git a/scripts/json_generation/run_waymo_constructor.py b/scripts/json_generation/run_waymo_constructor.py deleted file mode 100644 index 1f0579fd..00000000 --- a/scripts/json_generation/run_waymo_constructor.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-"""Utils for converting TFRecords into Nocturne compatible JSON.""" -import argparse -from pathlib import Path -import os -import multiprocessing - -from cfgs.config import TRAIN_DATA_PATH, VALID_DATA_PATH, PROCESSED_TRAIN_NO_TL, \ - PROCESSED_VALID_NO_TL, PROCESSED_TRAIN, PROCESSED_VALID -import waymo_scenario_construction as waymo - - -def convert_files(args, files, output_dir, rank): - """Convert the list of files into nocturne compatible JSON. - - Args - ---- - args (NameSpace): args from the argument parser. - files ([str]): list of file paths for TFRecords that we should convert - output_dir (str): output path in which we should store the JSON - rank (int): rank of the process. - """ - cnt = 0 - for file in files: - inner_count = 0 - for data in waymo.load_protobuf(str(file)): - file_name = os.path.basename(file).split( - '.')[1] + f'_{inner_count}.json' - # this file is useful for debugging - if args.output_txt and cnt == 0 and rank == 0: - with open(os.path.basename(file).split('.')[1] + '.txt', - 'w') as f: - f.write(str(data)) - waymo.waymo_to_scenario(os.path.join(output_dir, file_name), data, - args.no_tl) - inner_count += 1 - cnt += 1 - if cnt >= args.num and not args.all_files: - break - print(inner_count) - - -def main(): - """Run the json generators.""" - parser = argparse.ArgumentParser( - description="Load and show waymo scenario data.") - parser.add_argument("--file", - type=str, - default=os.path.join( - TRAIN_DATA_PATH, - 'training.tfrecord-00995-of-01000')) - parser.add_argument("--num", type=int, default=1) - parser.add_argument("--output_txt", - action='store_true', - help='output a txt version of one of the protobufs') - parser.add_argument("--all_files", - action='store_true', - help='If true, iterate through the whole dataset') - parser.add_argument("--no_tl", - action='store_true', - help="If true, do not generate JSON files\ - that have a traffic light in them") - parser.add_argument( - "--parallel", - action='store_true', - help="If 
true, split the conversion up over multiple processes") - parser.add_argument("--datatype", - default='train', - type=str, - choices=['train', 'valid'], - nargs='+', - help="Whether to convert, train or valid data") - - args = parser.parse_args() - folders_to_convert = [] - if 'train' in args.datatype: - folders_to_convert.append( - (TRAIN_DATA_PATH, - PROCESSED_TRAIN_NO_TL if args.no_tl else PROCESSED_TRAIN)) - if 'valid' in args.datatype: - folders_to_convert.append( - (VALID_DATA_PATH, - PROCESSED_VALID_NO_TL if args.no_tl else PROCESSED_VALID)) - - for folder_path, output_dir in folders_to_convert: - if args.num > 1 or args.all_files: - files = list(Path(folder_path).glob('*tfrecord*')) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - if not args.all_files: - files = files[0:args.num] - - else: - output_dir = os.getcwd() - files = [args.file] - - if args.parallel: - # leave some cpus free but have at least one and don't use more than 40 - num_cpus = min(max(multiprocessing.cpu_count() - 2, 1), 40) - num_files = len(files) - process_list = [] - for i in range(num_cpus): - p = multiprocessing.Process( - target=convert_files, - args=[ - args, files[i * num_files // num_cpus:(i + 1) * - num_files // num_cpus], output_dir, i - ]) - p.start() - process_list.append(p) - - for process in process_list: - process.join() - else: - convert_files(args, files, output_dir, rank=0) - - -if __name__ == "__main__": - main() diff --git a/scripts/json_generation/waymo_scenario_construction.py b/scripts/json_generation/waymo_scenario_construction.py deleted file mode 100644 index 29406b44..00000000 --- a/scripts/json_generation/waymo_scenario_construction.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-"""Construct a scenarios.json file from a waymos protobuf.""" - -from collections import defaultdict -import math -import json -from typing import Any, Dict, Iterator, Optional - -import tensorflow as tf -from waymo_open_dataset.protos import map_pb2, scenario_pb2 - -from cfgs.config import ERR_VAL - -_WAYMO_OBJECT_STR = { - scenario_pb2.Track.TYPE_UNSET: "unset", - scenario_pb2.Track.TYPE_VEHICLE: "vehicle", - scenario_pb2.Track.TYPE_PEDESTRIAN: "pedestrian", - scenario_pb2.Track.TYPE_CYCLIST: "cyclist", - scenario_pb2.Track.TYPE_OTHER: "other", -} - -_WAYMO_ROAD_STR = { - map_pb2.TrafficSignalLaneState.LANE_STATE_UNKNOWN: "unknown", - map_pb2.TrafficSignalLaneState.LANE_STATE_ARROW_STOP: "arrow_stop", - map_pb2.TrafficSignalLaneState.LANE_STATE_ARROW_CAUTION: "arrow_caution", - map_pb2.TrafficSignalLaneState.LANE_STATE_ARROW_GO: "arrow_go", - map_pb2.TrafficSignalLaneState.LANE_STATE_STOP: "stop", - map_pb2.TrafficSignalLaneState.LANE_STATE_CAUTION: "caution", - map_pb2.TrafficSignalLaneState.LANE_STATE_GO: "go", - map_pb2.TrafficSignalLaneState.LANE_STATE_FLASHING_STOP: "flashing_stop", - map_pb2.TrafficSignalLaneState.LANE_STATE_FLASHING_CAUTION: - "flashing_caution", -} - - -def _parse_object_state( - states: scenario_pb2.ObjectState, - final_state: scenario_pb2.ObjectState) -> Dict[str, Any]: - """Construct a dict representing the trajectory and goals of an object. - - Args: - states (scenario_pb2.ObjectState): Protobuf of object state - final_state (scenario_pb2.ObjectState): Protobuf of last valid object state. - - Returns - ------- - Dict[str, Any]: Dict representing an object. - """ - return { - "position": [{ - "x": state.center_x, - "y": state.center_y - } if state.valid else { - "x": ERR_VAL, - "y": ERR_VAL - } for state in states], - "width": - final_state.width, - "length": - final_state.length, - "heading": [ - math.degrees(state.heading) if state.valid else ERR_VAL - for state in states - ], # Use rad here? 
- "velocity": [{ - "x": state.velocity_x, - "y": state.velocity_y - } if state.valid else { - "x": ERR_VAL, - "y": ERR_VAL - } for state in states], - "valid": [state.valid for state in states], - "goalPosition": { - "x": final_state.center_x, - "y": final_state.center_y - } - } - - -def _init_tl_object(track): - """Construct a dict representing the traffic light states.""" - returned_dict = {} - for lane_state in track.lane_states: - returned_dict[lane_state.lane] = { - 'state': _WAYMO_ROAD_STR[lane_state.state], - 'x': lane_state.stop_point.x, - 'y': lane_state.stop_point.y - } - return returned_dict - - -def _init_object(track: scenario_pb2.Track) -> Optional[Dict[str, Any]]: - """Construct a dict representing the state of the object (vehicle, cyclist, pedestrian). - - Args: - track (scenario_pb2.Track): protobuf representing the scenario - - Returns - ------- - Optional[Dict[str, Any]]: dict representing the trajectory and velocity of an object. - """ - final_valid_index = 0 - for i, state in enumerate(track.states): - if state.valid: - final_valid_index = i - - obj = _parse_object_state(track.states, track.states[final_valid_index]) - obj["type"] = _WAYMO_OBJECT_STR[track.object_type] - return obj - - -def _init_road(map_feature: map_pb2.MapFeature) -> Optional[Dict[str, Any]]: - """Convert an element of the map protobuf to a dict representing its coordinates and type.""" - feature = map_feature.WhichOneof("feature_data") - if feature == 'stop_sign': - p = getattr(map_feature, - map_feature.WhichOneof("feature_data")).position - geometry = [{"x": p.x, "y": p.y}] - elif feature != 'crosswalk' and feature != 'speed_bump': - geometry = [{ - "x": p.x, - "y": p.y - } for p in getattr(map_feature, map_feature.WhichOneof( - "feature_data")).polyline] - else: - geometry = [{ - "x": p.x, - "y": p.y - } for p in getattr(map_feature, map_feature.WhichOneof( - "feature_data")).polygon] - return { - "geometry": geometry, - "type": map_feature.WhichOneof("feature_data"), - 
} - - -def load_protobuf(protobuf_path: str) -> Iterator[scenario_pb2.Scenario]: - """Yield the sharded protobufs from the TFRecord.""" - dataset = tf.data.TFRecordDataset(protobuf_path, compression_type="") - for data in dataset: - scenario = scenario_pb2.Scenario() - scenario.ParseFromString(bytearray(data.numpy())) - yield scenario - - -def waymo_to_scenario(scenario_path: str, - protobuf: scenario_pb2.Scenario, - no_tl: bool = False) -> None: - """Dump a JSON File containing the protobuf parsed into the right format. - - Args - ---- - scenario_path (str): path to dump the json file - protobuf (scenario_pb2.Scenario): the protobuf we are converting - no_tl (bool, optional): If true, environments with traffic lights are not dumped. - """ - # read the protobuf file to get the right state - - # write the json file - # construct the road geometries - # place the initial position of the vehicles - - # Construct the traffic light states - tl_dict = defaultdict(lambda: { - 'state': [], - 'x': [], - 'y': [], - 'time_index': [] - }) - all_keys = ['state', 'x', 'y'] - i = 0 - for dynamic_map_state in protobuf.dynamic_map_states: - traffic_light_dict = _init_tl_object(dynamic_map_state) - # there is a traffic light but we don't want traffic light scenes so just return - if (no_tl and len(traffic_light_dict) > 0): - return - for id, value in traffic_light_dict.items(): - for state_key in all_keys: - tl_dict[id][state_key].append(value[state_key]) - tl_dict[id]['time_index'].append(i) - i += 1 - - # Construct the object states - objects = [] - for track in protobuf.tracks: - obj = _init_object(track) - if obj is not None: - objects.append(obj) - - # Construct the map states - roads = [] - for map_feature in protobuf.map_features: - road = _init_road(map_feature) - if road is not None: - roads.append(road) - - scenario = { - "name": scenario_path.split('/')[-1], - "objects": objects, - "roads": roads, - "tl_states": tl_dict - } - with open(scenario_path, "w") as f: - 
json.dump(scenario, f) diff --git a/scripts/paper_plots/README.md b/scripts/paper_plots/README.md deleted file mode 100644 index 2787e62b..00000000 --- a/scripts/paper_plots/README.md +++ /dev/null @@ -1 +0,0 @@ -This folder is used to reproduce all the plots from paper TO BE TITLED. \ No newline at end of file diff --git a/scripts/paper_plots/create_zsc_plot.py b/scripts/paper_plots/create_zsc_plot.py deleted file mode 100644 index 5c3df20e..00000000 --- a/scripts/paper_plots/create_zsc_plot.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Utilities for plotting ZSC results.""" -import os - -import matplotlib.pyplot as plt -import numpy as np - - -def create_heat_map(file, title, save_path, white_switch): - """Construct a heatmap of the ZSC results. - - Args: - ---- - file (str): file path to zsc results - title (str): title of the plot - save_path (str): path to save it at - white_switch (float): if the value is greater than white_switch - we write the cell text as black. This is just to make - the plots more readable. - """ - np_arr = np.load(os.path.join(zsc_path, file)) - np_arr_mean = np.mean(np_arr, axis=-1) - - agent_indices = [f'Agent {i}' for i in range(np_arr.shape[0])] - - fig, ax = plt.subplots() - ax.imshow(np_arr_mean) - - # Show all ticks and label them with the respective list entries - ax.set_xticks(np.arange(len(agent_indices)), labels=agent_indices) - ax.set_yticks(np.arange(len(agent_indices)), labels=agent_indices) - - # Rotate the tick labels and set their alignment. - plt.setp(ax.get_xticklabels(), - rotation=45, - ha="right", - rotation_mode="anchor") - - # Loop over data dimensions and create text annotations. 
- for i in range(len(agent_indices)): - for j in range(len(agent_indices)): - if np_arr_mean[i, j] > white_switch: - color = 'black' - else: - color = 'w' - ax.text(j, - i, - f'{np.round(np_arr_mean[i, j], decimals=2)}', - ha="center", - va="center", - color=color) - - ax.set_title(title) - fig.tight_layout() - plt.savefig(save_path) - - -def compute_average_change(file): - """Compare cross play to self play.""" - np_arr = np.load(os.path.join(zsc_path, file)) - np_arr_mean = np.mean(np_arr, axis=-1) - self_play = np.mean(np.diag(np_arr_mean)) - cross_play = np.mean( - np_arr_mean[np.where(~np.eye(np_arr_mean.shape[0], dtype=bool))]) - self_play_std = np.std(np.diag(np_arr_mean)) / np.sqrt( - np_arr_mean.shape[0]) - cross_play_std = np.std( - np_arr_mean[np.where(~np.eye(np_arr_mean.shape[0], dtype=bool))] - ) / np.sqrt(np_arr_mean.shape[0]**2 - np_arr_mean.shape[0]) - print( - f'self play: {self_play} ± {self_play_std}, cross play: {cross_play} ± {cross_play_std}' - ) - - -if __name__ == '__main__': - # zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.23/srt_v10/17.02.40/23/srt_v10' - # zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.28/srt_12/16.43.16/4/srt_12' - # zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.28/srt_12/16.43.16/4/srt_12' - # zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.28/srt_12/16.43.16/4/srt_12' - # 10000 on valid - # zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.28/srt_12/16.43.16/4/srt_12' - # 10000 on train - # zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.28/srt_12/16.43.16/4/srt_12' - zsc_path = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.06.01/srt_v27/17.35.33/123/srt_v27' - create_heat_map('train_zsc_goal.npy', - "Cross-play Goal Rate", - 'cross_play_heat_map.png', - white_switch=.8) - create_heat_map('train_zsc_collision.npy', - "Cross-play Collision Rate", - 'cross_play_collision_map.png', - white_switch=0.18) - 
compute_average_change('train_zsc_goal.npy') - compute_average_change('train_zsc_collision.npy') diff --git a/scripts/paper_plots/eval_il_agents.py b/scripts/paper_plots/eval_il_agents.py deleted file mode 100644 index 9f79ee26..00000000 --- a/scripts/paper_plots/eval_il_agents.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Run script that generates summary statistics for a folder of IL agents.""" -import json -import os - -import numpy as np -import torch - -from nocturne.utils.eval.average_displacement import compute_average_displacement -from cfgs.config import PROCESSED_VALID_NO_TL, PROJECT_PATH - -if __name__ == '__main__': - outer_model_folder = '/checkpoint/eugenevinitsky/nocturne/sweep/imitation/2022.06.13/arxiv_il_v4_1kf/18.49.39' - models = [] - cfg_dicts = [] - for (dirpath, dirnames, filenames) in os.walk(outer_model_folder): - if 'configs.json' in filenames: - with open(os.path.join(dirpath, 'configs.json'), 'r') as file: - cfg_dict = json.load(file) - # now snag the model with the largest checkpoint - max_val = -100 - cur_model_name = None - for file in filenames: - if '.pth' in file: - checkpoint_val = int(file.split('.')[0].split('_')[-1]) - if checkpoint_val > max_val: - max_val = checkpoint_val - cur_model_name = file - cfg_dicts.append(cfg_dict) - model = torch.load(os.path.join(dirpath, cur_model_name)).to('cpu') - model.actions_grids = [x.to('cpu') for x in model.actions_grids] - model.eval() - model.nn[0].eval() - models.append(model) - results = np.zeros((len(cfg_dicts), 8)) - for i, (cfg_dict, model) in enumerate(zip(cfg_dicts, models)): - ade, fde, collisions, goals = compute_average_displacement( - PROCESSED_VALID_NO_TL, model=model, configs=cfg_dict) - results[i, 0] = ade[0] - results[i, 1] = ade[1] - results[i, 2] = fde[0] - results[i, 3] = fde[1] 
- results[i, 4] = collisions[0] - results[i, 5] = collisions[1] - results[i, 6] = goals[0] - results[i, 7] = goals[1] - np.save(os.path.join(PROJECT_PATH, 'scripts/paper_plots/il_results.npy'), - results) - print( - f'ade {np.mean(results[:, 0])} ± {np.std(results[:, 0]) / np.sqrt(results[:, 0].shape[0])}' - ) - print( - f'fde {np.mean(results[:, 2])} ± {np.std(results[:, 2]) / np.sqrt(results[:, 0].shape[0])}' - ) - print( - f'collisions {np.mean(results[:, 4])} ± {np.std(results[:, 4]) / np.sqrt(results[:, 0].shape[0])}' - ) - print( - f'goals {np.mean(results[:, 6])} ± {np.std(results[:, 6]) / np.sqrt(results[:, 0].shape[0])}' - ) diff --git a/scripts/paper_plots/eval_sample_factory.py b/scripts/paper_plots/eval_sample_factory.py deleted file mode 100644 index 601b4a71..00000000 --- a/scripts/paper_plots/eval_sample_factory.py +++ /dev/null @@ -1,1317 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Run a policy over the entire train set. 
- -TODO(ev) refactor, this is wildly similar to visualize_sample_factory -""" - -from copy import deepcopy -from collections import deque, defaultdict -import itertools -from itertools import repeat -import json -import multiprocessing as mp -import os -import sys - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import torch - -from sample_factory.algorithms.appo.actor_worker import transform_dict_observations -from sample_factory.algorithms.appo.learner import LearnerWorker -from sample_factory.algorithms.appo.model import create_actor_critic -from sample_factory.algorithms.appo.model_utils import get_hidden_size -from sample_factory.algorithms.utils.action_distributions import ContinuousActionDistribution, \ - CategoricalActionDistribution -from sample_factory.algorithms.utils.arguments import load_from_checkpoint -from sample_factory.algorithms.utils.multi_agent_wrapper import MultiAgentWrapper, is_multiagent_env -from sample_factory.envs.create_env import create_env -from sample_factory.utils.utils import log, AttrDict -from examples.sample_factory_files.run_sample_factory import register_custom_components - -from cfgs.config import PROCESSED_VALID_NO_TL, PROCESSED_TRAIN_NO_TL, \ - ERR_VAL, set_display_window - -CB_color_cycle = [ - '#377eb8', '#ff7f00', '#4daf4a', '#f781bf', '#a65628', '#984ea3', - '#999999', '#e41a1c', '#dede00' -] - - -class Bunch(object): - """Converts a dict into an object with the keys as attributes.""" - - def __init__(self, adict): - self.__dict__.update(adict) - - -def ccw(A, B, C): - """Blah.""" - return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0]) - - -def intersect(A, B, C, D): - """Check if two line segments AB and CD intersect.""" - return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D) - - -def poly_intersection(poly1, poly2): - """Compute if two polylines intersect.""" - for i, p1_first_point in enumerate(poly1[:-1]): - p1_second_point = poly1[i + 1] - - for j, 
p2_first_point in enumerate(poly2[:-1]): - p2_second_point = poly2[j + 1] - - if intersect(p1_first_point, p1_second_point, p2_first_point, - p2_second_point): - return True - - return False - - -def run_rollouts(env, - cfg, - device, - expert_trajectory_dict, - distance_bins, - intersection_bins, - veh_intersection_dict, - actor_1, - actor_2=None): - """Run a single rollout. - - Args: - env (_type_): Env we are running. - cfg (dict): dictionary configuring the environment. - device (str): device you want to run the model on - expert_trajectory_dict (dict[str]: np.array): expert trajectories - keyed by ID - distance_bins (np.array): bins used to compute the goal - rate as a function of the starting distance from goal - intersection_bins (np.array): bins used to compute the - goal rate as a function of the number of intersections - between paths in the expert trajectories - veh_intersection_dict (dict[str]: np.array): dict mapping - a vehicle ID to the number of intersections it - experienced - actor_1: SampleFactory agent - actor_2: SampleFactory agent. 
Will be none unless we're testing for - ZSC - - Returns - ------- - avg_goal: average goal rate of agents - avg_collisions: average collision rate of agents - avg_veh_edge_collisions: average veh-edge collision rate - avg_veh_veh_collisions: average veh-veh collision rate - success_rate_by_distance: np.array(number of distance bins, 4) - where the row indexes how far the vehicle was from goal - at initialization and where the column index is - [goal rate, collision rate, veh-veh collision rate, counter of - number of vehicles in this bin] - success_rate_by_num_agents: np.array(maximum number of vehicles, 4) - where the row index is how many vehicles were in this episode - where the column index is [goal rate, collision rate, - veh-veh collision rate, counter of - number of vehicles in this bin] - success_rate_by_intersections: np.array(number of intersections, 4) - where the row index is how many intersections that vehicle - had and where the column index is [goal rate, collision rate, - veh-veh collision rate, counter of - number of vehicles in this bin] - np.mean(ades): mean average displacement error of all vehicles in the - episode - np.mean(fdes): mean final displacement error of all vehicles in the - episode - veh_counter(int): how many vehicles were in that episode - """ - episode_rewards = [deque([], maxlen=100) for _ in range(env.num_agents)] - true_rewards = [deque([], maxlen=100) for _ in range(env.num_agents)] - obs = env.reset() - rollout_traj_dict = defaultdict(lambda: np.zeros((80, 2))) - # some key information for tracking statistics - goal_dist = env.goal_dist_normalizers - valid_indices = env.valid_indices - agent_id_to_env_id_map = env.agent_id_to_env_id_map - env_id_to_agent_id_map = env.env_id_to_agent_id_map - - success_rate_by_num_agents = np.zeros((cfg.max_num_vehicles, 4)) - success_rate_by_distance = np.zeros((distance_bins.shape[-1], 4)) - success_rate_by_intersections = np.zeros((intersection_bins.shape[-1], 4)) - if actor_2 is not None: 
- # pick which valid indices go to which policy - val = np.random.uniform() - if val < 0.5: - num_choice = int(np.floor(len(valid_indices) / 2.0)) - else: - num_choice = int(np.ceil(len(valid_indices) / 2.0)) - indices_1 = list( - np.random.choice(valid_indices, num_choice, replace=False)) - indices_2 = [val for val in valid_indices if val not in indices_1] - rnn_states = torch.zeros( - [env.num_agents, get_hidden_size(cfg)], - dtype=torch.float32, - device=device) - rnn_states_2 = torch.zeros( - [env.num_agents, get_hidden_size(cfg)], - dtype=torch.float32, - device=device) - else: - rnn_states = torch.zeros( - [env.num_agents, get_hidden_size(cfg)], - dtype=torch.float32, - device=device) - episode_reward = np.zeros(env.num_agents) - finished_episode = [False] * env.num_agents - goal_achieved = [False] * len(valid_indices) - collision_observed = [False] * len(valid_indices) - veh_veh_collision_observed = [False] * len(valid_indices) - veh_counter = 0 - - while not all(finished_episode): - with torch.no_grad(): - obs_torch = AttrDict(transform_dict_observations(obs)) - for key, x in obs_torch.items(): - obs_torch[key] = torch.from_numpy(x).to(device).float() - - # we have to make a copy before doing the pass - # because (for some reason), sample factory is making - # some changes to the obs in the forwards pass - # TBD what it is - if actor_2 is not None: - obs_torch_2 = deepcopy(obs_torch) - policy_outputs_2 = actor_2(obs_torch_2, - rnn_states_2, - with_action_distribution=True) - - policy_outputs = actor_1(obs_torch, - rnn_states, - with_action_distribution=True) - - # sample actions from the distribution by default - # also update the indices that should be drawn from the second policy - # with its outputs - actions = policy_outputs.actions - if actor_2 is not None: - actions[indices_2] = policy_outputs_2.actions[indices_2] - - action_distribution = policy_outputs.action_distribution - if isinstance(action_distribution, ContinuousActionDistribution): - if not 
cfg.continuous_actions_sample: # TODO: add similar option for discrete actions - actions = action_distribution.means - if actor_2 is not None: - actions[ - indices_2] = policy_outputs_2.action_distribution.means[ - indices_2] - if isinstance(action_distribution, CategoricalActionDistribution): - if not cfg.discrete_actions_sample: - actions = policy_outputs['action_logits'].argmax(axis=1) - if actor_2 is not None: - actions[indices_2] = policy_outputs_2[ - 'action_logits'].argmax(axis=1)[indices_2] - - actions = actions.cpu().numpy() - - for veh in env.unwrapped.get_objects_that_moved(): - # only check vehicles we are actually controlling - if veh.expert_control is False: - rollout_traj_dict[veh.id][ - env.step_num] = veh.position.numpy() - if int(veh.collision_type) == 1: - if veh.getID() in env_id_to_agent_id_map.keys(): - agent_id = env_id_to_agent_id_map[veh.getID()] - idx = valid_indices.index(agent_id) - veh_veh_collision_observed[idx] = 1 - - rnn_states = policy_outputs.rnn_states - if actor_2 is not None: - rnn_states_2 = policy_outputs_2.rnn_states - - obs, rew, done, infos = env.step(actions) - episode_reward += rew - - for i, index in enumerate(valid_indices): - goal_achieved[ - i] = infos[index]['goal_achieved'] or goal_achieved[i] - collision_observed[ - i] = infos[index]['collided'] or collision_observed[i] - - for agent_i, done_flag in enumerate(done): - if done_flag: - finished_episode[agent_i] = True - episode_rewards[agent_i].append(episode_reward[agent_i]) - true_rewards[agent_i].append(infos[agent_i].get( - 'true_reward', episode_reward[agent_i])) - log.info( - 'Episode finished for agent %d. 
Reward: %.3f, true_reward: %.3f', - agent_i, episode_reward[agent_i], - true_rewards[agent_i][-1]) - rnn_states[agent_i] = torch.zeros([get_hidden_size(cfg)], - dtype=torch.float32, - device=device) - episode_reward[agent_i] = 0 - - if all(finished_episode): - avg_episode_rewards_str, avg_true_reward_str = '', '' - for agent_i in range(env.num_agents): - avg_rew = np.mean(episode_rewards[agent_i]) - avg_true_rew = np.mean(true_rewards[agent_i]) - if not np.isnan(avg_rew): - if avg_episode_rewards_str: - avg_episode_rewards_str += ', ' - avg_episode_rewards_str += f'#{agent_i}: {avg_rew:.3f}' - if not np.isnan(avg_true_rew): - if avg_true_reward_str: - avg_true_reward_str += ', ' - avg_true_reward_str += f'#{agent_i}: {avg_true_rew:.3f}' - avg_goal = infos[0]['episode_extra_stats']['goal_achieved'] - avg_collisions = infos[0]['episode_extra_stats']['collided'] - avg_veh_edge_collisions = infos[0]['episode_extra_stats'][ - 'veh_edge_collision'] - avg_veh_veh_collisions = infos[0]['episode_extra_stats'][ - 'veh_veh_collision'] - success_rate_by_num_agents[len(valid_indices) - 1, - 0] += avg_goal - success_rate_by_num_agents[len(valid_indices) - 1, - 1] += avg_collisions - success_rate_by_num_agents[len(valid_indices) - 1, - 2] += np.mean( - veh_veh_collision_observed) - success_rate_by_num_agents[len(valid_indices) - 1, 3] += 1 - # track how well we do as a function of distance - for i, index in enumerate(valid_indices): - env_id = agent_id_to_env_id_map[index] - bin = np.searchsorted(distance_bins, goal_dist[env_id]) - success_rate_by_distance[bin - 1, :] += [ - goal_achieved[i], collision_observed[i], - veh_veh_collision_observed[i], 1 - ] - # track how well we do as number of intersections - for i, index in enumerate(valid_indices): - env_id = agent_id_to_env_id_map[index] - bin = min(veh_intersection_dict[env_id], - distance_bins.shape[-1] - 1) - success_rate_by_intersections[bin, :] += [ - goal_achieved[i], collision_observed[i], - veh_veh_collision_observed[i], 
1 - ] - # compute ADE and FDE - ades = [] - fdes = [] - for agent_id, traj in rollout_traj_dict.items(): - masking_arr = traj.sum(axis=1) - mask = (masking_arr != 0.0) * (masking_arr != - traj.shape[1] * ERR_VAL) - expert_mask_arr = expert_trajectory_dict[agent_id].sum( - axis=1) - expert_mask = (expert_mask_arr != 0.0) * ( - expert_mask_arr != traj.shape[1] * ERR_VAL) - ade = np.linalg.norm(traj - - expert_trajectory_dict[agent_id], - axis=-1)[mask * expert_mask] - ades.append(ade.mean()) - fde = np.linalg.norm( - traj - expert_trajectory_dict[agent_id], - axis=-1)[np.max(np.argwhere(mask * expert_mask))] - fdes.append(fde) - veh_counter += 1 - - log.info('Avg episode rewards: %s, true rewards: %s', - avg_episode_rewards_str, avg_true_reward_str) - log.info( - 'Avg episode reward: %.3f, avg true_reward: %.3f', - np.mean([ - np.mean(episode_rewards[i]) - for i in range(env.num_agents) - ]), - np.mean([ - np.mean(true_rewards[i]) for i in range(env.num_agents) - ])) - - return (avg_goal, avg_collisions, avg_veh_edge_collisions, - avg_veh_veh_collisions, success_rate_by_distance, - success_rate_by_num_agents, - success_rate_by_intersections, np.mean(ades), - np.mean(fdes), veh_counter) - - -def run_eval(cfgs, - test_zsc, - output_path, - scenario_dir, - files, - file_type, - device='cuda'): - """Eval a stored agent over all files in validation set. - - Args: - cfg (dict): configuration file for instantiating the agents and environment. 
- test_zsc (bool): if true, we play all agents against all agents - num_file_loops (int): how many times to loop over the file set - - Returns - ------- - None: None - """ - actor_critics = [] - if not isinstance(cfgs, list): - cfgs = [cfgs] - for i, cfg in enumerate(cfgs): - if not isinstance(cfg, Bunch): - cfg = Bunch(cfg) - cfg = load_from_checkpoint(cfg) - - render_action_repeat = cfg.render_action_repeat if cfg.render_action_repeat is not None else cfg.env_frameskip - if render_action_repeat is None: - log.warning('Not using action repeat!') - render_action_repeat = 1 - log.debug('Using action repeat %d during evaluation', - render_action_repeat) - - cfg.env_frameskip = 1 # for evaluation - cfg.num_envs = 1 - # this config is used for computing displacement errors - ade_cfg = deepcopy(cfg) - ade_cfg['remove_at_goal'] = False - ade_cfg['remove_at_collide'] = False - - def make_env_func(env_config): - return create_env(cfg.env, cfg=cfg, env_config=env_config) - - env = make_env_func(AttrDict({'worker_index': 0, 'vector_index': 0})) - env.seed(0) - - is_multiagent = is_multiagent_env(env) - if not is_multiagent: - env = MultiAgentWrapper(env) - - if hasattr(env.unwrapped, 'reset_on_init'): - # reset call ruins the demo recording for VizDoom - env.unwrapped.reset_on_init = False - - actor_critic = create_actor_critic(cfg, env.observation_space, - env.action_space) - - device = torch.device(device) - actor_critic.model_to_device(device) - - policy_id = cfg.policy_index - checkpoints = LearnerWorker.get_checkpoints( - LearnerWorker.checkpoint_dir(cfg, policy_id)) - checkpoint_dict = LearnerWorker.load_checkpoint(checkpoints, device) - actor_critic.load_state_dict(checkpoint_dict['model']) - actor_critics.append([i, actor_critic]) - - # we bin the success rate into bins of 10 meters between 0 and 400 - # the second dimension is the counts - distance_bins = np.linspace(0, 400, 40) - intersections_bins = np.linspace(0, 7, 7) - num_files = cfg['num_eval_files'] - 
num_file_loops = cfg['num_file_loops'] - # TODO(eugenevinitsky) horrifying copy and paste - if test_zsc: - goal_array = np.zeros((len(actor_critics), len(actor_critics), - num_file_loops * num_files)) - collision_array = np.zeros((len(actor_critics), len(actor_critics), - num_files * num_file_loops)) - success_rate_by_num_agents = np.zeros( - (len(actor_critics), len(actor_critics), cfg.max_num_vehicles, 4)) - success_rate_by_distance = np.zeros( - (len(actor_critics), len(actor_critics), distance_bins.shape[-1], - 4)) - success_rate_by_intersections = np.zeros( - (len(actor_critics), len(actor_critics), - intersections_bins.shape[-1], 4)) - ade_array = np.zeros((len(actor_critics), len(actor_critics), - num_file_loops * num_files)) - fde_array = np.zeros((len(actor_critics), len(actor_critics), - num_file_loops * num_files)) - veh_veh_collision_array = np.zeros( - (len(actor_critics), len(actor_critics), - num_file_loops * num_files)) - veh_edge_collision_array = np.zeros( - (len(actor_critics), len(actor_critics), - num_file_loops * num_files)) - else: - goal_array = np.zeros((len(actor_critics), num_file_loops * num_files)) - collision_array = np.zeros( - (len(actor_critics), num_file_loops * num_files)) - veh_veh_collision_array = np.zeros( - (len(actor_critics), num_file_loops * num_files)) - veh_edge_collision_array = np.zeros( - (len(actor_critics), num_file_loops * num_files)) - success_rate_by_num_agents = np.zeros( - (len(actor_critics), cfg.max_num_vehicles, 4)) - success_rate_by_distance = np.zeros( - (len(actor_critics), distance_bins.shape[-1], 4)) - success_rate_by_intersections = np.zeros( - (len(actor_critics), intersections_bins.shape[-1], 4)) - ade_array = np.zeros((len(actor_critics), num_file_loops * num_files)) - fde_array = np.zeros((len(actor_critics), num_file_loops * num_files)) - - if test_zsc: - output_generator = itertools.product(actor_critics, actor_critics) - else: - output_generator = actor_critics - - for output in 
output_generator: - if test_zsc: - (index_1, actor_1), (index_2, actor_2) = output - else: - (index_1, actor_1) = output - goal_frac = [] - collision_frac = [] - veh_veh_collision_frac = [] - veh_edge_collision_frac = [] - average_displacement_error = [] - final_displacement_error = [] - veh_counter = 0 - for loop_num in range(num_file_loops): - for file_num, file in enumerate(files[0:cfg['num_eval_files']]): - print(loop_num * cfg['num_eval_files'] + file_num) - print('file is {}'.format(os.path.join(scenario_dir, file))) - - env.unwrapped.files = [os.path.join(scenario_dir, file)] - - # step the env to its conclusion to generate the expert trajectories we compare against - env.cfg = ade_cfg - env.reset() - expert_trajectory_dict = defaultdict(lambda: np.zeros((80, 2))) - env.unwrapped.make_all_vehicles_experts() - for i in range(80): - for veh in env.unwrapped.get_objects_that_moved(): - expert_trajectory_dict[ - veh.id][i] = veh.position.numpy() - env.unwrapped.simulation.step(0.1) - - # compute the number of expert trajectories that intersect - # while filtering out the bits of the trajectory - # that were invalid - vehs_with_intersecting_ids = defaultdict(int) - for veh_id in expert_trajectory_dict.keys(): - for veh_id2 in expert_trajectory_dict.keys(): - if veh_id == veh_id2: - continue - trajectory = expert_trajectory_dict[veh_id] - trajectory2 = expert_trajectory_dict[veh_id2] - expert_mask_arr = trajectory.sum(axis=1) - expert_mask = (expert_mask_arr != 0.0) * ( - expert_mask_arr != trajectory.shape[1] * ERR_VAL) - trajectory = trajectory[expert_mask] - expert_mask_arr = trajectory2.sum(axis=1) - expert_mask = (expert_mask_arr != 0.0) * ( - expert_mask_arr != trajectory2.shape[1] * ERR_VAL) - trajectory2 = trajectory2[expert_mask] - if poly_intersection(trajectory, trajectory2): - vehs_with_intersecting_ids[ - veh_id] += poly_intersection( - trajectory, trajectory2) - - env.cfg = cfg - if test_zsc: - output = run_rollouts(env, cfg, device, - 
expert_trajectory_dict, - distance_bins, intersections_bins, - vehs_with_intersecting_ids, actor_1, - actor_2) - else: - output = run_rollouts(env, cfg, device, - expert_trajectory_dict, - distance_bins, intersections_bins, - vehs_with_intersecting_ids, actor_1) - - avg_goal, avg_collisions, avg_veh_edge_collisions, avg_veh_veh_collisions, \ - success_rate_by_distance_return, success_rate_by_num_agents_return, \ - success_rate_by_intersections_return, \ - _, _, _ = output - # TODO(eugenevinitsky) hideous copy and pasting - goal_frac.append(avg_goal) - collision_frac.append(avg_collisions) - veh_veh_collision_frac.append(avg_veh_veh_collisions) - veh_edge_collision_frac.append(avg_veh_edge_collisions) - if test_zsc: - success_rate_by_distance[ - index_1, index_2] += success_rate_by_distance_return - success_rate_by_num_agents[ - index_1, index_2] += success_rate_by_num_agents_return - success_rate_by_intersections[ - index_1, - index_2] += success_rate_by_intersections_return - else: - success_rate_by_distance[ - index_1] += success_rate_by_distance_return - success_rate_by_num_agents[ - index_1] += success_rate_by_num_agents_return - success_rate_by_intersections[ - index_1] += success_rate_by_intersections_return - # do some logging - log.info( - f'Avg goal achieved {np.mean(goal_frac)}±{np.std(goal_frac) / len(goal_frac)}' - ) - log.info( - f'Avg veh-veh collisions {np.mean(veh_veh_collision_frac)}±\ - {np.std(veh_veh_collision_frac) / np.sqrt(len(veh_veh_collision_frac))}' - ) - log.info( - f'Avg veh-edge collisions {np.mean(veh_edge_collision_frac)}±\ - {np.std(veh_edge_collision_frac) / np.sqrt(len(veh_edge_collision_frac))}' - ) - log.info(f'Avg num collisions {np.mean(collision_frac)}±\ - {np.std(collision_frac) / len(collision_frac)}') - - env.cfg = ade_cfg - # okay, now run the rollout one more time but this time set - # remove_at_goal and remove_at_collide to be false so we can do the ADE computations - if test_zsc: - output = run_rollouts(env, cfg, 
device, - expert_trajectory_dict, - distance_bins, intersections_bins, - vehs_with_intersecting_ids, actor_1, - actor_2) - else: - output = run_rollouts(env, cfg, device, - expert_trajectory_dict, - distance_bins, intersections_bins, - vehs_with_intersecting_ids, actor_1) - - _, _, _, _, _, _, _, ade, fde, veh_counter = output - average_displacement_error.append(ade) - final_displacement_error.append(fde) - log.info(f'Avg ADE {np.mean(average_displacement_error)}±\ - {np.std(average_displacement_error) / np.sqrt(len(average_displacement_error))}' - ) - log.info(f'Avg FDE {np.mean(final_displacement_error)}±\ - {np.std(final_displacement_error) / np.sqrt(len(final_displacement_error))}' - ) - - if test_zsc: - goal_array[index_1, index_2] = goal_frac - collision_array[index_1, index_2] = collision_frac - veh_veh_collision_array[index_1, index_2] = veh_veh_collision_frac - veh_edge_collision_array[index_1, - index_2] = veh_edge_collision_frac - ade_array[index_1, index_2] = average_displacement_error - fde_array[index_1, index_2] = final_displacement_error - else: - goal_array[index_1] = goal_frac - collision_array[index_1] = collision_frac - veh_veh_collision_array[index_1] = veh_veh_collision_frac - veh_edge_collision_array[index_1] = veh_edge_collision_frac - ade_array[index_1] = average_displacement_error - fde_array[index_1] = final_displacement_error - - if test_zsc: - file_type += '_zsc' - np.save(os.path.join(output_path, '{}_goal.npy'.format(file_type)), - goal_array) - np.save(os.path.join(output_path, '{}_collision.npy'.format(file_type)), - collision_array) - np.save( - os.path.join(output_path, - '{}_veh_veh_collision.npy'.format(file_type)), - veh_veh_collision_array) - np.save( - os.path.join(output_path, - '{}_veh_edge_collision.npy'.format(file_type)), - veh_edge_collision_array) - np.save(os.path.join(output_path, '{}_ade.npy'.format(file_type)), - ade_array) - np.save(os.path.join(output_path, '{}_fde.npy'.format(file_type)), - fde_array) - with 
open( - os.path.join(output_path, - '{}_success_by_veh_number.npy'.format(file_type)), - 'wb') as f: - np.save(f, success_rate_by_num_agents) - with open( - os.path.join(output_path, - '{}_success_by_dist.npy'.format(file_type)), - 'wb') as f: - np.save(f, success_rate_by_distance) - with open( - os.path.join( - output_path, - '{}_success_by_num_intersections.npy'.format(file_type)), - 'wb') as f: - np.save(f, success_rate_by_intersections) - - env.close() - - return - - -def load_wandb(experiment_name, cfg_filter, force_reload=False): - """Pull the results from the wandb server. - - Args: - ---- - experiment_name (str): name of the wandb group. - cfg_filter (function): use the config dict to filter - which runs are actually kept - force_reload (bool, optional): if true we overwrite - the wandb csv - even if it exists. - """ - if not os.path.exists( - 'wandb_{}.csv'.format(experiment_name)) or force_reload: - import wandb - - api = wandb.Api() - entity, project = "eugenevinitsky", "nocturne4" # set to your entity and project - runs = api.runs(entity + "/" + project) - - history_list = [] - for run in runs: - if run.name == experiment_name: - - # # .config contains the hyperparameters. - # # We remove special values that start with _. - config = { - k: v - for k, v in run.config.items() if not k.startswith('_') - } - if cfg_filter(config): - history_df = run.history() - history_df['seed'] = config['seed'] - history_df['num_files'] = config['num_files'] - history_list.append(history_df) - - runs_df = pd.concat(history_list) - runs_df.to_csv('wandb_{}.csv'.format(experiment_name)) - - -def plot_goal_achieved(experiment_name, global_step_cutoff=3e9): - """Use the WANDB CSV to plot number of train steps v. 
goal achieved.""" - plt.figure(dpi=300) - df = pd.read_csv("wandb_{}.csv".format(experiment_name)) - df["timestamp"] = pd.to_datetime(df["_timestamp"] * 1e9) - - # technically not correct if the number of seeds varies by num_files - # but in this case we're alright - num_seeds = len(np.unique(df.seed.values)) - - values_num_files = np.unique(df.num_files.values) - column = "0_aux/avg_goal_achieved" - dfs = [] - stdevs = [] - for num_files in values_num_files: - if num_files == 1: - continue - - df_n = df[(df.num_files == num_files) - & (df.global_step < global_step_cutoff)].set_index( - 'global_step').sort_index() - if num_files == -1: - col_name = 134453 - else: - col_name = num_files - dfs.append((df_n[column] * 100).ewm( - halflife=500, - min_periods=10).mean().rename(f"num_files={col_name}")) - stdevs.append((df_n[column] * 100).ewm(halflife=500, - min_periods=10).std()) - - values_num_files = [ - val if val != -1 else 134453 for val in values_num_files - ] - temp = list(zip(values_num_files, dfs, stdevs)) - temp = sorted(temp, key=lambda x: x[0]) - values_num_files, dfs, stdevs = zip(*temp) - ax = plt.gca() - for i in range(len(dfs)): - x = dfs[i].index.values - y = dfs[i].values - yerr = stdevs[i].replace(np.nan, 0) / np.sqrt(num_seeds) - ax.plot(x, - y, - label=f'Training Files: {values_num_files[i]}', - color=CB_color_cycle[i]) - ax.fill_between(x, - y - 2 * yerr, - y + 2 * yerr, - color=CB_color_cycle[i], - alpha=0.3) - plt.grid(ls='--', color='#ccc') - plt.legend() - plt.xlabel("Environment step") - plt.ylabel("% Goals Achieved") - plt.savefig('goal_achieved_v_step', bbox_inches='tight', pad_inches=0.1) - - -def eval_generalization(output_folder, - num_eval_files, - files, - file_type, - scenario_dir, - num_file_loops, - test_zsc=False, - cfg_filter=None): - """Evaluate generalization for all agent checkpoints in output_folder. 
- - Args: - ---- - output_folder (str): path to folder containing agent checkpoints - num_eval_files (int): how many files to use for eval - files (list[str]): list of scenario files to use for eval - file_type (str): 'train' or 'test' used to indicate if we are - testing in or out of distribution - scenario_dir (str): path to directory where `files` are stored - num_file_loops (int): how many times to iterate over the files. - Used for in-distribution testing if - in-distribution we trained on M files - but we want to test over N files where - N > M. - test_zsc (bool, optional): If true we pair up ever - agent in the folder and compute - all the cross-play scores. Defaults to False. - cfg_filter (_type_, optional): function used to filter over - whether eval should actually be done on that - agent. Filters using the agent config dict. - """ - file_paths = [] - cfg_dicts = [] - for (dirpath, dirnames, filenames) in os.walk(output_folder): - if 'cfg.json' in filenames: - with open(os.path.join(dirpath, 'cfg.json'), 'r') as file: - cfg_dict = json.load(file) - - if cfg_filter is not None and not cfg_filter(cfg_dict): - continue - file_paths.append(dirpath) - cfg_dict['cli_args'] = {} - cfg_dict['fps'] = 0 - cfg_dict['render_action_repeat'] = None - cfg_dict['no_render'] = None - cfg_dict['policy_index'] = 0 - cfg_dict['record_to'] = os.path.join(os.getcwd(), '..', 'recs') - cfg_dict['continuous_actions_sample'] = False - cfg_dict['discrete_actions_sample'] = False - # for the train set, we don't want to loop over - # files we didn't train on - # also watch out for -1 which means "train on all files" - if cfg_dict[ - 'num_files'] < num_eval_files and 'train' in file_type and cfg_dict[ - 'num_files'] != -1: - cfg_dict['num_eval_files'] = cfg_dict['num_files'] - cfg_dict['num_file_loops'] = num_file_loops * int( - max(num_eval_files // cfg_dict['num_files'], 1)) - else: - cfg_dict['num_eval_files'] = num_eval_files - cfg_dict['num_file_loops'] = num_file_loops - 
cfg_dicts.append(cfg_dict) - if test_zsc: - # TODO(eugenevinitsky) we're currently storing the ZSC result in a random - # folder which seems bad. - run_eval([Bunch(cfg_dict) for cfg_dict in cfg_dicts], - test_zsc=test_zsc, - output_path=file_paths[0], - scenario_dir=scenario_dir, - files=files, - file_type=file_type) - print('stored ZSC result in {}'.format(file_paths[0])) - else: - # why 13? because a 16 GB GPU can do a forwards pass on 13 copies of the model - # for 20 vehicles at once. More than that and you'll run out of memory - num_cpus = min(13, mp.cpu_count() - 2) - device = 'cuda' - # if torch.cuda.is_available(): - # device = 'cuda' - # else: - # device = 'cpu' - with mp.Pool(processes=num_cpus) as pool: - list( - pool.starmap( - run_eval, - zip(cfg_dicts, repeat(test_zsc), file_paths, - repeat(scenario_dir), repeat(files), repeat(file_type), - repeat(device)))) - print(file_paths) - - -def main(): - """Script entry point.""" - set_display_window() - register_custom_components() - RUN_EVAL = False - TEST_ZSC = False - PLOT_RESULTS = True - RELOAD_WANDB = False - VERSION = 5 - NUM_EVAL_FILES = 200 - NUM_FILE_LOOPS = 1 # the number of times to loop over a fixed set of files - experiment_names = ['srt_v27'] - # output_folder = '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.20/new_road_sample/18.32.35' - # output_folder = [ - # '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.23/srt_v10/17.02.40/' - # ] - # 10 files - # output_folder = [ - # '/checkpoint/eugenevinitsky/nocturne/sweep/2022.05.28/srt_12/16.43.16/' - # ] - # SRT submission results - output_folder = [ - '/checkpoint/eugenevinitsky/nocturne/sweep/2022.06.01/srt_v27/17.35.33' - ] - generalization_dfs = [] - - cfg_filter = None - - if TEST_ZSC: - - def cfg_filter(cfg_dict): - if cfg_dict['scenario']['road_edge_first'] is False and cfg_dict[ - 'scenario']['max_visible_road_points'] == 500 and cfg_dict[ - 'algorithm']['encoder_hidden_size'] == 256 and cfg_dict[ - 'num_files'] == 10000: - return 
True - else: - return False - else: - - def cfg_filter(cfg_dict): - if cfg_dict['scenario']['road_edge_first'] is False and cfg_dict[ - 'scenario']['max_visible_road_points'] == 500 and cfg_dict[ - 'algorithm']['encoder_hidden_size'] == 256: - return True - else: - return False - - ''' - ############################################################################### - ######### Build the generalization dataframes ###################### - ############################################################################## - ''' - - if RUN_EVAL: - if TEST_ZSC: - output_generator = [(PROCESSED_VALID_NO_TL, - 'test_{}'.format(VERSION))] - else: - output_generator = [ - (PROCESSED_TRAIN_NO_TL, 'train_{}'.format(VERSION)), - (PROCESSED_VALID_NO_TL, 'test_{}'.format(VERSION)) - ] - - for file_path, file_type in output_generator: - with open(os.path.join(file_path, 'valid_files.json')) as file: - valid_veh_dict = json.load(file) - files = list(valid_veh_dict.keys()) - if file_type == 'test_{}'.format(VERSION): - # sort the files so that we have a consistent order - np.random.seed(0) - np.random.shuffle(files) - if file_type == 'train_{}'.format(VERSION): - # for train make sure we use the same ordering - # that is used in base_env - # TODO(eugenevinitsky) this is dangerous and could - # break easily - files = sorted(files) - for folder in output_folder: - eval_generalization(folder, - NUM_EVAL_FILES, - files, - file_type=file_type, - scenario_dir=file_path, - num_file_loops=NUM_FILE_LOOPS, - test_zsc=TEST_ZSC, - cfg_filter=cfg_filter) - - if PLOT_RESULTS: - # okay, now build a pandas dataframe of the results that we will use for plotting - # the generalization results - for folder in output_folder: - for file_type in [ - 'train_{}'.format(VERSION), 'test_{}'.format(VERSION) - # 'train', - # 'test' - ]: - file_paths = [] - data_dicts = [] - for (dirpath, dirnames, filenames) in os.walk(folder): - if 'cfg.json' in filenames: - file_paths.append(dirpath) - with 
open(os.path.join(dirpath, 'cfg.json'), - 'r') as file: - cfg_dict = json.load(file) - if cfg_filter(cfg_dict): - # TODO(eugenevinitsky) why do they not all have this? - goal = np.mean( - np.load( - os.path.join( - dirpath, - '{}_goal.npy'.format(file_type)))) - collide = np.mean( - np.load( - os.path.join( - dirpath, - '{}_collision.npy'.format(file_type)))) - ade = np.mean( - np.load( - os.path.join( - dirpath, - '{}_ade.npy'.format(file_type)))) - fde = np.mean( - np.load( - os.path.join( - dirpath, - '{}_fde.npy'.format(file_type)))) - veh_veh_collision = np.mean( - np.load( - os.path.join( - dirpath, - '{}_veh_veh_collision.npy'.format( - file_type)))) - veh_edge_collision = np.mean( - np.load( - os.path.join( - dirpath, - '{}_veh_edge_collision.npy'.format( - file_type)))) - success_by_num_intersections = np.load( - os.path.join( - dirpath, - '{}_success_by_num_intersections.npy'. - format(file_type))) - # there aren't a lot of data points past 3 - # so just bundle them in - success_by_num_intersections[:, - 3, :] = success_by_num_intersections[:, 3:, :].sum( - axis=1) - success_by_num_intersections = success_by_num_intersections[:, - 0: - 4, :] - success_by_veh_num = np.load( - os.path.join( - dirpath, - '{}_success_by_veh_number.npy'.format( - file_type))) - success_by_distance = np.load( - os.path.join( - dirpath, '{}_success_by_dist.npy'.format( - file_type))) - num_files = cfg_dict['num_files'] - if int(num_files) == -1: - num_files = 134453 - if int(num_files) == 1: - continue - data_dicts.append({ - 'num_files': - num_files, - 'goal_rate': - goal * 100, - 'collide_rate': - collide * 100, - 'ade': - ade, - 'fde': - fde, - 'veh_veh_collision': - veh_veh_collision, - 'veh_edge_collision': - veh_edge_collision, - 'goal_by_intersections': - np.nan_to_num( - success_by_num_intersections[0, :, 0] / - success_by_num_intersections[0, :, 3]), - 'collide_by_intersections': - np.nan_to_num( - success_by_num_intersections[0, :, 1] / - 
success_by_num_intersections[0, :, 3]), - 'goal_by_vehicle_num': - np.nan_to_num(success_by_veh_num[0, :, 0] / - success_by_veh_num[0, :, 3]), - 'collide_by_vehicle_num': - np.nan_to_num(success_by_veh_num[0, :, 1] / - success_by_veh_num[0, :, 3]), - 'goal_by_distance': - np.nan_to_num(success_by_distance[0, :, 0] / - success_by_distance[0, :, 3]), - 'collide_by_distance': - np.nan_to_num(success_by_distance[0, :, 1] / - success_by_distance[0, :, 3]), - }) - if cfg_dict['num_files'] == 10000: - print('goal ', - success_by_num_intersections[0, :, 0]) - print('num vehicles in bin', - success_by_num_intersections[0, :, 3]) - df = pd.DataFrame(data_dicts) - new_dict = {} - for key in data_dicts[0].keys(): - if key == 'num_files': - continue - new_dict[key] = df.groupby(['num_files' - ])[key].mean().reset_index() - try: - new_dict[key + '_std'] = df.groupby( - ['num_files'])[key].std().reset_index().rename( - columns={key: key + '_std'}) - except ValueError: - # TODO(eugenevinitsky) learn to use pandas dawg - # what even is this - temp_dict = {} - for name, group in df.groupby(['num_files'])[key]: - temp = [] - for arr in group: - temp.append(arr) - np_arr = np.vstack(temp) - std_err = np.std(np_arr, axis=0) / np.sqrt( - np_arr.shape[0]) - temp_dict[name] = std_err - new_dict[key + '_stderr'] = pd.Series( - data=temp_dict).reset_index().rename( - columns={ - 'index': 'num_files', - 0: key + '_stderr' - }) - first_elem_key = 'goal_rate' - first_elem = new_dict[first_elem_key] - for key, value in new_dict.items(): - if key == first_elem_key: - continue - first_elem = first_elem.merge(value, - how='inner', - on='num_files') - generalization_dfs.append(first_elem) - ''' - ############################################################################### - ######### load the training dataframes from wandb ###################### - ############################################################################## - ''' - global_step_cutoff = 3e9 - training_dfs = [] - for 
experiment_name in experiment_names: - load_wandb(experiment_name, cfg_filter, force_reload=RELOAD_WANDB) - training_dfs.append( - pd.read_csv('wandb_{}.csv'.format(experiment_name))) - - num_seeds = len(np.unique(training_dfs[0].seed)) - # create the goal plot - plt.figure(dpi=300) - for i, (df, file_type) in enumerate( - zip(generalization_dfs, ['Train', 'Test'])): - plt.plot(np.log10(df.num_files), - df.goal_rate, - color=CB_color_cycle[i], - label=file_type) - ax = plt.gca() - yerr = df.goal_rate_std.replace(np.nan, 0) / np.sqrt(num_seeds) - ax.fill_between(np.log10(df.num_files), - df.goal_rate - 2 * yerr, - df.goal_rate + 2 * yerr, - color=CB_color_cycle[i], - alpha=0.3) - print(f'{file_type} goal rate', df.goal_rate, yerr) - plt.ylim([0, 100]) - plt.xlabel(' Number of Training Files (Logarithmic Scale)') - plt.ylabel('% Goals Achieved') - plt.legend() - plt.savefig('goal_achieved.png', bbox_inches='tight', pad_inches=0.1) - - # create the collide plot - plt.figure(dpi=300) - for i, (df, file_type) in enumerate( - zip(generalization_dfs, ['Train', 'Test'])): - plt.plot(np.log10(df.num_files), - df.collide_rate, - color=CB_color_cycle[i], - label=file_type) - ax = plt.gca() - yerr = df.collide_rate_std.replace(np.nan, 0) / np.sqrt(num_seeds) - ax.fill_between(np.log10(df.num_files), - df.collide_rate - 2 * yerr, - df.collide_rate + 2 * yerr, - color=CB_color_cycle[i], - alpha=0.3) - print(f'{file_type} collide rate', df.collide_rate, yerr) - plt.ylim([0, 50]) - plt.xlabel(' Number of Training Files (Logarithmic Scale)') - plt.ylabel('% Vehicles Collided') - plt.legend() - plt.savefig('collide_rate.png', bbox_inches='tight', pad_inches=0.1) - - # create ADE and FDE plots - - plt.figure(dpi=300) - for i, (df, file_type) in enumerate( - zip(generalization_dfs, ['Train', 'Test'])): - yerr = df.ade_std.replace(np.nan, 0) / np.sqrt(num_seeds) - plt.plot(np.log10(df.num_files), - df.ade, - label=file_type, - color=CB_color_cycle[i]) - ax = plt.gca() - 
ax.fill_between(np.log10(df.num_files), - df.ade - 2 * yerr, - df.ade + 2 * yerr, - color=CB_color_cycle[i], - alpha=0.3) - print(f'{file_type} ade', df.ade, yerr) - plt.xlabel(' Number of Training Files (Logarithmic Scale)') - plt.ylabel('Average Displacement Error (m)') - plt.ylim([0, 5]) - plt.legend() - plt.savefig('ade.png', bbox_inches='tight', pad_inches=0.1) - - plt.figure(dpi=300) - for i, (df, file_type) in enumerate( - zip(generalization_dfs, ['Train', 'Test'])): - yerr = df.fde_std.replace(np.nan, 0) / np.sqrt(num_seeds) - plt.plot(np.log10(df.num_files), - df.fde, - label=file_type, - color=CB_color_cycle[i]) - ax = plt.gca() - ax.fill_between(np.log10(df.num_files), - df.fde - 2 * yerr, - df.fde + 2 * yerr, - color=CB_color_cycle[i], - alpha=0.3) - print(f'{file_type} fde', df.fde, yerr) - plt.ylim([4, 10]) - plt.xlabel(' Number of Training Files (Logarithmic Scale)') - plt.ylabel('Final Displacement Error (m)') - plt.legend() - plt.savefig('fde.png', bbox_inches='tight', pad_inches=0.1) - plot_goal_achieved(experiment_names[0], global_step_cutoff) - - # create error by number of expert intersections plots - plt.figure(dpi=300) - for i, (df, file_type) in enumerate( - zip(generalization_dfs, ['Train', 'Test'])): - values_num_files = np.unique(df.num_files.values) - print(values_num_files) - for value in values_num_files: - if value != 10000: - continue - numpy_arr = df[df.num_files == - value]['goal_by_intersections'].to_numpy()[0] - temp_df = pd.DataFrame(numpy_arr).melt() - plt.plot(temp_df.index, - temp_df.value * 100, - label=file_type, - color=CB_color_cycle[i]) - numpy_arr = df[df.num_files == value][ - 'goal_by_intersections_stderr'].to_numpy()[0] - std_err_df = pd.DataFrame(numpy_arr).melt() - ax = plt.gca() - ax.fill_between(temp_df.index, - 100 * (temp_df.value - 2 * std_err_df.value), - 100 * (temp_df.value + 2 * std_err_df.value), - color=CB_color_cycle[i], - alpha=0.3) - - plt.xlabel('Number of intersecting paths') - plt.ylabel('Percent 
Goals Achieved') - ax.set_xticks([i for i in range(numpy_arr.shape[-1])]) - plt.legend() - plt.savefig('goal_v_intersection.png', - bbox_inches='tight', - pad_inches=0.1) - - # create error by number of expert intersections plots - plt.figure(dpi=300) - for i, (df, file_type) in enumerate( - zip(generalization_dfs, ['Train', 'Test'])): - values_num_files = np.unique(df.num_files.values) - for value in values_num_files: - if value != 10000: - continue - numpy_arr = df[df.num_files == - value]['collide_by_intersections'].to_numpy()[0] - temp_df = pd.DataFrame(numpy_arr).melt() - plt.plot(temp_df.index, - temp_df.value * 100, - color=CB_color_cycle[i], - label=file_type) - numpy_arr = df[df.num_files == value][ - 'collide_by_intersections_stderr'].to_numpy()[0] - std_err_df = pd.DataFrame(numpy_arr).melt() - ax = plt.gca() - ax.fill_between(temp_df.index, - 100 * (temp_df.value - 2 * std_err_df.value), - 100 * (temp_df.value + 2 * std_err_df.value), - color=CB_color_cycle[i], - alpha=0.3) - plt.xlabel('Number of Intersecting Paths') - plt.ylabel('Percent Collisions') - ax.set_xticks([i for i in range(numpy_arr.shape[-1])]) - plt.legend() - plt.savefig('collide_v_intersection.png', - bbox_inches='tight', - pad_inches=0.1) - - # create error by number of vehicles plots - plt.figure(dpi=300) - for i, (df, file_type) in enumerate( - zip(generalization_dfs, ['Train', 'Test'])): - values_num_files = np.unique(df.num_files.values) - print(values_num_files) - for value in values_num_files: - if value != 10000: - continue - numpy_arr = df[df.num_files == - value]['goal_by_vehicle_num'].to_numpy()[0] - temp_df = pd.DataFrame(numpy_arr).melt() - plt.plot(temp_df.index, - temp_df.value * 100, - label=file_type, - color=CB_color_cycle[i]) - numpy_arr = df[df.num_files == value][ - 'goal_by_vehicle_num_stderr'].to_numpy()[0] - std_err_df = pd.DataFrame(numpy_arr).melt() - ax = plt.gca() - ax.fill_between(temp_df.index, - 100 * (temp_df.value - 2 * std_err_df.value), - 100 * 
(temp_df.value + 2 * std_err_df.value), - color=CB_color_cycle[i], - alpha=0.3) - # sns.lineplot(x=temp_df.index, y=temp_df.value * 100) - plt.xlabel('Number of Controlled Vehicles') - plt.ylabel('Percent Goals Achieved') - ax.set_xticks([i for i in range(numpy_arr.shape[-1])]) - plt.legend() - plt.savefig('goal_v_vehicle_num.png', - bbox_inches='tight', - pad_inches=0.1) - - # create error by distance plots - plt.figure(dpi=300) - for i, (df, file_type) in enumerate( - zip(generalization_dfs, ['Train', 'Test'])): - values_num_files = np.unique(df.num_files.values) - print(values_num_files) - for value in values_num_files: - if value != 10000: - continue - numpy_arr = df[df.num_files == - value]['goal_by_distance'].to_numpy()[0] - temp_df = pd.DataFrame(numpy_arr).melt() - plt.plot(temp_df.index, - temp_df.value * 100, - label=file_type, - color=CB_color_cycle[i]) - numpy_arr = df[df.num_files == - value]['goal_by_distance_stderr'].to_numpy()[0] - std_err_df = pd.DataFrame(numpy_arr).melt() - ax = plt.gca() - ax.fill_between(temp_df.index, - 100 * (temp_df.value - 2 * std_err_df.value), - 100 * (temp_df.value + 2 * std_err_df.value), - color=CB_color_cycle[i], - alpha=0.3) - # sns.lineplot(x=temp_df.index, y=temp_df.value * 100) - plt.xlabel('Starting Distance to Goal') - plt.ylabel('Percent Goals Achieved') - ax.set_xticks([i for i in range(numpy_arr.shape[-1])]) - plt.legend() - plt.savefig('goal_v_distance.png', bbox_inches='tight', pad_inches=0.1) - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/scripts/paper_plots/generate_scenes.py b/scripts/paper_plots/generate_scenes.py deleted file mode 100644 index 985942ea..00000000 --- a/scripts/paper_plots/generate_scenes.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-"""Example of how to make movies of Nocturne scenarios.""" -import hydra -import imageio -import matplotlib.pyplot as plt -import numpy as np -import os - -from cfgs.config import PROCESSED_TRAIN_NO_TL, PROJECT_PATH, \ - get_scenario_dict, set_display_window -from nocturne import Simulation - - -def get_sim(scenario_file, cfg): - """Initialize the scenario.""" - # load scenario, set vehicles to be expert-controlled - cfg['scenario']['allow_non_vehicles'] = False - sim = Simulation(scenario_path=str(scenario_file), - config=get_scenario_dict(cfg)) - for obj in sim.getScenario().getObjectsThatMoved(): - obj.expert_control = True - return sim - - -def make_movie(sim, - scenario_fn, - output_path='./vid.mp4', - dt=0.1, - steps=90, - fps=10): - """Make a movie from the scenario.""" - scenario = sim.getScenario() - movie_frames = [] - timestep = 0 - movie_frames.append(scenario_fn(scenario, timestep)) - for i in range(steps): - sim.step(dt) - timestep += 1 - movie_frames.append(scenario_fn(scenario, timestep)) - movie_frames = np.stack(movie_frames, axis=0) - imageio.mimwrite(output_path, movie_frames, fps=fps) - print('>', output_path) - del sim - del movie_frames - - -def make_image(sim, scenario_file, scenario_fn, output_path='./img.png'): - """Make a single image from the scenario.""" - scenario = sim.getScenario() - img = scenario_fn(scenario) - dpi = 100 - height, width, depth = img.shape - figsize = width / dpi, height / dpi - plt.figure(figsize=figsize, dpi=dpi) - plt.axis('off') - plt.imshow(img) - plt.savefig(output_path, bbox_inches='tight', pad_inches=0) - print('>', output_path) - - -@hydra.main(config_path="../../cfgs/", config_name="config") -def main(cfg): - """See file docstring.""" - set_display_window() - - # files = ['tfrecord-00358-of-01000_{}.json'.format(i) for i in range(500)] - - files = [ - 'tfrecord-00358-of-01000_60.json', # unprotected turn - 'tfrecord-00358-of-01000_72.json', # four way stop - 'tfrecord-00358-of-01000_257.json', # crowded 
four way stop - 'tfrecord-00358-of-01000_332.json', # crowded merge road - 'tfrecord-00358-of-01000_79.json', # crowded parking lot - ] - for file in files: - file = os.path.join(PROCESSED_TRAIN_NO_TL, file) - sim = get_sim(file, cfg) - if os.path.exists(file): - # image of whole scenario - # make_image( - # sim, - # file, - # scenario_fn=lambda scenario: scenario.getImage( - # img_width=2000, - # img_height=2000, - # padding=50.0, - # draw_target_positions=True, - # ), - # output_path=PROJECT_PATH / - # 'scripts/paper_plots/figs/scene_{}.png'.format( - # os.path.basename(file)), - # ) - - veh_index = -3 - make_image( - sim, - file, - scenario_fn=lambda scenario: scenario.getImage( - img_height=1600, - img_width=1600, - draw_target_positions=True, - padding=0.0, - source=scenario.getVehicles()[veh_index], - view_height=80, - view_width=80, - rotate_with_source=True, - ), - output_path=PROJECT_PATH / - 'scripts/paper_plots/figs/cone_original_{}.png'.format( - os.path.basename(file)), - ) - make_image( - sim, - file, - scenario_fn=lambda scenario: scenario.getConeImage( - source=scenario.getVehicles()[veh_index], - view_dist=cfg['subscriber']['view_dist'], - view_angle=cfg['subscriber']['view_angle'], - head_angle=0.0, - img_height=1600, - img_width=1600, - padding=0.0, - draw_target_position=True, - ), - output_path=PROJECT_PATH / - 'scripts/paper_plots/figs/cone_{}.png'.format( - os.path.basename(file)), - ) - make_image( - sim, - file, - scenario_fn=lambda scenario: scenario.getFeaturesImage( - source=scenario.getVehicles()[veh_index], - view_dist=cfg['subscriber']['view_dist'], - view_angle=cfg['subscriber']['view_angle'], - head_angle=0.0, - img_height=1600, - img_width=1600, - padding=0.0, - draw_target_position=True, - ), - output_path=PROJECT_PATH / - 'scripts/paper_plots/figs/feature_{}.png'.format( - os.path.basename(file)), - ) - - -if __name__ == '__main__': - main() diff --git a/scripts/utils.py b/scripts/utils.py deleted file mode 100644 index 
21be3246..00000000 --- a/scripts/utils.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Storage for SLURM running utilities.""" - - -class Overrides(object): - """Utility class used to convert commands into a bash runnable string.""" - - def __init__(self): - """Initialize class.""" - self.kvs = dict() - - def add(self, key, values): - """Add each of the desired key value pairs into a dict.""" - value = ','.join(str(v) for v in values) - assert key not in self.kvs - self.kvs[key] = value - - def cmd(self): - """Append the keys together into a command that can be run.""" - cmd = [] - for k, v in self.kvs.items(): - cmd.append(f'{k}={v}') - return cmd diff --git a/scripts/visualization/visualize_waymo_map.py b/scripts/visualization/visualize_waymo_map.py deleted file mode 100644 index 07f6c191..00000000 --- a/scripts/visualization/visualize_waymo_map.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-"""Plot the text file representation of a protobuf.""" -import matplotlib.patches as mpatches -import matplotlib.pyplot as plt -import pprint - -pp = pprint.PrettyPrinter() - -data = {} - -current = data -file = 'output.txt' -show_tracks = True -parent_keys = [] -with open(file, 'r') as f: - lines = f.read().split('\n') - for line in lines: - # print(line) - if ":" in line: - k, v = [x.strip() for x in line.split(':')] - if k in current: - current[k].append(v) - else: - current[k] = [v] - elif "{" in line: - k = line[:-1].strip() - if k not in current: - current[k] = [] - parent_keys.append(k) - current[k].append({}) - current = current[k][-1] - elif "}" in line: - current = data - for k in parent_keys[:-1]: - current = current[k][-1] - parent_keys = parent_keys[:-1] - else: - pass - -# message Scenario: -# https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/protos/scenario.proto -print('\nScenario') -print(data.keys()) - -# message Track, message ObjectState: -# https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/protos/scenario.proto -print('\nObjects (vehicles, pedestrians, cyclists..)') -print(len(data['tracks'])) -print(data['tracks'][0].keys()) -print(len(data['tracks'][0]['states'])) -print(data['tracks'][0]['states'][0].keys()) - -# message MapFeature: -# https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/protos/map.proto -print('\nMap (roads, lanes..)') -print(len(data['map_features'])) -print(data['map_features'][0].keys()) - -# supported values are '-', '--', '-.', ':', 'None', ' ', '', 'solid', 'dashed', 'dashdot', 'dotted' -fig = plt.figure(figsize=(20, 20)) - -for mf in data['map_features']: - k = list(mf.keys())[1] - assert len(mf[k]) == 1 - v = mf[k][0] - - if k == 'lane': - xs = [] - ys = [] - for pt in v['polyline']: - xs.append(float(pt['x'][0])) - ys.append(float(pt['y'][0])) - plt.plot(xs, ys, color='cyan', linewidth=1) - - elif k == 'road_line': - 
edge_type = v['type'][0] - # linestyle = 'solid' if edge_type == 'TYPE_ROAD_EDGE_BOUNDARY' else 'dashdot' - # print(edge_type) - - xs = [] - ys = [] - for pt in v['polyline']: - xs.append(float(pt['x'][0])) - ys.append(float(pt['y'][0])) - plt.plot(xs, ys, color='orange') - - elif k == 'road_edge': - edge_type = v['type'][0] - linestyle = 'solid' if edge_type == 'TYPE_ROAD_EDGE_BOUNDARY' else 'dashdot' - - xs = [] - ys = [] - for pt in v['polyline']: - xs.append(float(pt['x'][0])) - ys.append(float(pt['y'][0])) - plt.plot(xs, ys, color='black', linestyle=linestyle) - - elif k == 'stop_sign': - pos = v['position'][0] - plt.plot(float(pos['x'][0]), float(pos['y'][0]), 'ro') - - elif k == 'crosswalk': - xs = [] - ys = [] - for pt in v['polygon']: - xs.append(float(pt['x'][0])) - ys.append(float(pt['y'][0])) - plt.plot(xs, ys, color='purple', linestyle=linestyle) - - elif k == 'speed_bump': - xs = [] - ys = [] - for pt in v['polygon']: - xs.append(float(pt['x'][0])) - ys.append(float(pt['y'][0])) - plt.plot(xs, ys, color='green', linestyle=linestyle) - - else: - print('Error with key', k) - -if show_tracks: - img_arr = [] - - from celluloid import Camera - camera = Camera(plt.gcf()) - ax = plt.gca() - # in range(len(data['tracks'][0]['states'])): - for i in range(20): - for object in data['tracks']: - if object['states'][i]['valid'][0] != 'false': - plt.scatter(float(object['states'][i]['center_x'][0]), - float(object['states'][i]['center_y'][0]), - c='blue', - s=40) - # TODO(eugenevinitsky) this is a horrible way of copying over the figure - lines = list(ax.get_lines()) - for obj in lines: - plt.plot(obj.get_data()[0], obj.get_data()[1]) - camera.snap() - animation = camera.animate() - animation.save('animation.mp4') - -patches = [] -patches.append(mpatches.Patch(color='cyan', label='lane_center')) -patches.append(mpatches.Patch(color='orange', label='road_line')) -patches.append(mpatches.Patch(color='black', label='road_edge')) 
-patches.append(mpatches.Patch(color='red', label='stop_sign')) -patches.append(mpatches.Patch(color='purple', label='crosswalk')) -patches.append(mpatches.Patch(color='green', label='speedbump')) -plt.legend(handles=patches) - -plt.savefig(file.split('.')[0] + '.png') diff --git a/scripts/visualization/waymo_movie.py b/scripts/visualization/waymo_movie.py deleted file mode 100644 index c20ac6c1..00000000 --- a/scripts/visualization/waymo_movie.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""Make a movie from a random file.""" -import os - -import hydra -import imageio -import matplotlib.pyplot as plt -import numpy as np - -from cfgs.config import PROCESSED_TRAIN_NO_TL, get_scenario_dict, set_display_window -from nocturne import Simulation - - -@hydra.main(config_path="../../cfgs/", config_name="config") -def main(cfg): - """See file docstring.""" - set_display_window() - _ = plt.figure() - files = os.listdir(PROCESSED_TRAIN_NO_TL) - file = os.path.join(PROCESSED_TRAIN_NO_TL, - files[np.random.randint(len(files))]) - sim = Simulation(file, get_scenario_dict(cfg)) - frames = [] - scenario = sim.getScenario() - for veh in scenario.getVehicles(): - veh.expert_control = True - for i in range(90): - img = scenario.getImage( - img_width=1600, - img_height=1600, - draw_target_positions=False, - padding=50.0, - ) - frames.append(img) - sim.step(0.1) - - movie_frames = np.array(frames) - output_path = f'{os.path.basename(file)}.mp4' - imageio.mimwrite(output_path, movie_frames, fps=30) - print('>', output_path) - - -if __name__ == '__main__': - main() diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index d8306bef..00000000 --- a/setup.cfg +++ /dev/null @@ -1,63 +0,0 @@ -[metadata] -name = nocturne -version = 0.0.1 -description = A data-driven, fast driving simulator for 
multi-agent coordination under partial observability. -long_description = file: README.rst # todo -author = Nathan Lichtle, Eugene Vinitsky, and Xiaomeng Yang -author_email = nathan.lichtle@berkeley, ... # todo -url = https://github.com/facebookresearch/nocturne/ -license = MIT - -classifiers = # todo - Development Status :: 5 - Production/Stable - Intended Audience :: Developers - Topic :: Software Development :: Libraries :: Python Modules - Topic :: Utilities - Programming Language :: C++ - Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - License :: OSI Approved :: BSD License - Programming Language :: Python :: Implementation :: PyPy - Programming Language :: Python :: Implementation :: CPython - Programming Language :: C++ - Topic :: Software Development :: Libraries :: Python Modules - -keywords = - Driving - Simulation - Autonomous Vehicles - Waymo - Reinforcement Learning - -# todo -project_urls = - Documentation = https://nocturne.readthedocs.io/ - Bug Tracker = https://github.com/fb/nocturne/issues - Discussions = https://github.com/fb/nocturne/discussions - Changelog = https://nocturne.readthedocs.io/en/latest/changelog.html - Chat = https://gitter.im/nocturne/Lobby - -[options] -zip_safe = False -python_requires = >=3.8 - -[tool:pytest] -minversion = 6.0 -addopts = -ra -q -testpaths = - tests - -[flake8] -max-line-length = 120 -show_source = True -exclude = .git, __pycache__, build, docs, _deps, third_party, algos, nocturne/envs/__init__.py, examples/nocturne_functions.py - -[pydocstyle] -inherit = false -match = .*\.py -match_dir = ^(?!.git|__pycache__|build|docs|_deps|third_party|algos).* -convention = numpy