"""Wrapper that converts the info format for vec envs into the list format."""
from __future__ import annotations
from typing import Any
import numpy as np
from gymnasium.core import ActType, ObsType
from gymnasium.vector.vector_env import ArrayType, VectorEnv, VectorWrapper
# Public API of this module: the only name exported by a star-import.
__all__ = ["DictInfoToList"]
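# [docs]  (appears to be a Sphinx source-link marker left over from the rendered docs page; not part of the code)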
class DictInfoToList(VectorWrapper):
    """Converts infos of vectorized environments from ``dict`` to ``List[dict]``.

    The wrapped vector environment reports its infos as a single dictionary
    in which every key ``k`` is paired with a mask stored under ``_k`` that
    flags the sub-environments that actually produced ``k``. This wrapper
    turns that dictionary into a list holding one dictionary per
    sub-environment.

    This wrapper is intended to be used around vectorized environments.
    If other wrappers that operate on the info (such as
    ``RecordEpisodeStatistics``) are used as well, this wrapper needs to be
    the outermost one,

    i.e. ``DictInfoToList(RecordEpisodeStatistics(vector_env))``

    Example:
        >>> import numpy as np
        >>> dict_info = {
        ... "k": np.array([0., 0., 0.5, 0.3]),
        ... "_k": np.array([False, False, True, True])
        ... }
        ...
        >>> list_info = [{}, {}, {"k": 0.5}, {"k": 0.3}]

    Example for vector environments:
        >>> import numpy as np
        >>> import gymnasium as gym
        >>> envs = gym.make_vec("CartPole-v1", num_envs=3)
        >>> obs, info = envs.reset(seed=123)
        >>> info
        {}
        >>> envs = DictInfoToList(envs)
        >>> obs, info = envs.reset(seed=123)
        >>> info
        [{}, {}, {}]

    Another example for vector environments:
        >>> import numpy as np
        >>> import gymnasium as gym
        >>> envs = gym.make_vec("HalfCheetah-v4", num_envs=2)
        >>> _ = envs.reset(seed=123)
        >>> _ = envs.action_space.seed(123)
        >>> _, _, _, _, infos = envs.step(envs.action_space.sample())
        >>> infos
        {'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True, True]), 'x_velocity': array([-0.06296527, 0.89345848]), '_x_velocity': array([ True, True]), 'reward_run': array([-0.06296527, 0.89345848]), '_reward_run': array([ True, True]), 'reward_ctrl': array([-0.24503504, -0.21944423], dtype=float32), '_reward_ctrl': array([ True, True])}
        >>> envs = DictInfoToList(envs)
        >>> _ = envs.reset(seed=123)
        >>> _ = envs.action_space.seed(123)
        >>> _, _, _, _, infos = envs.step(envs.action_space.sample())
        >>> infos
        [{'x_position': np.float64(0.0333221090036294), 'x_velocity': np.float64(-0.06296527291998574), 'reward_run': np.float64(-0.06296527291998574), 'reward_ctrl': np.float32(-0.24503504)}, {'x_position': np.float64(0.10172354684460168), 'x_velocity': np.float64(0.8934584807363618), 'reward_run': np.float64(0.8934584807363618), 'reward_ctrl': np.float32(-0.21944423)}]

    Change logs:
     * v0.24.0 - Initially added as ``VectorListInfo``
     * v1.0.0 - Renamed to ``DictInfoToList``
    """

    def __init__(self, env: VectorEnv):
        """Wraps a vector environment so that its infos come back as a list.

        Args:
            env (VectorEnv): The vector environment to apply the wrapper to
        """
        super().__init__(env)

    def step(
        self, actions: ActType
    ) -> tuple[ObsType, ArrayType, ArrayType, ArrayType, list[dict[str, Any]]]:
        """Steps the wrapped environment and converts its dict info to a list.

        Args:
            actions: The actions forwarded unchanged to the wrapped environment.

        Returns:
            The observation, reward, terminated and truncated values exactly
            as returned by the wrapped environment, followed by the infos as
            a list with one dictionary per sub-environment.
        """
        obs, rewards, terminations, truncations, dict_infos = self.env.step(actions)
        # The conversion only understands the dictionary info format.
        assert isinstance(dict_infos, dict)
        return (
            obs,
            rewards,
            terminations,
            truncations,
            self._convert_info_to_list(dict_infos),
        )

    def reset(
        self,
        *,
        seed: int | list[int] | None = None,
        options: dict[str, Any] | None = None,
    ) -> tuple[ObsType, list[dict[str, Any]]]:
        """Resets the wrapped environment and converts its dict info to a list.

        Args:
            seed: Forwarded unchanged to the wrapped environment's ``reset``.
            options: Forwarded unchanged to the wrapped environment's ``reset``.

        Returns:
            The observation returned by the wrapped environment and the infos
            as a list with one dictionary per sub-environment.
        """
        observation, dict_infos = self.env.reset(seed=seed, options=options)
        # The conversion only understands the dictionary info format.
        assert isinstance(dict_infos, dict)
        return observation, self._convert_info_to_list(dict_infos)

    def _convert_info_to_list(self, vector_infos: dict) -> list[dict[str, Any]]:
        """Convert the dict info to list.

        Builds a list of ``self.num_envs`` dictionaries in which the i-th
        dictionary holds the info entries of the i-th environment. An entry
        ``key`` is copied to environment ``i`` only when the mask stored
        under ``_key`` is truthy at position ``i``. Values that are
        themselves dictionaries are converted recursively.

        Args:
            vector_infos (dict): info dict coming from the env.

        Returns:
            list_info (list): converted info.
        """
        per_env_infos = [{} for _ in range(self.num_envs)]

        for name, batched in vector_infos.items():
            # Underscore-prefixed entries are not copied on their own; the
            # ``_<name>`` entry is read below as the mask belonging to ``name``.
            if name.startswith("_"):
                continue

            if not isinstance(batched, dict):
                # Leaf entry: an array indexed by environment number.
                assert isinstance(batched, np.ndarray)
                mask = vector_infos[f"_{name}"]
                for env_idx, present in enumerate(mask):
                    if present:
                        per_env_infos[env_idx][name] = batched[env_idx]
                continue

            # Nested entry: convert the inner dict first, then keep the
            # per-environment result only where the outer mask is set.
            nested_infos = self._convert_info_to_list(batched)
            mask = vector_infos[f"_{name}"]
            for env_idx, (nested_info, present) in enumerate(zip(nested_infos, mask)):
                if present:
                    per_env_infos[env_idx][name] = nested_info

        return per_env_infos