Source code for gymnasium.wrappers.vector.vectorize_reward
"""Vectorizes reward function to work with `VectorEnv`."""
from __future__ import annotations
from typing import Any, Callable
import numpy as np
from gymnasium import Env
from gymnasium.vector import VectorEnv, VectorRewardWrapper
from gymnasium.vector.vector_env import ArrayType
from gymnasium.wrappers import transform_reward
class TransformReward(VectorRewardWrapper):
"""A reward wrapper that allows a custom function to modify the step reward.
Example with reward transformation:
>>> import gymnasium as gym
>>> from gymnasium.spaces import Box
>>> def scale_and_shift(rew):
... return (rew - 1.0) * 2.0
...
>>> envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3)
>>> envs = TransformReward(env=envs, func=scale_and_shift)
>>> _ = envs.action_space.seed(123)
>>> obs, info = envs.reset(seed=123)
>>> obs, rew, term, trunc, info = envs.step(envs.action_space.sample())
>>> envs.close()
>>> obs
array([[-4.6343064e-01, 9.8971417e-05],
[-4.4488689e-01, -1.9375233e-03],
[-4.3118435e-01, -1.5342437e-03]], dtype=float32)
"""
    def __init__(self, env: VectorEnv, func: Callable[[ArrayType], ArrayType]):
        """Initialize the TransformReward wrapper.

        Args:
            env (Env): The vector environment to wrap
            func (Callable): The function to apply to the reward
        """
        super().__init__(env)
        self.func = func

    def rewards(self, reward: ArrayType) -> ArrayType:
        """Apply the function to the batched reward."""
        return self.func(reward)
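
# A minimal usage sketch, not part of the original module (assumes
# ``import gymnasium as gym``): because ``func`` receives the whole batched
# reward array of shape ``(num_envs,)``, any vectorized NumPy operation can be
# passed in directly, for example::
#
#     envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3)
#     envs = TransformReward(envs, func=np.tanh)  # squash rewards into (-1, 1)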
class VectorizeTransformReward(VectorRewardWrapper):
"""Vectorizes a single-agent transform reward wrapper for vector environments.
An example such that applies a ReLU to the reward:
>>> import gymnasium as gym
>>> from gymnasium.wrappers import TransformReward
>>> envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3)
>>> envs = VectorizeTransformReward(envs, wrapper=TransformReward, func=lambda x: (x > 0.0) * x)
>>> _ = envs.action_space.seed(123)
>>> obs, info = envs.reset(seed=123)
>>> obs, rew, term, trunc, info = envs.step(envs.action_space.sample())
>>> envs.close()
>>> rew
array([-0., -0., -0.])
"""
    def __init__(
        self,
        env: VectorEnv,
        wrapper: type[transform_reward.TransformReward],
        **kwargs: Any,
    ):
        """Constructor for the vectorized transform reward wrapper.

        Args:
            env: The vector environment to wrap.
            wrapper: The wrapper to vectorize.
            **kwargs: Keyword arguments for the wrapper.
        """
        super().__init__(env)
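        # The single-agent wrapper is instantiated around a placeholder ``Env``
        # purely so that its ``func`` can be reused; the placeholder environment
        # is never reset or stepped.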
        self.wrapper = wrapper(Env(), **kwargs)
    def rewards(self, reward: ArrayType) -> ArrayType:
        """Iterates over the rewards, updating each with the wrapper's func."""
        for i, r in enumerate(reward):
            reward[i] = self.wrapper.func(r)
        return reward
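
# A hedged sketch, not part of the original module (assumes
# ``import gymnasium as gym``): any single-agent wrapper built on
# ``transform_reward.TransformReward`` can be vectorized this way, with its
# constructor arguments forwarded through ``**kwargs``::
#
#     envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3)
#     envs = VectorizeTransformReward(
#         envs, wrapper=transform_reward.ClipReward, min_reward=-1.0, max_reward=1.0
#     )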
class ClipReward(VectorizeTransformReward):
"""A wrapper that clips the rewards for an environment between an upper and lower bound.

    Example with clipped rewards:
        >>> import numpy as np
        >>> import gymnasium as gym
        >>> envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3)
        >>> envs = ClipReward(envs, 0.0, 2.0)
        >>> _ = envs.action_space.seed(123)
        >>> obs, info = envs.reset(seed=123)
        >>> for _ in range(10):
        ...     obs, rew, term, trunc, info = envs.step(0.5 * np.ones((3, 1)))
        ...
        >>> envs.close()
        >>> rew
        array([0., 0., 0.])
    """
    def __init__(
        self,
        env: VectorEnv,
        min_reward: float | np.ndarray | None = None,
        max_reward: float | np.ndarray | None = None,
    ):
        """Constructor for ClipReward wrapper.

        Args:
            env: The vector environment to wrap
            min_reward: The minimum reward for each step
            max_reward: The maximum reward for each step
        """
        super().__init__(
            env,
            transform_reward.ClipReward,
            min_reward=min_reward,
            max_reward=max_reward,
        )
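
# A quick sanity check of the underlying clipping, using plain NumPy rather
# than the wrapper itself (this mirrors what ``transform_reward.ClipReward``'s
# ``func`` applies to each reward):
#
#     >>> np.clip(np.array([-2.0, 0.5, 3.0]), 0.0, 2.0)
#     array([0. , 0.5, 2. ])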