Skip to content

Gym🏋️

pamiq_core.gym.GymEnvironment

GymEnvironment(env: Env[O, A] | str, **gym_make_kwds: Any)

Bases: Environment[GymObs[O], GymAction[A]]

Wrapper for Gymnasium environments to work with PAMIQ Core.

This class adapts Gymnasium environments to the PAMIQ Core Environment interface, handling observation and action conversions between the two frameworks.

Example
# Create from environment ID
env = GymEnvironment("CartPole-v1")

# Or use existing Gym environment
gym_env = gym.make("CartPole-v1")
env = GymEnvironment(gym_env)

Initialize the GymEnvironment wrapper.

PARAMETER DESCRIPTION
env

Either a Gymnasium environment instance or a string ID to create one

TYPE: Env[O, A] | str

**gym_make_kwds

Additional keyword arguments passed to gym.make() if env is a string

TYPE: Any DEFAULT: {}

Source code in src/pamiq_core/gym/env.py
def __init__(self, env: gym.Env[O, A] | str, **gym_make_kwds: Any) -> None:
    """Wrap a Gymnasium environment for use with PAMIQ Core.

    Args:
        env: A ready-made Gymnasium environment instance, or the ID string
            of a registered environment to build with ``gym.make()``.
        **gym_make_kwds: Extra keyword arguments forwarded to ``gym.make()``.
            They are only used when ``env`` is a string.
    """
    super().__init__()
    if not isinstance(env, str):
        # The caller handed us an environment instance; adopt it unchanged.
        self.env: gym.Env[O, A] = env
        return
    # A string was given: build the environment from its registered ID.
    self.env = gym.make(env, **gym_make_kwds)  # pyright: ignore[reportUnknownMemberType, ]

setup

setup() -> None

Set up the environment by resetting it to initial state.

This method is called during environment initialization and stores the initial observation from the reset.

Source code in src/pamiq_core/gym/env.py
@override
def setup(self) -> None:
    """Reset the wrapped environment and remember its first observation.

    Runs the parent class setup first, then resets the Gymnasium
    environment and stores the reset result as the current observation.
    """
    super().setup()
    # env.reset() returns the values EnvReset is constructed from.
    reset_result = self.env.reset()
    self._obs = EnvReset(*reset_result)

observe

observe() -> GymObs[O]

Get the current observation from the environment.

RETURNS DESCRIPTION
GymObs[O]

The current observation, which can be:

  • EnvReset: After environment reset
  • EnvStep: After a step
  • tuple[EnvStep, EnvReset]: When episode ends and new one begins
Source code in src/pamiq_core/gym/env.py
@override
def observe(self) -> GymObs[O]:
    """Return the most recently stored observation.

    Returns:
        The observation last written by ``setup()`` or ``affect()``:
        - EnvReset: right after an environment reset
        - EnvStep: after an ordinary step
        - tuple[EnvStep, EnvReset]: when an episode ended and a new one began
    """
    latest_obs = self._obs
    return latest_obs

affect

affect(action: GymAction[A]) -> None

Apply an action to the environment and update the observation.

PARAMETER DESCRIPTION
action

The action to apply, containing the actual action value and a flag indicating if reset is needed

TYPE: GymAction[A]

The observation is updated based on the step result and whether the episode has ended or a reset was requested.

Source code in src/pamiq_core/gym/env.py
@override
def affect(self, action: GymAction[A]) -> None:
    """Step the environment with the given action and store the result.

    Args:
        action: Wrapper holding the raw action value (``action.action``)
            and a flag asking for a reset (``action.need_reset``).

    The stored observation is a plain ``EnvStep`` for an ordinary step.
    When the step reports the episode as done, or the action requests a
    reset, the environment is reset immediately and the stored
    observation becomes the pair ``(EnvStep, EnvReset)``.
    """
    # Run one environment step; the reward is coerced to a built-in float.
    step_result = self.env.step(action.action)
    step_obs = EnvStep(step_result[0], float(step_result[1]), *step_result[2:5])

    if not (step_obs.done or action.need_reset):
        # Ordinary step: the episode continues.
        self._obs = step_obs
        return

    # Episode finished or reset requested: keep the last step together
    # with the first observation of the new episode.
    self._obs = (step_obs, EnvReset(*self.env.reset()))

__del__

__del__() -> None

Clean up resources by closing the Gymnasium environment.

Source code in src/pamiq_core/gym/env.py
def __del__(self) -> None:
    """Close the wrapped Gymnasium environment when the wrapper is collected."""
    try:
        wrapped_env = self.env
    except AttributeError:
        # __init__ never got as far as assigning ``env``; nothing to close.
        return
    wrapped_env.close()

pamiq_core.gym.GymAgent

GymAgent(agents: Mapping[str, Agent[Any, Any]] | None = None)

Bases: Agent[GymObs[O], GymAction[A]]

Base class for agents that interact with Gymnasium environments.

This abstract class provides the interface for agents to handle Gymnasium environment observations and produce actions.

Set self.need_reset=True to reset the environment after the current step.

Example
class MyCartPoleAgent(GymAgent[np.ndarray, int]):
    """Example agent that pushes the cart toward the side the pole leans."""

    def on_reset(self, obs, info):
        # Return initial action
        return 0

    def on_step(self, obs, reward, terminated, truncated, info):
        # Parameter order follows GymAgent.on_step: terminated, then truncated.
        # Simple policy: move right if pole is tilting right
        return 1 if obs[2] > 0 else 0
Source code in src/pamiq_core/interaction/agent.py
def __init__(self, agents: Mapping[str, Agent[Any, Any]] | None = None) -> None:
    """Initialize the agent.

    Args:
        agents: Optional mapping of names to child agents. Child agents will inherit
            inference models and data collectors from the parent, and their states
            will be saved and loaded together with the parent.
    """
    # Keep a private dict copy: the caller's mapping may be read-only, and
    # ``Mapping`` itself declares no ``update()``, so the attribute is typed
    # as the concrete ``dict`` that is actually stored.
    self._agents: dict[str, Agent[Any, Any]] = (
        dict(agents) if agents is not None else {}
    )

setup

setup() -> None

Initialize the agent and reset the need_reset flag.

Source code in src/pamiq_core/gym/agent.py
@override
def setup(self) -> None:
    """Run the parent setup, then clear the reset request flag."""
    super().setup()
    # Start with no reset requested.
    self.need_reset = False

on_reset abstractmethod

on_reset(obs: O, info: dict[str, Any]) -> A

Handle environment reset and return initial action.

PARAMETER DESCRIPTION
obs

Initial observation from the environment

TYPE: O

info

Additional information from the environment

TYPE: dict[str, Any]

RETURNS DESCRIPTION
A

The initial action to take

Source code in src/pamiq_core/gym/agent.py
@abstractmethod
def on_reset(self, obs: O, info: dict[str, Any]) -> A:
    """Choose the first action of an episode.

    Subclasses implement this to react to an environment reset.

    Args:
        obs: Initial observation produced by the reset
        info: Additional information reported by the environment

    Returns:
        The first action to take in the new episode
    """

on_step abstractmethod

on_step(
    obs: O, reward: float, terminated: bool, truncated: bool, info: dict[str, Any]
) -> A

Process a step observation and return next action.

PARAMETER DESCRIPTION
obs

Current observation from the environment

TYPE: O

reward

Reward received from the previous action

TYPE: float

terminated

Whether the episode reached a terminal state

TYPE: bool

truncated

Whether the episode was truncated before completion

TYPE: bool

info

Additional information from the environment

TYPE: dict[str, Any]

RETURNS DESCRIPTION
A

The next action to take

Source code in src/pamiq_core/gym/agent.py
@abstractmethod
def on_step(
    self,
    obs: O,
    reward: float,
    terminated: bool,
    truncated: bool,
    info: dict[str, Any],
) -> A:
    """Process a step observation and return next action.

    Args:
        obs: Current observation from the environment
        reward: Reward received from the previous action
        terminated: Whether the episode reached a terminal state
        truncated: Whether the episode was truncated before completion
        info: Additional information from the environment

    Returns:
        The next action to take
    """
    pass

step

step(observation: GymObs[O]) -> GymAction[A]

Process observation and return action wrapped with reset flag.

Handles different observation types: EnvReset (initial observation after reset), EnvStep (regular step observation), and tuple (combined step and reset at episode end).

PARAMETER DESCRIPTION
observation

Current observation from the environment

TYPE: GymObs[O]

RETURNS DESCRIPTION
GymAction[A]

Action wrapped with need_reset flag

Source code in src/pamiq_core/gym/agent.py
@override
def step(self, observation: GymObs[O]) -> GymAction[A]:
    """Process observation and return action wrapped with reset flag.

    Handles different observation types:
    - EnvReset: Initial observation after reset
    - EnvStep: Regular step observation
    - tuple: Combined step and reset (episode end)

    Args:
        observation: Current observation from the environment

    Returns:
        Action wrapped with need_reset flag
    """
    match observation:
        case EnvReset():
            action = self._on_reset(**asdict(observation))
        case EnvStep():
            action = self.on_step(**asdict(observation))
        case tuple():
            # Process final step then reset
            self.on_step(**asdict(observation[0]))
            action = self._on_reset(**asdict(observation[1]))
    return GymAction(action, self.need_reset)