Gym🏋️

pamiq_core.gym.GymEnvironment ¶

GymEnvironment(env: Env[O, A] | str, **gym_make_kwds: Any)

Bases: Environment[GymObs[O], GymAction[A]]

Wrapper for Gymnasium environments to work with PAMIQ Core.

This class adapts Gymnasium environments to the PAMIQ Core Environment interface, handling observation and action conversions between the two frameworks.

Example

# Create from a registered environment ID
env = GymEnvironment("CartPole-v1")

# Or wrap an existing Gymnasium environment instance
gym_env = gym.make("CartPole-v1")
env = GymEnvironment(gym_env)

Initialize the GymEnvironment wrapper.

PARAMETER	DESCRIPTION
`env`	Either a Gymnasium environment instance or a string ID to create one TYPE: `Env[O, A] \| str`
`**gym_make_kwds`	Additional keyword arguments passed to gym.make() if env is a string TYPE: `Any` DEFAULT: `{}`

Source code in src/pamiq_core/gym/env.py

def __init__(self, env: gym.Env[O, A] | str, **gym_make_kwds: Any) -> None:
    """Wrap a Gymnasium environment, creating it first if given an ID.

    Args:
        env: A Gymnasium environment instance, or the string ID of a
            registered environment to create with ``gym.make()``.
        **gym_make_kwds: Keyword arguments forwarded to ``gym.make()``.
            They are only used when ``env`` is a string; for an existing
            instance they are ignored.
    """
    super().__init__()
    # A string is treated as a registered environment ID; anything else is
    # stored unchanged as the environment instance.
    self.env: gym.Env[O, A] = (
        gym.make(env, **gym_make_kwds)  # pyright: ignore[reportUnknownMemberType]
        if isinstance(env, str)
        else env
    )

setup ¶

setup() -> None

Set up the environment by resetting it to initial state.

This method is called during environment initialization and stores the initial observation from the reset.

Source code in src/pamiq_core/gym/env.py

@override
def setup(self) -> None:
    """Prepare the wrapper by resetting the wrapped environment.

    Runs the parent ``setup`` first, then resets the Gymnasium
    environment and stores the result as an ``EnvReset`` in
    ``self._obs``.
    """
    super().setup()
    # The reset result is star-unpacked positionally into EnvReset.
    reset_result = self.env.reset()
    self._obs = EnvReset(*reset_result)

observe ¶

observe() -> GymObs[O]

Get the current observation from the environment.

RETURNS	DESCRIPTION
`GymObs[O]`	The current observation, which can be: EnvReset (after an environment reset), EnvStep (after a step), or tuple[EnvStep, EnvReset] (when an episode ends and a new one begins)

Source code in src/pamiq_core/gym/env.py

@override
def observe(self) -> GymObs[O]:
    """Get the current observation from the environment.

    This does not advance the environment; it only returns the value
    most recently stored in ``self._obs``.

    Returns:
        The current observation, which can be:
        - EnvReset: After environment reset
        - EnvStep: After a step
        - tuple[EnvStep, EnvReset]: When episode ends and new one begins
    """
    return self._obs

affect ¶

affect(action: GymAction[A]) -> None

Apply an action to the environment and update the observation.

PARAMETER	DESCRIPTION
`action`	The action to apply, containing the actual action value and a flag indicating if reset is needed TYPE: `GymAction[A]`

The observation is updated based on the step result and whether the episode has ended or a reset was requested.

Source code in src/pamiq_core/gym/env.py

@override
def affect(self, action: GymAction[A]) -> None:
    """Step the wrapped environment and store the resulting observation.

    Args:
        action: Wrapper holding the raw action passed to ``env.step`` and
            a ``need_reset`` flag requesting a reset after this step.

    If the step reports ``done`` or the action requests a reset, the
    environment is reset immediately and the stored observation becomes
    an ``(EnvStep, EnvReset)`` pair; otherwise it is the ``EnvStep`` alone.
    """
    step_result = self.env.step(action.action)
    step_obs = EnvStep(
        step_result[0],
        float(step_result[1]),  # reward is coerced to a plain float
        step_result[2],
        step_result[3],
        step_result[4],
    )
    if not (step_obs.done or action.need_reset):
        self._obs = step_obs
        return
    # Episode over (or reset requested): keep the final step together with
    # the first observation of the new episode.
    self._obs = (step_obs, EnvReset(*self.env.reset()))

__del__ ¶

__del__() -> None

Clean up resources by closing the Gymnasium environment.

Source code in src/pamiq_core/gym/env.py

def __del__(self) -> None:
    """Close the wrapped Gymnasium environment when the wrapper is destroyed.

    Nothing is done if the instance has no ``env`` attribute
    (presumably because ``__init__`` raised before assigning it).
    """
    if not hasattr(self, "env"):
        return
    self.env.close()

pamiq_core.gym.GymAgent ¶

GymAgent(agents: Mapping[str, Agent[Any, Any]] | None = None)

Bases: Agent[GymObs[O], GymAction[A]]

Base class for agents that interact with Gymnasium environments.

This abstract class provides the interface for agents to handle Gymnasium environment observations and produce actions.

Set `self.need_reset = True` to request an environment reset after the current step.

Example

class MyCartPoleAgent(GymAgent[np.ndarray, int]):
    """Minimal example agent for CartPole."""

    def on_reset(self, obs, info):
        # Return initial action
        return 0

    def on_step(self, obs, reward, terminated, truncated, info):
        # Parameter order matches GymAgent.on_step (terminated before
        # truncated), so positional and keyword calls agree.
        # Simple policy: move right if pole is tilting right
        return 1 if obs[2] > 0 else 0

Source code in src/pamiq_core/interaction/agent.py

def __init__(self, agents: Mapping[str, Agent[Any, Any]] | None = None) -> None:
    """Initialize the agent.

    Args:
        agents: Optional mapping of names to child agents. Child agents will inherit
            inference models and data collectors from the parent, and their states
            will be saved and loaded together with the parent. The mapping is
            copied, so later changes to the caller's mapping are not reflected.
    """
    # Annotated as a concrete dict rather than the read-only ``Mapping``
    # interface, since the container is built and owned by this instance.
    self._agents: dict[str, Agent[Any, Any]] = (
        dict(agents) if agents is not None else {}
    )

setup ¶

setup() -> None

Initialize the agent and reset the need_reset flag.

Source code in src/pamiq_core/gym/agent.py

@override
def setup(self) -> None:
    """Initialize the agent and reset the need_reset flag.

    Runs the parent ``setup`` and then sets ``need_reset`` to ``False``.
    """
    super().setup()
    self.need_reset = False

on_reset `abstractmethod` ¶

on_reset(obs: O, info: dict[str, Any]) -> A

Handle environment reset and return initial action.

PARAMETER	DESCRIPTION
`obs`	Initial observation from the environment TYPE: `O`
`info`	Additional information from the environment TYPE: `dict[str, Any]`

RETURNS	DESCRIPTION
`A`	The initial action to take

Source code in src/pamiq_core/gym/agent.py

@abstractmethod
def on_reset(self, obs: O, info: dict[str, Any]) -> A:
    """Handle environment reset and return initial action.

    Abstract: subclasses must implement this method.

    Args:
        obs: Initial observation from the environment
        info: Additional information from the environment

    Returns:
        The initial action to take
    """
    pass

on_step `abstractmethod` ¶

on_step(
    obs: O, reward: float, terminated: bool, truncated: bool, info: dict[str, Any]
) -> A

Process a step observation and return next action.

PARAMETER	DESCRIPTION
`obs`	Current observation from the environment TYPE: `O`
`reward`	Reward received from the previous action TYPE: `float`
`terminated`	Whether the episode reached a terminal state TYPE: `bool`
`truncated`	Whether the episode was truncated before completion TYPE: `bool`
`info`	Additional information from the environment TYPE: `dict[str, Any]`

RETURNS	DESCRIPTION
`A`	The next action to take

Source code in src/pamiq_core/gym/agent.py

@abstractmethod
def on_step(
    self,
    obs: O,
    reward: float,
    terminated: bool,
    truncated: bool,
    info: dict[str, Any],
) -> A:
    """Process a step observation and return next action.

    Abstract: subclasses must implement this method.

    Args:
        obs: Current observation from the environment
        reward: Reward received from the previous action
        terminated: Whether the episode reached a terminal state
        truncated: Whether the episode was truncated before completion
        info: Additional information from the environment

    Returns:
        The next action to take
    """
    pass

step ¶

step(observation: GymObs[O]) -> GymAction[A]

Process observation and return action wrapped with reset flag.

Handles different observation types:

- EnvReset: Initial observation after reset
- EnvStep: Regular step observation
- tuple: Combined step and reset (episode end)

PARAMETER	DESCRIPTION
`observation`	Current observation from the environment TYPE: `GymObs[O]`

RETURNS	DESCRIPTION
`GymAction[A]`	Action wrapped with need_reset flag

Source code in src/pamiq_core/gym/agent.py

@override
def step(self, observation: GymObs[O]) -> GymAction[A]:
    """Dispatch an observation to the matching handler and wrap the action.

    Dispatch by observation type:
    - EnvReset: passed to the reset handler
    - EnvStep: passed to ``on_step``
    - tuple: ``(final step, reset)``; the final step is passed to
      ``on_step`` (its action is discarded) and the returned action comes
      from the reset handler

    Args:
        observation: Current observation from the environment

    Returns:
        The chosen action paired with the current ``need_reset`` flag
    """
    # NOTE(review): `_on_reset` is defined outside this section; presumably a
    # private wrapper around `on_reset` — confirm.
    if isinstance(observation, EnvReset):
        action = self._on_reset(**asdict(observation))
    elif isinstance(observation, EnvStep):
        action = self.on_step(**asdict(observation))
    elif isinstance(observation, tuple):
        # The episode's last step is still delivered to the agent; only the
        # action it returns is dropped.
        self.on_step(**asdict(observation[0]))
        action = self._on_reset(**asdict(observation[1]))
    return GymAction(action, self.need_reset)

Gym🏋️

pamiq_core.gym.GymEnvironment ¶

setup ¶

observe ¶

affect ¶

__del__ ¶

pamiq_core.gym.GymAgent ¶

setup ¶

on_reset abstractmethod ¶

on_step abstractmethod ¶

step ¶

__del__ ¶

on_reset `abstractmethod` ¶

on_step `abstractmethod` ¶