Module easyagents.core

This module contains the core data structures shared between frontend and backend, such as the definitions of all callbacks and agent configurations.

View Source
"""This module contains the core datastructures shared between fronten and backend like the definition

    of all callbacks and agent configurations.

"""

from abc import ABC

from typing import Callable, Optional, Dict, Tuple, List, Union

from enum import Flag, auto

import math

import easyagents.env

import easyagents.backends.monitor

import gym.core

import matplotlib.pyplot as plt

class GymContext(object):

    """Contains the context for gym api calls (wrapping a gym env instance)."""

    def __init__(self):

        self._monitor_env: Optional[easyagents.backends.monitor._MonitorEnv] = None

        self._totals = None

    def __str__(self):

        return f'MonitorEnv={self._monitor_env} Totals={self._totals}'

    @property

    def gym_env(self) -> Optional[gym.core.Env]:

        result = None

        if self._monitor_env:

            result = self._monitor_env.env

        return result

class PlotType(Flag):

    """Defines the point in time when a plot is created / updated.

    NONE: No plot is updated.

    PLAY_EPISODE: Called after the last step of each played episode. The gym environment is still

        accessible through agent_context.play.gym_env.

    PLAY_STEP: Called after each play step. The gym environment is still

        accessible through agent_context.play.gym_env.

    TRAIN_EVAL: Called after the last step of the last evaluation episode during training.

        The gym environment is accessible through agent_context.play.gym_env.

    TRAIN_ITERATION: Called after each train iteration. No gym environment is available.

    """

    NONE = 0

    PLAY_EPISODE = auto()

    PLAY_STEP = auto()

    TRAIN_EVAL = auto()

    TRAIN_ITERATION = auto()

class PyPlotContext(object):

    """Contain the context for the maplotlib.pyplot figure plotting.

    Attributes

        figure: the figure to plot to

        figsize: figure (width,height) in inches for the figure to be created.

        is_jupyter_active: True if we plot to jupyter notebook cell, False otherwise.

        max_columns: the max number of subplot columns in the pyplot figure

    """

    def __init__(self):

        self._created_subplots = PlotType.NONE

        self.figure: Optional[plt.Figure] = None

        self.figsize: (float, float) = (17, 6)

        self._call_jupyter_display = False

        self.is_jupyter_active = False

        self.max_columns = 3

    def __str__(self):

        figure_number = None

        figure_axes_len = 0

        if self.figure:

            figure_number = self.figure.number

            if self.figure.axes:

                figure_axes_len = len(self.figure.axes)

        return f'is_jupyter_active={self.is_jupyter_active} max_columns={self.max_columns} ' + \

               f'_created_subplots={self._created_subplots} figure={figure_number} axes={figure_axes_len} '

    def _is_subplot_created(self, plot_type: PlotType):

        """Yields true if a subplot of type plot_type was created by a plot callback."""

        result = ((self._created_subplots & plot_type) != PlotType.NONE)

        return result

class ModelConfig(object):

    """The model configurations, containing the name of the gym environment and the neural network architecture.

        Attributes:

            original_env_name: the name of the underlying gym environment, eg 'CartPole-v0'

            gym_env_name: the name of the actual gym environment used (a wrapper around the environment given

                by original_env_name)

            fc_layers: int tuple defining the number and size of each fully connected layer.

            seed: the seed to be used for example for the gym_env or None for no seed

    """

    _KEY_SEED = 'seed'

    _KEY_GYM_ENV = 'gym_env'

    _KEY_FC_LAYERS = 'fc_layers'

    def __init__(self,

                 gym_env_name: str,

                 fc_layers: Union[Tuple[int, ...], int, None] = None,

                 seed: Optional[int] = None):

        """

            Args:

                gym_env_name: the name of the registered gym environment to use, eg 'CartPole-v0'

                fc_layers: int tuple defining the number and size of each fully connected layer.

        """

        if fc_layers is None:

            fc_layers = (100, 100)

        if isinstance(fc_layers, int):

            fc_layers = (fc_layers,)

        assert isinstance(gym_env_name, str), "passed gym_env_name not a string."

        assert gym_env_name != "", "gym environment name is empty."

        assert easyagents.env._is_registered_with_gym(gym_env_name), \

            f'"{gym_env_name}" is not the name of an environment registered with OpenAI gym.' + \

            'Consider using easyagents.env.register_with_gym to register your environment.'

        assert fc_layers is not None, "fc_layers not set"

        assert isinstance(fc_layers, tuple), "fc_layers not a tuple"

        assert fc_layers, "fc_layers must contain at least 1 int"

        for i in fc_layers:

            assert isinstance(i, int) and i >= 1, f'{i} is not a valid size for a hidden layer'

        self.original_env_name = gym_env_name

        self.gym_env_name = None

        self.fc_layers = fc_layers

        self.seed = seed

    def __str__(self):

        return f'fc_layers={self.fc_layers} seed={self.seed} gym_env_name={self.gym_env_name}'

    @staticmethod

    def _from_dict(from_dict: Dict[str, object]):

        """Creates a new instance of ModelConfig based on the parameters contained in dict

        Returns:

            new instance of ModelConfig configured by dict

        """

        assert from_dict

        # noinspection PyTypeChecker

        fc_layers = tuple(from_dict[ModelConfig._KEY_FC_LAYERS])

        # noinspection PyTypeChecker

        result = ModelConfig(gym_env_name=str(from_dict[ModelConfig._KEY_GYM_ENV]), fc_layers=fc_layers,

                             seed=from_dict[ModelConfig._KEY_SEED])

        return result

    def _to_dict(self) -> Dict[str, object]:

        """saves this model configuration to a dict. The model_config can be recreated by a call to _from_dict

        Retunns:

            dict containing all parameters of this model_config (this does not include any policy)

        """

        result: Dict[str, object] = dict()

        result[ModelConfig._KEY_SEED] = self.seed

        result[ModelConfig._KEY_GYM_ENV] = self.original_env_name

        result[ModelConfig._KEY_FC_LAYERS] = self.fc_layers

        return result

class TrainContext(object):

    """Contains the configuration of an agents train method like the number of iterations or the learning rate

        along with data gathered sofar during the training which is identical for all implementations.

        Hints:

        o TrainContext contains all the parameters needed to control the train loop.

        o Subclasses of TrainContext may contain additional Agent (but not backend) specific parameters.

        Attributes:

            num_iterations: number of times the training is repeated (with additional data), unlimited if None

            max_steps_per_episode: maximum number of steps per episode

            learning_rate: the learning rate used in the next iteration's policy training (0,1]

            reward_discount_gamma: the factor by which a reward is discounted for each step (0,1]

            max_steps_in_buffer: size of the agents buffer in steps

            training_done: if true the train loop is terminated at the end of the current iteration

            iterations_done_in_training: the number of iterations completed so far (during training)

            episodes_done_in_iteration: the number of episodes completed in the current iteration

            episodes_done_in_training: the number of episodes completed over all iterations so far.

                The episodes played for evaluation are not included in this count.

            steps_done_in_training: the number of steps taken over all iterations so far

            steps_done_in_iteration: the number of steps taken in the current iteration

            num_iterations_between_eval: number of training iterations before the current policy is evaluated.

            num_episodes_per_eval: number of episodes played to estimate the average return and steps

            eval_rewards: dict containing the reward statistics for each policy evaluation.

                Each entry contains the tuple (min, average, max) over the sum of rewards over all episodes

                played for the current evaluation. The dict is indexed by the current_episode.

            eval_steps: dict containing the step statistics for each policy evaluation.

                Each entry contains the tuple (min, average, max) over the number of steps over all episodes

                played for the current evaluation. The dict is indexed by the current_episode.

            loss: dict containing the loss for each training iteration. The dict is indexed by the current_episode.

    """

    def __init__(self):

        self.num_iterations: Optional[int] = None

        self.max_steps_per_episode: Optional[int] = 1000

        self.num_iterations_between_eval: int = 10

        self.num_episodes_per_eval: int = 10

        self.learning_rate: float = 0.001

        self.reward_discount_gamma: float = 1.0

        self.max_steps_in_buffer: int = 100000

        self.training_done: bool

        self.iterations_done_in_training: int

        self.episodes_done_in_iteration: int

        self.episodes_done_in_training: int

        self.steps_done_in_training: int

        self.steps_done_in_iteration = 0

        self.loss: Dict[int, float]

        self.eval_rewards: Dict[int, Tuple[float, float, float]]

        self.eval_steps: Dict[int, Tuple[float, float, float]]

        self._reset()

    def __str__(self):

        return f'training_done={self.training_done} ' + \

               f'#iterations_done_in_training={self.iterations_done_in_training} ' + \

               f'#episodes_done_in_iteration={self.episodes_done_in_iteration} ' + \

               f'#steps_done_in_iteration={self.steps_done_in_iteration} ' + \

               f'#iterations={self.num_iterations} ' + \

               f'#max_steps_per_episode={self.max_steps_per_episode} ' + \

               f'#iterations_between_eval={self.num_iterations_between_eval} ' + \

               f'#episodes_per_eval={self.num_episodes_per_eval} ' + \

               f'#learning_rate={self.learning_rate} ' + \

               f'#reward_discount_gamma={self.reward_discount_gamma} ' + \

               f'#max_steps_in_buffer={self.max_steps_in_buffer} '

    def _reset(self):

        """Clears all values modified during a train() call."""

        self.training_done = False

        self.iterations_done_in_training = 0

        self.episodes_done_in_iteration = 0

        self.episodes_done_in_training = 0

        self.steps_done_in_training = 0

        self.steps_done_in_iteration = 0

        self.loss = dict()

        self.eval_rewards = dict()

        self.eval_steps = dict()

    def _validate(self):

        """Validates the consistency of all values, raising an exception if an inadmissible combination is detected."""

        assert self.num_iterations is None or self.num_iterations > 0, "num_iterations not admissible"

        assert self.max_steps_per_episode > 0, "max_steps_per_episode not admissible"

        assert self.num_iterations_between_eval > 0, "num_iterations_between_eval not admissible"

        assert self.num_episodes_per_eval > 0, "num_episodes_per_eval not admissible"

        assert 0 < self.learning_rate <= 1, "learning_rate not in interval (0,1]"

        assert 0 < self.reward_discount_gamma <= 1, "reward_discount_gamma not in interval (0,1]"

    @property

    def num_iterations_between_plot(self):

        """number of iterations between 2 plot updates during training.

        Returns:

            number of iterations or 0 if no plot updates should take place.

            """

        result = 0

        if self.num_iterations_between_eval:

            result = math.ceil(self.num_iterations_between_eval / 3)

        return result

class EpisodesTrainContext(TrainContext):

    """Base class for all agent which evaluate a number of episodes during each iteration:

        The train loop proceeds roughly as follows:

            for i in num_iterations

                for e in num_episodes_per_iteration

                    play episode and record steps

                train policy for num_epochs_per_iteration epochs

                if current_episode % num_iterations_between_eval == 0:

                    evaluate policy

                if training_done

                    break

        Attributes:

            num_episodes_per_iteration: number of episodes played per training iteration

            num_epochs_per_iteration: number of times the data collected for the current iteration

                is used to retrain the current policy

    """

    def __init__(self):

        self.num_episodes_per_iteration: int = 10

        self.num_epochs_per_iteration: int = 10

        super().__init__()

    def __str__(self):

        return super().__str__() + \

               f'#episodes_per_iteration={self.num_episodes_per_iteration} ' + \

               f'#epochs_per_iteration={self.num_epochs_per_iteration} '

    def _validate(self):

        """Validates the consistency of all values, raising an exception if an inadmissible combination is detected."""

        super()._validate()

        assert self.num_episodes_per_iteration > 0, "num_episodes_per_iteration not admissible"

        assert self.num_epochs_per_iteration > 0, "num_epochs_per_iteration not admissible"

class CemTrainContext(EpisodesTrainContext):

    """Holds the configuration and current training state for Cross-Entropy-Methode agents.

        Attributes:

            elite_set_fraction: fraction of the elite policy set.

            num_steps_buffer_preload: number of steps performed to initially load the policy buffer

    """

    def __init__(self):

        super().__init__()

        self.num_iterations = 100

        self.num_episodes_per_iteration: int = 50

        self.elite_set_fraction: float = 0.1

        self.num_steps_buffer_preload: int = 2000

    def __str__(self):

        return super().__str__() + f'#elite_set_fraction={self.elite_set_fraction} '

    def _validate(self):

        """Validates the consistency of all values, raising an exception if an inadmissible combination is detected."""

        super()._validate()

        assert 1 >= self.elite_set_fraction > 0, "elite_set_fraction must be in interval (0,1]"

class PpoTrainContext(EpisodesTrainContext):

    """TrainContext for Actor-Critic type agents like Ppo or Sac.

    Attributes:

        actor_loss: loss observed during training of the actor network. dict is indexed by the current_episode.

        critic_loss: loss observed during training of the critic network. dict is indexed by the current_episode.

    """

    def __init__(self):

        super().__init__()

        self.actor_loss: Dict[int, float]

        self.critic_loss: Dict[int, float]

    def _reset(self):

        self.actor_loss = dict()

        self.critic_loss = dict()

        super()._reset()

class StepsTrainContext(TrainContext):

    """Base class for all agent which evaluate a number of steps during each iteration:

        The train loop proceeds roughly as follows:

            for i in num_iterations

                for s in num_steps_per_iteration

                    play episodes and record steps

                train policy for num_epochs_per_iteration epochs

                if current_episode % num_iterations_between_eval == 0:

                    evaluate policy

                if training_done

                    break

        Attributes:

            num_steps_per_iteration: number of steps played for each iteration

            num_steps_buffer_preload: number of initial collect steps to preload the buffer

            num_steps_sampled_from_buffer: the number of steps sampled from buffer for each iteration training

    """

    def __init__(self):

        super().__init__()

        self.num_iterations = 20000

        self.num_iterations_between_eval = 1000

        self.num_steps_per_iteration: int = 1

        self.num_steps_buffer_preload: int = 1000

        self.num_steps_sampled_from_buffer: int = 64

        self.max_steps_in_buffer = 100000

    def __str__(self):

        return super().__str__() + \

               f'#steps_per_iteration={self.num_steps_per_iteration} ' + \

               f'#steps_buffer_preload={self.num_steps_buffer_preload} ' + \

               f'#steps_sampled_from_buffer={self.num_steps_sampled_from_buffer} '

    def _validate(self):

        """Validates the consistency of all values, raising an exception if an inadmissible combination is detected."""

        super()._validate()

        assert self.num_steps_per_iteration > 0, "num_steps_per_iteration not admissible"

class PlayContext(object):

    """Contains the current configuration of an agents play method like the number of episodes to play

        and the max number of steps per episode.

        The EasyAgent.play() method proceeds (roughly) as follow:

        for e in num_episodes

            play (while steps_done_in_episode < max_steps_per_episode)

            if playing_done

                break

        Attributes:

            num_episodes: number of episodes to play, unlimited if None

            max_steps_per_episode: maximum number of steps per episode, unlimited if None

            play_done: if true the play loop is terminated at the end of the current episode

            episodes_done: the number of episodes played (including the current episode).

            steps_done_in_episode: the number of steps taken in the current episode.

            steps_done: the number of steps played (over all episodes so far)

            actions: dict containing for each episode the actions taken in each step

            rewards: dict containing for each episode the rewards received in each step

            sum_of_rewards: dict containing for each episode the sum of rewards over all steps

            gym_env: the gym environment used to play

    """

    def __init__(self, train_context: Optional[TrainContext] = None):

        """

        Args:

             train_context: if set num_episodes, max_steps_per_episode and seed are set from train_context

        """

        self.num_episodes: Optional[int] = None

        self.max_steps_per_episode: Optional[int] = None

        if train_context is not None:

            self.num_episodes = train_context.num_episodes_per_eval

            self.max_steps_per_episode = train_context.max_steps_per_episode

        self.play_done: bool

        self.episodes_done: int

        self.steps_done_in_episode: int

        self.steps_done: int

        self.actions: Dict[int, List[object]]

        self.rewards: Dict[int, List[float]]

        self.sum_of_rewards: Dict[int, float]

        self.gym_env: Optional[gym.core.Env]

        self._reset()

    def __str__(self):

        return f'#episodes={self.num_episodes} ' + \

               f'max_steps_per_episode={self.max_steps_per_episode} ' + \

               f'play_done={self.play_done} ' + \

               f'episodes_done={self.episodes_done} ' + \

               f'steps_done_in_episode={self.steps_done_in_episode} ' + \

               f'steps_done={self.steps_done} '

    def _reset(self):

        """Clears all values modified during a train() call."""

        self.play_done: bool = False

        self.episodes_done: int = 0

        self.steps_done_in_episode: int = 0

        self.steps_done: int = 0

        self.actions: Dict[int, List[object]] = dict()

        self.rewards: Dict[int, List[float]] = dict()

        self.sum_of_rewards: Dict[int, float] = dict()

        self.gym_env: Optional[gym.core.Env] = None

    def _validate(self):

        """Validates the consistency of all values, raising an exception if an inadmissible combination is detected."""

        assert (self.num_episodes is None) or (self.num_episodes > 0), "num_episodes not admissible"

        assert (self.max_steps_per_episode is None) or self.max_steps_per_episode > 0, \

            "max_steps_per_episode not admissible"

class AgentContext(object):

    """Collection of state and configuration settings for a EasyAgent instance.

    Attributes:

        model: model configuration including the name of the underlying gym_environment and the

            policy's neural network architecture.

        train: training configuration and current train state. None if not inside a train call.

        play: play / eval configuration and current state. None if not inside a play call (directly or

            due to an evaluation inside a train loop)

        gym: context for gym environment related calls.

        pyplot: the context containing the matplotlib.pyplot figure to plot to during training or playing

    """

    def __init__(self, model: ModelConfig):

        """

        Args:

            model: model configuration including the name of the underlying gym_environment and the

                policy's neural network architecture.

        """

        assert isinstance(model, ModelConfig), "model not set"

        self.model: ModelConfig = model

        self.train: Optional[TrainContext] = None

        self.play: Optional[PlayContext] = None

        self.gym: GymContext = GymContext()

        self.pyplot: PyPlotContext = PyPlotContext()

        self._is_policy_trained = False

        self._agent_saver: Optional[

            Callable[[Optional[str], Union[List[AgentCallback], AgentCallback, None]], str]] = None

    def __str__(self):

        result = f'agent_context:'

        result += f'\napi   =[{self.gym}]'

        if self.train is not None:

            result += f'\ntrain =[{self.train}] '

        if self.play is not None:

            result += f'\nplay  =[{self.play}] '

        if self.pyplot is not None:

            result += f'\npyplot=[{self.pyplot}] '

        result += f'\nmodel =[{self.model}] '

        return result

    @property

    def is_eval(self) -> bool:

        """Yields true if a policy evaluation inside an agent.train(...) call is in progress."""

        return (self.play is not None) and (self.train is not None)

    @property

    def is_play(self) -> bool:

        """Yields true if an agent.play(...) call is in progress, but not a policy evaluation"""

        return (self.play is not None) and (self.train is None)

    def _is_plot_ready(self, plot_type: PlotType) -> bool:

        """Yields true if any of the plots in plot_type is ready to be plotted.

        A plot_type is ready if a plot callback was registered for this type (like TRAIN_EVAL),

        the agent is in runtime state corresponding to the plot type (like in training and at the end of

        an evaluation period) and any frequency condition is met (like num_episodes_between_plot)

        """

        result = False

        if (plot_type & PlotType.PLAY_EPISODE) != PlotType.NONE:

            result = result | (self.is_play and self.pyplot._is_subplot_created(PlotType.PLAY_EPISODE))

        if (plot_type & PlotType.PLAY_STEP) != PlotType.NONE:

            result = result | (self.is_play and self.pyplot._is_subplot_created(PlotType.PLAY_STEP))

        if (plot_type & PlotType.TRAIN_EVAL) != PlotType.NONE:

            train_result = self.is_eval

            train_result = train_result and self.pyplot._is_subplot_created(PlotType.TRAIN_EVAL)

            train_result = train_result and (self.play.episodes_done == self.train.num_episodes_per_eval)

            result = result | train_result

        if (plot_type & PlotType.TRAIN_ITERATION) != PlotType.NONE:

            train_result = self.is_train

            train_result = train_result and self.pyplot._is_subplot_created(PlotType.TRAIN_ITERATION)

            train_result = train_result and \

                           self.train.num_iterations_between_plot > 0 and \

                           ((self.train.iterations_done_in_training % self.train.num_iterations_between_plot) == 0)

            result = result | train_result

        return result

    @property

    def is_train(self) -> bool:

        """Yields true if an agent.train(...) call is in progress, but not a policy evaluation."""

        return (self.train is not None) and (self.play is None)

class AgentCallback(ABC):

    """Base class for all callbacks monitoring the backend algorithms api calls or the api calls to the gym

        environment"""

    def on_api_log(self, agent_context: AgentContext, api_target: str, log_msg: str):

        """Logs a call to the api of the agents implementation library / framework."""

        pass

    def on_log(self, agent_context: AgentContext, log_msg: str):

        """Logs a general message"""

        pass

    def on_gym_init_begin(self, agent_context: AgentContext):

        """called when the monitored environment begins the instantiation of a new gym environment.

            Args:

                agent_context: api_context passed to calling agent

        """

    def on_gym_init_end(self, agent_context: AgentContext):

        """called when the monitored environment completed the instantiation of a new gym environment.

        Args:

            agent_context: api_context passed to calling agent

        """

        pass

    def on_gym_reset_begin(self, agent_context: AgentContext, **kwargs):

        """Before a call to gym.reset

            Args:

                agent_context: api_context passed to calling agent

                kwargs: the args to be passed to the underlying environment

        """

    def on_gym_reset_end(self, agent_context: AgentContext, reset_result: Tuple, **kwargs):

        """After a call to gym.reset was completed

        Args:

            agent_context: api_context passed to calling agent

            reset_result: object returned by gym.reset

            kwargs: args passed to gym.reset

        """

        pass

    def on_gym_step_begin(self, agent_context: AgentContext, action):

        """Before a call to gym.step

        Args:

            agent_context: api_context passed to calling agent

            action: the action to be passed to the underlying environment

        """

        pass

    def on_gym_step_end(self, agent_context: AgentContext, action, step_result: Tuple):

        """After a call to gym.step was completed

        Args:

            agent_context: api_context passed to calling agent

            action: the action to be passed to the underlying environment

            step_result: (observation,reward,done,info) tuple returned by gym.step

        """

        pass

    def on_play_episode_begin(self, agent_context: AgentContext):

        """Called once at the start of new episode to be played (during play or eval, but not during train). """

    def on_play_episode_end(self, agent_context: AgentContext):

        """Called once after an episode is done or stopped (during play or eval, but not during train)."""

    def on_play_begin(self, agent_context: AgentContext):

        """Called once at the entry of an agent.play() call (during play or eval, but not during train). """

    def on_play_end(self, agent_context: AgentContext):

        """Called once before exiting an agent.play() call (during play or eval, but not during train)"""

    def on_play_step_begin(self, agent_context: AgentContext, action):

        """Called once before a new step is taken in the current episode (during play or eval, but not during train).

            Args:

                 agent_context: the context describing the agent's current configuration

                 action: the action to be passed to the upcoming gym_env.step call

        """

    def on_play_step_end(self, agent_context: AgentContext, action, step_result: Tuple):

        """Called once after a step is completed in the current episode (during play or eval, but not during train)."""

    def on_train_begin(self, agent_context: AgentContext):

        """Called once at the entry of an agent.train() call. """

    def on_train_end(self, agent_context: AgentContext):

        """Called once before exiting an agent.train() call"""

    def on_train_iteration_begin(self, agent_context: AgentContext):

        """Called once at the start of a new iteration. """

    def on_train_iteration_end(self, agent_context: AgentContext):

        """Called once after the current iteration is completed"""

class _PostProcessCallback(AgentCallback):

    pass

class _PreProcessCallback(AgentCallback):

    pass

Classes

AgentCallback

class AgentCallback(
    /,
    *args,
    **kwargs
)

Base class for all callbacks monitoring the backend algorithm's api calls or the api calls to the gym environment
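A minimal usage sketch (not part of the module itself): a custom callback only needs to subclass AgentCallback and override the hooks it cares about. The attributes read below (iterations_done_in_training, num_iterations, eval_rewards) are the TrainContext attributes documented above; the LogIterations class is hypothetical.

from easyagents import core

class LogIterations(core.AgentCallback):
    """Hypothetical callback: prints progress after each training iteration."""

    def on_train_iteration_end(self, agent_context: core.AgentContext):
        tc = agent_context.train  # set while inside an agent.train() call
        msg = f'iteration {tc.iterations_done_in_training}/{tc.num_iterations}'
        if tc.eval_rewards:
            last_eval = max(tc.eval_rewards)  # eval_rewards is indexed by the current_episode
            msg += f' eval rewards (min, avg, max)={tc.eval_rewards[last_eval]}'
        print(msg)

An instance of such a callback would typically be passed to an agent's train() call; the agent classes themselves live outside this module.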

View Source
class AgentCallback(ABC):

    """Base class for all callbacks monitoring the backend algorithms api calls or the api calls to the gym

        environment"""

    def on_api_log(self, agent_context: AgentContext, api_target: str, log_msg: str):

        """Logs a call to the api of the agents implementation library / framework."""

        pass

    def on_log(self, agent_context: AgentContext, log_msg: str):

        """Logs a general message"""

        pass

    def on_gym_init_begin(self, agent_context: AgentContext):

        """called when the monitored environment begins the instantiation of a new gym environment.

            Args:

                agent_context: api_context passed to calling agent

        """

    def on_gym_init_end(self, agent_context: AgentContext):

        """called when the monitored environment completed the instantiation of a new gym environment.

        Args:

            agent_context: api_context passed to calling agent

        """

        pass

    def on_gym_reset_begin(self, agent_context: AgentContext, **kwargs):

        """Before a call to gym.reset

            Args:

                agent_context: api_context passed to calling agent

                kwargs: the args to be passed to the underlying environment

        """

    def on_gym_reset_end(self, agent_context: AgentContext, reset_result: Tuple, **kwargs):

        """After a call to gym.reset was completed

        Args:

            agent_context: api_context passed to calling agent

            reset_result: object returned by gym.reset

            kwargs: args passed to gym.reset

        """

        pass

    def on_gym_step_begin(self, agent_context: AgentContext, action):

        """Before a call to gym.step

        Args:

            agent_context: api_context passed to calling agent

            action: the action to be passed to the underlying environment

        """

        pass

    def on_gym_step_end(self, agent_context: AgentContext, action, step_result: Tuple):

        """After a call to gym.step was completed

        Args:

            agent_context: api_context passed to calling agent

            action: the action to be passed to the underlying environment

            step_result: (observation,reward,done,info) tuple returned by gym.step

        """

        pass

    def on_play_episode_begin(self, agent_context: AgentContext):

        """Called once at the start of new episode to be played (during play or eval, but not during train). """

    def on_play_episode_end(self, agent_context: AgentContext):

        """Called once after an episode is done or stopped (during play or eval, but not during train)."""

    def on_play_begin(self, agent_context: AgentContext):

        """Called once at the entry of an agent.play() call (during play or eval, but not during train). """

    def on_play_end(self, agent_context: AgentContext):

        """Called once before exiting an agent.play() call (during play or eval, but not during train)"""

    def on_play_step_begin(self, agent_context: AgentContext, action):

        """Called once before a new step is taken in the current episode (during play or eval, but not during train).

            Args:

                 agent_context: the context describing the agent's current configuration

                 action: the action to be passed to the upcoming gym_env.step call

        """

    def on_play_step_end(self, agent_context: AgentContext, action, step_result: Tuple):

        """Called once after a step is completed in the current episode (during play or eval, but not during train)."""

    def on_train_begin(self, agent_context: AgentContext):

        """Called once at the entry of an agent.train() call. """

    def on_train_end(self, agent_context: AgentContext):

        """Called once before exiting an agent.train() call"""

    def on_train_iteration_begin(self, agent_context: AgentContext):

        """Called once at the start of a new iteration. """

    def on_train_iteration_end(self, agent_context: AgentContext):

        """Called once after the current iteration is completed"""

Ancestors (in MRO)

  • abc.ABC

Descendants

  • easyagents.core._PostProcessCallback
  • easyagents.core._PreProcessCallback
  • easyagents.callbacks.plot._PlotCallback
  • easyagents.callbacks.plot.Clear
  • easyagents.backends.core._BackendEvalCallback
  • easyagents.callbacks.duration.Fast
  • easyagents.callbacks.log._LogCallbackBase
  • easyagents.callbacks.log._CallbackCounts
  • easyagents.callbacks.save._SaveCallback

Methods

on_api_log
def on_api_log(
    self,
    agent_context: easyagents.core.AgentContext,
    api_target: str,
    log_msg: str
)

Logs a call to the api of the agent's implementation library / framework.

View Source
    def on_api_log(self, agent_context: AgentContext, api_target: str, log_msg: str):

        """Logs a call to the api of the agents implementation library / framework."""

        pass
on_gym_init_begin
def on_gym_init_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

called when the monitored environment begins the instantiation of a new gym environment.

Args:
    agent_context: api_context passed to calling agent

View Source
    def on_gym_init_begin(self, agent_context: AgentContext):

        """called when the monitored environment begins the instantiation of a new gym environment.

            Args:

                agent_context: api_context passed to calling agent

        """
on_gym_init_end
def on_gym_init_end(
    self,
    agent_context: easyagents.core.AgentContext
)

called when the monitored environment completed the instantiation of a new gym environment.

Args:
    agent_context: api_context passed to calling agent

View Source
    def on_gym_init_end(self, agent_context: AgentContext):

        """called when the monitored environment completed the instantiation of a new gym environment.

        Args:

            agent_context: api_context passed to calling agent

        """

        pass
on_gym_reset_begin
def on_gym_reset_begin(
    self,
    agent_context: easyagents.core.AgentContext,
    **kwargs
)

Before a call to gym.reset

Args:
    agent_context: api_context passed to calling agent
    kwargs: the args to be passed to the underlying environment

View Source
    def on_gym_reset_begin(self, agent_context: AgentContext, **kwargs):

        """Before a call to gym.reset

            Args:

                agent_context: api_context passed to calling agent

                kwargs: the args to be passed to the underlying environment

        """
on_gym_reset_end
def on_gym_reset_end(
    self,
    agent_context: easyagents.core.AgentContext,
    reset_result: Tuple,
    **kwargs
)

After a call to gym.reset was completed

Args:
    agent_context: api_context passed to calling agent
    reset_result: object returned by gym.reset
    kwargs: args passed to gym.reset

View Source
    def on_gym_reset_end(self, agent_context: AgentContext, reset_result: Tuple, **kwargs):

        """After a call to gym.reset was completed

        Args:

            agent_context: api_context passed to calling agent

            reset_result: object returned by gym.reset

            kwargs: args passed to gym.reset

        """

        pass
on_gym_step_begin
def on_gym_step_begin(
    self,
    agent_context: easyagents.core.AgentContext,
    action
)

Before a call to gym.step

Args:
    agent_context: api_context passed to calling agent
    action: the action to be passed to the underlying environment

View Source
    def on_gym_step_begin(self, agent_context: AgentContext, action):

        """Before a call to gym.step

        Args:

            agent_context: api_context passed to calling agent

            action: the action to be passed to the underlying environment

        """

        pass
on_gym_step_end
def on_gym_step_end(
    self,
    agent_context: easyagents.core.AgentContext,
    action,
    step_result: Tuple
)

After a call to gym.step was completed

Args:
    agent_context: api_context passed to calling agent
    action: the action to be passed to the underlying environment
    step_result: (observation,reward,done,info) tuple returned by gym.step

View Source
    def on_gym_step_end(self, agent_context: AgentContext, action, step_result: Tuple):

        """After a call to gym.step was completed

        Args:

            agent_context: api_context passed to calling agent

            action: the action to be passed to the underlying environment

            step_result: (observation,reward,done,info) tuple returned by gym.step

        """

        pass
on_log
def on_log(
    self,
    agent_context: easyagents.core.AgentContext,
    log_msg: str
)

Logs a general message

View Source
    def on_log(self, agent_context: AgentContext, log_msg: str):

        """Logs a general message"""

        pass
on_play_begin
def on_play_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once at the entry of an agent.play() call (during play or eval, but not during train).

View Source
    def on_play_begin(self, agent_context: AgentContext):

        """Called once at the entry of an agent.play() call (during play or eval, but not during train). """
on_play_end
def on_play_end(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once before exiting an agent.play() call (during play or eval, but not during train)

View Source
    def on_play_end(self, agent_context: AgentContext):

        """Called once before exiting an agent.play() call (during play or eval, but not during train)"""
on_play_episode_begin
def on_play_episode_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once at the start of new episode to be played (during play or eval, but not during train).

View Source
    def on_play_episode_begin(self, agent_context: AgentContext):

        """Called once at the start of new episode to be played (during play or eval, but not during train). """
on_play_episode_end
def on_play_episode_end(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once after an episode is done or stopped (during play or eval, but not during train).

View Source
    def on_play_episode_end(self, agent_context: AgentContext):

        """Called once after an episode is done or stopped (during play or eval, but not during train)."""
on_play_step_begin
def on_play_step_begin(
    self,
    agent_context: easyagents.core.AgentContext,
    action
)

Called once before a new step is taken in the current episode (during play or eval, but not during train).

Args:
    agent_context: the context describing the agent's current configuration
    action: the action to be passed to the upcoming gym_env.step call

View Source
    def on_play_step_begin(self, agent_context: AgentContext, action):

        """Called once before a new step is taken in the current episode (during play or eval, but not during train).

            Args:

                 agent_context: the context describing the agent's current configuration

                 action: the action to be passed to the upcoming gym_env.step call

        """
on_play_step_end
def on_play_step_end(
    self,
    agent_context: easyagents.core.AgentContext,
    action,
    step_result: Tuple
)

Called once after a step is completed in the current episode (during play or eval, but not during train).

View Source
    def on_play_step_end(self, agent_context: AgentContext, action, step_result: Tuple):

        """Called once after a step is completed in the current episode (during play or eval, but not during train)."""
on_train_begin
def on_train_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once at the entry of an agent.train() call.

View Source
    def on_train_begin(self, agent_context: AgentContext):

        """Called once at the entry of an agent.train() call. """
on_train_end
def on_train_end(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once before exiting an agent.train() call

View Source
    def on_train_end(self, agent_context: AgentContext):

        """Called once before exiting an agent.train() call"""
on_train_iteration_begin
def on_train_iteration_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once at the start of a new iteration.

View Source
    def on_train_iteration_begin(self, agent_context: AgentContext):

        """Called once at the start of a new iteration. """
on_train_iteration_end
def on_train_iteration_end(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once after the current iteration is completed

View Source
    def on_train_iteration_end(self, agent_context: AgentContext):

        """Called once after the current iteration is completed"""

AgentContext

class AgentContext(
    model: easyagents.core.ModelConfig
)

Collection of state and configuration settings for an EasyAgent instance.

Attributes:
    model: model configuration including the name of the underlying gym_environment and the policy's neural network architecture.
    train: training configuration and current train state. None if not inside a train call.
    play: play / eval configuration and current state. None if not inside a play call (directly or due to an evaluation inside a train loop)
    gym: context for gym environment related calls.
    pyplot: the context containing the matplotlib.pyplot figure to plot to during training or playing
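A short sketch of how the is_eval / is_play properties are typically used from within a callback (illustrative only; the PhaseAware class is hypothetical):

from easyagents import core

class PhaseAware(core.AgentCallback):
    """Hypothetical callback reacting to the agent's current phase."""

    def on_play_episode_end(self, agent_context: core.AgentContext):
        if agent_context.is_eval:
            # both train and play are set: an evaluation inside agent.train()
            print('eval episode', agent_context.play.episodes_done)
        elif agent_context.is_play:
            # only play is set: a direct agent.play() call
            print('play episode', agent_context.play.episodes_done,
                  'sum of rewards:', agent_context.play.sum_of_rewards)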

View Source
class AgentContext(object):

    """Collection of state and configuration settings for a EasyAgent instance.

    Attributes:

        model: model configuration including the name of the underlying gym_environment and the

            policy's neural network architecture.

        train: training configuration and current train state. None if not inside a train call.

        play: play / eval configuration and current state. None if not inside a play call (directly or

            due to an evaluation inside a train loop)

        gym: context for gym environment related calls.

        pyplot: the context containing the matplotlib.pyplot figure to plot to during training or playing

    """

    def __init__(self, model: ModelConfig):

        """

        Args:

            model: model configuration including the name of the underlying gym_environment and the

                policy's neural network architecture.

        """

        assert isinstance(model, ModelConfig), "model not set"

        self.model: ModelConfig = model

        self.train: Optional[TrainContext] = None

        self.play: Optional[PlayContext] = None

        self.gym: GymContext = GymContext()

        self.pyplot: PyPlotContext = PyPlotContext()

        self._is_policy_trained = False

        self._agent_saver: Optional[

            Callable[[Optional[str], Union[List[AgentCallback], AgentCallback, None]], str]] = None

    def __str__(self):

        result = f'agent_context:'

        result += f'\napi   =[{self.gym}]'

        if self.train is not None:

            result += f'\ntrain =[{self.train}] '

        if self.play is not None:

            result += f'\nplay  =[{self.play}] '

        if self.pyplot is not None:

            result += f'\npyplot=[{self.pyplot}] '

        result += f'\nmodel =[{self.model}] '

        return result

    @property

    def is_eval(self) -> bool:

        """Yields true if a policy evaluation inside an agent.train(...) call is in progress."""

        return (self.play is not None) and (self.train is not None)

    @property

    def is_play(self) -> bool:

        """Yields true if an agent.play(...) call is in progress, but not a policy evaluation"""

        return (self.play is not None) and (self.train is None)

    def _is_plot_ready(self, plot_type: PlotType) -> bool:

        """Yields true if any of the plots in plot_type is ready to be plotted.

        A plot_type is ready if a plot callback was registered for this type (like TRAIN_EVAL),

        the agent is in runtime state corresponding to the plot type (like in training and at the end of

        an evaluation period) and any frequency condition is met (like num_episodes_between_plot)

        """

        result = False

        if (plot_type & PlotType.PLAY_EPISODE) != PlotType.NONE:

            result = result | (self.is_play and self.pyplot._is_subplot_created(PlotType.PLAY_EPISODE))

        if (plot_type & PlotType.PLAY_STEP) != PlotType.NONE:

            result = result | (self.is_play and self.pyplot._is_subplot_created(PlotType.PLAY_STEP))

        if (plot_type & PlotType.TRAIN_EVAL) != PlotType.NONE:

            train_result = self.is_eval

            train_result = train_result and self.pyplot._is_subplot_created(PlotType.TRAIN_EVAL)

            train_result = train_result and (self.play.episodes_done == self.train.num_episodes_per_eval)

            result = result | train_result

        if (plot_type & PlotType.TRAIN_ITERATION) != PlotType.NONE:

            train_result = self.is_train

            train_result = train_result and self.pyplot._is_subplot_created(PlotType.TRAIN_ITERATION)

            train_result = train_result and \

                           self.train.num_iterations_between_plot > 0 and \

                           ((self.train.iterations_done_in_training % self.train.num_iterations_between_plot) == 0)

            result = result | train_result

        return result

    @property

    def is_train(self) -> bool:

        """Yields true if an agent.train(...) call is in progress, but not a policy evaluation."""

        return (self.train is not None) and (self.play is None)

Instance variables

is_eval

Yields true if a policy evaluation inside an agent.train(...) call is in progress.

is_play

Yields true if an agent.play(...) call is in progress, but not a policy evaluation

is_train

Yields true if an agent.train(...) call is in progress, but not a policy evaluation.

CemTrainContext

class CemTrainContext(

)

Holds the configuration and current training state for Cross-Entropy Method agents.

Attributes:
    elite_set_fraction: fraction of the elite policy set.
    num_steps_buffer_preload: number of steps performed to initially load the policy buffer
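A configuration sketch, assuming the context is later handed to a CEM agent's train call (which is outside this module):

from easyagents import core

cc = core.CemTrainContext()
cc.num_iterations = 50              # default is 100
cc.elite_set_fraction = 0.2         # must lie in the interval (0, 1]
cc.num_steps_buffer_preload = 1000  # default is 2000
cc._validate()                      # internal consistency check, raises AssertionError if inadmissible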

View Source
class CemTrainContext(EpisodesTrainContext):

    """Holds the configuration and current training state for Cross-Entropy-Methode agents.

        Attributes:

            elite_set_fraction: fraction of the elite policy set.

            num_steps_buffer_preload: number of steps performed to initially load the policy buffer

    """

    def __init__(self):

        super().__init__()

        self.num_iterations = 100

        self.num_episodes_per_iteration: int = 50

        self.elite_set_fraction: float = 0.1

        self.num_steps_buffer_preload: int = 2000

    def __str__(self):

        return super().__str__() + f'#elite_set_fraction={self.elite_set_fraction} '

    def _validate(self):

        """Validates the consistency of all values, raising an exception if an inadmissible combination is detected."""

        super()._validate()

        assert 1 >= self.elite_set_fraction > 0, "elite_set_fraction must be in interval (0,1]"

Ancestors (in MRO)

  • easyagents.core.EpisodesTrainContext
  • easyagents.core.TrainContext

Instance variables

num_iterations_between_plot

number of iterations between 2 plot updates during training.

Returns: number of iterations or 0 if no plot updates should take place.
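A quick worked example of this property with the defaults (num_iterations_between_eval defaults to 10 in TrainContext), shown as a sketch:

import math
from easyagents import core

cc = core.CemTrainContext()            # num_iterations_between_eval defaults to 10
assert cc.num_iterations_between_plot == math.ceil(cc.num_iterations_between_eval / 3)  # == 4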

EpisodesTrainContext

class EpisodesTrainContext(

)

Base class for all agents which play a number of episodes during each iteration:

The train loop proceeds roughly as follows:

    for i in num_iterations
        for e in num_episodes_per_iteration
            play episode and record steps
        train policy for num_epochs_per_iteration epochs
        if current_episode % num_iterations_between_eval == 0:
            evaluate policy
        if training_done
            break

Attributes:
    num_episodes_per_iteration: number of episodes played per training iteration
    num_epochs_per_iteration: number of times the data collected for the current iteration is used to retrain the current policy
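A configuration sketch; the relationships in the comments are what the documentation above implies, not assertions made by the library:

from easyagents import core

tc = core.EpisodesTrainContext()
tc.num_iterations = 5
tc.num_episodes_per_iteration = 10
tc.num_epochs_per_iteration = 2
tc._validate()

# After i completed training iterations one would expect (roughly):
#   tc.iterations_done_in_training == i
#   tc.episodes_done_in_training   == i * tc.num_episodes_per_iteration
# (evaluation episodes are counted separately, see TrainContext.episodes_done_in_training)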

View Source
class EpisodesTrainContext(TrainContext):

    """Base class for all agent which evaluate a number of episodes during each iteration:

        The train loop proceeds roughly as follows:

            for i in num_iterations

                for e in num_episodes_per_iteration

                    play episode and record steps

                train policy for num_epochs_per_iteration epochs

                if current_episode % num_iterations_between_eval == 0:

                    evaluate policy

                if training_done

                    break

        Attributes:

            num_episodes_per_iteration: number of episodes played per training iteration

            num_epochs_per_iteration: number of times the data collected for the current iteration

                is used to retrain the current policy

    """

    def __init__(self):

        self.num_episodes_per_iteration: int = 10

        self.num_epochs_per_iteration: int = 10

        super().__init__()

    def __str__(self):

        return super().__str__() + \

               f'#episodes_per_iteration={self.num_episodes_per_iteration} ' + \

               f'#epochs_per_iteration={self.num_epochs_per_iteration} '

    def _validate(self):

        """Validates the consistency of all values, raising an exception if an inadmissible combination is detected."""

        super()._validate()

        assert self.num_episodes_per_iteration > 0, "num_episodes_per_iteration not admissible"

        assert self.num_epochs_per_iteration > 0, "num_epochs_per_iteration not admissible"

Ancestors (in MRO)

  • easyagents.core.TrainContext

Descendants

  • easyagents.core.CemTrainContext
  • easyagents.core.PpoTrainContext

Instance variables

num_iterations_between_plot

number of iterations between 2 plot updates during training.

Returns: number of iterations or 0 if no plot updates should take place.

GymContext

class GymContext(

)

Contains the context for gym api calls (wrapping a gym env instance).
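A sketch of accessing the wrapped environment through the gym_env property from within a callback (the ActionLogger class is hypothetical, not part of the module):

from easyagents import core

class ActionLogger(core.AgentCallback):
    """Hypothetical callback that inspects the wrapped gym environment."""

    def on_play_step_end(self, agent_context: core.AgentContext, action, step_result):
        env = agent_context.gym.gym_env   # None until a monitored environment exists
        if env is not None:
            print(type(env).__name__, 'took action', action)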

View Source
class GymContext(object):

    """Contains the context for gym api calls (wrapping a gym env instance)."""

    def __init__(self):

        self._monitor_env: Optional[easyagents.backends.monitor._MonitorEnv] = None

        self._totals = None

    def __str__(self):

        return f'MonitorEnv={self._monitor_env} Totals={self._totals}'

    @property

    def gym_env(self) -> Optional[gym.core.Env]:

        result = None

        if self._monitor_env:

            result = self._monitor_env.env

        return result

Instance variables

gym_env

ModelConfig

class ModelConfig(
    gym_env_name: str,
    fc_layers: Union[Tuple[int, ...], int, NoneType] = None,
    seed: Union[int, NoneType] = None
)

The model configurations, containing the name of the gym environment and the neural network architecture.

Attributes:
    original_env_name: the name of the underlying gym environment, eg 'CartPole-v0'
    gym_env_name: the name of the actual gym environment used (a wrapper around the environment given by original_env_name)
    fc_layers: int tuple defining the number and size of each fully connected layer.
    seed: the seed to be used for example for the gym_env or None for no seed
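A usage sketch, assuming a standard gym installation in which 'CartPole-v0' is registered:

from easyagents import core

mc = core.ModelConfig('CartPole-v0', fc_layers=(100, 50), seed=42)

# a single int is promoted to a 1-tuple; omitting fc_layers yields the default (100, 100)
assert core.ModelConfig('CartPole-v0', fc_layers=25).fc_layers == (25,)

# the configuration round-trips through a plain dict (internal helpers, shown for illustration)
restored = core.ModelConfig._from_dict(mc._to_dict())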

View Source
class ModelConfig(object):

    """The model configurations, containing the name of the gym environment and the neural network architecture.

        Attributes:

            original_env_name: the name of the underlying gym environment, eg 'CartPole-v0'

            gym_env_name: the name of the actual gym environment used (a wrapper around the environment given

                by original_env_name)

            fc_layers: int tuple defining the number and size of each fully connected layer.

            seed: the seed to be used for example for the gym_env or None for no seed

    """

    _KEY_SEED = 'seed'

    _KEY_GYM_ENV = 'gym_env'

    _KEY_FC_LAYERS = 'fc_layers'

    def __init__(self,

                 gym_env_name: str,

                 fc_layers: Union[Tuple[int, ...], int, None] = None,

                 seed: Optional[int] = None):

        """

            Args:

                gym_env_name: the name of the registered gym environment to use, eg 'CartPole-v0'

                fc_layers: int tuple defining the number and size of each fully connected layer.

        """

        if fc_layers is None:

            fc_layers = (100, 100)

        if isinstance(fc_layers, int):

            fc_layers = (fc_layers,)

        assert isinstance(gym_env_name, str), "passed gym_env_name not a string."

        assert gym_env_name != "", "gym environment name is empty."

        assert easyagents.env._is_registered_with_gym(gym_env_name), \

            f'"{gym_env_name}" is not the name of an environment registered with OpenAI gym.' + \

            'Consider using easyagents.env.register_with_gym to register your environment.'

        assert fc_layers is not None, "fc_layers not set"

        assert isinstance(fc_layers, tuple), "fc_layers not a tuple"

        assert fc_layers, "fc_layers must contain at least 1 int"

        for i in fc_layers:

            assert isinstance(i, int) and i >= 1, f'{i} is not a valid size for a hidden layer'

        self.original_env_name = gym_env_name

        self.gym_env_name = None

        self.fc_layers = fc_layers

        self.seed = seed

    def __str__(self):

        return f'fc_layers={self.fc_layers} seed={self.seed} gym_env_name={self.gym_env_name}'

    @staticmethod

    def _from_dict(from_dict: Dict[str, object]):

        """Creates a new instance of ModelConfig based on the parameters contained in dict

        Returns:

            new instance of ModelConfig configured by dict

        """

        assert from_dict

        # noinspection PyTypeChecker

        fc_layers = tuple(from_dict[ModelConfig._KEY_FC_LAYERS])

        # noinspection PyTypeChecker

        result = ModelConfig(gym_env_name=str(from_dict[ModelConfig._KEY_GYM_ENV]), fc_layers=fc_layers,

                             seed=from_dict[ModelConfig._KEY_SEED])

        return result

    def _to_dict(self) -> Dict[str, object]:

        """saves this model configuration to a dict. The model_config can be recreated by a call to _from_dict

        Retunns:

            dict containing all parameters of this model_config (this does not include any policy)

        """

        result: Dict[str, object] = dict()

        result[ModelConfig._KEY_SEED] = self.seed

        result[ModelConfig._KEY_GYM_ENV] = self.original_env_name

        result[ModelConfig._KEY_FC_LAYERS] = self.fc_layers

        return result

PlayContext

class PlayContext(
    train_context: Union[easyagents.core.TrainContext, NoneType] = None
)

Contains the current configuration of an agent's play method, like the number of episodes to play and the max number of steps per episode.

The EasyAgent.play() method proceeds (roughly) as follows:

    for e in num_episodes
        play (while steps_done_in_episode < max_steps_per_episode)
        if playing_done
            break

Attributes:
    num_episodes: number of episodes to play, unlimited if None
    max_steps_per_episode: maximum number of steps per episode, unlimited if None
    play_done: if true the play loop is terminated at the end of the current episode
    episodes_done: the number of episodes played (including the current episode).
    steps_done_in_episode: the number of steps taken in the current episode.
    steps_done: the number of steps played (over all episodes so far)
    actions: dict containing for each episode the actions taken in each step
    rewards: dict containing for each episode the rewards received in each step
    sum_of_rewards: dict containing for each episode the sum of rewards over all steps
    gym_env: the gym environment used to play
View Source
class PlayContext(object):

    """Contains the current configuration of an agents play method like the number of episodes to play

        and the max number of steps per episode.

        The EasyAgent.play() method proceeds (roughly) as follows:

        for e in num_episodes

            play (while steps_done_in_episode < max_steps_per_episode)

            if play_done

                break

        Attributes:

            num_episodes: number of episodes to play, unlimited if None

            max_steps_per_episode: maximum number of steps per episode, unlimited if None

            play_done: if true the play loop is terminated at the end of the current episode

            episodes_done: the number of episodes played (including the current episode).

            steps_done_in_episode: the number of steps taken in the current episode.

            steps_done: the number of steps played (over all episodes so far)

            actions: dict containing for each episode the actions taken in each step

            rewards: dict containing for each episode the rewards received in each step

            sum_of_rewards: dict containing for each episode the sum of rewards over all steps

            gym_env: the gym environment used to play

    """

    def __init__(self, train_context: Optional[TrainContext] = None):

        """

        Args:

             train_context: if set, num_episodes and max_steps_per_episode are set from train_context

        """

        self.num_episodes: Optional[int] = None

        self.max_steps_per_episode: Optional[int] = None

        if train_context is not None:

            self.num_episodes = train_context.num_episodes_per_eval

            self.max_steps_per_episode = train_context.max_steps_per_episode

        self.play_done: bool

        self.episodes_done: int

        self.steps_done_in_episode: int

        self.steps_done: int

        self.actions: Dict[int, List[object]]

        self.rewards: Dict[int, List[float]]

        self.sum_of_rewards: Dict[int, float]

        self.gym_env: Optional[gym.core.Env]

        self._reset()

    def __str__(self):

        return f'#episodes={self.num_episodes} ' + \

               f'max_steps_per_episode={self.max_steps_per_episode} ' + \

               f'play_done={self.play_done} ' + \

               f'episodes_done={self.episodes_done} ' + \

               f'steps_done_in_episode={self.steps_done_in_episode} ' + \

               f'steps_done={self.steps_done} '

    def _reset(self):

        """Clears all values modified during a train() call."""

        self.play_done: bool = False

        self.episodes_done: int = 0

        self.steps_done_in_episode: int = 0

        self.steps_done: int = 0

        self.actions: Dict[int, List[object]] = dict()

        self.rewards: Dict[int, List[float]] = dict()

        self.sum_of_rewards: Dict[int, float] = dict()

        self.gym_env: Optional[gym.core.Env] = None

    def _validate(self):

        """Validates the consistency of all values, raising an exception if an inadmissible combination is detected."""

        assert (self.num_episodes is None) or (self.num_episodes > 0), "num_episodes not admissible"

        assert (self.max_steps_per_episode is None) or self.max_steps_per_episode > 0, \

            "max_steps_per_episode not admissible"

PlotType

class PlotType(
    /,
    *args,
    **kwargs
)

Defines the point in time when a plot is created / updated.

NONE: No plot is updated.
PLAY_EPISODE: Called after the last step of each played episode. The gym environment is still accessible through agent_context.play.gym_env.
PLAY_STEP: Called after each play step. The gym environment is still accessible through agent_context.play.gym_env.
TRAIN_EVAL: Called after the last step of the last evaluation episode during training. The gym environment is accessible through agent_context.play.gym_env.
TRAIN_ITERATION: Called after each train iteration. No gym environment is available.

View Source
class PlotType(Flag):

    """Defines the point in time when a plot is created / updated.

    NONE: No plot is updated.

    PLAY_EPISODE: Called after the last step of each played episode. The gym environment is still

        accessible through agent_context.play.gym_env.

    PLAY_STEP: Called after each play step. The gym environment is still

        accessible through agent_context.play.gym_env.

    TRAIN_EVAL: Called after the last step of the last evaluation episode during training.

        The gym environment is accessible through agent_context.play.gym_env.

    TRAIN_ITERATION: Called after each train iteration. No gym environment is available.

    """

    NONE = 0

    PLAY_EPISODE = auto()

    PLAY_STEP = auto()

    TRAIN_EVAL = auto()

    TRAIN_ITERATION = auto()

Ancestors (in MRO)

  • enum.Flag
  • enum.Enum

Class variables

NONE
PLAY_EPISODE
PLAY_STEP
TRAIN_EVAL
TRAIN_ITERATION
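Example (not part of the module source): since PlotType is a Flag enum, members can be combined and tested with bitwise operators, which is what PyPlotContext._is_subplot_created relies on.

from easyagents.core import PlotType

update_on = PlotType.PLAY_EPISODE | PlotType.TRAIN_EVAL

# a flag is contained in the combination iff the bitwise AND is not NONE
assert (update_on & PlotType.TRAIN_EVAL) != PlotType.NONE
assert (update_on & PlotType.TRAIN_ITERATION) == PlotType.NONE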

PpoTrainContext

class PpoTrainContext()

TrainContext for Actor-Critic type agents like Ppo or Sac.

Attributes:
actor_loss: loss observed during training of the actor network. dict is indexed by the current_episode.
critic_loss: loss observed during training of the critic network. dict is indexed by the current_episode.

View Source
class PpoTrainContext(EpisodesTrainContext):

    """TrainContext for Actor-Critic type agents like Ppo or Sac.

    Attributes:

        actor_loss: loss observed during training of the actor network. dict is indexed by the current_episode.

        critic_loss: loss observed during training of the critic network. dict is indexed by the current_episode.

    """

    def __init__(self):

        super().__init__()

        self.actor_loss: Dict[int, float]

        self.critic_loss: Dict[int, float]

    def _reset(self):

        self.actor_loss = dict()

        self.critic_loss = dict()

        super()._reset()

Ancestors (in MRO)

  • easyagents.core.EpisodesTrainContext
  • easyagents.core.TrainContext

Instance variables

num_iterations_between_plot

number of iterations between 2 plot updates during training.

Returns: number of iterations or 0 if no plot updates should take place.
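Example (not part of the module source, a minimal sketch): the loss dicts are indexed by the current episode count; the entries below are filled in by hand purely for illustration, during training the backend records them.

from easyagents.core import PpoTrainContext

tc = PpoTrainContext()

# during training a backend stores one entry per iteration, indexed by the current episode count
tc.actor_loss[10] = 0.42
tc.critic_loss[10] = 1.73

for episode, loss in sorted(tc.actor_loss.items()):
    print(f'episode={episode} actor_loss={loss} critic_loss={tc.critic_loss[episode]}')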

PyPlotContext

class PyPlotContext()

Contains the context for matplotlib.pyplot figure plotting.

Attributes:
figure: the figure to plot to
figsize: figure (width, height) in inches for the figure to be created.
is_jupyter_active: True if we plot to a jupyter notebook cell, False otherwise.
max_columns: the max number of subplot columns in the pyplot figure

View Source
class PyPlotContext(object):

    """Contain the context for the maplotlib.pyplot figure plotting.

    Attributes

        figure: the figure to plot to

        figsize: figure (width,height) in inches for the figure to be created.

        is_jupyter_active: True if we plot to jupyter notebook cell, False otherwise.

        max_columns: the max number of subplot columns in the pyplot figure

    """

    def __init__(self):

        self._created_subplots = PlotType.NONE

        self.figure: Optional[plt.Figure] = None

        self.figsize: (float, float) = (17, 6)

        self._call_jupyter_display = False

        self.is_jupyter_active = False

        self.max_columns = 3

    def __str__(self):

        figure_number = None

        figure_axes_len = 0

        if self.figure:

            figure_number = self.figure.number

            if self.figure.axes:

                figure_axes_len = len(self.figure.axes)

        return f'is_jupyter_active={self.is_jupyter_active} max_columns={self.max_columns} ' + \

               f'_created_subplots={self._created_subplots} figure={figure_number} axes={figure_axes_len} '

    def _is_subplot_created(self, plot_type: PlotType):

        """Yields true if a subplot of type plot_type was created by a plot callback."""

        result = ((self._created_subplots & plot_type) != PlotType.NONE)

        return result
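Example (not part of the module source, a minimal sketch): figsize and max_columns can be set directly; the _created_subplots flag below is modified by hand only to illustrate _is_subplot_created, normally the plot callbacks maintain it.

from easyagents.core import PlotType, PyPlotContext

ctx = PyPlotContext()
ctx.figsize = (12, 4)   # (width, height) in inches for the figure to be created
ctx.max_columns = 2     # at most 2 subplot columns

assert not ctx._is_subplot_created(PlotType.PLAY_EPISODE)
ctx._created_subplots |= PlotType.PLAY_EPISODE
assert ctx._is_subplot_created(PlotType.PLAY_EPISODE)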

StepsTrainContext

class StepsTrainContext()

Base class for all agents which evaluate a number of steps during each iteration.

The train loop proceeds roughly as follows:

for i in num_iterations
    for s in num_steps_per_iteration
        play episodes and record steps
    train policy for num_epochs_per_iteration epochs
    if i % num_iterations_between_eval == 0:
        evaluate policy
    if training_done
        break

Attributes:
num_steps_per_iteration: number of steps played for each iteration
num_steps_buffer_preload: number of initial collect steps to preload the buffer
num_steps_sampled_from_buffer: the number of steps sampled from the buffer for each training iteration

View Source
class StepsTrainContext(TrainContext):

    """Base class for all agent which evaluate a number of steps during each iteration:

        The train loop proceeds roughly as follows:

            for i in num_iterations

                for s in num_steps_per_iteration

                    play episodes and record steps

                train policy for num_epochs_per_iteration epochs

                if i % num_iterations_between_eval == 0:

                    evaluate policy

                if training_done

                    break

        Attributes:

            num_steps_per_iteration: number of steps played for each iteration

            num_steps_buffer_preload: number of initial collect steps to preload the buffer

            num_steps_sampled_from_buffer: the number of steps sampled from the buffer for each training iteration

    """

    def __init__(self):

        super().__init__()

        self.num_iterations = 20000

        self.num_iterations_between_eval = 1000

        self.num_steps_per_iteration: int = 1

        self.num_steps_buffer_preload: int = 1000

        self.num_steps_sampled_from_buffer: int = 64

        self.max_steps_in_buffer = 100000

    def __str__(self):

        return super().__str__() + \

               f'#steps_per_iteration={self.num_steps_per_iteration} ' + \

               f'#steps_buffer_preload={self.num_steps_buffer_preload} ' + \

               f'#steps_sampled_from_buffer={self.num_steps_sampled_from_buffer} '

    def _validate(self):

        """Validates the consistency of all values, raising an exception if an inadmissible combination is detected."""

        super()._validate()

        assert self.num_steps_per_iteration > 0, "num_steps_per_iteration not admissible"

Ancestors (in MRO)

  • easyagents.core.TrainContext

Instance variables

num_iterations_between_plot

number of iterations between 2 plot updates during training.

Returns: number of iterations or 0 if no plot updates should take place.
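Example (not part of the module source, a minimal sketch): tuning a StepsTrainContext before handing it to a step-based agent; the values are illustrative only, and _validate raises an AssertionError on inadmissible combinations.

from easyagents.core import StepsTrainContext

tc = StepsTrainContext()
tc.num_iterations = 5000                 # instead of the default 20000
tc.num_steps_per_iteration = 4           # collect 4 steps per iteration
tc.num_steps_sampled_from_buffer = 32    # train on 32 steps sampled from the buffer
tc.max_steps_in_buffer = 50000           # cap the replay buffer size

tc._validate()
print(tc)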

TrainContext

class TrainContext()

Contains the configuration of an agent's train method, like the number of iterations or the learning rate, along with data gathered so far during the training which is identical for all implementations.

Hints:
o TrainContext contains all the parameters needed to control the train loop.
o Subclasses of TrainContext may contain additional Agent (but not backend) specific parameters.

Attributes:
num_iterations: number of times the training is repeated (with additional data), unlimited if None
max_steps_per_episode: maximum number of steps per episode
learning_rate: the learning rate used in the next iteration's policy training (0,1]
reward_discount_gamma: the factor by which a reward is discounted for each step (0,1]
max_steps_in_buffer: size of the agent's buffer in steps
training_done: if true the train loop is terminated at the end of the current iteration
iterations_done_in_training: the number of iterations completed so far (during training)
episodes_done_in_iteration: the number of episodes completed in the current iteration
episodes_done_in_training: the number of episodes completed over all iterations so far.
    The episodes played for evaluation are not included in this count.
steps_done_in_training: the number of steps taken over all iterations so far
steps_done_in_iteration: the number of steps taken in the current iteration

num_iterations_between_eval: number of training iterations before the current policy is evaluated.
num_episodes_per_eval: number of episodes played to estimate the average return and steps
eval_rewards: dict containing the rewards statistics for each policy evaluation.
    Each entry contains the tuple (min, average, max) over the sum of rewards over all episodes
    played for the current evaluation. The dict is indexed by the current_episode.
eval_steps: dict containing the steps statistics for each policy evaluation.
    Each entry contains the tuple (min, average, max) over the number of steps over all episodes
    played for the current evaluation. The dict is indexed by the current_episode.
loss: dict containing the loss for each training iteration. The dict is indexed by the current_episode.
View Source
class TrainContext(object):

    """Contains the configuration of an agents train method like the number of iterations or the learning rate

        along with data gathered so far during the training which is identical for all implementations.

        Hints:

        o TrainContext contains all the parameters needed to control the train loop.

        o Subclasses of TrainContext may contain additional Agent (but not backend) specific parameters.

        Attributes:

            num_iterations: number of times the training is repeated (with additional data), unlimited if None

            max_steps_per_episode: maximum number of steps per episode

            learning_rate: the learning rate used in the next iteration's policy training (0,1]

            reward_discount_gamma: the factor by which a reward is discounted for each step (0,1]

            max_steps_in_buffer: size of the agents buffer in steps

            training_done: if true the train loop is terminated at the end of the current iteration

            iterations_done_in_training: the number of iterations completed so far (during training)

            episodes_done_in_iteration: the number of episodes completed in the current iteration

            episodes_done_in_training: the number of episodes completed over all iterations so far.

                The episodes played for evaluation are not included in this count.

            steps_done_in_training: the number of steps taken over all iterations so far

            steps_done_in_iteration: the number of steps taken in the current iteration

            num_iterations_between_eval: number of training iterations before the current policy is evaluated.

            num_episodes_per_eval: number of episodes played to estimate the average return and steps

            eval_rewards: dict containing the rewards statistics for each policy evaluation.

                Each entry contains the tuple (min, average, max) over the sum of rewards over all episodes

                played for the current evaluation. The dict is indexed by the current_episode.

            eval_steps: dict containing the steps statistics for each policy evaluation.

                Each entry contains the tuple (min, average, max) over the number of steps over all episodes

                played for the current evaluation. The dict is indexed by the current_episode.

            loss: dict containing the loss for each iteration training. The dict is indexed by the current_episode.

    """

    def __init__(self):

        self.num_iterations: Optional[int] = None

        self.max_steps_per_episode: Optional = 1000

        self.num_iterations_between_eval: int = 10

        self.num_episodes_per_eval: int = 10

        self.learning_rate: float = 0.001

        self.reward_discount_gamma: float = 1.0

        self.max_steps_in_buffer: int = 100000

        self.training_done: bool

        self.iterations_done_in_training: int

        self.episodes_done_in_iteration: int

        self.episodes_done_in_training: int

        self.steps_done_in_training: int

        self.steps_done_in_iteration = 0

        self.loss: Dict[int, float]

        self.eval_rewards: Dict[int, Tuple[float, float, float]]

        self.eval_steps: Dict[int, Tuple[float, float, float]]

        self._reset()

    def __str__(self):

        return f'training_done={self.training_done} ' + \

               f'#iterations_done_in_training={self.iterations_done_in_training} ' + \

               f'#episodes_done_in_iteration={self.episodes_done_in_iteration} ' + \

               f'#steps_done_in_iteration={self.steps_done_in_iteration} ' + \

               f'#iterations={self.num_iterations} ' + \

               f'#max_steps_per_episode={self.max_steps_per_episode} ' + \

               f'#iterations_between_eval={self.num_iterations_between_eval} ' + \

               f'#episodes_per_eval={self.num_episodes_per_eval} ' + \

               f'#learning_rate={self.learning_rate} ' + \

               f'#reward_discount_gamma={self.reward_discount_gamma} ' + \

               f'#max_steps_in_buffer={self.max_steps_in_buffer} '

    def _reset(self):

        """Clears all values modified during a train() call."""

        self.training_done = False

        self.iterations_done_in_training = 0

        self.episodes_done_in_iteration = 0

        self.episodes_done_in_training = 0

        self.steps_done_in_training = 0

        self.steps_done_in_iteration = 0

        self.loss = dict()

        self.eval_rewards = dict()

        self.eval_steps = dict()

    def _validate(self):

        """Validates the consistency of all values, raising an exception if an inadmissible combination is detected."""

        assert self.num_iterations is None or self.num_iterations > 0, "num_iterations not admissible"

        assert self.max_steps_per_episode > 0, "max_steps_per_episode not admissible"

        assert self.num_iterations_between_eval > 0, "num_iterations_between_eval not admissible"

        assert self.num_episodes_per_eval > 0, "num_episodes_per_eval not admissible"

        assert 0 < self.learning_rate <= 1, "learning_rate not in interval (0,1]"

        assert 0 < self.reward_discount_gamma <= 1, "reward_discount_gamma not in interval (0,1]"

    @property

    def num_iterations_between_plot(self):

        """number of iterations between 2 plot updates during training.

        Returns:

            number of iterations or 0 if no plot updates should take place.

            """

        result = 0

        if self.num_iterations_between_eval:

            result = math.ceil(self.num_iterations_between_eval / 3)

        return result

Descendants

  • easyagents.core.EpisodesTrainContext
  • easyagents.core.StepsTrainContext

Instance variables

num_iterations_between_plot

number of iterations between 2 plot updates during training.

Returns: number of iterations or 0 if no plot updates should take place.
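Example (not part of the module source, a minimal sketch): it shows the num_iterations_between_plot heuristic and the layout of the evaluation statistics; the eval_rewards entry is filled in by hand purely for illustration, during training the backend records it.

import math
from easyagents.core import TrainContext

tc = TrainContext()
tc.num_iterations_between_eval = 10

# plots are refreshed roughly 3 times per evaluation interval: ceil(10 / 3) == 4
assert tc.num_iterations_between_plot == math.ceil(10 / 3)

# eval_rewards maps the current episode to (min, average, max) over the sum of rewards
tc.eval_rewards[50] = (20.0, 135.5, 200.0)
min_reward, avg_reward, max_reward = tc.eval_rewards[50]
print(f'eval after episode 50: min={min_reward} avg={avg_reward} max={max_reward}')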