Module easyagents.callbacks.save

View Source
import os

import sys

from typing import List, Tuple

import easyagents.core as core

import easyagents.backends.core as bcore

class _SaveCallback(core.AgentCallback):
    """Common base class for the agent-saving callbacks.

        Attributes:
            directory: the absolute path of the directory containing the persisted policies.
            saved_agents: list of tuples (episode, avg_rewards, directory) for each saved agent
    """

    def __init__(self, directory: str = None):
        """Saves the best policies (along with the agent definition) in directory.

        If directory is None the policies are written in a temp directory.

        Args:
            directory: the directory to save to, if None a temp directory is created.
        """
        target_dir = directory if directory else bcore._get_temp_path()
        self.directory: str = bcore._mkdir(target_dir)
        self.saved_agents: List[Tuple[int, float, str]] = []

    def __str__(self):
        return self.directory

    def _save(self, agent_context: core.AgentContext):
        """Persists the current policy below self.directory and records it in saved_agents."""
        assert agent_context
        assert agent_context.train, "TrainContext not set."
        tc = agent_context.train
        episode = tc.episodes_done_in_training
        # eval_rewards maps episode -> (min, avg, max); only the average is used here.
        _, avg_reward, _ = tc.eval_rewards[episode]
        save_dir = os.path.join(self.directory, f'episode_{episode}-avg_reward_{avg_reward}')
        agent_context._agent_saver(directory=save_dir)
        self.saved_agents.append((episode, avg_reward, save_dir))

class Best(_SaveCallback):
    """After each policy evaluation the policy is saved if average reward is larger than all previous average
        rewards. The policies can then be loaded using agents.load()

        Attributes:
            directory: the absolute path of the directory containing the persisted policies.
            saved_agents: list of tuples (episode, avg_rewards, directory) for each saved agent
    """

    def __init__(self, directory: str = None):
        """Saves the best policies (along with the agent definition) in directory.

        If directory is None the policies are written in a temp directory.

        Args:
            directory: the directory to save to, if None a temp directory is created.
        """
        super().__init__(directory=directory)
        # Best average eval reward seen so far; starts at the most negative finite float
        # so that the first evaluation always triggers a save.
        self._best_avg_reward = -sys.float_info.max

    def on_play_end(self, agent_context: core.AgentContext):
        # Only react to evaluation runs, not ordinary play.
        if not agent_context.is_eval:
            return
        tc = agent_context.train
        _, avg_reward, _ = tc.eval_rewards[tc.episodes_done_in_training]
        if avg_reward > self._best_avg_reward:
            self._save(agent_context=agent_context)
            self._best_avg_reward = avg_reward

class Every(_SaveCallback):
    """Saves the current policy every n evaluations.

        Attributes:
            directory: the absolute path of the directory containing the persisted policies.
            saved_agents: list of tuples (episode, avg_rewards, directory) for each saved agent
    """

    def __init__(self, num_evals_between_save: int = 1, directory: str = None):
        """Saves the current policy every n evaluations. In terms of episodes: the policy is saved every
        num_evals_between_save * num_iterations_between_eval * num_episodes_per_iteration

        Args:
            num_evals_between_save: the number of evaluations between saves (must be > 0).
            directory: the directory to save to, if None a temp directory is created.
        """
        assert num_evals_between_save > 0
        super().__init__(directory=directory)
        self.num_evals_between_save: int = num_evals_between_save

    def on_play_end(self, agent_context: core.AgentContext):
        # Only react to evaluation runs, not ordinary play.
        if agent_context.is_eval:
            tc = agent_context.train
            # NOTE(review): num_iterations looks like a configured total rather than a
            # "done so far" counter — confirm this condition varies across evaluations.
            if tc.num_iterations % tc.num_iterations_between_eval == 0:
                # Use floor division: true division (/) yields a float, making the
                # modulo test below fragile; the guard above ensures the result is exact.
                evals = tc.num_iterations // tc.num_iterations_between_eval
                if evals % self.num_evals_between_save == 0:
                    self._save(agent_context=agent_context)

Classes

Best

class Best(
    directory: str = None
)

After each policy evaluation the policy is saved if average reward is larger than all previous average rewards. The policies can then be loaded using agents.load()

Attributes: directory: the absolute path of the directory containing the persisted policies. saved_agents: list of tuples (episode, avg_rewards, directory) for each saved agent

View Source
class Best(_SaveCallback):

    """After each policy evaluation the policy is saved if average reward is larger than all previous average

        rewards. The policies can then be loaded using agents.load()

        Attributes:

            directory: the absolute path of the directory containing the persisted policies.

            saved_agents: list of tuples (episode, avg_rewards, directory) for each saved agent

    """

    def __init__(self, directory: str = None):

        """Saves the best policies (along with the agent definition) in directory.

        If directory is None the policies are written in a temp directory.

        Args:

            directory: the directory to save to, if None a temp directory is created.

        """

        super().__init__(directory=directory)

        self._best_avg_reward = -sys.float_info.max

    def on_play_end(self, agent_context: core.AgentContext):

        if agent_context.is_eval:

            tc = agent_context.train

            min_rewards, avg_reward, max_rewards = tc.eval_rewards[tc.episodes_done_in_training]

            if avg_reward > self._best_avg_reward:

                self._save(agent_context=agent_context)

                self._best_avg_reward = avg_reward

Ancestors (in MRO)

  • easyagents.callbacks.save._SaveCallback
  • easyagents.core.AgentCallback
  • abc.ABC

Methods

on_api_log
def on_api_log(
    self,
    agent_context: easyagents.core.AgentContext,
    api_target: str,
    log_msg: str
)

Logs a call to the api of the agents implementation library / framework.

View Source
    def on_api_log(self, agent_context: AgentContext, api_target: str, log_msg: str):

        """Logs a call to the api of the agents implementation library / framework."""

        pass
on_gym_init_begin
def on_gym_init_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

called when the monitored environment begins the instantiation of a new gym environment.

Args: agent_context: api_context passed to calling agent

View Source
    def on_gym_init_begin(self, agent_context: AgentContext):

        """called when the monitored environment begins the instantiation of a new gym environment.

            Args:

                agent_context: api_context passed to calling agent

        """
on_gym_init_end
def on_gym_init_end(
    self,
    agent_context: easyagents.core.AgentContext
)

called when the monitored environment completed the instantiation of a new gym environment.

Args: agent_context: api_context passed to calling agent

View Source
    def on_gym_init_end(self, agent_context: AgentContext):

        """called when the monitored environment completed the instantiation of a new gym environment.

        Args:

            agent_context: api_context passed to calling agent

        """

        pass
on_gym_reset_begin
def on_gym_reset_begin(
    self,
    agent_context: easyagents.core.AgentContext,
    **kwargs
)

Before a call to gym.reset

Args: agent_context: api_context passed to calling agent kwargs: the args to be passed to the underlying environment

View Source
    def on_gym_reset_begin(self, agent_context: AgentContext, **kwargs):

        """Before a call to gym.reset

            Args:

                agent_context: api_context passed to calling agent

                kwargs: the args to be passed to the underlying environment

        """
on_gym_reset_end
def on_gym_reset_end(
    self,
    agent_context: easyagents.core.AgentContext,
    reset_result: Tuple,
    **kwargs
)

After a call to gym.reset was completed

Args: agent_context: api_context passed to calling agent reset_result: object returned by gym.reset kwargs: args passed to gym.reset

View Source
    def on_gym_reset_end(self, agent_context: AgentContext, reset_result: Tuple, **kwargs):

        """After a call to gym.reset was completed

        Args:

            agent_context: api_context passed to calling agent

            reset_result: object returned by gym.reset

            kwargs: args passed to gym.reset

        """

        pass
on_gym_step_begin
def on_gym_step_begin(
    self,
    agent_context: easyagents.core.AgentContext,
    action
)

Before a call to gym.step

Args: agent_context: api_context passed to calling agent action: the action to be passed to the underlying environment

View Source
    def on_gym_step_begin(self, agent_context: AgentContext, action):

        """Before a call to gym.step

        Args:

            agent_context: api_context passed to calling agent

            action: the action to be passed to the underlying environment

        """

        pass
on_gym_step_end
def on_gym_step_end(
    self,
    agent_context: easyagents.core.AgentContext,
    action,
    step_result: Tuple
)

After a call to gym.step was completed

Args: agent_context: api_context passed to calling agent action: the action to be passed to the underlying environment step_result: (observation,reward,done,info) tuple returned by gym.step

View Source
    def on_gym_step_end(self, agent_context: AgentContext, action, step_result: Tuple):

        """After a call to gym.step was completed

        Args:

            agent_context: api_context passed to calling agent

            action: the action to be passed to the underlying environment

            step_result: (observation,reward,done,info) tuple returned by gym.step

        """

        pass
on_log
def on_log(
    self,
    agent_context: easyagents.core.AgentContext,
    log_msg: str
)

Logs a general message

View Source
    def on_log(self, agent_context: AgentContext, log_msg: str):

        """Logs a general message"""

        pass
on_play_begin
def on_play_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once at the entry of an agent.play() call (during play or eval, but not during train).

View Source
    def on_play_begin(self, agent_context: AgentContext):

        """Called once at the entry of an agent.play() call (during play or eval, but not during train). """
on_play_end
def on_play_end(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once before exiting an agent.play() call (during play or eval, but not during train)

View Source
    def on_play_end(self, agent_context: core.AgentContext):

        if agent_context.is_eval:

            tc = agent_context.train

            min_rewards, avg_reward, max_rewards = tc.eval_rewards[tc.episodes_done_in_training]

            if avg_reward > self._best_avg_reward:

                self._save(agent_context=agent_context)

                self._best_avg_reward = avg_reward
on_play_episode_begin
def on_play_episode_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once at the start of new episode to be played (during play or eval, but not during train).

View Source
    def on_play_episode_begin(self, agent_context: AgentContext):

        """Called once at the start of new episode to be played (during play or eval, but not during train). """
on_play_episode_end
def on_play_episode_end(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once after an episode is done or stopped (during play or eval, but not during train).

View Source
    def on_play_episode_end(self, agent_context: AgentContext):

        """Called once after an episode is done or stopped (during play or eval, but not during train)."""
on_play_step_begin
def on_play_step_begin(
    self,
    agent_context: easyagents.core.AgentContext,
    action
)

Called once before a new step is taken in the current episode (during play or eval, but not during train).

Args: agent_context: the context describing the agents current configuration action: the action to be passed to the upcoming gym_env.step call

View Source
    def on_play_step_begin(self, agent_context: AgentContext, action):

        """Called once before a new step is taken in the current episode (during play or eval, but not during train).

            Args:

                 agent_context: the context describing the agents current configuration

                 action: the action to be passed to the upcoming gym_env.step call

        """
on_play_step_end
def on_play_step_end(
    self,
    agent_context: easyagents.core.AgentContext,
    action,
    step_result: Tuple
)

Called once after a step is completed in the current episode (during play or eval, but not during train).

View Source
    def on_play_step_end(self, agent_context: AgentContext, action, step_result: Tuple):

        """Called once after a step is completed in the current episode (during play or eval, but not during train)."""
on_train_begin
def on_train_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once at the entry of an agent.train() call.

View Source
    def on_train_begin(self, agent_context: AgentContext):

        """Called once at the entry of an agent.train() call. """
on_train_end
def on_train_end(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once before exiting an agent.train() call

View Source
    def on_train_end(self, agent_context: AgentContext):

        """Called once before exiting an agent.train() call"""
on_train_iteration_begin
def on_train_iteration_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once at the start of a new iteration.

View Source
    def on_train_iteration_begin(self, agent_context: AgentContext):

        """Called once at the start of a new iteration. """
on_train_iteration_end
def on_train_iteration_end(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once after the current iteration is completed

View Source
    def on_train_iteration_end(self, agent_context: AgentContext):

        """Called once after the current iteration is completed"""

Every

class Every(
    num_evals_between_save: int = 1,
    directory: str = None
)

Saves the current policy every n evaluations.

Attributes: directory: the absolute path of the directory containing the persisted policies. saved_agents: list of tuples (episode, avg_rewards, directory) for each saved agent

View Source
class Every(_SaveCallback):

    """Saves the current policy every n evaluations.

        Attributes:

            directory: the absolute path of the directory containing the persisted policies.

            saved_agents: list of tuples (episode, avg_rewards, directory) for each saved agent

    """

    def __init__(self, num_evals_between_save: int = 1, directory: str = None):

        """Saves the current policy every n evaluations. In terms of episodes: the policy is saved every

        num_eval_between_saves * num_iterations_between_eval * num_episodes_per_iteration

        Args:

            num_evals_between_save: the number of evaluations between saves.

        """

        assert num_evals_between_save > 0

        super().__init__(directory=directory)

        self.num_evals_between_save: int = num_evals_between_save

    def on_play_end(self, agent_context: core.AgentContext):

        if agent_context.is_eval:

            tc = agent_context.train

            if tc.num_iterations % tc.num_iterations_between_eval == 0:

                evals = tc.num_iterations / tc.num_iterations_between_eval

                if evals % self.num_evals_between_save == 0:

                    self._save(agent_context=agent_context)

Ancestors (in MRO)

  • easyagents.callbacks.save._SaveCallback
  • easyagents.core.AgentCallback
  • abc.ABC

Methods

on_api_log
def on_api_log(
    self,
    agent_context: easyagents.core.AgentContext,
    api_target: str,
    log_msg: str
)

Logs a call to the api of the agents implementation library / framework.

View Source
    def on_api_log(self, agent_context: AgentContext, api_target: str, log_msg: str):

        """Logs a call to the api of the agents implementation library / framework."""

        pass
on_gym_init_begin
def on_gym_init_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

called when the monitored environment begins the instantiation of a new gym environment.

Args: agent_context: api_context passed to calling agent

View Source
    def on_gym_init_begin(self, agent_context: AgentContext):

        """called when the monitored environment begins the instantiation of a new gym environment.

            Args:

                agent_context: api_context passed to calling agent

        """
on_gym_init_end
def on_gym_init_end(
    self,
    agent_context: easyagents.core.AgentContext
)

called when the monitored environment completed the instantiation of a new gym environment.

Args: agent_context: api_context passed to calling agent

View Source
    def on_gym_init_end(self, agent_context: AgentContext):

        """called when the monitored environment completed the instantiation of a new gym environment.

        Args:

            agent_context: api_context passed to calling agent

        """

        pass
on_gym_reset_begin
def on_gym_reset_begin(
    self,
    agent_context: easyagents.core.AgentContext,
    **kwargs
)

Before a call to gym.reset

Args: agent_context: api_context passed to calling agent kwargs: the args to be passed to the underlying environment

View Source
    def on_gym_reset_begin(self, agent_context: AgentContext, **kwargs):

        """Before a call to gym.reset

            Args:

                agent_context: api_context passed to calling agent

                kwargs: the args to be passed to the underlying environment

        """
on_gym_reset_end
def on_gym_reset_end(
    self,
    agent_context: easyagents.core.AgentContext,
    reset_result: Tuple,
    **kwargs
)

After a call to gym.reset was completed

Args: agent_context: api_context passed to calling agent reset_result: object returned by gym.reset kwargs: args passed to gym.reset

View Source
    def on_gym_reset_end(self, agent_context: AgentContext, reset_result: Tuple, **kwargs):

        """After a call to gym.reset was completed

        Args:

            agent_context: api_context passed to calling agent

            reset_result: object returned by gym.reset

            kwargs: args passed to gym.reset

        """

        pass
on_gym_step_begin
def on_gym_step_begin(
    self,
    agent_context: easyagents.core.AgentContext,
    action
)

Before a call to gym.step

Args: agent_context: api_context passed to calling agent action: the action to be passed to the underlying environment

View Source
    def on_gym_step_begin(self, agent_context: AgentContext, action):

        """Before a call to gym.step

        Args:

            agent_context: api_context passed to calling agent

            action: the action to be passed to the underlying environment

        """

        pass
on_gym_step_end
def on_gym_step_end(
    self,
    agent_context: easyagents.core.AgentContext,
    action,
    step_result: Tuple
)

After a call to gym.step was completed

Args: agent_context: api_context passed to calling agent action: the action to be passed to the underlying environment step_result: (observation,reward,done,info) tuple returned by gym.step

View Source
    def on_gym_step_end(self, agent_context: AgentContext, action, step_result: Tuple):

        """After a call to gym.step was completed

        Args:

            agent_context: api_context passed to calling agent

            action: the action to be passed to the underlying environment

            step_result: (observation,reward,done,info) tuple returned by gym.step

        """

        pass
on_log
def on_log(
    self,
    agent_context: easyagents.core.AgentContext,
    log_msg: str
)

Logs a general message

View Source
    def on_log(self, agent_context: AgentContext, log_msg: str):

        """Logs a general message"""

        pass
on_play_begin
def on_play_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once at the entry of an agent.play() call (during play or eval, but not during train).

View Source
    def on_play_begin(self, agent_context: AgentContext):

        """Called once at the entry of an agent.play() call (during play or eval, but not during train). """
on_play_end
def on_play_end(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once before exiting an agent.play() call (during play or eval, but not during train)

View Source
    def on_play_end(self, agent_context: core.AgentContext):

        if agent_context.is_eval:

            tc = agent_context.train

            if tc.num_iterations % tc.num_iterations_between_eval == 0:

                evals = tc.num_iterations / tc.num_iterations_between_eval

                if evals % self.num_evals_between_save == 0:

                    self._save(agent_context=agent_context)
on_play_episode_begin
def on_play_episode_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once at the start of new episode to be played (during play or eval, but not during train).

View Source
    def on_play_episode_begin(self, agent_context: AgentContext):

        """Called once at the start of new episode to be played (during play or eval, but not during train). """
on_play_episode_end
def on_play_episode_end(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once after an episode is done or stopped (during play or eval, but not during train).

View Source
    def on_play_episode_end(self, agent_context: AgentContext):

        """Called once after an episode is done or stopped (during play or eval, but not during train)."""
on_play_step_begin
def on_play_step_begin(
    self,
    agent_context: easyagents.core.AgentContext,
    action
)

Called once before a new step is taken in the current episode (during play or eval, but not during train).

Args: agent_context: the context describing the agents current configuration action: the action to be passed to the upcoming gym_env.step call

View Source
    def on_play_step_begin(self, agent_context: AgentContext, action):

        """Called once before a new step is taken in the current episode (during play or eval, but not during train).

            Args:

                 agent_context: the context describing the agents current configuration

                 action: the action to be passed to the upcoming gym_env.step call

        """
on_play_step_end
def on_play_step_end(
    self,
    agent_context: easyagents.core.AgentContext,
    action,
    step_result: Tuple
)

Called once after a step is completed in the current episode (during play or eval, but not during train).

View Source
    def on_play_step_end(self, agent_context: AgentContext, action, step_result: Tuple):

        """Called once after a step is completed in the current episode (during play or eval, but not during train)."""
on_train_begin
def on_train_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once at the entry of an agent.train() call.

View Source
    def on_train_begin(self, agent_context: AgentContext):

        """Called once at the entry of an agent.train() call. """
on_train_end
def on_train_end(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once before exiting an agent.train() call

View Source
    def on_train_end(self, agent_context: AgentContext):

        """Called once before exiting an agent.train() call"""
on_train_iteration_begin
def on_train_iteration_begin(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once at the start of a new iteration.

View Source
    def on_train_iteration_begin(self, agent_context: AgentContext):

        """Called once at the start of a new iteration. """
on_train_iteration_end
def on_train_iteration_end(
    self,
    agent_context: easyagents.core.AgentContext
)

Called once after the current iteration is completed

View Source
    def on_train_iteration_end(self, agent_context: AgentContext):

        """Called once after the current iteration is completed"""