Wrappers

Sinergym includes several wrappers that add functionality the environment does not provide by default. Currently, a normalization wrapper, a multi-observation wrapper and a Logger wrapper are available. The code can be found in sinergym/sinergym/utils/wrappers.py and is reproduced below. You can also implement your own wrappers by inheriting from gym.Wrapper or one of its variants (a minimal custom-wrapper sketch is shown after the listing):

"""Implementation of custom Gym environments."""

import numpy as np
import gym

from collections import deque
from sinergym.utils.common import CSVLogger
from sinergym.utils.common import RANGES_5ZONE
from stable_baselines3.common.env_util import is_wrapped


class NormalizeObservation(gym.ObservationWrapper):

    def __init__(self, env, ranges=RANGES_5ZONE):
        """Observations normalized to range [0, 1].

        Args:
            env (object): Original Sinergym environment.
            ranges: Ranges of the observation variables used to apply normalization (they depend on the environment). Defaults to RANGES_5ZONE.
        """
        super(NormalizeObservation, self).__init__(env)
        self.unwrapped_observation = None
        self.ranges = ranges

    def observation(self, obs):
        """Applies normalization to observation.

        Args:
            obs (object): Original observation.

        Returns:
            object: Normalized observation.
        """
        # Save original obs in class attribute
        self.unwrapped_observation = obs.copy()

        # NOTE: If you want to record day, month and hour, you should add
        # those variables as keys
        for i, variable in enumerate(self.env.variables['observation']):
            # Normalization (handle division-by-zero error)
            if(self.ranges[variable][1] - self.ranges[variable][0] == 0):
                obs[i] = max(
                    self.ranges[variable][0], min(
                        obs[i], self.ranges[variable][1]))
            else:
                obs[i] = (obs[i] - self.ranges[variable][0]) / \
                    (self.ranges[variable][1] - self.ranges[variable][0])

            # Clip values outside the range [0, 1] and replace NaN with 0
            if np.isnan(obs[i]):
                obs[i] = 0
            elif obs[i] > 1:
                obs[i] = 1
            elif obs[i] < 0:
                obs[i] = 0
        # Return obs values in the SAME ORDER as the obs argument.
        return np.array(obs)

    def get_unwrapped_obs(self):
        """Get last environment observation without normalization.

        Returns:
            object: Last original observation.
        """
        return self.unwrapped_observation


class MultiObsWrapper(gym.Wrapper):

    def __init__(self, env, n=5, flatten=True):
        """Stack of observations.

        Args:
            env (object): Original Gym environment.
            n (int, optional): Number of observations to be stacked. Defaults to 5.
            flatten (bool, optional): Whether or not to flatten the observation vector. Defaults to True.
        """
        super(MultiObsWrapper, self).__init__(env)
        self.n = n
        self.ind_flat = flatten
        self.history = deque([], maxlen=n)
        shape = env.observation_space.shape
        new_shape = (shape[0] * n,) if flatten else ((n,) + shape)
        self.observation_space = gym.spaces.Box(
            low=-5e6, high=5e6, shape=new_shape, dtype=np.float32)

    def reset(self):
        """Resets the environment.

        Returns:
            np.array: Stacked previous observations.
        """
        obs = self.env.reset()
        for _ in range(self.n):
            self.history.append(obs)
        return self._get_obs()

    def step(self, action):
        """Performs the action in the new environment."""

        observation, reward, done, info = self.env.step(action)
        self.history.append(observation)
        return self._get_obs(), reward, done, info

    def _get_obs(self):
        """Get observation history.

        Returns:
            np.array: Array of previous observations.
        """
        if self.ind_flat:
            return np.array(self.history).reshape(-1,)
        else:
            return np.array(self.history)


class LoggerWrapper(gym.Wrapper):

    def __init__(self, env, flag=True):
        """CSVLogger to log interactions with environment.

        Args:
            env (object): Original Gym environment.
            flag (bool, optional): Whether the logger is active or not. Defaults to True.
        """
        gym.Wrapper.__init__(self, env)
        # Headers for csv logger
        monitor_header_list = ['timestep,month,day,hour'] + env.variables['observation'] + \
            env.variables['action'] + ['time (seconds)', 'reward',
                                       'power_penalty', 'comfort_penalty', 'done']
        self.monitor_header = ''
        for element_header in monitor_header_list:
            self.monitor_header += element_header + ','
        self.monitor_header = self.monitor_header[:-1]
        self.progress_header = 'episode_num,cumulative_reward,mean_reward,cumulative_power_consumption,mean_power_consumption,cumulative_comfort_penalty,mean_comfort_penalty,cumulative_power_penalty,mean_power_penalty,comfort_violation (%),length(timesteps),time_elapsed(seconds)'

        # Create simulation logger; it is active by default (flag=True)
        self.logger = CSVLogger(
            monitor_header=self.monitor_header,
            progress_header=self.progress_header,
            log_progress_file=env.simulator._env_working_dir_parent +
            '/progress.csv',
            flag=flag)

    def step(self, action):
        """Step the environment. Logging new information

        Args:
            action: Action executed in step

        Returns:
            (np.array(),float,bool,dict) tuple
        """
        obs, reward, done, info = self.env.step(action)
        # We added some extra values (month,day,hour) manually in env, so we
        # need to delete them.
        if is_wrapped(self, NormalizeObservation):
            # Record action and new observation in simulator's csv
            self.logger.log_step_normalize(timestep=info['timestep'],
                                           date=[info['month'],
                                                 info['day'], info['hour']],
                                           observation=obs[:-3],
                                           action=info['action_'],
                                           simulation_time=info['time_elapsed'],
                                           reward=reward,
                                           total_power_no_units=info['total_power_no_units'],
                                           comfort_penalty=info['comfort_penalty'],
                                           done=done)
            # Record original observation too
            self.logger.log_step(
                timestep=info['timestep'],
                date=[
                    info['month'],
                    info['day'],
                    info['hour']],
                observation=self.env.get_unwrapped_obs()[
                    :-3],
                action=info['action_'],
                simulation_time=info['time_elapsed'],
                reward=reward,
                total_power_no_units=info['total_power_no_units'],
                comfort_penalty=info['comfort_penalty'],
                power=info['total_power'],
                done=done)
        else:
            # Only record observation without normalization
            self.logger.log_step(timestep=info['timestep'],
                                 date=[info['month'],
                                       info['day'], info['hour']],
                                 observation=obs[:-3],
                                 action=info['action_'],
                                 simulation_time=info['time_elapsed'],
                                 reward=reward,
                                 total_power_no_units=info['total_power_no_units'],
                                 comfort_penalty=info['comfort_penalty'],
                                 power=info['total_power'],
                                 done=done)

        return obs, reward, done, info

    def reset(self):
        """Resets the environment. Recording episode summary in logger
        """
        # If this is not the first simulated episode, log the previous one
        if self.env.simulator._episode_existed:
            self.env.simulator.logger_main.debug(
                'End of episode, recording summary (progress.csv) if logger is active')
            self.logger.log_episode(episode=self.env.simulator._epi_num)

        # Then, reset environment
        obs = self.env.reset()

        # Create monitor.csv for information of this episode
        self.env.simulator.logger_main.debug(
            'Creating monitor.csv for current episode (episode ' + str(
                self.env.simulator._epi_num) + ') if logger is active')
        self.logger.set_log_file(
            self.env.simulator._eplus_working_dir + '/monitor.csv')
        # Store initial state of simulation
        self.logger.log_step(timestep=0,
                             date=[obs[-2], obs[-3], obs[-1]],
                             observation=obs[:-3],
                             action=[None for _ in range(
                                 len(self.env.variables['action']))],
                             simulation_time=0,
                             reward=None,
                             total_power_no_units=None,
                             comfort_penalty=None,
                             power=None,
                             done=False)

        return obs

    def close(self):
        """Close env. Recording last episode summary.
        """
        # Record last episode summary before end simulation
        self.env.simulator.logger_main.debug(
            'End of episode, recording summary (progress.csv) if logger is active')
        self.logger.log_episode(episode=self.env.simulator._epi_num)

        # Then, close env
        self.env.close()

    def activate_logger(self):
        """Activate logger if its flag False.
        """
        self.logger.activate_flag()

    def deactivate_logger(self):
        """Deactivate logger if its flag True.
        """
        self.logger.deactivate_flag()
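
As mentioned above, you can also write your own wrapper by inheriting from gym.Wrapper or one of its variants (gym.ObservationWrapper, gym.ActionWrapper, gym.RewardWrapper). The following minimal sketch is only an illustration and is not part of Sinergym; the class name and the clipping bounds are assumptions made for the example:

import gym
import numpy as np


class ClipObservation(gym.ObservationWrapper):

    def __init__(self, env, low=-1e6, high=1e6):
        """Hypothetical wrapper that clips every observation value to [low, high].

        Args:
            env (object): Original Gym environment.
            low (float, optional): Lower clipping bound.
            high (float, optional): Upper clipping bound.
        """
        super(ClipObservation, self).__init__(env)
        self.low = low
        self.high = high

    def observation(self, obs):
        # Clip each value of the observation to the configured range
        return np.clip(obs, self.low, self.high)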

An example of how to use these wrappers could be the following:

import gym
import sinergym
import numpy as np

from sinergym.utils.wrappers import MultiObsWrapper, NormalizeObservation, LoggerWrapper

default_env = gym.make('Eplus-demo-v1')

# apply wrappers
env = MultiObsWrapper(LoggerWrapper(NormalizeObservation(default_env)))

for i in range(1):
    obs = env.reset()
    rewards = []
    done = False
    current_month = 0
    while not done:
        a = env.action_space.sample()
        obs, reward, done, info = env.step(a)
        rewards.append(reward)
        if info['month'] != current_month:  # display results every month
            current_month = info['month']
            print('Reward: ', sum(rewards), info)
    print('Episode ', i, 'Mean reward: ', np.mean(
        rewards), 'Cumulative reward: ', sum(rewards))
env.close()
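
Since NormalizeObservation keeps the last original observation, you can still retrieve it after wrapping through its get_unwrapped_obs() method (gym.Wrapper forwards the attribute access through the outer wrappers). A minimal sketch, reusing the same wrapper chain as above:

env = MultiObsWrapper(LoggerWrapper(NormalizeObservation(gym.make('Eplus-demo-v1'))))
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())

print(obs)                      # normalized and stacked observation
print(env.get_unwrapped_obs())  # last raw observation from the simulator
env.close()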

Warning

The order of the wrappers is important if you are going to use several of them at the same time. The correct order is Normalization - Logger - MultiObs, and subsets of it are also valid (for example, Normalization - MultiObs).
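
For instance, a valid subset (sketched with the same demo environment as above) would be:

# Normalization is applied before stacking observations
env = MultiObsWrapper(NormalizeObservation(gym.make('Eplus-demo-v1')))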

Note

For more information about the Sinergym Logger, visit Logger.