Controllers
Sinergym has a section to implement your own controllers. Currently, we have developed a random agent and a rule-based agent. You can find this code in sinergym/sinergym/utils/controllers.py. It is very useful for performing benchmarks as a reference point when studying DRL algorithms:
"""Implementation of basic controllers."""
import os
import pkg_resources
from datetime import datetime
from ..utils.common import parse_variables
class RandomController(object):

    def __init__(self, env):
        """Agent that ignores observations and samples actions uniformly
        from the environment's action space.

        Args:
            env (object): Simulation environment.
        """
        self.env = env

    def act(self, observation=None):
        """Pick an action at random.

        Args:
            observation (object, optional): Perceived observation (unused).
                Defaults to None.

        Returns:
            object: Randomly sampled action.
        """
        return self.env.action_space.sample()
class RuleBasedController(object):

    def __init__(
            self,
            env,
            range_comfort_winter=(20.0, 23.5),
            range_comfort_summer=(23.0, 26.0)):
        """Agent whose actions follow static, hand-written rules.

        Args:
            env (object): Simulation environment.
            range_comfort_winter (tuple, optional): Comfort temperature range
                for the cool season. Defaults to (20.0, 23.5).
            range_comfort_summer (tuple, optional): Comfort temperature range
                for the hot season. Defaults to (23.0, 26.0).
        """
        self.env = env
        self.range_comfort_winter = range_comfort_winter
        self.range_comfort_summer = range_comfort_summer
        # Observation variable names come from the environment's variables
        # file; time fields are appended so they can be read from obs too.
        self.variables_path = self.env.variables_path
        self.variables = parse_variables(self.variables_path)
        self.variables['observation'].extend(['day', 'month', 'hour'])
        # Reference year used only to delimit the summer period.
        reference_year = 2021
        self.summer_start_date = datetime(reference_year, 6, 1)
        self.summer_final_date = datetime(reference_year, 9, 30)

    def act(self, observation):
        """Select setpoints based on the outdoor air drybulb temperature.

        Args:
            observation (object): Perceived observation.

        Returns:
            object: Action chosen (heating/cooling setpoint pair).
        """
        obs_dict = dict(zip(self.variables['observation'], observation))
        out_temp = obs_dict['Site Outdoor Air Drybulb Temperature (Environment)']
        # Each rule is (exclusive upper temperature bound, setpoints);
        # the first matching bound wins.
        rules = (
            (15, (19, 21)),
            (20, (20, 22)),
            (26, (21, 23)),
            (30, (26, 30)),
        )
        for upper_bound, setpoints in rules:
            if out_temp < upper_bound:
                return setpoints
        return (24, 26)  # t >= 30
The functionality is very simple: given an environment observation, these instances return an action to interact with the environment. You can develop your own
controllers or modify the rules of RuleBasedController, for example. A usage example of these controllers could be the following:
import gym
import numpy as np

from sinergym.utils.controllers import RuleBasedController

env = gym.make('Eplus-continuous-mixed-v1')

# Build an agent driven by the static temperature rules.
agent = RuleBasedController(env)

for episode in range(1):
    observation = env.reset()
    episode_rewards = []
    done = False
    current_month = 0
    while not done:
        # Query the rule-based policy and advance the simulation one step.
        chosen_action = agent.act(observation)
        observation, reward, done, info = env.step(chosen_action)
        episode_rewards.append(reward)
        # Display results every month.
        if info['month'] != current_month:
            current_month = info['month']
            print('Reward: ', sum(episode_rewards), info)
    print('Episode ', episode, 'Mean reward: ', np.mean(episode_rewards),
          'Cumulative reward: ', sum(episode_rewards))
env.close()