Controllers
Sinergym includes a module where you can implement your own controllers. Currently, we provide a random agent and a rule-based agent. You can find this code in sinergym/sinergym/utils/controllers.py. These controllers are very useful as baselines: they give a reference point against which DRL algorithms can be benchmarked:
"""Implementation of basic controllers."""
import os
import pkg_resources
from datetime import datetime
from ..utils.common import parse_variables
class RandomController(object):
    """Baseline agent that picks actions at random, ignoring observations."""

    def __init__(self, env):
        """Store the environment whose action space will be sampled.

        Args:
            env (object): Simulation environment. It must expose an
                `action_space` attribute with a `sample()` method.
        """
        self.env = env

    def act(self, observation=None):
        """Draw one action from the environment's `action_space`.

        Args:
            observation (object, optional): Perceived observation. It is not
                read by this method. Defaults to None.

        Returns:
            object: The value returned by `action_space.sample()`.
        """
        return self.env.action_space.sample()
class RuleBasedController(object):
    """Agent that maps outdoor temperature bands to fixed action tuples."""

    def __init__(
            self,
            env,
            range_comfort_winter=(20.0, 23.5),
            range_comfort_summer=(23.0, 26.0)):
        """Agent whose actions are based on static rules.

        The comfort ranges and the summer start/end dates are stored on the
        instance; `act` itself only reads `self.variables`.

        Args:
            env (object): Simulation environment. It must expose a
                `variables_path` attribute.
            range_comfort_winter (tuple, optional): Comfort temperature range
                for cool season. Defaults to (20.0, 23.5).
            range_comfort_summer (tuple, optional): Comfort temperature range
                for hot season. Defaults to (23.0, 26.0).
        """
        self.env = env
        self.range_comfort_winter = range_comfort_winter
        self.range_comfort_summer = range_comfort_summer

        # Observation names come from the environment's variables file; the
        # three time fields are appended after them so that names can be
        # zipped with the observation vector in `act`.
        self.variables_path = env.variables_path
        self.variables = parse_variables(self.variables_path)
        self.variables['observation'].extend(['day', 'month', 'hour'])

        # Summer period boundaries, pinned to a fixed reference year.
        reference_year = 2021
        self.summer_start_date = datetime(reference_year, 6, 1)
        self.summer_final_date = datetime(reference_year, 9, 30)

    def act(self, observation):
        """Select action based on outdoor air drybulb temperature.

        Args:
            observation (object): Perceived observation; its values are
                paired positionally with `self.variables['observation']`.

        Returns:
            object: Action chosen, a 2-tuple of numbers
                (presumably heating/cooling setpoints -- TODO confirm).
        """
        named_obs = {
            name: value
            for name, value in zip(self.variables['observation'], observation)
        }
        outdoor_temp = named_obs[
            'Site Outdoor Air Drybulb Temperature (Environment)']

        # (exclusive upper bound, action) pairs, checked in ascending order;
        # the first band whose bound exceeds the temperature wins.
        bands = (
            (15, (19, 21)),  # t < 15
            (20, (20, 22)),  # 15 <= t < 20
            (26, (21, 23)),  # 20 <= t < 26
            (30, (26, 30)),  # 26 <= t < 30
        )
        for upper_bound, band_action in bands:
            if outdoor_temp < upper_bound:
                return band_action

        # t >= 30 (also reached when no comparison holds, e.g. NaN)
        return (24, 26)
The functionality is very simple: given an environment observation, these instances return an action to interact with the environment. You can develop your own controllers or, for example, modify the rules of RuleBasedController. The following example shows how these controllers can be used:
import gym
import numpy as np
from sinergym.utils.controllers import RuleBasedController
# Build the simulation environment and the rule-based agent that drives it.
env = gym.make('Eplus-continuous-mixed-v1')
agent = RuleBasedController(env)

for episode in range(1):
    obs = env.reset()
    episode_rewards = []
    last_reported_month = 0

    while True:
        # One control step: ask the agent for an action and apply it.
        obs, reward, done, info = env.step(agent.act(obs))
        episode_rewards.append(reward)

        # Display results every month: report once each time the month
        # carried in `info` changes.
        month = info['month']
        if month != last_reported_month:
            last_reported_month = month
            print('Reward: ', sum(episode_rewards), info)

        if done:
            break

    # End-of-episode summary.
    print('Episode ', episode,
          'Mean reward: ', np.mean(episode_rewards),
          'Cumulative reward: ', sum(episode_rewards))

env.close()