Problem with utils.validate_py_environment in a custom environment



I'd like some help understanding why I can't get utils.validate_py_environment to run successfully on my custom environment. The environment works correctly if I run a series of actions by hand.

Here is my environment (a microgrid with household consumption, a photovoltaic generator, and a battery bank):

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import abc
import tensorflow as tf
import numpy as np
from tf_agents.environments import py_environment
from tf_agents.environments import tf_environment
from tf_agents.environments import tf_py_environment
from tf_agents.environments import utils
from tf_agents.specs import array_spec
from tf_agents.environments import wrappers
from tf_agents.environments import suite_gym
from tf_agents.trajectories import time_step as ts
tf.compat.v1.enable_v2_behavior()
class MicroGridEnv(py_environment.PyEnvironment):
    """
    This class simulates the net amount of energy of the microgrid viewed from
    the battery, that is, the difference between generation and consumption of
    energy.
    1. Actions: there are 2 actions. Action 0: charge the battery; action 1: discharge the battery.
    2. Observations: [net load, energy available in the battery, prognosis of the net load (next 24 h),
       number of cycles]
    3. Reward: -1 per cycle reached, -2 if there is a lack of supply for the customer.
    """

    def __init__(self):
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(),
            dtype=np.int32,
            minimum=0, maximum=1,
            name='action'
        )
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(4,),
            dtype=np.float32,
            minimum=[-1.0, 0.0, -1.0, 0],
            maximum=[1.0, 1.0, 1.0, 1e35],
            name='observation'
        )
        self._state = [0.0, 0.0, 0.0, 0.0]
        self._episode_ended = False

    def action_spec(self):
        return self._action_spec

    def observation_spec(self):
        return self._observation_spec

    def _reset(self):
        self._state = [0.0, 0.0, 0.0, 0.0]
        self._episode_ended = False
        return ts.restart(np.array([self._state], dtype=np.float32))

    def _step(self, action):
        if self._episode_ended:
            # The last action ended the episode. Ignore the current action and start
            # a new episode.
            return self.reset()

        # First, we see if the episode has ended
        if self._state[0] < 0:
            self._episode_ended = True

        # Make sure battery actions are written here
        if action == 1:
            self._state += np.array([0.1, -0.1, 0.0, 0.0], dtype=np.float32)
        elif action == 0:
            self._state += np.array([-0.1, 0.1, 0.0, 0.0], dtype=np.float32)
        else:
            raise ValueError('action should be 0 or 1')
        print(self._state)

        # After the battery acts, we have to update the system dynamics:
        # - Shutdown condition (the episode ends if there's a lack of supply)
        if self._state[0] < 0:
            self._episode_ended = True

        if self._episode_ended:
            reward = np.array([self._state[0] - self._state[3]], dtype=np.float32)
            return ts.termination(
                self._state,
                reward
            )
        else:
            return ts.transition(
                self._state,
                reward=0
            )
Below is the main code, which tests the environment in two ways (stepping through actions manually, then calling validate_py_environment):
environment = MicroGridEnv()
print(f"environment MicrogridEnv created")
print(f"action_spec: {environment.action_spec()}")
print(f"time_step_spec.observation: {environment.time_step_spec().observation}")
print(f"time_step_spec.step_type: {environment.time_step_spec().step_type}")
print(f"time_step_spec.discount: {environment.time_step_spec().discount}")
print(f"time_step_spec.reward: {environment.time_step_spec().reward}")
print('Simulating 5 actions')
action = np.array(1, dtype=np.int32)
time_step = environment.reset()
print(time_step)
time_step = environment.step(action)
print(time_step)
time_step = environment.step(action)
print(time_step)
action = np.array(0, dtype=np.int32)
time_step = environment.step(action)
print(time_step)
time_step = environment.step(action)
print(time_step)
time_step = environment.step(action)
print(time_step)
utils.validate_py_environment(environment, episodes=5)

The output is as follows:

environment MicrogridEnv created
action_spec: BoundedArraySpec(shape=(), dtype=dtype('int32'), name='action', minimum=0, maximum=1)
time_step_spec.observation: BoundedArraySpec(shape=(4,), dtype=dtype('float32'), name='observation', minimum=[-1.  0. -1.  0.], maximum=[1.e+00 1.e+00 1.e+00 1.e+35])
time_step_spec.step_type: ArraySpec(shape=(), dtype=dtype('int32'), name='step_type')
time_step_spec.discount: BoundedArraySpec(shape=(), dtype=dtype('float32'), name='discount', minimum=0.0, maximum=1.0)
time_step_spec.reward: ArraySpec(shape=(), dtype=dtype('float32'), name='reward')
Simulating 5 actions
TimeStep(
{'discount': array(1., dtype=float32),
'observation': array([[0., 0., 0., 0.]], dtype=float32),
'reward': array(0., dtype=float32),
'step_type': array(0)})
[ 0.1 -0.1  0.   0. ]
TimeStep(
{'discount': array(1., dtype=float32),
'observation': array([ 0.1, -0.1,  0. ,  0. ]),
'reward': array(0., dtype=float32),
'step_type': array(1)})
[ 0.2 -0.2  0.   0. ]
TimeStep(
{'discount': array(1., dtype=float32),
'observation': array([ 0.2, -0.2,  0. ,  0. ]),
'reward': array(0., dtype=float32),
'step_type': array(1)})
[ 0.1 -0.1  0.   0. ]
TimeStep(
{'discount': array(1., dtype=float32),
'observation': array([ 0.1, -0.1,  0. ,  0. ]),
'reward': array(0., dtype=float32),
'step_type': array(1)})
[0. 0. 0. 0.]
TimeStep(
{'discount': array(1., dtype=float32),
'observation': array([0., 0., 0., 0.]),
'reward': array(0., dtype=float32),
'step_type': array(1)})
[-0.1  0.1  0.   0. ]
TimeStep(
{'discount': array([0.], dtype=float32),
'observation': array([-0.1,  0.1,  0. ,  0. ]),
'reward': array([-0.1], dtype=float32),
'step_type': array([2])})
Traceback (most recent call last):
File "c:/Users/evilc/PycharmProjects/MG_Research/MG_environment.py", line 126, in <module>
utils.validate_py_environment(environment, episodes=5)
File "C:Usersevilcanaconda3envsresearch_2021libsite-packagestf_agentsenvironmentsutils.py", line 72, in validate_py_environment
raise ValueError(
ValueError: Given `time_step`: TimeStep(
{'discount': array(1., dtype=float32),
'observation': array([[0., 0., 0., 0.]], dtype=float32),
'reward': array(0., dtype=float32),
'step_type': array(0)}) does not match expected `time_step_spec`: TimeStep(
{'discount': BoundedArraySpec(shape=(), dtype=dtype('float32'), name='discount', minimum=0.0, maximum=1.0),
'observation': BoundedArraySpec(shape=(4,), dtype=dtype('float32'), name='observation', minimum=[-1.  0. -1.  0.], maximum=[1.e+00 1.e+00 1.e+00 1.e+35]),       
'reward': ArraySpec(shape=(), dtype=dtype('float32'), name='reward'),
'step_type': ArraySpec(shape=(), dtype=dtype('int32'), name='step_type')})

Thanks in advance for any help you can give me.

Kindest regards!

ValueError: Given `time_step`: TimeStep(
{'discount': array(1., dtype=float32),
'observation': **array([[0., 0., 0., 0.]], dtype=float32)**,
'reward': array(0., dtype=float32),
'step_type': array(0)}) does not match expected `time_step_spec`: TimeStep(
{'discount': BoundedArraySpec(shape=(), dtype=dtype('float32'), name='discount', minimum=0.0, maximum=1.0),
'observation': BoundedArraySpec(shape=(4,), dtype=dtype('float32'), name='observation', minimum=[-1.  0. -1.  0.], maximum=[1.e+00 1.e+00 1.e+00 1.e+35]),       
'reward': ArraySpec(shape=(), dtype=dtype('float32'), name='reward'),
'step_type': ArraySpec(shape=(), dtype=dtype('int32'), name='step_type')})

Take a look at the error message. The given observation (highlighted above),

array([[0., 0., 0., 0.]], dtype=float32)

has shape (1, 4) because of the extra pair of brackets, while the observation spec expects shape (4,). On top of that, you return observations without specifying dtype=float32, so they come back as float64:
TimeStep(
{'discount': array(1., dtype=float32),
'observation': array([ 0.1, -0.1,  0. ,  0. ]),
'reward': array(0., dtype=float32),
'step_type': array(1)})
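
For what it's worth, here is a minimal sketch of one way to fix both mismatches, assuming the rest of the class stays as posted: keep self._state as a float32 NumPy array, drop the extra brackets in _reset, and always cast the observation before handing it to tf_agents. (The output above also shows the terminal reward as a length-1 array, array([-0.1], dtype=float32), while the reward spec is ArraySpec(shape=(), ...), so the sketch returns a plain scalar there too.)

def _reset(self):
    # Keep the state as a float32 array so later arithmetic stays float32
    self._state = np.zeros(4, dtype=np.float32)
    self._episode_ended = False
    # No extra brackets: the observation spec is shape (4,), not (1, 4)
    return ts.restart(np.array(self._state, dtype=np.float32))

def _step(self, action):
    if self._episode_ended:
        # The last action ended the episode; start a new one
        return self.reset()

    if action == 1:
        self._state += np.array([0.1, -0.1, 0.0, 0.0], dtype=np.float32)
    elif action == 0:
        self._state += np.array([-0.1, 0.1, 0.0, 0.0], dtype=np.float32)
    else:
        raise ValueError('action should be 0 or 1')

    if self._state[0] < 0:
        self._episode_ended = True

    # Always return a float32 array of shape (4,) to match the spec
    observation = np.array(self._state, dtype=np.float32)
    if self._episode_ended:
        # Scalar reward to match the reward spec of shape ()
        reward = float(self._state[0] - self._state[3])
        return ts.termination(observation, reward)
    return ts.transition(observation, reward=0.0)

With those two methods changed, utils.validate_py_environment(environment, episodes=5) should run through without raising. You can also spot-check a single step yourself by comparing environment.reset().observation.shape and .dtype against environment.observation_spec().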
