env在env.observation_space中返回dictionary



我有以下代码。我尝试了多种方法来解析数据,但都失败了。

# --- Question code: multi-input Q-network for a dict observation space ---
# NOTE(review): MtEnv.observation_space appears to be a gym.spaces.Dict with
# keys balance/equity/margin/features/orders — inferred from the error text
# below; confirm against the environment's definition.
env = MtEnv()
inputlist = []
# One (1,)-shaped Input per observation key; the Input `name`s are intended to
# line up with the dict keys the environment returns.
balance = Input(shape=(1,), name="balance")
inputlist.append(balance)
equity = Input(shape=(1,), name="equity")
inputlist.append(equity)
margin = Input(shape=(1,), name="margin")
inputlist.append(margin)
features = Input(shape=(1,), name="features")
inputlist.append(features)
orders = Input(shape=(1,), name="orders")
inputlist.append(orders)
# Sort inputs alphabetically by layer name.
# NOTE(review): relies on the private `_keras_history` attribute — fragile
# across Keras versions.
inputlist.sort(key=lambda inp: inp._keras_history.layer.name)
# Independent 40-unit ReLU embedding for each scalar input.
balance_embedding = Dense(units=40,activation='relu')(balance)
equity_embedding = Dense(units=40,activation='relu')(equity)
margin_embedding = Dense(units=40,activation='relu')(margin)
features_embedding = Dense(units=40,activation='relu')(features)
orders_embedding = Dense(units=40,activation='relu')(orders)
# Concatenate the five embeddings along the feature axis -> (None, 200).
combined = tensorflow.concat([balance_embedding, equity_embedding, margin_embedding, features_embedding, orders_embedding],1)
# MLP trunk 512 -> 256 -> 128.
x= Dense(512, activation='relu')(combined)
x= Dense(256, activation='relu')(x)
x= Dense(128, activation='relu')(x)
# Linear Q-value head: one output unit per action.
output = Dense(env.action_space.shape[0], activation="linear")(x)
data = env.observation_space
model = Model(inputs=inputlist, outputs=output)
#print(data.shape)
model.summary()
# keras-rl agent setup.
# NOTE(review): this import of `policy` is immediately shadowed inside
# build_agent and looks accidental.
from keras.engine.base_layer import policy
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
# NOTE(review): the body of build_agent lost its indentation in the paste —
# the four lines following the `def` should be indented under it.
def build_agent(model,actions):
# Annealed epsilon-greedy exploration: eps 1.0 -> 0.1 over 10000 steps.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1., value_min=0.1, value_test=0.2, nb_steps=10000)
# Replay buffer of 2000 transitions, stacking 3 consecutive observations.
memory= SequentialMemory(limit=2000, window_length=3)
dqn = DQNAgent(model=model,memory=memory, policy=policy, enable_dueling_network=True,dueling_type='avg', nb_actions = actions, nb_steps_warmup=1000 )
return dqn
dqn = build_agent(model, env.action_space.shape[0])
from keras.optimizers import Adam
# NOTE(review): `lr` is deprecated in newer Keras — `learning_rate` is the
# current keyword.
dqn.compile(Adam(lr=0.0001))
dqn.fit(env,nb_steps=40000,visualize=False, verbose=1)

我得到的错误是 ValueError: 检查模型输入时出错:传递给模型的 Numpy 数组列表不是模型预期的大小。对于输入 ['balance', 'equity', 'margin', 'features', 'orders'],应看到 5 个数组,但却得到了以下 1 个数组的列表:[array([{'balance': array([0.]), 'equity': array([0., 0., 0., ...]), ...}])]

我知道环境返回一个 Dict(),其中包含这些输入。我一直在尝试以某种方式将字典数据加载到 1 层(如果需要的话,也可以加载 5 层)。

使用来自baseline3的A2C或PPO模型可以毫无问题地运行该环境。

您可以做一些更简单的事情:

# Build the same 5-input model, importing every Keras symbol from
# tensorflow.keras. Mixing the standalone `keras` package with
# `tensorflow.keras` (as the original did with `from keras.layers import
# Dense`) can yield incompatible layer/tensor objects in TF2, so all layers
# come from one namespace here.
import numpy as np
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense

# One scalar input per observation field; the names match the env's dict keys.
balance = Input(shape=(1,), name="balance")
equity = Input(shape=(1,), name="equity")
margin = Input(shape=(1,), name="margin")
features = Input(shape=(1,), name="features")
orders = Input(shape=(1,), name="orders")

# Independent 40-unit ReLU embedding for each scalar input.
balance_embedding = Dense(units=40, activation='relu')(balance)
equity_embedding = Dense(units=40, activation='relu')(equity)
margin_embedding = Dense(units=40, activation='relu')(margin)
features_embedding = Dense(units=40, activation='relu')(features)
orders_embedding = Dense(units=40, activation='relu')(orders)

# Concatenate along the feature axis -> shape (None, 200).
combined = tf.concat([balance_embedding, equity_embedding, margin_embedding,
                      features_embedding, orders_embedding], 1)

# MLP trunk 512 -> 256 -> 128.
x = Dense(512, activation='relu')(combined)
x = Dense(256, activation='relu')(x)
x = Dense(128, activation='relu')(x)
# Single linear output unit; replace 1 with env.action_space.shape[0] when
# the action count is known.
output = Dense(1, activation="linear")(x)

model = Model(inputs=[balance, equity, margin, features, orders], outputs=output)
model.summary()

注意:我已经更改了最后一个 Dense 层中的节点数,因为我不知道 env.action_space.shape[0] 的值。

总结:

Model: "model"
__________________________________________________________________________________________________
Layer (type)                   Output Shape         Param #     Connected to                     
==================================================================================================
balance (InputLayer)           [(None, 1)]          0           []                               
                            
equity (InputLayer)            [(None, 1)]          0           []                               
                            
margin (InputLayer)            [(None, 1)]          0           []                               
                            
features (InputLayer)          [(None, 1)]          0           []                               
                            
orders (InputLayer)            [(None, 1)]          0           []                               
                            
dense_9 (Dense)                (None, 40)           80          ['balance[0][0]']                
                            
dense_10 (Dense)               (None, 40)           80          ['equity[0][0]']                 
                            
dense_11 (Dense)               (None, 40)           80          ['margin[0][0]']                 
                            
dense_12 (Dense)               (None, 40)           80          ['features[0][0]']               
                            
dense_13 (Dense)               (None, 40)           80          ['orders[0][0]']                 

                      
tf.concat_1 (TFOpLambda)       (None, 200)          0           ['dense_9[0][0]',                
'dense_10[0][0]',               
'dense_11[0][0]',               
'dense_12[0][0]',               
'dense_13[0][0]']               
                            
dense_14 (Dense)               (None, 512)          102912      ['tf.concat_1[0][0]']            
                            
dense_15 (Dense)               (None, 256)          131328      ['dense_14[0][0]']               
                            
dense_16 (Dense)               (None, 128)          32896       ['dense_15[0][0]']               
                            
dense_17 (Dense)               (None, 1)            129         ['dense_16[0][0]']               
                            
==================================================================================================
Total params: 267,665
Trainable params: 267,665
Non-trainable params: 0

最新更新