tf.data.Dataset iterator returning Tensor("IteratorGetNext:1", shape=(None, 16), dtype=int32) but cannot get the values of the tensors



I am trying to write a custom model in which I am writing a custom train_step function.

I am creating a tf.data.Dataset from a custom data generator like this:

tds = tf.data.Dataset.from_generator(
    tdg.__iter__,
    args=None,
    output_types=(tf.float32, tf.int32),
    output_shapes=(tf.TensorShape([16, 64, 64, 3]), tf.TensorShape([16])))
tds = tds.batch(1)

In the custom DataGenerator, the __iter__ method is defined as:

def __iter__(self):
    for item in (self[i] for i in range(len(self))):
        yield item

But when I try to retrieve the data inside the train_step function with x, y = data, I get

Tensor("IteratorGetNext:0", shape=(None, 16, 64, 64, 3), dtype=float32)

Tensor("IteratorGetNext:1", shape=(None, 16), dtype=int32)作为输出

If I run print(x[0]), I get

Tensor("strided_slice:0", shape=(16,), dtype=int32)

I do not get a tensor with a numpy() attribute.

Where is this going wrong?

This works with tf.data:

for data_batch, label_batch in tfds:
    print(data_batch.numpy().shape)
    for data in data_batch:
        print(data.numpy().shape)
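
A note on why .numpy() is missing in your train_step: Keras traces train_step as a tf.function, so inside it x and y are symbolic graph tensors, while the loop above iterates the dataset eagerly and yields EagerTensors. If you need concrete values inside train_step for debugging, one option is to force eager execution; a minimal sketch, assuming your compiled model is named keras_model:

# Debugging-only sketch: run_eagerly=True disables graph tracing of
# train_step, so tensors inside it are EagerTensors with .numpy().
# Expect slower training; turn it off again afterwards.
keras_model.compile('adam', 'categorical_crossentropy',
                    metrics=['acc'], run_eagerly=True)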

You can test something like the following; some corrections to the shapes may be needed for your structure (I don't know your code), or adapt it to your task:

# https://stackoverflow.com/questions/63660618/tf-data-dataset-iterator-returning-tensoriteratorgetnext1-shape-none-16/72104494#72104494
# my test
import math

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Input


class my_DataGenerator(tf.keras.utils.Sequence):
    # https://github.com/mahmoudyusof/facial_keypoint_detection/blob/master/1.1%20Getting%20the%20data%20ready%20(the%20right%20way).ipynb
    def __init__(self, train_len=64, batch_size=16, shuffle=False):
        self.is_epoch_0 = False
        self.train_len = train_len
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def on_epoch_end(self):
        """This function gets called after each epoch."""
        # All possible indices could be (re)generated and shuffled here:
        # self.indices = np.arange(self.train_len)
        # if self.shuffle:
        #     np.random.shuffle(self.indices)
        print('on_epoch_end')
        self.is_epoch_0 = False

    def __len__(self):
        """The generator returns one batch at a time, so it makes sense
        that its length is the number of samples divided by the batch
        size, giving the total number of batches."""
        return math.ceil(self.train_len / self.batch_size)

    def __getitem__(self, idx):
        """Returns the tuple (X, y). This is where the magic happens: the
        model calls this through the indexing operator, 'generator[0]' or
        whatever; then and only then is the batch loaded into memory, and
        the garbage collector removes it on the next iteration.
        NOTE: from_generator below uses __call__, so this stub is never
        exercised here."""
        _feature = np.empty((self.batch_size, 64, 64, 3))
        _label = np.empty((self.batch_size, 2))
        return _feature, _label

    # to avoid the error: `generator` must be a Python callable.
    def __call__(self):
        # Make sure the generator can produce at least
        # `steps_per_epoch * epochs` batches (in this case, 20 batches).
        for t in range(0, 20, 1):
            labels = np.random.randint(0, 5, 16)
            labels = labels[:, np.newaxis]
            labels = np.c_[labels, np.ones(16)]
            yield np.random.sample(size=(16, 64, 64, 3)), labels


tdg = my_DataGenerator(128, 32)
NUM_CLASSES = 5    # tf.unique(tdg.classes)
training_set = tf.data.Dataset.from_generator(
    tdg,
    (tf.float32, tf.int32),
    (tf.TensorShape([None, 64, 64, 3]), tf.TensorShape([None, 2])))
print('tdg train_len:', tdg.train_len)
print('tdg batch_size:', tdg.batch_size)
testing_set = tf.data.Dataset.from_generator(
    tdg,
    (tf.float32, tf.int32),
    (tf.TensorShape([None, 64, 64, 3]), tf.TensorShape([None, 2])))

# model https://stackoverflow.com/a/71914845/15893581
inputs = Input(shape=(64, 64, 3))
x = Conv2D(32, (4, 3), activation='relu', padding='valid')(inputs)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(2, activation='softmax')(x)
keras_model = tf.keras.Model(inputs, outputs)

# Compile the model
keras_model.compile('adam', 'categorical_crossentropy', metrics=['acc'])

# Train with tf.data datasets
keras_training_history = keras_model.fit(
    training_set,
    steps_per_epoch=128 // 32,    # tdg.train_len // tdg.batch_size
    epochs=5,
    validation_data=testing_set,
    validation_steps=128 // 32,   # tdg.train_len // tdg.batch_size
    verbose=1)
print(keras_training_history.history)
# This got rid of the warning:
# "WARNING:tensorflow:Your input ran out of data; interrupting training.
#  Make sure that your dataset or generator can generate at least
#  `steps_per_epoch * epochs` batches (in this case, 20.0 batches).
#  You may need to use the repeat() function when building your dataset."

Although the results look strange to me (accuracy keeps rising even as the loss increases; is that normal?):

{'loss': [5.840527057647705, 47.09394836425781, 213.2418212890625, 738.29443359375, 2104.241455078125], 'acc': [0.6875, 0.828125, 0.84375, 0.84375, 0.859375], 'val_loss': [19.316448211669922, 122.56965637207031, 452.21417236328125, 1395.7388916015625, 3683.197021484375], 'val_acc': [0.703125, 0.78125, 0.859375, 0.78125, 0.90625]}

A short example can be seen here... mainly, in the DataGenerator class only the __init__, __call__ and __len__ methods matter... the other methods (__iter__, on_epoch_end) need testing in your own minimal reproducible example... an on_epoch_end check could be added.

P.S.

Alternatively, you could refactor the class into a simple generator function instead, though I doubt whether it would behave as well during training (even though the generator function seems to work here); see the sketch below.
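
A minimal sketch of that function-based variant, mirroring the shapes and dtypes used above (gen_fn and its parameter names are illustrative, not from the original code):

import numpy as np
import tensorflow as tf

def gen_fn(num_batches=20, batch_size=16):
    # a plain generator function instead of a Sequence subclass
    for _ in range(num_batches):
        x = np.random.sample(size=(batch_size, 64, 64, 3)).astype(np.float32)
        labels = np.random.randint(0, 5, batch_size)[:, np.newaxis]
        y = np.c_[labels, np.ones(batch_size)].astype(np.int32)
        yield x, y

ds = tf.data.Dataset.from_generator(
    gen_fn,
    (tf.float32, tf.int32),
    (tf.TensorShape([None, 64, 64, 3]), tf.TensorShape([None, 2])))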

I think you may not be defining the generator correctly. By defining an __iter__ method you define an iterable, but that may not be what is needed here. My understanding is that generators are iterators, but iterators are not generators. To implement a generator in a class, I think you need something like this:

class GeneratorClass:
    def __init__(self, *args):
        # do stuff (store args, open files, etc.)
        self.args = args

    def actual_generator(self):
        # the actual generator you will pass:
        # do stuff, then yield other stuff
        for value in self.args:
            yield value

import tensorflow as tf

gen_class = GeneratorClass(1.0, 2.0, 3.0)
# Pass the bound method itself (a callable), not the generator object
# returned by calling it, and declare the output type(s).
ds = tf.data.Dataset.from_generator(gen_class.actual_generator,
                                    output_types=tf.float32)
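
Iterating such a dataset eagerly (outside any tf.function) then yields EagerTensors, which do expose the numpy() method the question was missing:

for elem in ds.take(2):
    print(elem.numpy())  # EagerTensor: .numpy() is available here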
