Using ImageDataGenerator with a regression output



I want to load my dataset with TensorFlow's ImageDataGenerator.flow_from_directory(), but my output is a regression target rather than a classification, so I used class_mode=None. That means no labels are assigned to my data, and now I have to attach the labels (which I have as a list) to my training examples myself, and I don't know how. Is there a way around this?

Example code:

labels = [0.75, 21.60, 10.12] # example labels
# load dataset from directory
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
train_data = image_generator.flow_from_directory(batch_size=batch_size, directory=train_x_dir, target_size=(224, 224), class_mode=None, shuffle=False)
# assign labels to training examples
# ???

Since I did not get a direct answer, I assume this cannot be done in TF 2.3.
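For reference, the kind of direct pairing I was looking for would be something like the sketch below: wrap the unlabeled flow_from_directory iterator and yield its batches together with matching slices of the label list. This is only a rough illustration (the labeled_generator name is mine), and it assumes shuffle=False so that the batch order follows the sorted file names:

import numpy as np

labels = np.array([0.75, 21.60, 10.12])  # example labels, ordered like the sorted image files

def labeled_generator(image_flow, labels):
    # image_flow is the unlabeled iterator returned by flow_from_directory().
    # It loops forever, so wrap the label index around at the end of each epoch.
    i = 0
    for x_batch in image_flow:
        y_batch = labels[i:i + len(x_batch)]
        i = (i + len(x_batch)) % len(labels)
        yield x_batch, y_batch

train_generator = labeled_generator(train_data, labels)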

Instead, I followed a thread mentioned by AerysS, specifically the answer from user timehaven, and used his code to generate batches from a Pandas dataframe with Keras's load_img and img_to_array. The code was written for Python 2.7, so I made a few changes, and it works with Python 3.6.8.

data_generator.py

from __future__ import print_function

from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
import numpy as np
import pandas as pd
import bcolz
import threading

import os
import sys
import glob
import shutil


bcolz_lock = threading.Lock()
# old_blosc_nthreads = bcolz.blosc_set_nthreads(1)
# assert bcolz.blosc_set_nthreads(1) == 1


def safe_bcolz_open(fname, idx=None, debug=False):
    # bcolz is not thread safe, so serialize access to the on-disk array.
    with bcolz_lock:
        if idx is None:
            X2 = bcolz.open(fname)
        else:
            X2 = bcolz.open(fname)[idx]

        if debug:
            # Sanity-check that the rows we read match the requested indices.
            df_debug = pd.DataFrame(X2, index=idx)
            assert X2.shape[0] == len(idx)
            assert X2.shape == df_debug.shape

            df_debug = df_debug.astype(int)
            test_idx = (df_debug.subtract(df_debug.index.values, axis=0) == 0).all(axis=1)
            assert test_idx.all(), df_debug[~test_idx]

    return X2


class threadsafe_iter:
    """Wrap an iterator so that calls to next() are serialized across threads."""
    def __init__(self, it):
        self.it = it
        self.lock = threading.Lock()
        assert self.lock is not bcolz_lock

    def __iter__(self):
        return self

    def next(self):  # Python 2
        with self.lock:
            return self.it.next()

    def __next__(self):  # Python 3
        with self.lock:
            return next(self.it)


def threadsafe_generator(f):
    def g(*a, **kw):
        return threadsafe_iter(f(*a, **kw))
    return g


@threadsafe_generator
def generator_from_df(df, batch_size, target_size, features=None,
                      debug_merged=False):
    if features is not None:
        assert os.path.exists(features)
        assert safe_bcolz_open(features).shape[0] == df.shape[0], \
            "Features rows must match df!"

    nbatches, n_skipped_per_epoch = divmod(df.shape[0], batch_size)

    count = 1
    epoch = 0

    # New epoch.
    while 1:
        df = df.sample(frac=1)  # frac=1 is same as shuffling df.
        epoch += 1
        i, j = 0, batch_size
        # Mini-batches within epoch.
        mini_batches_completed = 0

        for _ in range(nbatches):
            sub = df.iloc[i:j]
            try:
                # Load the images, rescale them to [-1, 1], and read the regression targets.
                X = np.array([(2 * (img_to_array(load_img(f, target_size=target_size)) / 255.0 - 0.5))
                              for f in sub.imgpath])
                Y = sub.target.values

                if features is None:
                    mini_batches_completed += 1
                    yield X, Y
                else:
                    X2 = safe_bcolz_open(features, sub.index.values, debug=debug_merged)
                    mini_batches_completed += 1
                    yield [X, X2], Y

            except IOError as err:
                count -= 1

            i = j
            j += batch_size
            count += 1

Training

import os
from glob import glob

import pandas as pd

from data_generator import generator_from_df

def construct_dataframe(img_path, labels_path):
    # Build the dataframe that generator_from_df expects:
    # an 'imgpath' column with the image files and a 'target' column with the regression labels.
    data = {}
    data['imgpath'] = glob(os.path.join(img_path, '*.png'))
    data['target'] = load_labels(labels_path)  # load_labels is my own helper that reads the labels
    return pd.DataFrame(data)

train_df = construct_dataframe(train_x_dir, train_y_dir)
train_generator = generator_from_df(train_df, batch_size, (img_size, img_size))

# load and compile model
# ...

model.fit(train_generator, ...)
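One thing to keep in mind when calling fit: generator_from_df loops over epochs by itself and never stops, so Keras has to be told how many batches make up one epoch. A minimal sketch, assuming the same train_df and batch_size as above (the optimizer, loss, and epoch count are just placeholders):

steps_per_epoch = train_df.shape[0] // batch_size  # the generator yields this many batches per epoch

model.compile(optimizer='adam', loss='mse')  # e.g. mean squared error for a regression output
model.fit(train_generator,
          steps_per_epoch=steps_per_epoch,
          epochs=10)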

Latest update