我正在尝试使用sklearn和使用pandas数据框架进行k-fold交叉验证。这并不像预期的那样工作。看起来数据由于某些原因无法访问,即使它存在。代码可以工作一段时间,但无法完成一个完整的epoch。
错误如下:
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
94773248/94765736 [==============================] - 1s 0us/step
94781440/94765736 [==============================] - 1s 0us/step
458/610 [=====================>........] - ETA: 21s - loss: 0.1640 - accuracy: 0.1621
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-7-28b7c7367434> in <module>()
60 validation_data=valid_gen,
61 validation_steps=len(test_index)//valid_batch_size,
---> 62 verbose=1)
...
UnknownError: Graph execution error:
2 root error(s) found.
(0) UNKNOWN: IndexError: single positional indexer is out-of-bounds
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/script_ops.py", line 271, in __call__
ret = func(*args)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 1004, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/data_adapter.py", line 830, in wrapped_generator
for data in generator_fn():
File "<ipython-input-4-8914ea8c1843>", line 6, in get_data_generator
r = df.iloc[i]
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 931, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 1566, in _getitem_axis
self._validate_integer(key, axis)
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 1500, in _validate_integer
raise IndexError("single positional indexer is out-of-bounds")
IndexError: single positional indexer is out-of-bounds
[[{{node PyFunc}}]]
[[IteratorGetNext]]
[[IteratorGetNext/_2]]
(1) UNKNOWN: IndexError: single positional indexer is out-of-bounds
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/script_ops.py", line 271, in __call__
ret = func(*args)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 1004, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/data_adapter.py", line 830, in wrapped_generator
for data in generator_fn():
File "<ipython-input-4-8914ea8c1843>", line 6, in get_data_generator
r = df.iloc[i]
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 931, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 1566, in _getitem_axis
self._validate_integer(key, axis)
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 1500, in _validate_integer
raise IndexError("single positional indexer is out-of-bounds")
IndexError: single positional indexer is out-of-bounds
[[{{node PyFunc}}]]
[[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_13498]
下面是我要做的代码:
# using google colab
! pip install --upgrade --no-cache-dir gdown
! gdown 1_DgB2a2Q7eYJpXtKWfl4XPUgTIW1sXw1
! unzip -qq Train.zip
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import csv
import pandas as pd
# create a pandas data frame of images, age, gender and race
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from keras.models import Model
from tensorflow.keras.optimizers import Adam
from keras.layers import Dense, Flatten, GlobalAveragePooling2D, Multiply, Dropout
from tensorflow.keras import regularizers
from tensorflow.keras.utils import to_categorical
from PIL import Image
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
from sklearn.model_selection import KFold
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow import keras
from datetime import datetime
from tensorflow import keras
from tqdm import tqdm
import pandas as pd
def get_data_generator(df, indices, batch_size=16):
images, labels = [], []
while True:
for i in indices:
# print(i," - ",end="")
r = df.iloc[i]
file_, label = r['file'], r['label']
im_gray = Image.open(file_).convert('L')
im_gray = im_gray.resize((360, 360))
im = np.zeros(shape=(360, 360,3))
im[:,:,0] = im_gray
im[:,:,1] = im_gray
im[:,:,2] = im_gray
im = np.array(im) / 255.0
images.append(im)
new_label = label/100.0
labels.append(new_label)
if len(images) >= batch_size:
yield np.array(images), np.array(labels)
images, labels = [], []
np.random.seed(42)
EPOCHS = 1
MODEL_NAME = 'ResNet50'
IMG_SIZE = '360x360'
all_train_imgs = glob.glob('Train/*')
# print("Length of all training images = ",len(all_train_imgs))
all_training_files_name = []
all_training_perc = []
with open('Train.csv') as f:
contents = f.readlines()
for item in contents:
# make the changes in the folder here
img_name = "Train/"+item.split(',')[0]
perc_infc = float(item.split(',')[1])
num_pat = item.split(',')[2]
# print(img_name," - ",perc_infc," - ",num_pat)
all_training_files_name.append(img_name)
all_training_perc.append(perc_infc)
attributes = {'label':all_training_perc, 'file':all_training_files_name}
df_all = pd.DataFrame(attributes)
df_all = df_all.dropna()
print(df_all.head())
kf = KFold(n_splits=5)
kf.get_n_splits(all_training_files_name)
fold_no = 0
for train_index, test_index in kf.split(all_training_files_name):
fold_no += 1
#########################################################################################
OUTPUT = 1
frozen = ResNet50 (weights="imagenet", input_shape=(360,360,3), include_top=False)
trainable = frozen.output
trainable = Dropout(0.5)(GlobalAveragePooling2D()(trainable))
trainable = Dense(2048, activation="relu")(trainable)
trainable = Dense(1024, activation="relu")(trainable)
trainable = Dense(OUTPUT, activation="sigmoid")(trainable)
model = Model(inputs=frozen.input, outputs=trainable)
opt = Adam(learning_rate=1e-5)
model.compile(optimizer=opt, loss=tf.keras.losses.MeanAbsoluteError(),#loss='binary_crossentropy',
#experimental_run_tf_function=False,
metrics = ['accuracy']
)
#########################################################################################
batch_size = 4
valid_batch_size = 4
df_train = df_all.loc[train_index.astype(int)]
df_val = df_all.loc[test_index.astype(int)]
train_gen = get_data_generator(df_train, train_index, batch_size=batch_size)
valid_gen = get_data_generator(df_val, test_index, batch_size=valid_batch_size)
callbacks = [
ModelCheckpoint("./model_checkpoint", monitor='val_loss'),
#ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4)
]
# for storing logs into tensorboard
logdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
history = model.fit(train_gen,
steps_per_epoch=len(train_index)//batch_size,
epochs=EPOCHS,
callbacks=[tensorboard_callback,callbacks],
validation_data=valid_gen,
validation_steps=len(test_index)//valid_batch_size,
verbose=1)
下面是在google colab中复制的代码:https://colab.research.google.com/drive/11C-GP6xCB3CCwvz6gj8gy6mTOJIc3Zld?usp=sharing
我弄清楚了,数据框架创建时出现了一些错误。对于这个问题,只需使用完整的数据框架,就可以进行以下更改。
#df_train = df_all.loc[train_index.astype(int)]
#df_val = df_all.loc[test_index.astype(int)]
train_gen = get_data_generator(df_all, train_index, batch_size=batch_size)
valid_gen = get_data_generator(df_all, test_index, batch_size=valid_batch_size)