我正在使用 autokeras 运行代码，为我的细胞数据寻找最合适的模型。运行环境是 Ubuntu 16.04（Linux），autokeras 版本为 0.3.5。我在输入数据的形状方面遇到了问题。以下是我的代码：
# Root directories of the training and test images.
train_data_dir = 'malaria100/train'
test_data_dir = 'malaria100/test'

# Number of samples in each split, used to pre-allocate the image arrays.
nb_train_samples, nb_test_samples = 24760, 2730

# Number of target classes passed to the one-hot label encoding.
num_classes = 2

# Height and width (in pixels) of the images stored on disk.
img_rows_orig = img_cols_orig = 100
def load_training_data():
    """Load all training images into memory and one-hot encode their labels.

    Every sub-directory of ``train_data_dir`` is treated as one class. The
    class index of a directory is its position in the sorted directory
    listing, so the mapping is the same on every run.

    Side effects:
        Writes the image array to ``imgs_train.npy`` and the encoded labels
        to ``labels_train.npy`` in the current working directory.

    Returns:
        tuple: ``(X_train, Y_train)`` where ``X_train`` is a ``uint8`` array
        of shape ``(nb_train_samples, img_rows_orig, img_cols_orig, 3)`` and
        ``Y_train`` is the output of ``np_utils.to_categorical`` for the
        integer class indices.

    Raises:
        IOError: if OpenCV cannot decode one of the files.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the
    # directory -> class index mapping deterministic.
    labels = sorted(os.listdir(train_data_dir))

    # Zero-initialised so that rows which are never filled (fewer files than
    # nb_train_samples) do not contain uninitialised memory.
    X_train = np.zeros((nb_train_samples, img_rows_orig, img_cols_orig, 3),
                       dtype=np.uint8)
    Y_train = np.zeros((nb_train_samples,), dtype='uint8')

    print('-'*30)
    print('Creating training images...')
    print('-'*30)

    i = 0  # running index over all images, across classes
    for j, label in enumerate(labels):
        image_names_train = os.listdir(os.path.join(train_data_dir, label))
        total = len(image_names_train)
        print(label, total)
        for image_name in image_names_train:
            image_path = os.path.join(train_data_dir, label, image_name)
            img = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if img is None:
                # cv2.imread signals failure by returning None instead of
                # raising; fail here with the offending path.
                raise IOError('Could not read image: {0}'.format(image_path))
            X_train[i] = img
            Y_train[i] = j
            if i % 100 == 0:
                # i counts images over all classes, so report it against the
                # overall sample count rather than the per-class total.
                print('Done: {0}/{1} images'.format(i, nb_train_samples))
            i += 1
    print(i)
    print('Loading done.')

    print('Transform targets to keras compatible format.')
    Y_train = np_utils.to_categorical(Y_train[:nb_train_samples],
                                      num_classes)

    # np.save stores exactly one array per call; its third positional
    # parameter is allow_pickle, so the labels need their own file.
    np.save('imgs_train.npy', X_train)
    np.save('labels_train.npy', Y_train)
    return X_train, Y_train
def load_test_data():
    """Load all test images into memory and one-hot encode their labels.

    Every sub-directory of ``test_data_dir`` is treated as one class. The
    class index of a directory is its position in the sorted directory
    listing, so the mapping is the same on every run.

    Side effects:
        Writes the image array to ``imgs_test.npy`` and the encoded labels
        to ``labels_test.npy`` in the current working directory.

    Returns:
        tuple: ``(X_test, Y_test)`` where ``X_test`` is a ``uint8`` array of
        shape ``(nb_test_samples, img_rows_orig, img_cols_orig, 3)`` and
        ``Y_test`` is the output of ``np_utils.to_categorical`` for the
        integer class indices.

    Raises:
        IOError: if OpenCV cannot decode one of the files.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the
    # directory -> class index mapping deterministic.
    labels = sorted(os.listdir(test_data_dir))

    # Zero-initialised so that rows which are never filled (fewer files than
    # nb_test_samples) do not contain uninitialised memory.
    X_test = np.zeros((nb_test_samples, img_rows_orig, img_cols_orig, 3),
                      dtype=np.uint8)
    Y_test = np.zeros((nb_test_samples,), dtype='uint8')

    print('-'*30)
    print('Creating test images...')
    print('-'*30)

    i = 0  # running index over all images, across classes
    for j, label in enumerate(labels):
        image_names_test = os.listdir(os.path.join(test_data_dir, label))
        total = len(image_names_test)
        print(label, total)
        for image_name in image_names_test:
            image_path = os.path.join(test_data_dir, label, image_name)
            img = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if img is None:
                # cv2.imread signals failure by returning None instead of
                # raising; fail here with the offending path.
                raise IOError('Could not read image: {0}'.format(image_path))
            X_test[i] = img
            Y_test[i] = j
            if i % 100 == 0:
                # i counts images over all classes, so report it against the
                # overall sample count rather than the per-class total.
                print('Done: {0}/{1} images'.format(i, nb_test_samples))
            i += 1
    print(i)
    print('Loading done.')

    print('Transform targets to keras compatible format.')
    Y_test = np_utils.to_categorical(Y_test[:nb_test_samples],
                                     num_classes)

    # np.save stores exactly one array per call; its third positional
    # parameter is allow_pickle, so the labels need their own file.
    np.save('imgs_test.npy', X_test)
    np.save('labels_test.npy', Y_test)
    return X_test, Y_test
def run():
    """Run the Auto-Keras image-classifier search under several time budgets.

    Loads the training and test data, scales pixel values to [0, 1], and for
    each budget in ``TRAINING_TIMES`` fits an ``ak.ImageClassifier``, retrains
    the best model found, evaluates it on the test set and writes the
    classification report plus the score to ``output/<seconds>.txt``.
    """
    OUTPUT_PATH = "output"
    TRAINING_TIMES = [60 * 60, 60 * 60 * 2, 60 * 60 * 4, 60 * 60 * 8,
                      60 * 60 * 12, 60 * 60 * 24]

    X_train, Y_train = load_training_data()
    X_test, Y_test = load_test_data()
    X_train = X_train.astype("float") / 255.0
    X_test = X_test.astype("float") / 255.0

    # The loaders return one-hot labels of shape (n, num_classes). Passing
    # those to ImageClassifier.fit is what produced the "inconsistent numbers
    # of samples: [24760, 49520]" error, so collapse them back to one integer
    # class index per sample. Labels that are already 1-D or (n, 1) are left
    # untouched.
    if Y_train.ndim == 2 and Y_train.shape[1] > 1:
        Y_train = np.argmax(Y_train, axis=1)
    if Y_test.ndim == 2 and Y_test.shape[1] > 1:
        Y_test = np.argmax(Y_test, axis=1)

    # NOTE(review): assumes class index 0 is "abnormal" and 1 is "normal" --
    # confirm against the class directory names used by the loaders.
    labelNames = ["abnormal", "normal"]

    # Make sure the report directory exists before the first write.
    os.makedirs(OUTPUT_PATH, exist_ok=True)

    for seconds in TRAINING_TIMES:
        print("[INFO] training model for {} seconds max...".format(seconds))
        clf = ak.ImageClassifier(verbose=True, augment=False)
        clf.fit(X_train, Y_train, time_limit=seconds)
        clf.final_fit(X_train, Y_train, X_test, Y_test, retrain=True)

        # evaluate the Auto-Keras model
        score = clf.evaluate(X_test, Y_test)
        predictions = clf.predict(X_test)
        report = classification_report(Y_test, predictions,
                                       target_names=labelNames)

        # os.path.join takes the path components as separate arguments,
        # whereas str.join accepts only a single iterable.
        p = os.path.join(OUTPUT_PATH, "{}.txt".format(seconds))
        # The context manager closes the file even if a write fails.
        with open(p, "w") as f:
            f.write(report)
            f.write("\nscore: {}".format(score))
# Start the experiment only when this file is executed as a script.
if __name__ == "__main__":
    run()
完整的错误回溯信息如下：
File "<ipython-input-4-f6faa80ae27f>", line 27, in <module>
run()
File "<ipython-input-4-f6faa80ae27f>", line 13, in run
clf.fit(X_train, Y_train, time_limit=seconds)
File "/home/rajaramans2/anaconda3/lib/python3.6/site-packages/autokeras/image/image_supervised.py", line 114, in fit
super().fit(x, y, time_limit)
File "/home/rajaramans2/anaconda3/lib/python3.6/site-packages/autokeras/supervised.py", line 115, in fit
random_state=42)
File "/home/rajaramans2/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_split.py", line 2184, in train_test_split
arrays = indexable(*arrays)
File "/home/rajaramans2/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py", line 260, in indexable
check_consistent_length(*result)
File "/home/rajaramans2/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py", line 235, in check_consistent_length
" samples: %r" % [int(l) for l in lengths])
ValueError: Found input variables with inconsistent numbers of samples: [24760, 49520]
输入数据的形状如下:
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)
(24760, 100, 100, 3) (2730, 100, 100, 3) (24760, 2) (2730, 2)
Autokeras 的 ImageClassifier 不接受 one-hot（categorical）编码的标签。将代码中对 `np_utils.to_categorical` 的调用注释掉，使 Y 保持为整数类别标签、形状为 (num_samples, 1)，即可解决此问题。