tf.keras.preprocessing.image.ImageDataGenerator在Google Colab中无法加载全部图片



我正在训练一个模型将图像分为10个不同的标签。要加载数据,我使用ImageDataGenerator。

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Root folder that holds one sub-directory per class label.
train_dir = '/content/drive/MyDrive/Colab Notebooks/EuroSAT/Train/'

# Multiply every pixel value by 1/255 and augment with random
# horizontal and vertical flips.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   horizontal_flip=True, vertical_flip=True)

# Stream 64x64 images in batches of 16 with one-hot labels, in a fixed
# (unshuffled) order.
# NOTE(review): subset='training' is requested although no validation_split
# is configured on the generator above -- confirm this is intended.
train_generator = train_datagen.flow_from_directory(
    train_dir, batch_size=16,
    class_mode='categorical', target_size=(64, 64),
    subset='training', shuffle=False)

但是每个类别中几乎有3000张图片,而ImageDataGenerator总共只加载了5443张图片。

找到5827张图片,属于10个类。

我该怎么做才能绕过去呢?

可能是您有不支持的图像格式或损坏的图像文件。这种情况经常发生,比如通过谷歌或必应下载图片时。由于我经常遇到这种情况,我开发了下面这个函数,用于检查包含子目录(如果使用ImageDataGenerator().flow_from_directory,则为类目录)的目录中的图像。它检查每个文件是否为有效的图像文件,并且其扩展名是否在用户指定的合法扩展名列表中。代码如下所示。它有点长,因为它对输入等做了很多检查。注意,如果它检测到扩展名为jfif的文件,它会将其重命名为jpg,因为它们是相同的格式。参数convert_ext可以设置为指定的扩展名(例如'bmp'),以便将所有图像转换为该图像格式。如果设置为None,则图像保留其原始格式。

import os
import shutil
import cv2
def _check_image_file(class_path, f, good_ext_list, convert_ext):
    """Validate (and optionally convert) one file inside a class directory.

    Returns a ``(is_valid, path)`` tuple. ``path`` is where the file lives
    after any rename or conversion, so the caller can report or delete it.
    """
    f_path = os.path.join(class_path, f)
    stem, dot, ext = f.rpartition('.')
    if not dot:
        # No '.' in the name at all: the whole name is the stem and there is
        # no extension (otherwise a file literally named "png" would pass).
        stem, ext = f, ''
    ext = ext.lower()

    if ext == 'jfif':
        # JFIF is JPEG data under another extension; rename it so image
        # loaders accept it. Like the copy it replaces, this overwrites an
        # existing file of the same name.
        jpg_path = os.path.join(class_path, stem + '.jpg')
        os.replace(f_path, jpg_path)
        f_path, ext = jpg_path, 'jpg'
    elif ext not in good_ext_list:
        return False, f_path

    # cv2.imread signals an unreadable file by returning None.
    try:
        img = cv2.imread(f_path)
    except cv2.error:
        img = None
    if img is None:
        return False, f_path

    # Skip conversion when the file already has the target extension:
    # writing and then removing the same path would delete the image.
    if convert_ext is not None and ext != convert_ext.lower():
        new_path = os.path.join(class_path, stem + '.' + convert_ext)
        try:
            written = cv2.imwrite(new_path, img)
        except cv2.error:
            written = False
        if written:
            os.remove(f_path)
            f_path = new_path
        else:
            # A failed conversion must never cost us the valid original.
            print('*** Warning *** could not convert ', f_path, ' to ', convert_ext, ' original file kept')
    return True, f_path


def check_file_extension(source_dir, good_ext_list, delete=False, convert_ext=None):
    """Check every image in the class sub-directories of ``source_dir``.

    A file is valid when its extension is in ``good_ext_list`` (or is
    ``jfif``, which is renamed to ``jpg``) and OpenCV can read it. A summary
    table with per-class counts is printed.

    Args:
        source_dir: directory containing the class sub-directories that hold
            the images.
        good_ext_list: lowercase extensions (without the dot) to accept,
            e.g. ``['jpg', 'jpeg', 'bmp', 'png', 'tiff']``.
        delete: if True, files with a bad extension or unreadable content
            are deleted instead of being reported.
        convert_ext: if not None, every valid image is re-written with this
            extension (e.g. ``'bmp'``) and the original is removed. Files
            that already have this extension are left untouched.

    Returns:
        A list of paths of the invalid files that were not deleted (empty
        when ``delete`` is True), or None when ``source_dir`` is empty.
    """
    processed_count = 0  # total number of files found
    good_count = 0       # total number of valid image files found
    removed_count = 0    # number of files deleted (only when delete=True)
    bad_file_list = []   # invalid files that were kept on disk

    class_list = os.listdir(source_dir)
    if not class_list:
        print('directory ', source_dir, ' is empty *** Program Terminating')
        return None

    row_format = '{0:^20s}{1}{2:^17s}{1}{3:^14s}{1}{4:^15s}'
    print(row_format.format('Class Directory', ' ', 'Files Processed', 'Files Verified', 'Files Removed'))
    for klass in class_list:
        class_path = os.path.join(source_dir, klass)
        if not os.path.isdir(class_path):
            print('*** Warning *** there are files in ', source_dir, ' it should only contain sub directories')
            continue

        class_file_count = 0     # files found in this class directory
        class_good_count = 0     # valid images in this class directory
        class_removed_count = 0  # files deleted from this class directory
        # os.listdir returns a snapshot, so files created by a rename or a
        # conversion below are not visited a second time.
        for f in os.listdir(class_path):
            f_path = os.path.join(class_path, f)
            if not os.path.isfile(f_path):
                print('*** Warning *** there is a directory in ', class_path, ' there should only be files there')
                continue
            class_file_count += 1
            is_valid, f_path = _check_image_file(class_path, f, good_ext_list, convert_ext)
            if is_valid:
                class_good_count += 1
            elif delete:
                os.remove(f_path)
                class_removed_count += 1
            else:
                bad_file_list.append(f_path)

        print(row_format.format(klass, ' ', str(class_file_count), str(class_good_count), str(class_removed_count)))
        processed_count += class_file_count
        good_count += class_good_count
        removed_count += class_removed_count

    print('processed ', processed_count, ' files  ', good_count, 'files were verified  ', removed_count, ' files were removed')
    return bad_file_list

下面是使用示例
# Raw string so the Windows path separators are not read as escape sequences.
source_dir = r'c:\temp\people\storage'
good_ext_list = ['jpg', 'jpeg', 'bmp', 'tiff', 'png']
new_ext = 'bmp'  # every valid image is converted to this format
bad_file_list = check_file_extension(source_dir, good_ext_list, delete=False, convert_ext=new_ext)
print(bad_file_list)

下面的是典型输出

Class Directory     Files Processed  Files Verified  Files Removed 
savory               20               20              0       
unsavory              21               20              0       
processed  41  files   40 files were verified   0  files were removed
['c:\\temp\\people\\storage\\unsavory\\040.xyz']

最新更新