我一直在尝试制作一个用于手语分类的随机森林模型。数据集是平衡的,准确率为98%,但它总是预测同一类。
这就是我获取和处理数据的方式:
train_dir = "../input/asl-alphabet/asl_alphabet_train/asl_alphabet_train/"
test_dir = "../input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/"
train_len = 87000
image_size=50
def get_data(folder):
X = np.empty((train_len, image_size, image_size, 3), dtype = np.float32)
y = np.empty((train_len), dtype = int)
cnt = 0
for folderName in os.listdir(folder):
if not folderName.startswith('.'):
if folderName in ['A']:
label = 0
elif folderName in ['B']:
label = 1
elif folderName in ['C']:
label = 2
elif folderName in ['D']:
label = 3
elif folderName in ['E']:
label = 4
elif folderName in ['F']:
label = 5
elif folderName in ['G']:
label = 6
elif folderName in ['H']:
label = 7
elif folderName in ['I']:
label = 8
elif folderName in ['J']:
label = 9
elif folderName in ['K']:
label = 10
elif folderName in ['L']:
label = 11
elif folderName in ['M']:
label = 12
elif folderName in ['N']:
label = 13
elif folderName in ['O']:
label = 14
elif folderName in ['P']:
label = 15
elif folderName in ['Q']:
label = 16
elif folderName in ['R']:
label = 17
elif folderName in ['S']:
label = 18
elif folderName in ['T']:
label = 19
elif folderName in ['U']:
label = 20
elif folderName in ['V']:
label = 21
elif folderName in ['W']:
label = 22
elif folderName in ['X']:
label = 23
elif folderName in ['Y']:
label = 24
elif folderName in ['Z']:
label = 25
elif folderName in ['del']:
label = 26
elif folderName in ['nothing']:
label = 27
elif folderName in ['space']:
label = 28
else:
label = 29
for image_filename in tqdm(os.listdir(folder + folderName)):
img_file = cv2.imread(folder + folderName + '/' + image_filename)
if img_file is not None:
img_file = skimage.transform.resize(img_file, (image_size, image_size, 3))
img_arr = np.asarray(img_file).reshape((-1, image_size, image_size, 3))
X[cnt] = img_arr
y[cnt] = label
cnt += 1
return X,y
letters, labels= get_data(train_dir)
print("The shape of letters is : ", letters.shape)
#preprocessing
import tensorflow as tf
X_train, X_test, y_train, y_test = train_test_split(letters, labels, test_size=0.3, random_state=42, stratify=labels)
X_valid, X_train = X_train[:20000] / 255., X_train[20000:] / 255.
y_valid, y_train = y_train[:20000], y_train[20000:]
X_test = X_test / 255.
#reshape to 2d array
nsamples, nx, ny, nrgb = X_train.shape
x_train2 = X_train.reshape((nsamples,nx*ny*nrgb))
#so,eventually,model.predict() should also be a 2d input
nsamples, nx, ny, nrgb = X_test.shape
x_test2 = X_test.reshape((nsamples,nx*ny*nrgb))
这就是我制作模型的方式:
from sklearn.ensemble import RandomForestClassifier
model=RandomForestClassifier(random_state=42, n_jobs=-1, max_depth=20,
n_estimators=100, oob_score=True)
model.fit(x_train2,y_train)
y_pred=model.predict(x_test2)
y_pred
#y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)
accuracy_score(y_pred,y_test)
print(classification_report(y_pred,y_test))
为了测试我使用的预测:
label_names = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N',
'O','P','Q','R','S','T','U','V','W','X','Y','Z','del','nothing','space']
img_path='../input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/L_test.jpg'
img_arr=cv2.imread(img_path)
img_arr=cv2.resize(img_arr,(64,64))
#so,eventually,model.predict() should also be a 2d input
nx, ny, nrgb = img_arr.shape
img_arr2 = img_arr.reshape(1,(nx*ny*nrgb))
classes = ["A","B","C","D","E","F","G","H","I","J", "K", "L", "M" "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "nothing", "space"]
ans=model.predict(img_arr2)
print(label_names[ans[0]])
我尝试过实现网格搜索,但没能实现。(我把它写在Kaggle上,它告诉"你的笔记本试图分配比可用内存更多的内存"(
我对此还很陌生,所以我对代码中的任何内容都不完全确定。
在训练和测试之间缺少图像的预处理。您需要在测试集上应用相同的步骤,以便您的模型获得相同类型的输入。尝试以下操作:
label_names = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N',
'O','P','Q','R','S','T','U','V','W','X','Y','Z','del','nothing','space']
img_path='../input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/L_test.jpg'
img_arr=cv2.imread(img_path)
# Preprocess test image
img_arr = skimage.transform.resize(img_arr, (50, 50, 3))
img_arr = img_arr/255.
#so,eventually,model.predict() should also be a 2d input
nx, ny, nrgb = img_arr.shape
img_arr2 = img_arr.reshape(1,(nx*ny*nrgb))
classes = ["A","B","C","D","E","F","G","H","I","J", "K", "L", "M" "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "nothing", "space"]
ans=model.predict(img_arr2)
print(label_names[ans[0]])