Filter out noisy predictions in my simple Python code



I have this simple Python code that predicts the emotion of a face (taken from here, in case you need to run it) and displays it above a rectangle drawn around the face on camera. The problem is that it is quite noisy, e.g. Fearful -- Sad -- -- Sad and so on. I want to smooth the predictions and filter out one-off results: if the last n predictions in a row are Sad, only then display Sad. How can I do that?

You only need to change the last few lines, since everything at the beginning is just for producing the prediction.

import numpy as np
import argparse
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# command line argument
ap = argparse.ArgumentParser()
ap.add_argument("--mode", help="train/display")
mode = ap.parse_args().mode

# plots accuracy and loss curves
def plot_model_history(model_history):
    """
    Plot Accuracy and Loss curves given the model_history
    """
    fig, axs = plt.subplots(1, 2, figsize=(15, 5))
    # summarize history for accuracy
    axs[0].plot(range(1, len(model_history.history['accuracy']) + 1), model_history.history['accuracy'])
    axs[0].plot(range(1, len(model_history.history['val_accuracy']) + 1), model_history.history['val_accuracy'])
    axs[0].set_title('Model Accuracy')
    axs[0].set_ylabel('Accuracy')
    axs[0].set_xlabel('Epoch')
    axs[0].set_xticks(np.arange(1, len(model_history.history['accuracy']) + 1), len(model_history.history['accuracy']) / 10)
    axs[0].legend(['train', 'val'], loc='best')
    # summarize history for loss
    axs[1].plot(range(1, len(model_history.history['loss']) + 1), model_history.history['loss'])
    axs[1].plot(range(1, len(model_history.history['val_loss']) + 1), model_history.history['val_loss'])
    axs[1].set_title('Model Loss')
    axs[1].set_ylabel('Loss')
    axs[1].set_xlabel('Epoch')
    axs[1].set_xticks(np.arange(1, len(model_history.history['loss']) + 1), len(model_history.history['loss']) / 10)
    axs[1].legend(['train', 'val'], loc='best')
    fig.savefig('plot.png')
    plt.show()

# Define data generators
train_dir = 'data/train'
val_dir = 'data/test'
num_train = 28709
num_val = 7178
batch_size = 64
num_epoch = 50

train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(48, 48),
    batch_size=batch_size,
    color_mode="grayscale",
    class_mode='categorical')
validation_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(48, 48),
    batch_size=batch_size,
    color_mode="grayscale",
    class_mode='categorical')

# Create the model
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48, 48, 1)))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(7, activation='softmax'))

# If you want to train the same model or try other models, go for this
if mode == "train":
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.0001, decay=1e-6), metrics=['accuracy'])
    model_info = model.fit_generator(
        train_generator,
        steps_per_epoch=num_train // batch_size,
        epochs=num_epoch,
        validation_data=validation_generator,
        validation_steps=num_val // batch_size)
    plot_model_history(model_info)
    model.save_weights('model.h5')

# emotions will be displayed on your face from the webcam feed
elif mode == "display":
    model.load_weights('model.h5')
    # prevents openCL usage and unnecessary logging messages
    cv2.ocl.setUseOpenCL(False)
    # dictionary which assigns each label an emotion (alphabetical order)
    emotion_dict = {0: "Angry", 1: "Disgusted", 2: "Fearful", 3: "Happy", 4: "Neutral", 5: "Sad", 6: "Surprised"}
    # start the webcam feed
    cap = cv2.VideoCapture(1)
    while True:
        # Find haar cascade to draw bounding box around face
        ret, frame = cap.read()
        if not ret:
            break
        facecasc = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = facecasc.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
            roi_gray = gray[y:y + h, x:x + w]
            cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray, (48, 48)), -1), 0)
            prediction = model.predict(cropped_img)
            maxindex = int(np.argmax(prediction))
            text = emotion_dict[maxindex]
            if ("Sad" in text) or ("Angry" in text) or ("Disgusted" in text):
                text = "Sad"
            if ("Happy" in text) or ("Sad" in text):
                cv2.putText(frame, text, (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('Video', cv2.resize(frame, (1600, 960), interpolation=cv2.INTER_CUBIC))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

I would keep a list of predictions and take the mode (the most frequent value) of the last few, like this:

prediction_history = []
LOOKBACK = 5  # how far you want to look back

# in loop:
prediction_history.append(maxindex)
recent = prediction_history[-LOOKBACK:]  # count only within the window, not the whole history
most_common_index = max(set(recent), key=recent.count)
text = emotion_dict[most_common_index]
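
If you'd rather not slice the list on every frame (and not let it grow without bound), a collections.deque with maxlen does the windowing for you. This is a minimal sketch of the same idea, assuming maxindex and emotion_dict from the loop above:

from collections import Counter, deque

LOOKBACK = 5
prediction_history = deque(maxlen=LOOKBACK)  # old entries drop out automatically

# in loop:
prediction_history.append(maxindex)
# most_common(1) returns [(value, count)]; take the value
most_common_index, _ = Counter(prediction_history).most_common(1)[0]
text = emotion_dict[most_common_index]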

Specifically, in your code:

import os
...
prediction_history = []
LOOKBACK = 5  # how far you want to look back
...
cap = cv2.VideoCapture(1)
while True:
    # Find haar cascade to draw bounding box around face
    ret, frame = cap.read()
    if not ret:
        break
    facecasc = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = facecasc.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
        roi_gray = gray[y:y + h, x:x + w]
        cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray, (48, 48)), -1), 0)
        prediction = model.predict(cropped_img)

        # updates: take the mode of the last LOOKBACK argmax predictions
        prediction_history.append(int(np.argmax(prediction)))
        recent = prediction_history[-LOOKBACK:]
        most_common_index = max(set(recent), key=recent.count)
        text = emotion_dict[most_common_index]

        if ("Sad" in text) or ("Angry" in text) or ("Disgusted" in text):
            text = "Sad"
        if ("Happy" in text) or ("Sad" in text):
            cv2.putText(frame, text, (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    cv2.imshow('Video', cv2.resize(frame, (1600, 960), interpolation=cv2.INTER_CUBIC))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
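
If you want exactly the behaviour described in the question, i.e. only switch the displayed label once the same class has been predicted n frames in a row, a hysteresis-style filter is a small variation on the above. This is a minimal sketch under the same assumptions (maxindex and emotion_dict as in the loop); N_STABLE, displayed_index, and stable_label are names I'm introducing for illustration:

N_STABLE = 5            # hypothetical: frames a label must persist before being shown
displayed_index = None  # label currently on screen
candidate_index = None  # label we are considering switching to
streak = 0              # consecutive frames candidate_index has won

def stable_label(maxindex):
    """Return the label to display; only changes after N_STABLE identical frames."""
    global displayed_index, candidate_index, streak
    if maxindex == candidate_index:
        streak += 1
    else:
        candidate_index, streak = maxindex, 1
    if streak >= N_STABLE or displayed_index is None:
        displayed_index = candidate_index
    return displayed_index

# in loop, instead of text = emotion_dict[most_common_index]:
# text = emotion_dict[stable_label(maxindex)]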

Another option I can think of is to skip the per-frame argmax and take the sum (or average) over all frames. Use

prediction_history.append(prediction) instead of prediction_history.append(int(np.argmax(prediction)))

i.e. append the raw softmax output to the prediction history instead of the argmax. That gives you a list like this:

prediction_history = [
    [.1, .1, .1, .5, .4, .3, .1],  # argmax = 3
    [.1, .2, .1, .5, .4, .3, .1],  # argmax = 3
    [.1, .4, .1, .5, .4, .3, .1],  # argmax = 3
    [.2, .6, .1, .5, .4, .3, .1],  # argmax = 1
    [.1, .3, .1, .1, .4, .3, .1],  # argmax = 4
    [.1, .6, .1, .1, .4, .3, .1],  # argmax = 1
    [.1, .3, .1, .1, .4, .3, .1],  # argmax = 4
]
# sum: [.8, 2.5, .7, 2.3, 2.8, 2.1, .7] --> 4
most_common_index = int(np.argmax(np.sum(prediction_history[-LOOKBACK:], 0)))
# mode (other answer) of the argmaxes: [3, 3, 3, 1, 4, 1, 4] --> 3

This also gives you a nice framework for simple moving averages and similar smoothing; I think you may end up with a more continuous state, i.e. not [1, 2, 1, 2, 1, 2, 1, ...].
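
For instance, an exponential moving average over the raw probability vectors costs one line per frame. A minimal sketch, assuming prediction is the (1, 7) softmax output of model.predict and emotion_dict is as above; ALPHA is a smoothing factor I'm introducing for illustration:

import numpy as np

ALPHA = 0.3      # hypothetical smoothing factor: higher reacts faster, lower is smoother
smoothed = None  # running average of the class probabilities

# in loop, after prediction = model.predict(cropped_img):
probs = prediction[0]  # shape (7,)
smoothed = probs if smoothed is None else ALPHA * probs + (1 - ALPHA) * smoothed
text = emotion_dict[int(np.argmax(smoothed))]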

Given that you want previous frames to influence the current result, I'd say that if you ever get a chance to rebuild this, consider an RNN or something similar and let the ML algorithm handle choosing the rolling category.
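
A rough, purely illustrative sketch of what that could look like: train a small LSTM on short windows of the per-frame softmax vectors. WINDOW, the layer sizes, and the training data are all assumptions, not something from the original post:

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

WINDOW = 10  # hypothetical: number of consecutive frames per sample

# input: (batch, WINDOW, 7) windows of softmax outputs; output: one of the 7 emotions
temporal_model = Sequential([
    LSTM(32, input_shape=(WINDOW, 7)),
    Dense(7, activation='softmax'),
])
temporal_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# at inference time, classify the last WINDOW frames' probability vectors:
# window = np.array(prediction_history[-WINDOW:]).reshape(1, WINDOW, 7)
# text = emotion_dict[int(np.argmax(temporal_model.predict(window)))]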
