将视频表示为2D数组，其中每一列表示一帧 - OpenCV和Python

我的目标是将视频转换为2D矩阵X，其中每个列向量表示一帧。因此矩阵的维数为：X.shape ----> (每帧的特征数, 帧的总数)

我需要这种形式，因为我想在X上应用不同的ML算法。要获得X，我按如下步骤进行：

1. 使用OpenCV库在Python中加载视频并保存所有帧。
2. 循环 {

a) Frame (=3D array with dimensions height, width, depth=3 RGB) is converted into a 1D vector x
b) Append vector x to Matrix X
}

对于步骤2b)，我使用：

video_matrix = np.column_stack((video_matrix, frame_vector))

对于640x320的帧，此操作大约需要0.5秒。对于一个3分钟（8000帧）的小视频，计算X几乎需要150分钟。有没有办法让它更快？

第一部分代码：

# Extract every frame of the video and store it as ./data/frame<N>.jpg.
video = cv2.VideoCapture('path/video.mp4')

# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs('data', exist_ok=True)

# Number of frames written so far; after the loop it holds the total frame
# count and is reused by the code that reads the frames back.
counter = 0
try:
    while True:
        # reading from frame
        ret, frame = video.read()
        if not ret:
            # read() reports failure once no further frame can be grabbed
            # (end of the video, or the file could not be opened at all).
            break

        # if video is still left continue creating images
        name = './data/frame' + str(counter) + '.jpg'

        # writing the extracted images
        cv2.imwrite(name, frame)

        # increasing counter so that it will
        # show how many frames are created
        counter += 1
finally:
    # Release all space and windows once done, even if writing a frame failed.
    video.release()
    cv2.destroyAllWindows()

第二部分（运行缓慢的部分）：

# Build the 2D video matrix: one column per frame.
# NOTE: this is the slow variant under discussion. np.column_stack allocates a
# new array and copies every previously stored column on each iteration, so
# the total cost grows quadratically with the number of frames.

# Initialize a 1D array which will become the 2D array; this placeholder
# first column is deleted at the end.
video_matrix = np.zeros(width * height * 3)
for i in range(counter):  # loops over the total amount of frames
    # 3D array = current frame, read back from disk
    current_frame = np.asarray(Image.open('./data/frame' + str(i) + '.jpg'))
    # convert frame into a 1D array
    frame_vector = image_to_vector(current_frame)
    # append frame vector x to the matrix X that will represent the video
    video_matrix = np.column_stack((video_matrix, frame_vector))

# delete the initialized zero column
video_matrix = np.delete(video_matrix, 0, 1)

不要重复地把单个帧追加到已累积的数据上。这样做的代价是 O(n^2)，也就是说，程序会运行得越来越慢。numpy 无法就地扩大数组，每次都必须创建一个副本，而复制的工作量会随着每增加一帧而增加。

将每个帧附加到Python列表中。读完视频后，再一次性将整个列表转换为numpy数组。例如：在循环中执行 frames.append(frame_vector)，循环结束后执行 video_matrix = np.column_stack(frames)。

以下是生成"视频数据"的Python（Keras）代码，以及在将数据送入基于DL的分类模型之前的预处理：

import numpy as np

# Synthetic "moving rectangle" video dataset.
#
# For each of the four motion classes, num_vids clips are generated. A clip is
# a stack of num_imgs black frames in which a randomly sized rectangle is drawn
# at a random start position and shifted by one pixel per frame until it
# reaches the frame border; the remaining frames stay black.
#
# Results (module level):
#   X_train: float32 array, shape (n_clips, num_imgs, img_size, img_size, 1),
#            pixel values scaled to [0, 1]
#   Y_train: float32 one-hot array, shape (n_clips, number of classes)

# preparing dataset
class_names = ['left', 'right', 'up', 'down']  # 4 classes; list index == class id
labels = enumerate(class_names)  # kept for backward compatibility; unused below
num_vids = 30  # clips generated per class
num_imgs = 30  # frames per clip
img_size = 20  # every frame is img_size x img_size pixels
min_object_size = 1  # inclusive lower bound of the rectangle's side length
max_object_size = 5  # exclusive upper bound (np.random.randint semantics)

# Per-frame (dx, dy) displacement of the rectangle for every class.
_STEP_BY_DIRECTION = {
    'left': (-1, 0),
    'right': (1, 0),
    'up': (0, -1),
    'down': (0, 1),
}


def _make_clip(direction):
    """Return one clip of a rectangle moving in *direction*.

    Args:
        direction: one of the keys of ``_STEP_BY_DIRECTION``.

    Returns:
        Array of shape (num_imgs, img_size, img_size); background pixels are
        0 and rectangle pixels are 255.

    Raises:
        KeyError: if *direction* is not a known class name.
    """
    dx, dy = _STEP_BY_DIRECTION[direction]
    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
    w, h = np.random.randint(min_object_size, max_object_size, size=2)
    x = np.random.randint(0, img_size - w)
    y = np.random.randint(0, img_size - h)

    # Bounding the frame index by num_imgs prevents an IndexError when
    # num_imgs is configured smaller than the distance to the border.
    for i_img in range(num_imgs):
        position = x if dx else y  # the coordinate that actually moves
        step = dx or dy
        # Moving towards 0 stops once the coordinate reaches 0; moving away
        # from 0 stops once the coordinate reaches img_size.
        still_moving = position > 0 if step < 0 else position < img_size
        if not still_moving:
            break
        imgs[i_img, y:y + h, x:x + w] = 255  # set rectangle as foreground
        x += dx
        y += dy
    return imgs


X_train = []
Y_train = []
for class_id, direction in enumerate(class_names):
    for _ in range(num_vids):
        X_train.append(_make_clip(direction))
        # Exactly one label per clip, so the label count always matches the
        # clip count (also when num_vids != num_imgs).
        Y_train.append(class_id)

# data pre-processing
X_train = np.array(X_train, dtype=np.float32) / 255
# add a trailing channel axis
X_train = X_train.reshape(X_train.shape[0], num_imgs, img_size, img_size, 1)
print(X_train.shape)
Y_train = np.array(Y_train, dtype=np.uint8)
Y_train = Y_train.reshape(X_train.shape[0], 1)
print(Y_train.shape)
# One-hot encode the class ids with NumPy: row i of the identity matrix is the
# one-hot vector for class i. This replaces np_utils.to_categorical, because
# keras.utils.np_utils is not available in current Keras releases.
Y_train = np.eye(len(class_names), dtype=np.float32)[Y_train.ravel()]

它应该对你有帮助。

相关内容

最新更新

热门标签：