I am running an experiment in which I have 3 fixed, synchronized cameras recording a person moving around in a limited space (which is entirely covered by each camera's field of view). I use a pretrained AI model to detect 20+ pose landmarks in every frame, which amounts to hundreds of 2D points per video. My goal is to represent these landmarks as a 3D point cloud using OpenCV (Python). I know this is feasible in the 2-camera case with cv2.triangulatePoints(), and I used that approach to obtain a 3D point set for each possible camera pair (camera 1 - camera 2, camera 2 - camera 3, ...). But these sets do not merge easily, since they are expressed in different coordinate systems. How can I triangulate the 3D points using the data from all 3 cameras, rather than from an arbitrary pair?
I tried to merge the 3D sets using the extrinsic matrices obtained via cv2.recoverPose(), only to realize that this was not feasible. I then ran cv2.solvePnP() and obtained rotation and translation matrices, but did not know what exactly to do with them.
Here is the kind of code I used:

def getProjectionMat(pts_l, pts_r, K_l, K_r):
    pts_l_norm = cv2.undistortPoints(np.expand_dims(pts_l, axis=1), cameraMatrix=K_l, distCoeffs=None)
    pts_r_norm = cv2.undistortPoints(np.expand_dims(pts_r, axis=1), cameraMatrix=K_r, distCoeffs=None)
    E, mask = cv2.findEssentialMat(pts_l_norm, pts_r_norm, focal=1.0, pp=(0., 0.), method=cv2.RANSAC, prob=0.999, threshold=3.0)
    points, R, t, mask = cv2.recoverPose(E, pts_l_norm, pts_r_norm)
    M_r = np.hstack((R, t))
    M_l = np.hstack((np.eye(3, 3), np.zeros((3, 1))))
    P_l = np.dot(K_l, M_l)
    P_r = np.dot(K_r, M_r)
    return P_l, P_r, R, t

def get3DPoints(points1, points2, P1, P2):
    point_4d_hom = cv2.triangulatePoints(P1, P2, np.expand_dims(points1, axis=1), np.expand_dims(points2, axis=1))
    point_4d = point_4d_hom / np.tile(point_4d_hom[-1, :], (4, 1))
    return point_4d[:3, :].T
#ptpL1 are 2D points of camera 1, ptpL2 are corresponding points in camera 2, etc.
#K1, K2, etc. are intrinsic matrices
P1_1, P2_1, xR1, xt1=getProjectionMat(ptpL1, ptpL2, K1, K2) #projection between cams 1 and 2
P1_2, P2_2, xR2, xt2=getProjectionMat(ptpL2, ptpL3, K2, K3) #... between cams 2 and 3
P1_3, P2_3, xR3, xt3=getProjectionMat(ptpL1, ptpL3, K1, K3) #... between cams 1 and 3
#get2DPointForCam(x, markOfInterest) returns the 2D coordinates of the given landmark for cam x
for markOfInterest in marksOfInterest:
    point2D1=get2DPointForCam(1, markOfInterest)
    point2D2=get2DPointForCam(2, markOfInterest)
    point2D3=get2DPointForCam(3, markOfInterest)
    point3D1=get3DPoints(point2D1, point2D2, P1_1, P2_1)[0]
    point3D2=get3DPoints(point2D2, point2D3, P1_2, P2_2)[0]
    point3D3=get3DPoints(point2D1, point2D3, P1_3, P2_3)[0]
    #problem: point3D1 and point3D3 are close, but point3D2 has a completely different value. Still, when projected onto camera 2, it is well positioned.
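A likely explanation for the mismatch: each call to getProjectionMat anchors the coordinate system at the pair's left camera, and because cv2.recoverPose() can only recover the translation up to scale (the returned t has unit norm), each pair also gets its own arbitrary scale. So the cams 2-3 point set lives in camera 2's frame, at a different scale than the cams 1-2 set. One pragmatic way to merge two pairwise sets anyway is to estimate the similarity transform (rotation, translation and uniform scale) between corresponding triangulated points, e.g. with Umeyama's method. A minimal numpy sketch (the helper name and the pts_12/pts_23 arrays are mine, not from the original code):

import numpy as np

def umeyama_similarity(src, dst):
    # Estimate (s, R, t) such that dst ~= s * R @ src + t, following
    # Umeyama (1991). src and dst are (N, 3) arrays of corresponding points.
    mu_src, mu_dst = src.mean(axis=0), dst.mean(axis=0)
    src_c, dst_c = src - mu_src, dst - mu_dst
    cov = dst_c.T @ src_c / len(src)            # 3x3 cross-covariance
    U, D, Vt = np.linalg.svd(cov)
    S = np.eye(3)
    if np.linalg.det(U) * np.linalg.det(Vt) < 0:
        S[2, 2] = -1.0                          # guard against a reflection
    R = U @ S @ Vt
    var_src = (src_c ** 2).sum() / len(src)
    s = np.trace(np.diag(D) @ S) / var_src
    t = mu_dst - s * R @ mu_src
    return s, R, t

# Hypothetical usage: pts_12 and pts_23 are (N, 3) arrays of the SAME
# landmarks triangulated from cams 1-2 and cams 2-3 respectively.
# s, R, t = umeyama_similarity(pts_23, pts_12)
# pts_23_aligned = (s * (R @ pts_23.T)).T + t   # now in the cams 1-2 frame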
UPDATE: following @Micka's suggestion, I (re)tried using solvePnP to compute the position of each camera relative to some of the pose landmarks. I hoped this would let me convert the triangulation of each camera pair into a common coordinate system. However, this failed again (see the code below, with the problematic result described in a comment):
def getProjectionMat(pts_l, pts_r, K_l, K_r):
    pts_l_norm = cv2.undistortPoints(np.expand_dims(pts_l, axis=1), cameraMatrix=K_l, distCoeffs=None)
    pts_r_norm = cv2.undistortPoints(np.expand_dims(pts_r, axis=1), cameraMatrix=K_r, distCoeffs=None)
    E, mask = cv2.findEssentialMat(pts_l_norm, pts_r_norm, focal=1.0, pp=(0., 0.), method=cv2.RANSAC, prob=0.999, threshold=3.0)
    points, R, t, mask = cv2.recoverPose(E, pts_l_norm, pts_r_norm)
    M_r = np.hstack((R, t))
    M_l = np.hstack((np.eye(3, 3), np.zeros((3, 1))))
    P_l = np.dot(K_l, M_l)
    P_r = np.dot(K_r, M_r)
    return P_l, P_r, R, t

def triangulate3DPoints(points1, points2, P1, P2):
    point_4d_hom = cv2.triangulatePoints(P1, P2, np.expand_dims(points1, axis=1), np.expand_dims(points2, axis=1))
    point_4d = point_4d_hom / np.tile(point_4d_hom[-1, :], (4, 1))
    return point_4d[:3, :].T
def getPoseMatrixFromPNP(points3D, points2D, K):
    # solvePnP returns the rotation/translation that map world (object) points
    # into the camera frame; inverting the 4x4 matrix gives the camera pose
    # (camera-to-world).
    _, vector_rotation, vector_translation = cv2.solvePnP(np.array(points3D), np.array(points2D), K, np.zeros((4,1)), flags=0)
    matrix_rotation, _ = cv2.Rodrigues(vector_rotation)
    return np.linalg.inv(np.vstack((np.hstack((matrix_rotation, vector_translation)), np.array([[0,0,0,1]]))))
def toHomogenous(vec):
    return np.array([vec[0], vec[1], vec[2], 1])

def project_3D_point_to_2D(point_3D, P):
    point_3D_homogeneous = np.hstack((point_3D, np.ones(1)))
    point_2D_homogeneous = np.dot(P, point_3D_homogeneous.T).T
    point_2D = point_2D_homogeneous[:2] / point_2D_homogeneous[2]
    return point_2D
P1_1, P2_1, xR1, xt1=getProjectionMat(ptpL1, ptpL2, K1, K2)
P1_2, P2_2, xR2, xt2=getProjectionMat(ptpL2, ptpL3, K2, K3)
P1_3, P2_3, xR3, xt3=getProjectionMat(ptpL1, ptpL3, K1, K3)
marksOfInterest=[mp.solutions.pose.PoseLandmark.NOSE] #for example
#c12Dpoints, etc., will contain 2D coordinates of each landmark respectively for cam 1, 2 and 3
c12Dpoints=[]
c22Dpoints=[]
c32Dpoints=[]
#resTriangulated will contain 3D coordinates of each landmark, as estimated using cv2.triangulatePoints() between cams 1 and 2
resTriangulated=[]
for markOfInterest in marksOfInterest:
    marks1=getSpecificLandMarks(landMarks1, markOfInterest)
    marks2=getSpecificLandMarks(landMarks2, markOfInterest)
    marks3=getSpecificLandMarks(landMarks3, markOfInterest)
    for idFrame, e1 in enumerate(marks1):
        e2 = marks2[idFrame]
        e3 = marks3[idFrame]
        if e1 is not None and e2 is not None and e3 is not None:
            p1=[[e1.x*width, e1.y*height]]
            p2=[[e2.x*width, e2.y*height]]
            p3=[[e3.x*width, e3.y*height]]
            triangulated=triangulate3DPoints(p1, p2, P1_1, P2_1)[0]
            resTriangulated.append(triangulated)
            c12Dpoints.append(p1)
            c22Dpoints.append(p2)
            c32Dpoints.append(p3)
pose1=getPoseMatrixFromPNP(resTriangulated, c12Dpoints, K1)
pose2=getPoseMatrixFromPNP(resTriangulated, c22Dpoints, K2)
pose3=getPoseMatrixFromPNP(resTriangulated, c32Dpoints, K3)
posList=[]
for markOfInterest in marksOfInterest:
    marks1=getSpecificLandMarks(landMarks1, markOfInterest)
    marks2=getSpecificLandMarks(landMarks2, markOfInterest)
    marks3=getSpecificLandMarks(landMarks3, markOfInterest)
    for idFrame, e1 in enumerate(marks1):
        e2 = marks2[idFrame]
        e3 = marks3[idFrame]
        if e1 is not None and e2 is not None and e3 is not None:
            p1=[[e1.x*width, e1.y*height]]
            p2=[[e2.x*width, e2.y*height]]
            p3=[[e3.x*width, e3.y*height]]
            triangulated1=triangulate3DPoints(p1, p2, P1_1, P2_1)[0]
            triangulated2=triangulate3DPoints(p2, p3, P1_2, P2_2)[0]
            triangulated3=triangulate3DPoints(p1, p3, P1_3, P2_3)[0]
            worldCoordP1=(pose1 @ toHomogenous(triangulated1).reshape(4,1).flatten())[:-1]
            worldCoordP2=(pose2 @ toHomogenous(triangulated2).reshape(4,1).flatten())[:-1]
            worldCoordP3=(pose1 @ toHomogenous(triangulated3).reshape(4,1).flatten())[:-1]
            #problem: worldCoordP2 is very different from worldCoordP1 and worldCoordP3 (and the latter two differ somewhat as well). All three should be close, since they represent the same point (estimated in different ways) in the same coordinate system.
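A plausible cause for the remaining disagreement: the 3D points fed to solvePnP all come from the cams 1-2 triangulation, while triangulated2 and triangulated3 live in the frames (and arbitrary, pair-specific scales) of the other pairs, so the recovered poses cannot reconcile them. Once all three camera poses are expressed in one metrically consistent world frame, a cleaner route is to build one 3x4 projection matrix per camera and triangulate each landmark from all three views at once. A minimal sketch of the first step, assuming pose1/pose2/pose3 are valid 4x4 camera-to-world matrices (like the ones getPoseMatrixFromPNP returns) in such a common frame:

import numpy as np

def projection_from_pose(K, pose_cam_to_world):
    # Invert the camera-to-world pose to get world-to-camera extrinsics,
    # then prepend the intrinsics: P = K [R | t].
    world_to_cam = np.linalg.inv(pose_cam_to_world)
    return K @ world_to_cam[:3, :]

# P1 = projection_from_pose(K1, pose1)
# P2 = projection_from_pose(K2, pose2)
# P3 = projection_from_pose(K3, pose3)
# These matrices can then feed an N-view triangulation (see the sketch below).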
I am not sure whether OpenCV ships a ready-to-use function for this. Multi-camera triangulation is discussed in several computer vision textbooks, e.g.:

Richard Szeliski, Computer Vision: Algorithms and Applications, Section 7.1, Triangulation
https://szeliski.org/Book/
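Following the linear (DLT) triangulation described there, cv2.triangulatePoints() can be generalized to any number of views: each camera contributes two linear equations in the homogeneous 3D point, and the stacked system is solved via SVD. A minimal numpy sketch (my own helper, not an OpenCV built-in; it assumes all projection matrices are expressed in the same world frame):

import numpy as np

def triangulate_nviews(proj_mats, points_2d):
    # Linear (DLT) triangulation from N >= 2 views.
    # proj_mats: list of 3x4 projection matrices, one per camera.
    # points_2d: list of matching (x, y) pixel coordinates, one per camera.
    A = []
    for P, (x, y) in zip(proj_mats, points_2d):
        A.append(x * P[2] - P[0])   # each view contributes two rows
        A.append(y * P[2] - P[1])
    _, _, Vt = np.linalg.svd(np.asarray(A))
    X = Vt[-1]                      # right singular vector of smallest singular value
    return X[:3] / X[3]             # dehomogenize

# Hypothetical usage, assuming P1, P2, P3 are consistent 3x4 matrices:
# X = triangulate_nviews([P1, P2, P3], [(x1, y1), (x2, y2), (x3, y3)])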