nvcc中间链接失败



我一直在努力修复使用CMake构建nvcc项目时出现的中间链接错误。我一直在升级以前的一个项目以利用CUDA,并能够从主机代码成功地调用该库中的函数。当我试图从设备代码中调用该库的函数时,我会得到中间链接错误。我已经用 __device__ __host__ 限定符标注了所有函数。

顺便说一句,这是一个ROS项目,所以我使用了一些catkin CMake函数。

这是ParticleFilter代码的一个片段,它调用主机和设备函数:

#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <curand_kernel.h>
#include <iostream>
#include <davinci_kinematics_cuda/davinci_fwd_kinematics.cuh>
__host__
ParticleFilter::ParticleFilter(const unsigned int numParticles, const std::vector<double> &initialJointState, const unsigned int threads,
const unsigned int blocks) {
/* random other work here */

// This works fine (compiles and runs), it is calling host code from the other file
kinematics = davinci_kinematics_cuda::Forward();
std::cout << kinematics.fwd_kin_solve(initialJointState.data()).translation() << std::endl;
}
__global__
void printParticlesKernel(double *particles, const unsigned int numParticles, const unsigned int dimensions, const size_t pitch) {
int locationStart = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
// This fails, will not link
davinci_kinematics_cuda::Forward kinematics = davinci_kinematics_cuda::Forward(); 
for (int n = locationStart; n < numParticles; n += stride) {
double *particle = (double*) ((char*) particles + n * pitch);

/* random other work here */
// this fails, will not link
auto translation = kinematics.fwd_kin_solve(particle).translation();
printf("%f %f %fn", translation[0], translation[1], translation[2]);
}
}

这是来自运动学文件:

#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
namespace davinci_kinematics_cuda {
// use member fncs to compute and multiply successive transforms
__host__ __device__
Forward::Forward() {
/* random initialization here */
}
__host__ __device__
Eigen::Affine3d Forward::fwd_kin_solve(const double *q_vec, const unsigned int desired_joint) {
/* other work here */
}
}

这是ParticleFilter文件的相关CMake部分。

cmake_minimum_required(VERSION 2.8.10)
project(tool_tracking LANGUAGES CUDA CXX)
# https://stackoverflow.com/questions/25748039/add-cuda-to-ros-package
find_package(CUDA REQUIRED) 
# set CUDA_NVCC_FLAGS as you would do with CXX/C FLAGS
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CURAND_FLAGS} -fPIC")
set(CUDA_SEPARABLE_COMPILATION ON)
find_package(catkin REQUIRED COMPONENTS
message_generation
roscpp
std_msgs
sensor_msgs
geometry_msgs
cwru_opencv_common
tool_model
cwru_davinci_control
cwru_davinci_kinematics
xform_utils
tf
tool_segmentation
)

catkin_package(
INCLUDE_DIRS
include
LIBRARIES 
tool_tracking_particle
CATKIN_DEPENDS
message_runtime
std_msgs
sensor_msgs
geometry_msgs
cwru_opencv_common
tool_model
cwru_davinci_control
cwru_davinci_kinematics
xform_utils
tf
)
include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS})
include_directories(include ${catkin_INCLUDE_DIRS} tool_model_lib )
cuda_add_executable(test_particlefilter src/ParticleFilter.cu src/Particle.cu)
target_link_libraries(test_particlefilter tool_tracking_particle ${catkin_LIBRARIES} ${OpenCV_LIBRARIES} ${CUDA_LIBRARIES})

这是来自CMake的错误:

/usr/bin/cmake -H/home/ethan/catkin_ws/src/cwru_davinci_tool_tracking/tool_tracking -B/home/ethan/catkin_ws/build/tool_tracking --check-build-system CMakeFiles/Makefile.cmake 0
/usr/bin/cmake -E cmake_progress_start /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/progress.marks
/usr/bin/make -f CMakeFiles/Makefile2 all
make[1]: Entering directory '/home/ethan/catkin_ws/build/tool_tracking'
/usr/bin/make -f CMakeFiles/test_particlefilter.dir/build.make CMakeFiles/test_particlefilter.dir/depend
make[2]: Entering directory '/home/ethan/catkin_ws/build/tool_tracking'
[ 20%] Building NVCC intermediate link file CMakeFiles/test_particlefilter.dir/test_particlefilter_intermediate_link.o
/usr/local/cuda-11.0/bin/nvcc -lcudadevrt -m64 -ccbin /usr/bin/cc -dlink /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/src/./test_particlefilter_generated_ParticleFilter.cu.o /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/src/./test_particlefilter_generated_Particle.cu.o -o /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/./test_particlefilter_intermediate_link.o -Xcompiler -fPIC
nvlink error   : Undefined reference to '_ZN23davinci_kinematics_cuda7ForwardC1Ev' in '/home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/src/./test_particlefilter_generated_ParticleFilter.cu.o'
nvlink error   : Undefined reference to '_ZN23davinci_kinematics_cuda7Forward13fwd_kin_solveEPKdj' in '/home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/src/./test_particlefilter_generated_ParticleFilter.cu.o'
CMakeFiles/test_particlefilter.dir/build.make:1468: recipe for target 'CMakeFiles/test_particlefilter.dir/test_particlefilter_intermediate_link.o' failed
make[2]: Leaving directory '/home/ethan/catkin_ws/build/tool_tracking'
make[2]: *** [CMakeFiles/test_particlefilter.dir/test_particlefilter_intermediate_link.o] Error 255
CMakeFiles/Makefile2:67: recipe for target 'CMakeFiles/test_particlefilter.dir/all' failed
make[1]: Leaving directory '/home/ethan/catkin_ws/build/tool_tracking'
make[1]: *** [CMakeFiles/test_particlefilter.dir/all] Error 2
Makefile:140: recipe for target 'all' failed
make: *** [all] Error 2

如何修复未定义引用的错误?看起来像是一个链接错误,但我对编译/链接过程不够熟悉,无法进一步解决问题。如果我需要发布运动学文件中的CMake,我也可以。

这才是真正的问题所在,也是对其他读者最有帮助的部分。Catkin将CMake配置为默认构建共享库,但CUDA可分离编译和nvlink仅适用于静态库。您需要将CUDA库(在您的情况下,即cwru_davinci_kinematics中的库)始终设置为静态库。您可以通过在add_library调用中添加STATIC关键字来完成此操作,如:

add_library(my_cuda_lib STATIC source1.cu ...)

如果你在CMake中用CUDA“链接”到共享库,它会直接忽略该库。这实际上是nvcc文档中记载的行为。请参见此处:https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/#libraries

设备链接器能够读取静态主机库格式(在Linux和Mac OS X上为.a,在Windows上为.lib)。它忽略任何动态(.so或.dll)库。


这里的另一个主要教训是,将CMake最低版本设置为过于古老的版本势必会导致问题。在重现您的问题时,我被迫从源代码构建OpenCV 3(它不在Ubuntu 20.04 LTS中),并且在引入CMP0074的3.12版本之前,没有好的方法覆盖特定包的搜索路径。

升级您的最低CMake版本。理想情况下,您应该升级到软件存储库中可用的最新版本,并将文件中的最低版本设置为该值。与3.5之前的CMake版本保持兼容没有任何好处,我认为这一点可以扩展到3.16(Ubuntu 20.04 LTS中的版本)。由于您使用的是CUDA,因此3.18是最合适的。更糟糕的是,你的许多项目都将最低版本设定在2.8.12以下;CMake很快就会移除对这些版本的兼容性。


以下是我为使其在Ubuntu 20.04 LTS上构建所做的精确更改。我使用了以下构建脚本,放置在ROS工作区中并从中执行:

#!/usr/bin/bash
source /opt/ros/noetic/setup.bash
export CUDACXX=/usr/local/cuda/bin/nvcc
export OpenCV_ROOT=$(readlink -f opencv-install)
[ -f "$CUDACXX" ] || { echo "Invalid CUDACXX: $CUDACXX"; exit; }
[ -d "$OpenCV_ROOT" ] || { echo "Invalid OpenCV_ROOT: $OpenCV_ROOT"; exit; }
rm -rf build devel
catkin build tool_tracking --cmake-args 
-Wno-dev 
-DCMAKE_POLICY_DEFAULT_CMP0074=NEW 
-DCMAKE_CUDA_ARCHITECTURES=75

目录opencv-install是通过构建我自己的OpenCV 3创建的(因为Ubuntu 20.04只有v4)。步骤是:

$ git clone -b 3.4.14 git@github.com:opencv/opencv.git
$ git clone -b 3.4.14 git@github.com:opencv/opencv_contrib.git
$ cmake -G Ninja -S opencv -B opencv-build/ -DOPENCV_EXTRA_MODULES_PATH=$(readlink -f opencv_contrib)/modules -DBUILD_opencv_cnn_3dobj=OFF -DBUILD_opencv_face=OFF -DBUILD_opencv_hdf=OFF -DBUILD_opencv_hfs=OFF -DBUILD_opencv_julia=OFF -DBUILD_opencv_matlab=OFF -DBUILD_opencv_ovis=OFF -DBUILD_opencv_reg=OFF -DBUILD_opencv_sfm=OFF -DBUILD_opencv_text=OFF -DBUILD_opencv_wechat_qrcode=OFF -DBUILD_opencv_ximgproc=OFF
$ cmake --build opencv-build
$ cmake --install opencv-build --prefix opencv-install

这将禁用具有重要/不相关依赖关系的额外模块。

该脚本设置环境变量OpenCV_ROOT,以便将CMake引导到此本地安装的OpenCV版本。因为文件中指定的CMake最低版本太低,所以我还必须设置CMAKE_POLICY_DEFAULT_CMP0074=NEW,以便CMake遵循OpenCV_ROOT。

以下是我对您的CMake代码所做的更改:

src/cwru_davinci_kinematics/CMakeLists.txt

--- a/src/cwru_davinci_kinematics/CMakeLists.txt
+++ b/src/cwru_davinci_kinematics/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 2.8.10)
+cmake_minimum_required(VERSION 3.18)
project(cwru_davinci_kinematics)

#This is needed as part of the migration to ros jade and later
@@ -26,18 +26,16 @@ find_package(catkin REQUIRED COMPONENTS roscpp roslib roslint tf tf2 tf2_eigen)

SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=gnu++0x")

-# https://stackoverflow.com/questions/25748039/add-cuda-to-ros-package
-find_package(CUDA) 
-message(STATUS "CUDA_FOUND=${CUDA_FOUND}")
-if(CUDA_FOUND)
-       message(STATUS "Found CUDA, setting nvcc compilation flags")
-       
-       # set CUDA_NVCC_FLAGS as you would do with CXX/C FLAGS         
-       set(CUDA_NVCC_FLAGS CACHE STRING "nvcc flags" FORCE)
-       set(CUDA_VERBOSE_BUILD ON CACHE BOOL "nvcc verbose" FORCE)
+include(CheckLanguage)
+check_language(CUDA)
+if (CMAKE_CUDA_COMPILER)
+  enable_language(CUDA)
+
# fPIC fixes some linker issues with nvcc code / objects
-       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CURAND_FLAGS} -fPIC")
-       set(CUDA_SEPARABLE_COMPILATION ON)
+       set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -fPIC")
+       set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
+
+  find_package(CUDAToolkit REQUIRED)
endif()

include_directories(
@@ -48,7 +46,7 @@ include_directories(
${YAML_CPP_INCLUDE_DIRS}
)

-if (CUDA_FOUND)
+if (CMAKE_CUDA_COMPILER)
catkin_package(
DEPENDS ${Eigen3_DEP}
LIBRARIES 
@@ -82,14 +80,17 @@ target_link_libraries(davinci_kinematics
davinci_kinematic_definitions
)

-if (CUDA_FOUND)
-       cuda_add_library(davinci_kinematics_cuda src/davinci_fwd_kinematics.cu)
-       cuda_add_library(davinci_kinematics_definitions_cuda src/davinci_kinematic_definitions.cu)
-       
-       target_link_libraries(davinci_kinematics_cuda
-               ${catkin_LIBRARIES}
-               davinci_kinematics_definitions_cuda
-       )
+if (CMAKE_CUDA_COMPILER)
+  add_library(davinci_kinematics_cuda STATIC src/davinci_fwd_kinematics.cu)
+  add_library(davinci_kinematics_definitions_cuda STATIC src/davinci_kinematic_definitions.cu)
+
+  target_link_libraries(
+    davinci_kinematics_cuda
+    PRIVATE
+      CUDA::curand
+      ${catkin_LIBRARIES}
+      davinci_kinematics_definitions_cuda
+  )
endif()

# Examples

这里的重要行是:

add_library(davinci_kinematics_cuda STATIC src/davinci_fwd_kinematics.cu)
add_library(davinci_kinematics_definitions_cuda STATIC src/davinci_kinematic_definitions.cu)

我还在这里对CMake代码进行了现代化改造,因为内置的CUDA语言支持已经相当先进了。

src/cwru_davinci_tool_tracking/tool_tracking/CMakeLists.txt

--- a/src/cwru_davinci_tool_tracking/tool_tracking/CMakeLists.txt
+++ b/src/cwru_davinci_tool_tracking/tool_tracking/CMakeLists.txt
@@ -1,18 +1,11 @@
-cmake_minimum_required(VERSION 2.8.10)
-project(tool_tracking LANGUAGES CUDA CXX)
+cmake_minimum_required(VERSION 3.18)
+project(tool_tracking LANGUAGES C CXX CUDA)

-# https://stackoverflow.com/questions/25748039/add-cuda-to-ros-package
-find_package(CUDA REQUIRED) 
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -fPIC")
+set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

-# set CUDA_NVCC_FLAGS as you would do with CXX/C FLAGS
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CURAND_FLAGS} -fPIC")
-set(CUDA_SEPARABLE_COMPILATION ON)
+find_package(OpenCV 3 REQUIRED)

-#find_package(catkin_simple REQUIRED)
-## Find catkin macros and libraries
-## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
-## is used, also find other catkin packages
-find_package(OpenCV REQUIRED)
find_package(catkin REQUIRED COMPONENTS
message_generation
roscpp
@@ -28,11 +21,12 @@ find_package(catkin REQUIRED COMPONENTS
tool_segmentation
)

+find_package(CUDAToolkit REQUIRED)

catkin_package(
INCLUDE_DIRS
include
-       LIBRARIES 
+       LIBRARIES
tool_tracking_particle
CATKIN_DEPENDS
message_runtime
@@ -47,13 +41,7 @@ catkin_package(
tf
)

-include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS})
-include_directories(include ${catkin_INCLUDE_DIRS} tool_model_lib )
-
-#cuda_add_library(tool_tracking_particle src/ParticleFilter.cu src/Particle.cu)
-#add_executable(particle src/tracking_particle.cpp)
-#target_link_libraries(particle tool_tracking_particle ${catkin_LIBRARIES} ${OpenCV_LIBRARIES} davinci_kinematics_cuda 
-#                      davinci_kinematics_definitions_cuda)
-
-cuda_add_executable(test_particlefilter src/ParticleFilter.cu src/Particle.cu)
-target_link_libraries(test_particlefilter tool_tracking_particle ${catkin_LIBRARIES} ${OpenCV_LIBRARIES} ${CUDA_LIBRARIES})
+add_executable(test_particlefilter src/ParticleFilter.cu src/Particle.cu)
+target_include_directories(test_particlefilter SYSTEM PRIVATE ${OpenCV_INCLUDE_DIRS} ${catkin_INCLUDE_DIRS})
+target_include_directories(test_particlefilter PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_link_libraries(test_particlefilter PRIVATE ${catkin_LIBRARIES} ${OpenCV_LIBRARIES} CUDA::curand)

我还在这里对CMake代码进行了现代化改造,因为内置的CUDA语言支持已经相当先进了。

其他变更

我在所有其他地方将最低CMake版本从2.8.x提升到3.0.2,以抑制警告。我还为所有未指定版本号的find_package(OpenCV ...)调用添加了版本号3。

Boost不再有python3包;现在只是python。我对src/vision_opencv/cv_bridge/CMakeLists.txt进行了以下更改:

--- a/src/vision_opencv/cv_bridge/CMakeLists.txt
+++ b/src/vision_opencv/cv_bridge/CMakeLists.txt
@@ -1,18 +1,15 @@
-cmake_minimum_required(VERSION 2.8)
+cmake_minimum_required(VERSION 3.0.2)
project(cv_bridge)

find_package(catkin REQUIRED COMPONENTS rosconsole sensor_msgs)

if(NOT ANDROID)
find_package(PythonLibs)
-  if(PYTHONLIBS_VERSION_STRING VERSION_LESS 3)
-    find_package(Boost REQUIRED python)
-  else()
-    find_package(Boost REQUIRED python3)
-  endif()
+  find_package(Boost REQUIRED python)
else()
-find_package(Boost REQUIRED)
+  find_package(Boost REQUIRED)
endif()
+
find_package(OpenCV 3 REQUIRED
COMPONENTS
opencv_core

最新更新