错误:'atomic_t'未在此范围内声明 其他



请帮忙... 在 OpenCL 程序中,我收到了下面这些错误。我没有 "oclUtils.h" 头文件,所以没有把它包含在我的程序中。这就是我遇到这个问题的原因吗?我把内核代码也一并贴在下面。如果这是原因,我如何在不使用 oclUtils.h 的情况下使用原子函数?

"__kernel void BLAS_susdot_kernel(__global float *x,__global int *index,__global float        *y,const int n,__global float* dot_p)                    \n"
"{                                                                                      \n"
"    int block_x = get_group_id(0);                                                                     \n"
"    int thread_x = get_local_id(0);                                                                    \n"
"    int i = get_global_id(0);                                                                          \n"
"    float dot_value, old = *dot_p;                                                                     \n"
"    int warp_thread_id = i & (32-1);                                                                   \n"
"    __local float tmp[512];                                                                                    \n"
"    __local float share_dot_p;                                                                         \n"
"    if(thread_x == 0)                          \n"
"       share_dot_p = 0.0;                              \n"
"    if (i < n)                                 \n"
"   {                                        \n"
"        tmp[i]= x[i]*y[index[i]];           \n"
"        if(warp_thread_id <16 && (i+16)< n) \n"
"            tmp[i]+=tmp[i+16];              \n"
"        if(warp_thread_id <8 && (i+8)< n )  \n"
"            tmp[i]+=tmp[i+8];               \n"
"        if(warp_thread_id <4 && (i+4)< n )  \n"
"            tmp[i]+=tmp[i+4];               \n"
"        if(warp_thread_id <2 && (i+2)< n)   \n"
"            tmp[i]+=tmp[i+2];               \n"
"        if(warp_thread_id==0 && (i+1)< n)   \n"
"        {                                   \n"
"            tmp[i]+=tmp[i+1];               \n"
"            do                              \n"
"            {                               \n"
"                dot_value = old;            \n"
"                old = convert_int( atomic_cmpxchg((volatile __global unsigned int*)&share_dot_p, convert_int(dot_value), convert_int(tmp[i] + dot_value)));  \n"
"            } while (dot_value != old);     \n"
"            //share_dot_p +=tmp[i];         \n"
"        }                                   \n"
"        if(thread_x==0)                  \n"
"        {                                   \n"
"                                            \n"
"            do                              \n"
"            {                               \n"
"                dot_value = old;            \n"
"                old = convert_int(atomic_cmpxchg((volatile __global unsigned int *) dot_p, convert_int(dot_value), convert_int(share_dot_p+dot_value)));  \n"
"            } while(dot_value != old);                                                 \n"
"               // *dot_p += share_dot_p;                                               \n"
"                                                                                       \n"
"        }                                                                              \n"
"   }                                                                                   \n"
"}                                                                                      \n"
"\n";

我得到的错误:

Build Program Info: ptxas application ptx input, line 160; error   : Label expected for argument 0 of instruction 'call'
ptxas application ptx input, line 160; error   : Call target not recognized
ptxas application ptx input, line 160; error   : Function 'atomic_cmpxchg' not declared in this scope
ptxas application ptx input, line 160; error   : Call target not recognized
ptxas application ptx input, line 185; error   : Label expected for argument 0 of instruction 'call'
ptxas application ptx input, line 185; error   : Call target not recognized
ptxas application ptx input, line 185; error   : Function 'atomic_cmpxchg' not declared in this scope
ptxas application ptx input, line 185; error   : Call target not recognized
ptxas application ptx input, line 161; error   : Unknown symbol 'atomic_cmpxchg'
ptxas application ptx input, line 186; error   : Unknown symbol 'atomic_cmpxchg'
ptxas fatal   : Ptx assembly aborted due to errors
error   : Ptx compilation failed: gpu='sm_13', device code='cuModuleLoadDataEx_4'
: Considering profile 'compute_13' for gpu='sm_13' in 'cuModuleLoadDataEx_4'
: Retrieving binary for 'cuModuleLoadDataEx_4', for gpu='sm_13', usage mode='  '
: Considering profile 'compute_13' for gpu='sm_13' in 'cuModuleLoadDataEx_4'
: Control flags for 'cuModuleLoadDataEx_4' disable search path
: Ptx binary found for 'cuModuleLoadDataEx_4', architecture='compute_13'
: Ptx compilation for 'cuModuleLoadDataEx_4', for gpu='sm_13', ocg options='  '

由于您使用的是atomic_cmpxchg,它是 OpenCL 中可选扩展的一部分,因此您需要检查您的设备是否支持它,然后在内核代码中启用它:

  1. 检查cl_khr_global_int32_base_atomics是否列在clGetDeviceInfo(..., CL_DEVICE_EXTENSIONS, ...)返回的扩展中

  2. 将以下内容添加到内核代码的顶部:

#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable

请注意,如果原子函数的操作数位于 __local 地址空间,或者是 64 位操作数,则可能还需要启用其他扩展:

#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable

此外,请确保您使用的是 OpenCL 1.1 或更高版本,因为原子函数在 OpenCL 1.0 中的命名方式不同(1.0 的扩展使用 atom_ 前缀,例如 atom_cmpxchg;从 1.1 起才使用 atomic_ 前缀,例如 atomic_cmpxchg)。另外,如果您使用 NVidia 的 nvcc 实用程序脱机编译内核,请确保指定 -arch 命令行开关。

这些特定错误与 oclUtils.h 无关。这是一个 NVidia 头文件,只有在主机(而不是内核)代码中调用 ocl* 函数时才需要该文件。

最新更新