cuda数组是否可以同时具有cudaArraySurfaceLoadStore和cudaArrayLayered标志



对于计算能力为2.0及更高版本的设备,使用cudaArraySurfaceLoadStore标志创建的CUDA数组可以通过表面对象(surface object)或表面引用(surface reference),使用"表面函数"(Surface Functions)一节中描述的函数进行读写。

另一方面,使用cudaArrayLayered标志分配的CUDA数组可以具有分层结构。

CUDA数组是否也可以同时具有这两个标志,以便在一个内核中写入,并在第二个内核中(以分层方式)读取?

可以。在所有情况下,您都需要使用分层访问函数,例如,您需要使用surf?DLayeredwrite()向其写入,也需要使用类似的分层函数(如surf?DLayeredread())从中读取。

最小示例,2D分层:

$ cat t2106.cu
#include <helper_cuda.h>
// Dimensions of the layered 2D array used by this example.
const int layers = 2;
const int xdim = 3;
const int ydim = 3;

// Device-resident source values that write_kernel copies into the surface.
// write_kernel indexes this array as layer*xdim*ydim + x*ydim + y, so each
// row below holds the ydim values for one x within one layer.
__device__ float d_arr[xdim*ydim*layers] = {
    // layer 0
    1.0, 1.1, 1.2,
    1.3, 1.4, 1.5,
    1.6, 1.7, 1.8,
    // layer 1
    2.0, 2.1, 2.2,
    2.3, 2.4, 2.5,
    2.6, 2.7, 2.8
};
// Copies every element of d_arr into the layered 2D surface `surf`.
//
// No thread or block index is consulted, so each launched thread performs
// the entire copy; the example launches it as <<<1,1>>>.
//
// surf: surface object bound to a layered CUDA array that holds at least
//       xdim x ydim floats in each of `layers` layers.
__global__ void write_kernel(cudaSurfaceObject_t surf)
{
    const int layer_elems = xdim * ydim;
    const int total_elems = layer_elems * layers;

    // Walk d_arr linearly and recover (layer, x, y) from the flat index;
    // this visits elements in the same order as layer -> x -> y nesting.
    for (int idx = 0; idx < total_elems; ++idx) {
        const int layer = idx / layer_elems;
        const int x = (idx % layer_elems) / ydim;
        const int y = idx % ydim;

        // The x coordinate is scaled by the element size before being
        // passed to the surface write.
        surf2DLayeredwrite<float>(d_arr[idx], surf, x * sizeof(float), y, layer);
    }
}
// Reads every element of the layered 2D surface `surf` and prints it,
// one element per line, in layer -> x -> y order.
//
// No thread or block index is consulted, so each launched thread prints
// the entire array; the example launches it as <<<1,1>>>.
//
// surf: surface object bound to a layered CUDA array that holds at least
//       xdim x ydim floats in each of `layers` layers.
__global__ void read_kernel(cudaSurfaceObject_t surf)
{
    for (int i = 0; i < layers; i++) {
        for (int j = 0; j < xdim; j++) {
            for (int k = 0; k < ydim; k++) {
                // The x coordinate is scaled by the element size before
                // being passed to the surface read.
                const float val = surf2DLayeredread<float>(surf, j*sizeof(float), k, i);
                // Fixed: the format string previously ended in "%fn" (lost
                // backslash), which printed a literal 'n' and no newline.
                printf(" layer:%d x:%d y:%d val: %f\n", i, j, k, val);
            }
        }
    }
}

// Demonstrates that a CUDA array can be allocated with both
// cudaArrayLayered and cudaArraySurfaceLoadStore: one kernel writes the
// array through a surface object and a second kernel reads it back.
//
// Every runtime API call is wrapped in checkCudaErrors; kernel launches are
// additionally followed by cudaGetLastError() so a failed launch is reported
// at the launch site instead of surfacing later at the synchronize call.
int main(int argc, char **argv)
{
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
    cudaArray *dev_cu_array;
    cudaSurfaceObject_t surf;

    // Zero the descriptor so fields not set below have a defined value.
    cudaResourceDesc res_desc;
    memset(&res_desc, 0, sizeof(res_desc));

    // The third extent component is the layer count for this layered array.
    cudaExtent extent = make_cudaExtent(xdim, ydim, layers);
    checkCudaErrors(cudaMalloc3DArray(&dev_cu_array,
                                      &channelDesc,
                                      extent,
                                      cudaArrayLayered|cudaArraySurfaceLoadStore));

    // Bind the array to a surface object that both kernels receive.
    res_desc.resType = cudaResourceTypeArray;
    res_desc.res.array.array = dev_cu_array;
    checkCudaErrors(cudaCreateSurfaceObject(&surf, &res_desc));

    // Both launches go to the same (default) stream, so the read kernel
    // runs after the write kernel has finished.
    write_kernel<<<1,1>>>(surf);
    checkCudaErrors(cudaGetLastError());
    read_kernel<<<1,1>>>(surf);
    checkCudaErrors(cudaGetLastError());

    // Wait for the kernels; this also reports any error raised while they ran.
    checkCudaErrors(cudaDeviceSynchronize());

    checkCudaErrors(cudaDestroySurfaceObject(surf));
    checkCudaErrors(cudaFreeArray(dev_cu_array));
    return 0;
}
$ nvcc -o t2106 t2106.cu -I/usr/local/cuda/samples/common/inc
$ compute-sanitizer ./t2106
========= COMPUTE-SANITIZER
layer:0 x:0 y:0 val: 1.000000
layer:0 x:0 y:1 val: 1.100000
layer:0 x:0 y:2 val: 1.200000
layer:0 x:1 y:0 val: 1.300000
layer:0 x:1 y:1 val: 1.400000
layer:0 x:1 y:2 val: 1.500000
layer:0 x:2 y:0 val: 1.600000
layer:0 x:2 y:1 val: 1.700000
layer:0 x:2 y:2 val: 1.800000
layer:1 x:0 y:0 val: 2.000000
layer:1 x:0 y:1 val: 2.100000
layer:1 x:0 y:2 val: 2.200000
layer:1 x:1 y:0 val: 2.300000
layer:1 x:1 y:1 val: 2.400000
layer:1 x:1 y:2 val: 2.500000
layer:1 x:2 y:0 val: 2.600000
layer:1 x:2 y:1 val: 2.700000
layer:1 x:2 y:2 val: 2.800000
========= ERROR SUMMARY: 0 errors
$

相关内容

  • 没有找到相关文章

最新更新