OpenCL矩阵中值滤波器



我有一个问题,我正试图实现中值过滤器。要解决的任务是计算矩阵上的中值滤波器,这必须重复x次。我遇到的问题是,当矩阵大于2048 × 2048时,结果不再正确。

__kernel void saxpy_kernel(float alpha, int size, __global float *in,
__global float *out, int loops) {
int i = get_global_id(0);
int index = 0;
for (int loop = 0; loop < loops; loop++) {
for (int j = 0; j < size; j++) {
index = j * size + i;
float center = in[index];
float centerTop = (index > size) ? in[index - size] : 0;
float centerLeft =
(index > 0 && (index - 1) % size < (size - 1)) ? in[index - 1] : 0;
float centerRight = ((index + 1) % size > 0) ? in[index + 1] : 0;
float centerDown =
(index + size < (size * size)) ? centerDown = in[index + size] : 0;
// write in buffer
out[index] =
alpha * (center + centerTop + centerLeft + centerRight + centerDown);
}
if (loop < loops - 1) {
barrier(CLK_GLOBAL_MEM_FENCE);
for (int j = 0; j < size; j++) {
index = j * size + i;
*(in + index) = *(out + index);
}
barrier(CLK_GLOBAL_MEM_FENCE);
}
}
}

内核加载如下:

size_t global_size[] = {VECTOR_SIZE, VECTOR_SIZE, 0};                                         //My matrix pattern
size_t group_pattern[] = {1, VECTOR_SIZE, 0};                                                 //My workgroups pattern
size_t local_size[] = {global_size[0] / group_pattern[0], global_size[1] / group_pattern[1]}; //My workgroups pattern
clStatus = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, global_size, local_size, 0, NULL, NULL);

我希望有人能帮助我。我很乐意收到任何其他可以优化的反馈。

问题是代码中的global_size[]是3D的,并且有3D范围的最大大小限制,例如2048x2048x64(clinfo中的最大工作项大小)。使global_size为1维,并使用线性索引(n=size_x*y+x)来访问您的2D矩阵坐标。

最新更新