Swift Metal Shader由于在执行过程中向数组值添加数字时出错,命令缓冲区的执行被中止



我正在尝试一些非常简单的算法,使用金属GPU加速来计算数组中的一些值。着色器在某些情况下会抛出错误,我将对此进行解释。错误:由于执行过程中出现错误,命令缓冲区的执行被中止。忽略(导致先前/过多GPU错误((IOAF代码4(

着色器仅在将值添加到数组索引处的现有值时才会引发此错误。示例:

这不会导致错误:

kernel void shader (device int *wPointsIntensity [[buffer(0)]],
const device uint *wPointsXCoord [[buffer(1)]],
const device uint *wPointsYCoord [[buffer(2)]],
device float *pixelSignalIntensity [[buffer(3)]],
device float *pixelDistance [[buffer(4)]],
const device uint& noOfPoints [[ buffer(5) ]],
const device uint& width [[ buffer(6) ]],
const device uint& height [[ buffer(7) ]],
uint id [[ thread_position_in_grid ]]) {
//this does not throw error
for (uint wpIndex = 0; wpIndex < noOfPoints; wpIndex++) {
for (uint heightIndex = 0; heightIndex < height; heightIndex++) {
for (uint widthIndex = 0; widthIndex < width; widthIndex++) {
uint pixelIndex = heightIndex * width + widthIndex;
pixelDistance[pixelIndex] = float(pixelIndex);
pixelSignalIntensity[pixelIndex] = float(pixelIndex);
}}}}

而如果你改变

pixelDistance[pixelIndex]=浮动(pixelIndex(;带有

pixelDistance[pixelIndex]+=浮动(pixelIndex(;

它将抛出一个错误。

这是swift代码:

var wPointsValues = [Int32](repeating:0, count: wPoints.count)
var wPointsXLocations = [Int32](repeating:0, count: wPoints.count)
var wPointsYLocations = [Int32](repeating:0, count: wPoints.count)
for i in 0..<wPoints.count {
wPointsValues[i] = Int32(wPoints[i].signalIntensity)
wPointsXLocations[i] = Int32(wPoints[i].location.x)
wPointsYLocations[i] = Int32(wPoints[i].location.y)
}
var numberOfWPoints:Int32 = Int32(wPoints.count)
var int32Width = Int32(width)
var int32Height = Int32(height)
//output arrays
let numberOfResults = wPoints.count * Int(width) * Int(height)
var wPointsSignalIntensity = [Float32](repeating:0.0, count: numberOfResults)
var wPointsDistance = [Float32](repeating:0.0, count: numberOfResults)

//local variables
var signalDensity:[Float32] = [Float32](repeating:0.0, count: numberOfResults)
var signalDistance:[Float32] = [Float32](repeating:0.0, count: numberOfResults)
//create input buffers
let inWPointSignalValues = device.makeBuffer(bytes: wPointsValues, length: (MemoryLayout<Int32>.stride * wPoints.count), options: [])
let inWPointXCoordBuffer = device.makeBuffer(bytes: wPointsXLocations, length: (MemoryLayout<Int32>.stride * wPoints.count), options: [])
let inWPointYCoordBuffer = device.makeBuffer(bytes: wPointsYLocations, length: (MemoryLayout<Int32>.stride * wPoints.count), options: [])
//create putput buffers
let outPixelSignalIntensityBuffer = device.makeBuffer(bytes: wPointsSignalIntensity, length: (MemoryLayout<Float32>.stride * numberOfResults), options: [])
let outPixelDistanceBuffer = device.makeBuffer(bytes: wPointsDistance, length: (MemoryLayout<Float32>.stride * numberOfResults), options: [])
let commandBuffer = (mtlCommmandQueue?.makeCommandBuffer())!
let computeCommandEncoder = (commandBuffer.makeComputeCommandEncoder())!
computeCommandEncoder.setComputePipelineState(mtlComputePipelineFilter!)
//set input buffers
computeCommandEncoder.setBuffer(inWPointSignalValues, offset: 0, index: 0)
computeCommandEncoder.setBuffer(inWPointXCoordBuffer, offset: 0, index: 1)
computeCommandEncoder.setBuffer(inWPointYCoordBuffer, offset: 0, index: 2)
//set output buffers
computeCommandEncoder.setBuffer(outPixelSignalIntensityBuffer, offset: 0, index: 3)
computeCommandEncoder.setBuffer(outPixelDistanceBuffer, offset: 0, index: 4)
//set constants
computeCommandEncoder.setBytes(&numberOfWPoints, length: MemoryLayout<Int32>.stride, index: 5)
computeCommandEncoder.setBytes(&int32Width, length: MemoryLayout<Int32>.stride, index: 6)
computeCommandEncoder.setBytes(&int32Height, length: MemoryLayout<Int32>.stride, index: 7)

let threadsPerGroup = MTLSize(width:2,height:2,depth:2)
let numThreadgroups = MTLSize(width:2, height:2, depth:2)
computeCommandEncoder.dispatchThreadgroups(numThreadgroups, threadsPerThreadgroup: threadsPerGroup)
let endBufferAllocation = mach_absolute_time()
print("time for creating and setting buffert: time: (Double(endBufferAllocation - start) / Double(NSEC_PER_SEC))")
computeCommandEncoder.endEncoding()
commandBuffer.commit()
commandBuffer.waitUntilCompleted()
let allComplete = mach_absolute_time()
self.signalDistance = (outPixelDistanceBuffer?.contents())!
self.signalDensity = (outPixelSignalIntensityBuffer?.contents())!

我有这个问题很长时间了,程序间歇性崩溃。事实证明,我正在访问内核中尚未由缓冲区分配的内存。在内核中,我正在执行for循环0.<5(即为每个线程输出5个值(,但是没有将num_threads除以5。

当它没有崩溃时,它给出了正确的答案,除了"由于执行过程中的错误,命令缓冲区的执行被中止。导致GPU挂起错误(IOAF代码3("之外,没有其他错误。

最新更新