Swift、macOS、带有2个GPU的mac,矩阵运算在一个GPU上工作,而在另一个GPU不工作



根据苹果菜单中的"关于这款Mac",我在我的MacBook Pro(Retina,15英寸,2015年年中)上运行macOS,它有两个GPU。一个GPU是AMD Radeon R9 M370X 2 GB,另一个是Intel Iris Pro 1536 MB——我想这些是标准芯片吧?它们是我购买时就装在机器里的芯片,我自己没有添加任何东西。

我使用Swift MPS库进行矩阵计算;它在英特尔GPU上工作得很好,但当我选择Radeon时,每次操作返回的结果全是零,而且没有任何错误报告。我四处寻找相关文档,但什么也找不到。到目前为止,我唯一的线索是Radeon报告"未集成"(或者至少我认为是这样,这是基于"Finding GPUs on macOS"的示例代码得出的,而该示例代码与苹果的文档一样有用,也就是说不太有用)。如果我没有读错那一页,下面就是我的两个GPU报告的信息:

Device Intel Iris Pro Graphics; caps: headful, not discrete, integrated, not external

Device AMD Radeon R9 M370X; caps: headful, discrete, not integrated, not external

我找不到任何文档能说明我做错了什么。我查阅了苹果公司的MPS文档,但无济于事。正如我所说,代码在英特尔GPU上运行得很好,所以我认为它也应该能在Radeon上运行。我运行了一些可下载的诊断工具来检查Radeon,但它没有出现在这些工具的菜单中。所以我甚至不知道这是我的代码有问题,还是芯片本身坏了。

下面是代码,您可以通过粘贴到main.swift中来将其构建为控制台应用程序。找到以下行:

let device = MTLCopyAllDevices()[1]

我对英特尔使用[0],对Radeon使用[1],你可以看到输出是不同的,也就是说,Radeon的输出全是零。我想具体结果可能因你的机器而异。欢迎任何意见,谢谢。

import MetalPerformanceShaders
// Element type used for matrix values on the CPU side.
typealias MPSNumber = Float32
// Size in bytes of one MPSNumber; used below for byte-offset arithmetic when
// reading from and writing to Metal buffers.
let MPSNumberSize = MemoryLayout<MPSNumber>.size
// MPS data type handed to the matrix descriptors; float32 to match MPSNumber.
let MPSNumberTypeInGPU = MPSDataType.float32
/// Adds two 1×3 matrices with `MPSMatrixSum` on one of the machine's GPUs and
/// prints the device name, its capabilities, and the values read back.
class MPSNet {
    let commandBuffer: MTLCommandBuffer
    let commandQueue: MTLCommandQueue
    // Selects the GPU by index into MTLCopyAllDevices(); the subscript traps
    // when the index is out of range for the devices present.
    let device = MTLCopyAllDevices()[1]
    var neuronsInMatrix1: MPSMatrix?
    var neuronsInMatrix2: MPSMatrix?
    var neuronsOutMatrix: MPSMatrix?

    /// Creates the command queue and command buffer, builds the two input
    /// matrices and the result matrix, encodes the sum, and registers the
    /// completion handler that prints the result.
    init() {
        guard let cq = device.makeCommandQueue() else { fatalError() }
        guard let cb = cq.makeCommandBuffer() else { fatalError() }
        commandQueue = cq
        commandBuffer = cb

        let cMatrices = 2
        let cRows = 1
        let cColumns = 3
        let sensoryInputs1: [MPSNumber] = [1, 2, 3]
        let sensoryInputs2: [MPSNumber] = [4, 5, 6]

        neuronsInMatrix1 = makeMatrix(device, sensoryInputs1)
        neuronsInMatrix2 = makeMatrix(device, sensoryInputs2)

        let rowStride = MPSMatrixDescriptor.rowBytes(fromColumns: cColumns, dataType: MPSNumberTypeInGPU)
        neuronsOutMatrix = makeMatrix(device, cRows, cColumnsOut: cColumns, rowStride: rowStride)

        let adder = MPSMatrixSum(
            device: device, count: cMatrices, rows: cRows, columns: cColumns, transpose: false
        )
        adder.encode(
            to: commandBuffer,
            sourceMatrices: [neuronsInMatrix1!, neuronsInMatrix2!],
            resultMatrix: neuronsOutMatrix!, scale: nil, offsetVector: nil,
            biasVector: nil, start: 0
        )

        commandBuffer.addCompletedHandler { _ in
            let motorOutputs = self.getComputeOutput(self.neuronsOutMatrix!)
            // "Discrete" here means neither low-power (integrated) nor removable (external).
            let discrete = !self.device.isLowPower && !self.device.isRemovable
            // The interpolation markers `\(` are required; without the backslash
            // these lines are not valid string literals.
            let caps = "\(self.device.isHeadless ? " headless" : " headful")" +
                "\(discrete ? ", discrete" : ", not discrete")" +
                "\(self.device.isLowPower ? ", integrated" : ", not integrated")" +
                "\(self.device.isRemovable ? ", external" : ", not external")"
            print("Device \(self.device.name); caps:\(caps); motor outputs \(motorOutputs)")
        }
    }

    /// Submits the encoded work and blocks until the command buffer has completed.
    func compute() {
        // NOTE(review): the input buffers are created with .storageModeManaged,
        // and nothing here calls didModifyRange(_:) on them or synchronize(on:)
        // on the result before the CPU reads it back. This is presumably why a
        // discrete GPU reports all zeros. Left unchanged on purpose, since this
        // listing reproduces the reported behaviour.
        commandBuffer.commit()
        commandBuffer.waitUntilCompleted()
    }
}
extension MPSNet {
    /// Reads `matrix.columns` consecutive `MPSNumber` values from the start of
    /// the matrix's backing buffer and returns them widened to `Double`.
    func getComputeOutput(_ matrix: MPSMatrix) -> [Double] {
        let base = matrix.data.contents()
        var values: [Double] = []
        for column in 0..<matrix.columns {
            let element = base.load(fromByteOffset: column * MPSNumberSize, as: MPSNumber.self)
            values.append(Double(element))
        }
        return values
    }

    /// Writes `rawValues` back to back into `data`, starting at byte offset 0.
    func loadMatrix(_ data: MTLBuffer, _ rawValues: [MPSNumber]) {
        let destination = data.contents()
        for (index, value) in rawValues.enumerated() {
            destination.storeBytes(of: value, toByteOffset: index * MPSNumberSize, as: MPSNumber.self)
        }
    }

    /// Builds a single-row matrix holding `rawValues`, backed by a newly created
    /// buffer with managed storage on `device`.
    func makeMatrix(_ device: MTLDevice, _ rawValues: [MPSNumber]) -> MPSMatrix {
        let columnCount = rawValues.count
        let descriptor = MPSMatrixDescriptor(
            dimensions: 1,
            columns: columnCount,
            rowBytes: MPSMatrixDescriptor.rowBytes(fromColumns: columnCount, dataType: MPSNumberTypeInGPU),
            dataType: MPSNumberTypeInGPU
        )

        let candidate = device.makeBuffer(length: descriptor.matrixBytes, options: .storageModeManaged)
        guard let inputBuffer = candidate else { fatalError() }

        loadMatrix(inputBuffer, rawValues)
        return MPSMatrix(buffer: inputBuffer, descriptor: descriptor)
    }

    /// Builds a `cRowsOut` × `cColumnsOut` matrix with the given row stride,
    /// letting `MPSMatrix` create its own backing storage on `device`.
    func makeMatrix(_ device: MTLDevice, _ cRowsOut: Int, cColumnsOut: Int, rowStride: Int) -> MPSMatrix {
        return MPSMatrix(
            device: device,
            descriptor: MPSMatrixDescriptor(
                dimensions: cRowsOut,
                columns: cColumnsOut,
                rowBytes: rowStride,
                dataType: MPSNumberTypeInGPU
            )
        )
    }
}
// Entry point: constructing MPSNet encodes the matrix sum; compute() submits it.
let net = MPSNet()
net.compute()

您似乎没有使用-[MPSMatrix synchronizeOnCommandBuffer:]。在独立(discrete)GPU设备上,在数据从GPU传回之前,需要进行一些显式同步。

问题在于矩阵缓冲区的存储模式。您使用的是MTLResourceOptions.storageModeManaged,它告诉Metal您希望自己管理CPU和GPU之间共享内存的同步。正如这里的另一个答案所提到的,在GPU操作之后、尝试用CPU读取数据之前,必须使用MPSMatrix.synchronize(on: MTLCommandBuffer)。但您也必须在另一个方向上同步,即在CPU操作之后、将命令提交给GPU之前,使用MTLBuffer.didModifyRange(_: Range)。

或者,您可以使用共享存储模式MTLResourceOptions.storageModeShared,它为您负责同步。

有关详细信息,请参阅Apple文档中的同步托管资源。

以下是使用托管存储模式的示例的可运行版本。请注意函数MPSNet.compute()中的差异。如果您的应用程序可以使用共享存储模式,则可以忽略这些改动,只需在为矩阵创建MTLBuffer时更改存储模式即可。

import MetalPerformanceShaders
// Element type used for matrix values on the CPU side.
typealias MPSNumber = Float32
// Size in bytes of one MPSNumber; used below for byte-offset arithmetic when
// reading from and writing to Metal buffers.
let MPSNumberSize = MemoryLayout<MPSNumber>.size
// MPS data type handed to the matrix descriptors; float32 to match MPSNumber.
let MPSNumberTypeInGPU = MPSDataType.float32
/// Adds two 1×3 matrices with `MPSMatrixSum` on one of the machine's GPUs,
/// synchronising the managed buffers explicitly so the result can be read back
/// on the CPU, and prints the device name, capabilities, and result.
class MPSNet {
    let commandBuffer: MTLCommandBuffer
    let commandQueue: MTLCommandQueue
    // Selects the GPU by index into MTLCopyAllDevices(); the subscript traps
    // when the index is out of range for the devices present.
    let device = MTLCopyAllDevices()[1]
    var neuronsInMatrix1: MPSMatrix?
    var neuronsInMatrix2: MPSMatrix?
    var neuronsOutMatrix: MPSMatrix?

    /// Creates the command queue and command buffer, builds the two input
    /// matrices and the result matrix, encodes the sum, and registers the
    /// completion handler that prints the result.
    init() {
        guard let cq = device.makeCommandQueue() else {
            fatalError("Could not create a command queue")
        }
        guard let cb = cq.makeCommandBuffer() else {
            fatalError("Could not create a command buffer")
        }
        commandQueue = cq
        commandBuffer = cb

        let cMatrices = 2
        let cRows = 1
        let cColumns = 3
        let sensoryInputs1: [MPSNumber] = [1, 2, 3]
        let sensoryInputs2: [MPSNumber] = [4, 5, 6]

        // Keep non-optional locals so encoding needs no force unwraps.
        let input1 = makeMatrix(device, sensoryInputs1)
        let input2 = makeMatrix(device, sensoryInputs2)
        let rowStride = MPSMatrixDescriptor.rowBytes(fromColumns: cColumns, dataType: MPSNumberTypeInGPU)
        let output = makeMatrix(device, cRows, cColumnsOut: cColumns, rowStride: rowStride)
        neuronsInMatrix1 = input1
        neuronsInMatrix2 = input2
        neuronsOutMatrix = output

        let adder = MPSMatrixSum(
            device: device, count: cMatrices, rows: cRows, columns: cColumns, transpose: false
        )
        adder.encode(
            to: commandBuffer,
            sourceMatrices: [input1, input2],
            resultMatrix: output, scale: nil, offsetVector: nil,
            biasVector: nil, start: 0
        )

        commandBuffer.addCompletedHandler { _ in
            guard let result = self.neuronsOutMatrix else { return }
            let motorOutputs = self.getComputeOutput(result)
            let gpu = self.device
            // "Discrete" here means neither low-power (integrated) nor removable (external).
            let discrete = !gpu.isLowPower && !gpu.isRemovable
            let caps = [
                gpu.isHeadless ? " headless" : " headful",
                discrete ? ", discrete" : ", not discrete",
                gpu.isLowPower ? ", integrated" : ", not integrated",
                gpu.isRemovable ? ", external" : ", not external"
            ].joined()
            print("Device \(gpu.name); caps:\(caps); motor outputs \(motorOutputs)")
        }
    }

    /// Synchronises the managed buffers in both directions, submits the encoded
    /// work, and blocks until the command buffer has completed.
    func compute() {
        // CPU -> GPU: the inputs were written through contents(), so Metal must
        // be told which bytes changed. didModifyRange(_:) is only meaningful
        // for managed storage, hence the guard.
        for case let input? in [neuronsInMatrix1, neuronsInMatrix2] {
            let buffer = input.data
            if buffer.storageMode == .managed {
                buffer.didModifyRange(0..<buffer.length)
            }
        }

        // GPU -> CPU: encode a synchronisation of the result after the sum so
        // the completion handler reads what the GPU wrote.
        neuronsOutMatrix?.synchronize(on: commandBuffer)

        commandBuffer.commit()
        // Block here so a console program does not exit before the completion
        // handler has had a chance to print.
        commandBuffer.waitUntilCompleted()
    }
}
extension MPSNet {
    /// Returns `matrix.columns` values, read as `MPSNumber` from the beginning
    /// of the matrix's backing buffer and converted to `Double`.
    func getComputeOutput(_ matrix: MPSMatrix) -> [Double] {
        let contents = matrix.data.contents()
        return (0..<matrix.columns).map { column in
            Double(contents.load(fromByteOffset: column * MPSNumberSize, as: MPSNumber.self))
        }
    }

    /// Stores each element of `rawValues` into `data` at consecutive
    /// `MPSNumberSize`-byte offsets, beginning at offset 0.
    func loadMatrix(_ data: MTLBuffer, _ rawValues: [MPSNumber]) {
        let target = data.contents()
        var byteOffset = 0
        for rawValue in rawValues {
            target.storeBytes(of: rawValue, toByteOffset: byteOffset, as: MPSNumber.self)
            byteOffset += MPSNumberSize
        }
    }

    /// Creates a one-row matrix containing `rawValues`, backed by a new
    /// managed-storage buffer allocated on `device`.
    func makeMatrix(_ device: MTLDevice, _ rawValues: [MPSNumber]) -> MPSMatrix {
        let bytesPerRow = MPSMatrixDescriptor.rowBytes(
            fromColumns: rawValues.count,
            dataType: MPSNumberTypeInGPU
        )
        let layout = MPSMatrixDescriptor(
            dimensions: 1,
            columns: rawValues.count,
            rowBytes: bytesPerRow,
            dataType: MPSNumberTypeInGPU
        )

        if let storage = device.makeBuffer(length: layout.matrixBytes, options: .storageModeManaged) {
            loadMatrix(storage, rawValues)
            return MPSMatrix(buffer: storage, descriptor: layout)
        }
        fatalError()
    }

    /// Creates a `cRowsOut` × `cColumnsOut` matrix with the supplied row stride;
    /// `MPSMatrix` allocates the backing storage on `device` itself.
    func makeMatrix(_ device: MTLDevice, _ cRowsOut: Int, cColumnsOut: Int, rowStride: Int) -> MPSMatrix {
        let layout = MPSMatrixDescriptor(
            dimensions: cRowsOut,
            columns: cColumnsOut,
            rowBytes: rowStride,
            dataType: MPSNumberTypeInGPU
        )
        let result = MPSMatrix(device: device, descriptor: layout)
        return result
    }
}
// Entry point: constructing MPSNet encodes the matrix sum; compute() submits it.
let net = MPSNet()
net.compute()

最新更新