我正在用memory_align创建分页对齐的内存,然后我从中创建没有副本的MTLBuffer。然后,GPU 将数据传送到该 MTLBuffer 中。完成后,我将相同的内存包装在Data.init(bytesNoCopy:count:deallocator:)中,以便在我的项目中传递。我不知道用什么作为分配器。我从用 OBJ-C 编写的 Apple 教程中翻译了这段代码。苹果代码在这里。我花了两天时间试图自己研究这一点。
Apple OBJ-C 代码分配器如下所示。这超出了我的 OBJ-C 知识。
// Block to dealloc memory created with vm_allocate
void (^deallocProvidedAddress)(void *bytes, NSUInteger length) =
^(void *bytes, NSUInteger length)
{
vm_deallocate((vm_map_t)mach_task_self(),
(vm_address_t)bytes,
length);
};
有问题的代码位于我的列表末尾。
// Blit all positions and velocities and provide them to the client either to show final results
// or continue the simulation on another device
func provideFullData(
_ dataProvider: AAPLFullDatasetProvider,
forSimulationTime time: CFAbsoluteTime
) {
let positionDataSize = positions[oldBufferIndex]!.length
let velocityDataSize = velocities[oldBufferIndex]!.length
var positionDataAddress: UnsafeMutableRawPointer? = nil
var velocityDataAddress: UnsafeMutableRawPointer? = nil
// Create buffers to transfer data to client
do {
// allocate memory on page aligned addresses use by both GPU and CPU
let alignment = 0x4000
// make length a mulitple of alignment
let positionAllocationSize = (positionDataSize + alignment - 1) & (~(alignment - 1))
posix_memalign(&positionDataAddress, alignment, positionAllocationSize)
let velocityAllocationSize = (velocityDataSize + alignment - 1) & (~(alignment - 1))
posix_memalign(&positionDataAddress, alignment, velocityAllocationSize)
}
// Blit positions and velocities to a buffer for transfer
do {
// create MTL buffers with created mem allighed
let positionBuffer = device.makeBuffer(
bytesNoCopy: &positionDataAddress,
length: positionDataSize,
options: .storageModeShared,
deallocator: nil)
positionBuffer?.label = "Final Positions Buffer"
let velocityBuffer = device.makeBuffer(
bytesNoCopy: &velocityDataAddress,
length: velocityDataSize,
options: .storageModeShared,
deallocator: nil)
velocityBuffer?.label = "Final Velocities Buffer"
let commandBuffer = commandQueue?.makeCommandBuffer()
commandBuffer?.label = "Full Transfer Command Buffer"
let blitEncoder = commandBuffer?.makeBlitCommandEncoder()
blitEncoder?.label = "Full Transfer Blits"
blitEncoder?.pushDebugGroup("Full Position Data Blit")
if let _position = positions[oldBufferIndex], let positionBuffer {
blitEncoder?.copy(
from: _position,
sourceOffset: 0,
to: positionBuffer,
destinationOffset: 0,
size: positionBuffer.length)
}
blitEncoder?.popDebugGroup()
blitEncoder?.pushDebugGroup("Full Velocity Data Blit")
if let _velocity = velocities[oldBufferIndex], let velocityBuffer {
blitEncoder?.copy(
from: _velocity,
sourceOffset: 0,
to: velocityBuffer,
destinationOffset: 0,
size: velocityBuffer.length)
}
blitEncoder?.popDebugGroup()
blitEncoder?.endEncoding()
commandBuffer?.commit()
// Ensure blit of data is complete before providing
// the data to the client
commandBuffer?.waitUntilCompleted()
}
// Wrap the memory allocated with vm_allocate
// with a NSData object which will allow the app to
// rely on ObjC ARC (or even MMR) to manage the
// memory's lifetime. Initialize NSData object
// with a deallocation block to free the
// vm_allocated memory when the object has been
// deallocated
do {
//this code was in obj-c I don'tlnow how to convert this to swift
// Block to dealloc memory created with vm_allocate
// let deallocProvidedAddress: ((_ bytes: UnsafeMutableRawPointer?, _ length: Int) -> Void)? =
// { bytes, length in
// vm_deallocate(
// mach_task_self() as? vm_map_t,
// bytes as? vm_address_t,
// length)
// }
let positionData = Data(
bytesNoCopy: &positionDataAddress,
count: positionDataSize,
deallocator: .none) // this may be a memory leak
let velocityData = Data(
bytesNoCopy: &velocityDataAddress,
count: velocityDataSize,
deallocator: .none) // this may be a memory leak
dataProvider(positionData, velocityData, time)
}
}
这是苹果OBJ-C代码的列表
// Set the initial positions and velocities of the simulation based upon the simulation's config
- (void)initializeData
{
const float pscale = _config->clusterScale;
const float vscale = _config->velocityScale * pscale;
const float inner = 2.5f * pscale;
const float outer = 4.0f * pscale;
const float length = outer - inner;
_oldBufferIndex = 0;
_newBufferIndex = 1;
vector_float4 *positions = (vector_float4 *) _positions[_oldBufferIndex].contents;
vector_float4 *velocities = (vector_float4 *) _velocities[_oldBufferIndex].contents;
for(int i = 0; i < _config->numBodies; i++)
{
vector_float3 nrpos = generate_random_normalized_vector(-1.0, 1.0, 1.0);
vector_float3 rpos = generate_random_vector(0.0, 1.0);
vector_float3 position = nrpos * (inner + (length * rpos));
positions[i].xyz = position;
positions[i].w = 1.0;
vector_float3 axis = {0.0, 0.0, 1.0};
float scalar = vector_dot(nrpos, axis);
if((1.0f - scalar) < 1e-6)
{
axis.xy = nrpos.yx;
axis = vector_normalize(axis);
}
vector_float3 velocity = vector_cross(position, axis);
velocities[i].xyz = velocity * vscale;
}
NSRange fullRange;
fullRange = NSMakeRange(0, _positions[_oldBufferIndex].length);
[_positions[_oldBufferIndex] didModifyRange:fullRange];
fullRange = NSMakeRange(0, _velocities[_oldBufferIndex].length);
[_velocities[_oldBufferIndex] didModifyRange:fullRange];
}
/// Set simulation data for a simulation that was begun elsewhere (i.e. on another device)
- (void)setPositionData:(nonnull NSData *)positionData
velocityData:(nonnull NSData *)velocityData
forSimulationTime:(CFAbsoluteTime)simulationTime
{
_oldBufferIndex = 0;
_newBufferIndex = 1;
vector_float4 *positions = (vector_float4 *) _positions[_oldBufferIndex].contents;
vector_float4 *velocities = (vector_float4 *) _velocities[_oldBufferIndex].contents;
assert(_positions[_oldBufferIndex].length == positionData.length);
assert(_velocities[_oldBufferIndex].length == velocityData.length);
memcpy(positions, positionData.bytes, positionData.length);
memcpy(velocities, velocityData.bytes, velocityData.length);
NSRange fullRange;
fullRange = NSMakeRange(0, _positions[_oldBufferIndex].length);
[_positions[_oldBufferIndex] didModifyRange:fullRange];
fullRange = NSMakeRange(0, _velocities[_oldBufferIndex].length);
[_velocities[_oldBufferIndex] didModifyRange:fullRange];
_simulationTime = simulationTime;
}
/// Blit a subset of the positions data for this frame and provide them to the client
/// to show a summary of the simulation's progress
- (void)fillUpdateBufferWithPositionBuffer:(nonnull id<MTLBuffer>)buffer
usingCommandBuffer:(nonnull id<MTLCommandBuffer>)commandBuffer
{
id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
blitEncoder.label = @"Position Update Blit Encoder";
[blitEncoder pushDebugGroup:@"Position Update Blit Commands"];
[blitEncoder copyFromBuffer:buffer
sourceOffset:0
toBuffer:_updateBuffer[_currentBufferIndex]
destinationOffset:0
size:_updateBuffer[_currentBufferIndex].length];
[blitEncoder popDebugGroup];
[blitEncoder endEncoding];
}
/// Blit all positions and velocities and provide them to the client either to show final results
/// or continue the simulation on another device
- (void)provideFullData:(nonnull AAPLFullDatasetProvider)dataProvider
forSimulationTime:(CFAbsoluteTime)time
{
NSUInteger positionDataSize = _positions[_oldBufferIndex].length;
NSUInteger velocityDataSize = _velocities[_oldBufferIndex].length;
void *positionDataAddress = NULL;
void *velocityDataAddress = NULL;
// Create buffers to transfer data to client
{
// Use vm allocate to allocate buffer on page aligned address
kern_return_t err;
err = vm_allocate((vm_map_t)mach_task_self(),
(vm_address_t*)&positionDataAddress,
positionDataSize,
VM_FLAGS_ANYWHERE);
assert(err == KERN_SUCCESS);
err = vm_allocate((vm_map_t)mach_task_self(),
(vm_address_t*)&velocityDataAddress,
velocityDataSize,
VM_FLAGS_ANYWHERE);
assert(err == KERN_SUCCESS);
}
// Blit positions and velocities to a buffer for transfer
{
id<MTLBuffer> positionBuffer = [_device newBufferWithBytesNoCopy:positionDataAddress
length:positionDataSize
options:MTLResourceStorageModeShared
deallocator:nil];
positionBuffer.label = @"Final Positions Buffer";
id<MTLBuffer> velocityBuffer = [_device newBufferWithBytesNoCopy:velocityDataAddress
length:velocityDataSize
options:MTLResourceStorageModeShared
deallocator:nil];
velocityBuffer.label = @"Final Velocities Buffer";
id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
commandBuffer.label = @"Full Transfer Command Buffer";
id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
blitEncoder.label = @"Full Transfer Blits";
[blitEncoder pushDebugGroup:@"Full Position Data Blit"];
[blitEncoder copyFromBuffer:_positions[_oldBufferIndex]
sourceOffset:0
toBuffer:positionBuffer
destinationOffset:0
size:positionBuffer.length];
[blitEncoder popDebugGroup];
[blitEncoder pushDebugGroup:@"Full Velocity Data Blit"];
[blitEncoder copyFromBuffer:_velocities[_oldBufferIndex]
sourceOffset:0
toBuffer:velocityBuffer
destinationOffset:0
size:velocityBuffer.length];
[blitEncoder popDebugGroup];
[blitEncoder endEncoding];
[commandBuffer commit];
// Ensure blit of data is complete before providing the data to the client
[commandBuffer waitUntilCompleted];
}
// Wrap the memory allocated with vm_allocate with a NSData object which will allow the app to
// rely on ObjC ARC (or even MMR) to manage the memory's lifetime. Initialize NSData object
// with a deallocation block to free the vm_allocated memory when the object has been
// deallocated
{
// Block to dealloc memory created with vm_allocate
void (^deallocProvidedAddress)(void *bytes, NSUInteger length) =
^(void *bytes, NSUInteger length)
{
vm_deallocate((vm_map_t)mach_task_self(),
(vm_address_t)bytes,
length);
};
NSData *positionData = [[NSData alloc] initWithBytesNoCopy:positionDataAddress
length:positionDataSize
deallocator:deallocProvidedAddress];
NSData *velocityData = [[NSData alloc] initWithBytesNoCopy:velocityDataAddress
length:velocityDataSize
deallocator:deallocProvidedAddress];
dataProvider(positionData, velocityData, time);
}
}
你定义释放块(甚至命名函数)类似于它在 Obj-C 中完成的方式,尽管需要一些强制转换。 Obj-C 释放器块在 Swift 中成为以下闭包:
let deallocProvidedAddress = {
(_ bytes: UnsafeMutableRawPointer, _ length: Int) -> Void in
vm_deallocate(mach_task_self_, vm_offset_t(bitPattern: bytes), vm_size_t(length))
}
然后,您传递.custom(deallocProvidedAddress)
,而不是为Data(bytesNoCopy:count:deallocator)
的deallocator
参数.none
。
let positionData = Data(
bytesNoCopy: &positionDataAddress,
count: positionDataSize,
deallocator: .custom(deallocProvidedAddress))
let velocityData = Data(
bytesNoCopy: &velocityDataAddress,
count: velocityDataSize,
deallocator: .custom(deallocProvidedAddress))
dataProvider(positionData, velocityData, time)
但是,由于您不调用vm_allocate
,而是使用posix_memalign
,因此您需要在deallocProvidedAddress
中调用free
而不是vm_deallocate
:
let deallocProvidedAddress = {
(_ bytes: UnsafeMutableRawPointer, _ length: Int) -> Void in
free(bytes)
}
我怎么知道使用free
? 我自己从未真正使用过posix_memalign
,我只是在终端中做了man posix_memalign
,它说,除其他外:
通过 posix_memalign() 分配的内存可以在后续调用 realloc(3)、reallocf(3) 和 free(3) 时用作参数。
因此free
是释放通过posix_memalign
分配的内存的适当方法
这是我将 Obj-C 版本的provideFullData
翻译成 Swift。 它使用vm_allocate
和vm_deallocate
,因为这就是 Obj-C 版本所做的,但如果您愿意,您可以轻松地将其替换为posix_memalign
和free
:
/// Blit all positions and velocities and provide them to the client either to show final results
/// or continue the simulation on another device
func provide(fullData dataProvider: AAPLFullDatasetProvider, forSimulationTime time: CFAbsoluteTime)
{
let positionDataSize = positions[oldBufferIndex]!.length
let velocityDataSize = velocities[oldBufferIndex]!.length
func vm_alloc(count: Int) -> UnsafeMutableRawPointer?
{
var address: vm_address_t = 0
let err = vm_allocate(mach_task_self_, &address, vm_size_t(count), VM_FLAGS_ANYWHERE)
return err == KERN_SUCCESS
? UnsafeMutableRawPointer(bitPattern: address)
: nil
}
func makeMTLBuffer(
from bytes: UnsafeMutableRawPointer,
count: Int,
labeled label: String) -> MTLBuffer?
{
guard let buffer = device.makeBuffer(
bytesNoCopy: bytes,
length: count,
options: [.storageModeShared],
deallocator: nil)
else { return nil }
buffer.label = label
return buffer
}
guard let positionDataAddress = vm_alloc(count: positionDataSize) else {
fatalError("failed to allocate position data")
}
guard let velocityDataAddress = vm_alloc(count: velocityDataSize) else {
fatalError("failed to allocate velocity data")
}
// Blit positions and velocities to a buffer for transfer
guard let positionBuffer = makeMTLBuffer(
from: positionDataAddress,
count: positionDataSize,
labeled: "Final Positions Buffer")
else { fatalError("Failed to allocate positions MTLBuffer") }
guard let velocityBuffer = makeMTLBuffer(
from: velocityDataAddress,
count: velocityDataSize,
labeled: "Final Velocities Buffer")
else { fatalError("Failed to allocate velocities MTLBuffer") }
guard let commandBuffer = commandQueue.makeCommandBuffer() else {
fatalError("Failed to make commandBuffer")
}
commandBuffer.label = "Full Transfer Command Buffer"
guard let blitEncoder = commandBuffer.makeBlitCommandEncoder() else {
fatalError("Failed to make blitEncoder")
}
blitEncoder.label = "Full Transfer Blits"
blitEncoder.pushDebugGroup("Full Position Data Blit")
blitEncoder.copy(
from: positions[oldBufferIndex]!,
sourceOffset: 0,
to: positionBuffer,
destinationOffset: 0,
size: positionBuffer.length
)
blitEncoder.popDebugGroup()
blitEncoder.pushDebugGroup("Full Velocity Data Blit")
blitEncoder.copy(
from: velocities[oldBufferIndex]!,
sourceOffset: 0,
to: velocityBuffer,
destinationOffset: 0,
size: velocityBuffer.length
)
blitEncoder.popDebugGroup()
blitEncoder.endEncoding()
commandBuffer.commit()
// Ensure blit of data is complete before providing the data to the client
commandBuffer.waitUntilCompleted()
// Wrap the memory allocated with vm_allocate with a NSData object which will allow the app to
// rely on ObjC ARC (or even MMR) to manage the memory's lifetime. Initialize NSData object
// with a deallocation block to free the vm_allocated memory when the object has been
// deallocated
// Block to dealloc memory created with vm_allocate
let deallocProvidedAddress =
{ (_ bytes: UnsafeMutableRawPointer, _ length: Int) -> Void in
vm_deallocate(
mach_task_self_,
vm_offset_t(bitPattern: bytes),
vm_size_t(length)
)
}
let positionData = Data(
bytesNoCopy: positionDataAddress,
count: positionDataSize,
deallocator: .custom(deallocProvidedAddress))
let velocityData = Data(
bytesNoCopy: velocityDataAddress,
count: velocityDataSize,
deallocator: .custom(deallocProvidedAddress))
dataProvider(positionData, velocityData, time)
}
我在这里看到了很多重构的机会(我已经做了一点)。 如果你在"悲伤"的路径中做了fatalError
以外的事情,不要忘记你需要在返回或投掷之前positionDataAddress
和velocityDataAddress
分配。 我至少会重构它,以便每个Data
实例在其成功vm_allocate
/posix_memalign
后立即制作,而不是等到方法的最后,这样,如果出现错误,清理可以自动发生。 我还会将所有 Metal blit 代码提取到它自己的函数中。
重构版本
我原本打算让上面的版本保持原样,但它需要重组,所以我按照上面的建议重构了它,再加上更多。
为了方便起见,我在MTLBlitCommandEncoder
上创建了一个扩展,用于将副本从MTLBuffer
编码为Data
:
fileprivate extension MTLBlitCommandEncoder
{
func encodeCopy(
from src: MTLBuffer,
to dst: MTLBuffer,
dstName: @autoclosure () -> String)
{
#if DEBUG
pushDebugGroup("Full (dstName()) Data Blit")
defer { popDebugGroup() }
#endif
copy(
from: src, sourceOffset: 0,
to: dst, destinationOffset: 0,
size: dst.length
)
}
func encodeCopy(
from src: MTLBuffer,
to dst: inout Data,
dstName: @autoclosure () -> String)
{
dst.withUnsafeMutableBytes
{
guard let buffer = device.makeBuffer(
bytesNoCopy: $0.baseAddress!,
length: $0.count,
options: [.storageModeShared],
deallocator: nil)
else { fatalError("Failed to allocate MTLBuffer for (dstName())") }
#if DEBUG
buffer.label = "(dstName()) Buffer"
#endif
encodeCopy(from: src, to: buffer, dstName: dstName())
}
}
}
我将嵌套函数移动到fileprivate
方法,并从自定义分配器的closure
更改为static
方法,并将其重命名为vm_dealloc
:
fileprivate static func vm_dealloc(
_ bytes: UnsafeMutableRawPointer,
_ length: Int)
{
vm_deallocate(
mach_task_self_,
vm_offset_t(bitPattern: bytes),
vm_size_t(length)
)
}
fileprivate func vm_alloc(count: Int) -> UnsafeMutableRawPointer?
{
var address: vm_address_t = 0
let err = vm_allocate(mach_task_self_, &address, vm_size_t(count), VM_FLAGS_ANYWHERE)
return err == KERN_SUCCESS
? UnsafeMutableRawPointer(bitPattern: address)
: nil
}
由于指针无论如何都会存储在Data
的实例中,并且Data
可以自动处理清理,因此我编写vmAllocData(count:)
来分配内存,然后立即将其放入Data
中。 调用代码不再需要担心基础指针。
fileprivate func vmAllocData(count: Int) -> Data?
{
guard let ptr = vm_alloc(count: count) else {
return nil
}
return Data(
bytesNoCopy: ptr,
count: count,
deallocator: .custom(Self.vm_dealloc)
)
}
然后,我将 Metal 代码移动到copy(positionsInto:andVelicitiesInto:)
方法。 有些人会狡辩名称中的"和",因为它说它正在做不止一件事,而且它是......但是,使用相同的MTLBlitCommandEncoder
对位置和速度进行编码复制的效率问题。 所以是的,它做了不止一件事,但另一种选择是单独创建编码器并传递它,这将使 Metal 代码传播得比必要的多一点。 我认为在这种情况下,为了效率和隔离 Metal 代码,可以做不止一件事。 无论如何,此函数使用上述extension
中的encodeCopy
:
fileprivate func copy(
positionsInto positionData: inout Data,
andVelocitiesInto velocityData: inout Data)
{
guard let commandBuffer = commandQueue.makeCommandBuffer() else {
fatalError("Failed to make commandBuffer")
}
#if DEBUG
commandBuffer.label = "Full Transfer Command Buffer"
#endif
guard let blitEncoder = commandBuffer.makeBlitCommandEncoder() else {
fatalError("Failed to make blitEncoder")
}
#if DEBUG
blitEncoder.label = "Full Transfer Blits"
#endif
guard let positionSrc = positions[oldBufferIndex] else {
fatalError("positions[(oldBufferIndex)] is nil!")
}
blitEncoder.encodeCopy(
from: positionSrc,
to: &positionData,
dstName: "Positions"
)
guard let velocitySrc = velocities[oldBufferIndex] else {
fatalError("velocities[(oldBufferIndex)] is nil!")
}
blitEncoder.encodeCopy(
from: velocitySrc,
to: &velocityData,
dstName: "Velocity"
)
blitEncoder.endEncoding()
commandBuffer.commit()
// Ensure blit of data is complete before providing the data to the client
commandBuffer.waitUntilCompleted()
}
最后provide(fullData:forSimulationTime)
变成:
func provide(fullData dataProvider: AAPLFullDatasetProvider, forSimulationTime time: CFAbsoluteTime)
{
let positionDataSize = positions[oldBufferIndex]!.length
let velocityDataSize = velocities[oldBufferIndex]!.length
guard var positionData = vmAllocData(count: positionDataSize) else {
fatalError("failed to allocate position data")
}
guard var velocityData = vmAllocData(count: velocityDataSize) else {
fatalError("failed to allocate velocity data")
}
copy(positionsInto: &positionData, andVelocitiesInto: &velocityData)
dataProvider(positionData, velocityData, time)
}