如何在 Swift 中为 posix_memalign 分配的内存指定释放器（deallocator）？

我正在用 posix_memalign 创建页对齐的内存，然后用它创建不拷贝数据的 MTLBuffer。然后，GPU 将数据 blit 到该 MTLBuffer 中。完成后，我将同一块内存包装在 Data.init(bytesNoCopy:count:deallocator:) 中，以便在我的项目中传递。我不知道该用什么作为释放器（deallocator）。这段代码是我从 Apple 用 OBJ-C 编写的教程翻译过来的。苹果代码在这里。我花了两天时间试图自己研究这一点。

Apple OBJ-C 代码中的释放器（deallocator）如下所示。这超出了我的 OBJ-C 知识。

// Deallocator block: hands a region obtained from vm_allocate back to the kernel.
// `address` is the start of the region and `size` its length in bytes.
void (^deallocProvidedAddress)(void *, NSUInteger) = ^(void *address, NSUInteger size) {
    vm_map_t task = (vm_map_t)mach_task_self();
    vm_deallocate(task, (vm_address_t)address, size);
};

有问题的代码位于我的列表末尾。

// Blit all positions and velocities and provide them to the client either to show final results
// or continue the simulation on another device
//
// Allocates two aligned CPU-side blocks, wraps each one in a no-copy MTLBuffer, blits the
// current position/velocity buffers into them, waits for the command buffer to complete and
// then hands the same memory to `dataProvider` wrapped in `Data`.
//
// - Parameters:
//   - dataProvider: Callback that receives the position data, the velocity data and `time`.
//   - time: Simulation time passed through to `dataProvider` unchanged.
func provideFullData(
    _ dataProvider: AAPLFullDatasetProvider,
    forSimulationTime time: CFAbsoluteTime
) {
    let positionDataSize = positions[oldBufferIndex]!.length
    let velocityDataSize = velocities[oldBufferIndex]!.length
    var positionDataAddress: UnsafeMutableRawPointer? = nil
    var velocityDataAddress: UnsafeMutableRawPointer? = nil

    // Create buffers to transfer data to client

    // allocate memory on aligned addresses used by both GPU and CPU
    let alignment = 0x4000

    // make length a multiple of alignment
    let positionAllocationSize = (positionDataSize + alignment - 1) & (~(alignment - 1))
    let positionStatus = posix_memalign(&positionDataAddress, alignment, positionAllocationSize)

    // The second allocation has to be stored in velocityDataAddress. Storing it in
    // positionDataAddress again would orphan the first block and leave the velocity pointer nil.
    let velocityAllocationSize = (velocityDataSize + alignment - 1) & (~(alignment - 1))
    let velocityStatus = posix_memalign(&velocityDataAddress, alignment, velocityAllocationSize)

    // posix_memalign reports failure through its return value; do not continue without memory.
    guard positionStatus == 0, velocityStatus == 0,
          let positionBytes = positionDataAddress,
          let velocityBytes = velocityDataAddress
    else {
        fatalError("posix_memalign failed to allocate the transfer buffers")
    }

    // Blit positions and velocities to a buffer for transfer
    do {
        // create MTL buffers on top of the aligned memory.
        // Pass the allocated address itself: `&positionDataAddress` would instead be the
        // address of the local optional variable.
        let positionBuffer = device.makeBuffer(
            bytesNoCopy: positionBytes,
            length: positionDataSize,
            options: .storageModeShared,
            deallocator: nil)

        positionBuffer?.label = "Final Positions Buffer"

        let velocityBuffer = device.makeBuffer(
            bytesNoCopy: velocityBytes,
            length: velocityDataSize,
            options: .storageModeShared,
            deallocator: nil)

        velocityBuffer?.label = "Final Velocities Buffer"

        let commandBuffer = commandQueue?.makeCommandBuffer()
        commandBuffer?.label = "Full Transfer Command Buffer"

        let blitEncoder = commandBuffer?.makeBlitCommandEncoder()

        blitEncoder?.label = "Full Transfer Blits"

        blitEncoder?.pushDebugGroup("Full Position Data Blit")

        if let _position = positions[oldBufferIndex], let positionBuffer {
            blitEncoder?.copy(
                from: _position,
                sourceOffset: 0,
                to: positionBuffer,
                destinationOffset: 0,
                size: positionBuffer.length)
        }

        blitEncoder?.popDebugGroup()

        blitEncoder?.pushDebugGroup("Full Velocity Data Blit")

        if let _velocity = velocities[oldBufferIndex], let velocityBuffer {
            blitEncoder?.copy(
                from: _velocity,
                sourceOffset: 0,
                to: velocityBuffer,
                destinationOffset: 0,
                size: velocityBuffer.length)
        }

        blitEncoder?.popDebugGroup()

        blitEncoder?.endEncoding()

        commandBuffer?.commit()

        // Ensure blit of data is complete before providing
        // the data to the client
        commandBuffer?.waitUntilCompleted()
    }
    // Wrap the allocated memory in a Data value so that its
    // lifetime can be managed automatically. The Obj-C original
    // passed a deallocation block to NSData for this purpose.
    do {
        //this code was in obj-c I don't know how to convert this to swift
        // Block to dealloc memory created with vm_allocate
        // let deallocProvidedAddress: ((_ bytes: UnsafeMutableRawPointer?, _ length: Int) -> Void)? =
        // { bytes, length in
        // vm_deallocate(
        // mach_task_self() as? vm_map_t,
        // bytes as? vm_address_t,
        // length)
        // }

        // NOTE: with `.none`, Data never frees the posix_memalign block. Which deallocator
        // to pass instead is the open question of this post.
        let positionData = Data(
            bytesNoCopy: positionBytes,
            count: positionDataSize,
            deallocator: .none) // this may be a memory leak

        let velocityData = Data(
            bytesNoCopy: velocityBytes,
            count: velocityDataSize,
            deallocator: .none) // this may be a memory leak

        dataProvider(positionData, velocityData, time)
    }

}

这是苹果OBJ-C代码的列表

// Populate the "old" position and velocity buffers with the simulation's starting state,
// using the scales and body count found in the simulation's config.
// Also resets the old/new buffer indices to 0/1 and marks both buffers as modified.
- (void)initializeData
{
    const float positionScale = _config->clusterScale;
    const float velocityScale = _config->velocityScale * positionScale;
    const float innerBound    = 2.5f * positionScale;
    const float outerBound    = 4.0f * positionScale;
    const float boundSpan     = outerBound - innerBound;

    _oldBufferIndex = 0;
    _newBufferIndex = 1;

    vector_float4 *positionOut = (vector_float4 *) _positions[_oldBufferIndex].contents;
    vector_float4 *velocityOut = (vector_float4 *) _velocities[_oldBufferIndex].contents;

    for(int body = 0; body < _config->numBodies; body++)
    {
        // Draw order matters: the normalized vector first, then the plain random vector.
        vector_float3 direction = generate_random_normalized_vector(-1.0, 1.0, 1.0);
        vector_float3 spread    = generate_random_vector(0.0, 1.0);
        vector_float3 placed    = direction * (innerBound + (boundSpan * spread));

        positionOut[body].xyz = placed;
        positionOut[body].w = 1.0;

        // Default axis is +Z; when the direction is (almost) parallel to it, derive the
        // axis from the direction's swapped x/y components instead.
        vector_float3 spinAxis = {0.0, 0.0, 1.0};
        float alignment = vector_dot(direction, spinAxis);

        if((1.0f - alignment) < 1e-6)
        {
            spinAxis.xy = direction.yx;
            spinAxis = vector_normalize(spinAxis);
        }

        vector_float3 tangent = vector_cross(placed, spinAxis);
        velocityOut[body].xyz = tangent * velocityScale;
    }

    // Tell Metal that the CPU rewrote the whole of both buffers.
    [_positions[_oldBufferIndex] didModifyRange:NSMakeRange(0, _positions[_oldBufferIndex].length)];
    [_velocities[_oldBufferIndex] didModifyRange:NSMakeRange(0, _velocities[_oldBufferIndex].length)];
}
/// Load simulation state that was produced elsewhere (i.e. on another device).
/// The supplied data must match the current buffers' lengths exactly (checked with assert);
/// it is copied into the "old" buffers, which are then marked as modified.
- (void)setPositionData:(nonnull NSData *)positionData
           velocityData:(nonnull NSData *)velocityData
      forSimulationTime:(CFAbsoluteTime)simulationTime
{
    _oldBufferIndex = 0;
    _newBufferIndex = 1;

    id<MTLBuffer> positionTarget = _positions[_oldBufferIndex];
    id<MTLBuffer> velocityTarget = _velocities[_oldBufferIndex];

    assert(positionTarget.length == positionData.length);
    assert(velocityTarget.length == velocityData.length);

    memcpy(positionTarget.contents, positionData.bytes, positionData.length);
    memcpy(velocityTarget.contents, velocityData.bytes, velocityData.length);

    // Tell Metal that the CPU rewrote the whole of both buffers.
    [positionTarget didModifyRange:NSMakeRange(0, positionTarget.length)];
    [velocityTarget didModifyRange:NSMakeRange(0, velocityTarget.length)];

    _simulationTime = simulationTime;
}
/// Encode a blit that copies the leading part of `buffer` into the current update buffer,
/// so the client can show a summary of the simulation's progress.
/// The number of bytes copied is the length of the current update buffer.
- (void)fillUpdateBufferWithPositionBuffer:(nonnull id<MTLBuffer>)buffer
                        usingCommandBuffer:(nonnull id<MTLCommandBuffer>)commandBuffer
{
    id<MTLBuffer> updateTarget = _updateBuffer[_currentBufferIndex];

    id<MTLBlitCommandEncoder> encoder = [commandBuffer blitCommandEncoder];
    encoder.label = @"Position Update Blit Encoder";

    [encoder pushDebugGroup:@"Position Update Blit Commands"];
    [encoder copyFromBuffer:buffer
               sourceOffset:0
                   toBuffer:updateTarget
          destinationOffset:0
                       size:updateTarget.length];
    [encoder popDebugGroup];

    [encoder endEncoding];
}
/// Copy the complete position and velocity buffers into freshly vm_allocate'd memory and
/// pass them to the client, either to show final results or to continue the simulation on
/// another device. The call blocks until the GPU copy has completed.
- (void)provideFullData:(nonnull AAPLFullDatasetProvider)dataProvider
      forSimulationTime:(CFAbsoluteTime)time
{
    NSUInteger positionDataSize = _positions[_oldBufferIndex].length;
    NSUInteger velocityDataSize = _velocities[_oldBufferIndex].length;

    void *positionDataAddress = NULL;
    void *velocityDataAddress = NULL;

    // Destination memory: vm_allocate is used so the buffers start on a page aligned address
    {
        const vm_map_t task = (vm_map_t)mach_task_self();
        kern_return_t err;

        err = vm_allocate(task, (vm_address_t *)&positionDataAddress, positionDataSize, VM_FLAGS_ANYWHERE);
        assert(err == KERN_SUCCESS);

        err = vm_allocate(task, (vm_address_t *)&velocityDataAddress, velocityDataSize, VM_FLAGS_ANYWHERE);
        assert(err == KERN_SUCCESS);
    }

    // GPU copy: wrap both regions in no-copy buffers and blit the simulation state into them
    {
        id<MTLBuffer> positionTarget = [_device newBufferWithBytesNoCopy:positionDataAddress
                                                                  length:positionDataSize
                                                                 options:MTLResourceStorageModeShared
                                                             deallocator:nil];
        positionTarget.label = @"Final Positions Buffer";

        id<MTLBuffer> velocityTarget = [_device newBufferWithBytesNoCopy:velocityDataAddress
                                                                  length:velocityDataSize
                                                                 options:MTLResourceStorageModeShared
                                                             deallocator:nil];
        velocityTarget.label = @"Final Velocities Buffer";

        id<MTLCommandBuffer> transfer = [_commandQueue commandBuffer];
        transfer.label = @"Full Transfer Command Buffer";

        id<MTLBlitCommandEncoder> blit = [transfer blitCommandEncoder];
        blit.label = @"Full Transfer Blits";

        [blit pushDebugGroup:@"Full Position Data Blit"];
        [blit copyFromBuffer:_positions[_oldBufferIndex]
                sourceOffset:0
                    toBuffer:positionTarget
           destinationOffset:0
                        size:positionTarget.length];
        [blit popDebugGroup];

        [blit pushDebugGroup:@"Full Velocity Data Blit"];
        [blit copyFromBuffer:_velocities[_oldBufferIndex]
                sourceOffset:0
                    toBuffer:velocityTarget
           destinationOffset:0
                        size:velocityTarget.length];
        [blit popDebugGroup];

        [blit endEncoding];
        [transfer commit];

        // The client must not see the data before the blit has finished
        [transfer waitUntilCompleted];
    }

    // Hand ownership of the vm_allocate'd regions to NSData objects. Each NSData is created
    // with a deallocator block, so the memory is vm_deallocate'd when the object is
    // deallocated and the app can rely on ObjC ARC (or even MMR) for its lifetime.
    {
        void (^releaseRegion)(void *, NSUInteger) = ^(void *address, NSUInteger size) {
            vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)address, size);
        };

        NSData *positionData = [[NSData alloc] initWithBytesNoCopy:positionDataAddress
                                                            length:positionDataSize
                                                       deallocator:releaseRegion];
        NSData *velocityData = [[NSData alloc] initWithBytesNoCopy:velocityDataAddress
                                                            length:velocityDataSize
                                                       deallocator:releaseRegion];

        dataProvider(positionData, velocityData, time);
    }
}

你定义释放块(甚至命名函数)类似于它在 Obj-C 中完成的方式，尽管需要一些强制转换。 Obj-C 释放器块在 Swift 中成为以下闭包：

// Swift counterpart of the Obj-C deallocator block: returns a vm_allocate'd region
// (start address + byte length) to the kernel.
let deallocProvidedAddress: (UnsafeMutableRawPointer, Int) -> Void = { bytes, length in
    let regionStart = vm_offset_t(bitPattern: bytes)
    let regionSize = vm_size_t(length)
    _ = vm_deallocate(mach_task_self_, regionStart, regionSize)
}

然后，在调用 Data(bytesNoCopy:count:deallocator:) 时，为 deallocator 参数传入 .custom(deallocProvidedAddress)，而不是 .none。

// Data(bytesNoCopy:) must receive the address stored *in* each variable. Writing
// `&positionDataAddress` would pass the address *of* the optional variable itself,
// so unwrap the pointers first.
guard let positionBytes = positionDataAddress,
      let velocityBytes = velocityDataAddress
else {
    fatalError("position/velocity memory was not allocated")
}

// `.custom` makes Data call deallocProvidedAddress(pointer, count) when the storage is released.
let positionData = Data(
    bytesNoCopy: positionBytes,
    count: positionDataSize,
    deallocator: .custom(deallocProvidedAddress))

let velocityData = Data(
    bytesNoCopy: velocityBytes,
    count: velocityDataSize,
    deallocator: .custom(deallocProvidedAddress))

dataProvider(positionData, velocityData, time)

但是，由于您不调用vm_allocate，而是使用posix_memalign，因此您需要在deallocProvidedAddress中调用free而不是vm_deallocate：

// Deallocator for memory that came from posix_memalign: such memory is released with free().
// The byte count supplied by Data is not needed.
let deallocProvidedAddress: (UnsafeMutableRawPointer, Int) -> Void = { bytes, _ in
    free(bytes)
}

我怎么知道使用free？我自己从未真正使用过posix_memalign，我只是在终端中做了man posix_memalign，它说，除其他外：

通过 posix_memalign() 分配的内存可以在后续调用 realloc(3)、reallocf(3) 和 free(3) 时用作参数。

因此free是释放通过posix_memalign分配的内存的适当方法

这是我将 Obj-C 版本的provideFullData翻译成 Swift。它使用vm_allocate和vm_deallocate，因为这就是 Obj-C 版本所做的，但如果您愿意，您可以轻松地将其替换为posix_memalign和free：

/// Blit all positions and velocities and provide them to the client either to show final results
/// or continue the simulation on another device
///
/// Both destination regions come from `vm_allocate`, are filled by a GPU blit (this call waits
/// for the blit to complete) and are then handed to `dataProvider` as `Data` values whose
/// custom deallocator calls `vm_deallocate`. Any failure along the way ends in `fatalError`.
///
/// - Parameters:
///   - dataProvider: Callback that receives the position data, the velocity data and `time`.
///   - time: Simulation time passed through to `dataProvider` unchanged.
func provide(fullData dataProvider: AAPLFullDatasetProvider, forSimulationTime time: CFAbsoluteTime)
{
    let positionSource = positions[oldBufferIndex]!
    let velocitySource = velocities[oldBufferIndex]!
    let positionDataSize = positionSource.length
    let velocityDataSize = velocitySource.length

    // Reserve `byteCount` bytes with vm_allocate; nil unless the kernel reports KERN_SUCCESS.
    func allocateRegion(byteCount: Int) -> UnsafeMutableRawPointer?
    {
        var base: vm_address_t = 0
        guard vm_allocate(mach_task_self_, &base, vm_size_t(byteCount), VM_FLAGS_ANYWHERE) == KERN_SUCCESS
        else { return nil }

        return UnsafeMutableRawPointer(bitPattern: base)
    }

    // Wrap an existing region in a labeled, shared-storage MTLBuffer without copying it.
    func wrapRegion(_ base: UnsafeMutableRawPointer, byteCount: Int, label: String) -> MTLBuffer?
    {
        let wrapped = device.makeBuffer(
            bytesNoCopy: base,
            length: byteCount,
            options: [.storageModeShared],
            deallocator: nil)
        wrapped?.label = label
        return wrapped
    }

    // Give a region obtained from allocateRegion back to the kernel.
    func releaseRegion(_ base: UnsafeMutableRawPointer, _ byteCount: Int)
    {
        vm_deallocate(mach_task_self_, vm_offset_t(bitPattern: base), vm_size_t(byteCount))
    }

    // Destination memory for both data sets
    guard let positionDataAddress = allocateRegion(byteCount: positionDataSize) else {
        fatalError("failed to allocate position data")
    }
    guard let velocityDataAddress = allocateRegion(byteCount: velocityDataSize) else {
        fatalError("failed to allocate velocity data")
    }

    // Metal views onto that memory
    guard let positionBuffer = wrapRegion(
        positionDataAddress,
        byteCount: positionDataSize,
        label: "Final Positions Buffer")
    else { fatalError("Failed to allocate positions MTLBuffer") }

    guard let velocityBuffer = wrapRegion(
        velocityDataAddress,
        byteCount: velocityDataSize,
        label: "Final Velocities Buffer")
    else { fatalError("Failed to allocate velocities MTLBuffer") }

    // Encode and run the two blits
    guard let commandBuffer = commandQueue.makeCommandBuffer() else {
        fatalError("Failed to make commandBuffer")
    }
    commandBuffer.label = "Full Transfer Command Buffer"

    guard let blitEncoder = commandBuffer.makeBlitCommandEncoder() else {
        fatalError("Failed to make blitEncoder")
    }
    blitEncoder.label = "Full Transfer Blits"

    blitEncoder.pushDebugGroup("Full Position Data Blit")
    blitEncoder.copy(
        from: positionSource, sourceOffset: 0,
        to: positionBuffer, destinationOffset: 0,
        size: positionBuffer.length)
    blitEncoder.popDebugGroup()

    blitEncoder.pushDebugGroup("Full Velocity Data Blit")
    blitEncoder.copy(
        from: velocitySource, sourceOffset: 0,
        to: velocityBuffer, destinationOffset: 0,
        size: velocityBuffer.length)
    blitEncoder.popDebugGroup()

    blitEncoder.endEncoding()
    commandBuffer.commit()

    // The client must not see the data before the blit has finished
    commandBuffer.waitUntilCompleted()

    // Hand ownership of the vm_allocate'd regions to Data values; the custom deallocator
    // runs vm_deallocate once a value's storage is released.
    let positionData = Data(
        bytesNoCopy: positionDataAddress,
        count: positionDataSize,
        deallocator: .custom(releaseRegion))

    let velocityData = Data(
        bytesNoCopy: velocityDataAddress,
        count: velocityDataSize,
        deallocator: .custom(releaseRegion))

    dataProvider(positionData, velocityData, time)
}

我在这里看到了很多重构的机会(我已经做了一点)。如果你在“悲伤”路径（即失败路径）中做了 fatalError 以外的事情，不要忘记在返回或抛出错误之前，需要先释放 positionDataAddress 和 velocityDataAddress。我至少会重构它，让每个 Data 实例在对应的 vm_allocate/posix_memalign 成功后立即创建，而不是等到方法的最后，这样，如果出现错误，清理可以自动发生。我还会将所有 Metal blit 代码提取到它自己的函数中。

重构版本

我原本打算让上面的版本保持原样，但它需要重组，所以我按照上面的建议重构了它，再加上更多。

为了方便起见，我在 MTLBlitCommandEncoder 上创建了一个扩展，用于编码从 MTLBuffer 到 Data 的复制：

fileprivate extension MTLBlitCommandEncoder
{
    /// Encodes a copy of `dst.length` bytes from the start of `src` to the start of `dst`.
    ///
    /// - Parameters:
    ///   - src: Buffer to read from.
    ///   - dst: Buffer to write to; its length determines how many bytes are copied.
    ///   - dstName: Name used for the debug group. Only evaluated in DEBUG builds.
    func encodeCopy(
        from src: MTLBuffer,
        to dst: MTLBuffer,
        dstName: @autoclosure () -> String)
    {
        #if DEBUG
        // `\(...)` is required for interpolation; a bare "(dstName())" is literal text.
        pushDebugGroup("Full \(dstName()) Data Blit")
        defer { popDebugGroup() }
        #endif

        copy(
            from: src, sourceOffset: 0,
            to: dst, destinationOffset: 0,
            size: dst.length
        )
    }

    /// Encodes a copy from `src` into the bytes of `dst` by wrapping `dst`'s storage in a
    /// temporary no-copy, shared-storage MTLBuffer.
    ///
    /// Stops with `fatalError` when `dst` exposes no base address or the MTLBuffer cannot
    /// be created.
    ///
    /// - Parameters:
    ///   - src: Buffer to read from.
    ///   - dst: Data whose bytes receive the copy.
    ///   - dstName: Name used in the failure messages and, in DEBUG builds, for the buffer
    ///     label and debug group.
    func encodeCopy(
        from src: MTLBuffer,
        to dst: inout Data,
        dstName: @autoclosure () -> String)
    {
        dst.withUnsafeMutableBytes
        { bytes in
            guard let base = bytes.baseAddress else {
                fatalError("No bytes to wrap in an MTLBuffer for \(dstName())")
            }

            // NOTE(review): the GPU writes through this pointer after the closure returns;
            // presumably safe because the caller keeps `dst` alive until the command buffer
            // completes — confirm at call sites.
            guard let buffer = device.makeBuffer(
                bytesNoCopy: base,
                length: bytes.count,
                options: [.storageModeShared],
                deallocator: nil)
            else { fatalError("Failed to allocate MTLBuffer for \(dstName())") }

            #if DEBUG
            buffer.label = "\(dstName()) Buffer"
            #endif

            encodeCopy(from: src, to: buffer, dstName: dstName())
        }
    }
}

我将嵌套函数改为 fileprivate 方法，并将自定义释放器从闭包(closure)改为 static 方法，同时将其重命名为 vm_dealloc：

/// Returns a region obtained from `vm_allocate` to the kernel.
/// The `(pointer, byte count)` signature matches what `Data.Deallocator.custom` expects.
///
/// - Parameters:
///   - bytes: Start address of the region.
///   - length: Size of the region in bytes.
fileprivate static func vm_dealloc(_ bytes: UnsafeMutableRawPointer, _ length: Int)
{
    let regionStart = vm_offset_t(bitPattern: bytes)
    let regionSize = vm_size_t(length)
    vm_deallocate(mach_task_self_, regionStart, regionSize)
}

/// Asks the kernel for `count` bytes anywhere in this task's address space.
///
/// - Parameter count: Number of bytes to reserve.
/// - Returns: The start address of the region, or `nil` when `vm_allocate` does not report
///   `KERN_SUCCESS`.
fileprivate func vm_alloc(count: Int) -> UnsafeMutableRawPointer?
{
    var regionStart: vm_address_t = 0
    let status = vm_allocate(mach_task_self_, &regionStart, vm_size_t(count), VM_FLAGS_ANYWHERE)

    guard status == KERN_SUCCESS else {
        return nil
    }
    return UnsafeMutableRawPointer(bitPattern: regionStart)
}

由于指针无论如何都会存储在Data的实例中，并且Data可以自动处理清理，因此我编写vmAllocData(count:)来分配内存，然后立即将其放入Data中。调用代码不再需要担心基础指针。

/// Allocates `count` bytes with `vm_alloc` and immediately wraps them in a `Data` whose
/// custom deallocator is `vm_dealloc`, so callers never handle the raw pointer.
///
/// - Parameter count: Number of bytes to allocate.
/// - Returns: The wrapping `Data`, or `nil` when `vm_alloc` returns `nil`.
fileprivate func vmAllocData(count: Int) -> Data?
{
    return vm_alloc(count: count).map { region in
        Data(
            bytesNoCopy: region,
            count: count,
            deallocator: .custom(Self.vm_dealloc)
        )
    }
}

然后，我将 Metal 代码移动到 copy(positionsInto:andVelocitiesInto:) 方法。有些人会对名称中的“and”提出异议，因为这说明它做了不止一件事，而事实也确实如此……但是，使用同一个 MTLBlitCommandEncoder 对位置和速度的复制进行编码关系到效率。所以是的，它做了不止一件事，但另一种选择是单独创建编码器并将其传入，这会使 Metal 代码比必要的更分散一些。我认为在这种情况下，为了效率和隔离 Metal 代码，做不止一件事是可以接受的。无论如何，此函数使用上述 extension 中的 encodeCopy：

/// Blits the current position and velocity buffers into the two `Data` values with a single
/// command buffer and blit encoder, and blocks until the GPU has finished.
///
/// Stops with `fatalError` when the command buffer or encoder cannot be created, or when
/// the source buffer at `oldBufferIndex` is nil.
///
/// - Parameters:
///   - positionData: Receives the contents of `positions[oldBufferIndex]`.
///   - velocityData: Receives the contents of `velocities[oldBufferIndex]`.
fileprivate func copy(
    positionsInto positionData: inout Data,
    andVelocitiesInto velocityData: inout Data)
{
    guard let commandBuffer = commandQueue.makeCommandBuffer() else {
        fatalError("Failed to make commandBuffer")
    }
    #if DEBUG
    commandBuffer.label = "Full Transfer Command Buffer"
    #endif

    guard let blitEncoder = commandBuffer.makeBlitCommandEncoder() else {
        fatalError("Failed to make blitEncoder")
    }
    #if DEBUG
    blitEncoder.label = "Full Transfer Blits"
    #endif

    guard let positionSrc = positions[oldBufferIndex] else {
        // `\(...)` is required so the message shows the actual index.
        fatalError("positions[\(oldBufferIndex)] is nil!")
    }
    blitEncoder.encodeCopy(
        from: positionSrc,
        to: &positionData,
        dstName: "Positions"
    )

    guard let velocitySrc = velocities[oldBufferIndex] else {
        fatalError("velocities[\(oldBufferIndex)] is nil!")
    }
    blitEncoder.encodeCopy(
        from: velocitySrc,
        to: &velocityData,
        dstName: "Velocity"
    )

    blitEncoder.endEncoding()

    commandBuffer.commit()
    // Ensure blit of data is complete before providing the data to the client
    commandBuffer.waitUntilCompleted()
}

最后，provide(fullData:forSimulationTime:) 变成：

/// Copies all positions and velocities into newly allocated `Data` values and passes them,
/// together with `time`, to `dataProvider`.
///
/// Stops with `fatalError` when either allocation fails.
///
/// - Parameters:
///   - dataProvider: Callback that receives the position data, the velocity data and `time`.
///   - time: Simulation time passed through to `dataProvider` unchanged.
func provide(fullData dataProvider: AAPLFullDatasetProvider, forSimulationTime time: CFAbsoluteTime)
{
    let byteCounts = (
        positions: positions[oldBufferIndex]!.length,
        velocities: velocities[oldBufferIndex]!.length
    )

    guard var positionData = vmAllocData(count: byteCounts.positions) else {
        fatalError("failed to allocate position data")
    }
    guard var velocityData = vmAllocData(count: byteCounts.velocities) else {
        fatalError("failed to allocate velocity data")
    }

    // Fill both values on the GPU; this returns only after the blit has completed.
    copy(positionsInto: &positionData, andVelocitiesInto: &velocityData)

    dataProvider(positionData, velocityData, time)
}

重构版本

相关内容

最新更新

热门标签：