为什么我的内存管理器和我的内存池实现如此缓慢?



出于学习目的,我尝试自己实现 MemoryManager 和 MemoryPool,并测试它们与标准实现相比性能如何。但我的实现(尤其是内存管理器)非常慢。有人能给我指个方向,告诉我这里的问题出在哪里吗?

我的内存池:

/// <summary>
/// A byte <see cref="MemoryPool{T}"/> that serves requests below
/// <see cref="PoolUsageBorderBytes"/> with a freshly allocated array and larger
/// requests from <see cref="ArrayPool{T}.Shared"/>.
/// </summary>
internal abstract class ByteMemoryPool : MemoryPool<byte>
{
    /// <summary>
    /// Requests smaller than this many bytes bypass the shared array pool.
    /// NOTE(review): presumably chosen to match the large-object-heap threshold — confirm.
    /// </summary>
    private const int PoolUsageBorderBytes = 85000;

    /// <summary>Buffer size used when the caller passes -1 (no size preference).</summary>
    private const int DefaultBufferSize = 4096;

    /// <inheritdoc />
    public override int MaxBufferSize => Int32.MaxValue;

    /// <summary>The process-wide instance of this pool.</summary>
    public new static ByteMemoryPool.Impl Shared { get; } = new ByteMemoryPool.Impl();

    /// <summary>
    /// Rents a buffer of at least <paramref name="minBufferSize"/> bytes.
    /// </summary>
    /// <param name="minBufferSize">Minimum size in bytes, or -1 to get the default size.</param>
    /// <returns>An owner whose disposal releases the buffer.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="minBufferSize"/> is negative and not -1.
    /// </exception>
    public override IMemoryOwner<byte> Rent(int minBufferSize = -1)
    {
        // Converting the Rental struct to the interface boxes it; Impl.Rent returns the struct itself.
        return RentCore(minBufferSize);
    }

    /// <summary>The pool keeps no state of its own, so there is nothing to release.</summary>
    protected override void Dispose(bool disposing)
    {
    }

    /// <summary>Normalizes the requested size and creates the rental.</summary>
    private Rental RentCore(int minBufferSize)
    {
        if (minBufferSize == -1)
        {
            // -1 is the MemoryPool<T> convention for "no preference"; it must not reach new byte[-1].
            minBufferSize = DefaultBufferSize;
        }
        else if (minBufferSize < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(minBufferSize));
        }

        return new Rental(minBufferSize);
    }

    /// <summary>Concrete pool type that exposes a <see cref="Rent(int)"/> overload returning the struct.</summary>
    public sealed class Impl : ByteMemoryPool
    {
        /// <summary>Rents a buffer and returns the <see cref="Rental"/> struct directly.</summary>
        public new Rental Rent(int minBufferSize) => RentCore(minBufferSize);
    }

    /// <summary>
    /// Owner of a single rented or freshly allocated byte array.
    /// This is a mutable struct: disposing one copy does not mark other copies as disposed.
    /// </summary>
    public struct Rental : IMemoryOwner<byte>
    {
        private byte[]? _array;

        // True when _array came from ArrayPool<byte>.Shared and must be returned on Dispose.
        private readonly bool _pooled;

        /// <summary>
        /// Allocates a new array for sizes below the pool border, otherwise rents one from the shared pool.
        /// </summary>
        /// <param name="minBufferSize">Minimum size of the buffer in bytes.</param>
        public Rental(int minBufferSize)
        {
            _pooled = minBufferSize >= PoolUsageBorderBytes;
            _array = _pooled
                ? ArrayPool<byte>.Shared.Rent(minBufferSize)
                : new byte[minBufferSize];
        }

        /// <summary>The whole underlying array; for pooled arrays this may be longer than requested.</summary>
        /// <exception cref="ObjectDisposedException">The rental has been disposed.</exception>
        public Memory<byte> Memory
        {
            get
            {
                if (_array == null)
                {
                    throw new ObjectDisposedException(nameof(Rental));
                }

                return new Memory<byte>(_array);
            }
        }

        /// <summary>Returns a pooled array (cleared) to the shared pool and drops the reference.</summary>
        public void Dispose()
        {
            byte[]? array = _array;
            _array = null;

            if (array != null && _pooled)
            {
                ArrayPool<byte>.Shared.Return(array, true);
            }
        }
    }
}

我的内存管理器:

/// <summary>
/// A <see cref="MemoryManager{T}"/> over a zero-initialized block of unmanaged memory
/// allocated with <see cref="Marshal.AllocHGlobal(int)"/>. The block is freed when the
/// manager is disposed; no finalizer is defined, so an undisposed instance leaks its block.
/// </summary>
internal sealed class NativeByteMemoryManager : MemoryManager<byte>
{
    private IntPtr _memoryPtr;
    private readonly int _length;

    /// <summary>Allocates and zeroes <paramref name="length"/> bytes of unmanaged memory.</summary>
    /// <param name="length">Size of the block in bytes; must not be negative.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
    public unsafe NativeByteMemoryManager(int length)
    {
        if (length < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(length));
        }

        _length = length;
        _memoryPtr = Marshal.AllocHGlobal(length);
        Unsafe.InitBlock((void*)_memoryPtr, 0, (uint)_length);
    }

    /// <summary>A <see cref="Memory{T}"/> covering the whole native block.</summary>
    public override Memory<byte> Memory => CreateMemory(_length);

    /// <summary>Returns a span over the whole native block.</summary>
    /// <exception cref="ObjectDisposedException">The manager has been disposed.</exception>
    public override unsafe Span<byte> GetSpan()
    {
        ThrowIfDisposed();
        return new Span<byte>(_memoryPtr.ToPointer(), _length);
    }

    /// <summary>
    /// Returns a handle pointing at <paramref name="elementIndex"/> inside the block.
    /// </summary>
    /// <param name="elementIndex">Byte offset into the block, from 0 up to and including the length.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="elementIndex"/> is outside the block.</exception>
    /// <exception cref="ObjectDisposedException">The manager has been disposed.</exception>
    public override unsafe MemoryHandle Pin(int elementIndex = 0)
    {
        ThrowIfDisposed();

        // The unsigned comparison rejects negative indexes and indexes past the end in one check.
        if ((uint)elementIndex > (uint)_length)
        {
            throw new ArgumentOutOfRangeException(nameof(elementIndex));
        }

        void* pointer = (byte*)_memoryPtr + elementIndex;
        return new MemoryHandle(pointer, default, this);
    }

    /// <summary>
    /// Does nothing. Unmanaged memory does not move, so there is no pin to release; the block
    /// must stay allocated here because disposing a <see cref="MemoryHandle"/> calls this
    /// method while the manager is still in use.
    /// </summary>
    public override void Unpin()
    {
    }

    /// <summary>Frees the native block once; later calls are no-ops.</summary>
    protected override void Dispose(bool disposing)
    {
        if (_memoryPtr != IntPtr.Zero)
        {
            Marshal.FreeHGlobal(_memoryPtr);
            _memoryPtr = IntPtr.Zero;
        }
    }

    /// <summary>Guards against building spans or pointers over an already freed block.</summary>
    private void ThrowIfDisposed()
    {
        if (_memoryPtr == IntPtr.Zero)
        {
            throw new ObjectDisposedException(nameof(NativeByteMemoryManager));
        }
    }
}

基准:

/// <summary>
/// Benchmarks comparing rent/return through <see cref="ArrayPool{T}.Shared"/> (the baseline)
/// with <c>ByteMemoryPool</c> and <c>NativeByteMemoryManager</c> for several buffer sizes.
/// </summary>
public class MemoryManagerBenchmark
{
    /// <summary>Requested buffer size in bytes for the current benchmark run.</summary>
    [Params(1000, 8000, 64000, 4000000)]
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Design", "CA1051:Do not declare visible instance fields", Justification = "<Pending>")]
    public int ArraySize;

    /// <summary>Baseline: rents from the shared array pool and returns the array cleared.</summary>
    /// <returns>Length of the rented array.</returns>
    [Benchmark(Baseline = true)]
    public int MemoryPoolDefault()
    {
        byte[] rented = ArrayPool<byte>.Shared.Rent(ArraySize);
        int length = rented.Length;
        ArrayPool<byte>.Shared.Return(rented, true);
        return length;
    }

    /// <summary>Rents from <c>ByteMemoryPool.Shared</c> and disposes the rental.</summary>
    /// <returns>Length of the rented memory.</returns>
    [Benchmark(Baseline = false)]
    public int MemoryPoolByte()
    {
        using (var rental = ByteMemoryPool.Shared.Rent(ArraySize))
        {
            return rental.Memory.Length;
        }
    }

    /// <summary>Creates a <c>NativeByteMemoryManager</c> and disposes it.</summary>
    /// <returns>Length of the manager's memory.</returns>
    [Benchmark(Baseline = false)]
    public int MemoryManager()
    {
        using (var manager = new NativeByteMemoryManager(ArraySize))
        {
            return manager.Memory.Length;
        }
    }
}

结果如下:

BenchmarkDotNet=v0.11.5, OS=Windows 10.0.17763.107 (1809/October2018Update/Redstone5)
Intel Core i7-2600 CPU 3.40GHz (Sandy Bridge), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=3.0.100
[Host]     : .NET Core 3.0.0 (CoreCLR 4.700.19.46205, CoreFX 4.700.19.46214), 64bit RyuJIT
Job-MXYBLG : .NET Core 3.0.0 (CoreCLR 4.700.19.46205, CoreFX 4.700.19.46214), 64bit RyuJIT
Force=False  IterationCount=15  LaunchCount=2  
WarmupCount=10  
Method | ArraySize |           Mean |         Error |        StdDev |          Median | Kurtosis | Skewness | Ratio | RatioSD | Rank | Baseline |    Gen 0 |    Gen 1 |    Gen 2 | Allocated |
------------------ |---------- |---------------:|--------------:|--------------:|----------------:|---------:|---------:|------:|--------:|-----:|--------- |---------:|---------:|---------:|----------:|
**MemoryManager** |      **1000** |       **187.3 ns** |     **28.466 ns** |     **41.726 ns** |       **168.24 ns** |    **1.210** |   **0.1370** |  **1.88** |    **0.45** |    **3** |       **No** |   **0.0076** |        **-** |        **-** |      **32 B** |
**MemoryPoolByte** |      **1000** |       **123.1 ns** |      **6.342 ns** |      **9.492 ns** |       **121.44 ns** |    **1.730** |   **0.4267** |  **1.24** |    **0.14** |    **2** |       **No** |   **0.2447** |        **-** |        **-** |    **1024 B** |
**MemoryPoolDefault** |      **1000** |       **100.2 ns** |      **5.226 ns** |      **7.821 ns** |        **97.50 ns** |    **2.284** |   **0.6929** |  **1.00** |    **0.00** |    **1** |      **Yes** |        **-** |        **-** |        **-** |         **-** |
|           |                |               |               |                 |          |          |       |         |      |          |          |          |          |           |
**MemoryManager** |      **8000** |       **374.1 ns** |     **25.279 ns** |     **37.054 ns** |       **349.88 ns** |    **1.264** |   **0.2485** |  **1.54** |    **0.22** |    **2** |       **No** |   **0.0076** |        **-** |        **-** |      **32 B** |
**MemoryPoolByte** |      **8000** |       **842.4 ns** |     **12.637 ns** |     **18.523 ns** |       **839.46 ns** |    **2.485** |   **0.7287** |  **3.46** |    **0.26** |    **3** |       **No** |   **1.9150** |        **-** |        **-** |    **8024 B** |
**MemoryPoolDefault** |      **8000** |       **245.1 ns** |     **12.542 ns** |     **17.988 ns** |       **236.31 ns** |    **5.935** |   **1.9246** |  **1.00** |    **0.00** |    **1** |      **Yes** |        **-** |        **-** |        **-** |         **-** |
|           |                |               |               |                 |          |          |       |         |      |          |          |          |          |           |
**MemoryManager** |     **64000** |     **2,311.8 ns** |     **87.763 ns** |    **131.359 ns** |     **2,266.83 ns** |    **2.146** |   **0.6641** |  **1.06** |    **0.06** |    **2** |       **No** |   **0.0076** |        **-** |        **-** |      **32 B** |
**MemoryPoolByte** |     **64000** |     **5,351.5 ns** |     **82.720 ns** |    **118.634 ns** |     **5,298.23 ns** |    **4.749** |   **1.5884** |  **2.46** |    **0.14** |    **3** |       **No** |  **15.1443** |        **-** |        **-** |   **64024 B** |
**MemoryPoolDefault** |     **64000** |     **2,187.6 ns** |     **83.603 ns** |    **125.133 ns** |     **2,102.50 ns** |    **2.154** |   **0.9189** |  **1.00** |    **0.00** |    **1** |      **Yes** |        **-** |        **-** |        **-** |         **-** |
|           |                |               |               |                 |          |          |       |         |      |          |          |          |          |           |
**MemoryManager** |   **4000000** | **2,188,789.3 ns** | **65,843.021 ns** | **98,550.733 ns** | **2,165,661.52 ns** |    **4.130** |   **1.3955** | **10.78** |    **0.72** |    **2** |       **No** |        **-** |        **-** |        **-** |      **32 B** |
**MemoryPoolByte** |   **4000000** |   **199,434.5 ns** |  **2,634.057 ns** |  **3,777.686 ns** |   **198,360.50 ns** |    **3.567** |   **0.9854** |  **0.98** |    **0.04** |    **1** |       **No** | **999.7559** | **999.7559** | **999.7559** |         **-** |
**MemoryPoolDefault** |   **4000000** |   **203,299.8 ns** |  **3,986.979 ns** |  **5,967.523 ns** |   **201,993.74 ns** |    **3.340** |   **0.7295** |  **1.00** |    **0.00** |    **1** |      **Yes** | **999.7559** | **999.7559** | **999.7559** |         **-** |

基准测试工具只能告诉您运行给定代码需要多长时间。 探查器可以告诉您为什么需要这么长时间。因此,您需要分析代码以找出答案。

我已经在我的博客上描述了使用BenchmarkDotNet和PerfView进行的示例性能调查,这对您来说可能是一个很好的起点:https://adamsitnik.com/Sample-Perf-Investigation/

如果 PerfView 不符合您的要求,您可以尝试其他探查器:https://github.com/dotnet/performance/blob/master/docs/profiling-workflow-dotnet-runtime.md

相关内容

  • 没有找到相关文章

最新更新