可能的重复项:
为什么每个人都说旋转锁更快?
这个问题是关于SpinLock,Monitor和Interlocked的。
我做了 2 个测试来测试Monitor
、SpinLock
和Interlocked
的性能,这些测试让我感到困惑。
我的困惑是 SpinLock 到底有多快。根据我的测试,SpinLock 比 Monitor 慢。但根据一些文档和文章,SpinLock 应该能够带来性能提升。
现在我想知道:在哪些情况下 SpinLock 才能带来性能提升?
您可以在下面找到我执行的测试的一些详细信息:
在第一个测试中,我创建了几个线程(与我拥有的硬件线程一样多)访问同一个共享锁对象,操作时间很短(或根本没有操作:这只是一个测试)。
在第二个测试中,我创建了一个元素数组和几个线程随机访问该数组中的元素。每个元素都包含自己的锁定对象:System.Object
用于Monitor
测试,SpinLock
对象用于SpinLock
测试,至于Interlocked.Increment
,线程使用数组元素内部 int 类型的公共变量来执行Interlocked.Increment
操作。
在每个测试中,对共享区域的访问都是在循环中执行的。每个测试由 3 个例程组成:
- 测试 SpinLock
- 测试 Monitor
- 测试 Interlocked.Increment
每次测试都表明SpinLock
比Monitor
慢。因此,自从执行上述测试以来,一直困扰我的问题是:SpinLock 在哪些场景下才能带来性能提升?
下面贴出测试代码,以提供更多细节:
(两个测试都是针对 .net 4.5 编译的)
测试 1,线程正在尝试获得对同一共享锁定对象的独占访问权限
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Threading.Tasks;
using System.Linq;
using System.Globalization;
using System.ComponentModel;
using System.Threading;
using System.Net.Sockets;
using System.Net;
/// <summary>
/// Test 1 (high contention): every worker thread repeatedly updates the single shared
/// counter <see cref="_testingVar"/>, once via Interlocked.Increment, once under a shared
/// SpinLock and once under a shared Monitor, and the wall-clock time of each variant is printed.
/// </summary>
class Program
{
    // Iterations performed by each worker thread.
    static int _loopsCount = 1000000;
    // Number of worker threads; set from Environment.ProcessorCount in Main.
    static int _threadsCount = -1;
    static ProcessPriorityClass _processPriority = ProcessPriorityClass.RealTime;
    static ThreadPriority _threadPriority = ThreadPriority.Highest;
    // The shared variable all workers increment.
    static long _testingVar = 0;

    // Must NOT be readonly: SpinLock is a mutable struct and Enter/Exit have to operate
    // on this field itself rather than on a copy.
    // NOTE(review): the parameterless constructor leaves thread-owner tracking enabled,
    // which is documented as a debugging aid; new SpinLock(false) avoids that bookkeeping
    // and is worth measuring as well.
    static SpinLock _spinLock = new SpinLock();
    static readonly object _locker = new object();

    /// <summary>
    /// Runs the three variants one after another and prints their elapsed times.
    /// </summary>
    static void Main(string[] args)
    {
        _threadsCount = Environment.ProcessorCount;
        _threadsCount = (_threadsCount == 0) ? 1 : _threadsCount;
        Console.WriteLine("Cores/processors count: {0}", Environment.ProcessorCount);
        Console.WriteLine("Threads count: {0}", _threadsCount);
        using (Process currentProcess = Process.GetCurrentProcess())
        {
            currentProcess.PriorityClass = _processPriority;
        }
        TimeSpan tsInterlocked = ExecuteInterlocked();
        TimeSpan tsSpinLock = ExecuteSpinLock();
        TimeSpan tsMonitor = ExecuteMonitor();
        // The "\r\n" separators put each result on its own line.
        Console.WriteLine("Test with interlocked: {0} ms\r\nTest with SpinLock: {1} ms\r\nTest with Monitor: {2} ms",
            tsInterlocked.TotalMilliseconds,
            tsSpinLock.TotalMilliseconds,
            tsMonitor.TotalMilliseconds);
        Console.ReadLine();
    }

    /// <summary>
    /// Resets the shared counter, starts <see cref="_threadsCount"/> threads that all wait on a
    /// common start signal, releases them together and measures the time until every thread
    /// has finished <paramref name="workload"/>.
    /// </summary>
    /// <param name="workload">The complete per-thread loop to be timed.</param>
    /// <returns>Time between releasing the threads and the last completion signal.</returns>
    static TimeSpan RunOnAllThreads(ThreadStart workload)
    {
        _testingVar = 0;
        using (ManualResetEvent startEvent = new ManualResetEvent(false))
        using (CountdownEvent endCountdown = new CountdownEvent(_threadsCount))
        {
            Thread[] threads = new Thread[_threadsCount];
            for (int i = 0; i < threads.Length; i++)
            {
                threads[i] = new Thread(() =>
                {
                    startEvent.WaitOne();
                    workload();
                    endCountdown.Signal();
                });
                threads[i].Priority = _threadPriority;
                threads[i].Start();
            }

            Stopwatch sw = Stopwatch.StartNew();
            startEvent.Set();
            endCountdown.Wait();
            TimeSpan elapsed = sw.Elapsed;

            // Join before the events are disposed so that no worker can still be inside
            // Signal() while the CountdownEvent is being torn down.
            foreach (Thread thread in threads)
            {
                thread.Join();
            }
            return elapsed;
        }
    }

    /// <summary>Times lock-free increments of the shared counter.</summary>
    static TimeSpan ExecuteInterlocked()
    {
        return RunOnAllThreads(() =>
        {
            for (int j = 0; j < _loopsCount; j++)
            {
                Interlocked.Increment(ref _testingVar);
            }
        });
    }

    /// <summary>Times increments of the shared counter guarded by the shared SpinLock.</summary>
    static TimeSpan ExecuteSpinLock()
    {
        return RunOnAllThreads(() =>
        {
            bool lockTaken;
            for (int j = 0; j < _loopsCount; j++)
            {
                lockTaken = false;
                try
                {
                    _spinLock.Enter(ref lockTaken);
                    _testingVar++;
                }
                finally
                {
                    if (lockTaken)
                    {
                        _spinLock.Exit();
                    }
                }
            }
        });
    }

    /// <summary>Times increments of the shared counter guarded by Monitor on a shared object.</summary>
    static TimeSpan ExecuteMonitor()
    {
        return RunOnAllThreads(() =>
        {
            bool lockTaken;
            for (int j = 0; j < _loopsCount; j++)
            {
                lockTaken = false;
                try
                {
                    Monitor.Enter(_locker, ref lockTaken);
                    _testingVar++;
                }
                finally
                {
                    if (lockTaken)
                    {
                        Monitor.Exit(_locker);
                    }
                }
            }
        });
    }
}
测试 2,线程试图获得对数组元素的独占访问权限,这些元素是随机选择的,即以低争用进行测试
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace TestConcurrency
{
/// <summary>
/// Test 2 (low contention): every worker thread picks random elements of a 1000-element
/// array and either takes that element's Monitor lock, takes that element's SpinLock, or
/// increments that element's counter with Interlocked.Increment. The wall-clock time of
/// each variant is printed.
/// </summary>
class Program
{
    // Iterations performed by each worker thread.
    static int _loopsCount = 10000000;
    // Number of worker threads; set from Environment.ProcessorCount in Main.
    static int _threadsCount = -1;
    // Number of independently locked array elements.
    static int _arrayCount = 1000;
    static ProcessPriorityClass _processPriority = ProcessPriorityClass.RealTime;
    static ThreadPriority _threadPriority = ThreadPriority.Highest;

    /// <summary>
    /// Runs the three variants one after another and prints their elapsed times.
    /// </summary>
    static void Main(string[] args)
    {
        _threadsCount = Environment.ProcessorCount;
        _threadsCount = (_threadsCount == 0) ? 1 : _threadsCount;
        Console.WriteLine("Cores/processors count: {0}", Environment.ProcessorCount);
        Console.WriteLine("Threads count: {0}", _threadsCount);
        using (Process currentProcess = Process.GetCurrentProcess())
        {
            currentProcess.PriorityClass = _processPriority;
        }
        TimeSpan tsInterlocked = ExecuteInterlocked();
        TimeSpan tsSpinLock = ExecuteSpinLock();
        TimeSpan tsMonitor = ExecuteMonitor();
        // The "\r\n" separators put each result on its own line.
        Console.WriteLine("Test with interlocked: {0} ms\r\nTest with SpinLock: {1} ms\r\nTest with Monitor: {2} ms",
            tsInterlocked.TotalMilliseconds,
            tsSpinLock.TotalMilliseconds,
            tsMonitor.TotalMilliseconds);
        Console.ReadLine();
    }

    /// <summary>Yields the indices 0 .. _arrayCount - 1, used to build the element arrays.</summary>
    static IEnumerable<int> newList()
    {
        return Enumerable.Range(0, _arrayCount);
    }

    /// <summary>
    /// Starts <see cref="_threadsCount"/> threads, each with its own deterministically seeded
    /// Random, releases them together and measures the time until every thread has finished
    /// <paramref name="workload"/>.
    /// </summary>
    /// <param name="workload">The complete per-thread loop to be timed; receives the thread's Random.</param>
    /// <returns>Time between releasing the threads and the last completion signal.</returns>
    static TimeSpan RunOnAllThreads(Action<Random> workload)
    {
        using (ManualResetEvent startEvent = new ManualResetEvent(false))
        using (CountdownEvent endCountdown = new CountdownEvent(_threadsCount))
        {
            Thread[] threads = new Thread[_threadsCount];
            for (int i = 0; i < threads.Length; i++)
            {
                // Copy the loop variable so each closure sees its own thread index.
                int localI = i;
                threads[i] = new Thread(() =>
                {
                    // Created before the start signal so seeding is not part of the timing.
                    Random r = new Random(localI * localI * localI);
                    startEvent.WaitOne();
                    workload(r);
                    endCountdown.Signal();
                });
                threads[i].Priority = _threadPriority;
                threads[i].Start();
            }

            // Collect the setup garbage up front, before the stopwatch starts.
            GC.Collect();
            Stopwatch sw = Stopwatch.StartNew();
            startEvent.Set();
            endCountdown.Wait();
            TimeSpan elapsed = sw.Elapsed;

            // Join before the events are disposed so that no worker can still be inside
            // Signal() while the CountdownEvent is being torn down.
            foreach (Thread thread in threads)
            {
                thread.Join();
            }
            return elapsed;
        }
    }

    /// <summary>Times acquiring and releasing the Monitor lock of randomly chosen elements.</summary>
    static TimeSpan ExecuteMonitor()
    {
        ArrayElementForMonitor[] array = newList().Select(i => new ArrayElementForMonitor()).ToArray();
        return RunOnAllThreads(r =>
        {
            int index = 0;
            bool lockTaken;
            for (int j = 0; j < _loopsCount; j++)
            {
                index = r.Next(0, _arrayCount);
                lockTaken = false;
                try
                {
                    Monitor.Enter(array[index].Locker, ref lockTaken);
                }
                finally
                {
                    if (lockTaken)
                    {
                        Monitor.Exit(array[index].Locker);
                    }
                }
            }
        });
    }

    /// <summary>Times acquiring and releasing the SpinLock of randomly chosen elements.</summary>
    static TimeSpan ExecuteSpinLock()
    {
        ArrayElementForSpinLock[] array = newList().Select(i => new ArrayElementForSpinLock()).ToArray();
        return RunOnAllThreads(r =>
        {
            int index = 0;
            bool lockTaken;
            for (int j = 0; j < _loopsCount; j++)
            {
                index = r.Next(0, _arrayCount);
                lockTaken = false;
                try
                {
                    // Always go through the field: copying the SpinLock struct into a
                    // local would lock the copy instead of the element's lock.
                    array[index].Locker.Enter(ref lockTaken);
                }
                finally
                {
                    if (lockTaken)
                    {
                        array[index].Locker.Exit();
                    }
                }
            }
        });
    }

    /// <summary>Times Interlocked.Increment on the counter of randomly chosen elements.</summary>
    static TimeSpan ExecuteInterlocked()
    {
        ArrayElementInterlocked[] array = newList().Select(i => new ArrayElementInterlocked()).ToArray();
        return RunOnAllThreads(r =>
        {
            int index = 0;
            for (int j = 0; j < _loopsCount; j++)
            {
                index = r.Next(0, _arrayCount);
                Interlocked.Increment(ref array[index].Element);
            }
        });
    }
}
/// <summary>
/// Array element for the Monitor test: carries the object used as the Monitor lock.
/// </summary>
public class ArrayElementForMonitor
{
    // readonly so the lock identity can never be swapped while threads are locking on it.
    public readonly object Locker = new object();
}
/// <summary>
/// Array element for the SpinLock test: carries the element's own SpinLock.
/// </summary>
public class ArrayElementForSpinLock
{
    // Intentionally a mutable (non-readonly) field: SpinLock is a struct, so Enter/Exit
    // must be invoked on the field itself rather than on a copy.
    public SpinLock Locker;

    public ArrayElementForSpinLock()
    {
        Locker = new SpinLock();
    }
}
/// <summary>
/// Array element for the Interlocked test: carries the counter that is incremented atomically.
/// </summary>
public class ArrayElementInterlocked
{
    // Public field (not a property) so it can be passed by ref to Interlocked.Increment.
    public int Element = 0;
}
}
附加测试 3。测试在单个线程中执行,因此线程成功获得锁的几率最高。
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace TestSimpleLocking
{
/// <summary>
/// Test 3 (no contention): a single thread performs the same increment loop three times,
/// using Interlocked.Increment, a SpinLock and a Monitor, and prints the wall-clock
/// time of each variant.
/// </summary>
class Program
{
    // Iterations performed by each variant.
    static int _loopsCount = 100000000;
    static ProcessPriorityClass _processPriority = ProcessPriorityClass.RealTime;
    static ThreadPriority _threadPriority = ThreadPriority.Highest;

    /// <summary>
    /// Raises process and thread priority, runs the three variants and prints their elapsed times.
    /// </summary>
    static void Main(string[] args)
    {
        using (Process currentProcess = Process.GetCurrentProcess())
        {
            currentProcess.PriorityClass = _processPriority;
        }
        Thread.CurrentThread.Priority = _threadPriority;
        TimeSpan tsInterlocked = ExecuteInterlocked();
        TimeSpan tsSpinLock = ExecuteSpinLock();
        TimeSpan tsMonitor = ExecuteMonitor();
        // The "\r\n" separators put each result on its own line.
        Console.WriteLine("Test with interlocked: {0} ms\r\nTest with SpinLock: {1} ms\r\nTest with Monitor: {2} ms",
            tsInterlocked.TotalMilliseconds,
            tsSpinLock.TotalMilliseconds,
            tsMonitor.TotalMilliseconds);
        Console.ReadLine();
    }

    /// <summary>Times incrementing a local counter under an uncontended Monitor lock.</summary>
    /// <returns>Elapsed time of the loop.</returns>
    static TimeSpan ExecuteMonitor()
    {
        object locker = new object();
        int variable = 0;
        Stopwatch sw = Stopwatch.StartNew();
        bool lockTaken = false;
        for (int i = 0; i < _loopsCount; i++)
        {
            lockTaken = false;
            try
            {
                Monitor.Enter(locker, ref lockTaken);
                variable++;
            }
            finally
            {
                if (lockTaken)
                {
                    Monitor.Exit(locker);
                }
            }
        }
        sw.Stop();
        // Print the counter after the stopwatch has stopped, so console I/O is not timed.
        Console.WriteLine(variable);
        return sw.Elapsed;
    }

    /// <summary>Times incrementing a local counter under an uncontended SpinLock.</summary>
    /// <returns>Elapsed time of the loop.</returns>
    static TimeSpan ExecuteSpinLock()
    {
        // NOTE(review): the parameterless constructor leaves thread-owner tracking enabled,
        // which is documented as a debugging aid; new SpinLock(false) avoids that bookkeeping
        // and is worth measuring as well.
        SpinLock spinLock = new SpinLock();
        int variable = 0;
        Stopwatch sw = Stopwatch.StartNew();
        bool lockTaken = false;
        for (int i = 0; i < _loopsCount; i++)
        {
            lockTaken = false;
            try
            {
                spinLock.Enter(ref lockTaken);
                variable++;
            }
            finally
            {
                if (lockTaken)
                {
                    spinLock.Exit();
                }
            }
        }
        sw.Stop();
        // Print the counter after the stopwatch has stopped, so console I/O is not timed.
        Console.WriteLine(variable);
        return sw.Elapsed;
    }

    /// <summary>Times incrementing a local counter with Interlocked.Increment.</summary>
    /// <returns>Elapsed time of the loop.</returns>
    static TimeSpan ExecuteInterlocked()
    {
        int variable = 0;
        Stopwatch sw = Stopwatch.StartNew();
        for (int i = 0; i < _loopsCount; i++)
        {
            Interlocked.Increment(ref variable);
        }
        sw.Stop();
        // Print the counter after the stopwatch has stopped, so console I/O is not timed.
        Console.WriteLine(variable);
        return sw.Elapsed;
    }
}
}
据我了解,第三个测试是最适合选用 SpinLock 的情况:完全没有争用,单线程顺序执行。为什么 SpinLock 仍然远远落后于 Monitor?有人能给出一些代码,向我证明 SpinLock 是有用的吗(设备驱动程序开发除外)?
如果资源的争用较低(即,当资源的锁定几乎总是成功时),SpinLock 会非常快。参考资料:乔·达菲(Joe Duffy)的书籍和博客 http://www.bluebytesoftware.com/blog/
> 在每个测试中,对共享区域的访问都是在循环中执行的

这*可能*意味着争用很高(顺便说一句,你能发布一个完整的代码示例吗?这将有助于减少所需的“猜测”)。因此,自旋锁很可能先自旋、然后再等待——这使它比直接等待的 Monitor 更糟糕。
编辑:阅读有关已关闭的相关问题的详细信息后:我完全同意汉斯·帕桑特的回答:
所以基本要求是锁只被持有很短的时间,这在您的情况下成立;并且要有合理的几率能够获得锁,而这在您的情况下并不成立:锁受到不少于 24 个线程的激烈争用。
盲目地使用 SpinLock,而不进行测量和/或至少了解其设计背后的原理,是一种过早优化,很容易导致代码实际上更慢、甚至不正确:请记住,一些同步结构保证公平性和/或进度,而另一些则不保证;有些在大量访问为只读时效果更好,有些在争用较低时效果更好,等等。在这种情况下,公平性可能是相关因素。
再提一个未经验证的粗略假设:更让我惊讶的是 Interlocked.Increment 比 Monitor 慢或与之相当。这让我想到了缓存一致性问题;毕竟,Interlocked 也是在写入争用很少时效果最好,因为它是在目标变量上使用原子 CAS 操作实现的。在像您这样的写入密集型场景中,它将需要大量重试,而每次重试都可能在核心间总线上产生大量流量,以保持缓存一致性。使用 Monitor 或许能以某种方式更好地“串行化”访问,从而减少核心间/进程间总线上的流量。但这一切都只是猜测 :)