我实现了以下快速选择算法来实现中位数选择(更一般是第 k 个最小数(O(n)
复杂性:
static size_t partition(struct point **points_ptr, size_t points_size, size_t pivot_idx)
{
const double pivot_value = points_ptr[pivot_idx]->distance;
/* Move pivot to the end. */
SWAP(points_ptr[pivot_idx], points_ptr[points_size - 1], struct point *);
/* Perform the element moving. */
size_t border_idx = 0;
for (size_t i = 0; i < points_size - 1; ++i) {
if (points_ptr[i]->distance < pivot_value) {
SWAP(points_ptr[border_idx], points_ptr[i], struct point *);
border_idx++;
}
}
/* Move pivot to act as a border element. */
SWAP(points_ptr[border_idx], points_ptr[points_size - 1], struct point *);
return border_idx;
}
static struct point * qselect(struct point **points_ptr, size_t points_size, size_t k)
{
const size_t pivot_idx = partition(points_ptr, points_size, rand() % points_size);
if (k == pivot_idx) { //k lies on the same place as a pivot
return points_ptr[pivot_idx];
} else if (k < pivot_idx) { //k lies on the left of the pivot
//points_ptr remains the same
points_size = pivot_idx;
//k remains the same
} else { //k lies on the right of the pivot
points_ptr += pivot_idx + 1;
points_size -= pivot_idx + 1;
k -= pivot_idx + 1;
}
return qselect(points_ptr, points_size, k);
}
然后我试图将它与 glibc 的qsort()
与O(nlog(n))
进行比较,并对其卓越的性能感到惊讶。以下是测量代码:
double wtime;
wtime = 0.0;
for (size_t i = 0; i < 1000; ++i) {
qsort(points_ptr, points_size, sizeof (*points_ptr), compar_rand);
wtime -= omp_get_wtime();
qsort(points_ptr, points_size, sizeof (*points_ptr), compar_distance);
wtime += omp_get_wtime();
}
printf("qsort took %fn", wtime);
wtime = 0.0;
for (size_t i = 0; i < 1000; ++i) {
qsort(points_ptr, points_size, sizeof (*points_ptr), compar_rand);
wtime -= omp_get_wtime();
qselect(points_ptr, points_size, points_size / 2);
wtime += omp_get_wtime();
}
printf("qselect took %fn", wtime);
结果类似于 qsort took 0.280432
,qselect took 8.516676
个 10000 个元素的数组。为什么快速排序比快速选择快?
第一个明显的答案是:也许 qsort 没有实现快速排序。自从我阅读标准以来已经有一段时间了,但我认为没有任何要求 qsort(( 执行快速排序。
第二:现有的C标准库通常经过大量优化(例如,在可用的情况下使用特殊的汇编指令(。结合现代CPU的复杂性能特征,这很可能导致O(n log n(算法比O(n(算法更快 - 快速排序不是。
我的猜测是你正在搞砸缓存 - 瓦尔格林德/缓存磨盘灵魂能够告诉你的事情。
感谢您的建议,伙计们,我实现快速选择的问题在于,对于包含许多重复元素的输入,它表现出最坏情况的复杂性O(n^2)
,这是我的情况。Glibc 的qsort()
(默认情况下使用 mergesort(在这里不显示O(n^2)
。
我已经修改了我的 partition()
函数以执行基本的 3 向分区和三向中位数,这非常适合快速选择:
/** breif Quicksort's partition procedure.
*
* In linear time, partition a list into three parts: less than, greater than
* and equals to the pivot, for example input 3 2 7 4 5 1 4 1 will be
* partitioned into 3 2 1 1 | 5 7 | 4 4 4 where 4 is the pivot.
* Modified version of the median-of-three strategy is implemented, it ends with
* a median at the end of an array (this saves us one or two swaps).
*/
static void partition(struct point **points_ptr, size_t points_size,
size_t *less_size, size_t *equal_size)
{
/* Modified median-of-three and pivot selection. */
struct point **first_ptr = points_ptr;
struct point **middle_ptr = points_ptr + (points_size / 2);
struct point **last_ptr = points_ptr + (points_size - 1);
if ((*first_ptr)->distance > (*last_ptr)->distance) {
SWAP(*first_ptr, *last_ptr, struct point *);
}
if ((*first_ptr)->distance > (*middle_ptr)->distance) {
SWAP(*first_ptr, *middle_ptr, struct point *);
}
if ((*last_ptr)->distance > (*middle_ptr)->distance) { //reversed
SWAP(*last_ptr, *middle_ptr, struct point *);
}
const double pivot_value = (*last_ptr)->distance;
/* Element swapping. */
size_t greater_idx = 0;
size_t equal_idx = points_size - 1;
size_t i = 0;
while (i < equal_idx) {
const double elem_value = points_ptr[i]->distance;
if (elem_value < pivot_value) {
SWAP(points_ptr[greater_idx], points_ptr[i], struct point *);
greater_idx++;
i++;
} else if (elem_value == pivot_value) {
equal_idx--;
SWAP(points_ptr[i], points_ptr[equal_idx], struct point *);
} else { //elem_value > pivot_value
i++;
}
}
*less_size = greater_idx;
*equal_size = points_size - equal_idx;
}
/** A selection algorithm to find the kth smallest element in an unordered list.
*/
static struct point * qselect(struct point **points_ptr, size_t points_size,
size_t k)
{
size_t less_size;
size_t equal_size;
partition(points_ptr, points_size, &less_size, &equal_size);
if (k < less_size) { //k lies in the less-than-pivot partition
points_size = less_size;
} else if (k < less_size + equal_size) { //k lies in the equals-to-pivot partition
return points_ptr[points_size - 1];
} else { //k lies in the greater-than-pivot partition
points_ptr += less_size;
points_size -= less_size + equal_size;
k -= less_size + equal_size;
}
return qselect(points_ptr, points_size, k);
}
结果确实是线性的,并且比qsort()
更好(我按照@IVlad的建议使用了费舍尔-耶茨洗牌,所以绝对qsort()
时间更差(:
array size qsort qselect speedup
1000 0.044678 0.008671 5.152328
5000 0.248413 0.045899 5.412160
10000 0.551095 0.096064 5.736730
20000 1.134857 0.191933 5.912773
30000 2.169177 0.278726 7.782467