用C语言实现SHLD/SHRD指令



我正在尝试在不使用内联汇编的情况下,高效地实现 x86 的 SHLD 和 SHRD 指令。

/*
 * Naive double-precision left shift: shifts a left by c and fills the
 * vacated low bits with the top c bits of b.
 *
 * The result is only defined for 1 <= c <= 31. With c == 0 the right
 * shift count becomes 32, which is undefined behaviour for a 32-bit
 * operand.
 */
uint32_t shld_UB_on_0(uint32_t a, uint32_t b, uint32_t c) {
    const uint32_t upper = a << c;
    const uint32_t lower = b >> (32 - c); /* count is 32 when c == 0 */
    return upper | lower;
}

看起来可行,但在 c == 0 时会触发未定义行为,因为第二个移位的位移量变成了 32。而实际的 SHLD 指令在第三个操作数为 0 时有明确定义:不执行任何操作。(https://www.felixcloutier.com/x86/shld)

/*
 * Double-precision left shift that keeps the right-shift count inside
 * 0..31 by computing it as (-c mod 32).
 *
 * For 1 <= c <= 31 this yields (a << c) | (b >> (32 - c)). For c == 0
 * the fill shift is also 0, so the function returns a | b rather than a.
 */
uint32_t shld_broken_on_0(uint32_t a, uint32_t b, uint32_t c) {
    const uint32_t fill_shift = (0u - c) & 31u; /* unsigned negate, then mask */
    return (a << c) | (b >> fill_shift);
}

不会触发未定义行为,但当 c == 0 时,结果是 a | b 而不是 a。

/*
 * Double-precision left shift with an explicit guard for a zero count.
 *
 * Returns a unchanged when c == 0; otherwise shifts a left by c and fills
 * the vacated low bits with the top c bits of b. Counts of 32 or more are
 * not handled and remain undefined behaviour.
 */
uint32_t shld_safe(uint32_t a, uint32_t b, uint32_t c) {
    if (c != 0) {
        const uint32_t upper = a << c;
        const uint32_t lower = b >> (32 - c);
        return upper | lower;
    }
    return a; /* zero count: nothing to shift */
}

行为符合预期,但 gcc 现在会生成一条 je 跳转指令。而 clang 足够聪明,可以将其转换为单条 shld 指令。

有没有任何方法可以在没有内联汇编的情况下正确有效地实现它?

为什么 gcc 如此努力地避免生成 shld?shld_safe 这一尝试被 gcc 11.2 -O3 翻译为(Godbolt):

# Compiler output for shld_safe with an explicit branch on a zero count.
# NOTE(review): the register roles in the comments below assume the
# System V x86-64 calling convention (edi = a, esi = b, edx = c, result
# in eax) — confirm against the actual target.
shld_safe:
mov     eax, edi    # result = a
test    edx, edx    # is c zero?
je      .L1         # yes: skip the shifts and return a unchanged
mov     ecx, 32
sub     ecx, edx    # ecx = 32 - c
shr     esi, cl     # b >>= (32 - c)
mov     ecx, edx
sal     eax, cl     # result <<= c
or      eax, esi    # merge the shifted halves
.L1:
ret

而clang是这样做的,

# Compiler output for shld_safe using a single shld instruction and no branch.
# NOTE(review): the register roles in the comments below assume the
# System V x86-64 calling convention (edi = a, esi = b, edx = c, result
# in eax) — confirm against the actual target.
shld_safe:
mov     ecx, edx        # shift count must be in cl
mov     eax, edi        # result = a
shld    eax, esi, cl    # shift eax left by cl, filling from the top bits of esi
ret

就我使用 gcc 9.3(x86-64)进行的测试而言,它会将以下代码转换为 shldq 和 shrdq。

/*
 * 64-bit double-precision left shift.
 *
 * Treats high:low as one 128-bit value, shifts it left by count modulo 64,
 * and returns the upper 64 bits of the result. Relies on the compiler's
 * unsigned __int128 extension.
 */
uint64_t shldq_x64(uint64_t low, uint64_t high, uint64_t count) {
    const unsigned __int128 wide = ((unsigned __int128)high << 64) | low;
    const unsigned __int128 shifted = wide << (count & 63);
    return (uint64_t)(shifted >> 64);
}
/*
 * 64-bit double-precision right shift.
 *
 * Treats high:low as one 128-bit value, shifts it right by count modulo 64,
 * and returns the lower 64 bits of the result. Relies on the compiler's
 * unsigned __int128 extension.
 */
uint64_t shrdq_x64(uint64_t low, uint64_t high, uint64_t count) {
    const unsigned __int128 wide = ((unsigned __int128)high << 64) | low;
    const unsigned __int128 shifted = wide >> (count & 63);
    return (uint64_t)shifted; /* truncation keeps the low 64 bits */
}

此外,gcc -m32 -O3 会将以下代码翻译为 shld 和 shrd。(不过,我还没有用 gcc(i386)进行测试。)

/*
 * 32-bit double-precision left shift.
 *
 * Treats high:low as one 64-bit value, shifts it left by count modulo 32,
 * and returns the upper 32 bits of the result.
 */
uint32_t shld_x86(uint32_t low, uint32_t high, uint32_t count) {
    const uint64_t wide = ((uint64_t)high << 32) | low;
    const uint64_t shifted = wide << (count & 31);
    return (uint32_t)(shifted >> 32);
}
/*
 * 32-bit double-precision right shift.
 *
 * Treats high:low as one 64-bit value, shifts it right by count modulo 32,
 * and returns the lower 32 bits of the result.
 */
uint32_t shrd_x86(uint32_t low, uint32_t high, uint32_t count) {
    const uint64_t wide = ((uint64_t)high << 32) | low;
    const uint64_t shifted = wide >> (count & 31);
    return (uint32_t)shifted; /* truncation keeps the low 32 bits */
}

(我只是在阅读了 gcc 的代码之后编写了上述函数,也就是说,我不确定它们是否就是您所期望的函数。)

最新更新