我是SSIM.js和jest-image-snapshot的活跃作者和维护者。目前,我正在优化我们的图像处理实现,利用WebAssembly来提高性能。
现在,我注意到,无论是从LLVM汇编(WebAssembly文本?)输出来看,还是从Node.js的实际汇编输出(--print-wasm-code)来看,生成的代码都添加了不必要的指令。特别值得注意的是,它在加载常量时会做一些非常奇怪的事情。例如,在下面的三段代码中,请查看名为multiplier的数组或常量rounder。在GCC上,multiplier会存放在汇编的.rodata段中,只加载一次,或者被转换为整数;rounder则通过movd或movq内联。而在这里,它似乎在循环的每一轮都重新插入这些值。它还用vpblendw做了一些我完全看不懂的事情。
我该如何解决这个问题?
// 16-bit per-lane weights for two widened RGBA pixels: R*77, G*150, B*29, and
// 1 for the fourth lane (rgba2y substitutes a rounding byte for alpha there).
// 77 + 150 + 29 == 256, so the weighted sum shifted right by 8 fits in 8 bits.
// Both halves must be identical; the second half previously held 160 for G.
alignas(64) static const uint16_t multiplierArray[8] = {77, 150, 29, 1, 77, 150, 29, 1};
extern "C"
int rgba2y(void* inputDataBuffer, ptrdiff_t length) {
typedef __u8x16 v8x16;
typedef __u16x8 v16x8;
v8x16* pInputPtr = (v8x16*) inputDataBuffer;
v8x16* pInputPtrEnd = (v8x16*)((uint8_t*)inputDataBuffer + length);
v8x16* pOutputPtr = (v8x16*) inputDataBuffer;
__m128i rounder = _mm_cvtsi32_si128(0x80808080);
v8x16 zero;
zero ^= zero;
__m128i multiplier = *((__m128i*)multiplierArray);
// v16x8 multiplier = wasm_i64x2_splat(0x1001d0096004d);
unsigned i = 0;
for (; (i+4)*sizeof(__m128i)<= length; i+= 4) {
v8x16 iv0 = wasm_v8x16_shuffle(pInputPtr[i/4],rounder,0,1,2,16,4,5,6,16,8,9,10,16,12,13,14,16);
v8x16 iv1 = wasm_v8x16_shuffle(pInputPtr[i/4+1],rounder,0,1,2,16,4,5,6,16,8,9,10,16,12,13,14,16);
v8x16 iv2 = wasm_v8x16_shuffle(pInputPtr[i/4+2],rounder,0,1,2,16,4,5,6,16,8,9,10,16,12,13,14,16);
v8x16 iv3 = wasm_v8x16_shuffle(pInputPtr[i/4+3],rounder,0,1,2,16,4,5,6,16,8,9,10,16,12,13,14,16);
// rg ba rg ba rg ba rg ba rg ba rg ba rg ba
__m128i rg0 = _mm_hadd_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8((__m128i)iv0, (__m128i)zero),(__m128i)multiplier), _mm_mullo_epi16(_mm_unpackhi_epi8((__m128i)iv0,(__m128i)zero),(__m128i)multiplier));
__m128i rg1 = _mm_hadd_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8((__m128i)iv1,(__m128i)zero),(__m128i)multiplier), _mm_mullo_epi16(_mm_unpackhi_epi8((__m128i)iv1,(__m128i)zero),(__m128i)multiplier));
__m128i rg2 = _mm_hadd_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8((__m128i)iv2,(__m128i)zero),(__m128i)multiplier), _mm_mullo_epi16(_mm_unpackhi_epi8((__m128i)iv2,(__m128i)zero),(__m128i)multiplier));
__m128i rg3 = _mm_hadd_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8((__m128i)iv3,(__m128i)zero),(__m128i)multiplier), _mm_mullo_epi16(_mm_unpackhi_epi8((__m128i)iv3,(__m128i)zero),(__m128i)multiplier));
// rgba rgba rgba rgba rgba rgba rgba rgba
__m128i rgba0 = wasm_u16x8_shr(_mm_hadd_epi16(rg0,rg1), 8);
__m128i rgba1 = wasm_u16x8_shr(_mm_hadd_epi16(rg2,rg3), 8);
pOutputPtr[i/4] = wasm_u8x16_narrow_i16x8(rgba0,rgba1);
}
// abbreviated...
return 0;
}
LLVM汇编输出如下:
# Compiler-generated WebAssembly assembly for rgba2y(i32, i32) -> i32.
# Local 0 and local 1 are the two i32 parameters; locals 2-3 are i32 scratch
# values and locals 4-9 are v128 temporaries.
.section .text.rgba2y,"",@
.hidden rgba2y # -- Begin function rgba2y
.globl rgba2y
.type rgba2y,@function
rgba2y: # @rgba2y
.Lfunc_begin0:
.loc 2 56 0 # rgb2y-sample.cpp:56:0
.functype rgba2y (i32, i32) -> (i32)
.local i32, i32, v128, v128, v128, v128, v128, v128
# %bb.0: # %entry
#DEBUG_VALUE: rgba2y:length <- %4
#DEBUG_VALUE: rgba2y:pInputPtrEnd <- undef
#DEBUG_VALUE: rgba2y:i <- 0
#DEBUG_VALUE: rgba2y:inputDataBuffer <- %3
#DEBUG_VALUE: rgba2y:pInputPtr <- %3
#DEBUG_VALUE: rgba2y:pOutputPtr <- %3
#DEBUG_VALUE: rgba2y:rounder <- undef
#DEBUG_VALUE: rgba2y:zero <- undef
#DEBUG_VALUE: rgba2y:multiplier <- undef
block
.Ltmp0:
.loc 2 68 30 prologue_end # rgb2y-sample.cpp:68:30
# Skip the loop entirely when local 1 is below 64 (unsigned compare).
local.get 1
i32.const 64
i32.lt_u
.Ltmp1:
.loc 2 68 2 is_stmt 0 # rgb2y-sample.cpp:68:2
br_if 0 # 0: down to label0
.Ltmp2:
# %bb.1:
.loc 2 0 2 # rgb2y-sample.cpp:0:2
# local 2 = 0 (current index), local 3 = 4 (next index).
i32.const 0
local.set 2
i32.const 4
local.set 3
.LBB0_2: # %for.body
# =>This Inner Loop Header: Depth=1
loop # label1:
.Ltmp3:
#DEBUG_VALUE: rgba2y:i <- %101
#DEBUG_VALUE: rgba0 <- undef
#DEBUG_VALUE: rgba1 <- undef
.loc 2 69 15 is_stmt 1 # rgb2y-sample.cpp:69:15
# address = local 0 + (local 2 << 2). One copy stays on the stack for the
# final v128.store; the other is used for the first 16-byte load.
local.get 0
local.get 2
i32.const 2
i32.shl
i32.add
local.tee 2
local.get 2
v128.load 0
# All-zero vector, kept in local 4.
i32.const 0
i8x16.splat
local.tee 4
# The 0x80 constant is materialized inside the loop body: lanes 0-3 of the
# zero vector are replaced one at a time with -128; result kept in local 5.
i32.const -128
i8x16.replace_lane 0
i32.const -128
i8x16.replace_lane 1
i32.const -128
i8x16.replace_lane 2
i32.const -128
i8x16.replace_lane 3
local.tee 5
# Keep bytes 0-2 of every 4-byte group of the loaded vector; every fourth
# byte is taken from lane 0 of the second operand (index 16).
v8x16.shuffle 0, 1, 2, 16, 4, 5, 6, 16, 8, 9, 10, 16, 12, 13, 14, 16
.Ltmp4:
.loc 2 74 48 # rgb2y-sample.cpp:74:48
local.tee 6
.Ltmp5:
#DEBUG_VALUE: iv0 <- undef
#DEBUG_VALUE: iv0 <- %153
local.get 4
# Interleave the low 8 bytes with zero bytes (widen to 16-bit lanes).
v8x16.shuffle 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23
# The 16-bit multiplier vector is also built inside the loop body: splat 77,
# then overwrite lanes 1-3 and 5-7 individually (lane 4 keeps the splatted 77).
# The result is kept in local 7 and reused for the remaining multiplies.
i32.const 77
.loc 2 74 32 is_stmt 0 # rgb2y-sample.cpp:74:32
i16x8.splat
i32.const 150
i16x8.replace_lane 1
i32.const 29
i16x8.replace_lane 2
i32.const 1
i16x8.replace_lane 3
i32.const 160
i16x8.replace_lane 5
i32.const 29
i16x8.replace_lane 6
i32.const 1
i16x8.replace_lane 7
local.tee 7
i16x8.mul
.loc 2 74 133 # rgb2y-sample.cpp:74:133
local.tee 8
local.get 6
local.get 4
# Interleave the high 8 bytes with zero bytes.
v8x16.shuffle 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31
.loc 2 74 117 # rgb2y-sample.cpp:74:117
local.get 7
i16x8.mul
.loc 2 74 17 # rgb2y-sample.cpp:74:17
local.tee 6
# Pairwise horizontal add expressed as two shuffles plus an add: the first
# shuffle gathers the odd 16-bit lanes of both operands, the second the even
# 16-bit lanes.
v8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
local.get 8
local.get 6
v8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
i16x8.add
.Ltmp6:
.loc 2 0 17 # rgb2y-sample.cpp:0:17
local.tee 6
.Ltmp7:
#DEBUG_VALUE: rg0 <- undef
#DEBUG_VALUE: rg0 <- %153
.loc 2 70 15 is_stmt 1 # rgb2y-sample.cpp:70:15
# Second input vector: same address + 16, same shuffle/multiply/add sequence.
local.get 2
i32.const 16
i32.add
v128.load 0
local.get 5
v8x16.shuffle 0, 1, 2, 16, 4, 5, 6, 16, 8, 9, 10, 16, 12, 13, 14, 16
.Ltmp8:
.loc 2 75 62 # rgb2y-sample.cpp:75:62
local.tee 8
.Ltmp9:
#DEBUG_VALUE: iv1 <- undef
#DEBUG_VALUE: iv1 <- %157
local.get 4
v8x16.shuffle 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23
.loc 2 75 46 is_stmt 0 # rgb2y-sample.cpp:75:46
local.get 7
i16x8.mul
.loc 2 75 146 # rgb2y-sample.cpp:75:146
local.tee 9
local.get 8
local.get 4
v8x16.shuffle 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31
.loc 2 75 130 # rgb2y-sample.cpp:75:130
local.get 7
i16x8.mul
.loc 2 75 31 # rgb2y-sample.cpp:75:31
local.tee 8
v8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
local.get 9
local.get 8
v8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
i16x8.add
.Ltmp10:
.loc 2 79 33 is_stmt 1 # rgb2y-sample.cpp:79:33
local.tee 8
.Ltmp11:
#DEBUG_VALUE: rg1 <- undef
#DEBUG_VALUE: rg1 <- %157
# Horizontal add of the first two partial-sum vectors, then logical shift
# right by 8 on each 16-bit lane.
v8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
local.get 6
local.get 8
v8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
i16x8.add
i32.const 8
.loc 2 79 18 is_stmt 0 # rgb2y-sample.cpp:79:18
i16x8.shr_u
.loc 2 71 15 is_stmt 1 # rgb2y-sample.cpp:71:15
# Third input vector: address + 32.
local.get 2
i32.const 32
i32.add
v128.load 0
local.get 5
v8x16.shuffle 0, 1, 2, 16, 4, 5, 6, 16, 8, 9, 10, 16, 12, 13, 14, 16
.Ltmp12:
.loc 2 76 62 # rgb2y-sample.cpp:76:62
local.tee 6
.Ltmp13:
#DEBUG_VALUE: iv2 <- undef
#DEBUG_VALUE: iv2 <- %153
local.get 4
v8x16.shuffle 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23
.loc 2 76 46 is_stmt 0 # rgb2y-sample.cpp:76:46
local.get 7
i16x8.mul
.loc 2 76 146 # rgb2y-sample.cpp:76:146
local.tee 8
local.get 6
local.get 4
v8x16.shuffle 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31
.loc 2 76 130 # rgb2y-sample.cpp:76:130
local.get 7
i16x8.mul
.loc 2 76 31 # rgb2y-sample.cpp:76:31
local.tee 6
v8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
local.get 8
local.get 6
v8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
i16x8.add
.Ltmp14:
.loc 2 0 31 # rgb2y-sample.cpp:0:31
local.tee 6
.Ltmp15:
#DEBUG_VALUE: rg2 <- undef
#DEBUG_VALUE: rg2 <- %153
.loc 2 72 15 is_stmt 1 # rgb2y-sample.cpp:72:15
# Fourth input vector: address + 48.
local.get 2
i32.const 48
i32.add
v128.load 0
local.get 5
v8x16.shuffle 0, 1, 2, 16, 4, 5, 6, 16, 8, 9, 10, 16, 12, 13, 14, 16
.Ltmp16:
.loc 2 77 62 # rgb2y-sample.cpp:77:62
local.tee 5
.Ltmp17:
#DEBUG_VALUE: iv3 <- undef
#DEBUG_VALUE: iv3 <- %98
local.get 4
v8x16.shuffle 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23
.loc 2 77 46 is_stmt 0 # rgb2y-sample.cpp:77:46
local.get 7
i16x8.mul
.loc 2 77 146 # rgb2y-sample.cpp:77:146
local.tee 8
local.get 5
local.get 4
v8x16.shuffle 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31
.loc 2 77 130 # rgb2y-sample.cpp:77:130
local.get 7
i16x8.mul
.loc 2 77 31 # rgb2y-sample.cpp:77:31
local.tee 4
v8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
local.get 8
local.get 4
v8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
i16x8.add
.Ltmp18:
.loc 2 80 33 is_stmt 1 # rgb2y-sample.cpp:80:33
local.tee 4
.Ltmp19:
#DEBUG_VALUE: rg3 <- undef
#DEBUG_VALUE: rg3 <- %93
# Horizontal add of the last two partial-sum vectors, then shift right by 8.
v8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
local.get 6
local.get 4
v8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
i16x8.add
i32.const 8
.loc 2 80 18 is_stmt 0 # rgb2y-sample.cpp:80:18
i16x8.shr_u
.loc 2 81 21 is_stmt 1 # rgb2y-sample.cpp:81:21
# Narrow both 16-bit vectors to one 8-bit vector and store it at the address
# computed at the top of the loop (the same address as the first load).
i8x16.narrow_i16x8_u
.loc 2 81 19 is_stmt 0 # rgb2y-sample.cpp:81:19
v128.store 0
.Ltmp20:
#DEBUG_VALUE: rgba2y:i <- %170
.loc 2 0 19 # rgb2y-sample.cpp:0:19
# local 2 = local 3 (advance the current index to the next index).
local.get 3
local.tee 3
local.set 2
.Ltmp21:
.loc 2 68 11 is_stmt 1 # rgb2y-sample.cpp:68:11
# local 3 += 4; repeat while (local 3 << 4) <= local 1 (unsigned compare).
local.get 3
i32.const 4
i32.add
local.tee 3
i32.const 4
.loc 2 68 14 is_stmt 0 # rgb2y-sample.cpp:68:14
i32.shl
.loc 2 68 30 # rgb2y-sample.cpp:68:30
local.get 1
i32.le_u
.Ltmp22:
.loc 2 68 2 # rgb2y-sample.cpp:68:2
br_if 0 # 0: up to label1
.Ltmp23:
.LBB0_3: # %for.end
end_loop
end_block # label0:
# The function result is the constant 0.
i32.const 0
.Ltmp24:
.loc 2 84 2 is_stmt 1 # rgb2y-sample.cpp:84:2
# fallthrough-return
end_function
.Ltmp25:
.Lfunc_end0:
.size rgba2y, .Lfunc_end0-rgba2y
# -- End function
汇编/反汇编输出:
--- WebAssembly code ---
index: 2
kind: wasm function
compiler: TurboFan
Body (size = 1088 = 1086 + 2 padding)
Instructions (size = 1064)
0xa5976359180 0 55 push rbp
0xa5976359181 1 4889e5 REX.W movq rbp,rsp
0xa5976359184 4 6a0a push 0xa
0xa5976359186 6 56 push rsi
0xa5976359187 7 4883ec58 REX.W subq rsp,0x58
0xa597635918b b 488b5e17 REX.W movq rbx,[rsi+0x17]
0xa597635918f f 83fa40 cmpl rdx,0x40
0xa5976359192 12 0f8307000000 jnc 0xa597635919f <+0x1f>
0xa5976359198 18 33c9 xorl rcx,rcx
0xa597635919a 1a e990030000 jmp 0xa597635952f <+0x3af>
0xa597635919f 1f b94d000000 movl rcx,0x4d
0xa59763591a4 24 c5f96ec1 vmovd xmm0,rcx
0xa59763591a8 28 c5fb70c000 vpshuflw xmm0,xmm0,0x0
0xa59763591ad 2d c5f970c000 vpshufd xmm0,xmm0,0x0
0xa59763591b2 32 33c9 xorl rcx,rcx
0xa59763591b4 34 c5f96ec9 vmovd xmm1,rcx
0xa59763591b8 38 c4410057ff vxorps xmm15,xmm15,xmm15
0xa59763591bd 3d c4c27100cf vpshufb xmm1,xmm1,xmm15
0xa59763591c2 42 bf96000000 movl rdi,0x96
0xa59763591c7 47 c5f9c4c701 vpinsrw xmm0,xmm0,rdi,0x1
0xa59763591cc 4c bf80ffffff movl rdi,0xffffff80
0xa59763591d1 51 c5f928d1 vmovapd xmm2,xmm1
0xa59763591d5 55 c4e36920d700 vpinsrb xmm2,xmm2,dil,0x0
0xa59763591db 5b 41b81d000000 movl r8,0x1d
0xa59763591e1 61 c4c179c4c002 vpinsrw xmm0,xmm0,r8,0x2
0xa59763591e7 67 c4e36920d701 vpinsrb xmm2,xmm2,dil,0x1
0xa59763591ed 6d 41b901000000 movl r9,0x1
0xa59763591f3 73 c4c179c4c103 vpinsrw xmm0,xmm0,r9,0x3
0xa59763591f9 79 c4e36920d702 vpinsrb xmm2,xmm2,dil,0x2
0xa59763591ff 7f 41bba0000000 movl r11,0xa0
0xa5976359205 85 c4c179c4c305 vpinsrw xmm0,xmm0,r11,0x5
0xa597635920b 8b c4e36920d703 vpinsrb xmm2,xmm2,dil,0x3
0xa5976359211 91 c4c179c4c006 vpinsrw xmm0,xmm0,r8,0x6
0xa5976359217 97 c4c179c4c107 vpinsrw xmm0,xmm0,r9,0x7
0xa597635921d 9d 488bf9 REX.W movq rdi,rcx
0xa5976359220 a0 41b804000000 movl r8,0x4
0xa5976359226 a6 e90b000000 jmp 0xa5976359236 <+0xb6>
0xa597635922b ab 0f1f440000 nop
0xa5976359230 b0 498bf8 REX.W movq rdi,r8
0xa5976359233 b3 4d8bc1 REX.W movq r8,r9
0xa5976359236 b6 4c8b4e2f REX.W movq r9,[rsi+0x2f]
0xa597635923a ba 493b21 REX.W cmpq rsp,[r9]
0xa597635923d bd 0f86f4020000 jna 0xa5976359537 <+0x3b7>
0xa5976359243 c3 458d4804 leal r9,[r8+0x4]
0xa5976359247 c7 4d8bd9 REX.W movq r11,r9
0xa597635924a ca 41c1e304 shll r11, 4
0xa597635924e ce 8d3cb8 leal rdi,[rax+rdi*4]
0xa5976359251 d1 c5fa6f1c3b vmovdqu xmm3,[rbx+rdi*1]
0xa5976359256 d6 c5fa6f641f10 vmovdqu xmm4,[rdi+rbx*1+0x10]
0xa597635925c dc c5fa6f6c1f20 vmovdqu xmm5,[rdi+rbx*1+0x20]
0xa5976359262 e2 c5fa6f741f30 vmovdqu xmm6,[rdi+rbx*1+0x30]
0xa5976359268 e8 c57810fe vmovups xmm15,xmm6
0xa597635926c ec 49ba0001028004050680 REX.W movq r10,0x8006050480020100
0xa5976359276 f6 c441f96ec2 vmovq xmm8,r10
0xa597635927b fb 49ba08090a800c0d0e80 REX.W movq r10,0x800e0d0c800a0908
0xa5976359285 105 c443b922c201 vpinsrq xmm8,xmm8,r10,0x1
0xa597635928b 10b c4420100f8 vpshufb xmm15,xmm15,xmm8
0xa5976359290 110 0f10fa movups xmm7,xmm2
0xa5976359293 113 49ba8080800080808000 REX.W movq r10,0x80808000808080
0xa597635929d 11d c441f96ec2 vmovq xmm8,r10
0xa59763592a2 122 4c8b15ecffffff REX.W movq r10,[rip+0xffffffec]
0xa59763592a9 129 c443b922c201 vpinsrq xmm8,xmm8,r10,0x1
0xa59763592af 12f c4c24100f8 vpshufb xmm7,xmm7,xmm8
0xa59763592b4 134 c4c141ebff vpor xmm7,xmm7,xmm15
0xa59763592b9 139 c57810fd vmovups xmm15,xmm5
0xa59763592bd 13d 4c8b15aaffffff REX.W movq r10,[rip+0xffffffaa]
0xa59763592c4 144 c441f96ec2 vmovq xmm8,r10
0xa59763592c9 149 4c8b15adffffff REX.W movq r10,[rip+0xffffffad]
0xa59763592d0 150 c443b922c201 vpinsrq xmm8,xmm8,r10,0x1
0xa59763592d6 156 c4420100f8 vpshufb xmm15,xmm15,xmm8
0xa59763592db 15b 0f10f2 movups xmm6,xmm2
0xa59763592de 15e 4c8b15b0ffffff REX.W movq r10,[rip+0xffffffb0]
0xa59763592e5 165 c441f96ec2 vmovq xmm8,r10
0xa59763592ea 16a 4c8b15a4ffffff REX.W movq r10,[rip+0xffffffa4]
0xa59763592f1 171 c443b922c201 vpinsrq xmm8,xmm8,r10,0x1
0xa59763592f7 177 c4c24900f0 vpshufb xmm6,xmm6,xmm8
0xa59763592fc 17c c4c149ebf7 vpor xmm6,xmm6,xmm15
0xa5976359301 181 c57810fc vmovups xmm15,xmm4
0xa5976359305 185 4c8b1562ffffff REX.W movq r10,[rip+0xffffff62]
0xa597635930c 18c c441f96ec2 vmovq xmm8,r10
0xa5976359311 191 4c8b1565ffffff REX.W movq r10,[rip+0xffffff65]
0xa5976359318 198 c443b922c201 vpinsrq xmm8,xmm8,r10,0x1
0xa597635931e 19e c4420100f8 vpshufb xmm15,xmm15,xmm8
0xa5976359323 1a3 0f10ea movups xmm5,xmm2
0xa5976359326 1a6 4c8b1568ffffff REX.W movq r10,[rip+0xffffff68]
0xa597635932d 1ad c441f96ec2 vmovq xmm8,r10
0xa5976359332 1b2 4c8b155cffffff REX.W movq r10,[rip+0xffffff5c]
0xa5976359339 1b9 c443b922c201 vpinsrq xmm8,xmm8,r10,0x1
0xa597635933f 1bf c4c25100e8 vpshufb xmm5,xmm5,xmm8
0xa5976359344 1c4 c4c151ebef vpor xmm5,xmm5,xmm15
0xa5976359349 1c9 c57810fb vmovups xmm15,xmm3
0xa597635934d 1cd 4c8b151affffff REX.W movq r10,[rip+0xffffff1a]
0xa5976359354 1d4 c441f96ec2 vmovq xmm8,r10
0xa5976359359 1d9 4c8b151dffffff REX.W movq r10,[rip+0xffffff1d]
0xa5976359360 1e0 c443b922c201 vpinsrq xmm8,xmm8,r10,0x1
0xa5976359366 1e6 c4420100f8 vpshufb xmm15,xmm15,xmm8
0xa597635936b 1eb 0f10e2 movups xmm4,xmm2
0xa597635936e 1ee 4c8b1520ffffff REX.W movq r10,[rip+0xffffff20]
0xa5976359375 1f5 c441f96ec2 vmovq xmm8,r10
0xa597635937a 1fa 4c8b1514ffffff REX.W movq r10,[rip+0xffffff14]
0xa5976359381 201 c443b922c201 vpinsrq xmm8,xmm8,r10,0x1
0xa5976359387 207 c4c25900e0 vpshufb xmm4,xmm4,xmm8
0xa597635938c 20c c4c159ebe7 vpor xmm4,xmm4,xmm15
0xa5976359391 211 c5f928df vmovapd xmm3,xmm7
0xa5976359395 215 c5e168d9 vpunpckhbw xmm3,xmm3,xmm1
0xa5976359399 219 c5c160f9 vpunpcklbw xmm7,xmm7,xmm1
0xa597635939d 21d c57928c6 vmovapd xmm8,xmm6
0xa59763593a1 221 c53968c1 vpunpckhbw xmm8,xmm8,xmm1
0xa59763593a5 225 c5c960f1 vpunpcklbw xmm6,xmm6,xmm1
0xa59763593a9 229 c57928cd vmovapd xmm9,xmm5
0xa59763593ad 22d c53168c9 vpunpckhbw xmm9,xmm9,xmm1
0xa59763593b1 231 c5d160e9 vpunpcklbw xmm5,xmm5,xmm1
0xa59763593b5 235 c57928d4 vmovapd xmm10,xmm4
0xa59763593b9 239 c52968d1 vpunpckhbw xmm10,xmm10,xmm1
0xa59763593bd 23d c5d960e1 vpunpcklbw xmm4,xmm4,xmm1
0xa59763593c1 241 c5e1d5d8 vpmullw xmm3,xmm3,xmm0
0xa59763593c5 245 c5c1d5f8 vpmullw xmm7,xmm7,xmm0
0xa59763593c9 249 c539d5c0 vpmullw xmm8,xmm8,xmm0
0xa59763593cd 24d c5c9d5f0 vpmullw xmm6,xmm6,xmm0
0xa59763593d1 251 c531d5c8 vpmullw xmm9,xmm9,xmm0
0xa59763593d5 255 c5d1d5e8 vpmullw xmm5,xmm5,xmm0
0xa59763593d9 259 c529d5d0 vpmullw xmm10,xmm10,xmm0
0xa59763593dd 25d c5d9d5e0 vpmullw xmm4,xmm4,xmm0
0xa59763593e1 261 c57928df vmovapd xmm11,xmm7
0xa59763593e5 265 c44101efff vpxor xmm15,xmm15,xmm15
0xa59763593ea 26a c463010efb55 vpblendw xmm15,xmm15,xmm3,0x55
0xa59763593f0 270 c443210edfaa vpblendw xmm11,xmm11,xmm15,0xaa
0xa59763593f6 276 c442212bdf vpackusdw xmm11,xmm11,xmm15
0xa59763593fb 27b c57810fb vmovups xmm15,xmm3
0xa59763593ff 27f c4c10172d710 vpsrld xmm15,xmm15,16
0xa5976359405 285 c5c172d710 vpsrld xmm7,xmm7,16
0xa597635940a 28a c4c2412bff vpackusdw xmm7,xmm7,xmm15
0xa597635940f 28f c5f928de vmovapd xmm3,xmm6
0xa5976359413 293 c44101efff vpxor xmm15,xmm15,xmm15
0xa5976359418 298 c443010ef855 vpblendw xmm15,xmm15,xmm8,0x55
0xa597635941e 29e c4c3610edfaa vpblendw xmm3,xmm3,xmm15,0xaa
0xa5976359424 2a4 c4c2612bdf vpackusdw xmm3,xmm3,xmm15
0xa5976359429 2a9 c4417810f8 vmovups xmm15,xmm8
0xa597635942e 2ae c4c10172d710 vpsrld xmm15,xmm15,16
0xa5976359434 2b4 c5c972d610 vpsrld xmm6,xmm6,16
0xa5976359439 2b9 c4c2492bf7 vpackusdw xmm6,xmm6,xmm15
0xa597635943e 2be c57928c5 vmovapd xmm8,xmm5
0xa5976359442 2c2 c44101efff vpxor xmm15,xmm15,xmm15
0xa5976359447 2c7 c443010ef955 vpblendw xmm15,xmm15,xmm9,0x55
0xa597635944d 2cd c443390ec7aa vpblendw xmm8,xmm8,xmm15,0xaa
0xa5976359453 2d3 c442392bc7 vpackusdw xmm8,xmm8,xmm15
0xa5976359458 2d8 c4417810f9 vmovups xmm15,xmm9
0xa597635945d 2dd c4c10172d710 vpsrld xmm15,xmm15,16
0xa5976359463 2e3 c5d172d510 vpsrld xmm5,xmm5,16
0xa5976359468 2e8 c4c2512bef vpackusdw xmm5,xmm5,xmm15
0xa597635946d 2ed c57928cc vmovapd xmm9,xmm4
0xa5976359471 2f1 c44101efff vpxor xmm15,xmm15,xmm15
0xa5976359476 2f6 c443010efa55 vpblendw xmm15,xmm15,xmm10,0x55
0xa597635947c 2fc c443310ecfaa vpblendw xmm9,xmm9,xmm15,0xaa
0xa5976359482 302 c442312bcf vpackusdw xmm9,xmm9,xmm15
0xa5976359487 307 c4417810fa vmovups xmm15,xmm10
0xa597635948c 30c c4c10172d710 vpsrld xmm15,xmm15,16
0xa5976359492 312 c5d972d410 vpsrld xmm4,xmm4,16
0xa5976359497 317 c4c2592be7 vpackusdw xmm4,xmm4,xmm15
0xa597635949c 31c c4c141fdfb vpaddw xmm7,xmm7,xmm11
0xa59763594a1 321 c5c9fdf3 vpaddw xmm6,xmm6,xmm3
0xa59763594a5 325 c4c151fde8 vpaddw xmm5,xmm5,xmm8
0xa59763594aa 32a c4c159fde1 vpaddw xmm4,xmm4,xmm9
0xa59763594af 32f c5f928de vmovapd xmm3,xmm6
0xa59763594b3 333 c44101efff vpxor xmm15,xmm15,xmm15
0xa59763594b8 338 c463010eff55 vpblendw xmm15,xmm15,xmm7,0x55
0xa59763594be 33e c4c3610edfaa vpblendw xmm3,xmm3,xmm15,0xaa
0xa59763594c4 344 c4c2612bdf vpackusdw xmm3,xmm3,xmm15
0xa59763594c9 349 c57810ff vmovups xmm15,xmm7
0xa59763594cd 34d c4c10172d710 vpsrld xmm15,xmm15,16
0xa59763594d3 353 c5c972d610 vpsrld xmm6,xmm6,16
0xa59763594d8 358 c4c2492bf7 vpackusdw xmm6,xmm6,xmm15
0xa59763594dd 35d c5f928fc vmovapd xmm7,xmm4
0xa59763594e1 361 c44101efff vpxor xmm15,xmm15,xmm15
0xa59763594e6 366 c463010efd55 vpblendw xmm15,xmm15,xmm5,0x55
0xa59763594ec 36c c4c3410effaa vpblendw xmm7,xmm7,xmm15,0xaa
0xa59763594f2 372 c4c2412bff vpackusdw xmm7,xmm7,xmm15
0xa59763594f7 377 c57810fd vmovups xmm15,xmm5
0xa59763594fb 37b c4c10172d710 vpsrld xmm15,xmm15,16
0xa5976359501 381 c5d972d410 vpsrld xmm4,xmm4,16
0xa5976359506 386 c4c2592be7 vpackusdw xmm4,xmm4,xmm15
0xa597635950b 38b c5c9fdf3 vpaddw xmm6,xmm6,xmm3
0xa597635950f 38f c5d9fde7 vpaddw xmm4,xmm4,xmm7
0xa5976359513 393 c5c971d608 vpsrlw xmm6,xmm6,8
0xa5976359518 398 c5d971d408 vpsrlw xmm4,xmm4,8
0xa597635951d 39d c5d967e6 vpackuswb xmm4,xmm4,xmm6
0xa5976359521 3a1 c5fa7f243b vmovdqu [rbx+rdi*1],xmm4
0xa5976359526 3a6 443bda cmpl r11,rdx
0xa5976359529 3a9 0f8601fdffff jna 0xa5976359230 <+0xb0>
0xa597635952f 3af 488bc1 REX.W movq rax,rcx
0xa5976359532 3b2 488be5 REX.W movq rsp,rbp
0xa5976359535 3b5 5d pop rbp
0xa5976359536 3b6 c3 retl
0xa5976359537 3b7 488955e8 REX.W movq [rbp-0x18],rdx
0xa597635953b 3bb 48895de0 REX.W movq [rbp-0x20],rbx
0xa597635953f 3bf c5f81145d0 vmovups [rbp-0x30],xmm0
0xa5976359544 3c4 c5f8114dc0 vmovups [rbp-0x40],xmm1
0xa5976359549 3c9 c5f81155b0 vmovups [rbp-0x50],xmm2
0xa597635954e 3ce 488945a8 REX.W movq [rbp-0x58],rax
0xa5976359552 3d2 48897da0 REX.W movq [rbp-0x60],rdi
0xa5976359556 3d6 4c894598 REX.W movq [rbp-0x68],r8
0xa597635955a 3da e8615dffff call 0xa597634f2c0 ;; wasm stub: WasmStackGuard
0xa597635955f 3df 33c9 xorl rcx,rcx
0xa5976359561 3e1 488b55e8 REX.W movq rdx,[rbp-0x18]
0xa5976359565 3e5 488b5de0 REX.W movq rbx,[rbp-0x20]
0xa5976359569 3e9 c5f81045d0 vmovups xmm0,[rbp-0x30]
0xa597635956e 3ee c5f8104dc0 vmovups xmm1,[rbp-0x40]
0xa5976359573 3f3 c5f81055b0 vmovups xmm2,[rbp-0x50]
0xa5976359578 3f8 488b45a8 REX.W movq rax,[rbp-0x58]
0xa597635957c 3fc 488b7da0 REX.W movq rdi,[rbp-0x60]
0xa5976359580 400 4c8b4598 REX.W movq r8,[rbp-0x68]
0xa5976359584 404 488b75f0 REX.W movq rsi,[rbp-0x10]
0xa5976359588 408 e9b6fcffff jmp 0xa5976359243 <+0xc3>
0xa597635958d 40d e8fe5affff call 0xa597634f090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xa5976359592 412 e8f95affff call 0xa597634f090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xa5976359597 417 e8f45affff call 0xa597634f090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xa597635959c 41c e8ef5affff call 0xa597634f090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xa59763595a1 421 e8ea5affff call 0xa597634f090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xa59763595a6 426 90 nop
0xa59763595a7 427 90 nop
Protected instructions:
pc offset land pad
3a1 40d
e2 412
dc 417
d6 41c
d1 421
Source positions:
pc offset position
d1 43
d6 239
dc 416
e2 545
3a1 722
3b7 29
40d 722
412 545
417 416
41c 239
421 43
Safepoints (size = 22)
0xa5a7635917fffffffff 000000000000000 (sp -> fp)
RelocInfo (size = 8)
0xa597635955b wasm stub call
0xa597635958e wasm stub call
0xa5976359593 wasm stub call
0xa5976359598 wasm stub call
0xa597635959d wasm stub call
0xa59763595a2 wasm stub call
--- End code ---
复制Emscripten问题的答案:
我们之所以不使用v128.const,是因为v128.const最近才在V8中实现。为了避免影响参与Origin Trial(源试用)的用户,在相关的V8补丁进入Chrome稳定版之前,我们不能更新LLVM以发出v128.const。我一直在关注这个仪表板,以确定什么时候是做出这一改变的好时机。如果你使用的是Chrome的最新版本或其他支持v128.const的执行环境,你可以尝试使用-munimplemented-simd128标志编译你的项目,这将在LLVM中启用v128.const(但也可能会引入其他你不想要的更改)。一旦v128.const广泛可用,LLVM使用v128.const将比从内存加载向量更好,因为这允许引擎根据运行时平台确定实现该向量的最佳方式。
还可能值得考虑移植代码中对性能敏感的部分,以便直接使用WebAssembly内部函数头,而不是依赖于模拟的SSE。这将减少代码和底层机器代码之间的阻抗不匹配。
最后,如果您注意到任何地方的指令选择不够理想,请针对您看到的具体问题提交LLVM错误(如果问题出在代码端)或V8错误(如果问题出在本机端),这将非常有帮助。这种反馈对我们来说非常有价值。
@PeterOrderes
我已经对LLVM实现进行了一些更改,现在正在对其进行测试。您认为为常量生成的ASM代码如何?我认为这要好得多,但我希望有第二种意见。
--- WebAssembly code ---
index: 3
kind: wasm function
compiler: TurboFan
Body (size = 1280 = 1278 + 2 padding)
Instructions (size = 1256)
0xfbcf162d3c0 0 55 push rbp
0xfbcf162d3c1 1 4889e5 REX.W movq rbp,rsp
0xfbcf162d3c4 4 6a0a push 0xa
0xfbcf162d3c6 6 56 push rsi
0xfbcf162d3c7 7 4883ec50 REX.W subq rsp,0x50
0xfbcf162d3cb b 488b4e17 REX.W movq rcx,[rsi+0x17]
0xfbcf162d3cf f 488bd8 REX.W movq rbx,rax
0xfbcf162d3d2 12 83fa40 cmpl rdx,0x40
0xfbcf162d3d5 15 0f8308000000 jnc 0xfbcf162d3e3 <+0x23>
0xfbcf162d3db 1b 4533c0 xorl r8,r8
0xfbcf162d3de 1e e950030000 jmp 0xfbcf162d733 <+0x373>
0xfbcf162d3e3 23 48bf8080808080808080 REX.W movq rdi,0x8080808080808080
0xfbcf162d3ed 2d c4e1f96ec7 vmovq xmm0,rdi
0xfbcf162d3f2 32 c5fb12c0 vmovddup xmm0,xmm0
0xfbcf162d3f6 36 48bf4d0096001d000100 REX.W movq rdi,0x1001d0096004d
0xfbcf162d400 40 c4e1f96ecf vmovq xmm1,rdi
0xfbcf162d405 45 c5fb12c9 vmovddup xmm1,xmm1
0xfbcf162d409 49 bf04000000 movl rdi,0x4
0xfbcf162d40e 4e 4533c9 xorl r9,r9
0xfbcf162d411 51 4c8bc7 REX.W movq r8,rdi
0xfbcf162d414 54 e90d000000 jmp 0xfbcf162d426 <+0x66>
0xfbcf162d419 59 0f1f8000000000 nop
0xfbcf162d420 60 4d8bc8 REX.W movq r9,r8
0xfbcf162d423 63 4d8bc3 REX.W movq r8,r11
0xfbcf162d426 66 4c8b5e2f REX.W movq r11,[rsi+0x2f]
0xfbcf162d42a 6a 493b23 REX.W cmpq rsp,[r11]
0xfbcf162d42d 6d 0f86aa030000 jna 0xfbcf162d7dd <+0x41d>
0xfbcf162d433 73 458d5804 leal r11,[r8+0x4]
0xfbcf162d437 77 4d8be3 REX.W movq r12,r11
0xfbcf162d43a 7a 41c1e404 shll r12, 4
0xfbcf162d43e 7e 468d0c8b leal r9,[rbx+r9*4]
0xfbcf162d442 82 c4a17a6f1409 vmovdqu xmm2,[rcx+r9*1]
0xfbcf162d448 88 c4c17a6f5c0910 vmovdqu xmm3,[r9+rcx*1+0x10]
0xfbcf162d44f 8f c4c17a6f640920 vmovdqu xmm4,[r9+rcx*1+0x20]
0xfbcf162d456 96 c4c17a6f6c0930 vmovdqu xmm5,[r9+rcx*1+0x30]
0xfbcf162d45d 9d c57810fd vmovups xmm15,xmm5
0xfbcf162d461 a1 49ba0001028004050680 REX.W movq r10,0x8006050480020100
0xfbcf162d46b ab c4c1f96efa vmovq xmm7,r10
0xfbcf162d470 b0 49ba08090a800c0d0e80 REX.W movq r10,0x800e0d0c800a0908
0xfbcf162d47a ba c4c3c122fa01 vpinsrq xmm7,xmm7,r10,0x1
0xfbcf162d480 c0 c4620100ff vpshufb xmm15,xmm15,xmm7
0xfbcf162d485 c5 0f10f0 movups xmm6,xmm0
0xfbcf162d488 c8 49ba8080800080808000 REX.W movq r10,0x80808000808080
0xfbcf162d492 d2 c4c1f96efa vmovq xmm7,r10
0xfbcf162d497 d7 4c8b15ecffffff REX.W movq r10,[rip+0xffffffec]
0xfbcf162d49e de c4c3c122fa01 vpinsrq xmm7,xmm7,r10,0x1
0xfbcf162d4a4 e4 c4e24900f7 vpshufb xmm6,xmm6,xmm7
0xfbcf162d4a9 e9 c4c149ebf7 vpor xmm6,xmm6,xmm15
0xfbcf162d4ae ee c57810fc vmovups xmm15,xmm4
0xfbcf162d4b2 f2 4c8b15aaffffff REX.W movq r10,[rip+0xffffffaa]
0xfbcf162d4b9 f9 c4c1f96efa vmovq xmm7,r10
0xfbcf162d4be fe 4c8b15adffffff REX.W movq r10,[rip+0xffffffad]
0xfbcf162d4c5 105 c4c3c122fa01 vpinsrq xmm7,xmm7,r10,0x1
0xfbcf162d4cb 10b c4620100ff vpshufb xmm15,xmm15,xmm7
0xfbcf162d4d0 110 0f10e8 movups xmm5,xmm0
0xfbcf162d4d3 113 4c8b15b0ffffff REX.W movq r10,[rip+0xffffffb0]
0xfbcf162d4da 11a c4c1f96efa vmovq xmm7,r10
0xfbcf162d4df 11f 4c8b15a4ffffff REX.W movq r10,[rip+0xffffffa4]
0xfbcf162d4e6 126 c4c3c122fa01 vpinsrq xmm7,xmm7,r10,0x1
0xfbcf162d4ec 12c c4e25100ef vpshufb xmm5,xmm5,xmm7
0xfbcf162d4f1 131 c4c151ebef vpor xmm5,xmm5,xmm15
0xfbcf162d4f6 136 c57810fb vmovups xmm15,xmm3
0xfbcf162d4fa 13a 4c8b1562ffffff REX.W movq r10,[rip+0xffffff62]
0xfbcf162d501 141 c4c1f96efa vmovq xmm7,r10
0xfbcf162d506 146 4c8b1565ffffff REX.W movq r10,[rip+0xffffff65]
0xfbcf162d50d 14d c4c3c122fa01 vpinsrq xmm7,xmm7,r10,0x1
0xfbcf162d513 153 c4620100ff vpshufb xmm15,xmm15,xmm7
0xfbcf162d518 158 0f10e0 movups xmm4,xmm0
0xfbcf162d51b 15b 4c8b1568ffffff REX.W movq r10,[rip+0xffffff68]
0xfbcf162d522 162 c4c1f96efa vmovq xmm7,r10
0xfbcf162d527 167 4c8b155cffffff REX.W movq r10,[rip+0xffffff5c]
0xfbcf162d52e 16e c4c3c122fa01 vpinsrq xmm7,xmm7,r10,0x1
0xfbcf162d534 174 c4e25900e7 vpshufb xmm4,xmm4,xmm7
0xfbcf162d539 179 c4c159ebe7 vpor xmm4,xmm4,xmm15
0xfbcf162d53e 17e c57810fa vmovups xmm15,xmm2
0xfbcf162d542 182 4c8b151affffff REX.W movq r10,[rip+0xffffff1a]
0xfbcf162d549 189 c4c1f96efa vmovq xmm7,r10
0xfbcf162d54e 18e 4c8b151dffffff REX.W movq r10,[rip+0xffffff1d]
0xfbcf162d555 195 c4c3c122fa01 vpinsrq xmm7,xmm7,r10,0x1
0xfbcf162d55b 19b c4620100ff vpshufb xmm15,xmm15,xmm7
0xfbcf162d560 1a0 0f10d8 movups xmm3,xmm0
0xfbcf162d563 1a3 4c8b1520ffffff REX.W movq r10,[rip+0xffffff20]
0xfbcf162d56a 1aa c4c1f96efa vmovq xmm7,r10
0xfbcf162d56f 1af 4c8b1514ffffff REX.W movq r10,[rip+0xffffff14]
0xfbcf162d576 1b6 c4c3c122fa01 vpinsrq xmm7,xmm7,r10,0x1
0xfbcf162d57c 1bc c4e26100df vpshufb xmm3,xmm3,xmm7
0xfbcf162d581 1c1 c4c161ebdf vpor xmm3,xmm3,xmm15
0xfbcf162d586 1c6 c4e3690fd608 vpalignr xmm2,xmm2,xmm6,0x8
0xfbcf162d58c 1cc c4e27930d2 vpmovzxbw xmm2,xmm2
0xfbcf162d591 1d1 c4e27930f6 vpmovzxbw xmm6,xmm6
0xfbcf162d596 1d6 c4e3410ffd08 vpalignr xmm7,xmm7,xmm5,0x8
0xfbcf162d59c 1dc c4e27930ff vpmovzxbw xmm7,xmm7
0xfbcf162d5a1 1e1 c4e27930ed vpmovzxbw xmm5,xmm5
0xfbcf162d5a6 1e6 c463390fc408 vpalignr xmm8,xmm8,xmm4,0x8
0xfbcf162d5ac 1ec c4427930c0 vpmovzxbw xmm8,xmm8
0xfbcf162d5b1 1f1 c4e27930e4 vpmovzxbw xmm4,xmm4
0xfbcf162d5b6 1f6 c463310fcb08 vpalignr xmm9,xmm9,xmm3,0x8
0xfbcf162d5bc 1fc c4427930c9 vpmovzxbw xmm9,xmm9
0xfbcf162d5c1 201 c4e27930db vpmovzxbw xmm3,xmm3
0xfbcf162d5c6 206 c5e9d5d1 vpmullw xmm2,xmm2,xmm1
0xfbcf162d5ca 20a c5c9d5f1 vpmullw xmm6,xmm6,xmm1
0xfbcf162d5ce 20e c5c1d5f9 vpmullw xmm7,xmm7,xmm1
0xfbcf162d5d2 212 c5d1d5e9 vpmullw xmm5,xmm5,xmm1
0xfbcf162d5d6 216 c539d5c1 vpmullw xmm8,xmm8,xmm1
0xfbcf162d5da 21a c5d9d5e1 vpmullw xmm4,xmm4,xmm1
0xfbcf162d5de 21e c531d5c9 vpmullw xmm9,xmm9,xmm1
0xfbcf162d5e2 222 c5e1d5d9 vpmullw xmm3,xmm3,xmm1
0xfbcf162d5e6 226 c57928d6 vmovapd xmm10,xmm6
0xfbcf162d5ea 22a c44101efff vpxor xmm15,xmm15,xmm15
0xfbcf162d5ef 22f c463010efa55 vpblendw xmm15,xmm15,xmm2,0x55
0xfbcf162d5f5 235 c443290ed7aa vpblendw xmm10,xmm10,xmm15,0xaa
0xfbcf162d5fb 23b c442292bd7 vpackusdw xmm10,xmm10,xmm15
0xfbcf162d600 240 c57810fa vmovups xmm15,xmm2
0xfbcf162d604 244 c4c10172d710 vpsrld xmm15,xmm15,16
0xfbcf162d60a 24a c5c972d610 vpsrld xmm6,xmm6,16
0xfbcf162d60f 24f c4c2492bf7 vpackusdw xmm6,xmm6,xmm15
0xfbcf162d614 254 c5f928d5 vmovapd xmm2,xmm5
0xfbcf162d618 258 c44101efff vpxor xmm15,xmm15,xmm15
0xfbcf162d61d 25d c463010eff55 vpblendw xmm15,xmm15,xmm7,0x55
0xfbcf162d623 263 c4c3690ed7aa vpblendw xmm2,xmm2,xmm15,0xaa
0xfbcf162d629 269 c4c2692bd7 vpackusdw xmm2,xmm2,xmm15
0xfbcf162d62e 26e c57810ff vmovups xmm15,xmm7
0xfbcf162d632 272 c4c10172d710 vpsrld xmm15,xmm15,16
0xfbcf162d638 278 c5d172d510 vpsrld xmm5,xmm5,16
0xfbcf162d63d 27d c4c2512bef vpackusdw xmm5,xmm5,xmm15
0xfbcf162d642 282 c5f928fc vmovapd xmm7,xmm4
0xfbcf162d646 286 c44101efff vpxor xmm15,xmm15,xmm15
0xfbcf162d64b 28b c443010ef855 vpblendw xmm15,xmm15,xmm8,0x55
0xfbcf162d651 291 c4c3410effaa vpblendw xmm7,xmm7,xmm15,0xaa
0xfbcf162d657 297 c4c2412bff vpackusdw xmm7,xmm7,xmm15
0xfbcf162d65c 29c c4417810f8 vmovups xmm15,xmm8
0xfbcf162d661 2a1 c4c10172d710 vpsrld xmm15,xmm15,16
0xfbcf162d667 2a7 c5d972d410 vpsrld xmm4,xmm4,16
0xfbcf162d66c 2ac c4c2592be7 vpackusdw xmm4,xmm4,xmm15
0xfbcf162d671 2b1 c57928c3 vmovapd xmm8,xmm3
0xfbcf162d675 2b5 c44101efff vpxor xmm15,xmm15,xmm15
0xfbcf162d67a 2ba c443010ef955 vpblendw xmm15,xmm15,xmm9,0x55
0xfbcf162d680 2c0 c443390ec7aa vpblendw xmm8,xmm8,xmm15,0xaa
0xfbcf162d686 2c6 c442392bc7 vpackusdw xmm8,xmm8,xmm15
0xfbcf162d68b 2cb c4417810f9 vmovups xmm15,xmm9
0xfbcf162d690 2d0 c4c10172d710 vpsrld xmm15,xmm15,16
0xfbcf162d696 2d6 c5e172d310 vpsrld xmm3,xmm3,16
0xfbcf162d69b 2db c4c2612bdf vpackusdw xmm3,xmm3,xmm15
0xfbcf162d6a0 2e0 c4c149fdf2 vpaddw xmm6,xmm6,xmm10
0xfbcf162d6a5 2e5 c5d1fdea vpaddw xmm5,xmm5,xmm2
0xfbcf162d6a9 2e9 c5d9fde7 vpaddw xmm4,xmm4,xmm7
0xfbcf162d6ad 2ed c4c161fdd8 vpaddw xmm3,xmm3,xmm8
0xfbcf162d6b2 2f2 c5f928d5 vmovapd xmm2,xmm5
0xfbcf162d6b6 2f6 c44101efff vpxor xmm15,xmm15,xmm15
0xfbcf162d6bb 2fb c463010efe55 vpblendw xmm15,xmm15,xmm6,0x55
0xfbcf162d6c1 301 c4c3690ed7aa vpblendw xmm2,xmm2,xmm15,0xaa
0xfbcf162d6c7 307 c4c2692bd7 vpackusdw xmm2,xmm2,xmm15
0xfbcf162d6cc 30c c57810fe vmovups xmm15,xmm6
0xfbcf162d6d0 310 c4c10172d710 vpsrld xmm15,xmm15,16
0xfbcf162d6d6 316 c5d172d510 vpsrld xmm5,xmm5,16
0xfbcf162d6db 31b c4c2512bef vpackusdw xmm5,xmm5,xmm15
0xfbcf162d6e0 320 c5f928f3 vmovapd xmm6,xmm3
0xfbcf162d6e4 324 c44101efff vpxor xmm15,xmm15,xmm15
0xfbcf162d6e9 329 c463010efc55 vpblendw xmm15,xmm15,xmm4,0x55
0xfbcf162d6ef 32f c4c3490ef7aa vpblendw xmm6,xmm6,xmm15,0xaa
0xfbcf162d6f5 335 c4c2492bf7 vpackusdw xmm6,xmm6,xmm15
0xfbcf162d6fa 33a c57810fc vmovups xmm15,xmm4
0xfbcf162d6fe 33e c4c10172d710 vpsrld xmm15,xmm15,16
0xfbcf162d704 344 c5e172d310 vpsrld xmm3,xmm3,16
0xfbcf162d709 349 c4c2612bdf vpackusdw xmm3,xmm3,xmm15
0xfbcf162d70e 34e c5d1fdea vpaddw xmm5,xmm5,xmm2
0xfbcf162d712 352 c5e1fdde vpaddw xmm3,xmm3,xmm6
0xfbcf162d716 356 c5d171d508 vpsrlw xmm5,xmm5,8
0xfbcf162d71b 35b c5e171d308 vpsrlw xmm3,xmm3,8
0xfbcf162d720 360 c5e167dd vpackuswb xmm3,xmm3,xmm5
0xfbcf162d724 364 c4a17a7f1c09 vmovdqu [rcx+r9*1],xmm3
0xfbcf162d72a 36a 443be2 cmpl r12,rdx
0xfbcf162d72d 36d 0f86edfcffff jna 0xfbcf162d420 <+0x60>
0xfbcf162d733 373 33ff xorl rdi,rdi
0xfbcf162d735 375 41b904000000 movl r9,0x4
0xfbcf162d73b 37b 4183f9ff cmpl r9,0xff
0xfbcf162d73f 37f 0f84e7000000 jz 0xfbcf162d82c <+0x46c>
0xfbcf162d745 385 41c1e004 shll r8, 4
0xfbcf162d749 389 488bc2 REX.W movq rax,rdx
0xfbcf162d74c 38c 99 cdql
0xfbcf162d74d 38d 41f7f9 idivl r9
0xfbcf162d750 390 428d1403 leal rdx,[rbx+r8*1]
0xfbcf162d754 394 03d8 addl rbx,rax
0xfbcf162d756 396 3bd3 cmpl rdx,rbx
0xfbcf162d758 398 0f8777000000 ja 0xfbcf162d7d5 <+0x415>
0xfbcf162d75e 39e 4c8bc7 REX.W movq r8,rdi
0xfbcf162d761 3a1 4c8bca REX.W movq r9,rdx
0xfbcf162d764 3a4 e90d000000 jmp 0xfbcf162d776 <+0x3b6>
0xfbcf162d769 3a9 0f1f8000000000 nop
0xfbcf162d770 3b0 4d8bc3 REX.W movq r8,r11
0xfbcf162d773 3b3 4d89e1 REX.W movq r9,r12
0xfbcf162d776 3b6 4c8b5e2f REX.W movq r11,[rsi+0x2f]
0xfbcf162d77a 3ba 493b23 REX.W cmpq rsp,[r11]
0xfbcf162d77d 3bd 0f86ba000000 jna 0xfbcf162d83d <+0x47d>
0xfbcf162d783 3c3 458d5804 leal r11,[r8+0x4]
0xfbcf162d787 3c7 468d241a leal r12,[rdx+r11*1]
0xfbcf162d78b 3cb 4d8bf0 REX.W movq r14,r8
0xfbcf162d78e 3ce 4183ce01 orl r14,0x1
0xfbcf162d792 3d2 458bc9 movl r9,r9
0xfbcf162d795 3d5 4403f2 addl r14,rdx
0xfbcf162d798 3d8 4183c802 orl r8,0x2
0xfbcf162d79c 3dc 460fb63c09 movzxbl r15,[rcx+r9*1]
0xfbcf162d7a1 3e1 4403c2 addl r8,rdx
0xfbcf162d7a4 3e4 460fb63431 movzxbl r14,[rcx+r14*1]
0xfbcf162d7a9 3e9 460fb60401 movzxbl r8,[rcx+r8*1]
0xfbcf162d7ae 3ee 4569f696000000 imull r14,r14,0x96
0xfbcf162d7b5 3f5 456bff4d imull r15,r15,0x4d
0xfbcf162d7b9 3f9 456bc01d imull r8,r8,0x1d
0xfbcf162d7bd 3fd 4503f7 addl r14,r15
0xfbcf162d7c0 400 478d843080000000 leal r8,[r8+r14*1+0x80]
0xfbcf162d7c8 408 41c1e808 shrl r8, 8
0xfbcf162d7cc 40c 46880409 movb [rcx+r9*1],r8l
0xfbcf162d7d0 410 443be3 cmpl r12,rbx
0xfbcf162d7d3 413 769b jna 0xfbcf162d770 <+0x3b0>
0xfbcf162d7d5 415 488bc7 REX.W movq rax,rdi
0xfbcf162d7d8 418 488be5 REX.W movq rsp,rbp
0xfbcf162d7db 41b 5d pop rbp
0xfbcf162d7dc 41c c3 retl
0xfbcf162d7dd 41d 48894de0 REX.W movq [rbp-0x20],rcx
0xfbcf162d7e1 421 48895de8 REX.W movq [rbp-0x18],rbx
0xfbcf162d7e5 425 488955d8 REX.W movq [rbp-0x28],rdx
0xfbcf162d7e9 429 4c8945d0 REX.W movq [rbp-0x30],r8
0xfbcf162d7ed 42d c5f8114db0 vmovups [rbp-0x50],xmm1
0xfbcf162d7f2 432 c5f81145a0 vmovups [rbp-0x60],xmm0
0xfbcf162d7f7 437 4c894dc8 REX.W movq [rbp-0x38],r9
0xfbcf162d7fb 43b e8c04affff call 0xfbcf16222c0 ;; wasm stub: WasmStackGuard
0xfbcf162d800 440 488b4de0 REX.W movq rcx,[rbp-0x20]
0xfbcf162d804 444 488b75f0 REX.W movq rsi,[rbp-0x10]
0xfbcf162d808 448 488b5de8 REX.W movq rbx,[rbp-0x18]
0xfbcf162d80c 44c bf04000000 movl rdi,0x4
0xfbcf162d811 451 488b55d8 REX.W movq rdx,[rbp-0x28]
0xfbcf162d815 455 4c8b45d0 REX.W movq r8,[rbp-0x30]
0xfbcf162d819 459 c5f8104db0 vmovups xmm1,[rbp-0x50]
0xfbcf162d81e 45e c5f81045a0 vmovups xmm0,[rbp-0x60]
0xfbcf162d823 463 4c8b4dc8 REX.W movq r9,[rbp-0x38]
0xfbcf162d827 467 e907fcffff jmp 0xfbcf162d433 <+0x73>
0xfbcf162d82c 46c 81fa00000080 cmpl rdx,0x80000000
0xfbcf162d832 472 0f843d000000 jz 0xfbcf162d875 <+0x4b5>
0xfbcf162d838 478 e908ffffff jmp 0xfbcf162d745 <+0x385>
0xfbcf162d83d 47d 48895de8 REX.W movq [rbp-0x18],rbx
0xfbcf162d841 481 48894de0 REX.W movq [rbp-0x20],rcx
0xfbcf162d845 485 488955d8 REX.W movq [rbp-0x28],rdx
0xfbcf162d849 489 4c8945d0 REX.W movq [rbp-0x30],r8
0xfbcf162d84d 48d 4c894dc8 REX.W movq [rbp-0x38],r9
0xfbcf162d851 491 e86a4affff call 0xfbcf16222c0 ;; wasm stub: WasmStackGuard
0xfbcf162d856 496 33ff xorl rdi,rdi
0xfbcf162d858 498 488b5de8 REX.W movq rbx,[rbp-0x18]
0xfbcf162d85c 49c 488b4de0 REX.W movq rcx,[rbp-0x20]
0xfbcf162d860 4a0 488b55d8 REX.W movq rdx,[rbp-0x28]
0xfbcf162d864 4a4 4c8b45d0 REX.W movq r8,[rbp-0x30]
0xfbcf162d868 4a8 4c8b4dc8 REX.W movq r9,[rbp-0x38]
0xfbcf162d86c 4ac 488b75f0 REX.W movq rsi,[rbp-0x10]
0xfbcf162d870 4b0 e90effffff jmp 0xfbcf162d783 <+0x3c3>
0xfbcf162d875 4b5 e84648ffff call 0xfbcf16220c0 ;; wasm stub: ThrowWasmTrapDivUnrepresentable
0xfbcf162d87a 4ba e81148ffff call 0xfbcf1622090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xfbcf162d87f 4bf e80c48ffff call 0xfbcf1622090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xfbcf162d884 4c4 e80748ffff call 0xfbcf1622090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xfbcf162d889 4c9 e80248ffff call 0xfbcf1622090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xfbcf162d88e 4ce e8fd47ffff call 0xfbcf1622090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xfbcf162d893 4d3 e8f847ffff call 0xfbcf1622090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xfbcf162d898 4d8 e8f347ffff call 0xfbcf1622090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xfbcf162d89d 4dd e8ee47ffff call 0xfbcf1622090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xfbcf162d8a2 4e2 e8e947ffff call 0xfbcf1622090 ;; wasm stub: ThrowWasmTrapMemOutOfBounds
0xfbcf162d8a7 4e7 90 nop