-rw-r--r-- 6241 libmceliece-20240812/crypto_kem/460896/avx/vec128_mul_asm.q raw
reg256 b0 reg256 b1 reg256 b2 reg256 b3 reg256 b4 reg256 b5 reg256 b6 reg256 b7 reg256 b8 reg256 b9 reg256 b10 reg256 b11 reg256 b12 reg256 a0 reg256 a1 reg256 a2 reg256 a3 reg256 a4 reg256 a5 reg256 a6 reg256 r0 reg256 r1 reg256 r2 reg256 r3 reg256 r4 reg256 r5 reg256 r6 reg256 r7 reg256 r8 reg256 r9 reg256 r10 reg256 r11 reg256 r12 reg256 r13 reg256 r14 reg256 r15 reg256 r16 reg256 r17 reg256 r18 reg256 r19 reg256 r20 reg256 r21 reg256 r22 reg256 r23 reg256 r24 reg256 r reg128 h0 reg128 h1 reg128 h2 reg128 h3 reg128 h4 reg128 h5 reg128 h6 reg128 h7 reg128 h8 reg128 h9 reg128 h10 reg128 h11 reg128 h12 reg128 h13 reg128 h14 reg128 h15 reg128 h16 reg128 h17 reg128 h18 reg128 h19 reg128 h20 reg128 h21 reg128 h22 reg128 h23 reg128 h24 stack4864 buf int64 ptr int64 tmp enter vec128_mul_asm ptr = &buf tmp = input_3 tmp *= 12 input_2 += tmp b12 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 a6 = a6 ^ a6 a6[0] = mem128[ input_1 + 96 ] r18 = b12 & a6 mem256[ ptr + 576 ] = r18 a5[0] = mem128[ input_1 + 80 ] a5[1] = mem128[ input_1 + 192 ] r17 = b12 & a5 a4[0] = mem128[ input_1 + 64 ] a4[1] = mem128[ input_1 + 176 ] r16 = b12 & a4 a3[0] = mem128[ input_1 + 48 ] a3[1] = mem128[ input_1 + 160 ] r15 = b12 & a3 a2[0] = mem128[ input_1 + 32 ] a2[1] = mem128[ input_1 + 144 ] r14 = b12 & a2 a1[0] = mem128[ input_1 + 16 ] a1[1] = mem128[ input_1 + 128 ] r13 = b12 & a1 a0[0] = mem128[ input_1 + 0 ] a0[1] = mem128[ input_1 + 112 ] r12 = b12 & a0 b11 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 r = b11 & a6 r17 ^= r mem256[ ptr + 544 ] = r17 r = b11 & a5 r16 ^= r r = b11 & a4 r15 ^= r r = b11 & a3 r14 ^= r r = b11 & a2 r13 ^= r r = b11 & a1 r12 ^= r r11 = b11 & a0 b10 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 r = b10 & a6 r16 ^= r mem256[ ptr + 512 ] = r16 r = b10 & a5 r15 ^= r r = b10 & a4 r14 ^= r r = b10 & a3 r13 ^= r r = b10 & a2 r12 ^= r r = b10 & a1 r11 ^= r r10 = b10 & a0 b9 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 r = b9 & a6 r15 ^= r mem256[ ptr + 480 ] = r15 r = b9 & a5 r14 ^= r r = b9 & a4 r13 ^= r r = b9 & a3 r12 ^= r r = b9 & a2 r11 ^= r r = b9 & a1 r10 ^= r r9 = b9 & a0 b8 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 r = b8 & a6 r14 ^= r mem256[ ptr + 448 ] = r14 r = b8 & a5 r13 ^= r r = b8 & a4 r12 ^= r r = b8 & a3 r11 ^= r r = b8 & a2 r10 ^= r r = b8 & a1 r9 ^= r r8 = b8 & a0 b7 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 r = b7 & a6 r13 ^= r mem256[ ptr + 416 ] = r13 r = b7 & a5 r12 ^= r r = b7 & a4 r11 ^= r r = b7 & a3 r10 ^= r r = b7 & a2 r9 ^= r r = b7 & a1 r8 ^= r r7 = b7 & a0 b6 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 r = b6 & a6 r12 ^= r mem256[ ptr + 384 ] = r12 r = b6 & a5 r11 ^= r r = b6 & a4 r10 ^= r r = b6 & a3 r9 ^= r r = b6 & a2 r8 ^= r r = b6 & a1 r7 ^= r r6 = b6 & a0 b5 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 r = b5 & a6 r11 ^= r mem256[ ptr + 352 ] = r11 r = b5 & a5 r10 ^= r r = b5 & a4 r9 ^= r r = b5 & a3 r8 ^= r r = b5 & a2 r7 ^= r r = b5 & a1 r6 ^= r r5 = b5 & a0 b4 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 r = b4 & a6 r10 ^= r mem256[ ptr + 320 ] = r10 r = b4 & a5 r9 ^= r r = b4 & a4 r8 ^= r r = b4 & a3 r7 ^= r r = b4 & a2 r6 ^= r r = b4 & a1 r5 ^= r r4 = b4 & a0 b3 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 r = b3 & a6 r9 ^= r mem256[ ptr + 288 ] = r9 r = b3 & a5 r8 ^= r r = b3 & a4 r7 ^= r r = b3 & a3 r6 ^= r r = b3 & a2 r5 ^= r r = b3 & a1 r4 ^= r r3 = b3 & a0 b2 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 r = b2 & a6 r8 ^= r mem256[ ptr + 256 ] = r8 r = b2 & a5 r7 ^= r r = b2 & a4 r6 ^= r r = b2 & a3 r5 ^= r r = b2 & a2 r4 ^= r r = b2 & a1 r3 ^= r r2 = b2 & a0 b1 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 r = b1 & a6 r7 ^= r mem256[ ptr + 224 ] = r7 r = b1 & a5 r6 ^= r r = b1 & a4 r5 ^= r r = b1 & a3 r4 ^= r r = b1 & a2 r3 ^= r r = b1 & a1 r2 ^= r r1 = b1 & a0 b0 = mem128[ input_2 + 0 ] x2 input_2 -= input_3 r = b0 & a6 r6 ^= r mem256[ ptr + 192 ] = r6 r = b0 & a5 r5 ^= r r = b0 & a4 r4 ^= r r = b0 & a3 r3 ^= r r = b0 & a2 r2 ^= r r = b0 & a1 r1 ^= r r0 = b0 & a0 mem256[ ptr + 160 ] = r5 mem256[ ptr + 128 ] = r4 mem256[ ptr + 96 ] = r3 mem256[ ptr + 64 ] = r2 mem256[ ptr + 32 ] = r1 mem256[ ptr + 0 ] = r0 vzeroupper h24 = mem128[ ptr + 560 ] h11 = h24 h12 = h24 h14 = h24 h15 = h24 h23 = mem128[ ptr + 528 ] h10 = h23 h11 = h11 ^ h23 h13 = h23 h14 = h14 ^ h23 h22 = mem128[ ptr + 496 ] h9 = h22 h10 = h10 ^ h22 h12 = h12 ^ h22 h13 = h13 ^ h22 h21 = mem128[ ptr + 464 ] h8 = h21 h9 = h9 ^ h21 h11 = h11 ^ h21 h12 = h12 ^ h21 h20 = mem128[ ptr + 432 ] h7 = h20 h8 = h8 ^ h20 h10 = h10 ^ h20 h11 = h11 ^ h20 h19 = mem128[ ptr + 400 ] h6 = h19 h7 = h7 ^ h19 h9 = h9 ^ h19 h10 = h10 ^ h19 h18 = mem128[ ptr + 368 ] h18 = h18 ^ mem128[ ptr + 576 ] h5 = h18 h6 = h6 ^ h18 h8 = h8 ^ h18 h9 = h9 ^ h18 h17 = mem128[ ptr + 336 ] h17 = h17 ^ mem128[ ptr + 544 ] h4 = h17 h5 = h5 ^ h17 h7 = h7 ^ h17 h8 = h8 ^ h17 h16 = mem128[ ptr + 304 ] h16 = h16 ^ mem128[ ptr + 512 ] h3 = h16 h4 = h4 ^ h16 h6 = h6 ^ h16 h7 = h7 ^ h16 h15 = h15 ^ mem128[ ptr + 272 ] h15 = h15 ^ mem128[ ptr + 480 ] h2 = h15 h3 = h3 ^ h15 h5 = h5 ^ h15 h6 = h6 ^ h15 h14 = h14 ^ mem128[ ptr + 240 ] h14 = h14 ^ mem128[ ptr + 448 ] h1 = h14 h2 = h2 ^ h14 h4 = h4 ^ h14 h5 = h5 ^ h14 h13 = h13 ^ mem128[ ptr + 208 ] h13 = h13 ^ mem128[ ptr + 416 ] h0 = h13 h1 = h1 ^ h13 h3 = h3 ^ h13 h4 = h4 ^ h13 h12 = h12 ^ mem128[ ptr + 384 ] h12 = h12 ^ mem128[ ptr + 176 ] mem128[ input_0 + 192 ] = h12 h11 = h11 ^ mem128[ ptr + 352 ] h11 = h11 ^ mem128[ ptr + 144 ] mem128[ input_0 + 176 ] = h11 h10 = h10 ^ mem128[ ptr + 320 ] h10 = h10 ^ mem128[ ptr + 112 ] mem128[ input_0 + 160 ] = h10 h9 = h9 ^ mem128[ ptr + 288 ] h9 = h9 ^ mem128[ ptr + 80 ] mem128[ input_0 + 144 ] = h9 h8 = h8 ^ mem128[ ptr + 256 ] h8 = h8 ^ mem128[ ptr + 48 ] mem128[ input_0 + 128 ] = h8 h7 = h7 ^ mem128[ ptr + 224 ] h7 = h7 ^ mem128[ ptr + 16 ] mem128[ input_0 + 112 ] = h7 h6 = h6 ^ mem128[ ptr + 192 ] mem128[ input_0 + 96 ] = h6 h5 = h5 ^ mem128[ ptr + 160 ] mem128[ input_0 + 80 ] = h5 h4 = h4 ^ mem128[ ptr + 128 ] mem128[ input_0 + 64 ] = h4 h3 = h3 ^ mem128[ ptr + 96 ] mem128[ input_0 + 48 ] = h3 h2 = h2 ^ mem128[ ptr + 64 ] mem128[ input_0 + 32 ] = h2 h1 = h1 ^ mem128[ ptr + 32 ] mem128[ input_0 + 16 ] = h1 h0 = h0 ^ mem128[ ptr + 0 ] mem128[ input_0 + 0 ] = h0 return