-rw-r--r-- 6241 libmceliece-20240812/crypto_kem/460896/avx/vec128_mul_asm.q raw
# Register and stack declarations for vec128_mul_asm.
#
# b0..b12: 256-bit registers, one per 128-bit limb loaded from input_2.
reg256 b0
reg256 b1
reg256 b2
reg256 b3
reg256 b4
reg256 b5
reg256 b6
reg256 b7
reg256 b8
reg256 b9
reg256 b10
reg256 b11
reg256 b12
# a0..a6: 256-bit registers holding the limbs loaded from input_1.
# a0..a5 are filled in both 128-bit halves ([0] and [1]); a6 only in half [0].
reg256 a0
reg256 a1
reg256 a2
reg256 a3
reg256 a4
reg256 a5
reg256 a6
# r0..r18: 256-bit accumulators for the AND/XOR partial products.
# NOTE(review): r19..r24 are declared but never referenced in the visible
# code - presumably left over from the generator; confirm before removing.
reg256 r0
reg256 r1
reg256 r2
reg256 r3
reg256 r4
reg256 r5
reg256 r6
reg256 r7
reg256 r8
reg256 r9
reg256 r10
reg256 r11
reg256 r12
reg256 r13
reg256 r14
reg256 r15
reg256 r16
reg256 r17
reg256 r18
reg256 r19
reg256 r20
reg256 r21
reg256 r22
reg256 r23
reg256 r24
# r: 256-bit scratch register for a single (b_i & a_j) term.
reg256 r
# h0..h24: 128-bit registers used after the partial products are spilled.
# h13..h24 are folded into lower-numbered limbs; h0..h12 are stored to input_0.
reg128 h0
reg128 h1
reg128 h2
reg128 h3
reg128 h4
reg128 h5
reg128 h6
reg128 h7
reg128 h8
reg128 h9
reg128 h10
reg128 h11
reg128 h12
reg128 h13
reg128 h14
reg128 h15
reg128 h16
reg128 h17
reg128 h18
reg128 h19
reg128 h20
reg128 h21
reg128 h22
reg128 h23
reg128 h24
# buf: stack scratch area for the 19 accumulators r0..r18, 32 bytes each.
# The highest store below covers bytes 576..607, i.e. 608 bytes = 4864 bits,
# so the stackNNNN size is presumably given in bits.
stack4864 buf
# ptr: address of buf.  tmp: scratch for the initial input_2 offset.
int64 ptr
int64 tmp
# vec128_mul_asm: AND/XOR (carry-less) product of two 13-limb operands,
# folded back down to 13 limbs of 128 bits each.
#
#   input_0  destination; thirteen 128-bit limbs are stored at byte
#            offsets 0, 16, ..., 192
#   input_1  operand a; thirteen 128-bit limbs are loaded from byte
#            offsets 0, 16, ..., 192
#   input_2  operand b; limb i is loaded from input_2 + i*input_3
#   input_3  byte stride between consecutive limbs of b
#
# input_2 is modified in place (advanced to limb 12, then stepped down).
#
# Packing: register a_j (j = 0..5) carries limb j of a in lane [0] and limb
# j+7 in lane [1]; a6 carries limb 6 in lane [0] and zero in lane [1].
# Each b limb is loaded with "x2" (presumably the same 128 bits replicated
# into both lanes - the lane [1] products below rely on that).
# Accumulator r_i therefore ends up holding
#     lane [0]: XOR over j of (a_j     & b_(i-j))  -> contributes to h_i
#     lane [1]: XOR over j of (a_(j+7) & b_(i-j))  -> contributes to h_(i+7)
# and is spilled to "slot i" at ptr + 32*i.
#
# The second phase recombines the lanes into h0..h24 and folds every h_k
# with k >= 13 into h_(k-13), h_(k-12), h_(k-10), h_(k-9), i.e. it applies
# x^13 = x^4 + x^3 + x + 1 (consistent with bitsliced GF(2^13) arithmetic;
# the field itself is not named anywhere in this file).
#
# NOTE(review): the interleaving of accumulate/spill statements looks tuned
# so that the live 256-bit values fit the register file; avoid reordering
# without re-running qhasm.
enter vec128_mul_asm
ptr = &buf
# Point input_2 at limb 12 of b: input_2 += 12 * input_3.
tmp = input_3
tmp *= 12
input_2 += tmp
# Row b12.  Also loads all of a; every later row reuses a0..a6.
b12 = mem128[ input_2 + 0 ] x2
input_2 -= input_3
# Zero a6 so that its lane [1] stays zero after lane [0] is loaded.
a6 = a6 ^ a6
a6[0] = mem128[ input_1 + 96 ]
# r18 has a single term (b12 & a6), so it is complete: spill to slot 18.
r18 = b12 & a6
mem256[ ptr + 576 ] = r18
a5[0] = mem128[ input_1 + 80 ]
a5[1] = mem128[ input_1 + 192 ]
r17 = b12 & a5
a4[0] = mem128[ input_1 + 64 ]
a4[1] = mem128[ input_1 + 176 ]
r16 = b12 & a4
a3[0] = mem128[ input_1 + 48 ]
a3[1] = mem128[ input_1 + 160 ]
r15 = b12 & a3
a2[0] = mem128[ input_1 + 32 ]
a2[1] = mem128[ input_1 + 144 ]
r14 = b12 & a2
a1[0] = mem128[ input_1 + 16 ]
a1[1] = mem128[ input_1 + 128 ]
r13 = b12 & a1
a0[0] = mem128[ input_1 + 0 ]
a0[1] = mem128[ input_1 + 112 ]
r12 = b12 & a0
# Row b11: r(11+j) ^= b11 & a_j for j = 6..1, r11 starts as b11 & a0.
# r17 receives its last term here and is spilled to slot 17.
b11 = mem128[ input_2 + 0 ] x2
input_2 -= input_3
r = b11 & a6
r17 ^= r
mem256[ ptr + 544 ] = r17
r = b11 & a5
r16 ^= r
r = b11 & a4
r15 ^= r
r = b11 & a3
r14 ^= r
r = b11 & a2
r13 ^= r
r = b11 & a1
r12 ^= r
r11 = b11 & a0
# Row b10: completes r16 (slot 16), starts r10.
b10 = mem128[ input_2 + 0 ] x2
input_2 -= input_3
r = b10 & a6
r16 ^= r
mem256[ ptr + 512 ] = r16
r = b10 & a5
r15 ^= r
r = b10 & a4
r14 ^= r
r = b10 & a3
r13 ^= r
r = b10 & a2
r12 ^= r
r = b10 & a1
r11 ^= r
r10 = b10 & a0
# Row b9: completes r15 (slot 15), starts r9.
b9 = mem128[ input_2 + 0 ] x2
input_2 -= input_3
r = b9 & a6
r15 ^= r
mem256[ ptr + 480 ] = r15
r = b9 & a5
r14 ^= r
r = b9 & a4
r13 ^= r
r = b9 & a3
r12 ^= r
r = b9 & a2
r11 ^= r
r = b9 & a1
r10 ^= r
r9 = b9 & a0
# Row b8: completes r14 (slot 14), starts r8.
b8 = mem128[ input_2 + 0 ] x2
input_2 -= input_3
r = b8 & a6
r14 ^= r
mem256[ ptr + 448 ] = r14
r = b8 & a5
r13 ^= r
r = b8 & a4
r12 ^= r
r = b8 & a3
r11 ^= r
r = b8 & a2
r10 ^= r
r = b8 & a1
r9 ^= r
r8 = b8 & a0
# Row b7: completes r13 (slot 13), starts r7.
b7 = mem128[ input_2 + 0 ] x2
input_2 -= input_3
r = b7 & a6
r13 ^= r
mem256[ ptr + 416 ] = r13
r = b7 & a5
r12 ^= r
r = b7 & a4
r11 ^= r
r = b7 & a3
r10 ^= r
r = b7 & a2
r9 ^= r
r = b7 & a1
r8 ^= r
r7 = b7 & a0
# Row b6: completes r12 (slot 12), starts r6.
b6 = mem128[ input_2 + 0 ] x2
input_2 -= input_3
r = b6 & a6
r12 ^= r
mem256[ ptr + 384 ] = r12
r = b6 & a5
r11 ^= r
r = b6 & a4
r10 ^= r
r = b6 & a3
r9 ^= r
r = b6 & a2
r8 ^= r
r = b6 & a1
r7 ^= r
r6 = b6 & a0
# Row b5: completes r11 (slot 11), starts r5.
b5 = mem128[ input_2 + 0 ] x2
input_2 -= input_3
r = b5 & a6
r11 ^= r
mem256[ ptr + 352 ] = r11
r = b5 & a5
r10 ^= r
r = b5 & a4
r9 ^= r
r = b5 & a3
r8 ^= r
r = b5 & a2
r7 ^= r
r = b5 & a1
r6 ^= r
r5 = b5 & a0
# Row b4: completes r10 (slot 10), starts r4.
b4 = mem128[ input_2 + 0 ] x2
input_2 -= input_3
r = b4 & a6
r10 ^= r
mem256[ ptr + 320 ] = r10
r = b4 & a5
r9 ^= r
r = b4 & a4
r8 ^= r
r = b4 & a3
r7 ^= r
r = b4 & a2
r6 ^= r
r = b4 & a1
r5 ^= r
r4 = b4 & a0
# Row b3: completes r9 (slot 9), starts r3.
b3 = mem128[ input_2 + 0 ] x2
input_2 -= input_3
r = b3 & a6
r9 ^= r
mem256[ ptr + 288 ] = r9
r = b3 & a5
r8 ^= r
r = b3 & a4
r7 ^= r
r = b3 & a3
r6 ^= r
r = b3 & a2
r5 ^= r
r = b3 & a1
r4 ^= r
r3 = b3 & a0
# Row b2: completes r8 (slot 8), starts r2.
b2 = mem128[ input_2 + 0 ] x2
input_2 -= input_3
r = b2 & a6
r8 ^= r
mem256[ ptr + 256 ] = r8
r = b2 & a5
r7 ^= r
r = b2 & a4
r6 ^= r
r = b2 & a3
r5 ^= r
r = b2 & a2
r4 ^= r
r = b2 & a1
r3 ^= r
r2 = b2 & a0
# Row b1: completes r7 (slot 7), starts r1.
b1 = mem128[ input_2 + 0 ] x2
input_2 -= input_3
r = b1 & a6
r7 ^= r
mem256[ ptr + 224 ] = r7
r = b1 & a5
r6 ^= r
r = b1 & a4
r5 ^= r
r = b1 & a3
r4 ^= r
r = b1 & a2
r3 ^= r
r = b1 & a1
r2 ^= r
r1 = b1 & a0
# Row b0: completes r6 (slot 6) and r5..r0.
b0 = mem128[ input_2 + 0 ] x2
# NOTE(review): input_2 is not read again after this point, so this final
# decrement has no visible effect in this function.
input_2 -= input_3
r = b0 & a6
r6 ^= r
mem256[ ptr + 192 ] = r6
r = b0 & a5
r5 ^= r
r = b0 & a4
r4 ^= r
r = b0 & a3
r3 ^= r
r = b0 & a2
r2 ^= r
r = b0 & a1
r1 ^= r
r0 = b0 & a0
# Spill the remaining accumulators r5..r0 to slots 5..0.
mem256[ ptr + 160 ] = r5
mem256[ ptr + 128 ] = r4
mem256[ ptr + 96 ] = r3
mem256[ ptr + 64 ] = r2
mem256[ ptr + 32 ] = r1
mem256[ ptr + 0 ] = r0
# Zero the upper halves of the ymm registers; everything below is 128-bit.
vzeroupper
# Phase 2: rebuild h24..h0 from the spilled lanes and fold the high limbs.
# Slot i lives at ptr + 32*i: bytes 0..15 feed h_i, bytes 16..31 feed h_(i+7).
#
# h24 = high lane of slot 17; folded into h11, h12, h14, h15
# (these four are initialised here by plain copies).
h24 = mem128[ ptr + 560 ]
h11 = h24
h12 = h24
h14 = h24
h15 = h24
# h23 = high lane of slot 16; folded into h10, h11, h13, h14.
h23 = mem128[ ptr + 528 ]
h10 = h23
h11 = h11 ^ h23
h13 = h23
h14 = h14 ^ h23
# h22 = high lane of slot 15; folded into h9, h10, h12, h13.
h22 = mem128[ ptr + 496 ]
h9 = h22
h10 = h10 ^ h22
h12 = h12 ^ h22
h13 = h13 ^ h22
# h21 = high lane of slot 14; folded into h8, h9, h11, h12.
h21 = mem128[ ptr + 464 ]
h8 = h21
h9 = h9 ^ h21
h11 = h11 ^ h21
h12 = h12 ^ h21
# h20 = high lane of slot 13; folded into h7, h8, h10, h11.
h20 = mem128[ ptr + 432 ]
h7 = h20
h8 = h8 ^ h20
h10 = h10 ^ h20
h11 = h11 ^ h20
# h19 = high lane of slot 12; folded into h6, h7, h9, h10.
h19 = mem128[ ptr + 400 ]
h6 = h19
h7 = h7 ^ h19
h9 = h9 ^ h19
h10 = h10 ^ h19
# h18 = high lane of slot 11 ^ low lane of slot 18; folded into h5, h6, h8, h9.
h18 = mem128[ ptr + 368 ]
h18 = h18 ^ mem128[ ptr + 576 ]
h5 = h18
h6 = h6 ^ h18
h8 = h8 ^ h18
h9 = h9 ^ h18
# h17 = high lane of slot 10 ^ low lane of slot 17; folded into h4, h5, h7, h8.
h17 = mem128[ ptr + 336 ]
h17 = h17 ^ mem128[ ptr + 544 ]
h4 = h17
h5 = h5 ^ h17
h7 = h7 ^ h17
h8 = h8 ^ h17
# h16 = high lane of slot 9 ^ low lane of slot 16; folded into h3, h4, h6, h7.
h16 = mem128[ ptr + 304 ]
h16 = h16 ^ mem128[ ptr + 512 ]
h3 = h16
h4 = h4 ^ h16
h6 = h6 ^ h16
h7 = h7 ^ h16
# h15 already holds the fold from h24; add high lane of slot 8 and low lane
# of slot 15, then fold into h2, h3, h5, h6.
h15 = h15 ^ mem128[ ptr + 272 ]
h15 = h15 ^ mem128[ ptr + 480 ]
h2 = h15
h3 = h3 ^ h15
h5 = h5 ^ h15
h6 = h6 ^ h15
# h14 (folds from h24, h23) plus high lane of slot 7 and low lane of slot 14;
# folded into h1, h2, h4, h5.
h14 = h14 ^ mem128[ ptr + 240 ]
h14 = h14 ^ mem128[ ptr + 448 ]
h1 = h14
h2 = h2 ^ h14
h4 = h4 ^ h14
h5 = h5 ^ h14
# h13 (folds from h23, h22) plus high lane of slot 6 and low lane of slot 13;
# folded into h0, h1, h3, h4.
h13 = h13 ^ mem128[ ptr + 208 ]
h13 = h13 ^ mem128[ ptr + 416 ]
h0 = h13
h1 = h1 ^ h13
h3 = h3 ^ h13
h4 = h4 ^ h13
# Output limbs 12..7: accumulated folds ^ low lane of slot i ^ high lane of
# slot i-7, stored to input_0 + 16*i.
h12 = h12 ^ mem128[ ptr + 384 ]
h12 = h12 ^ mem128[ ptr + 176 ]
mem128[ input_0 + 192 ] = h12
h11 = h11 ^ mem128[ ptr + 352 ]
h11 = h11 ^ mem128[ ptr + 144 ]
mem128[ input_0 + 176 ] = h11
h10 = h10 ^ mem128[ ptr + 320 ]
h10 = h10 ^ mem128[ ptr + 112 ]
mem128[ input_0 + 160 ] = h10
h9 = h9 ^ mem128[ ptr + 288 ]
h9 = h9 ^ mem128[ ptr + 80 ]
mem128[ input_0 + 144 ] = h9
h8 = h8 ^ mem128[ ptr + 256 ]
h8 = h8 ^ mem128[ ptr + 48 ]
mem128[ input_0 + 128 ] = h8
h7 = h7 ^ mem128[ ptr + 224 ]
h7 = h7 ^ mem128[ ptr + 16 ]
mem128[ input_0 + 112 ] = h7
# Output limbs 6..0: accumulated folds ^ low lane of slot i only (there is
# no slot i-7 for these), stored to input_0 + 16*i.
h6 = h6 ^ mem128[ ptr + 192 ]
mem128[ input_0 + 96 ] = h6
h5 = h5 ^ mem128[ ptr + 160 ]
mem128[ input_0 + 80 ] = h5
h4 = h4 ^ mem128[ ptr + 128 ]
mem128[ input_0 + 64 ] = h4
h3 = h3 ^ mem128[ ptr + 96 ]
mem128[ input_0 + 48 ] = h3
h2 = h2 ^ mem128[ ptr + 64 ]
mem128[ input_0 + 32 ] = h2
h1 = h1 ^ mem128[ ptr + 32 ]
mem128[ input_0 + 16 ] = h1
h0 = h0 ^ mem128[ ptr + 0 ]
mem128[ input_0 + 0 ] = h0
return