// vec_mul_sp_asm (the same entry point is also labelled _vec_mul_sp_asm):
// x86-64 routine in AT&T syntax, annotated with qhasm comments. Both symbol
// names are routed through the CRYPTO_SHARED_NAMESPACE macros and are declared
// ASM_HIDDEN and .global; the entry is aligned with .p2align 5.
//
// What the instructions visible in this copy do:
//  - Prologue: r11 = (rsp & 31) + 672 is subtracted from rsp, which leaves rsp
//    a multiple of 32. r11 (the adjustment itself), r12-r15 and rbx are then
//    stored at 608..648(%rsp).
//  - rcx ("ptr" in the qhasm comments) is set to rsp+0, the start of the
//    stack scratch area the qhasm comments call "buf".
//  - Three 256-bit vectors are loaded from rsi (input_1) at offsets 0/32/64
//    and six from rdx (input_2) at offsets 0..160. vpermq $0xfa and $0x50
//    duplicate 64-bit lanes [2,2,3,3] / [0,0,1,1] of the former into a0..a5;
//    vpermq $0xdd selects lanes [1,3,1,3] of the latter into b0..b5.
//  - The a/b pairs are combined with vpand; the qhasm comments describe the
//    results being XOR-accumulated into r0..r10 and stored through ptr.
//  - The tail loads 64-bit words from ptr into general registers (h0..h22 in
//    the qhasm comments); the qhasm comments describe XORing them with other
//    64-bit words of the buffer.
//  - Epilogue: r11-r15 and rbx are reloaded from 608..648(%rsp), r11 is added
//    back to rsp, and the routine returns.
//
// NOTE(review): this copy appears to have lost its line breaks and the
// angle-bracket operand annotations of the "asm 1"/"asm 2" comments, and some
// qhasm statements (for example "mem256[ ptr + 320 ] = r10" and the
// "r9 ^= r" updates) have no matching instruction in the visible text.
// Presumably text was dropped during extraction; confirm against the
// generator output before assembling or modifying this file.
// NOTE(review): no use of %rdi (input_0) is visible here; presumably the
// result is written through it in the missing text. TODO confirm.
// 20240504 djb: add note.GNU-stack // 20221231 djb: port hidden to macos; tnx thom wiggers // 20221230 djb: add linker line // linker define vec_mul_sp_asm #include "crypto_asm_hidden.h" #define vec_mul_sp_asm CRYPTO_SHARED_NAMESPACE(vec_mul_sp_asm) #define _vec_mul_sp_asm _CRYPTO_SHARED_NAMESPACE(vec_mul_sp_asm) # qhasm: int64 input_0 # qhasm: int64 input_1 # qhasm: int64 input_2 # qhasm: int64 input_3 # qhasm: int64 input_4 # qhasm: int64 input_5 # qhasm: stack64 input_6 # qhasm: stack64 input_7 # qhasm: int64 caller_r11 # qhasm: int64 caller_r12 # qhasm: int64 caller_r13 # qhasm: int64 caller_r14 # qhasm: int64 caller_r15 # qhasm: int64 caller_rbx # qhasm: int64 caller_rbp # qhasm: reg256 s0 # qhasm: reg256 s1 # qhasm: reg256 s2 # qhasm: reg256 s3 # qhasm: reg256 s4 # qhasm: reg256 s5 # qhasm: reg256 b0 # qhasm: reg256 b1 # qhasm: reg256 b2 # qhasm: reg256 b3 # qhasm: reg256 b4 # qhasm: reg256 b5 # qhasm: reg256 a0 # qhasm: reg256 a1 # qhasm: reg256 a2 # qhasm: reg256 a3 # qhasm: reg256 a4 # qhasm: reg256 a5 # qhasm: reg256 r0 # qhasm: reg256 r1 # qhasm: reg256 r2 # qhasm: reg256 r3 # qhasm: reg256 r4 # qhasm: reg256 r5 # qhasm: reg256 r6 # qhasm: reg256 r7 # qhasm: reg256 r8 # qhasm: reg256 r9 # qhasm: reg256 r10 # qhasm: reg256 r11 # qhasm: reg256 r12 # qhasm: reg256 r13 # qhasm: reg256 r14 # qhasm: reg256 r15 # qhasm: reg256 r16 # qhasm: reg256 r17 # qhasm: reg256 r18 # qhasm: reg256 r19 # qhasm: reg256 r20 # qhasm: reg256 r21 # qhasm: reg256 r22 # qhasm: reg256 r # qhasm: int64 h0 # qhasm: int64 h1 # qhasm: int64 h2 # qhasm: int64 h3 # qhasm: int64 h4 # qhasm: int64 h5 # qhasm: int64 h6 # qhasm: int64 h7 # qhasm: int64 h8 # qhasm: int64 h9 # qhasm: int64 h10 # qhasm: int64 h11 # qhasm: int64 h12 # qhasm: int64 h13 # qhasm: int64 h14 # qhasm: int64 h15 # qhasm: int64 h16 # qhasm: int64 h17 # qhasm: int64 h18 # qhasm: int64 h19 # qhasm: int64 h20 # qhasm: int64 h21 # qhasm: int64 h22 # qhasm: stack4864 buf # qhasm: int64 ptr # qhasm: int64 tmp # qhasm: 
stack64 r11_stack # qhasm: stack64 r12_stack # qhasm: stack64 r13_stack # qhasm: stack64 r14_stack # qhasm: stack64 r15_stack # qhasm: stack64 rbx_stack # qhasm: stack64 rbp_stack # qhasm: enter vec_mul_sp_asm .p2align 5 ASM_HIDDEN _vec_mul_sp_asm ASM_HIDDEN vec_mul_sp_asm .global _vec_mul_sp_asm .global vec_mul_sp_asm _vec_mul_sp_asm: vec_mul_sp_asm: mov %rsp,%r11 and $31,%r11 add $672,%r11 sub %r11,%rsp # qhasm: r11_stack = caller_r11 # asm 1: movq r11_stack=stack64#1 # asm 2: movq r11_stack=608(%rsp) movq %r11,608(%rsp) # qhasm: r12_stack = caller_r12 # asm 1: movq r12_stack=stack64#2 # asm 2: movq r12_stack=616(%rsp) movq %r12,616(%rsp) # qhasm: r13_stack = caller_r13 # asm 1: movq r13_stack=stack64#3 # asm 2: movq r13_stack=624(%rsp) movq %r13,624(%rsp) # qhasm: r14_stack = caller_r14 # asm 1: movq r14_stack=stack64#4 # asm 2: movq r14_stack=632(%rsp) movq %r14,632(%rsp) # qhasm: r15_stack = caller_r15 # asm 1: movq r15_stack=stack64#5 # asm 2: movq r15_stack=640(%rsp) movq %r15,640(%rsp) # qhasm: rbx_stack = caller_rbx # asm 1: movq rbx_stack=stack64#6 # asm 2: movq rbx_stack=648(%rsp) movq %rbx,648(%rsp) # qhasm: ptr = &buf # asm 1: leaq ptr=int64#4 # asm 2: leaq ptr=%rcx leaq 0(%rsp),%rcx # qhasm: s0 = mem256[ input_1 + 0 ] # asm 1: vmovupd 0(s0=reg256#1 # asm 2: vmovupd 0(s0=%ymm0 vmovupd 0(%rsi),%ymm0 # qhasm: s1 = mem256[ input_1 + 32 ] # asm 1: vmovupd 32(s1=reg256#2 # asm 2: vmovupd 32(s1=%ymm1 vmovupd 32(%rsi),%ymm1 # qhasm: s2 = mem256[ input_1 + 64 ] # asm 1: vmovupd 64(s2=reg256#3 # asm 2: vmovupd 64(s2=%ymm2 vmovupd 64(%rsi),%ymm2 # qhasm: a5[0,1,2,3] = s2[2,2,3,3] # asm 1: vpermq $0xfa,a5=reg256#4 # asm 2: vpermq $0xfa,a5=%ymm3 vpermq $0xfa,%ymm2,%ymm3 # qhasm: r = mem256[ input_2 + 160 ] # asm 1: vmovupd 160(r=reg256#5 # asm 2: vmovupd 160(r=%ymm4 vmovupd 160(%rdx),%ymm4 # qhasm: b5[0,1,2,3] = r[1,3,1,3] # asm 1: vpermq $0xdd,b5=reg256#5 # asm 2: vpermq $0xdd,b5=%ymm4 vpermq $0xdd,%ymm4,%ymm4 # qhasm: r10 = a5 & b5 # asm 1: vpand r10=reg256#6 # 
asm 2: vpand r10=%ymm5 vpand %ymm3,%ymm4,%ymm5 # qhasm: mem256[ ptr + 320 ] = r10 # asm 1: vmovupd r=reg256#6 # asm 2: vmovupd 128(r=%ymm5 vmovupd 128(%rdx),%ymm5 # qhasm: b4[0,1,2,3] = r[1,3,1,3] # asm 1: vpermq $0xdd,b4=reg256#6 # asm 2: vpermq $0xdd,b4=%ymm5 vpermq $0xdd,%ymm5,%ymm5 # qhasm: r9 = a5 & b4 # asm 1: vpand r9=reg256#7 # asm 2: vpand r9=%ymm6 vpand %ymm3,%ymm5,%ymm6 # qhasm: r = mem256[ input_2 + 96 ] # asm 1: vmovupd 96(r=reg256#8 # asm 2: vmovupd 96(r=%ymm7 vmovupd 96(%rdx),%ymm7 # qhasm: b3[0,1,2,3] = r[1,3,1,3] # asm 1: vpermq $0xdd,b3=reg256#8 # asm 2: vpermq $0xdd,b3=%ymm7 vpermq $0xdd,%ymm7,%ymm7 # qhasm: r8 = a5 & b3 # asm 1: vpand r8=reg256#9 # asm 2: vpand r8=%ymm8 vpand %ymm3,%ymm7,%ymm8 # qhasm: r = mem256[ input_2 + 64 ] # asm 1: vmovupd 64(r=reg256#10 # asm 2: vmovupd 64(r=%ymm9 vmovupd 64(%rdx),%ymm9 # qhasm: b2[0,1,2,3] = r[1,3,1,3] # asm 1: vpermq $0xdd,b2=reg256#10 # asm 2: vpermq $0xdd,b2=%ymm9 vpermq $0xdd,%ymm9,%ymm9 # qhasm: r7 = a5 & b2 # asm 1: vpand r7=reg256#11 # asm 2: vpand r7=%ymm10 vpand %ymm3,%ymm9,%ymm10 # qhasm: r = mem256[ input_2 + 32 ] # asm 1: vmovupd 32(r=reg256#12 # asm 2: vmovupd 32(r=%ymm11 vmovupd 32(%rdx),%ymm11 # qhasm: b1[0,1,2,3] = r[1,3,1,3] # asm 1: vpermq $0xdd,b1=reg256#12 # asm 2: vpermq $0xdd,b1=%ymm11 vpermq $0xdd,%ymm11,%ymm11 # qhasm: r6 = a5 & b1 # asm 1: vpand r6=reg256#13 # asm 2: vpand r6=%ymm12 vpand %ymm3,%ymm11,%ymm12 # qhasm: r = mem256[ input_2 + 0 ] # asm 1: vmovupd 0(r=reg256#14 # asm 2: vmovupd 0(r=%ymm13 vmovupd 0(%rdx),%ymm13 # qhasm: b0[0,1,2,3] = r[1,3,1,3] # asm 1: vpermq $0xdd,b0=reg256#14 # asm 2: vpermq $0xdd,b0=%ymm13 vpermq $0xdd,%ymm13,%ymm13 # qhasm: r5 = a5 & b0 # asm 1: vpand r5=reg256#4 # asm 2: vpand r5=%ymm3 vpand %ymm3,%ymm13,%ymm3 # qhasm: a4[0,1,2,3] = s2[0,0,1,1] # asm 1: vpermq $0x50,a4=reg256#3 # asm 2: vpermq $0x50,a4=%ymm2 vpermq $0x50,%ymm2,%ymm2 # qhasm: r = a4 & b5 # asm 1: vpand r=reg256#15 # asm 2: vpand r=%ymm14 vpand %ymm2,%ymm4,%ymm14 # qhasm: r9 ^= r 
# asm 1: vpxor r=reg256#7 # asm 2: vpand r=%ymm6 vpand %ymm2,%ymm5,%ymm6 # qhasm: r8 ^= r # asm 1: vpxor r=reg256#7 # asm 2: vpand r=%ymm6 vpand %ymm2,%ymm7,%ymm6 # qhasm: r7 ^= r # asm 1: vpxor r=reg256#7 # asm 2: vpand r=%ymm6 vpand %ymm2,%ymm9,%ymm6 # qhasm: r6 ^= r # asm 1: vpxor r=reg256#7 # asm 2: vpand r=%ymm6 vpand %ymm2,%ymm11,%ymm6 # qhasm: r5 ^= r # asm 1: vpxor r4=reg256#3 # asm 2: vpand r4=%ymm2 vpand %ymm2,%ymm13,%ymm2 # qhasm: a3[0,1,2,3] = s1[2,2,3,3] # asm 1: vpermq $0xfa,a3=reg256#7 # asm 2: vpermq $0xfa,a3=%ymm6 vpermq $0xfa,%ymm1,%ymm6 # qhasm: r = a3 & b5 # asm 1: vpand r=reg256#15 # asm 2: vpand r=%ymm14 vpand %ymm6,%ymm4,%ymm14 # qhasm: r8 ^= r # asm 1: vpxor r=reg256#9 # asm 2: vpand r=%ymm8 vpand %ymm6,%ymm5,%ymm8 # qhasm: r7 ^= r # asm 1: vpxor r=reg256#9 # asm 2: vpand r=%ymm8 vpand %ymm6,%ymm7,%ymm8 # qhasm: r6 ^= r # asm 1: vpxor r=reg256#9 # asm 2: vpand r=%ymm8 vpand %ymm6,%ymm9,%ymm8 # qhasm: r5 ^= r # asm 1: vpxor r=reg256#9 # asm 2: vpand r=%ymm8 vpand %ymm6,%ymm11,%ymm8 # qhasm: r4 ^= r # asm 1: vpxor r3=reg256#7 # asm 2: vpand r3=%ymm6 vpand %ymm6,%ymm13,%ymm6 # qhasm: a2[0,1,2,3] = s1[0,0,1,1] # asm 1: vpermq $0x50,a2=reg256#2 # asm 2: vpermq $0x50,a2=%ymm1 vpermq $0x50,%ymm1,%ymm1 # qhasm: r = a2 & b5 # asm 1: vpand r=reg256#9 # asm 2: vpand r=%ymm8 vpand %ymm1,%ymm4,%ymm8 # qhasm: r7 ^= r # asm 1: vpxor r=reg256#9 # asm 2: vpand r=%ymm8 vpand %ymm1,%ymm5,%ymm8 # qhasm: r6 ^= r # asm 1: vpxor r=reg256#9 # asm 2: vpand r=%ymm8 vpand %ymm1,%ymm7,%ymm8 # qhasm: r5 ^= r # asm 1: vpxor r=reg256#9 # asm 2: vpand r=%ymm8 vpand %ymm1,%ymm9,%ymm8 # qhasm: r4 ^= r # asm 1: vpxor r=reg256#9 # asm 2: vpand r=%ymm8 vpand %ymm1,%ymm11,%ymm8 # qhasm: r3 ^= r # asm 1: vpxor r2=reg256#2 # asm 2: vpand r2=%ymm1 vpand %ymm1,%ymm13,%ymm1 # qhasm: a1[0,1,2,3] = s0[2,2,3,3] # asm 1: vpermq $0xfa,a1=reg256#9 # asm 2: vpermq $0xfa,a1=%ymm8 vpermq $0xfa,%ymm0,%ymm8 # qhasm: r = a1 & b5 # asm 1: vpand r=reg256#11 # asm 2: vpand r=%ymm10 vpand 
%ymm8,%ymm4,%ymm10 # qhasm: r6 ^= r # asm 1: vpxor r=reg256#11 # asm 2: vpand r=%ymm10 vpand %ymm8,%ymm5,%ymm10 # qhasm: r5 ^= r # asm 1: vpxor r=reg256#11 # asm 2: vpand r=%ymm10 vpand %ymm8,%ymm7,%ymm10 # qhasm: r4 ^= r # asm 1: vpxor r=reg256#11 # asm 2: vpand r=%ymm10 vpand %ymm8,%ymm9,%ymm10 # qhasm: r3 ^= r # asm 1: vpxor r=reg256#11 # asm 2: vpand r=%ymm10 vpand %ymm8,%ymm11,%ymm10 # qhasm: r2 ^= r # asm 1: vpxor r1=reg256#9 # asm 2: vpand r1=%ymm8 vpand %ymm8,%ymm13,%ymm8 # qhasm: a0[0,1,2,3] = s0[0,0,1,1] # asm 1: vpermq $0x50,a0=reg256#1 # asm 2: vpermq $0x50,a0=%ymm0 vpermq $0x50,%ymm0,%ymm0 # qhasm: r = a0 & b5 # asm 1: vpand r=reg256#5 # asm 2: vpand r=%ymm4 vpand %ymm0,%ymm4,%ymm4 # qhasm: r5 ^= r # asm 1: vpxor r=reg256#4 # asm 2: vpand r=%ymm3 vpand %ymm0,%ymm5,%ymm3 # qhasm: r4 ^= r # asm 1: vpxor r=reg256#4 # asm 2: vpand r=%ymm3 vpand %ymm0,%ymm7,%ymm3 # qhasm: r3 ^= r # asm 1: vpxor r=reg256#4 # asm 2: vpand r=%ymm3 vpand %ymm0,%ymm9,%ymm3 # qhasm: r2 ^= r # asm 1: vpxor r=reg256#4 # asm 2: vpand r=%ymm3 vpand %ymm0,%ymm11,%ymm3 # qhasm: r1 ^= r # asm 1: vpxor r0=reg256#1 # asm 2: vpand r0=%ymm0 vpand %ymm0,%ymm13,%ymm0 # qhasm: mem256[ ptr + 128 ] = r4 # asm 1: vmovupd h22=int64#2 # asm 2: movq 344(h22=%rsi movq 344(%rcx),%rsi # qhasm: h13 = h22 # asm 1: mov h13=int64#3 # asm 2: mov h13=%rdx mov %rsi,%rdx # qhasm: h10 = h22 # asm 1: mov h10=int64#2 # asm 2: mov h10=%rsi mov %rsi,%rsi # qhasm: h21 = mem64[ ptr + 336 ] # asm 1: movq 336(h21=int64#5 # asm 2: movq 336(h21=%r8 movq 336(%rcx),%r8 # qhasm: h21 ^= *(uint64 *) ( ptr + 328 ) # asm 1: xorq 328(h12=int64#6 # asm 2: mov h12=%r9 mov %r8,%r9 # qhasm: h9 = h21 # asm 1: mov h9=int64#5 # asm 2: mov h9=%r8 mov %r8,%r8 # qhasm: h20 = mem64[ ptr + 312 ] # asm 1: movq 312(h20=int64#7 # asm 2: movq 312(h20=%rax movq 312(%rcx),%rax # qhasm: h20 ^= *(uint64 *) ( ptr + 320 ) # asm 1: xorq 320(h11=int64#8 # asm 2: mov h11=%r10 mov %rax,%r10 # qhasm: h8 = h20 # asm 1: mov h8=int64#7 # asm 2: mov h8=%rax 
mov %rax,%rax # qhasm: h19 = mem64[ ptr + 304 ] # asm 1: movq 304(h19=int64#9 # asm 2: movq 304(h19=%r11 movq 304(%rcx),%r11 # qhasm: h19 ^= *(uint64 *) ( ptr + 296 ) # asm 1: xorq 296(h7=int64#9 # asm 2: mov h7=%r11 mov %r11,%r11 # qhasm: h18 = mem64[ ptr + 280 ] # asm 1: movq 280(h18=int64#10 # asm 2: movq 280(h18=%r12 movq 280(%rcx),%r12 # qhasm: h18 ^= *(uint64 *) ( ptr + 288 ) # asm 1: xorq 288(h6=int64#10 # asm 2: mov h6=%r12 mov %r12,%r12 # qhasm: h17 = mem64[ ptr + 272 ] # asm 1: movq 272(h17=int64#11 # asm 2: movq 272(h17=%r13 movq 272(%rcx),%r13 # qhasm: h17 ^= *(uint64 *) ( ptr + 264 ) # asm 1: xorq 264(h5=int64#11 # asm 2: mov h5=%r13 mov %r13,%r13 # qhasm: h16 = mem64[ ptr + 248 ] # asm 1: movq 248(h16=int64#12 # asm 2: movq 248(h16=%r14 movq 248(%rcx),%r14 # qhasm: h16 ^= *(uint64 *) ( ptr + 256 ) # asm 1: xorq 256(h4=int64#12 # asm 2: mov h4=%r14 mov %r14,%r14 # qhasm: h15 = mem64[ ptr + 240 ] # asm 1: movq 240(h15=int64#13 # asm 2: movq 240(h15=%r15 movq 240(%rcx),%r15 # qhasm: h15 ^= *(uint64 *) ( ptr + 232 ) # asm 1: xorq 232(h3=int64#13 # asm 2: mov h3=%r15 mov %r15,%r15 # qhasm: h14 = mem64[ ptr + 216 ] # asm 1: movq 216(h14=int64#14 # asm 2: movq 216(h14=%rbx movq 216(%rcx),%rbx # qhasm: h14 ^= *(uint64 *) ( ptr + 224 ) # asm 1: xorq 224(h2=int64#14 # asm 2: mov h2=%rbx mov %rbx,%rbx # qhasm: h13 ^= *(uint64 *) ( ptr + 208 ) # asm 1: xorq 208(h1=int64#3 # asm 2: mov h1=%rdx mov %rdx,%rdx # qhasm: h12 ^= *(uint64 *) ( ptr + 184 ) # asm 1: xorq 184(h0=int64#6 # asm 2: mov h0=%r9 mov %r9,%r9 # qhasm: h11 ^= *(uint64 *) ( ptr + 176 ) # asm 1: xorq 176(caller_r11=int64#9 # asm 2: movq caller_r11=%r11 movq 608(%rsp),%r11 # qhasm: caller_r12 = r12_stack # asm 1: movq caller_r12=int64#10 # asm 2: movq caller_r12=%r12 movq 616(%rsp),%r12 # qhasm: caller_r13 = r13_stack # asm 1: movq caller_r13=int64#11 # asm 2: movq caller_r13=%r13 movq 624(%rsp),%r13 # qhasm: caller_r14 = r14_stack # asm 1: movq caller_r14=int64#12 # asm 2: movq caller_r14=%r14 movq 
632(%rsp),%r14 # qhasm: caller_r15 = r15_stack # asm 1: movq caller_r15=int64#13 # asm 2: movq caller_r15=%r15 movq 640(%rsp),%r15 # qhasm: caller_rbx = rbx_stack # asm 1: movq caller_rbx=int64#14 # asm 2: movq caller_rbx=%rbx movq 648(%rsp),%rbx # qhasm: return add %r11,%rsp ret .section .note.GNU-stack,"",@progbits