-rw-r--r-- 31045 libmceliece-20240726/crypto_kem/460896/avx/transpose_64x128_sp_asm.q raw
# transpose_64x128_sp_asm
#
# Straight-line, in-place routine over the 1024 bytes at input_0 .. input_0 + 1023,
# treated as 64 slots of 16 bytes. Each slot is loaded, rewritten and stored exactly
# once per pass, and there are two passes:
#
#   pass 1: masks MASK5_*, MASK4_*, MASK3_*; shifts 32, 16, 8;
#           the eight registers hold slots that are 128 bytes apart
#   pass 2: masks MASK2_*, MASK1_*, MASK0_*; shifts 4, 2, 1;
#           the eight registers hold eight consecutive slots (16 bytes apart)
#
# Every round combines a register pair (a, b) using two masks (ma = mask0/2/4,
# mb = mask1/3/5) and a shift s:
#
#   pass 1:  a' = (a & ma) | (b << s)            b' = (a >> s) | (b & mb)
#   pass 2:  a' = (a & ma) | ((b & ma) << s)     b' = ((a & mb) >> s) | (b & mb)
#
# The pairs are (x0,x4) (x1,x5) (x2,x6) (x3,x7) in the first round of a pass,
# (x0,x2) (x1,x3) (x4,x6) (x5,x7) in the second, and
# (x0,x1) (x2,x3) (x4,x5) (x6,x7) in the third.
#
# NOTE(review): the routine name suggests a transpose of a 64x128 bit matrix; the
#   mask constants that would confirm this are defined outside this file.
# NOTE(review): input_0 is not declared here; presumably it is the first argument
#   as pre-declared by the qhasm environment -- confirm.
# NOTE(review): the 2x / 4x / 8x prefixes presumably select the number of lanes the
#   shift is applied to within a 128-bit register -- confirm against the qhasm
#   machine description used by the build.

# Working registers: the eight slots currently being processed.
reg128 x0
reg128 x1
reg128 x2
reg128 x3
reg128 x4
reg128 x5
reg128 x6
reg128 x7
# t0 and t1 are declared but never referenced in this routine.
reg128 t0
reg128 t1
# Temporaries for the four partial results of one pair exchange.
reg128 v00
reg128 v01
reg128 v10
reg128 v11
# Mask registers; reloaded with a different set of constants before each pass.
reg128 mask0
reg128 mask1
reg128 mask2
reg128 mask3
reg128 mask4
reg128 mask5

enter transpose_64x128_sp_asm

# ---------------------------------------------------------------------------
# Pass 1: shifts 32, 16, 8. Registers x0..x7 hold slots 128 bytes apart.
# ---------------------------------------------------------------------------
mask0 aligned= mem128[ MASK5_0 ]
mask1 aligned= mem128[ MASK5_1 ]
mask2 aligned= mem128[ MASK4_0 ]
mask3 aligned= mem128[ MASK4_1 ]
mask4 aligned= mem128[ MASK3_0 ]
mask5 aligned= mem128[ MASK3_1 ]

# Pass 1, base offset 0
x0 = mem128[ input_0 + 0 ]
x1 = mem128[ input_0 + 128 ]
x2 = mem128[ input_0 + 256 ]
x3 = mem128[ input_0 + 384 ]
x4 = mem128[ input_0 + 512 ]
x5 = mem128[ input_0 + 640 ]
x6 = mem128[ input_0 + 768 ]
x7 = mem128[ input_0 + 896 ]

# round 1: shift 32, masks mask0/mask1, pairs (x0,x4) (x1,x5) (x2,x6) (x3,x7)
v00 = x0 & mask0
2x v10 = x4 << 32
2x v01 = x0 unsigned>> 32
v11 = x4 & mask1
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
2x v10 = x5 << 32
2x v01 = x1 unsigned>> 32
v11 = x5 & mask1
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
2x v10 = x6 << 32
2x v01 = x2 unsigned>> 32
v11 = x6 & mask1
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
2x v10 = x7 << 32
2x v01 = x3 unsigned>> 32
v11 = x7 & mask1
x3 = v00 | v10
x7 = v01 | v11
# round 2: shift 16, masks mask2/mask3, pairs (x0,x2) (x1,x3) (x4,x6) (x5,x7)
v00 = x0 & mask2
4x v10 = x2 << 16
4x v01 = x0 unsigned>> 16
v11 = x2 & mask3
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
4x v10 = x3 << 16
4x v01 = x1 unsigned>> 16
v11 = x3 & mask3
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
4x v10 = x6 << 16
4x v01 = x4 unsigned>> 16
v11 = x6 & mask3
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
4x v10 = x7 << 16
4x v01 = x5 unsigned>> 16
v11 = x7 & mask3
x5 = v00 | v10
x7 = v01 | v11
# round 3: shift 8, masks mask4/mask5, pairs (x0,x1) (x2,x3) (x4,x5) (x6,x7)
v00 = x0 & mask4
8x v10 = x1 << 8
8x v01 = x0 unsigned>> 8
v11 = x1 & mask5
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
8x v10 = x3 << 8
8x v01 = x2 unsigned>> 8
v11 = x3 & mask5
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
8x v10 = x5 << 8
8x v01 = x4 unsigned>> 8
v11 = x5 & mask5
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
8x v10 = x7 << 8
8x v01 = x6 unsigned>> 8
v11 = x7 & mask5
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 0 ] = x0
mem128[ input_0 + 128 ] = x1
mem128[ input_0 + 256 ] = x2
mem128[ input_0 + 384 ] = x3
mem128[ input_0 + 512 ] = x4
mem128[ input_0 + 640 ] = x5
mem128[ input_0 + 768 ] = x6
mem128[ input_0 + 896 ] = x7

# Pass 1, base offset 16 (same three rounds as above)
x0 = mem128[ input_0 + 16 ]
x1 = mem128[ input_0 + 144 ]
x2 = mem128[ input_0 + 272 ]
x3 = mem128[ input_0 + 400 ]
x4 = mem128[ input_0 + 528 ]
x5 = mem128[ input_0 + 656 ]
x6 = mem128[ input_0 + 784 ]
x7 = mem128[ input_0 + 912 ]

v00 = x0 & mask0
2x v10 = x4 << 32
2x v01 = x0 unsigned>> 32
v11 = x4 & mask1
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
2x v10 = x5 << 32
2x v01 = x1 unsigned>> 32
v11 = x5 & mask1
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
2x v10 = x6 << 32
2x v01 = x2 unsigned>> 32
v11 = x6 & mask1
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
2x v10 = x7 << 32
2x v01 = x3 unsigned>> 32
v11 = x7 & mask1
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
4x v10 = x2 << 16
4x v01 = x0 unsigned>> 16
v11 = x2 & mask3
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
4x v10 = x3 << 16
4x v01 = x1 unsigned>> 16
v11 = x3 & mask3
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
4x v10 = x6 << 16
4x v01 = x4 unsigned>> 16
v11 = x6 & mask3
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
4x v10 = x7 << 16
4x v01 = x5 unsigned>> 16
v11 = x7 & mask3
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
8x v10 = x1 << 8
8x v01 = x0 unsigned>> 8
v11 = x1 & mask5
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
8x v10 = x3 << 8
8x v01 = x2 unsigned>> 8
v11 = x3 & mask5
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
8x v10 = x5 << 8
8x v01 = x4 unsigned>> 8
v11 = x5 & mask5
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
8x v10 = x7 << 8
8x v01 = x6 unsigned>> 8
v11 = x7 & mask5
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 16 ] = x0
mem128[ input_0 + 144 ] = x1
mem128[ input_0 + 272 ] = x2
mem128[ input_0 + 400 ] = x3
mem128[ input_0 + 528 ] = x4
mem128[ input_0 + 656 ] = x5
mem128[ input_0 + 784 ] = x6
mem128[ input_0 + 912 ] = x7

# Pass 1, base offset 32
x0 = mem128[ input_0 + 32 ]
x1 = mem128[ input_0 + 160 ]
x2 = mem128[ input_0 + 288 ]
x3 = mem128[ input_0 + 416 ]
x4 = mem128[ input_0 + 544 ]
x5 = mem128[ input_0 + 672 ]
x6 = mem128[ input_0 + 800 ]
x7 = mem128[ input_0 + 928 ]

v00 = x0 & mask0
2x v10 = x4 << 32
2x v01 = x0 unsigned>> 32
v11 = x4 & mask1
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
2x v10 = x5 << 32
2x v01 = x1 unsigned>> 32
v11 = x5 & mask1
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
2x v10 = x6 << 32
2x v01 = x2 unsigned>> 32
v11 = x6 & mask1
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
2x v10 = x7 << 32
2x v01 = x3 unsigned>> 32
v11 = x7 & mask1
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
4x v10 = x2 << 16
4x v01 = x0 unsigned>> 16
v11 = x2 & mask3
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
4x v10 = x3 << 16
4x v01 = x1 unsigned>> 16
v11 = x3 & mask3
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
4x v10 = x6 << 16
4x v01 = x4 unsigned>> 16
v11 = x6 & mask3
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
4x v10 = x7 << 16
4x v01 = x5 unsigned>> 16
v11 = x7 & mask3
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
8x v10 = x1 << 8
8x v01 = x0 unsigned>> 8
v11 = x1 & mask5
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
8x v10 = x3 << 8
8x v01 = x2 unsigned>> 8
v11 = x3 & mask5
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
8x v10 = x5 << 8
8x v01 = x4 unsigned>> 8
v11 = x5 & mask5
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
8x v10 = x7 << 8
8x v01 = x6 unsigned>> 8
v11 = x7 & mask5
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 32 ] = x0
mem128[ input_0 + 160 ] = x1
mem128[ input_0 + 288 ] = x2
mem128[ input_0 + 416 ] = x3
mem128[ input_0 + 544 ] = x4
mem128[ input_0 + 672 ] = x5
mem128[ input_0 + 800 ] = x6
mem128[ input_0 + 928 ] = x7

# Pass 1, base offset 48
x0 = mem128[ input_0 + 48 ]
x1 = mem128[ input_0 + 176 ]
x2 = mem128[ input_0 + 304 ]
x3 = mem128[ input_0 + 432 ]
x4 = mem128[ input_0 + 560 ]
x5 = mem128[ input_0 + 688 ]
x6 = mem128[ input_0 + 816 ]
x7 = mem128[ input_0 + 944 ]

v00 = x0 & mask0
2x v10 = x4 << 32
2x v01 = x0 unsigned>> 32
v11 = x4 & mask1
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
2x v10 = x5 << 32
2x v01 = x1 unsigned>> 32
v11 = x5 & mask1
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
2x v10 = x6 << 32
2x v01 = x2 unsigned>> 32
v11 = x6 & mask1
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
2x v10 = x7 << 32
2x v01 = x3 unsigned>> 32
v11 = x7 & mask1
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
4x v10 = x2 << 16
4x v01 = x0 unsigned>> 16
v11 = x2 & mask3
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
4x v10 = x3 << 16
4x v01 = x1 unsigned>> 16
v11 = x3 & mask3
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
4x v10 = x6 << 16
4x v01 = x4 unsigned>> 16
v11 = x6 & mask3
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
4x v10 = x7 << 16
4x v01 = x5 unsigned>> 16
v11 = x7 & mask3
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
8x v10 = x1 << 8
8x v01 = x0 unsigned>> 8
v11 = x1 & mask5
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
8x v10 = x3 << 8
8x v01 = x2 unsigned>> 8
v11 = x3 & mask5
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
8x v10 = x5 << 8
8x v01 = x4 unsigned>> 8
v11 = x5 & mask5
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
8x v10 = x7 << 8
8x v01 = x6 unsigned>> 8
v11 = x7 & mask5
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 48 ] = x0
mem128[ input_0 + 176 ] = x1
mem128[ input_0 + 304 ] = x2
mem128[ input_0 + 432 ] = x3
mem128[ input_0 + 560 ] = x4
mem128[ input_0 + 688 ] = x5
mem128[ input_0 + 816 ] = x6
mem128[ input_0 + 944 ] = x7

# Pass 1, base offset 64
x0 = mem128[ input_0 + 64 ]
x1 = mem128[ input_0 + 192 ]
x2 = mem128[ input_0 + 320 ]
x3 = mem128[ input_0 + 448 ]
x4 = mem128[ input_0 + 576 ]
x5 = mem128[ input_0 + 704 ]
x6 = mem128[ input_0 + 832 ]
x7 = mem128[ input_0 + 960 ]

v00 = x0 & mask0
2x v10 = x4 << 32
2x v01 = x0 unsigned>> 32
v11 = x4 & mask1
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
2x v10 = x5 << 32
2x v01 = x1 unsigned>> 32
v11 = x5 & mask1
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
2x v10 = x6 << 32
2x v01 = x2 unsigned>> 32
v11 = x6 & mask1
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
2x v10 = x7 << 32
2x v01 = x3 unsigned>> 32
v11 = x7 & mask1
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
4x v10 = x2 << 16
4x v01 = x0 unsigned>> 16
v11 = x2 & mask3
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
4x v10 = x3 << 16
4x v01 = x1 unsigned>> 16
v11 = x3 & mask3
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
4x v10 = x6 << 16
4x v01 = x4 unsigned>> 16
v11 = x6 & mask3
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
4x v10 = x7 << 16
4x v01 = x5 unsigned>> 16
v11 = x7 & mask3
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
8x v10 = x1 << 8
8x v01 = x0 unsigned>> 8
v11 = x1 & mask5
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
8x v10 = x3 << 8
8x v01 = x2 unsigned>> 8
v11 = x3 & mask5
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
8x v10 = x5 << 8
8x v01 = x4 unsigned>> 8
v11 = x5 & mask5
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
8x v10 = x7 << 8
8x v01 = x6 unsigned>> 8
v11 = x7 & mask5
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 64 ] = x0
mem128[ input_0 + 192 ] = x1
mem128[ input_0 + 320 ] = x2
mem128[ input_0 + 448 ] = x3
mem128[ input_0 + 576 ] = x4
mem128[ input_0 + 704 ] = x5
mem128[ input_0 + 832 ] = x6
mem128[ input_0 + 960 ] = x7

# Pass 1, base offset 80
x0 = mem128[ input_0 + 80 ]
x1 = mem128[ input_0 + 208 ]
x2 = mem128[ input_0 + 336 ]
x3 = mem128[ input_0 + 464 ]
x4 = mem128[ input_0 + 592 ]
x5 = mem128[ input_0 + 720 ]
x6 = mem128[ input_0 + 848 ]
x7 = mem128[ input_0 + 976 ]

v00 = x0 & mask0
2x v10 = x4 << 32
2x v01 = x0 unsigned>> 32
v11 = x4 & mask1
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
2x v10 = x5 << 32
2x v01 = x1 unsigned>> 32
v11 = x5 & mask1
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
2x v10 = x6 << 32
2x v01 = x2 unsigned>> 32
v11 = x6 & mask1
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
2x v10 = x7 << 32
2x v01 = x3 unsigned>> 32
v11 = x7 & mask1
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
4x v10 = x2 << 16
4x v01 = x0 unsigned>> 16
v11 = x2 & mask3
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
4x v10 = x3 << 16
4x v01 = x1 unsigned>> 16
v11 = x3 & mask3
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
4x v10 = x6 << 16
4x v01 = x4 unsigned>> 16
v11 = x6 & mask3
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
4x v10 = x7 << 16
4x v01 = x5 unsigned>> 16
v11 = x7 & mask3
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
8x v10 = x1 << 8
8x v01 = x0 unsigned>> 8
v11 = x1 & mask5
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
8x v10 = x3 << 8
8x v01 = x2 unsigned>> 8
v11 = x3 & mask5
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
8x v10 = x5 << 8
8x v01 = x4 unsigned>> 8
v11 = x5 & mask5
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
8x v10 = x7 << 8
8x v01 = x6 unsigned>> 8
v11 = x7 & mask5
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 80 ] = x0
mem128[ input_0 + 208 ] = x1
mem128[ input_0 + 336 ] = x2
mem128[ input_0 + 464 ] = x3
mem128[ input_0 + 592 ] = x4
mem128[ input_0 + 720 ] = x5
mem128[ input_0 + 848 ] = x6
mem128[ input_0 + 976 ] = x7

# Pass 1, base offset 96
x0 = mem128[ input_0 + 96 ]
x1 = mem128[ input_0 + 224 ]
x2 = mem128[ input_0 + 352 ]
x3 = mem128[ input_0 + 480 ]
x4 = mem128[ input_0 + 608 ]
x5 = mem128[ input_0 + 736 ]
x6 = mem128[ input_0 + 864 ]
x7 = mem128[ input_0 + 992 ]

v00 = x0 & mask0
2x v10 = x4 << 32
2x v01 = x0 unsigned>> 32
v11 = x4 & mask1
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
2x v10 = x5 << 32
2x v01 = x1 unsigned>> 32
v11 = x5 & mask1
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
2x v10 = x6 << 32
2x v01 = x2 unsigned>> 32
v11 = x6 & mask1
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
2x v10 = x7 << 32
2x v01 = x3 unsigned>> 32
v11 = x7 & mask1
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
4x v10 = x2 << 16
4x v01 = x0 unsigned>> 16
v11 = x2 & mask3
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
4x v10 = x3 << 16
4x v01 = x1 unsigned>> 16
v11 = x3 & mask3
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
4x v10 = x6 << 16
4x v01 = x4 unsigned>> 16
v11 = x6 & mask3
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
4x v10 = x7 << 16
4x v01 = x5 unsigned>> 16
v11 = x7 & mask3
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
8x v10 = x1 << 8
8x v01 = x0 unsigned>> 8
v11 = x1 & mask5
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
8x v10 = x3 << 8
8x v01 = x2 unsigned>> 8
v11 = x3 & mask5
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
8x v10 = x5 << 8
8x v01 = x4 unsigned>> 8
v11 = x5 & mask5
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
8x v10 = x7 << 8
8x v01 = x6 unsigned>> 8
v11 = x7 & mask5
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 96 ] = x0
mem128[ input_0 + 224 ] = x1
mem128[ input_0 + 352 ] = x2
mem128[ input_0 + 480 ] = x3
mem128[ input_0 + 608 ] = x4
mem128[ input_0 + 736 ] = x5
mem128[ input_0 + 864 ] = x6
mem128[ input_0 + 992 ] = x7

# Pass 1, base offset 112
x0 = mem128[ input_0 + 112 ]
x1 = mem128[ input_0 + 240 ]
x2 = mem128[ input_0 + 368 ]
x3 = mem128[ input_0 + 496 ]
x4 = mem128[ input_0 + 624 ]
x5 = mem128[ input_0 + 752 ]
x6 = mem128[ input_0 + 880 ]
x7 = mem128[ input_0 + 1008 ]

v00 = x0 & mask0
2x v10 = x4 << 32
2x v01 = x0 unsigned>> 32
v11 = x4 & mask1
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
2x v10 = x5 << 32
2x v01 = x1 unsigned>> 32
v11 = x5 & mask1
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
2x v10 = x6 << 32
2x v01 = x2 unsigned>> 32
v11 = x6 & mask1
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
2x v10 = x7 << 32
2x v01 = x3 unsigned>> 32
v11 = x7 & mask1
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
4x v10 = x2 << 16
4x v01 = x0 unsigned>> 16
v11 = x2 & mask3
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
4x v10 = x3 << 16
4x v01 = x1 unsigned>> 16
v11 = x3 & mask3
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
4x v10 = x6 << 16
4x v01 = x4 unsigned>> 16
v11 = x6 & mask3
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
4x v10 = x7 << 16
4x v01 = x5 unsigned>> 16
v11 = x7 & mask3
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
8x v10 = x1 << 8
8x v01 = x0 unsigned>> 8
v11 = x1 & mask5
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
8x v10 = x3 << 8
8x v01 = x2 unsigned>> 8
v11 = x3 & mask5
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
8x v10 = x5 << 8
8x v01 = x4 unsigned>> 8
v11 = x5 & mask5
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
8x v10 = x7 << 8
8x v01 = x6 unsigned>> 8
v11 = x7 & mask5
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 112 ] = x0
mem128[ input_0 + 240 ] = x1
mem128[ input_0 + 368 ] = x2
mem128[ input_0 + 496 ] = x3
mem128[ input_0 + 624 ] = x4
mem128[ input_0 + 752 ] = x5
mem128[ input_0 + 880 ] = x6
mem128[ input_0 + 1008 ] = x7

# ---------------------------------------------------------------------------
# Pass 2: shifts 4, 2, 1. Registers x0..x7 hold eight consecutive 16-byte slots.
# Here both operands are masked first and the shift is applied in place
# (`<<=` / `unsigned>>=`) to the already-masked temporary.
# ---------------------------------------------------------------------------
mask0 aligned= mem128[ MASK2_0 ]
mask1 aligned= mem128[ MASK2_1 ]
mask2 aligned= mem128[ MASK1_0 ]
mask3 aligned= mem128[ MASK1_1 ]
mask4 aligned= mem128[ MASK0_0 ]
mask5 aligned= mem128[ MASK0_1 ]

# Pass 2, base offset 0
x0 = mem128[ input_0 + 0 ]
x1 = mem128[ input_0 + 16 ]
x2 = mem128[ input_0 + 32 ]
x3 = mem128[ input_0 + 48 ]
x4 = mem128[ input_0 + 64 ]
x5 = mem128[ input_0 + 80 ]
x6 = mem128[ input_0 + 96 ]
x7 = mem128[ input_0 + 112 ]

# round 1: shift 4, masks mask0/mask1, pairs (x0,x4) (x1,x5) (x2,x6) (x3,x7)
v00 = x0 & mask0
v10 = x4 & mask0
2x v10 <<= 4
v01 = x0 & mask1
v11 = x4 & mask1
2x v01 unsigned>>= 4
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
v10 = x5 & mask0
2x v10 <<= 4
v01 = x1 & mask1
v11 = x5 & mask1
2x v01 unsigned>>= 4
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
v10 = x6 & mask0
2x v10 <<= 4
v01 = x2 & mask1
v11 = x6 & mask1
2x v01 unsigned>>= 4
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
v10 = x7 & mask0
2x v10 <<= 4
v01 = x3 & mask1
v11 = x7 & mask1
2x v01 unsigned>>= 4
x3 = v00 | v10
x7 = v01 | v11
# round 2: shift 2, masks mask2/mask3, pairs (x0,x2) (x1,x3) (x4,x6) (x5,x7)
v00 = x0 & mask2
v10 = x2 & mask2
2x v10 <<= 2
v01 = x0 & mask3
v11 = x2 & mask3
2x v01 unsigned>>= 2
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
v10 = x3 & mask2
2x v10 <<= 2
v01 = x1 & mask3
v11 = x3 & mask3
2x v01 unsigned>>= 2
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
v10 = x6 & mask2
2x v10 <<= 2
v01 = x4 & mask3
v11 = x6 & mask3
2x v01 unsigned>>= 2
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
v10 = x7 & mask2
2x v10 <<= 2
v01 = x5 & mask3
v11 = x7 & mask3
2x v01 unsigned>>= 2
x5 = v00 | v10
x7 = v01 | v11
# round 3: shift 1, masks mask4/mask5, pairs (x0,x1) (x2,x3) (x4,x5) (x6,x7)
v00 = x0 & mask4
v10 = x1 & mask4
2x v10 <<= 1
v01 = x0 & mask5
v11 = x1 & mask5
2x v01 unsigned>>= 1
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
v10 = x3 & mask4
2x v10 <<= 1
v01 = x2 & mask5
v11 = x3 & mask5
2x v01 unsigned>>= 1
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
v10 = x5 & mask4
2x v10 <<= 1
v01 = x4 & mask5
v11 = x5 & mask5
2x v01 unsigned>>= 1
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
v10 = x7 & mask4
2x v10 <<= 1
v01 = x6 & mask5
v11 = x7 & mask5
2x v01 unsigned>>= 1
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 0 ] = x0
mem128[ input_0 + 16 ] = x1
mem128[ input_0 + 32 ] = x2
mem128[ input_0 + 48 ] = x3
mem128[ input_0 + 64 ] = x4
mem128[ input_0 + 80 ] = x5
mem128[ input_0 + 96 ] = x6
mem128[ input_0 + 112 ] = x7

# Pass 2, base offset 128 (same three rounds as above)
x0 = mem128[ input_0 + 128 ]
x1 = mem128[ input_0 + 144 ]
x2 = mem128[ input_0 + 160 ]
x3 = mem128[ input_0 + 176 ]
x4 = mem128[ input_0 + 192 ]
x5 = mem128[ input_0 + 208 ]
x6 = mem128[ input_0 + 224 ]
x7 = mem128[ input_0 + 240 ]

v00 = x0 & mask0
v10 = x4 & mask0
2x v10 <<= 4
v01 = x0 & mask1
v11 = x4 & mask1
2x v01 unsigned>>= 4
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
v10 = x5 & mask0
2x v10 <<= 4
v01 = x1 & mask1
v11 = x5 & mask1
2x v01 unsigned>>= 4
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
v10 = x6 & mask0
2x v10 <<= 4
v01 = x2 & mask1
v11 = x6 & mask1
2x v01 unsigned>>= 4
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
v10 = x7 & mask0
2x v10 <<= 4
v01 = x3 & mask1
v11 = x7 & mask1
2x v01 unsigned>>= 4
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
v10 = x2 & mask2
2x v10 <<= 2
v01 = x0 & mask3
v11 = x2 & mask3
2x v01 unsigned>>= 2
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
v10 = x3 & mask2
2x v10 <<= 2
v01 = x1 & mask3
v11 = x3 & mask3
2x v01 unsigned>>= 2
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
v10 = x6 & mask2
2x v10 <<= 2
v01 = x4 & mask3
v11 = x6 & mask3
2x v01 unsigned>>= 2
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
v10 = x7 & mask2
2x v10 <<= 2
v01 = x5 & mask3
v11 = x7 & mask3
2x v01 unsigned>>= 2
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
v10 = x1 & mask4
2x v10 <<= 1
v01 = x0 & mask5
v11 = x1 & mask5
2x v01 unsigned>>= 1
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
v10 = x3 & mask4
2x v10 <<= 1
v01 = x2 & mask5
v11 = x3 & mask5
2x v01 unsigned>>= 1
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
v10 = x5 & mask4
2x v10 <<= 1
v01 = x4 & mask5
v11 = x5 & mask5
2x v01 unsigned>>= 1
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
v10 = x7 & mask4
2x v10 <<= 1
v01 = x6 & mask5
v11 = x7 & mask5
2x v01 unsigned>>= 1
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 128 ] = x0
mem128[ input_0 + 144 ] = x1
mem128[ input_0 + 160 ] = x2
mem128[ input_0 + 176 ] = x3
mem128[ input_0 + 192 ] = x4
mem128[ input_0 + 208 ] = x5
mem128[ input_0 + 224 ] = x6
mem128[ input_0 + 240 ] = x7

# Pass 2, base offset 256
x0 = mem128[ input_0 + 256 ]
x1 = mem128[ input_0 + 272 ]
x2 = mem128[ input_0 + 288 ]
x3 = mem128[ input_0 + 304 ]
x4 = mem128[ input_0 + 320 ]
x5 = mem128[ input_0 + 336 ]
x6 = mem128[ input_0 + 352 ]
x7 = mem128[ input_0 + 368 ]

v00 = x0 & mask0
v10 = x4 & mask0
2x v10 <<= 4
v01 = x0 & mask1
v11 = x4 & mask1
2x v01 unsigned>>= 4
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
v10 = x5 & mask0
2x v10 <<= 4
v01 = x1 & mask1
v11 = x5 & mask1
2x v01 unsigned>>= 4
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
v10 = x6 & mask0
2x v10 <<= 4
v01 = x2 & mask1
v11 = x6 & mask1
2x v01 unsigned>>= 4
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
v10 = x7 & mask0
2x v10 <<= 4
v01 = x3 & mask1
v11 = x7 & mask1
2x v01 unsigned>>= 4
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
v10 = x2 & mask2
2x v10 <<= 2
v01 = x0 & mask3
v11 = x2 & mask3
2x v01 unsigned>>= 2
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
v10 = x3 & mask2
2x v10 <<= 2
v01 = x1 & mask3
v11 = x3 & mask3
2x v01 unsigned>>= 2
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
v10 = x6 & mask2
2x v10 <<= 2
v01 = x4 & mask3
v11 = x6 & mask3
2x v01 unsigned>>= 2
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
v10 = x7 & mask2
2x v10 <<= 2
v01 = x5 & mask3
v11 = x7 & mask3
2x v01 unsigned>>= 2
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
v10 = x1 & mask4
2x v10 <<= 1
v01 = x0 & mask5
v11 = x1 & mask5
2x v01 unsigned>>= 1
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
v10 = x3 & mask4
2x v10 <<= 1
v01 = x2 & mask5
v11 = x3 & mask5
2x v01 unsigned>>= 1
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
v10 = x5 & mask4
2x v10 <<= 1
v01 = x4 & mask5
v11 = x5 & mask5
2x v01 unsigned>>= 1
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
v10 = x7 & mask4
2x v10 <<= 1
v01 = x6 & mask5
v11 = x7 & mask5
2x v01 unsigned>>= 1
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 256 ] = x0
mem128[ input_0 + 272 ] = x1
mem128[ input_0 + 288 ] = x2
mem128[ input_0 + 304 ] = x3
mem128[ input_0 + 320 ] = x4
mem128[ input_0 + 336 ] = x5
mem128[ input_0 + 352 ] = x6
mem128[ input_0 + 368 ] = x7

# Pass 2, base offset 384
x0 = mem128[ input_0 + 384 ]
x1 = mem128[ input_0 + 400 ]
x2 = mem128[ input_0 + 416 ]
x3 = mem128[ input_0 + 432 ]
x4 = mem128[ input_0 + 448 ]
x5 = mem128[ input_0 + 464 ]
x6 = mem128[ input_0 + 480 ]
x7 = mem128[ input_0 + 496 ]

v00 = x0 & mask0
v10 = x4 & mask0
2x v10 <<= 4
v01 = x0 & mask1
v11 = x4 & mask1
2x v01 unsigned>>= 4
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
v10 = x5 & mask0
2x v10 <<= 4
v01 = x1 & mask1
v11 = x5 & mask1
2x v01 unsigned>>= 4
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
v10 = x6 & mask0
2x v10 <<= 4
v01 = x2 & mask1
v11 = x6 & mask1
2x v01 unsigned>>= 4
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
v10 = x7 & mask0
2x v10 <<= 4
v01 = x3 & mask1
v11 = x7 & mask1
2x v01 unsigned>>= 4
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
v10 = x2 & mask2
2x v10 <<= 2
v01 = x0 & mask3
v11 = x2 & mask3
2x v01 unsigned>>= 2
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
v10 = x3 & mask2
2x v10 <<= 2
v01 = x1 & mask3
v11 = x3 & mask3
2x v01 unsigned>>= 2
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
v10 = x6 & mask2
2x v10 <<= 2
v01 = x4 & mask3
v11 = x6 & mask3
2x v01 unsigned>>= 2
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
v10 = x7 & mask2
2x v10 <<= 2
v01 = x5 & mask3
v11 = x7 & mask3
2x v01 unsigned>>= 2
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
v10 = x1 & mask4
2x v10 <<= 1
v01 = x0 & mask5
v11 = x1 & mask5
2x v01 unsigned>>= 1
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
v10 = x3 & mask4
2x v10 <<= 1
v01 = x2 & mask5
v11 = x3 & mask5
2x v01 unsigned>>= 1
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
v10 = x5 & mask4
2x v10 <<= 1
v01 = x4 & mask5
v11 = x5 & mask5
2x v01 unsigned>>= 1
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
v10 = x7 & mask4
2x v10 <<= 1
v01 = x6 & mask5
v11 = x7 & mask5
2x v01 unsigned>>= 1
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 384 ] = x0
mem128[ input_0 + 400 ] = x1
mem128[ input_0 + 416 ] = x2
mem128[ input_0 + 432 ] = x3
mem128[ input_0 + 448 ] = x4
mem128[ input_0 + 464 ] = x5
mem128[ input_0 + 480 ] = x6
mem128[ input_0 + 496 ] = x7

# Pass 2, base offset 512
x0 = mem128[ input_0 + 512 ]
x1 = mem128[ input_0 + 528 ]
x2 = mem128[ input_0 + 544 ]
x3 = mem128[ input_0 + 560 ]
x4 = mem128[ input_0 + 576 ]
x5 = mem128[ input_0 + 592 ]
x6 = mem128[ input_0 + 608 ]
x7 = mem128[ input_0 + 624 ]

v00 = x0 & mask0
v10 = x4 & mask0
2x v10 <<= 4
v01 = x0 & mask1
v11 = x4 & mask1
2x v01 unsigned>>= 4
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
v10 = x5 & mask0
2x v10 <<= 4
v01 = x1 & mask1
v11 = x5 & mask1
2x v01 unsigned>>= 4
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
v10 = x6 & mask0
2x v10 <<= 4
v01 = x2 & mask1
v11 = x6 & mask1
2x v01 unsigned>>= 4
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
v10 = x7 & mask0
2x v10 <<= 4
v01 = x3 & mask1
v11 = x7 & mask1
2x v01 unsigned>>= 4
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
v10 = x2 & mask2
2x v10 <<= 2
v01 = x0 & mask3
v11 = x2 & mask3
2x v01 unsigned>>= 2
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
v10 = x3 & mask2
2x v10 <<= 2
v01 = x1 & mask3
v11 = x3 & mask3
2x v01 unsigned>>= 2
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
v10 = x6 & mask2
2x v10 <<= 2
v01 = x4 & mask3
v11 = x6 & mask3
2x v01 unsigned>>= 2
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
v10 = x7 & mask2
2x v10 <<= 2
v01 = x5 & mask3
v11 = x7 & mask3
2x v01 unsigned>>= 2
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
v10 = x1 & mask4
2x v10 <<= 1
v01 = x0 & mask5
v11 = x1 & mask5
2x v01 unsigned>>= 1
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
v10 = x3 & mask4
2x v10 <<= 1
v01 = x2 & mask5
v11 = x3 & mask5
2x v01 unsigned>>= 1
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
v10 = x5 & mask4
2x v10 <<= 1
v01 = x4 & mask5
v11 = x5 & mask5
2x v01 unsigned>>= 1
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
v10 = x7 & mask4
2x v10 <<= 1
v01 = x6 & mask5
v11 = x7 & mask5
2x v01 unsigned>>= 1
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 512 ] = x0
mem128[ input_0 + 528 ] = x1
mem128[ input_0 + 544 ] = x2
mem128[ input_0 + 560 ] = x3
mem128[ input_0 + 576 ] = x4
mem128[ input_0 + 592 ] = x5
mem128[ input_0 + 608 ] = x6
mem128[ input_0 + 624 ] = x7

# Pass 2, base offset 640
x0 = mem128[ input_0 + 640 ]
x1 = mem128[ input_0 + 656 ]
x2 = mem128[ input_0 + 672 ]
x3 = mem128[ input_0 + 688 ]
x4 = mem128[ input_0 + 704 ]
x5 = mem128[ input_0 + 720 ]
x6 = mem128[ input_0 + 736 ]
x7 = mem128[ input_0 + 752 ]

v00 = x0 & mask0
v10 = x4 & mask0
2x v10 <<= 4
v01 = x0 & mask1
v11 = x4 & mask1
2x v01 unsigned>>= 4
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
v10 = x5 & mask0
2x v10 <<= 4
v01 = x1 & mask1
v11 = x5 & mask1
2x v01 unsigned>>= 4
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
v10 = x6 & mask0
2x v10 <<= 4
v01 = x2 & mask1
v11 = x6 & mask1
2x v01 unsigned>>= 4
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
v10 = x7 & mask0
2x v10 <<= 4
v01 = x3 & mask1
v11 = x7 & mask1
2x v01 unsigned>>= 4
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
v10 = x2 & mask2
2x v10 <<= 2
v01 = x0 & mask3
v11 = x2 & mask3
2x v01 unsigned>>= 2
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
v10 = x3 & mask2
2x v10 <<= 2
v01 = x1 & mask3
v11 = x3 & mask3
2x v01 unsigned>>= 2
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
v10 = x6 & mask2
2x v10 <<= 2
v01 = x4 & mask3
v11 = x6 & mask3
2x v01 unsigned>>= 2
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
v10 = x7 & mask2
2x v10 <<= 2
v01 = x5 & mask3
v11 = x7 & mask3
2x v01 unsigned>>= 2
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
v10 = x1 & mask4
2x v10 <<= 1
v01 = x0 & mask5
v11 = x1 & mask5
2x v01 unsigned>>= 1
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
v10 = x3 & mask4
2x v10 <<= 1
v01 = x2 & mask5
v11 = x3 & mask5
2x v01 unsigned>>= 1
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
v10 = x5 & mask4
2x v10 <<= 1
v01 = x4 & mask5
v11 = x5 & mask5
2x v01 unsigned>>= 1
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
v10 = x7 & mask4
2x v10 <<= 1
v01 = x6 & mask5
v11 = x7 & mask5
2x v01 unsigned>>= 1
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 640 ] = x0
mem128[ input_0 + 656 ] = x1
mem128[ input_0 + 672 ] = x2
mem128[ input_0 + 688 ] = x3
mem128[ input_0 + 704 ] = x4
mem128[ input_0 + 720 ] = x5
mem128[ input_0 + 736 ] = x6
mem128[ input_0 + 752 ] = x7

# Pass 2, base offset 768
x0 = mem128[ input_0 + 768 ]
x1 = mem128[ input_0 + 784 ]
x2 = mem128[ input_0 + 800 ]
x3 = mem128[ input_0 + 816 ]
x4 = mem128[ input_0 + 832 ]
x5 = mem128[ input_0 + 848 ]
x6 = mem128[ input_0 + 864 ]
x7 = mem128[ input_0 + 880 ]

v00 = x0 & mask0
v10 = x4 & mask0
2x v10 <<= 4
v01 = x0 & mask1
v11 = x4 & mask1
2x v01 unsigned>>= 4
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
v10 = x5 & mask0
2x v10 <<= 4
v01 = x1 & mask1
v11 = x5 & mask1
2x v01 unsigned>>= 4
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
v10 = x6 & mask0
2x v10 <<= 4
v01 = x2 & mask1
v11 = x6 & mask1
2x v01 unsigned>>= 4
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
v10 = x7 & mask0
2x v10 <<= 4
v01 = x3 & mask1
v11 = x7 & mask1
2x v01 unsigned>>= 4
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
v10 = x2 & mask2
2x v10 <<= 2
v01 = x0 & mask3
v11 = x2 & mask3
2x v01 unsigned>>= 2
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
v10 = x3 & mask2
2x v10 <<= 2
v01 = x1 & mask3
v11 = x3 & mask3
2x v01 unsigned>>= 2
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
v10 = x6 & mask2
2x v10 <<= 2
v01 = x4 & mask3
v11 = x6 & mask3
2x v01 unsigned>>= 2
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
v10 = x7 & mask2
2x v10 <<= 2
v01 = x5 & mask3
v11 = x7 & mask3
2x v01 unsigned>>= 2
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
v10 = x1 & mask4
2x v10 <<= 1
v01 = x0 & mask5
v11 = x1 & mask5
2x v01 unsigned>>= 1
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
v10 = x3 & mask4
2x v10 <<= 1
v01 = x2 & mask5
v11 = x3 & mask5
2x v01 unsigned>>= 1
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
v10 = x5 & mask4
2x v10 <<= 1
v01 = x4 & mask5
v11 = x5 & mask5
2x v01 unsigned>>= 1
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
v10 = x7 & mask4
2x v10 <<= 1
v01 = x6 & mask5
v11 = x7 & mask5
2x v01 unsigned>>= 1
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 768 ] = x0
mem128[ input_0 + 784 ] = x1
mem128[ input_0 + 800 ] = x2
mem128[ input_0 + 816 ] = x3
mem128[ input_0 + 832 ] = x4
mem128[ input_0 + 848 ] = x5
mem128[ input_0 + 864 ] = x6
mem128[ input_0 + 880 ] = x7

# Pass 2, base offset 896
x0 = mem128[ input_0 + 896 ]
x1 = mem128[ input_0 + 912 ]
x2 = mem128[ input_0 + 928 ]
x3 = mem128[ input_0 + 944 ]
x4 = mem128[ input_0 + 960 ]
x5 = mem128[ input_0 + 976 ]
x6 = mem128[ input_0 + 992 ]
x7 = mem128[ input_0 + 1008 ]

v00 = x0 & mask0
v10 = x4 & mask0
2x v10 <<= 4
v01 = x0 & mask1
v11 = x4 & mask1
2x v01 unsigned>>= 4
x0 = v00 | v10
x4 = v01 | v11
v00 = x1 & mask0
v10 = x5 & mask0
2x v10 <<= 4
v01 = x1 & mask1
v11 = x5 & mask1
2x v01 unsigned>>= 4
x1 = v00 | v10
x5 = v01 | v11
v00 = x2 & mask0
v10 = x6 & mask0
2x v10 <<= 4
v01 = x2 & mask1
v11 = x6 & mask1
2x v01 unsigned>>= 4
x2 = v00 | v10
x6 = v01 | v11
v00 = x3 & mask0
v10 = x7 & mask0
2x v10 <<= 4
v01 = x3 & mask1
v11 = x7 & mask1
2x v01 unsigned>>= 4
x3 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask2
v10 = x2 & mask2
2x v10 <<= 2
v01 = x0 & mask3
v11 = x2 & mask3
2x v01 unsigned>>= 2
x0 = v00 | v10
x2 = v01 | v11
v00 = x1 & mask2
v10 = x3 & mask2
2x v10 <<= 2
v01 = x1 & mask3
v11 = x3 & mask3
2x v01 unsigned>>= 2
x1 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask2
v10 = x6 & mask2
2x v10 <<= 2
v01 = x4 & mask3
v11 = x6 & mask3
2x v01 unsigned>>= 2
x4 = v00 | v10
x6 = v01 | v11
v00 = x5 & mask2
v10 = x7 & mask2
2x v10 <<= 2
v01 = x5 & mask3
v11 = x7 & mask3
2x v01 unsigned>>= 2
x5 = v00 | v10
x7 = v01 | v11
v00 = x0 & mask4
v10 = x1 & mask4
2x v10 <<= 1
v01 = x0 & mask5
v11 = x1 & mask5
2x v01 unsigned>>= 1
x0 = v00 | v10
x1 = v01 | v11
v00 = x2 & mask4
v10 = x3 & mask4
2x v10 <<= 1
v01 = x2 & mask5
v11 = x3 & mask5
2x v01 unsigned>>= 1
x2 = v00 | v10
x3 = v01 | v11
v00 = x4 & mask4
v10 = x5 & mask4
2x v10 <<= 1
v01 = x4 & mask5
v11 = x5 & mask5
2x v01 unsigned>>= 1
x4 = v00 | v10
x5 = v01 | v11
v00 = x6 & mask4
v10 = x7 & mask4
2x v10 <<= 1
v01 = x6 & mask5
v11 = x7 & mask5
2x v01 unsigned>>= 1
x6 = v00 | v10
x7 = v01 | v11

mem128[ input_0 + 896 ] = x0
mem128[ input_0 + 912 ] = x1
mem128[ input_0 + 928 ] = x2
mem128[ input_0 + 944 ] = x3
mem128[ input_0 + 960 ] = x4
mem128[ input_0 + 976 ] = x5
mem128[ input_0 + 992 ] = x6
mem128[ input_0 + 1008 ] = x7

return