#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
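// Constant pool shared by the SSE and AVX2 paths: the ChaCha20 "expand 32-byte k"
// sigma, pshufb masks implementing the 8-bit and 16-bit left rotations, counter
// increments, the Poly1305 clamp for r, and partial-block byte masks.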
chacha20_poly1305_constants:
.align 64
.chacha20_consts:
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.rol8:
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.rol16:
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.avx2_init:
.long 0,0,0,0
.sse_inc:
.long 1,0,0,0
.avx2_inc:
.long 2,0,0,0,2,0,0,0
.clamp:
.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
.align 16
.and_masks:
.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
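// poly_hash_ad_internal: absorb the additional data into the Poly1305 state.
// %rcx points at the AD, %r8 holds its length, the clamped key r sits at
// 0(%rbp), and the accumulator lives in %r10:%r11:%r12.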
.type poly_hash_ad_internal,@function
.align 64
poly_hash_ad_internal:
.cfi_startproc
xorq %r10,%r10
xorq %r11,%r11
xorq %r12,%r12
cmpq $13,%r8
jne hash_ad_loop
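// Fast path for the 13-byte AD of a TLS record: load it straight into the
// accumulator, with the padding bit placed in %r12, instead of looping.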
poly_fast_tls_ad:
movq (%rcx),%r10
movq 5(%rcx),%r11
shrq $24,%r11
movq $1,%r12
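// Multiply the accumulator by r and reduce modulo 2^130 - 5. This
// multiply-and-reduce sequence recurs throughout the file, often interleaved
// with ChaCha20 rounds.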
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
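// .byte 0xf3,0xc3 is an encoded "rep ret".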
.byte 0xf3,0xc3
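// Generic loop: hash the AD in full 16-byte blocks.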
hash_ad_loop:
cmpq $16,%r8
jb hash_ad_tail
addq 0(%rcx),%r10
adcq 8+0(%rcx),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rcx),%rcx
subq $16,%r8
jmp hash_ad_loop
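// Hash the last 1-15 AD bytes: gather them into %r13:%r14 and absorb them as a
// single zero-padded block.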
hash_ad_tail:
cmpq $0,%r8
je 1f
xorq %r13,%r13
xorq %r14,%r14
xorq %r15,%r15
addq %r8,%rcx
hash_ad_tail_loop:
shldq $8,%r13,%r14
shlq $8,%r13
movzbq -1(%rcx),%r15
xorq %r15,%r13
decq %rcx
decq %r8
jne hash_ad_tail_loop
addq %r13,%r10
adcq %r14,%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
1:
.byte 0xf3,0xc3
.cfi_endproc
.size poly_hash_ad_internal, .-poly_hash_ad_internal
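// chacha20_poly1305_open: decrypts the ciphertext and computes the Poly1305 tag
// for the caller to verify. Per the SysV ABI, %rdi is the plaintext output,
// %rsi the ciphertext, %rdx its length, %rcx the AD, %r8 the AD length, and
// %r9 a key/nonce state block; %r9 is saved on the stack and reloaded at the
// end as the destination for the 16-byte tag.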
.globl chacha20_poly1305_open
.hidden chacha20_poly1305_open
.type chacha20_poly1305_open,@function
.align 64
chacha20_poly1305_open:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
pushq %rbx
.cfi_adjust_cfa_offset 8
pushq %r12
.cfi_adjust_cfa_offset 8
pushq %r13
.cfi_adjust_cfa_offset 8
pushq %r14
.cfi_adjust_cfa_offset 8
pushq %r15
.cfi_adjust_cfa_offset 8
pushq %r9
.cfi_adjust_cfa_offset 8
subq $288 + 32,%rsp
.cfi_adjust_cfa_offset 288 + 32
.cfi_offset rbp, -16
.cfi_offset rbx, -24
.cfi_offset r12, -32
.cfi_offset r13, -40
.cfi_offset r14, -48
.cfi_offset r15, -56
leaq 32(%rsp),%rbp
andq $-32,%rbp
movq %rdx,8+32(%rbp)
movq %r8,0+32(%rbp)
movq %rdx,%rbx
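// CPU dispatch: $288 covers bits 5 and 8 of OPENSSL_ia32cap_P[2], i.e. AVX2
// and BMI2; the AVX2 path is taken only when both are present.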
movl OPENSSL_ia32cap_P+8(%rip),%eax
andl $288,%eax
xorl $288,%eax
jz chacha20_poly1305_open_avx2
1:
cmpq $128,%rbx
jbe open_sse_128
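// Derive the Poly1305 key: run a single ChaCha20 block, clamp its first
// 16 bytes into r at 0(%rbp), and keep the next 16 bytes as s at 16(%rbp).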
movdqa .chacha20_consts(%rip),%xmm0
movdqu 0(%r9),%xmm4
movdqu 16(%r9),%xmm8
movdqu 32(%r9),%xmm12
movdqa %xmm12,%xmm7
movdqa %xmm4,48(%rbp)
movdqa %xmm8,64(%rbp)
movdqa %xmm12,96(%rbp)
movq $10,%r10
1:
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
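// The .byte runs below encode palignr $4/$8/$12 on %xmm4, %xmm8 and %xmm12,
// rotating the b, c and d rows between column and diagonal rounds; they appear
// to be emitted as raw bytes for older-assembler compatibility.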
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
decq %r10
jne 1b
paddd .chacha20_consts(%rip),%xmm0
paddd 48(%rbp),%xmm4
pand .clamp(%rip),%xmm0
movdqa %xmm0,0(%rbp)
movdqa %xmm4,16(%rbp)
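// The movq %r8,%r8 below is a no-op left by the code generator; the AD length
// is already in the register poly_hash_ad_internal expects.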
movq %r8,%r8
call poly_hash_ad_internal
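// Main open loop: each iteration generates four ChaCha20 blocks (256 bytes of
// keystream) while hashing the corresponding ciphertext with Poly1305, then
// XORs the keystream over the input to produce plaintext.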
open_sse_main_loop:
cmpq $256,%rbx
jb 2f
movdqa .chacha20_consts(%rip),%xmm0
movdqa 48(%rbp),%xmm4
movdqa 64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa %xmm0,%xmm2
movdqa %xmm4,%xmm6
movdqa %xmm8,%xmm10
movdqa %xmm0,%xmm3
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm11
movdqa 96(%rbp),%xmm15
paddd .sse_inc(%rip),%xmm15
movdqa %xmm15,%xmm14
paddd .sse_inc(%rip),%xmm14
movdqa %xmm14,%xmm13
paddd .sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd .sse_inc(%rip),%xmm12
movdqa %xmm12,96(%rbp)
movdqa %xmm13,112(%rbp)
movdqa %xmm14,128(%rbp)
movdqa %xmm15,144(%rbp)
movq $4,%rcx
movq %rsi,%r8
1:
movdqa %xmm8,80(%rbp)
movdqa .rol16(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
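// .byte-encoded pshufb of %xmm15..%xmm12 by the rotation mask currently held
// in %xmm8 (here .rol16, later .rol8).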
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
addq 0(%r8),%r10
adcq 8+0(%r8),%r11
adcq $1,%r12
leaq 16(%r8),%r8
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,80(%rbp)
movdqa %xmm7,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm4
pxor %xmm8,%xmm4
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movdqa .rol8(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,80(%rbp)
movdqa %xmm7,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm4
pxor %xmm8,%xmm4
movdqa 80(%rbp),%xmm8
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
.byte 102,15,58,15,255,4
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,12
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
movdqa %xmm8,80(%rbp)
movdqa .rol16(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,80(%rbp)
movdqa %xmm7,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm4
pxor %xmm8,%xmm4
movdqa .rol8(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,80(%rbp)
movdqa %xmm7,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm4
pxor %xmm8,%xmm4
movdqa 80(%rbp),%xmm8
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
decq %rcx
jge 1b
addq 0(%r8),%r10
adcq 8+0(%r8),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%r8),%r8
cmpq $-6,%rcx
jg 1b
paddd .chacha20_consts(%rip),%xmm3
paddd 48(%rbp),%xmm7
paddd 64(%rbp),%xmm11
paddd 144(%rbp),%xmm15
paddd .chacha20_consts(%rip),%xmm2
paddd 48(%rbp),%xmm6
paddd 64(%rbp),%xmm10
paddd 128(%rbp),%xmm14
paddd .chacha20_consts(%rip),%xmm1
paddd 48(%rbp),%xmm5
paddd 64(%rbp),%xmm9
paddd 112(%rbp),%xmm13
paddd .chacha20_consts(%rip),%xmm0
paddd 48(%rbp),%xmm4
paddd 64(%rbp),%xmm8
paddd 96(%rbp),%xmm12
movdqa %xmm12,80(%rbp)
movdqu 0 + 0(%rsi),%xmm12
pxor %xmm3,%xmm12
movdqu %xmm12,0 + 0(%rdi)
movdqu 16 + 0(%rsi),%xmm12
pxor %xmm7,%xmm12
movdqu %xmm12,16 + 0(%rdi)
movdqu 32 + 0(%rsi),%xmm12
pxor %xmm11,%xmm12
movdqu %xmm12,32 + 0(%rdi)
movdqu 48 + 0(%rsi),%xmm12
pxor %xmm15,%xmm12
movdqu %xmm12,48 + 0(%rdi)
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 64(%rdi)
movdqu %xmm6,16 + 64(%rdi)
movdqu %xmm10,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
movdqu 0 + 128(%rsi),%xmm3
movdqu 16 + 128(%rsi),%xmm7
movdqu 32 + 128(%rsi),%xmm11
movdqu 48 + 128(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 128(%rdi)
movdqu %xmm5,16 + 128(%rdi)
movdqu %xmm9,32 + 128(%rdi)
movdqu %xmm15,48 + 128(%rdi)
movdqu 0 + 192(%rsi),%xmm3
movdqu 16 + 192(%rsi),%xmm7
movdqu 32 + 192(%rsi),%xmm11
movdqu 48 + 192(%rsi),%xmm15
pxor %xmm3,%xmm0
pxor %xmm7,%xmm4
pxor %xmm11,%xmm8
pxor 80(%rbp),%xmm15
movdqu %xmm0,0 + 192(%rdi)
movdqu %xmm4,16 + 192(%rdi)
movdqu %xmm8,32 + 192(%rdi)
movdqu %xmm15,48 + 192(%rdi)
leaq 256(%rsi),%rsi
leaq 256(%rdi),%rdi
subq $256,%rbx
jmp open_sse_main_loop
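// Fewer than 256 bytes remain: fall through to a tail path that generates up
// to four more ChaCha20 blocks while hashing whatever ciphertext is left.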
2:
testq %rbx,%rbx
jz open_sse_finalize
cmpq $64,%rbx
ja 3f
movdqa .chacha20_consts(%rip),%xmm0
movdqa 48(%rbp),%xmm4
movdqa 64(%rbp),%xmm8
movdqa 96(%rbp),%xmm12
paddd .sse_inc(%rip),%xmm12
movdqa %xmm12,96(%rbp)
xorq %r8,%r8
movq %rbx,%rcx
cmpq $16,%rcx
jb 2f
1:
addq 0(%rsi,%r8), %r10
adcq 8+0(%rsi,%r8), %r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
subq $16,%rcx
2:
addq $16,%r8
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
cmpq $16,%rcx
jae 1b
cmpq $160,%r8
jne 2b
paddd .chacha20_consts(%rip),%xmm0
paddd 48(%rbp),%xmm4
paddd 64(%rbp),%xmm8
paddd 96(%rbp),%xmm12
jmp open_sse_tail_64_dec_loop
3:
cmpq $128,%rbx
ja 3f
movdqa .chacha20_consts(%rip),%xmm0
movdqa 48(%rbp),%xmm4
movdqa 64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa 96(%rbp),%xmm13
paddd .sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd .sse_inc(%rip),%xmm12
movdqa %xmm12,96(%rbp)
movdqa %xmm13,112(%rbp)
movq %rbx,%rcx
andq $-16,%rcx
xorq %r8,%r8
1:
addq 0(%rsi,%r8), %r10
adcq 8+0(%rsi,%r8), %r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
2:
addq $16,%r8
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
cmpq %rcx,%r8
jb 1b
cmpq $160,%r8
jne 2b
paddd .chacha20_consts(%rip),%xmm1
paddd 48(%rbp),%xmm5
paddd 64(%rbp),%xmm9
paddd 112(%rbp),%xmm13
paddd .chacha20_consts(%rip),%xmm0
paddd 48(%rbp),%xmm4
paddd 64(%rbp),%xmm8
paddd 96(%rbp),%xmm12
movdqu 0 + 0(%rsi),%xmm3
movdqu 16 + 0(%rsi),%xmm7
movdqu 32 + 0(%rsi),%xmm11
movdqu 48 + 0(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 0(%rdi)
movdqu %xmm5,16 + 0(%rdi)
movdqu %xmm9,32 + 0(%rdi)
movdqu %xmm15,48 + 0(%rdi)
subq $64,%rbx
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
jmp open_sse_tail_64_dec_loop
3:
cmpq $192,%rbx
ja 3f
movdqa .chacha20_consts(%rip),%xmm0
movdqa 48(%rbp),%xmm4
movdqa 64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa %xmm0,%xmm2
movdqa %xmm4,%xmm6
movdqa %xmm8,%xmm10
movdqa 96(%rbp),%xmm14
paddd .sse_inc(%rip),%xmm14
movdqa %xmm14,%xmm13
paddd .sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd .sse_inc(%rip),%xmm12
movdqa %xmm12,96(%rbp)
movdqa %xmm13,112(%rbp)
movdqa %xmm14,128(%rbp)
movq %rbx,%rcx
movq $160,%r8
cmpq $160,%rcx
cmovgq %r8,%rcx
andq $-16,%rcx
xorq %r8,%r8
1:
addq 0(%rsi,%r8), %r10
adcq 8+0(%rsi,%r8), %r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
2:
addq $16,%r8
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
cmpq %rcx,%r8
jb 1b
cmpq $160,%r8
jne 2b
cmpq $176,%rbx
jb 1f
addq 160(%rsi),%r10
adcq 8+160(%rsi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
cmpq $192,%rbx
jb 1f
addq 176(%rsi),%r10
adcq 8+176(%rsi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
1:
paddd .chacha20_consts(%rip),%xmm2
paddd 48(%rbp),%xmm6
paddd 64(%rbp),%xmm10
paddd 128(%rbp),%xmm14
paddd .chacha20_consts(%rip),%xmm1
paddd 48(%rbp),%xmm5
paddd 64(%rbp),%xmm9
paddd 112(%rbp),%xmm13
paddd .chacha20_consts(%rip),%xmm0
paddd 48(%rbp),%xmm4
paddd 64(%rbp),%xmm8
paddd 96(%rbp),%xmm12
movdqu 0 + 0(%rsi),%xmm3
movdqu 16 + 0(%rsi),%xmm7
movdqu 32 + 0(%rsi),%xmm11
movdqu 48 + 0(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 0(%rdi)
movdqu %xmm6,16 + 0(%rdi)
movdqu %xmm10,32 + 0(%rdi)
movdqu %xmm15,48 + 0(%rdi)
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 64(%rdi)
movdqu %xmm5,16 + 64(%rdi)
movdqu %xmm9,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
subq $128,%rbx
leaq 128(%rsi),%rsi
leaq 128(%rdi),%rdi
jmp open_sse_tail_64_dec_loop
3:
movdqa .chacha20_consts(%rip),%xmm0
movdqa 48(%rbp),%xmm4
movdqa 64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa %xmm0,%xmm2
movdqa %xmm4,%xmm6
movdqa %xmm8,%xmm10
movdqa %xmm0,%xmm3
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm11
movdqa 96(%rbp),%xmm15
paddd .sse_inc(%rip),%xmm15
movdqa %xmm15,%xmm14
paddd .sse_inc(%rip),%xmm14
movdqa %xmm14,%xmm13
paddd .sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd .sse_inc(%rip),%xmm12
movdqa %xmm12,96(%rbp)
movdqa %xmm13,112(%rbp)
movdqa %xmm14,128(%rbp)
movdqa %xmm15,144(%rbp)
xorq %r8,%r8
1:
addq 0(%rsi,%r8), %r10
adcq 8+0(%rsi,%r8), %r11
adcq $1,%r12
movdqa %xmm11,80(%rbp)
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm11
pslld $12,%xmm11
psrld $20,%xmm4
pxor %xmm11,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm11
pslld $7,%xmm11
psrld $25,%xmm4
pxor %xmm11,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm11
pslld $12,%xmm11
psrld $20,%xmm5
pxor %xmm11,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm11
pslld $7,%xmm11
psrld $25,%xmm5
pxor %xmm11,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm11
pslld $12,%xmm11
psrld $20,%xmm6
pxor %xmm11,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm11
pslld $7,%xmm11
psrld $25,%xmm6
pxor %xmm11,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
movdqa 80(%rbp),%xmm11
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movdqa %xmm9,80(%rbp)
paddd %xmm7,%xmm3
pxor %xmm3,%xmm15
pshufb .rol16(%rip),%xmm15
paddd %xmm15,%xmm11
pxor %xmm11,%xmm7
movdqa %xmm7,%xmm9
pslld $12,%xmm9
psrld $20,%xmm7
pxor %xmm9,%xmm7
paddd %xmm7,%xmm3
pxor %xmm3,%xmm15
pshufb .rol8(%rip),%xmm15
paddd %xmm15,%xmm11
pxor %xmm11,%xmm7
movdqa %xmm7,%xmm9
pslld $7,%xmm9
psrld $25,%xmm7
pxor %xmm9,%xmm7
.byte 102,15,58,15,255,4
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,12
movdqa 80(%rbp),%xmm9
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
movdqa %xmm11,80(%rbp)
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm11
pslld $12,%xmm11
psrld $20,%xmm4
pxor %xmm11,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm11
pslld $7,%xmm11
psrld $25,%xmm4
pxor %xmm11,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm11
pslld $12,%xmm11
psrld $20,%xmm5
pxor %xmm11,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm11
pslld $7,%xmm11
psrld $25,%xmm5
pxor %xmm11,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm11
pslld $12,%xmm11
psrld $20,%xmm6
pxor %xmm11,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm11
pslld $7,%xmm11
psrld $25,%xmm6
pxor %xmm11,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
movdqa 80(%rbp),%xmm11
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
movdqa %xmm9,80(%rbp)
paddd %xmm7,%xmm3
pxor %xmm3,%xmm15
pshufb .rol16(%rip),%xmm15
paddd %xmm15,%xmm11
pxor %xmm11,%xmm7
movdqa %xmm7,%xmm9
pslld $12,%xmm9
psrld $20,%xmm7
pxor %xmm9,%xmm7
paddd %xmm7,%xmm3
pxor %xmm3,%xmm15
pshufb .rol8(%rip),%xmm15
paddd %xmm15,%xmm11
pxor %xmm11,%xmm7
movdqa %xmm7,%xmm9
pslld $7,%xmm9
psrld $25,%xmm7
pxor %xmm9,%xmm7
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
movdqa 80(%rbp),%xmm9
addq $16,%r8
cmpq $160,%r8
jb 1b
movq %rbx,%rcx
andq $-16,%rcx
1:
addq 0(%rsi,%r8), %r10
adcq 8+0(%rsi,%r8), %r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
addq $16,%r8
cmpq %rcx,%r8
jb 1b
paddd .chacha20_consts(%rip),%xmm3
paddd 48(%rbp),%xmm7
paddd 64(%rbp),%xmm11
paddd 144(%rbp),%xmm15
paddd .chacha20_consts(%rip),%xmm2
paddd 48(%rbp),%xmm6
paddd 64(%rbp),%xmm10
paddd 128(%rbp),%xmm14
paddd .chacha20_consts(%rip),%xmm1
paddd 48(%rbp),%xmm5
paddd 64(%rbp),%xmm9
paddd 112(%rbp),%xmm13
paddd .chacha20_consts(%rip),%xmm0
paddd 48(%rbp),%xmm4
paddd 64(%rbp),%xmm8
paddd 96(%rbp),%xmm12
movdqa %xmm12,80(%rbp)
movdqu 0 + 0(%rsi),%xmm12
pxor %xmm3,%xmm12
movdqu %xmm12,0 + 0(%rdi)
movdqu 16 + 0(%rsi),%xmm12
pxor %xmm7,%xmm12
movdqu %xmm12,16 + 0(%rdi)
movdqu 32 + 0(%rsi),%xmm12
pxor %xmm11,%xmm12
movdqu %xmm12,32 + 0(%rdi)
movdqu 48 + 0(%rsi),%xmm12
pxor %xmm15,%xmm12
movdqu %xmm12,48 + 0(%rdi)
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 64(%rdi)
movdqu %xmm6,16 + 64(%rdi)
movdqu %xmm10,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
movdqu 0 + 128(%rsi),%xmm3
movdqu 16 + 128(%rsi),%xmm7
movdqu 32 + 128(%rsi),%xmm11
movdqu 48 + 128(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 128(%rdi)
movdqu %xmm5,16 + 128(%rdi)
movdqu %xmm9,32 + 128(%rdi)
movdqu %xmm15,48 + 128(%rdi)
movdqa 80(%rbp),%xmm12
subq $192,%rbx
leaq 192(%rsi),%rsi
leaq 192(%rdi),%rdi
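// Decrypt the remaining whole 16-byte blocks with the leftover keystream in
// %xmm0/%xmm4/%xmm8/%xmm12, shifting the registers down after each store.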
open_sse_tail_64_dec_loop:
cmpq $16,%rbx
jb 1f
subq $16,%rbx
movdqu (%rsi),%xmm3
pxor %xmm3,%xmm0
movdqu %xmm0,(%rdi)
leaq 16(%rsi),%rsi
leaq 16(%rdi),%rdi
movdqa %xmm4,%xmm0
movdqa %xmm8,%xmm4
movdqa %xmm12,%xmm8
jmp open_sse_tail_64_dec_loop
1:
movdqa %xmm0,%xmm1
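// Final partial block (fewer than 16 bytes): load it back to front into %xmm3,
// hash it as a zero-padded block, and write out its XOR with the last
// keystream block one byte at a time.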
open_sse_tail_16:
testq %rbx,%rbx
jz open_sse_finalize
pxor %xmm3,%xmm3
leaq -1(%rsi,%rbx), %rsi
movq %rbx,%r8
2:
pslldq $1,%xmm3
pinsrb $0,(%rsi),%xmm3
subq $1,%rsi
subq $1,%r8
jnz 2b
3:
.byte 102,73,15,126,221
pextrq $1,%xmm3,%r14
pxor %xmm1,%xmm3
2:
pextrb $0,%xmm3,(%rdi)
psrldq $1,%xmm3
addq $1,%rdi
subq $1,%rbx
jne 2b
addq %r13,%r10
adcq %r14,%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
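// Finalize: absorb the block holding the AD and message lengths from 32(%rbp),
// perform the final reduction modulo 2^130 - 5, add s from 16(%rbp), and write
// the 16-byte tag through the pointer popped back into %r9.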
open_sse_finalize:
addq 32(%rbp),%r10
adcq 8+32(%rbp),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
movq %r10,%r13
movq %r11,%r14
movq %r12,%r15
subq $-5,%r10
sbbq $-1,%r11
sbbq $3,%r12
cmovcq %r13,%r10
cmovcq %r14,%r11
cmovcq %r15,%r12
addq 0+16(%rbp),%r10
adcq 8+16(%rbp),%r11
addq $288 + 32,%rsp
.cfi_adjust_cfa_offset -(288 + 32)
popq %r9
.cfi_adjust_cfa_offset -8
movq %r10,(%r9)
movq %r11,8(%r9)
popq %r15
.cfi_adjust_cfa_offset -8
popq %r14
.cfi_adjust_cfa_offset -8
popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
popq %rbx
.cfi_adjust_cfa_offset -8
popq %rbp
.cfi_adjust_cfa_offset -8
.byte 0xf3,0xc3
.cfi_adjust_cfa_offset (8 * 6) + 288 + 32
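// Short-input open path (at most 128 bytes): compute three ChaCha20 blocks in
// parallel; the first supplies the Poly1305 key and the other two decrypt the
// data.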
open_sse_128:
movdqu .chacha20_consts(%rip),%xmm0
movdqa %xmm0,%xmm1
movdqa %xmm0,%xmm2
movdqu 0(%r9),%xmm4
movdqa %xmm4,%xmm5
movdqa %xmm4,%xmm6
movdqu 16(%r9),%xmm8
movdqa %xmm8,%xmm9
movdqa %xmm8,%xmm10
movdqu 32(%r9),%xmm12
movdqa %xmm12,%xmm13
paddd .sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm14
paddd .sse_inc(%rip),%xmm14
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm11
movdqa %xmm13,%xmm15
movq $10,%r10
1:
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
decq %r10
jnz 1b
paddd .chacha20_consts(%rip),%xmm0
paddd .chacha20_consts(%rip),%xmm1
paddd .chacha20_consts(%rip),%xmm2
paddd %xmm7,%xmm4
paddd %xmm7,%xmm5
paddd %xmm7,%xmm6
paddd %xmm11,%xmm9
paddd %xmm11,%xmm10
paddd %xmm15,%xmm13
paddd .sse_inc(%rip),%xmm15
paddd %xmm15,%xmm14
pand .clamp(%rip),%xmm0
movdqa %xmm0,0(%rbp)
movdqa %xmm4,16(%rbp)
movq %r8,%r8
call poly_hash_ad_internal
1:
cmpq $16,%rbx
jb open_sse_tail_16
subq $16,%rbx
addq 0(%rsi),%r10
adcq 8+0(%rsi),%r11
adcq $1,%r12
movdqu 0(%rsi),%xmm3
pxor %xmm3,%xmm1
movdqu %xmm1,0(%rdi)
leaq 16(%rsi),%rsi
leaq 16(%rdi),%rdi
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
movdqa %xmm5,%xmm1
movdqa %xmm9,%xmm5
movdqa %xmm13,%xmm9
movdqa %xmm2,%xmm13
movdqa %xmm6,%xmm2
movdqa %xmm10,%xmm6
movdqa %xmm14,%xmm10
jmp 1b
jmp open_sse_tail_16
.size chacha20_poly1305_open, .-chacha20_poly1305_open
.cfi_endproc
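// chacha20_poly1305_seal: encrypts and computes the tag. Register usage mirrors
// chacha20_poly1305_open, except that Poly1305 runs over the ciphertext written
// to %rdi, and an extra length loaded from 56(%r9) is added to the plaintext
// length recorded at 8+32(%rbp) for the final length block.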
.globl chacha20_poly1305_seal
.hidden chacha20_poly1305_seal
.type chacha20_poly1305_seal,@function
.align 64
chacha20_poly1305_seal:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
pushq %rbx
.cfi_adjust_cfa_offset 8
pushq %r12
.cfi_adjust_cfa_offset 8
pushq %r13
.cfi_adjust_cfa_offset 8
pushq %r14
.cfi_adjust_cfa_offset 8
pushq %r15
.cfi_adjust_cfa_offset 8
pushq %r9
.cfi_adjust_cfa_offset 8
subq $288 + 32,%rsp
.cfi_adjust_cfa_offset 288 + 32
.cfi_offset rbp, -16
.cfi_offset rbx, -24
.cfi_offset r12, -32
.cfi_offset r13, -40
.cfi_offset r14, -48
.cfi_offset r15, -56
leaq 32(%rsp),%rbp
andq $-32,%rbp
movq 56(%r9),%rbx
addq %rdx,%rbx
movq %rbx,8+32(%rbp)
movq %r8,0+32(%rbp)
movq %rdx,%rbx
movl OPENSSL_ia32cap_P+8(%rip),%eax
andl $288,%eax
xorl $288,%eax
jz chacha20_poly1305_seal_avx2
cmpq $128,%rbx
jbe seal_sse_128
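// Seal setup: generate four ChaCha20 blocks up front; the block with the lowest
// counter (the %xmm3/%xmm7/%xmm11/%xmm15 set) is clamped into the Poly1305 key,
// and the rest of the keystream encrypts the first 128 or 192 bytes.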
movdqa .chacha20_consts(%rip),%xmm0
movdqu 0(%r9),%xmm4
movdqu 16(%r9),%xmm8
movdqu 32(%r9),%xmm12
movdqa %xmm0,%xmm1
movdqa %xmm0,%xmm2
movdqa %xmm0,%xmm3
movdqa %xmm4,%xmm5
movdqa %xmm4,%xmm6
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm9
movdqa %xmm8,%xmm10
movdqa %xmm8,%xmm11
movdqa %xmm12,%xmm15
paddd .sse_inc(%rip),%xmm12
movdqa %xmm12,%xmm14
paddd .sse_inc(%rip),%xmm12
movdqa %xmm12,%xmm13
paddd .sse_inc(%rip),%xmm12
movdqa %xmm4,48(%rbp)
movdqa %xmm8,64(%rbp)
movdqa %xmm12,96(%rbp)
movdqa %xmm13,112(%rbp)
movdqa %xmm14,128(%rbp)
movdqa %xmm15,144(%rbp)
movq $10,%r10
1:
movdqa %xmm8,80(%rbp)
movdqa .rol16(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,80(%rbp)
movdqa %xmm7,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm4
pxor %xmm8,%xmm4
movdqa .rol8(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,80(%rbp)
movdqa %xmm7,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm4
pxor %xmm8,%xmm4
movdqa 80(%rbp),%xmm8
.byte 102,15,58,15,255,4
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,12
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
movdqa %xmm8,80(%rbp)
movdqa .rol16(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,80(%rbp)
movdqa %xmm7,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm4
pxor %xmm8,%xmm4
movdqa .rol8(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,80(%rbp)
movdqa %xmm7,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm4
pxor %xmm8,%xmm4
movdqa 80(%rbp),%xmm8
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
decq %r10
jnz 1b
paddd .chacha20_consts(%rip),%xmm3
paddd 48(%rbp),%xmm7
paddd 64(%rbp),%xmm11
paddd 144(%rbp),%xmm15
paddd .chacha20_consts(%rip),%xmm2
paddd 48(%rbp),%xmm6
paddd 64(%rbp),%xmm10
paddd 128(%rbp),%xmm14
paddd .chacha20_consts(%rip),%xmm1
paddd 48(%rbp),%xmm5
paddd 64(%rbp),%xmm9
paddd 112(%rbp),%xmm13
paddd .chacha20_consts(%rip),%xmm0
paddd 48(%rbp),%xmm4
paddd 64(%rbp),%xmm8
paddd 96(%rbp),%xmm12
pand .clamp(%rip),%xmm3
movdqa %xmm3,0(%rbp)
movdqa %xmm7,16(%rbp)
movq %r8,%r8
call poly_hash_ad_internal
movdqu 0 + 0(%rsi),%xmm3
movdqu 16 + 0(%rsi),%xmm7
movdqu 32 + 0(%rsi),%xmm11
movdqu 48 + 0(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 0(%rdi)
movdqu %xmm6,16 + 0(%rdi)
movdqu %xmm10,32 + 0(%rdi)
movdqu %xmm15,48 + 0(%rdi)
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 64(%rdi)
movdqu %xmm5,16 + 64(%rdi)
movdqu %xmm9,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
cmpq $192,%rbx
ja 1f
movq $128,%rcx
subq $128,%rbx
leaq 128(%rsi),%rsi
jmp seal_sse_128_seal_hash
1:
movdqu 0 + 128(%rsi),%xmm3
movdqu 16 + 128(%rsi),%xmm7
movdqu 32 + 128(%rsi),%xmm11
movdqu 48 + 128(%rsi),%xmm15
pxor %xmm3,%xmm0
pxor %xmm7,%xmm4
pxor %xmm11,%xmm8
pxor %xmm12,%xmm15
movdqu %xmm0,0 + 128(%rdi)
movdqu %xmm4,16 + 128(%rdi)
movdqu %xmm8,32 + 128(%rdi)
movdqu %xmm15,48 + 128(%rdi)
movq $192,%rcx
subq $192,%rbx
leaq 192(%rsi),%rsi
movq $2,%rcx
movq $8,%r8
cmpq $64,%rbx
jbe seal_sse_tail_64
cmpq $128,%rbx
jbe seal_sse_tail_128
cmpq $192,%rbx
jbe seal_sse_tail_192
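// Main seal loop: four ChaCha20 blocks per iteration, with Poly1305 hashing the
// ciphertext written on the previous iteration, so authentication runs one
// 256-byte chunk behind encryption.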
1:
movdqa .chacha20_consts(%rip),%xmm0
movdqa 48(%rbp),%xmm4
movdqa 64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa %xmm0,%xmm2
movdqa %xmm4,%xmm6
movdqa %xmm8,%xmm10
movdqa %xmm0,%xmm3
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm11
movdqa 96(%rbp),%xmm15
paddd .sse_inc(%rip),%xmm15
movdqa %xmm15,%xmm14
paddd .sse_inc(%rip),%xmm14
movdqa %xmm14,%xmm13
paddd .sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd .sse_inc(%rip),%xmm12
movdqa %xmm12,96(%rbp)
movdqa %xmm13,112(%rbp)
movdqa %xmm14,128(%rbp)
movdqa %xmm15,144(%rbp)
2:
movdqa %xmm8,80(%rbp)
movdqa .rol16(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,80(%rbp)
movdqa %xmm7,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm4
pxor %xmm8,%xmm4
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movdqa .rol8(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,80(%rbp)
movdqa %xmm7,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm4
pxor %xmm8,%xmm4
movdqa 80(%rbp),%xmm8
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
.byte 102,15,58,15,255,4
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,12
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
movdqa %xmm8,80(%rbp)
movdqa .rol16(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,80(%rbp)
movdqa %xmm7,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm4
pxor %xmm8,%xmm4
movdqa .rol8(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,80(%rbp)
movdqa %xmm7,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm4
pxor %xmm8,%xmm4
movdqa 80(%rbp),%xmm8
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
leaq 16(%rdi),%rdi
decq %r8
jge 2b
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
decq %rcx
jg 2b
paddd .chacha20_consts(%rip),%xmm3
paddd 48(%rbp),%xmm7
paddd 64(%rbp),%xmm11
paddd 144(%rbp),%xmm15
paddd .chacha20_consts(%rip),%xmm2
paddd 48(%rbp),%xmm6
paddd 64(%rbp),%xmm10
paddd 128(%rbp),%xmm14
paddd .chacha20_consts(%rip),%xmm1
paddd 48(%rbp),%xmm5
paddd 64(%rbp),%xmm9
paddd 112(%rbp),%xmm13
paddd .chacha20_consts(%rip),%xmm0
paddd 48(%rbp),%xmm4
paddd 64(%rbp),%xmm8
paddd 96(%rbp),%xmm12
movdqa %xmm14,80(%rbp)
movdqa %xmm14,80(%rbp)
movdqu 0 + 0(%rsi),%xmm14
pxor %xmm3,%xmm14
movdqu %xmm14,0 + 0(%rdi)
movdqu 16 + 0(%rsi),%xmm14
pxor %xmm7,%xmm14
movdqu %xmm14,16 + 0(%rdi)
movdqu 32 + 0(%rsi),%xmm14
pxor %xmm11,%xmm14
movdqu %xmm14,32 + 0(%rdi)
movdqu 48 + 0(%rsi),%xmm14
pxor %xmm15,%xmm14
movdqu %xmm14,48 + 0(%rdi)
movdqa 80(%rbp),%xmm14
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 64(%rdi)
movdqu %xmm6,16 + 64(%rdi)
movdqu %xmm10,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
movdqu 0 + 128(%rsi),%xmm3
movdqu 16 + 128(%rsi),%xmm7
movdqu 32 + 128(%rsi),%xmm11
movdqu 48 + 128(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 128(%rdi)
movdqu %xmm5,16 + 128(%rdi)
movdqu %xmm9,32 + 128(%rdi)
movdqu %xmm15,48 + 128(%rdi)
cmpq $256,%rbx
ja 3f
movq $192,%rcx
subq $192,%rbx
leaq 192(%rsi),%rsi
jmp seal_sse_128_seal_hash
3:
movdqu 0 + 192(%rsi),%xmm3
movdqu 16 + 192(%rsi),%xmm7
movdqu 32 + 192(%rsi),%xmm11
movdqu 48 + 192(%rsi),%xmm15
pxor %xmm3,%xmm0
pxor %xmm7,%xmm4
pxor %xmm11,%xmm8
pxor %xmm12,%xmm15
movdqu %xmm0,0 + 192(%rdi)
movdqu %xmm4,16 + 192(%rdi)
movdqu %xmm8,32 + 192(%rdi)
movdqu %xmm15,48 + 192(%rdi)
leaq 256(%rsi),%rsi
subq $256,%rbx
movq $6,%rcx
movq $4,%r8
cmpq $192,%rbx
jg 1b
movq %rbx,%rcx
testq %rbx,%rbx
je seal_sse_128_seal_hash
movq $6,%rcx
cmpq $64,%rbx
jg 3f
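// Seal tail paths: one, two or three more ChaCha20 blocks cover the last
// portion of the message while the remaining ciphertext is hashed.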
seal_sse_tail_64:
movdqa .chacha20_consts(%rip),%xmm0
movdqa 48(%rbp),%xmm4
movdqa 64(%rbp),%xmm8
movdqa 96(%rbp),%xmm12
paddd .sse_inc(%rip),%xmm12
movdqa %xmm12,96(%rbp)
1:
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
2:
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
decq %rcx
jg 1b
decq %r8
jge 2b
paddd .chacha20_consts(%rip),%xmm0
paddd 48(%rbp),%xmm4
paddd 64(%rbp),%xmm8
paddd 96(%rbp),%xmm12
jmp seal_sse_128_seal
3:
cmpq $128,%rbx
jg 3f
seal_sse_tail_128:
movdqa .chacha20_consts(%rip),%xmm0
movdqa 48(%rbp),%xmm4
movdqa 64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa 96(%rbp),%xmm13
paddd .sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd .sse_inc(%rip),%xmm12
movdqa %xmm12,96(%rbp)
movdqa %xmm13,112(%rbp)
1:
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
2:
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
leaq 16(%rdi),%rdi
decq %rcx
jg 1b
decq %r8
jge 2b
paddd .chacha20_consts(%rip),%xmm1
paddd 48(%rbp),%xmm5
paddd 64(%rbp),%xmm9
paddd 112(%rbp),%xmm13
paddd .chacha20_consts(%rip),%xmm0
paddd 48(%rbp),%xmm4
paddd 64(%rbp),%xmm8
paddd 96(%rbp),%xmm12
movdqu 0 + 0(%rsi),%xmm3
movdqu 16 + 0(%rsi),%xmm7
movdqu 32 + 0(%rsi),%xmm11
movdqu 48 + 0(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 0(%rdi)
movdqu %xmm5,16 + 0(%rdi)
movdqu %xmm9,32 + 0(%rdi)
movdqu %xmm15,48 + 0(%rdi)
movq $64,%rcx
subq $64,%rbx
leaq 64(%rsi),%rsi
jmp seal_sse_128_seal_hash
3:
seal_sse_tail_192:
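# Seal tail, at most 192 bytes left: three parallel ChaCha20 blocks interleaved with Poly1305;
# XORs 128 bytes here, the remainder falls through to seal_sse_128_seal_hash.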
movdqa .chacha20_consts(%rip),%xmm0
movdqa 48(%rbp),%xmm4
movdqa 64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa %xmm0,%xmm2
movdqa %xmm4,%xmm6
movdqa %xmm8,%xmm10
movdqa 96(%rbp),%xmm14
paddd .sse_inc(%rip),%xmm14
movdqa %xmm14,%xmm13
paddd .sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd .sse_inc(%rip),%xmm12
movdqa %xmm12,96(%rbp)
movdqa %xmm13,112(%rbp)
movdqa %xmm14,128(%rbp)
1:
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
2:
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
leaq 16(%rdi),%rdi
decq %rcx
jg 1b
decq %r8
jge 2b
paddd .chacha20_consts(%rip),%xmm2
paddd 48(%rbp),%xmm6
paddd 64(%rbp),%xmm10
paddd 128(%rbp),%xmm14
paddd .chacha20_consts(%rip),%xmm1
paddd 48(%rbp),%xmm5
paddd 64(%rbp),%xmm9
paddd 112(%rbp),%xmm13
paddd .chacha20_consts(%rip),%xmm0
paddd 48(%rbp),%xmm4
paddd 64(%rbp),%xmm8
paddd 96(%rbp),%xmm12
movdqu 0 + 0(%rsi),%xmm3
movdqu 16 + 0(%rsi),%xmm7
movdqu 32 + 0(%rsi),%xmm11
movdqu 48 + 0(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 0(%rdi)
movdqu %xmm6,16 + 0(%rdi)
movdqu %xmm10,32 + 0(%rdi)
movdqu %xmm15,48 + 0(%rdi)
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 64(%rdi)
movdqu %xmm5,16 + 64(%rdi)
movdqu %xmm9,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
movq $128,%rcx
subq $128,%rbx
leaq 128(%rsi),%rsi
seal_sse_128_seal_hash:
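# Poly1305-hash ciphertext already written at (%rdi), 16 bytes per iteration; %rcx counts the bytes left to hash.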
cmpq $16,%rcx
jb seal_sse_128_seal
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
subq $16,%rcx
leaq 16(%rdi),%rdi
jmp seal_sse_128_seal_hash
seal_sse_128_seal:
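# Encrypt and hash the remaining full 16-byte blocks with the leftover keystream in %xmm0,
# rotating the next keystream register into place after each block.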
cmpq $16,%rbx
jb seal_sse_tail_16
subq $16,%rbx
movdqu 0(%rsi),%xmm3
pxor %xmm3,%xmm0
movdqu %xmm0,0(%rdi)
addq 0(%rdi),%r10
adcq 8(%rdi),%r11
adcq $1,%r12
leaq 16(%rsi),%rsi
leaq 16(%rdi),%rdi
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
movdqa %xmm4,%xmm0
movdqa %xmm8,%xmm4
movdqa %xmm12,%xmm8
movdqa %xmm1,%xmm12
movdqa %xmm5,%xmm1
movdqa %xmm9,%xmm5
movdqa %xmm13,%xmm9
jmp seal_sse_128_seal
seal_sse_tail_16:
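# Final partial block (under 16 bytes): gather the trailing plaintext into %xmm15 byte by byte,
# XOR with keystream and write it out, then pad with any extra authenticated bytes before
# absorbing the block into the Poly1305 state.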
testq %rbx,%rbx
jz process_blocks_of_extra_in
movq %rbx,%r8
movq %rbx,%rcx
leaq -1(%rsi,%rbx), %rsi
pxor %xmm15,%xmm15
1:
pslldq $1,%xmm15
pinsrb $0,(%rsi),%xmm15
leaq -1(%rsi),%rsi
decq %rcx
jne 1b
pxor %xmm0,%xmm15
movq %rbx,%rcx
movdqu %xmm15,%xmm0
2:
pextrb $0,%xmm0,(%rdi)
psrldq $1,%xmm0
addq $1,%rdi
subq $1,%rcx
jnz 2b
movq 288+32(%rsp),%r9
movq 56(%r9),%r14
movq 48(%r9),%r13
testq %r14,%r14
jz process_partial_block
movq $16,%r15
subq %rbx,%r15
cmpq %r15,%r14
jge load_extra_in
movq %r14,%r15
load_extra_in:
leaq -1(%r13,%r15), %rsi
addq %r15,%r13
subq %r15,%r14
movq %r13,48(%r9)
movq %r14,56(%r9)
addq %r15,%r8
pxor %xmm11,%xmm11
3:
pslldq $1,%xmm11
pinsrb $0,(%rsi),%xmm11
leaq -1(%rsi),%rsi
subq $1,%r15
jnz 3b
movq %rbx,%r15
4:
pslldq $1,%xmm11
subq $1,%r15
jnz 4b
leaq .and_masks(%rip),%r15
shlq $4,%rbx
pand -16(%r15,%rbx), %xmm15
por %xmm11,%xmm15
.byte 102,77,15,126,253
pextrq $1,%xmm15,%r14
addq %r13,%r10
adcq %r14,%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
process_blocks_of_extra_in:
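# Absorb whole 16-byte blocks of the additional buffer (pointer at 48(%r9), length at 56(%r9))
# into the Poly1305 state.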
movq 288+32(%rsp),%r9
movq 48(%r9),%rsi
movq 56(%r9),%r8
movq %r8,%rcx
shrq $4,%r8
5:
jz process_extra_in_trailer
addq 0(%rsi),%r10
adcq 8+0(%rsi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rsi),%rsi
subq $1,%r8
jmp 5b
process_extra_in_trailer:
andq $15,%rcx
movq %rcx,%rbx
jz do_length_block
leaq -1(%rsi,%rcx), %rsi
6:
pslldq $1,%xmm15
pinsrb $0,(%rsi),%xmm15
leaq -1(%rsi),%rsi
subq $1,%rcx
jnz 6b
process_partial_block:
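# Mask the final partial block with the matching .and_masks entry and absorb it into the accumulator.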
leaq .and_masks(%rip),%r15
shlq $4,%rbx
pand -16(%r15,%rbx), %xmm15
.byte 102,77,15,126,253
pextrq $1,%xmm15,%r14
addq %r13,%r10
adcq %r14,%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
do_length_block:
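# Absorb the AAD/ciphertext length block from 32(%rbp), do the final reduction mod 2^130-5,
# add the s half of the key from 16(%rbp) and store the 16-byte tag before restoring registers.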
addq 32(%rbp),%r10
adcq 8+32(%rbp),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
movq %r10,%r13
movq %r11,%r14
movq %r12,%r15
subq $-5,%r10
sbbq $-1,%r11
sbbq $3,%r12
cmovcq %r13,%r10
cmovcq %r14,%r11
cmovcq %r15,%r12
addq 0+16(%rbp),%r10
adcq 8+16(%rbp),%r11
addq $288 + 32,%rsp
.cfi_adjust_cfa_offset -(288 + 32)
popq %r9
.cfi_adjust_cfa_offset -8
movq %r10,0(%r9)
movq %r11,8(%r9)
popq %r15
.cfi_adjust_cfa_offset -8
popq %r14
.cfi_adjust_cfa_offset -8
popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
popq %rbx
.cfi_adjust_cfa_offset -8
popq %rbp
.cfi_adjust_cfa_offset -8
.byte 0xf3,0xc3
.cfi_adjust_cfa_offset (8 * 6) + 288 + 32
seal_sse_128:
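# Seal path for short inputs: three plain-SSE ChaCha20 blocks; the first 32 bytes of the
# lowest-counter block are clamped and become the Poly1305 key (stored at 0/16(%rbp)).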
movdqu .chacha20_consts(%rip),%xmm0
movdqa %xmm0,%xmm1
movdqa %xmm0,%xmm2
movdqu 0(%r9),%xmm4
movdqa %xmm4,%xmm5
movdqa %xmm4,%xmm6
movdqu 16(%r9),%xmm8
movdqa %xmm8,%xmm9
movdqa %xmm8,%xmm10
movdqu 32(%r9),%xmm14
movdqa %xmm14,%xmm12
paddd .sse_inc(%rip),%xmm12
movdqa %xmm12,%xmm13
paddd .sse_inc(%rip),%xmm13
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm11
movdqa %xmm12,%xmm15
movq $10,%r10
1:
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb .rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
decq %r10
jnz 1b
paddd .chacha20_consts(%rip),%xmm0
paddd .chacha20_consts(%rip),%xmm1
paddd .chacha20_consts(%rip),%xmm2
paddd %xmm7,%xmm4
paddd %xmm7,%xmm5
paddd %xmm7,%xmm6
paddd %xmm11,%xmm8
paddd %xmm11,%xmm9
paddd %xmm15,%xmm12
paddd .sse_inc(%rip),%xmm15
paddd %xmm15,%xmm13
pand .clamp(%rip),%xmm2
movdqa %xmm2,0(%rbp)
movdqa %xmm6,16(%rbp)
movq %r8,%r8
call poly_hash_ad_internal
jmp seal_sse_128_seal
.size chacha20_poly1305_seal, .-chacha20_poly1305_seal
.type chacha20_poly1305_open_avx2,@function
.align 64
chacha20_poly1305_open_avx2:
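# AVX2 open (decrypt) entry: one ChaCha20 block supplies the Poly1305 key, short inputs take
# the dedicated <=192 and <=320 byte paths, and the main loop below handles 512 bytes per pass.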
vzeroupper
vmovdqa .chacha20_consts(%rip),%ymm0
vbroadcasti128 0(%r9),%ymm4
vbroadcasti128 16(%r9),%ymm8
vbroadcasti128 32(%r9),%ymm12
vpaddd .avx2_init(%rip),%ymm12,%ymm12
cmpq $192,%rbx
jbe open_avx2_192
cmpq $320,%rbx
jbe open_avx2_320
vmovdqa %ymm4,64(%rbp)
vmovdqa %ymm8,96(%rbp)
vmovdqa %ymm12,160(%rbp)
movq $10,%r10
1:
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $12,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm4,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $4,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm4,%ymm4,%ymm4
decq %r10
jne 1b
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 64(%rbp),%ymm4,%ymm4
vpaddd 96(%rbp),%ymm8,%ymm8
vpaddd 160(%rbp),%ymm12,%ymm12
vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
vpand .clamp(%rip),%ymm3,%ymm3
vmovdqa %ymm3,0(%rbp)
vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
movq %r8,%r8
call poly_hash_ad_internal
xorq %rcx,%rcx
1:
addq 0(%rsi,%rcx), %r10
adcq 8+0(%rsi,%rcx), %r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
addq $16,%rcx
cmpq $64,%rcx
jne 1b
vpxor 0(%rsi),%ymm0,%ymm0
vpxor 32(%rsi),%ymm4,%ymm4
vmovdqu %ymm0,0(%rdi)
vmovdqu %ymm4,32(%rdi)
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
subq $64,%rbx
1:
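# Main loop: while at least 512 bytes remain, run eight ChaCha20 blocks (four 2-block ymm states)
# while hashing the same 512 bytes of ciphertext with the mulx-based Poly1305.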
cmpq $512,%rbx
jb 3f
vmovdqa .chacha20_consts(%rip),%ymm0
vmovdqa 64(%rbp),%ymm4
vmovdqa 96(%rbp),%ymm8
vmovdqa %ymm0,%ymm1
vmovdqa %ymm4,%ymm5
vmovdqa %ymm8,%ymm9
vmovdqa %ymm0,%ymm2
vmovdqa %ymm4,%ymm6
vmovdqa %ymm8,%ymm10
vmovdqa %ymm0,%ymm3
vmovdqa %ymm4,%ymm7
vmovdqa %ymm8,%ymm11
vmovdqa .avx2_inc(%rip),%ymm12
vpaddd 160(%rbp),%ymm12,%ymm15
vpaddd %ymm15,%ymm12,%ymm14
vpaddd %ymm14,%ymm12,%ymm13
vpaddd %ymm13,%ymm12,%ymm12
vmovdqa %ymm15,256(%rbp)
vmovdqa %ymm14,224(%rbp)
vmovdqa %ymm13,192(%rbp)
vmovdqa %ymm12,160(%rbp)
xorq %rcx,%rcx
2:
addq 0*8(%rsi,%rcx), %r10
adcq 8+0*8(%rsi,%rcx), %r11
adcq $1,%r12
vmovdqa %ymm8,128(%rbp)
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
addq %rax,%r15
adcq %rdx,%r9
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa .rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
addq 2*8(%rsi,%rcx), %r10
adcq 8+2*8(%rsi,%rcx), %r11
adcq $1,%r12
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
vmovdqa %ymm8,128(%rbp)
vpsrld $25,%ymm7,%ymm8
vpslld $32-25,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 128(%rbp),%ymm8
vpalignr $4,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $12,%ymm15,%ymm15,%ymm15
vpalignr $4,%ymm6,%ymm6,%ymm6
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $12,%ymm14,%ymm14,%ymm14
vpalignr $4,%ymm5,%ymm5,%ymm5
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $4,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm12,%ymm12,%ymm12
vmovdqa %ymm8,128(%rbp)
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
addq %rax,%r15
adcq %rdx,%r9
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
addq 4*8(%rsi,%rcx), %r10
adcq 8+4*8(%rsi,%rcx), %r11
adcq $1,%r12
leaq 48(%rcx),%rcx
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa .rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $25,%ymm7,%ymm8
vpslld $32-25,%ymm7,%ymm7
addq %rax,%r15
adcq %rdx,%r9
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 128(%rbp),%ymm8
vpalignr $12,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $4,%ymm15,%ymm15,%ymm15
vpalignr $12,%ymm6,%ymm6,%ymm6
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $12,%ymm5,%ymm5,%ymm5
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $12,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm12,%ymm12,%ymm12
cmpq $60*8,%rcx
jne 2b
vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
vpaddd 64(%rbp),%ymm7,%ymm7
vpaddd 96(%rbp),%ymm11,%ymm11
vpaddd 256(%rbp),%ymm15,%ymm15
vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
vpaddd 64(%rbp),%ymm6,%ymm6
vpaddd 96(%rbp),%ymm10,%ymm10
vpaddd 224(%rbp),%ymm14,%ymm14
vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
vpaddd 64(%rbp),%ymm5,%ymm5
vpaddd 96(%rbp),%ymm9,%ymm9
vpaddd 192(%rbp),%ymm13,%ymm13
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 64(%rbp),%ymm4,%ymm4
vpaddd 96(%rbp),%ymm8,%ymm8
vpaddd 160(%rbp),%ymm12,%ymm12
vmovdqa %ymm0,128(%rbp)
addq 60*8(%rsi),%r10
adcq 8+60*8(%rsi),%r11
adcq $1,%r12
vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
vpxor 0+0(%rsi),%ymm0,%ymm0
vpxor 32+0(%rsi),%ymm3,%ymm3
vpxor 64+0(%rsi),%ymm7,%ymm7
vpxor 96+0(%rsi),%ymm11,%ymm11
vmovdqu %ymm0,0+0(%rdi)
vmovdqu %ymm3,32+0(%rdi)
vmovdqu %ymm7,64+0(%rdi)
vmovdqu %ymm11,96+0(%rdi)
vmovdqa 128(%rbp),%ymm0
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
vpxor 0+128(%rsi),%ymm3,%ymm3
vpxor 32+128(%rsi),%ymm2,%ymm2
vpxor 64+128(%rsi),%ymm6,%ymm6
vpxor 96+128(%rsi),%ymm10,%ymm10
vmovdqu %ymm3,0+128(%rdi)
vmovdqu %ymm2,32+128(%rdi)
vmovdqu %ymm6,64+128(%rdi)
vmovdqu %ymm10,96+128(%rdi)
addq 60*8+16(%rsi),%r10
adcq 8+60*8+16(%rsi),%r11
adcq $1,%r12
vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
vpxor 0+256(%rsi),%ymm3,%ymm3
vpxor 32+256(%rsi),%ymm1,%ymm1
vpxor 64+256(%rsi),%ymm5,%ymm5
vpxor 96+256(%rsi),%ymm9,%ymm9
vmovdqu %ymm3,0+256(%rdi)
vmovdqu %ymm1,32+256(%rdi)
vmovdqu %ymm5,64+256(%rdi)
vmovdqu %ymm9,96+256(%rdi)
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
vperm2i128 $0x13,%ymm0,%ymm4,%ymm4
vperm2i128 $0x02,%ymm8,%ymm12,%ymm0
vperm2i128 $0x13,%ymm8,%ymm12,%ymm8
vpxor 0+384(%rsi),%ymm3,%ymm3
vpxor 32+384(%rsi),%ymm0,%ymm0
vpxor 64+384(%rsi),%ymm4,%ymm4
vpxor 96+384(%rsi),%ymm8,%ymm8
vmovdqu %ymm3,0+384(%rdi)
vmovdqu %ymm0,32+384(%rdi)
vmovdqu %ymm4,64+384(%rdi)
vmovdqu %ymm8,96+384(%rdi)
leaq 512(%rsi),%rsi
leaq 512(%rdi),%rdi
subq $512,%rbx
jmp 1b
3:
testq %rbx,%rbx
vzeroupper
je open_sse_finalize
3:
cmpq $128,%rbx
ja 3f
vmovdqa .chacha20_consts(%rip),%ymm0
vmovdqa 64(%rbp),%ymm4
vmovdqa 96(%rbp),%ymm8
vmovdqa .avx2_inc(%rip),%ymm12
vpaddd 160(%rbp),%ymm12,%ymm12
vmovdqa %ymm12,160(%rbp)
xorq %r8,%r8
movq %rbx,%rcx
andq $-16,%rcx
testq %rcx,%rcx
je 2f
1:
addq 0*8(%rsi,%r8), %r10
adcq 8+0*8(%rsi,%r8), %r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
2:
addq $16,%r8
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $12,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm4,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $4,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm4,%ymm4,%ymm4
cmpq %rcx,%r8
jb 1b
cmpq $160,%r8
jne 2b
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 64(%rbp),%ymm4,%ymm4
vpaddd 96(%rbp),%ymm8,%ymm8
vpaddd 160(%rbp),%ymm12,%ymm12
vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
vmovdqa %ymm3,%ymm8
jmp open_avx2_tail_loop
3:
cmpq $256,%rbx
ja 3f
vmovdqa .chacha20_consts(%rip),%ymm0
vmovdqa 64(%rbp),%ymm4
vmovdqa 96(%rbp),%ymm8
vmovdqa %ymm0,%ymm1
vmovdqa %ymm4,%ymm5
vmovdqa %ymm8,%ymm9
vmovdqa .avx2_inc(%rip),%ymm12
vpaddd 160(%rbp),%ymm12,%ymm13
vpaddd %ymm13,%ymm12,%ymm12
vmovdqa %ymm12,160(%rbp)
vmovdqa %ymm13,192(%rbp)
movq %rbx,128(%rbp)
movq %rbx,%rcx
subq $128,%rcx
shrq $4,%rcx
movq $10,%r8
cmpq $10,%rcx
cmovgq %r8,%rcx
movq %rsi,%rbx
xorq %r8,%r8
1:
addq 0(%rbx),%r10
adcq 8+0(%rbx),%r11
adcq $1,%r12
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rbx),%rbx
2:
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $12,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm5,%ymm5,%ymm5
incq %r8
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $4,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm5,%ymm5,%ymm5
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol16(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpsrld $20,%ymm6,%ymm3
vpslld $12,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol8(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpslld $7,%ymm6,%ymm3
vpsrld $25,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $12,%ymm6,%ymm6,%ymm6
cmpq %rcx,%r8
jb 1b
cmpq $10,%r8
jne 2b
movq %rbx,%r8
subq %rsi,%rbx
movq %rbx,%rcx
movq 128(%rbp),%rbx
1:
addq $16,%rcx
cmpq %rbx,%rcx
jg 1f
addq 0(%r8),%r10
adcq 8+0(%r8),%r11
adcq $1,%r12
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%r8),%r8
jmp 1b
1:
vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
vpaddd 64(%rbp),%ymm5,%ymm5
vpaddd 96(%rbp),%ymm9,%ymm9
vpaddd 192(%rbp),%ymm13,%ymm13
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 64(%rbp),%ymm4,%ymm4
vpaddd 96(%rbp),%ymm8,%ymm8
vpaddd 160(%rbp),%ymm12,%ymm12
vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
vpxor 0+0(%rsi),%ymm3,%ymm3
vpxor 32+0(%rsi),%ymm1,%ymm1
vpxor 64+0(%rsi),%ymm5,%ymm5
vpxor 96+0(%rsi),%ymm9,%ymm9
vmovdqu %ymm3,0+0(%rdi)
vmovdqu %ymm1,32+0(%rdi)
vmovdqu %ymm5,64+0(%rdi)
vmovdqu %ymm9,96+0(%rdi)
vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
vmovdqa %ymm3,%ymm8
leaq 128(%rsi),%rsi
leaq 128(%rdi),%rdi
subq $128,%rbx
jmp open_avx2_tail_loop
3:
cmpq $384,%rbx
ja 3f
vmovdqa .chacha20_consts(%rip),%ymm0
vmovdqa 64(%rbp),%ymm4
vmovdqa 96(%rbp),%ymm8
vmovdqa %ymm0,%ymm1
vmovdqa %ymm4,%ymm5
vmovdqa %ymm8,%ymm9
vmovdqa %ymm0,%ymm2
vmovdqa %ymm4,%ymm6
vmovdqa %ymm8,%ymm10
vmovdqa .avx2_inc(%rip),%ymm12
vpaddd 160(%rbp),%ymm12,%ymm14
vpaddd %ymm14,%ymm12,%ymm13
vpaddd %ymm13,%ymm12,%ymm12
vmovdqa %ymm12,160(%rbp)
vmovdqa %ymm13,192(%rbp)
vmovdqa %ymm14,224(%rbp)
movq %rbx,128(%rbp)
movq %rbx,%rcx
subq $256,%rcx
shrq $4,%rcx
addq $6,%rcx
movq $10,%r8
cmpq $10,%rcx
cmovgq %r8,%rcx
movq %rsi,%rbx
xorq %r8,%r8
1:
addq 0(%rbx),%r10
adcq 8+0(%rbx),%r11
adcq $1,%r12
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rbx),%rbx
2:
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol16(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpsrld $20,%ymm6,%ymm3
vpslld $12,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol8(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpslld $7,%ymm6,%ymm3
vpsrld $25,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpalignr $12,%ymm14,%ymm14,%ymm14
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $4,%ymm6,%ymm6,%ymm6
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm5,%ymm5,%ymm5
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $12,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm4,%ymm4,%ymm4
addq 0(%rbx),%r10
adcq 8+0(%rbx),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rbx),%rbx
incq %r8
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol16(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpsrld $20,%ymm6,%ymm3
vpslld $12,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol8(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpslld $7,%ymm6,%ymm3
vpsrld $25,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $12,%ymm6,%ymm6,%ymm6
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm5,%ymm5,%ymm5
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $4,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm4,%ymm4,%ymm4
cmpq %rcx,%r8
jb 1b
cmpq $10,%r8
jne 2b
movq %rbx,%r8
subq %rsi,%rbx
movq %rbx,%rcx
movq 128(%rbp),%rbx
1:
addq $16,%rcx
cmpq %rbx,%rcx
jg 1f
addq 0(%r8),%r10
adcq 8+0(%r8),%r11
adcq $1,%r12
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%r8),%r8
jmp 1b
1:
vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
vpaddd 64(%rbp),%ymm6,%ymm6
vpaddd 96(%rbp),%ymm10,%ymm10
vpaddd 224(%rbp),%ymm14,%ymm14
vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
vpaddd 64(%rbp),%ymm5,%ymm5
vpaddd 96(%rbp),%ymm9,%ymm9
vpaddd 192(%rbp),%ymm13,%ymm13
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 64(%rbp),%ymm4,%ymm4
vpaddd 96(%rbp),%ymm8,%ymm8
vpaddd 160(%rbp),%ymm12,%ymm12
vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
vpxor 0+0(%rsi),%ymm3,%ymm3
vpxor 32+0(%rsi),%ymm2,%ymm2
vpxor 64+0(%rsi),%ymm6,%ymm6
vpxor 96+0(%rsi),%ymm10,%ymm10
vmovdqu %ymm3,0+0(%rdi)
vmovdqu %ymm2,32+0(%rdi)
vmovdqu %ymm6,64+0(%rdi)
vmovdqu %ymm10,96+0(%rdi)
vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
vpxor 0+128(%rsi),%ymm3,%ymm3
vpxor 32+128(%rsi),%ymm1,%ymm1
vpxor 64+128(%rsi),%ymm5,%ymm5
vpxor 96+128(%rsi),%ymm9,%ymm9
vmovdqu %ymm3,0+128(%rdi)
vmovdqu %ymm1,32+128(%rdi)
vmovdqu %ymm5,64+128(%rdi)
vmovdqu %ymm9,96+128(%rdi)
vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
vmovdqa %ymm3,%ymm8
leaq 256(%rsi),%rsi
leaq 256(%rdi),%rdi
subq $256,%rbx
jmp open_avx2_tail_loop
3:
vmovdqa .chacha20_consts(%rip),%ymm0
vmovdqa 64(%rbp),%ymm4
vmovdqa 96(%rbp),%ymm8
vmovdqa %ymm0,%ymm1
vmovdqa %ymm4,%ymm5
vmovdqa %ymm8,%ymm9
vmovdqa %ymm0,%ymm2
vmovdqa %ymm4,%ymm6
vmovdqa %ymm8,%ymm10
vmovdqa %ymm0,%ymm3
vmovdqa %ymm4,%ymm7
vmovdqa %ymm8,%ymm11
vmovdqa .avx2_inc(%rip),%ymm12
vpaddd 160(%rbp),%ymm12,%ymm15
vpaddd %ymm15,%ymm12,%ymm14
vpaddd %ymm14,%ymm12,%ymm13
vpaddd %ymm13,%ymm12,%ymm12
vmovdqa %ymm15,256(%rbp)
vmovdqa %ymm14,224(%rbp)
vmovdqa %ymm13,192(%rbp)
vmovdqa %ymm12,160(%rbp)
xorq %rcx,%rcx
movq %rsi,%r8
1:
addq 0(%r8),%r10
adcq 8+0(%r8),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%r8),%r8
2:
vmovdqa %ymm8,128(%rbp)
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa .rol8(%rip),%ymm8
addq 0(%r8),%r10
adcq 8+0(%r8),%r11
adcq $1,%r12
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $25,%ymm7,%ymm8
vpslld $32-25,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 128(%rbp),%ymm8
vpalignr $4,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $12,%ymm15,%ymm15,%ymm15
vpalignr $4,%ymm6,%ymm6,%ymm6
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $12,%ymm14,%ymm14,%ymm14
vpalignr $4,%ymm5,%ymm5,%ymm5
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $4,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm12,%ymm12,%ymm12
vmovdqa %ymm8,128(%rbp)
addq 16(%r8),%r10
adcq 8+16(%r8),%r11
adcq $1,%r12
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 32(%r8),%r8
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa .rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $25,%ymm7,%ymm8
vpslld $32-25,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 128(%rbp),%ymm8
vpalignr $12,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $4,%ymm15,%ymm15,%ymm15
vpalignr $12,%ymm6,%ymm6,%ymm6
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $12,%ymm5,%ymm5,%ymm5
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $12,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm12,%ymm12,%ymm12
incq %rcx
cmpq $4,%rcx
jl 1b
cmpq $10,%rcx
jne 2b
movq %rbx,%rcx
subq $384,%rcx
andq $-16,%rcx
1:
testq %rcx,%rcx
je 1f
addq 0(%r8),%r10
adcq 8+0(%r8),%r11
adcq $1,%r12
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%r8),%r8
subq $16,%rcx
jmp 1b
1:
vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
vpaddd 64(%rbp),%ymm7,%ymm7
vpaddd 96(%rbp),%ymm11,%ymm11
vpaddd 256(%rbp),%ymm15,%ymm15
vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
vpaddd 64(%rbp),%ymm6,%ymm6
vpaddd 96(%rbp),%ymm10,%ymm10
vpaddd 224(%rbp),%ymm14,%ymm14
vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
vpaddd 64(%rbp),%ymm5,%ymm5
vpaddd 96(%rbp),%ymm9,%ymm9
vpaddd 192(%rbp),%ymm13,%ymm13
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 64(%rbp),%ymm4,%ymm4
vpaddd 96(%rbp),%ymm8,%ymm8
vpaddd 160(%rbp),%ymm12,%ymm12
vmovdqa %ymm0,128(%rbp)
vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
vpxor 0+0(%rsi),%ymm0,%ymm0
vpxor 32+0(%rsi),%ymm3,%ymm3
vpxor 64+0(%rsi),%ymm7,%ymm7
vpxor 96+0(%rsi),%ymm11,%ymm11
vmovdqu %ymm0,0+0(%rdi)
vmovdqu %ymm3,32+0(%rdi)
vmovdqu %ymm7,64+0(%rdi)
vmovdqu %ymm11,96+0(%rdi)
vmovdqa 128(%rbp),%ymm0
vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
vpxor 0+128(%rsi),%ymm3,%ymm3
vpxor 32+128(%rsi),%ymm2,%ymm2
vpxor 64+128(%rsi),%ymm6,%ymm6
vpxor 96+128(%rsi),%ymm10,%ymm10
vmovdqu %ymm3,0+128(%rdi)
vmovdqu %ymm2,32+128(%rdi)
vmovdqu %ymm6,64+128(%rdi)
vmovdqu %ymm10,96+128(%rdi)
vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
vpxor 0+256(%rsi),%ymm3,%ymm3
vpxor 32+256(%rsi),%ymm1,%ymm1
vpxor 64+256(%rsi),%ymm5,%ymm5
vpxor 96+256(%rsi),%ymm9,%ymm9
vmovdqu %ymm3,0+256(%rdi)
vmovdqu %ymm1,32+256(%rdi)
vmovdqu %ymm5,64+256(%rdi)
vmovdqu %ymm9,96+256(%rdi)
vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
vmovdqa %ymm3,%ymm8
leaq 384(%rsi),%rsi
leaq 384(%rdi),%rdi
subq $384,%rbx
open_avx2_tail_loop:
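# Consume any remaining 32-byte keystream chunks from %ymm0, rotating %ymm4/%ymm8/%ymm12 into place.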
cmpq $32,%rbx
jb open_avx2_tail
subq $32,%rbx
vpxor (%rsi),%ymm0,%ymm0
vmovdqu %ymm0,(%rdi)
leaq 32(%rsi),%rsi
leaq 32(%rdi),%rdi
vmovdqa %ymm4,%ymm0
vmovdqa %ymm8,%ymm4
vmovdqa %ymm12,%ymm8
jmp open_avx2_tail_loop
open_avx2_tail:
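# Fewer than 32 bytes left: use the halves of %ymm0 for one more 16-byte chunk, then finish in open_sse_tail_16.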
cmpq $16,%rbx
vmovdqa %xmm0,%xmm1
jb 1f
subq $16,%rbx
vpxor (%rsi),%xmm0,%xmm1
vmovdqu %xmm1,(%rdi)
leaq 16(%rsi),%rsi
leaq 16(%rdi),%rdi
vperm2i128 $0x11,%ymm0,%ymm0,%ymm0
vmovdqa %xmm0,%xmm1
1:
vzeroupper
jmp open_sse_tail_16
open_avx2_192:
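# Open path for inputs of at most 192 bytes: two interleaved 2-block ChaCha20 states; the
# first 32 bytes of keystream are clamped into the Poly1305 key before open_avx2_short takes over.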
vmovdqa %ymm0,%ymm1
vmovdqa %ymm0,%ymm2
vmovdqa %ymm4,%ymm5
vmovdqa %ymm4,%ymm6
vmovdqa %ymm8,%ymm9
vmovdqa %ymm8,%ymm10
vpaddd .avx2_inc(%rip),%ymm12,%ymm13
vmovdqa %ymm12,%ymm11
vmovdqa %ymm13,%ymm15
movq $10,%r10
1:
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $12,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm5,%ymm5,%ymm5
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $4,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm5,%ymm5,%ymm5
decq %r10
jne 1b
vpaddd %ymm2,%ymm0,%ymm0
vpaddd %ymm2,%ymm1,%ymm1
vpaddd %ymm6,%ymm4,%ymm4
vpaddd %ymm6,%ymm5,%ymm5
vpaddd %ymm10,%ymm8,%ymm8
vpaddd %ymm10,%ymm9,%ymm9
vpaddd %ymm11,%ymm12,%ymm12
vpaddd %ymm15,%ymm13,%ymm13
vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
vpand .clamp(%rip),%ymm3,%ymm3
vmovdqa %ymm3,0(%rbp)
vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
open_avx2_short:
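# Hash the AD, then decrypt and hash 32 bytes of ciphertext per iteration from the prepared keystream.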
movq %r8,%r8
call poly_hash_ad_internal
open_avx2_hash_and_xor_loop:
cmpq $32,%rbx
jb open_avx2_short_tail_32
subq $32,%rbx
addq 0(%rsi),%r10
adcq 8+0(%rsi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
addq 16(%rsi),%r10
adcq 8+16(%rsi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpxor (%rsi),%ymm0,%ymm0
vmovdqu %ymm0,(%rdi)
leaq 32(%rsi),%rsi
leaq 32(%rdi),%rdi
vmovdqa %ymm4,%ymm0
vmovdqa %ymm8,%ymm4
vmovdqa %ymm12,%ymm8
vmovdqa %ymm1,%ymm12
vmovdqa %ymm5,%ymm1
vmovdqa %ymm9,%ymm5
vmovdqa %ymm13,%ymm9
vmovdqa %ymm2,%ymm13
vmovdqa %ymm6,%ymm2
jmp open_avx2_hash_and_xor_loop
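// Fewer than 32 bytes left: optionally hash and decrypt one more 16-byte
// block from the low lane of ymm0, stage the next keystream bytes in xmm1,
// and let the SSE tail handle the final 0..15 bytes.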
open_avx2_short_tail_32:
cmpq $16,%rbx
vmovdqa %xmm0,%xmm1
jb 1f
subq $16,%rbx
addq 0(%rsi),%r10
adcq 8+0(%rsi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpxor (%rsi),%xmm0,%xmm3
vmovdqu %xmm3,(%rdi)
leaq 16(%rsi),%rsi
leaq 16(%rdi),%rdi
vextracti128 $1,%ymm0,%xmm1
1:
vzeroupper
jmp open_sse_tail_16
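// Open path for up to 320 bytes of ciphertext: three 2-block ChaCha20
// states (384 bytes of keystream); block 0 supplies the Poly1305 key and
// the rest feeds the shared short loop above.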
open_avx2_320:
vmovdqa %ymm0,%ymm1
vmovdqa %ymm0,%ymm2
vmovdqa %ymm4,%ymm5
vmovdqa %ymm4,%ymm6
vmovdqa %ymm8,%ymm9
vmovdqa %ymm8,%ymm10
vpaddd .avx2_inc(%rip),%ymm12,%ymm13
vpaddd .avx2_inc(%rip),%ymm13,%ymm14
vmovdqa %ymm4,%ymm7
vmovdqa %ymm8,%ymm11
vmovdqa %ymm12,160(%rbp)
vmovdqa %ymm13,192(%rbp)
vmovdqa %ymm14,224(%rbp)
movq $10,%r10
1:
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $12,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm5,%ymm5,%ymm5
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol16(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpsrld $20,%ymm6,%ymm3
vpslld $12,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol8(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpslld $7,%ymm6,%ymm3
vpsrld $25,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpalignr $12,%ymm14,%ymm14,%ymm14
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $4,%ymm6,%ymm6,%ymm6
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $4,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm5,%ymm5,%ymm5
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol16(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpsrld $20,%ymm6,%ymm3
vpslld $12,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol8(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpslld $7,%ymm6,%ymm3
vpsrld $25,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $12,%ymm6,%ymm6,%ymm6
decq %r10
jne 1b
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
vpaddd %ymm7,%ymm4,%ymm4
vpaddd %ymm7,%ymm5,%ymm5
vpaddd %ymm7,%ymm6,%ymm6
vpaddd %ymm11,%ymm8,%ymm8
vpaddd %ymm11,%ymm9,%ymm9
vpaddd %ymm11,%ymm10,%ymm10
vpaddd 160(%rbp),%ymm12,%ymm12
vpaddd 192(%rbp),%ymm13,%ymm13
vpaddd 224(%rbp),%ymm14,%ymm14
vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
vpand .clamp(%rip),%ymm3,%ymm3
vmovdqa %ymm3,0(%rbp)
vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
vperm2i128 $0x02,%ymm2,%ymm6,%ymm9
vperm2i128 $0x02,%ymm10,%ymm14,%ymm13
vperm2i128 $0x13,%ymm2,%ymm6,%ymm2
vperm2i128 $0x13,%ymm10,%ymm14,%ymm6
jmp open_avx2_short
.size chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2
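// AVX2 seal path. %r9 points at the ChaCha20 key and counter/nonce rows
// (48 bytes), %rsi/%rdi are the input/output pointers, %rbx holds the
// plaintext length and %r8 the AAD length. Inputs of at most 192 or 320
// bytes take the dedicated short paths further down.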
.type chacha20_poly1305_seal_avx2,@function
.align 64
chacha20_poly1305_seal_avx2:
vzeroupper
vmovdqa .chacha20_consts(%rip),%ymm0
vbroadcasti128 0(%r9),%ymm4
vbroadcasti128 16(%r9),%ymm8
vbroadcasti128 32(%r9),%ymm12
vpaddd .avx2_init(%rip),%ymm12,%ymm12
cmpq $192,%rbx
jbe seal_avx2_192
cmpq $320,%rbx
jbe seal_avx2_320
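// Bulk path: four 2-block states (8 ChaCha20 blocks, 512 bytes of
// keystream per pass). The key rows are cached at 64(%rbp)/96(%rbp) and
// the four counter vectors at 160..256(%rbp).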
vmovdqa %ymm0,%ymm1
vmovdqa %ymm0,%ymm2
vmovdqa %ymm0,%ymm3
vmovdqa %ymm4,%ymm5
vmovdqa %ymm4,%ymm6
vmovdqa %ymm4,%ymm7
vmovdqa %ymm4,64(%rbp)
vmovdqa %ymm8,%ymm9
vmovdqa %ymm8,%ymm10
vmovdqa %ymm8,%ymm11
vmovdqa %ymm8,96(%rbp)
vmovdqa %ymm12,%ymm15
vpaddd .avx2_inc(%rip),%ymm15,%ymm14
vpaddd .avx2_inc(%rip),%ymm14,%ymm13
vpaddd .avx2_inc(%rip),%ymm13,%ymm12
vmovdqa %ymm12,160(%rbp)
vmovdqa %ymm13,192(%rbp)
vmovdqa %ymm14,224(%rbp)
vmovdqa %ymm15,256(%rbp)
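// The first 10 double rounds are not interleaved with Poly1305: the
// Poly1305 key is derived from this very keystream and does not exist yet.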
movq $10,%r10
1:
vmovdqa %ymm8,128(%rbp)
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa .rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $25,%ymm7,%ymm8
vpslld $32-25,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 128(%rbp),%ymm8
vpalignr $4,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $12,%ymm15,%ymm15,%ymm15
vpalignr $4,%ymm6,%ymm6,%ymm6
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $12,%ymm14,%ymm14,%ymm14
vpalignr $4,%ymm5,%ymm5,%ymm5
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $4,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm12,%ymm12,%ymm12
vmovdqa %ymm8,128(%rbp)
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa .rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $25,%ymm7,%ymm8
vpslld $32-25,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 128(%rbp),%ymm8
vpalignr $12,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $4,%ymm15,%ymm15,%ymm15
vpalignr $12,%ymm6,%ymm6,%ymm6
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $12,%ymm5,%ymm5,%ymm5
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $12,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm12,%ymm12,%ymm12
decq %r10
jnz 1b
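// Finish the first batch: add the input state back in, clamp the first 32
// bytes of block 0 into the Poly1305 key at 0(%rbp), hash the AAD, and
// write the first 320 bytes of ciphertext (encryption starts at block 1,
// since block 0 is reserved for the key).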
vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
vpaddd 64(%rbp),%ymm7,%ymm7
vpaddd 96(%rbp),%ymm11,%ymm11
vpaddd 256(%rbp),%ymm15,%ymm15
vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
vpaddd 64(%rbp),%ymm6,%ymm6
vpaddd 96(%rbp),%ymm10,%ymm10
vpaddd 224(%rbp),%ymm14,%ymm14
vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
vpaddd 64(%rbp),%ymm5,%ymm5
vpaddd 96(%rbp),%ymm9,%ymm9
vpaddd 192(%rbp),%ymm13,%ymm13
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 64(%rbp),%ymm4,%ymm4
vpaddd 96(%rbp),%ymm8,%ymm8
vpaddd 160(%rbp),%ymm12,%ymm12
vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
vperm2i128 $0x02,%ymm3,%ymm7,%ymm15
vperm2i128 $0x13,%ymm3,%ymm7,%ymm3
vpand .clamp(%rip),%ymm15,%ymm15
vmovdqa %ymm15,0(%rbp)
movq %r8,%r8
call poly_hash_ad_internal
vpxor 0(%rsi),%ymm3,%ymm3
vpxor 32(%rsi),%ymm11,%ymm11
vmovdqu %ymm3,0(%rdi)
vmovdqu %ymm11,32(%rdi)
vperm2i128 $0x02,%ymm2,%ymm6,%ymm15
vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
vpxor 0+64(%rsi),%ymm15,%ymm15
vpxor 32+64(%rsi),%ymm2,%ymm2
vpxor 64+64(%rsi),%ymm6,%ymm6
vpxor 96+64(%rsi),%ymm10,%ymm10
vmovdqu %ymm15,0+64(%rdi)
vmovdqu %ymm2,32+64(%rdi)
vmovdqu %ymm6,64+64(%rdi)
vmovdqu %ymm10,96+64(%rdi)
vperm2i128 $0x02,%ymm1,%ymm5,%ymm15
vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
vpxor 0+192(%rsi),%ymm15,%ymm15
vpxor 32+192(%rsi),%ymm1,%ymm1
vpxor 64+192(%rsi),%ymm5,%ymm5
vpxor 96+192(%rsi),%ymm9,%ymm9
vmovdqu %ymm15,0+192(%rdi)
vmovdqu %ymm1,32+192(%rdi)
vmovdqu %ymm5,64+192(%rdi)
vmovdqu %ymm9,96+192(%rdi)
vperm2i128 $0x13,%ymm0,%ymm4,%ymm15
vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
vmovdqa %ymm15,%ymm8
leaq 320(%rsi),%rsi
subq $320,%rbx
movq $320,%rcx
cmpq $128,%rbx
jbe seal_avx2_hash
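// More than 128 bytes still to go: also flush the last 128 bytes of this
// batch's keystream, then pick a tail handler (or fall into the 512-byte
// main loop). %rcx and %r8 seed the tails' round/hash interleave counters.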
vpxor 0(%rsi),%ymm0,%ymm0
vpxor 32(%rsi),%ymm4,%ymm4
vpxor 64(%rsi),%ymm8,%ymm8
vpxor 96(%rsi),%ymm12,%ymm12
vmovdqu %ymm0,320(%rdi)
vmovdqu %ymm4,352(%rdi)
vmovdqu %ymm8,384(%rdi)
vmovdqu %ymm12,416(%rdi)
leaq 128(%rsi),%rsi
subq $128,%rbx
movq $8,%rcx
movq $2,%r8
cmpq $128,%rbx
jbe seal_avx2_tail_128
cmpq $256,%rbx
jbe seal_avx2_tail_256
cmpq $384,%rbx
jbe seal_avx2_tail_384
cmpq $512,%rbx
jbe seal_avx2_tail_512
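// First trip through the 512-byte main loop: set up fresh states and run
// the opening rounds without hashing, then jump into the interleaved loop
// at label 4 with %rcx=9 and %rdi backed up by 16 so that exactly the 448
// bytes of ciphertext already written get absorbed during this pass.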
vmovdqa .chacha20_consts(%rip),%ymm0
vmovdqa 64(%rbp),%ymm4
vmovdqa 96(%rbp),%ymm8
vmovdqa %ymm0,%ymm1
vmovdqa %ymm4,%ymm5
vmovdqa %ymm8,%ymm9
vmovdqa %ymm0,%ymm2
vmovdqa %ymm4,%ymm6
vmovdqa %ymm8,%ymm10
vmovdqa %ymm0,%ymm3
vmovdqa %ymm4,%ymm7
vmovdqa %ymm8,%ymm11
vmovdqa .avx2_inc(%rip),%ymm12
vpaddd 160(%rbp),%ymm12,%ymm15
vpaddd %ymm15,%ymm12,%ymm14
vpaddd %ymm14,%ymm12,%ymm13
vpaddd %ymm13,%ymm12,%ymm12
vmovdqa %ymm15,256(%rbp)
vmovdqa %ymm14,224(%rbp)
vmovdqa %ymm13,192(%rbp)
vmovdqa %ymm12,160(%rbp)
vmovdqa %ymm8,128(%rbp)
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa .rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $25,%ymm7,%ymm8
vpslld $32-25,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 128(%rbp),%ymm8
vpalignr $4,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $12,%ymm15,%ymm15,%ymm15
vpalignr $4,%ymm6,%ymm6,%ymm6
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $12,%ymm14,%ymm14,%ymm14
vpalignr $4,%ymm5,%ymm5,%ymm5
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $4,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm12,%ymm12,%ymm12
vmovdqa %ymm8,128(%rbp)
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa .rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $25,%ymm7,%ymm8
vpslld $32-25,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 128(%rbp),%ymm8
vpalignr $12,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $4,%ymm15,%ymm15,%ymm15
vpalignr $12,%ymm6,%ymm6,%ymm6
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $12,%ymm5,%ymm5,%ymm5
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $12,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm12,%ymm12,%ymm12
vmovdqa %ymm8,128(%rbp)
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa .rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
subq $16,%rdi
movq $9,%rcx
jmp 4f
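// Main seal loop: each iteration computes 512 bytes of keystream (four
// 2-block states) while the scalar side Poly1305-hashes the 512 bytes of
// ciphertext from the previous iteration (48 bytes per double round plus
// 32 bytes after the rounds), then encrypts and writes the next 512 bytes.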
1:
vmovdqa .chacha20_consts(%rip),%ymm0
vmovdqa 64(%rbp),%ymm4
vmovdqa 96(%rbp),%ymm8
vmovdqa %ymm0,%ymm1
vmovdqa %ymm4,%ymm5
vmovdqa %ymm8,%ymm9
vmovdqa %ymm0,%ymm2
vmovdqa %ymm4,%ymm6
vmovdqa %ymm8,%ymm10
vmovdqa %ymm0,%ymm3
vmovdqa %ymm4,%ymm7
vmovdqa %ymm8,%ymm11
vmovdqa .avx2_inc(%rip),%ymm12
vpaddd 160(%rbp),%ymm12,%ymm15
vpaddd %ymm15,%ymm12,%ymm14
vpaddd %ymm14,%ymm12,%ymm13
vpaddd %ymm13,%ymm12,%ymm12
vmovdqa %ymm15,256(%rbp)
vmovdqa %ymm14,224(%rbp)
vmovdqa %ymm13,192(%rbp)
vmovdqa %ymm12,160(%rbp)
movq $10,%rcx
2:
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
vmovdqa %ymm8,128(%rbp)
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
addq %rax,%r15
adcq %rdx,%r9
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa .rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
4:
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
addq 16(%rdi),%r10
adcq 8+16(%rdi),%r11
adcq $1,%r12
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
vmovdqa %ymm8,128(%rbp)
vpsrld $25,%ymm7,%ymm8
vpslld $32-25,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 128(%rbp),%ymm8
vpalignr $4,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $12,%ymm15,%ymm15,%ymm15
vpalignr $4,%ymm6,%ymm6,%ymm6
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $12,%ymm14,%ymm14,%ymm14
vpalignr $4,%ymm5,%ymm5,%ymm5
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $4,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm12,%ymm12,%ymm12
vmovdqa %ymm8,128(%rbp)
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
addq %rax,%r15
adcq %rdx,%r9
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
addq 32(%rdi),%r10
adcq 8+32(%rdi),%r11
adcq $1,%r12
leaq 48(%rdi),%rdi
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa .rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $25,%ymm7,%ymm8
vpslld $32-25,%ymm7,%ymm7
addq %rax,%r15
adcq %rdx,%r9
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 128(%rbp),%ymm8
vpalignr $12,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $4,%ymm15,%ymm15,%ymm15
vpalignr $12,%ymm6,%ymm6,%ymm6
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $12,%ymm5,%ymm5,%ymm5
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $12,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm12,%ymm12,%ymm12
decq %rcx
jne 2b
vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
vpaddd 64(%rbp),%ymm7,%ymm7
vpaddd 96(%rbp),%ymm11,%ymm11
vpaddd 256(%rbp),%ymm15,%ymm15
vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
vpaddd 64(%rbp),%ymm6,%ymm6
vpaddd 96(%rbp),%ymm10,%ymm10
vpaddd 224(%rbp),%ymm14,%ymm14
vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
vpaddd 64(%rbp),%ymm5,%ymm5
vpaddd 96(%rbp),%ymm9,%ymm9
vpaddd 192(%rbp),%ymm13,%ymm13
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 64(%rbp),%ymm4,%ymm4
vpaddd 96(%rbp),%ymm8,%ymm8
vpaddd 160(%rbp),%ymm12,%ymm12
leaq 32(%rdi),%rdi
vmovdqa %ymm0,128(%rbp)
addq -32(%rdi),%r10
adcq 8+-32(%rdi),%r11
adcq $1,%r12
vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
vpxor 0+0(%rsi),%ymm0,%ymm0
vpxor 32+0(%rsi),%ymm3,%ymm3
vpxor 64+0(%rsi),%ymm7,%ymm7
vpxor 96+0(%rsi),%ymm11,%ymm11
vmovdqu %ymm0,0+0(%rdi)
vmovdqu %ymm3,32+0(%rdi)
vmovdqu %ymm7,64+0(%rdi)
vmovdqu %ymm11,96+0(%rdi)
vmovdqa 128(%rbp),%ymm0
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
vpxor 0+128(%rsi),%ymm3,%ymm3
vpxor 32+128(%rsi),%ymm2,%ymm2
vpxor 64+128(%rsi),%ymm6,%ymm6
vpxor 96+128(%rsi),%ymm10,%ymm10
vmovdqu %ymm3,0+128(%rdi)
vmovdqu %ymm2,32+128(%rdi)
vmovdqu %ymm6,64+128(%rdi)
vmovdqu %ymm10,96+128(%rdi)
addq -16(%rdi),%r10
adcq 8+-16(%rdi),%r11
adcq $1,%r12
vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
vpxor 0+256(%rsi),%ymm3,%ymm3
vpxor 32+256(%rsi),%ymm1,%ymm1
vpxor 64+256(%rsi),%ymm5,%ymm5
vpxor 96+256(%rsi),%ymm9,%ymm9
vmovdqu %ymm3,0+256(%rdi)
vmovdqu %ymm1,32+256(%rdi)
vmovdqu %ymm5,64+256(%rdi)
vmovdqu %ymm9,96+256(%rdi)
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
vperm2i128 $0x13,%ymm0,%ymm4,%ymm4
vperm2i128 $0x02,%ymm8,%ymm12,%ymm0
vperm2i128 $0x13,%ymm8,%ymm12,%ymm8
vpxor 0+384(%rsi),%ymm3,%ymm3
vpxor 32+384(%rsi),%ymm0,%ymm0
vpxor 64+384(%rsi),%ymm4,%ymm4
vpxor 96+384(%rsi),%ymm8,%ymm8
vmovdqu %ymm3,0+384(%rdi)
vmovdqu %ymm0,32+384(%rdi)
vmovdqu %ymm4,64+384(%rdi)
vmovdqu %ymm8,96+384(%rdi)
leaq 512(%rsi),%rsi
subq $512,%rbx
cmpq $512,%rbx
jg 1b
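// No full 512-byte block left: hash the final 32 bytes of the previous
// batch, then dispatch on the 1..512 bytes remaining in %rbx. %rcx and %r8
// tell the tail code how much pending ciphertext to absorb per round.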
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
addq 16(%rdi),%r10
adcq 8+16(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 32(%rdi),%rdi
movq $10,%rcx
xorq %r8,%r8
cmpq $128,%rbx
ja 3f
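// At most 128 bytes remain: one 2-block state. %rcx passes of the loop
// below each do a double round and hash 48 bytes of pending ciphertext;
// %r8 extra rounds hash 32 bytes each, so the Poly1305 state catches up
// exactly as the keystream is finished.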
seal_avx2_tail_128:
vmovdqa .chacha20_consts(%rip),%ymm0
vmovdqa 64(%rbp),%ymm4
vmovdqa 96(%rbp),%ymm8
vmovdqa .avx2_inc(%rip),%ymm12
vpaddd 160(%rbp),%ymm12,%ymm12
vmovdqa %ymm12,160(%rbp)
1:
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
2:
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $12,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm4,%ymm4,%ymm4
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $4,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm4,%ymm4,%ymm4
addq 16(%rdi),%r10
adcq 8+16(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 32(%rdi),%rdi
decq %rcx
jg 1b
decq %r8
jge 2b
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 64(%rbp),%ymm4,%ymm4
vpaddd 96(%rbp),%ymm8,%ymm8
vpaddd 160(%rbp),%ymm12,%ymm12
vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
vmovdqa %ymm3,%ymm8
jmp seal_avx2_short_loop
3:
cmpq $256,%rbx
ja 3f
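// 129..256 bytes remain: two 2-block states, rounds interleaved with
// hashing as above. The first 128 bytes are encrypted here; hashing them
// and sealing the rest is handed to seal_avx2_hash and the short loop.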
seal_avx2_tail_256:
vmovdqa .chacha20_consts(%rip),%ymm0
vmovdqa 64(%rbp),%ymm4
vmovdqa 96(%rbp),%ymm8
vmovdqa %ymm0,%ymm1
vmovdqa %ymm4,%ymm5
vmovdqa %ymm8,%ymm9
vmovdqa .avx2_inc(%rip),%ymm12
vpaddd 160(%rbp),%ymm12,%ymm13
vpaddd %ymm13,%ymm12,%ymm12
vmovdqa %ymm12,160(%rbp)
vmovdqa %ymm13,192(%rbp)
1:
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
2:
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $12,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm5,%ymm5,%ymm5
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $4,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm5,%ymm5,%ymm5
addq 16(%rdi),%r10
adcq 8+16(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 32(%rdi),%rdi
decq %rcx
jg 1b
decq %r8
jge 2b
vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
vpaddd 64(%rbp),%ymm5,%ymm5
vpaddd 96(%rbp),%ymm9,%ymm9
vpaddd 192(%rbp),%ymm13,%ymm13
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 64(%rbp),%ymm4,%ymm4
vpaddd 96(%rbp),%ymm8,%ymm8
vpaddd 160(%rbp),%ymm12,%ymm12
vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
vpxor 0+0(%rsi),%ymm3,%ymm3
vpxor 32+0(%rsi),%ymm1,%ymm1
vpxor 64+0(%rsi),%ymm5,%ymm5
vpxor 96+0(%rsi),%ymm9,%ymm9
vmovdqu %ymm3,0+0(%rdi)
vmovdqu %ymm1,32+0(%rdi)
vmovdqu %ymm5,64+0(%rdi)
vmovdqu %ymm9,96+0(%rdi)
vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
vmovdqa %ymm3,%ymm8
movq $128,%rcx
leaq 128(%rsi),%rsi
subq $128,%rbx
jmp seal_avx2_hash
3:
cmpq $384,%rbx
ja seal_avx2_tail_512
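// 257..384 bytes remain: three 2-block states; 256 bytes are encrypted
// here and the remainder goes through seal_avx2_hash.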
seal_avx2_tail_384:
vmovdqa .chacha20_consts(%rip),%ymm0
vmovdqa 64(%rbp),%ymm4
vmovdqa 96(%rbp),%ymm8
vmovdqa %ymm0,%ymm1
vmovdqa %ymm4,%ymm5
vmovdqa %ymm8,%ymm9
vmovdqa %ymm0,%ymm2
vmovdqa %ymm4,%ymm6
vmovdqa %ymm8,%ymm10
vmovdqa .avx2_inc(%rip),%ymm12
vpaddd 160(%rbp),%ymm12,%ymm14
vpaddd %ymm14,%ymm12,%ymm13
vpaddd %ymm13,%ymm12,%ymm12
vmovdqa %ymm12,160(%rbp)
vmovdqa %ymm13,192(%rbp)
vmovdqa %ymm14,224(%rbp)
1:
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
2:
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $12,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm5,%ymm5,%ymm5
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol16(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpsrld $20,%ymm6,%ymm3
vpslld $12,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol8(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpslld $7,%ymm6,%ymm3
vpsrld $25,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpalignr $12,%ymm14,%ymm14,%ymm14
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $4,%ymm6,%ymm6,%ymm6
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $4,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm4,%ymm4,%ymm4
addq 16(%rdi),%r10
adcq 8+16(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm5,%ymm5,%ymm5
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol16(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpsrld $20,%ymm6,%ymm3
vpslld $12,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol8(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpslld $7,%ymm6,%ymm3
vpsrld $25,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $12,%ymm6,%ymm6,%ymm6
leaq 32(%rdi),%rdi
decq %rcx
jg 1b
decq %r8
jge 2b
vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
vpaddd 64(%rbp),%ymm6,%ymm6
vpaddd 96(%rbp),%ymm10,%ymm10
vpaddd 224(%rbp),%ymm14,%ymm14
vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
vpaddd 64(%rbp),%ymm5,%ymm5
vpaddd 96(%rbp),%ymm9,%ymm9
vpaddd 192(%rbp),%ymm13,%ymm13
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 64(%rbp),%ymm4,%ymm4
vpaddd 96(%rbp),%ymm8,%ymm8
vpaddd 160(%rbp),%ymm12,%ymm12
vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
vpxor 0+0(%rsi),%ymm3,%ymm3
vpxor 32+0(%rsi),%ymm2,%ymm2
vpxor 64+0(%rsi),%ymm6,%ymm6
vpxor 96+0(%rsi),%ymm10,%ymm10
vmovdqu %ymm3,0+0(%rdi)
vmovdqu %ymm2,32+0(%rdi)
vmovdqu %ymm6,64+0(%rdi)
vmovdqu %ymm10,96+0(%rdi)
vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
vpxor 0+128(%rsi),%ymm3,%ymm3
vpxor 32+128(%rsi),%ymm1,%ymm1
vpxor 64+128(%rsi),%ymm5,%ymm5
vpxor 96+128(%rsi),%ymm9,%ymm9
vmovdqu %ymm3,0+128(%rdi)
vmovdqu %ymm1,32+128(%rdi)
vmovdqu %ymm5,64+128(%rdi)
vmovdqu %ymm9,96+128(%rdi)
vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
vmovdqa %ymm3,%ymm8
movq $256,%rcx
leaq 256(%rsi),%rsi
subq $256,%rbx
jmp seal_avx2_hash
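// 385..512 bytes remain: four 2-block states with the same mulx-based
// Poly1305 interleaving as the main loop; 384 bytes are encrypted here and
// the remainder goes through seal_avx2_hash.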
seal_avx2_tail_512:
vmovdqa .chacha20_consts(%rip),%ymm0
vmovdqa 64(%rbp),%ymm4
vmovdqa 96(%rbp),%ymm8
vmovdqa %ymm0,%ymm1
vmovdqa %ymm4,%ymm5
vmovdqa %ymm8,%ymm9
vmovdqa %ymm0,%ymm2
vmovdqa %ymm4,%ymm6
vmovdqa %ymm8,%ymm10
vmovdqa %ymm0,%ymm3
vmovdqa %ymm4,%ymm7
vmovdqa %ymm8,%ymm11
vmovdqa .avx2_inc(%rip),%ymm12
vpaddd 160(%rbp),%ymm12,%ymm15
vpaddd %ymm15,%ymm12,%ymm14
vpaddd %ymm14,%ymm12,%ymm13
vpaddd %ymm13,%ymm12,%ymm12
vmovdqa %ymm15,256(%rbp)
vmovdqa %ymm14,224(%rbp)
vmovdqa %ymm13,192(%rbp)
vmovdqa %ymm12,160(%rbp)
1:
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
2:
vmovdqa %ymm8,128(%rbp)
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa .rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $25,%ymm7,%ymm8
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
vpslld $32-25,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 128(%rbp),%ymm8
vpalignr $4,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $12,%ymm15,%ymm15,%ymm15
vpalignr $4,%ymm6,%ymm6,%ymm6
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $12,%ymm14,%ymm14,%ymm14
vpalignr $4,%ymm5,%ymm5,%ymm5
vpalignr $8,%ymm9,%ymm9,%ymm9
addq %rax,%r15
adcq %rdx,%r9
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $4,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm12,%ymm12,%ymm12
vmovdqa %ymm8,128(%rbp)
vmovdqa .rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
addq 16(%rdi),%r10
adcq 8+16(%rdi),%r11
adcq $1,%r12
vmovdqa .rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vmovdqa 128(%rbp),%ymm8
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
movq 0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,128(%rbp)
vpsrld $25,%ymm7,%ymm8
vpslld $32-25,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 128(%rbp),%ymm8
vpalignr $12,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $4,%ymm15,%ymm15,%ymm15
vpalignr $12,%ymm6,%ymm6,%ymm6
movq 8+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $12,%ymm5,%ymm5,%ymm5
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $12,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm12,%ymm12,%ymm12
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 32(%rdi),%rdi
decq %rcx
jg 1b
decq %r8
jge 2b
vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
vpaddd 64(%rbp),%ymm7,%ymm7
vpaddd 96(%rbp),%ymm11,%ymm11
vpaddd 256(%rbp),%ymm15,%ymm15
vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
vpaddd 64(%rbp),%ymm6,%ymm6
vpaddd 96(%rbp),%ymm10,%ymm10
vpaddd 224(%rbp),%ymm14,%ymm14
vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
vpaddd 64(%rbp),%ymm5,%ymm5
vpaddd 96(%rbp),%ymm9,%ymm9
vpaddd 192(%rbp),%ymm13,%ymm13
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 64(%rbp),%ymm4,%ymm4
vpaddd 96(%rbp),%ymm8,%ymm8
vpaddd 160(%rbp),%ymm12,%ymm12
vmovdqa %ymm0,128(%rbp)
vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
vpxor 0+0(%rsi),%ymm0,%ymm0
vpxor 32+0(%rsi),%ymm3,%ymm3
vpxor 64+0(%rsi),%ymm7,%ymm7
vpxor 96+0(%rsi),%ymm11,%ymm11
vmovdqu %ymm0,0+0(%rdi)
vmovdqu %ymm3,32+0(%rdi)
vmovdqu %ymm7,64+0(%rdi)
vmovdqu %ymm11,96+0(%rdi)
vmovdqa 128(%rbp),%ymm0
vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
vpxor 0+128(%rsi),%ymm3,%ymm3
vpxor 32+128(%rsi),%ymm2,%ymm2
vpxor 64+128(%rsi),%ymm6,%ymm6
vpxor 96+128(%rsi),%ymm10,%ymm10
vmovdqu %ymm3,0+128(%rdi)
vmovdqu %ymm2,32+128(%rdi)
vmovdqu %ymm6,64+128(%rdi)
vmovdqu %ymm10,96+128(%rdi)
vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
vpxor 0+256(%rsi),%ymm3,%ymm3
vpxor 32+256(%rsi),%ymm1,%ymm1
vpxor 64+256(%rsi),%ymm5,%ymm5
vpxor 96+256(%rsi),%ymm9,%ymm9
vmovdqu %ymm3,0+256(%rdi)
vmovdqu %ymm1,32+256(%rdi)
vmovdqu %ymm5,64+256(%rdi)
vmovdqu %ymm9,96+256(%rdi)
vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
vmovdqa %ymm3,%ymm8
movq $384,%rcx
leaq 384(%rsi),%rsi
subq $384,%rbx
jmp seal_avx2_hash
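// Seal path for up to 320 bytes: three 2-block states; block 0 yields the
// clamped Poly1305 key and the rest of the keystream feeds seal_avx2_short.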
seal_avx2_320:
vmovdqa %ymm0,%ymm1
vmovdqa %ymm0,%ymm2
vmovdqa %ymm4,%ymm5
vmovdqa %ymm4,%ymm6
vmovdqa %ymm8,%ymm9
vmovdqa %ymm8,%ymm10
vpaddd .avx2_inc(%rip),%ymm12,%ymm13
vpaddd .avx2_inc(%rip),%ymm13,%ymm14
vmovdqa %ymm4,%ymm7
vmovdqa %ymm8,%ymm11
vmovdqa %ymm12,160(%rbp)
vmovdqa %ymm13,192(%rbp)
vmovdqa %ymm14,224(%rbp)
movq $10,%r10
1:
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $12,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm5,%ymm5,%ymm5
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol16(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpsrld $20,%ymm6,%ymm3
vpslld $12,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol8(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpslld $7,%ymm6,%ymm3
vpsrld $25,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpalignr $12,%ymm14,%ymm14,%ymm14
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $4,%ymm6,%ymm6,%ymm6
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $4,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm5,%ymm5,%ymm5
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol16(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpsrld $20,%ymm6,%ymm3
vpslld $12,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol8(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpslld $7,%ymm6,%ymm3
vpsrld $25,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $12,%ymm6,%ymm6,%ymm6
decq %r10
jne 1b
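/* add the saved initial state back into each copy to finish the keystream blocks */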
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
vpaddd %ymm7,%ymm4,%ymm4
vpaddd %ymm7,%ymm5,%ymm5
vpaddd %ymm7,%ymm6,%ymm6
vpaddd %ymm11,%ymm8,%ymm8
vpaddd %ymm11,%ymm9,%ymm9
vpaddd %ymm11,%ymm10,%ymm10
vpaddd 160(%rbp),%ymm12,%ymm12
vpaddd 192(%rbp),%ymm13,%ymm13
vpaddd 224(%rbp),%ymm14,%ymm14
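/* clamp the first 32 bytes of keystream block 0 and store them at 0(%rbp) as the Poly1305 key, then rearrange the remaining keystream for the short path */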
vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
vpand .clamp(%rip),%ymm3,%ymm3
vmovdqa %ymm3,0(%rbp)
vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
vperm2i128 $0x02,%ymm2,%ymm6,%ymm9
vperm2i128 $0x02,%ymm10,%ymm14,%ymm13
vperm2i128 $0x13,%ymm2,%ymm6,%ymm2
vperm2i128 $0x13,%ymm10,%ymm14,%ymm6
jmp seal_avx2_short
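/* seal_avx2_192: as above with two doubled states (four keystream blocks); block 0 supplies the Poly1305 key and the rest covers up to 192 bytes of data */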
seal_avx2_192:
vmovdqa %ymm0,%ymm1
vmovdqa %ymm0,%ymm2
vmovdqa %ymm4,%ymm5
vmovdqa %ymm4,%ymm6
vmovdqa %ymm8,%ymm9
vmovdqa %ymm8,%ymm10
vpaddd .avx2_inc(%rip),%ymm12,%ymm13
vmovdqa %ymm12,%ymm11
vmovdqa %ymm13,%ymm15
movq $10,%r10
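/* 10 ChaCha20 double rounds over both state copies */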
1:
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $12,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $4,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $12,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm5,%ymm5,%ymm5
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol16(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $4,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm5,%ymm5,%ymm5
decq %r10
jne 1b
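/* add the initial state back to finish the four blocks; block 0 is clamped below to form the Poly1305 key */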
vpaddd %ymm2,%ymm0,%ymm0
vpaddd %ymm2,%ymm1,%ymm1
vpaddd %ymm6,%ymm4,%ymm4
vpaddd %ymm6,%ymm5,%ymm5
vpaddd %ymm10,%ymm8,%ymm8
vpaddd %ymm10,%ymm9,%ymm9
vpaddd %ymm11,%ymm12,%ymm12
vpaddd %ymm15,%ymm13,%ymm13
vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
vpand .clamp(%rip),%ymm3,%ymm3
vmovdqa %ymm3,0(%rbp)
vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
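/* seal_avx2_short: hash the additional data, then encrypt and authenticate the remaining plaintext 32 bytes at a time */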
seal_avx2_short:
movq %r8,%r8 /* effectively a no-op; %r8 already holds the AD length expected by poly_hash_ad_internal */
call poly_hash_ad_internal
xorq %rcx,%rcx
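/* seal_avx2_hash: Poly1305-hash %rcx bytes of ciphertext already written at %rdi, one 16-byte block per pass */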
seal_avx2_hash:
cmpq $16,%rcx
jb seal_avx2_short_loop
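/* absorb the block into the accumulator in %r10:%r11:%r12, multiply by r (kept at 0(%rbp)) and reduce mod 2^130-5 */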
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
subq $16,%rcx
addq $16,%rdi
jmp seal_avx2_hash
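/* seal_avx2_short_loop: encrypt 32 bytes with the next queued keystream register, then hash the two resulting 16-byte ciphertext blocks */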
seal_avx2_short_loop:
cmpq $32,%rbx
jb seal_avx2_short_tail
subq $32,%rbx
vpxor (%rsi),%ymm0,%ymm0
vmovdqu %ymm0,(%rdi)
leaq 32(%rsi),%rsi
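/* hash the two 16-byte ciphertext halves just written at 0(%rdi) and 16(%rdi) */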
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
addq 16(%rdi),%r10
adcq 8+16(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 32(%rdi),%rdi
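/* rotate the keystream queue so the next 32 bytes of keystream land in %ymm0 */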
vmovdqa %ymm4,%ymm0
vmovdqa %ymm8,%ymm4
vmovdqa %ymm12,%ymm8
vmovdqa %ymm1,%ymm12
vmovdqa %ymm5,%ymm1
vmovdqa %ymm9,%ymm5
vmovdqa %ymm13,%ymm9
vmovdqa %ymm2,%ymm13
vmovdqa %ymm6,%ymm2
jmp seal_avx2_short_loop
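/* seal_avx2_short_tail: if at least 16 bytes remain, encrypt one block with the low lane of %ymm0 and hash it */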
seal_avx2_short_tail:
cmpq $16,%rbx
jb 1f
subq $16,%rbx
vpxor (%rsi),%xmm0,%xmm3
vmovdqu %xmm3,(%rdi)
leaq 16(%rsi),%rsi
addq 0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
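/* move the high 128 bits of the keystream into %xmm0 for the final sub-16-byte tail */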
vextracti128 $1,%ymm0,%xmm0
1:
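/* drop the AVX2 upper state and reuse the SSE tail code for the last 0..15 bytes and finalization */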
vzeroupper
jmp seal_sse_tail_16
.cfi_endproc
#endif