| ; Copyright © 2018, VideoLAN and dav1d authors |
| ; Copyright © 2018, Two Orioles, LLC |
| ; All rights reserved. |
| ; |
| ; Redistribution and use in source and binary forms, with or without |
| ; modification, are permitted provided that the following conditions are met: |
| ; |
| ; 1. Redistributions of source code must retain the above copyright notice, this |
| ; list of conditions and the following disclaimer. |
| ; |
| ; 2. Redistributions in binary form must reproduce the above copyright notice, |
| ; this list of conditions and the following disclaimer in the documentation |
| ; and/or other materials provided with the distribution. |
| ; |
| ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| ; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| ; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| ; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
| ; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| ; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| ; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| ; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| ; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| %include "config.asm" |
| %undef private_prefix |
| %define private_prefix checkasm |
| %include "ext/x86/x86inc.asm" |
| |
| SECTION_RODATA 16 |
| |
| %if ARCH_X86_64 |
| ; just random numbers to reduce the chance of incidental match |
| ; x6-x15 (WIN64 only): 128-bit patterns that the x86-64 checked_call loads |
| ; into m6-m15 before calling the tested function and XORs against the same |
| ; registers afterwards. |
| ; n7-n14 (n9-n14 when not WIN64): 64-bit patterns that checked_call loads |
| ; into r<free_regs>..r14 before the call and compares against afterwards. |
| %if WIN64 |
| x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064 |
| x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636 |
| x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e |
| x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f |
| x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9 |
| x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d |
| x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b |
| x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786 |
| x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef |
| x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5 |
| n7: dq 0x21f86d66c8ca00ce |
| n8: dq 0x75b6ba21077c48ad |
| %endif |
| n9: dq 0xed56bb2dcb3c7736 |
| n10: dq 0x8bda43d3fd1a7e06 |
| n11: dq 0xb64a9c9e5d318408 |
| n12: dq 0xdf9a54b303f1d3a3 |
| n13: dq 0x4a75479abd64e097 |
| n14: dq 0x249214109d5d1c88 |
| %endif |
| |
| ; Messages handed to fail_func() by checked_call. errmsg_register is a format |
| ; string: its %s receives the list of register names (" r9 r12 ...") that |
| ; checked_call builds on the stack. |
| errmsg_stack: db "stack corruption", 0 |
| errmsg_register: db "failed to preserve register:%s", 0 |
| errmsg_vzeroupper: db "missing vzeroupper", 0 |
| |
| SECTION .bss |
| |
| ; Flag for the "missing vzeroupper" check. init_x86 stores 1 here when |
| ; xgetbv with ecx=1 is available and does not always report a dirty ymm |
| ; state; checked_call skips the check while the value is 0. |
| check_vzeroupper: resd 1 |
| |
| SECTION .text |
| |
| ; Called by checked_call with an error message when a check fails. |
| cextern fail_func |
| |
| ; max number of args used by any asm function. |
| ; (max_args % 4) must equal 3 for stack alignment |
| %define max_args 15 |
| |
| ; t0 as used by init_x86 for the name pointer: r0 on UNIX64, r4 elsewhere. |
| ; (t0 is declared again further down for the x86-64 checked_call.) |
| %if UNIX64 |
| DECLARE_REG_TMP 0 |
| %else |
| DECLARE_REG_TMP 4 |
| %endif |
| |
| ;----------------------------------------------------------------------------- |
| ; unsigned checkasm_init_x86(char *name) |
| ; |
| ; Writes the 48-byte processor brand string (cpuid leaves |
| ; 0x80000002-0x80000004) to name. When those leaves are unavailable, writes |
| ; the 12-byte manufacturer id followed by a zero dword instead. |
| ; Stores 1 in check_vzeroupper when xgetbv with ecx=1 is supported and bit 2 |
| ; of its result is not already set at this point. |
| ; Returns 0 in eax when cpuid reports no basic leaf above 0, otherwise the |
| ; value cpuid leaf 1 returned in eax (the cpuid signature). |
| ;----------------------------------------------------------------------------- |
| cglobal init_x86, 0, 5 |
| %if ARCH_X86_64 |
| push rbx ; cpuid overwrites ebx |
| %endif |
| movifnidn t0, r0mp ; t0 = name |
| mov eax, 0x80000000 |
| cpuid ; eax = highest extended leaf |
| cmp eax, 0x80000004 |
| jb .vendor_fallback ; processor brand string not supported |
| ; The brand string spans three consecutive leaves, 16 bytes per leaf. |
| %assign i 0 |
| %rep 3 |
| mov eax, 0x80000002 + i |
| cpuid |
| mov [t0+16*i+ 0], eax |
| mov [t0+16*i+ 4], ebx |
| mov [t0+16*i+ 8], ecx |
| mov [t0+16*i+12], edx |
| %assign i i+1 |
| %endrep |
| xor eax, eax |
| cpuid ; eax = highest basic leaf |
| jmp .probe_xcr1 |
| .vendor_fallback: ; use manufacturer id as a fallback |
| xor eax, eax |
| mov [t0+12], eax ; zero dword right after the 12-byte id |
| cpuid ; eax = highest basic leaf, id in ebx:edx:ecx |
| mov [t0+0], ebx |
| mov [t0+4], edx |
| mov [t0+8], ecx |
| .probe_xcr1: |
| test eax, eax |
| jz .done ; cpuid leaf 1 not supported, return 0 |
| mov t0d, eax ; max leaf (the name pointer is no longer needed) |
| mov eax, 1 |
| cpuid |
| and ecx, (1 << 27) + (1 << 28) ; osxsave + avx feature bits |
| cmp ecx, (1 << 27) + (1 << 28) |
| jne .done ; osxsave/avx not supported, eax = signature |
| cmp t0d, 13 |
| jb .done ; cpuid leaf 13 not supported, eax = signature |
| mov t0d, eax ; cpuid signature, becomes the return value |
| mov eax, 13 |
| mov ecx, 1 |
| cpuid |
| test al, 1 << 2 |
| jz .return_signature ; xcr1 not supported |
| mov ecx, 1 |
| xgetbv |
| test al, 1 << 2 |
| jnz .return_signature ; always-dirty ymm state |
| ; ecx is still 1 here: enable the vzeroupper check |
| %if ARCH_X86_64 == 0 && PIC |
| LEA eax, check_vzeroupper |
| mov [eax], ecx |
| %else |
| mov [check_vzeroupper], ecx |
| %endif |
| .return_signature: |
| mov eax, t0d |
| .done: |
| %if ARCH_X86_64 |
| pop rbx |
| %endif |
| RET |
| |
| %if ARCH_X86_64 |
| ; Per-ABI parameters for the x86-64 checked_call: |
| ; stack_param - address of the first outgoing stack parameter of the |
| ; tested function |
| ; num_fn_args - address of the dword holding the tested function's |
| ; argument count among checked_call's own stack arguments |
| ; NOTE(review): the 17*8 / 11*8 offsets presumably mirror |
| ; the argument layout of the C-side call macro - confirm |
| ; num_reg_args - number of arguments passed in registers |
| ; free_regs - index of the first register that is filled with an |
| ; n<i> pattern; r<free_regs>..r14 are checked after the call |
| ; clobber_mask_stack_bit - clobber-mask bit tested for the first stack |
| ; parameter (the mask is shifted right once per parameter) |
| ; t0 - register that holds the tested function's address |
| %if WIN64 |
| %define stack_param rsp+32 ; shadow space |
| %define num_fn_args rsp+stack_offset+17*8 |
| %assign num_reg_args 4 |
| %assign free_regs 7 |
| %assign clobber_mask_stack_bit 16 |
| DECLARE_REG_TMP 4 |
| %else |
| %define stack_param rsp |
| %define num_fn_args rsp+stack_offset+11*8 |
| %assign num_reg_args 6 |
| %assign free_regs 9 |
| %assign clobber_mask_stack_bit 64 |
| DECLARE_REG_TMP 7 |
| %endif |
| |
| ; CLOBBER_UPPER reg, mask_bit |
| ; When (r9b & mask_bit) is non-zero, replaces reg with its own low 32 bits |
| ; ORed with r8; otherwise reg is left unchanged. |
| ; Expects r8 to hold the pattern for the upper half (checked_call loads |
| ; 0xdeadbeef00000000) and r9 to hold the clobber mask. |
| ; Overwrites r13 and the flags. |
| %macro CLOBBER_UPPER 2 ; reg, mask_bit |
| mov r13d, %1d |
| or r13, r8 |
| test r9b, %2 |
| cmovnz %1, r13 |
| %endmacro |
| |
| ;----------------------------------------------------------------------------- |
| ; checked_call (x86-64) |
| ; |
| ; Calls the tested function with the caller's arguments and afterwards checks: |
| ; 1. that the 8 canary qwords written directly above the outgoing stack |
| ; parameters are unchanged ("stack corruption"), |
| ; 2. that r<free_regs>..r14 still hold the n<i> patterns and, on WIN64, that |
| ; m6-m15 still hold the x<i> patterns ("failed to preserve register:%s"), |
| ; 3. when check_vzeroupper is non-zero, that xgetbv reports bit 2 clear |
| ; ("missing vzeroupper"). |
| ; The first failing check calls fail_func() with its message. In every case |
| ; the rax/rdx values returned by the tested function are what checked_call |
| ; itself returns. |
| ; rN/mN are x86inc register aliases; rax/rdx/ecx/eax are the physical ones. |
| ;----------------------------------------------------------------------------- |
| cglobal checked_call, 2, 15, 16, max_args*8+64+8 |
| ; r10d = argument count of the tested function. The function pointer is |
| ; stored r10 qwords above the count slot, the clobber mask one qword higher. |
| mov r10d, [num_fn_args] |
| ; r8 = pattern that CLOBBER_UPPER ORs into the upper 32 bits |
| mov r8, 0xdeadbeef00000000 |
| mov r9d, [num_fn_args+r10*8+8] ; clobber_mask |
| mov t0, [num_fn_args+r10*8] ; func |
| |
| ; Clobber the upper halves of 32-bit parameters |
| CLOBBER_UPPER r0, 1 |
| CLOBBER_UPPER r1, 2 |
| CLOBBER_UPPER r2, 4 |
| CLOBBER_UPPER r3, 8 |
| %if UNIX64 |
| CLOBBER_UPPER r4, 16 |
| CLOBBER_UPPER r5, 32 |
| %else ; WIN64 |
| ; load the x<i> patterns into m6-m15 so that changes can be detected later |
| %assign i 6 |
| %rep 16-6 |
| mova m %+ i, [x %+ i] |
| %assign i i+1 |
| %endrep |
| %endif |
| |
| ; r10d = max(argument count - num_reg_args, 0); r11d = 0 (copy loop index) |
| xor r11d, r11d |
| sub r10d, num_reg_args |
| cmovs r10d, r11d ; num stack args |
| |
| ; write stack canaries to the area above parameters passed on the stack |
| ; (the canary value is the bitwise NOT of checked_call's return address) |
| mov r12, [rsp+stack_offset] ; return address |
| not r12 |
| %assign i 0 |
| %rep 8 ; 64 bytes |
| mov [stack_param+(r10+i)*8], r12 |
| %assign i i+1 |
| %endrep |
| |
| test r10d, r10d |
| jz .stack_setup_done ; no stack parameters |
| ; Copy each incoming stack parameter to the outgoing parameter area. |
| ; The clobber mask is shifted right once per iteration, so the same |
| ; clobber_mask_stack_bit tests the bit belonging to the current parameter. |
| .copy_stack_parameter: |
| mov r12, [stack_param+stack_offset+8+r11*8] |
| CLOBBER_UPPER r12, clobber_mask_stack_bit |
| shr r9d, 1 |
| mov [stack_param+r11*8], r12 |
| inc r11d |
| cmp r11d, r10d |
| jl .copy_stack_parameter |
| .stack_setup_done: |
| |
| ; fill r14 down to r<free_regs> with their n<i> patterns |
| %assign i 14 |
| %rep 15-free_regs |
| mov r %+ i, [n %+ i] |
| %assign i i-1 |
| %endrep |
| call t0 |
| |
| ; check for stack corruption |
| ; The stack argument count is recomputed from memory; no register value |
| ; from before the call is reused. |
| mov r0d, [num_fn_args] |
| xor r3d, r3d |
| sub r0d, num_reg_args |
| cmovs r0d, r3d ; num stack args |
| |
| ; r3 = expected canary. r4 accumulates (slot XOR canary) over all 8 slots |
| ; and is zero only if every slot is intact. |
| mov r3, [rsp+stack_offset] |
| mov r4, [stack_param+r0*8] |
| not r3 |
| xor r4, r3 |
| %assign i 1 |
| %rep 6 |
| mov r5, [stack_param+(r0+i)*8] |
| xor r5, r3 |
| or r4, r5 |
| %assign i i+1 |
| %endrep |
| ; the last slot is XORed into r3 itself, so r3 and r4 are both zero on |
| ; the .stack_ok path; the jz below uses the flags of the final or |
| xor r3, [stack_param+(r0+7)*8] |
| or r4, r3 |
| jz .stack_ok |
| ; Save the return value located in rdx:rax first to prevent clobbering. |
| mov r10, rax |
| mov r11, rdx |
| lea r0, [errmsg_stack] |
| jmp .fail |
| .stack_ok: |
| |
| ; check for failure to preserve registers |
| ; r3d becomes a bitmask: every step shifts it left by one and adds 1 if |
| ; r<i> differs from n<i>. r14 is compared first, so after the loop bit |
| ; (i - free_regs) belongs to r<i>. |
| %assign i 14 |
| %rep 15-free_regs |
| cmp r %+ i, [n %+ i] |
| setne r4b |
| lea r3d, [r4+r3*2] |
| %assign i i-1 |
| %endrep |
| ; r0 = write position of the register-name list built on the stack |
| ; (WIN64 additionally keeps the start of the list in r5) |
| %if WIN64 |
| lea r0, [rsp+32] ; account for shadow space |
| mov r5, r0 |
| test r3d, r3d |
| jz .gpr_ok |
| %else |
| test r3d, r3d |
| jz .gpr_xmm_ok |
| mov r0, rsp |
| %endif |
| ; Each name (" rN" / " rNN") is always written at r0, but r0 only advances |
| ; past it when the register's bit is set; the names of intact registers are |
| ; therefore overwritten by the next write. The immediates patch the digit |
| ; into the last character of the " r0" / " r10" template. |
| %assign i free_regs |
| %rep 15-free_regs |
| %if i < 10 |
| mov dword [r0], " r0" + (i << 16) |
| lea r4, [r0+3] |
| %else |
| mov dword [r0], " r10" + ((i - 10) << 24) |
| lea r4, [r0+4] |
| %endif |
| test r3b, 1 << (i - free_regs) |
| cmovnz r0, r4 |
| %assign i i+1 |
| %endrep |
| %if WIN64 ; xmm registers |
| .gpr_ok: |
| ; m<i> ^= x<i>: an unchanged register becomes all zero |
| %assign i 6 |
| %rep 16-6 |
| pxor m %+ i, [x %+ i] |
| %assign i i+1 |
| %endrep |
| ; Reduce the ten registers to one byte each. Signed saturation maps only |
| ; a zero word to a zero byte, so "non-zero" survives every packing step. |
| ; After the final pack, byte i of m6 stems from m<i> for i = 6..15. |
| packsswb m6, m7 |
| packsswb m8, m9 |
| packsswb m10, m11 |
| packsswb m12, m13 |
| packsswb m14, m15 |
| packsswb m6, m6 |
| packsswb m8, m10 |
| packsswb m12, m14 |
| packsswb m6, m6 |
| packsswb m8, m12 |
| packsswb m6, m8 |
| ; r3d bit i = 1 if byte i is zero; 0xffff means nothing was modified |
| pxor m7, m7 |
| pcmpeqb m6, m7 |
| pmovmskb r3d, m6 |
| cmp r3d, 0xffff |
| je .xmm_ok |
| ; append " xmmN" for every register whose bit is clear, using the same |
| ; write-always / advance-conditionally scheme as for the GPR names |
| mov r7d, " xmm" |
| %assign i 6 |
| %rep 16-6 |
| mov [r0+0], r7d |
| %if i < 10 |
| mov byte [r0+4], "0" + i |
| lea r4, [r0+5] |
| %else |
| mov word [r0+4], "10" + ((i - 10) << 8) |
| lea r4, [r0+6] |
| %endif |
| test r3d, 1 << i |
| cmovz r0, r4 |
| %assign i i+1 |
| %endrep |
| .xmm_ok: |
| ; r0 still at the start of the list: no name was kept, nothing failed |
| cmp r0, r5 |
| je .gpr_xmm_ok |
| ; terminate the list; r1 = start of the list (argument for the %s), |
| ; r10/r11 = saved return value of the tested function |
| mov byte [r0], 0 |
| mov r11, rdx |
| mov r1, r5 |
| %else |
| ; terminate the list; r1 = start of the list (argument for the %s), |
| ; r10/r11 = saved return value of the tested function |
| mov byte [r0], 0 |
| mov r11, rdx |
| mov r1, rsp |
| %endif |
| mov r10, rax |
| lea r0, [errmsg_register] |
| jmp .fail |
| .gpr_xmm_ok: |
| ; Check for dirty YMM state, i.e. missing vzeroupper |
| ; ecx = check_vzeroupper doubles as the xgetbv selector (init_x86 stores 1). |
| ; xgetbv overwrites eax/edx, so the return value is saved first. |
| mov ecx, [check_vzeroupper] |
| test ecx, ecx |
| jz .ok ; not supported, skip |
| mov r10, rax |
| mov r11, rdx |
| xgetbv |
| test al, 0x04 |
| jz .restore_retval ; clean ymm state |
| lea r0, [errmsg_vzeroupper] |
| vzeroupper |
| .fail: |
| ; Call fail_func() with a descriptive message to mark it as a failure. |
| ; r0 = message, r1 = register-name list (only set for errmsg_register). |
| ; NOTE(review): eax is zeroed presumably because fail_func is variadic |
| ; (al = number of vector registers used) - confirm against its prototype. |
| xor eax, eax |
| call fail_func |
| .restore_retval: |
| mov rax, r10 |
| mov rdx, r11 |
| .ok: |
| RET |
| |
| ; trigger a warmup of vector units |
| ; WARMUP defines a function named warmup for the instruction set selected at |
| ; the point of expansion; it is expanded below once after INIT_YMM avx2 and |
| ; once after INIT_ZMM avx512. |
| ; NOTE(review): m0 presumably maps to ymm0 / zmm0 under those modes and the |
| ; two functions get distinct cpu suffixes - confirm in x86inc.asm. |
| %macro WARMUP 0 |
| cglobal warmup, 0, 0 |
| ; zero m0, then issue one multiply on it; the result is not used |
| xorps m0, m0 |
| mulps m0, m0 |
| RET |
| %endmacro |
| |
| INIT_YMM avx2 |
| WARMUP |
| INIT_ZMM avx512 |
| WARMUP |
| |
| %else |
| |
| ; just random numbers to reduce the chance of incidental match |
| ; (loaded into r3-r6 before the call and compared against afterwards) |
| %assign n3 0x6549315c |
| %assign n4 0xe02f3e23 |
| %assign n5 0xb78d0d1d |
| %assign n6 0x33627ba7 |
| |
| ;----------------------------------------------------------------------------- |
| ; void checkasm_checked_call(void *func, ...) |
| ; |
| ; x86-32 variant. Builds a 27-dword frame holding the tested function's |
| ; parameters (lowest addresses) followed by canaries, calls func, and then |
| ; checks r3-r6 against n3-n6, the canaries, and (if check_vzeroupper is set) |
| ; the ymm state. The first failing check calls fail_func() with a message. |
| ; eax/edx as returned by func are kept in r5/r6 and restored before returning. |
| ;----------------------------------------------------------------------------- |
| cglobal checked_call, 1, 7 |
| ; r3 = canary (bitwise NOT of the return address), r1 = number of stack |
| ; parameters, r2 = 27 - r1 = number of canaries to push |
| mov r3, [esp+stack_offset] ; return address |
| mov r1, [esp+stack_offset+17*4] ; num_stack_params |
| mov r2, 27 |
| not r3 |
| sub r2, r1 |
| ; Both loops run their body before testing the counter, so at least one |
| ; canary and one parameter are pushed. |
| ; NOTE(review): the fixed "add esp, 27*4" at .ok only matches when |
| ; 1 <= num_stack_params <= 26; presumably the caller guarantees this - confirm. |
| .push_canary: |
| push r3 |
| dec r2 |
| jg .push_canary |
| ; esp drops by 4 with every push, so the constant offset reads the |
| ; caller's parameters from the last one down to the first |
| .push_parameter: |
| push dword [esp+32*4] |
| dec r1 |
| jg .push_parameter |
| mov r3, n3 |
| mov r4, n4 |
| mov r5, n5 |
| mov r6, n6 |
| call r0 |
| |
| ; check for failure to preserve registers |
| ; Result layout in r3 after this sequence: bit 24 = r3 changed, |
| ; bit 16 = r4, bit 8 = r5, bit 0 = r6. The shift also discards whatever |
| ; was left in the upper half of r3. |
| cmp r3, n3 |
| setne r3h |
| cmp r4, n4 |
| setne r3b |
| shl r3d, 16 |
| cmp r5, n5 |
| setne r3h |
| cmp r6, n6 |
| setne r3b |
| test r3, r3 |
| jz .gpr_ok |
| ; Build the register-name list at esp+16 and store its address at esp+4, |
| ; the second stack argument of the fail_func call below. |
| lea r1, [esp+16] |
| mov [esp+4], r1 |
| ; each name is always written, but r1 only advances past it when the |
| ; matching bit is set, so names of intact registers are overwritten |
| %assign i 3 |
| %rep 4 |
| mov dword [r1], " r0" + (i << 16) |
| lea r4, [r1+3] |
| test r3, 1 << ((6 - i) * 8) |
| cmovnz r1, r4 |
| %assign i i+1 |
| %endrep |
| mov byte [r1], 0 |
| ; keep the tested function's return value in r5/r6 across fail_func |
| mov r5, eax |
| mov r6, edx |
| LEA r1, errmsg_register |
| jmp .fail |
| .gpr_ok: |
| ; check for stack corruption |
| ; The canaries occupy dwords num_stack_params..26 relative to esp. r3 runs |
| ; from num_stack_params-26 up to 0 while r4 accumulates (slot XOR canary). |
| mov r3, [esp+48*4] ; num_stack_params |
| mov r6, [esp+31*4] ; return address |
| mov r4, [esp+r3*4] |
| sub r3, 26 |
| not r6 |
| xor r4, r6 |
| .check_canary: |
| mov r5, [esp+(r3+27)*4] |
| xor r5, r6 |
| or r4, r5 |
| inc r3 |
| jl .check_canary |
| ; r5/r6 were used as scratch above; from here on they hold the return value |
| mov r5, eax |
| mov r6, edx |
| test r4, r4 |
| jz .stack_ok |
| LEA r1, errmsg_stack |
| jmp .fail |
| .stack_ok: |
| ; check for dirty YMM state, i.e. missing vzeroupper |
| ; ecx = check_vzeroupper doubles as the xgetbv selector (init_x86 stores 1); |
| ; xgetbv overwrites eax/edx, which are restored from r5/r6 at .ok |
| LEA ecx, check_vzeroupper |
| mov ecx, [ecx] |
| test ecx, ecx |
| jz .ok ; not supported, skip |
| xgetbv |
| test al, 0x04 |
| jz .ok ; clean ymm state |
| LEA r1, errmsg_vzeroupper |
| vzeroupper |
| .fail: |
| ; first stack argument of fail_func = message |
| mov [esp], r1 |
| call fail_func |
| .ok: |
| ; drop the 27-dword frame and hand back the tested function's eax/edx |
| add esp, 27*4 |
| mov eax, r5 |
| mov edx, r6 |
| RET |
| |
| %endif ; ARCH_X86_64 |