; blob: 705f813d7da609474a5c99ed27ff8e6f63cb6f2d [file] [log] [blame]
; jsimdcpu.asm - SIMD instruction support check
; Copyright 2009 Pierre Ossman <> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see the copyright notice in the
; accompanying SIMD include file (its name is missing from this copy).
; This file should be assembled with NASM (Netwide Assembler); it can *not*
; be assembled with Microsoft's MASM or any compatible assembler (including
; Borland's Turbo Assembler).
; NASM is available from https://www.nasm.us/.
%include ""
; --------------------------------------------------------------------------
; Check if the CPU supports SIMD instructions
;
; GLOBAL(unsigned int)
; jpeg_simd_cpu_support(void)
;
; In:    nothing
; Out:   rax = bit mask of JSIMD_* flags:
;                JSIMD_SSE | JSIMD_SSE2  always (assumed baseline on x86-64)
;                JSIMD_AVX2              only if CPUID reports AVX2 *and* the
;                                        O/S has enabled XMM+YMM state in XCR0
; Saves: rbx (overwritten by CPUID) and rdi (result accumulator) are pushed
;        on entry and restored on exit.
; Clobb: rcx, rdx, flags
; Stack: two pushes, no locals; the routine makes no calls.
;
; NOTE(review): the JSIMD_* constants and the EXTN() symbol-decoration macro
; are assumed to be provided by the header %include'd at the top of this
; file -- confirm against that header.

    align       32
    global      EXTN(jpeg_simd_cpu_support)

EXTN(jpeg_simd_cpu_support):
    push        rbx
    push        rdi

    xor         edi, edi                ; rdi = simd support flags (none yet)

    ; Assume that all x86-64 processors support SSE & SSE2 instructions
    or          rdi, JSIMD_SSE2
    or          rdi, JSIMD_SSE

    ; Check whether CPUID leaf 07H is supported
    ; (leaf 07H is used to check for AVX2 instruction support)
    xor         eax, eax                ; leaf 0: eax <- highest basic leaf
    cpuid
    cmp         eax, 7
    jb          short .return           ; Maximum leaf < 07H (unsigned)

    ; Check for AVX2 instruction support
    mov         eax, 7
    xor         ecx, ecx                ; sub-leaf 0
    cpuid                               ; ebx = Extended feature flags
    test        ebx, 1<<5               ; bit5:AVX2
    jz          short .return

    ; Check for AVX2 O/S support
    mov         eax, 1
    xor         ecx, ecx
    cpuid                               ; ecx = feature flags
    test        ecx, 1<<27              ; bit27:OSXSAVE
    jz          short .return           ; O/S does not support XSAVE
    test        ecx, 1<<28              ; bit28:AVX
    jz          short .return           ; CPU does not support AVX

    xor         ecx, ecx                ; select XCR0
    xgetbv                              ; edx:eax = XCR0
    and         eax, 6                  ; bit1:XMM state, bit2:YMM state
    cmp         eax, 6
    jne         short .return           ; O/S does not manage XMM/YMM state
                                        ; using XSAVE

    or          rdi, JSIMD_AVX2

.return:
    mov         rax, rdi                ; return the accumulated flags

    pop         rdi
    pop         rbx
    ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
    align       32