blob: bb7e4bd57ba316b67aae7d0c9b4e9b7efb6ed4a4 [file] [log] [blame]
#!/usr/bin/env python2.7
#
# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import re
import subprocess
import sys
clang = 'clang-4.0'
objdump = 'gobjdump'
ccache = 'ccache'
stages = 'src/jumper/SkJumper_stages.cpp'
stages_lowp = 'src/jumper/SkJumper_stages_lowp.cpp'
generated = 'src/jumper/SkJumper_generated.S'
generated_win = 'src/jumper/SkJumper_generated_win.S'
clang = sys.argv[1] if len(sys.argv) > 1 else clang
objdump = sys.argv[2] if len(sys.argv) > 2 else objdump
ccache = sys.argv[3] if len(sys.argv) > 3 else ccache
stages = sys.argv[4] if len(sys.argv) > 4 else stages
stages_lowp = sys.argv[5] if len(sys.argv) > 5 else stages_lowp
generated = sys.argv[6] if len(sys.argv) > 6 else generated
generated_win = sys.argv[7] if len(sys.argv) > 7 else generated_win
clang = [ccache, clang, '-x', 'c++']
cflags = ['-std=c++11', '-Os', '-DJUMPER',
'-momit-leaf-frame-pointer', '-ffp-contract=fast',
'-fno-exceptions', '-fno-rtti', '-fno-unwind-tables']
x86 = [ '-m32' ]
win = ['-DWIN', '-mno-red-zone']
sse2 = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1']
subprocess.check_call(clang + cflags + sse2 +
['-c', stages] +
['-o', 'sse2.o'])
subprocess.check_call(clang + cflags + sse2 + win +
['-c', stages] +
['-o', 'win_sse2.o'])
subprocess.check_call(clang + cflags + sse2 + x86 +
['-c', stages] +
['-o', 'x86_sse2.o'])
subprocess.check_call(clang + cflags + sse2 + win + x86 +
['-c', stages] +
['-o', 'win_x86_sse2.o'])
ssse3 = ['-mssse3', '-mno-sse4.1']
subprocess.check_call(clang + cflags + ssse3 +
['-c', stages_lowp] +
['-o', 'lowp_ssse3.o'])
subprocess.check_call(clang + cflags + ssse3 + win +
['-c', stages_lowp] +
['-o', 'win_lowp_ssse3.o'])
sse41 = ['-msse4.1']
subprocess.check_call(clang + cflags + sse41 +
['-c', stages] +
['-o', 'sse41.o'])
subprocess.check_call(clang + cflags + sse41 + win +
['-c', stages] +
['-o', 'win_sse41.o'])
avx = ['-mavx']
subprocess.check_call(clang + cflags + avx +
['-c', stages] +
['-o', 'avx.o'])
subprocess.check_call(clang + cflags + avx + win +
['-c', stages] +
['-o', 'win_avx.o'])
hsw = ['-mavx2', '-mfma', '-mf16c']
subprocess.check_call(clang + cflags + hsw +
['-c', stages] +
['-o', 'hsw.o'])
subprocess.check_call(clang + cflags + hsw + win +
['-c', stages] +
['-o', 'win_hsw.o'])
subprocess.check_call(clang + cflags + hsw +
['-c', stages_lowp] +
['-o', 'lowp_hsw.o'])
subprocess.check_call(clang + cflags + hsw + win +
['-c', stages_lowp] +
['-o', 'win_lowp_hsw.o'])
aarch64 = [ '--target=aarch64' ]
subprocess.check_call(clang + cflags + aarch64 +
['-c', stages] +
['-o', 'aarch64.o'])
vfp4 = [
'--target=armv7a-linux-gnueabihf',
'-mfpu=neon-vfpv4',
]
subprocess.check_call(clang + cflags + vfp4 +
['-c', stages] +
['-o', 'vfp4.o'])
def parse_object_file(dot_o, directive, target=None):
globl, hidden, label, comment, align = \
'.globl', 'HIDDEN', ':', '// ', 'BALIGN'
if 'win' in dot_o:
globl, hidden, label, comment, align = \
'PUBLIC', '', ' LABEL PROC', '; ', 'ALIGN '
cmd = [objdump]
if target:
cmd += ['--target', target]
# Look for sections we know we can't handle.
section_headers = subprocess.check_output(cmd + ['-h', dot_o])
for snippet in ['.rodata']:
if snippet in section_headers:
print >>sys.stderr, 'Found %s in section.' % snippet
assert snippet not in section_headers
if directive == '.long':
disassemble = ['-d', dot_o]
dehex = lambda h: '0x'+h
else:
# x86-64... as long as we're using %rip-relative addressing,
# literal sections should be fine to just dump in with .text.
disassemble = ['-d', # DO NOT USE -D.
'-z', # Print zero bytes instead of ...
'--insn-width=11',
'-j', '.text',
'-j', '.literal4',
'-j', '.literal16',
'-j', '.const',
dot_o]
dehex = lambda h: str(int(h,16))
# Ok. Let's disassemble.
for line in subprocess.check_output(cmd + disassemble).split('\n'):
line = line.strip()
if not line or line.startswith(dot_o) or line.startswith('Disassembly'):
continue
# E.g. 00000000000003a4 <_load_f16>:
m = re.match('''[0-9a-f]+ <_?(.*)>:''', line)
if m:
print
sym = m.group(1)
if sym.startswith('.literal'): # .literal4, .literal16, etc
print sym.replace('.literal', align)
elif sym.startswith('.const'): # 32-byte constants
print align + '32'
elif not sym.startswith('sk_'):
print >>sys.stderr, "build_stages.py can't handle '%s' (yet?)." % sym
assert sym.startswith('sk_')
else: # a stage function
if hidden:
print hidden + ' _' + sym
print globl + ' _' + sym
if 'win' not in dot_o:
print 'FUNCTION(_' + sym + ')'
print '_' + sym + label
continue
columns = line.split('\t')
#print >>sys.stderr, columns
code = columns[1]
if len(columns) >= 4:
inst = columns[2]
args = columns[3]
else:
inst, args = columns[2], ''
if ' ' in columns[2]:
inst, args = columns[2].split(' ', 1)
code, inst, args = code.strip(), inst.strip(), args.strip()
hexed = ','.join(dehex(x) for x in code.split(' '))
print ' ' + directive + ' ' + hexed + ' '*(36-len(hexed)) + \
comment + inst + (' '*(14-len(inst)) + args if args else '')
sys.stdout = open(generated, 'w')
print '''# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This file is generated semi-automatically with this command:
# $ src/jumper/build_stages.py
'''
print '#if defined(__MACH__)'
print ' #define HIDDEN .private_extern'
print ' #define FUNCTION(name)'
print ' #define BALIGN4 .align 2'
print ' #define BALIGN16 .align 4'
print ' #define BALIGN32 .align 5'
print '#else'
print ' .section .note.GNU-stack,"",%progbits'
print ' #define HIDDEN .hidden'
print ' #define FUNCTION(name) .type name,%function'
print ' #define BALIGN4 .balign 4'
print ' #define BALIGN16 .balign 16'
print ' #define BALIGN32 .balign 32'
print '#endif'
print '.text'
print '#if defined(__aarch64__)'
print 'BALIGN4'
parse_object_file('aarch64.o', '.long')
print '#elif defined(__arm__)'
print 'BALIGN4'
parse_object_file('vfp4.o', '.long', target='elf32-littlearm')
print '#elif defined(__x86_64__)'
print 'BALIGN32'
parse_object_file('hsw.o', '.byte')
print 'BALIGN32'
parse_object_file('avx.o', '.byte')
print 'BALIGN32'
parse_object_file('sse41.o', '.byte')
print 'BALIGN32'
parse_object_file('sse2.o', '.byte')
print 'BALIGN32'
parse_object_file('lowp_hsw.o', '.byte')
print 'BALIGN32'
parse_object_file('lowp_ssse3.o', '.byte')
print '#elif defined(__i386__)'
print 'BALIGN32'
parse_object_file('x86_sse2.o', '.byte')
print '#endif'
sys.stdout = open(generated_win, 'w')
print '''; Copyright 2017 Google Inc.
;
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.
; This file is generated semi-automatically with this command:
; $ src/jumper/build_stages.py
'''
print 'IFDEF RAX'
print "_text32 SEGMENT ALIGN(32) 'CODE'"
print 'ALIGN 32'
parse_object_file('win_hsw.o', 'DB')
print 'ALIGN 32'
parse_object_file('win_avx.o', 'DB')
print 'ALIGN 32'
parse_object_file('win_sse41.o', 'DB')
print 'ALIGN 32'
parse_object_file('win_sse2.o', 'DB')
print 'ALIGN 32'
parse_object_file('win_lowp_hsw.o', 'DB')
print 'ALIGN 32'
parse_object_file('win_lowp_ssse3.o', 'DB')
print 'ELSE'
print '.MODEL FLAT,C'
print "_text32 SEGMENT ALIGN(32) 'CODE'"
print 'ALIGN 32'
parse_object_file('win_x86_sse2.o', 'DB')
print 'ENDIF'
print 'END'