blob: 91c06a554a01cf659073a6c5f8465ca17090f324 [file] [log] [blame]
; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.
; Linearly scales one row of planar YUV horizontally and converts it to
; 32-bit pixels using MMX and the kCoefficientsRgbY lookup table.
;
; This file is a function body template: SYMBOL is not defined here and is
; expected to be provided by the including file (TODO confirm against the
; includer). MOVQ (upper case, used for the two-pixel store) and LOAD_SYM are
; likewise not defined in this file.
;
; The horizontal position x is kept in fixed point with 16 fractional bits:
;   - the Y sample index is x >> 16 and the blend weight is x & 0xffff;
;   - the U/V sample index is x >> 17 (one chroma sample per two luma
;     samples) and the blend weight is x & 0x1fffe.
; Each loop iteration produces two output pixels that share one U/V pair.
global mangle(SYMBOL) PRIVATE
align function_align
mangle(SYMBOL):
; NOTE(review): stack_offset looks like the x86inc bookkeeping variable that
; PROLOGUE and RET rely on; it is reset here before PROLOGUE - confirm.
%assign stack_offset 0
extern mangle(kCoefficientsRgbY)
; Parameters are in the following order:
; 1. Y plane
; 2. U plane
; 3. V plane
; 4. ARGB frame
; 5. Width
; 6. Source dx
;
; The register names given to PROLOGUE follow the same order, so on entry
; Y = Y plane, R0 = U plane, R1 = V plane, ARGB = output pointer, R2 = width
; and R3 = source dx. TEMP is a seventh scratch register that carries no
; argument. (Assumes the usual x86inc meaning of PROLOGUE's operands:
; argument count, GPR count, vector register count, names - TODO confirm.)
PROLOGUE 6, 7, 3, Y, R0, R1, ARGB, R2, R3, TEMP
; WORD_SIZE is the operand-size keyword matching one general purpose
; register, used for the stack slots defined below.
%if gprsize == 8
%define WORD_SIZE QWORD
%else
%define WORD_SIZE DWORD
%endif
; Define register aliases.
; Several aliases intentionally share a physical register. This is safe only
; because every argument is pushed to the stack before its register is reused:
;   R1 holds the V plane argument on entry and becomes X afterwards.
;   R2 holds the width argument on entry and becomes COMPL afterwards.
%define Xq R1q ; Current X position (16.16 fixed point)
%define COMPLq R2q ; Left (lower address) sample, then the blended result
%define COMPLd R2d ; 32-bit view of COMPLq
%define U_ARG_REGq R0q ; U plane address argument
%define V_ARG_REGq R1q ; V plane address argument
%define SOURCE_DX_ARG_REGq R3q ; Source dx argument
%define WIDTH_ARG_REGq R2q ; Width argument
%ifdef PIC
; PIC code shared COMPR, U and V with the same register. Need to be careful in the
; code they don't mix up. This allows R3q to be used for YUV table.
; Because COMPR overwrites the plane pointer, both U and V are reloaded from
; their stack slots on every loop iteration.
%define COMPRq R0q ; Right (next address) sample
%define COMPRd R0d ; 32-bit view of COMPRq
%define Uq R0q ; U plane address
%define Vq R0q ; V plane address
%define U_PLANE WORD_SIZE [rsp + 3 * gprsize]
%define TABLE R3q ; Address of the table
%else
; Non-PIC code defines.
; COMPR shares R3 with V, so V is reloaded from the stack on every iteration.
; U keeps its own register (R0) and never needs reloading.
%define COMPRq R3q ; Right (next address) sample
%define COMPRd R3d ; 32-bit view of COMPRq
%define Uq R0q ; U plane address
%define Vq R3q ; V plane address
%define TABLE mangle(kCoefficientsRgbY)
%endif
; Defines for stack variables. These are used in both PIC and non-PIC code.
; Offsets are relative to rsp after the pushes below:
;   [rsp]               source_width = width * source_dx
;   [rsp + gprsize]     source_dx
;   [rsp + 2 * gprsize] V plane address
;   [rsp + 3 * gprsize] U plane address (PIC only)
%define V_PLANE WORD_SIZE [rsp + 2 * gprsize]
%define SOURCE_DX WORD_SIZE [rsp + gprsize]
%define SOURCE_WIDTH WORD_SIZE [rsp]
; Handle stack variables differently for PIC and non-PIC code.
%ifdef PIC
; Define stack usage for PIC code. PIC code push U plane onto stack.
PUSH U_ARG_REGq
PUSH V_ARG_REGq
PUSH SOURCE_DX_ARG_REGq
imul WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq ; source_width = width * source_dx
PUSH WIDTH_ARG_REGq
; Load the address of kCoefficientsRgbY into TABLE
; TABLE is R3, the register that currently holds source_dx, so source_dx is
; copied to TEMP first and the alias is rebound for the comparison below.
mov TEMPq, SOURCE_DX_ARG_REGq ; Need to save source_dx first
LOAD_SYM TABLE, mangle(kCoefficientsRgbY)
%define SOURCE_DX_ARG_REGq TEMPq ; Overwrite SOURCE_DX_ARG_REGq to TEMPq
%else
; Define stack usage. Non-PIC code just push 3 registers to stack.
PUSH V_ARG_REGq
PUSH SOURCE_DX_ARG_REGq
imul WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq ; source_width = width * source_dx
PUSH WIDTH_ARG_REGq
%endif
; EPILOGUE releases the stack slots pushed above (4 in PIC code, 3 otherwise).
; It must run before every RET.
%macro EPILOGUE 0
%ifdef PIC
ADD rsp, 4 * gprsize
%else
ADD rsp, 3 * gprsize
%endif
%endmacro
; Choose the starting position: x = 0 when source_dx is below 2.0 (0x20000),
; otherwise x = 0.5 (0x8000). Both paths enter the loop at its condition
; check, so a row with source_width <= x produces no output.
xor Xq, Xq ; x = 0
cmp SOURCE_DX_ARG_REGq, 0x20000
jl .lscaleend
mov Xq, 0x8000 ; x = 0.5 for 1/2 or less
jmp .lscaleend
.lscaleloop:
%ifdef PIC
mov Uq, U_PLANE ; PIC code saves U_PLANE on stack.
%endif
; Define macros for scaling YUV components since they are reused.
; SCALEUV takes the register holding a chroma plane address and leaves the
; linearly interpolated sample in COMPL. It clobbers TEMP and COMPR.
%macro SCALEUV 1
mov TEMPq, Xq
sar TEMPq, 0x11 ; chroma index = x >> 17
; The left sample must be loaded first: COMPR may be the same register as the
; plane pointer, so the second load destroys the pointer.
movzx COMPLd, BYTE [%1 + TEMPq]
movzx COMPRd, BYTE [%1 + TEMPq + 1]
mov TEMPq, Xq
and TEMPq, 0x1fffe ; weight of the right sample
imul COMPRq, TEMPq
xor TEMPq, 0x1fffe ; weight of the left sample = 0x1fffe - right weight
imul COMPLq, TEMPq
add COMPLq, COMPRq
shr COMPLq, 17 ; drop the 17 fractional bits
%endmacro
SCALEUV Uq ; Use the above macro to scale U
; The table is indexed in 8-byte entries. The U entries start at byte offset
; 2048 and the V entries at 4096; the Y entries start at offset 0.
movq mm0, [TABLE + 2048 + 8 * COMPLq]
mov Vq, V_PLANE ; Read V address from stack
SCALEUV Vq ; Use the above macro to scale V
paddsw mm0, [TABLE + 4096 + 8 * COMPLq] ; mm0 = U entry + V entry
; SCALEY leaves the linearly interpolated luma sample in COMPL and advances
; x by source_dx. It clobbers TEMP and COMPR.
%macro SCALEY 0
mov TEMPq, Xq
sar TEMPq, 0x10 ; luma index = x >> 16
movzx COMPLd, BYTE [Yq + TEMPq]
movzx COMPRd, BYTE [Yq + TEMPq + 1]
mov TEMPq, Xq ; keep the old x for the weight computation
add Xq, SOURCE_DX ; Add source_dx from stack
and TEMPq, 0xffff ; weight of the right sample
imul COMPRq, TEMPq
xor TEMPq, 0xffff ; weight of the left sample = 0xffff - right weight
imul COMPLq, TEMPq
add COMPLq, COMPRq
shr COMPLq, 16 ; drop the 16 fractional bits
%endmacro
SCALEY ; Use the above macro to scale Y1
movq mm1, [TABLE + 8 * COMPLq]
; If x has reached source_width after the first pixel, only that single
; pixel remains to be written.
cmp Xq, SOURCE_WIDTH ; Compare source_width from stack
jge .lscalelastpixel
SCALEY ; Use the above macro to scale Y2
movq mm2, [TABLE + 8 * COMPLq]
; Add the shared chroma contribution to both luma entries, shift each 16-bit
; lane right by 6, then pack both pixels to unsigned bytes with saturation.
paddsw mm1, mm0
paddsw mm2, mm0
psraw mm1, 0x6
psraw mm2, 0x6
packuswb mm1, mm2
MOVQ [ARGBq], mm1 ; store two pixels (8 bytes)
add ARGBq, 0x8
.lscaleend:
cmp Xq, SOURCE_WIDTH ; Compare source_width from stack
jl .lscaleloop
EPILOGUE
RET
; Tail case: convert and store the single remaining pixel (4 bytes).
.lscalelastpixel:
paddsw mm1, mm0
psraw mm1, 6
packuswb mm1, mm1
movd [ARGBq], mm1
EPILOGUE
RET