| ; Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| ; Use of this source code is governed by a BSD-style license that can be |
| ; found in the LICENSE file. |
| |
| global mangle(SYMBOL) PRIVATE |
| align function_align |
| |
| mangle(SYMBOL): |
| %assign stack_offset 0 |
| |
| extern mangle(kCoefficientsRgbY) |
| |
| ; Converts a row of Y, U and V samples to ARGB pixels, linearly interpolating between adjacent source samples. Arguments, in order: |
| ; 1. Y plane (addressed through Yq) |
| ; 2. U plane (arrives in R0) |
| ; 3. V plane (arrives in R1) |
| ; 4. ARGB frame (output pointer, addressed through ARGBq) |
| ; 5. Width (arrives in R2) |
| ; 6. Source dx (arrives in R3; fixed-point step where 0x10000 advances one Y sample) |
| |
| PROLOGUE 6, 7, 3, Y, R0, R1, ARGB, R2, R3, TEMP |
| |
| %if gprsize == 8 |
| %define WORD_SIZE QWORD |
| %else |
| %define WORD_SIZE DWORD |
| %endif |
| |
| ; Define register aliases. R0-R3 first carry arguments and are reused as scratch once those arguments have been pushed on the stack. |
| %define Xq R1q ; Current X position (fixed point: shifted right by 16 to index Y, by 17 to index U/V) |
| %define COMPLq R2q ; Left sample of the pair being blended; holds the blended result afterwards |
| %define COMPLd R2d ; 32-bit view of COMPLq |
| %define U_ARG_REGq R0q ; U plane address argument |
| %define V_ARG_REGq R1q ; V plane address argument |
| %define SOURCE_DX_ARG_REGq R3q ; Source dx argument |
| %define WIDTH_ARG_REGq R2q ; Width argument |
| |
| %ifdef PIC |
| ; PIC code shares a single register (R0) between COMPR, U and V, so the code must be careful |
| ; not to mix them up. This frees R3q to hold the address of the YUV table. |
| %define COMPRq R0q ; Right sample of the pair being blended |
| %define COMPRd R0d ; 32-bit view of COMPRq |
| %define Uq R0q ; U plane address |
| %define Vq R0q ; V plane address |
| %define U_PLANE WORD_SIZE [rsp + 3 * gprsize] |
| %define TABLE R3q ; Address of the kCoefficientsRgbY table |
| %else |
| ; Non-PIC code defines: the table is addressed by symbol, so R3 is shared by COMPR and V instead. |
| %define COMPRq R3q ; Right sample of the pair being blended |
| %define COMPRd R3d ; 32-bit view of COMPRq |
| %define Uq R0q ; U plane address |
| %define Vq R3q ; V plane address |
| %define TABLE mangle(kCoefficientsRgbY) |
| %endif |
| |
| ; Defines for stack variables, used in both PIC and non-PIC code. Source width is pushed last, so it sits at [rsp]. |
| %define V_PLANE WORD_SIZE [rsp + 2 * gprsize] |
| %define SOURCE_DX WORD_SIZE [rsp + gprsize] |
| %define SOURCE_WIDTH WORD_SIZE [rsp] |
| |
| ; Spill arguments to the stack. PIC code pushes four values and non-PIC code three; the EPILOGUE macro below releases the same number of slots. |
| |
| %ifdef PIC |
| ; Stack usage for PIC code. PIC code additionally pushes the U plane onto the stack. |
| PUSH U_ARG_REGq |
| PUSH V_ARG_REGq |
| PUSH SOURCE_DX_ARG_REGq |
| imul WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq ; source_width = width * source_dx |
| PUSH WIDTH_ARG_REGq |
| |
| ; Load the address of kCoefficientsRgbY into TABLE (R3q, which still holds source_dx at this point) |
| mov TEMPq, SOURCE_DX_ARG_REGq ; Save source_dx first: TABLE is the same register |
| LOAD_SYM TABLE, mangle(kCoefficientsRgbY) |
| %define SOURCE_DX_ARG_REGq TEMPq ; From here on the source_dx argument is read from TEMPq |
| %else |
| ; Stack usage for non-PIC code: just push 3 registers onto the stack. |
| PUSH V_ARG_REGq |
| PUSH SOURCE_DX_ARG_REGq |
| imul WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq ; source_width = width * source_dx |
| PUSH WIDTH_ARG_REGq |
| %endif |
| |
| %macro EPILOGUE 0 |
| %ifdef PIC |
| ADD rsp, 4 * gprsize |
| %else |
| ADD rsp, 3 * gprsize |
| %endif |
| %endmacro |
| |
| xor Xq, Xq ; x = 0 |
| cmp SOURCE_DX_ARG_REGq, 0x20000 |
| jl .lscaleend |
| mov Xq, 0x8000 ; x = 0.5 when source_dx >= 0x20000 (2.0), i.e. scaling to 1/2 size or less |
| jmp .lscaleend |
| |
| .lscaleloop: |
| %ifdef PIC |
| mov Uq, U_PLANE ; PIC code keeps the U plane address on the stack; reload it since Uq shares a register with COMPR and V. |
| %endif |
| |
| ; SCALEUV %1: COMPL = (%1[i] * (0x1fffe - f) + %1[i + 1] * f) >> 17, with i = x >> 17 and f = x & 0x1fffe. Clobbers TEMP and COMPR (in PIC code COMPR is the same register as %1). A macro because it is used for both U and V. |
| %macro SCALEUV 1 |
| mov TEMPq, Xq |
| sar TEMPq, 0x11 |
| movzx COMPLd, BYTE [%1 + TEMPq] |
| movzx COMPRd, BYTE [%1 + TEMPq + 1] |
| mov TEMPq, Xq |
| and TEMPq, 0x1fffe |
| imul COMPRq, TEMPq |
| xor TEMPq, 0x1fffe |
| imul COMPLq, TEMPq |
| add COMPLq, COMPRq |
| shr COMPLq, 17 |
| %endmacro |
| SCALEUV Uq ; COMPL = interpolated U; it indexes the table at offset 2048 to initialise mm0 |
| movq mm0, [TABLE + 2048 + 8 * COMPLq] |
| |
| mov Vq, V_PLANE ; Read V address from stack |
| SCALEUV Vq ; COMPL = interpolated V; its table entry (offset 4096) is added to mm0 |
| paddsw mm0, [TABLE + 4096 + 8 * COMPLq] |
| |
| %macro SCALEY 0 |
| mov TEMPq, Xq |
| sar TEMPq, 0x10 |
| movzx COMPLd, BYTE [Yq + TEMPq] |
| movzx COMPRd, BYTE [Yq + TEMPq + 1] |
| mov TEMPq, Xq |
| add Xq, SOURCE_DX ; Advance x by source_dx (read from stack); TEMP still holds the previous x |
| and TEMPq, 0xffff |
| imul COMPRq, TEMPq |
| xor TEMPq, 0xffff |
| imul COMPLq, TEMPq |
| add COMPLq, COMPRq |
| shr COMPLq, 16 |
| %endmacro |
| SCALEY ; Y1: COMPL = (Y[i] * (0xffff - f) + Y[i + 1] * f) >> 16, with i = x >> 16 and f = x & 0xffff; x is advanced by source_dx |
| movq mm1, [TABLE + 8 * COMPLq] |
| |
| cmp Xq, SOURCE_WIDTH ; Compare with source_width from stack; once reached, only a single pixel remains |
| jge .lscalelastpixel |
| |
| SCALEY ; Use the above macro to scale Y2 |
| movq mm2, [TABLE + 8 * COMPLq] |
| |
| paddsw mm1, mm0 |
| paddsw mm2, mm0 |
| psraw mm1, 0x6 |
| psraw mm2, 0x6 |
| packuswb mm1, mm2 |
| MOVQ [ARGBq], mm1 |
| add ARGBq, 0x8 |
| |
| .lscaleend: |
| cmp Xq, SOURCE_WIDTH ; Loop while x < source_width (read from stack) |
| jl .lscaleloop |
| EPILOGUE |
| RET |
| |
| .lscalelastpixel: |
| paddsw mm1, mm0 |
| psraw mm1, 6 |
| packuswb mm1, mm1 |
| movd [ARGBq], mm1 |
| EPILOGUE |
| RET |