; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

%include "media/base/simd/media_export.asm"

; -----------------------------------------------------------------------------
; Template for a routine that horizontally scales one row of Y, U and V samples
; with linear interpolation and converts it to 32-bit pixels through a lookup
; table, using MMX.
;
; SYMBOL is not defined in this file; it has to be defined before this file is
; included.  EXPORT, mangle, function_align, PROLOGUE and the upper-case
; PUSH / ADD / MOVQ / RET forms are not defined here either.
; NOTE(review): they presumably come from the x86inc-style macro package pulled
; in by the %include above -- confirm before changing their spelling.
;
; The horizontal position X is 16.16 fixed point: the Y sample index is
; X >> 16 and the U/V sample index is X >> 17 (one chroma sample per two luma
; samples).  Every interpolation reads sample [i] and sample [i + 1], so each
; plane must be readable one byte past the last position that is sampled.
;
; MMX registers mm0-mm2 are written and no emms is executed in this routine.
; NOTE(review): the caller is presumably responsible for emms -- confirm.
; -----------------------------------------------------------------------------

  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0

; Parameters are in the following order:
; 1. Y plane
; 2. U plane
; 3. V plane
; 4. ARGB frame
; 5. Width
; 6. Source dx
; 7. Conversion lookup table

PROLOGUE  7, 7, 3, Y, R0, R1, ARGB, R2, TEMP, R3

%if gprsize == 8
%define     WORD_SIZE           QWORD
%else
%define     WORD_SIZE           DWORD
%endif

; Fixed-point constants for the horizontal position X.
%define     Y_FRAC_BITS         16        ; Y sample index      = X >> 16
%define     Y_FRAC_MASK         0xffff    ; Y blend fraction    = X & 0xffff
%define     UV_FRAC_BITS        17        ; U/V sample index    = X >> 17
%define     UV_FRAC_MASK        0x1fffe   ; U/V blend fraction  = X & 0x1fffe
%define     X_HALF              0x8000    ; 0.5 in 16.16
%define     DX_TWO              0x20000   ; 2.0 in 16.16

; Lookup table layout as used below: 8-byte entries indexed by sample value;
; the entries added for U start at byte 2048 and those for V at byte 4096.
%define     TABLE_ENTRY_SIZE    8
%define     TABLE_U_OFFSET      2048
%define     TABLE_V_OFFSET      4096

; Define register aliases.
;
; Several registers are reused once their incoming argument has been copied to
; the stack:
;   R0   : U plane argument -> U/V plane pointer and right-hand sample (COMPR)
;   R1   : V plane argument -> current position X
;   R2   : width argument   -> left-hand sample / interpolated value (COMPL)
;   TEMP : source dx argument -> scratch
%define     Xq                  R1q       ; Current X position
%define     COMPLq              R2q       ; Sample at index i, then the result
%define     COMPLd              R2d       ; Sample at index i, then the result
%define     U_ARG_REGq          R0q       ; U plane address argument
%define     V_ARG_REGq          R1q       ; V plane address argument
%define     SOURCE_DX_ARG_REGq  TEMPq     ; Source dx argument
%define     WIDTH_ARG_REGq      R2q       ; Width argument

%define     COMPRq              R0q       ; Sample at index i + 1
%define     COMPRd              R0d       ; Sample at index i + 1
%define     Uq                  R0q       ; U plane address
%define     Vq                  R0q       ; V plane address
%define     TABLE               R3q       ; Address of the table

; Defines for stack variables.  The offsets mirror the four pushes below, the
; last value pushed being at [rsp].
%define     U_PLANE             WORD_SIZE [rsp + 3 * gprsize]
%define     V_PLANE             WORD_SIZE [rsp + 2 * gprsize]
%define     SOURCE_DX           WORD_SIZE [rsp + gprsize]
%define     SOURCE_WIDTH        WORD_SIZE [rsp]

; Define stack usage.
  PUSH      U_ARG_REGq
  PUSH      V_ARG_REGq
  PUSH      SOURCE_DX_ARG_REGq
  imul      WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq  ; source_width = width * source_dx
  PUSH      WIDTH_ARG_REGq

; Releases the four stack variables pushed above.  Must precede every RET.
%macro EPILOGUE 0
  ADD       rsp, 4 * gprsize
%endmacro

; SCALEUV plane_pointer
; Interpolates one chroma sample at the current X and leaves it in COMPL.
;   COMPL = (p[i] * (UV_FRAC_MASK - frac) + p[i + 1] * frac) >> 17
;   where i = X >> 17 and frac = X & UV_FRAC_MASK.
; Clobbers TEMP and COMPR.  COMPR is the same register as Uq/Vq, so the plane
; pointer passed in is destroyed by the second load; p[i] therefore has to be
; loaded first and the pointer reloaded from the stack before the next use.
%macro SCALEUV 1
  mov       TEMPq, Xq
  sar       TEMPq, UV_FRAC_BITS
  movzx     COMPLd, BYTE [%1 + TEMPq]
  movzx     COMPRd, BYTE [%1 + TEMPq + 1]
  mov       TEMPq, Xq
  and       TEMPq, UV_FRAC_MASK           ; frac
  imul      COMPRq, TEMPq
  xor       TEMPq, UV_FRAC_MASK           ; UV_FRAC_MASK - frac
  imul      COMPLq, TEMPq
  add       COMPLq, COMPRq
  shr       COMPLq, UV_FRAC_BITS
%endmacro

; SCALEY
; Interpolates one luma sample at the current X, leaves it in COMPL and
; advances X by source_dx.
;   COMPL = (y[i] * (Y_FRAC_MASK - frac) + y[i + 1] * frac) >> 16
;   where i = X >> 16 and frac = X & Y_FRAC_MASK, both taken before X moves.
; Clobbers TEMP and COMPR.
%macro SCALEY 0
  mov       TEMPq, Xq
  sar       TEMPq, Y_FRAC_BITS
  movzx     COMPLd, BYTE [Yq + TEMPq]
  movzx     COMPRd, BYTE [Yq + TEMPq + 1]
  mov       TEMPq, Xq                     ; Keep the pre-increment X for frac
  add       Xq, SOURCE_DX                 ; Add source_dx from stack
  and       TEMPq, Y_FRAC_MASK            ; frac
  imul      COMPRq, TEMPq
  xor       TEMPq, Y_FRAC_MASK            ; Y_FRAC_MASK - frac
  imul      COMPLq, TEMPq
  add       COMPLq, COMPRq
  shr       COMPLq, Y_FRAC_BITS
%endmacro

  ; Choose the starting position.  TEMP still holds source_dx at this point:
  ; it was copied to the stack above but the register itself was not modified.
  xor       Xq, Xq                        ; x = 0
  cmp       SOURCE_DX_ARG_REGq, DX_TWO
  jl        .lscaleend
  mov       Xq, X_HALF                    ; x = 0.5 when source_dx >= 2.0,
                                          ; i.e. scaling to 1/2 size or less
  jmp       .lscaleend

  ; Each iteration produces two pixels that share one U/V pair.
.lscaleloop:
  mov       Uq, U_PLANE                   ; Read U address from stack
  SCALEUV   Uq                            ; COMPL = interpolated U
  movq      mm0, [TABLE + TABLE_U_OFFSET + TABLE_ENTRY_SIZE * COMPLq]

  mov       Vq, V_PLANE                   ; Read V address from stack
  SCALEUV   Vq                            ; COMPL = interpolated V
  paddsw    mm0, [TABLE + TABLE_V_OFFSET + TABLE_ENTRY_SIZE * COMPLq]

  SCALEY                                  ; COMPL = interpolated Y1, X advances
  movq      mm1, [TABLE + TABLE_ENTRY_SIZE * COMPLq]

  cmp       Xq, SOURCE_WIDTH              ; Compare source_width from stack
  jge       .lscalelastpixel              ; No second pixel left in this row

  SCALEY                                  ; COMPL = interpolated Y2, X advances
  movq      mm2, [TABLE + TABLE_ENTRY_SIZE * COMPLq]

  paddsw    mm1, mm0                      ; Pixel 1 = Y1 terms + U/V terms
  paddsw    mm2, mm0                      ; Pixel 2 = Y2 terms + U/V terms
  psraw     mm1, 0x6                      ; Drop the 6 low bits of each word
  psraw     mm2, 0x6
  packuswb  mm1, mm2                      ; Saturate to bytes: two 4-byte pixels
  MOVQ      [ARGBq], mm1
  add       ARGBq, 0x8

.lscaleend:
  cmp       Xq, SOURCE_WIDTH              ; Compare source_width from stack
  jl        .lscaleloop
  EPILOGUE
  RET

  ; Odd trailing pixel: convert and store a single 4-byte pixel.
.lscalelastpixel:
  paddsw    mm1, mm0
  psraw     mm1, 6
  packuswb  mm1, mm1
  movd      [ARGBq], mm1
  EPILOGUE
  RET