| ; Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| ; Use of this source code is governed by a BSD-style license that can be |
| ; found in the LICENSE file. |
| |
| %include "media/base/simd/media_export.asm" |
| |
; -----------------------------------------------------------------------------
; SYMBOL: convert one row of planar YUVA pixels to packed 4-byte pixels using
; MMX and a lookup table.
;
; Arguments, in PROLOGUE order: Y, U, V, A (source byte planes), ARGB
; (destination), WIDTH (pixel count, accessed as a dword), TABLE (lookup
; table base).
;
; Table layout, as indexed below: rows are 8 bytes (four 16-bit lanes) and are
; selected by a source byte, so each sub-table spans 256 * 8 = 2048 bytes.
; The Y rows start at offset 0; the U, V and A rows follow at the offsets
; named here.
;
; One U byte and one V byte are consumed per two Y/A bytes.
;
; NOTE(review): MOVQ (upper case) is not defined in this file; presumably the
; including file maps it to the desired 8-byte store instruction - confirm.
; NOTE(review): no emms is issued here; presumably the caller clears the MMX
; state - confirm.
; -----------------------------------------------------------------------------
%define YUVA_U_ROWS_OFFSET    2048
%define YUVA_V_ROWS_OFFSET    4096
%define YUVA_A_ROWS_OFFSET    6144
; Loaded into the low dword of an MMX register and shifted left by 32, this
; puts the value 1 into the topmost 16-bit lane (the alpha lane).
%define YUVA_ALPHA_LANE_ONE   0x00010000

  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0
  ; 7 arguments, 7 general purpose registers, 3 MMX registers (mm0-mm2).
  PROLOGUE  7, 7, 3, Y, U, V, A, ARGB, WIDTH, TABLE

  ; Park the pixel counter on the stack; from here on it lives at [rsp].
  PUSH      WIDTHq

  ; Rename the registers by position: the sixth one (formerly WIDTH) becomes
  ; TABLE and the seventh one (formerly TABLE) becomes TEMP. Copying TEMP into
  ; TABLE therefore moves the table pointer into the sixth register and leaves
  ; the seventh free to be used as scratch.
  DEFINE_ARGS Y, U, V, A, ARGB, TABLE, TEMP
  mov       TABLEq, TEMPq
  jmp       .pair_check

  ; ---------------------------------------------------------------------------
  ; Main loop: two pixels per iteration.
  ; ---------------------------------------------------------------------------
.pair_loop:
  ; mm0 = U row + V row; this chroma term is shared by both pixels.
  movzx     TEMPd, BYTE [Uq]
  movq      mm0, [TABLEq + YUVA_U_ROWS_OFFSET + 8 * TEMPq]
  movzx     TEMPd, BYTE [Vq]
  paddsw    mm0, [TABLEq + YUVA_V_ROWS_OFFSET + 8 * TEMPq]

  ; mm1 = Y row of the first pixel, mm2 = Y row of the second pixel.
  movzx     TEMPd, BYTE [Yq]
  movq      mm1, [TABLEq + 8 * TEMPq]
  movzx     TEMPd, BYTE [Yq + 1]
  movq      mm2, [TABLEq + 8 * TEMPq]

  ; Step the source planes: one chroma sample, two luma samples.
  add       Uq, 1
  add       Vq, 1
  add       Yq, 2

  ; Combine luma with chroma (signed saturating add), drop the 6 fractional
  ; bits, then clamp every lane to 0..255 while packing both pixels into mm1
  ; (first pixel in the low dword, second pixel in the high dword).
  paddsw    mm1, mm0
  paddsw    mm2, mm0
  psraw     mm1, 6
  psraw     mm2, 6
  packuswb  mm1, mm2

  ; Widen the clamped bytes back to words: mm0 = first pixel, mm1 = second.
  pxor      mm2, mm2
  movq      mm0, mm1
  punpcklbw mm0, mm2
  punpckhbw mm1, mm2

  ; The clamp above limits the alpha lane to 255, but the scaling below,
  ; (lane * A) >> 8, reproduces the source alpha A exactly only when the lane
  ; holds 256 (255 * A >> 8 is one short for every A except 0). Bump the alpha
  ; lane of each pixel by one to compensate.
  mov       TEMPq, YUVA_ALPHA_LANE_ONE
  movd      mm2, TEMPd
  psllq     mm2, 32
  paddsw    mm0, mm2
  paddsw    mm1, mm2

  ; Scale each pixel by its own alpha row, then keep the high byte of every
  ; 16-bit product.
  movzx     TEMPd, BYTE [Aq]
  movq      mm2, [TABLEq + YUVA_A_ROWS_OFFSET + 8 * TEMPq]
  pmullw    mm0, mm2
  movzx     TEMPd, BYTE [Aq + 1]
  movq      mm2, [TABLEq + YUVA_A_ROWS_OFFSET + 8 * TEMPq]
  pmullw    mm1, mm2
  add       Aq, 2
  psrlw     mm0, 8
  psrlw     mm1, 8
  packuswb  mm0, mm1

  ; Emit both pixels (8 bytes) and advance the destination.
  MOVQ      [ARGBq], mm0
  add       ARGBq, 8

.pair_check:
  ; Keep looping while at least two pixels remain, i.e. while the counter is
  ; still non-negative after taking two away.
  sub       dword [rsp], 2
  jns       .pair_loop

  ; ---------------------------------------------------------------------------
  ; Tail: the counter is now negative; its lowest bit is set exactly when the
  ; original width was odd, in which case one more pixel has to be converted.
  ; ---------------------------------------------------------------------------
  and       dword [rsp], 1
  jz        .finished

  ; mm1 = clamped bytes of the last pixel (duplicated in both dwords).
  movzx     TEMPd, BYTE [Uq]
  movq      mm0, [TABLEq + YUVA_U_ROWS_OFFSET + 8 * TEMPq]
  movzx     TEMPd, BYTE [Vq]
  paddsw    mm0, [TABLEq + YUVA_V_ROWS_OFFSET + 8 * TEMPq]
  movzx     TEMPd, BYTE [Yq]
  movq      mm1, [TABLEq + 8 * TEMPq]
  paddsw    mm1, mm0
  psraw     mm1, 6
  packuswb  mm1, mm1

  ; Widen to words so the pixel can be scaled by alpha.
  pxor      mm0, mm0
  punpcklbw mm1, mm0

  ; Same 255 -> 256 alpha lane adjustment as in the main loop.
  mov       TEMPq, YUVA_ALPHA_LANE_ONE
  movd      mm0, TEMPd
  psllq     mm0, 32
  paddsw    mm1, mm0

  ; Scale by the alpha row and keep the high byte of every product.
  movzx     TEMPd, BYTE [Aq]
  movq      mm0, [TABLEq + YUVA_A_ROWS_OFFSET + 8 * TEMPq]
  pmullw    mm1, mm0
  psrlw     mm1, 8
  packuswb  mm1, mm1

  ; Only the low 4 bytes (a single pixel) are written.
  movd      [ARGBq], mm1

.finished:
  ; Drop the saved counter; TABLE is no longer needed, so it serves as the pop
  ; target that rebalances the stack before returning.
  POP       TABLEq
  RET

%undef YUVA_U_ROWS_OFFSET
%undef YUVA_V_ROWS_OFFSET
%undef YUVA_A_ROWS_OFFSET
%undef YUVA_ALPHA_LANE_ONE