| ; |
| ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| ; |
| ; Use of this source code is governed by a BSD-style license |
| ; that can be found in the LICENSE file in the root of the source |
| ; tree. An additional intellectual property rights grant can be found |
| ; in the file PATENTS. All contributing project authors may |
| ; be found in the AUTHORS file in the root of the source tree. |
| ; |
| |
| |
| EXPORT |vp8_loop_filter_simple_horizontal_edge_armv6| |
| EXPORT |vp8_loop_filter_simple_vertical_edge_armv6| |
| |
| AREA |.text|, CODE, READONLY ; read-only code area: holds both exported filters and the constant pool that follows them |
| |
| MACRO |
| TRANSPOSE_MATRIX $a0, $a1, $a2, $a3, $b0, $b1, $b2, $b3 |
| ; Transposes a 4x4 byte matrix. input: $a0, $a1, $a2, $a3 (one row each, overwritten as scratch); output: $b0, $b1, $b2, $b3 (one column each) |
| ; a0: 03 02 01 00 (each pair is row,column; the rightmost pair is the least significant byte) |
| ; a1: 13 12 11 10 |
| ; a2: 23 22 21 20 |
| ; a3: 33 32 31 30 |
| ; b3 b2 b1 b0 <- output receiving each column; $b* must be distinct from $a* because outputs are written while inputs are still being read |
| |
| uxtb16 $b1, $a1 ; xx 12 xx 10 (even columns of every row first) |
| uxtb16 $b0, $a0 ; xx 02 xx 00 |
| uxtb16 $b3, $a3 ; xx 32 xx 30 |
| uxtb16 $b2, $a2 ; xx 22 xx 20 |
| orr $b1, $b0, $b1, lsl #8 ; 12 02 10 00 |
| orr $b3, $b2, $b3, lsl #8 ; 32 22 30 20 |
| |
| uxtb16 $a1, $a1, ror #8 ; xx 13 xx 11 (odd columns; the inputs are reused as scratch from here on) |
| uxtb16 $a3, $a3, ror #8 ; xx 33 xx 31 |
| uxtb16 $a0, $a0, ror #8 ; xx 03 xx 01 |
| uxtb16 $a2, $a2, ror #8 ; xx 23 xx 21 |
| orr $a0, $a0, $a1, lsl #8 ; 13 03 11 01 |
| orr $a2, $a2, $a3, lsl #8 ; 33 23 31 21 |
| |
| pkhtb $b2, $b3, $b1, asr #16 ; 32 22 12 02 -- column 2 (used as q0 at the call site in this file) |
| pkhbt $b0, $b1, $b3, lsl #16 ; 30 20 10 00 -- column 0 (used as p1 at the call site in this file) |
| |
| pkhtb $b3, $a2, $a0, asr #16 ; 33 23 13 03 -- column 3 (used as q1 at the call site in this file) |
| pkhbt $b1, $a0, $a2, lsl #16 ; 31 21 11 01 -- column 1 (used as p0 at the call site in this file) |
| MEND |
| |
| |
| |
| src RN r0 |
| pstep RN r1 |
| |
| ;r0 unsigned char *src_ptr, -- named src above; both filters below read and write pixels through it and advance it as they go |
| ;r1 int src_pixel_step, -- named pstep above; byte offset used to step from one row to the neighbouring row |
| ;r2 const char *blimit -- only the byte at [r2] is loaded; r2 is then reused to hold the 0x80808080 constant |
| |
| ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- |
| |vp8_loop_filter_simple_horizontal_edge_armv6| PROC |
| ; Simple loop filter across a horizontal edge: 4 iterations x 4 adjacent pixels; reads rows src-2*pstep (p1) .. src+pstep (q1) and rewrites rows src-pstep (p0) and src (q0) |
| stmdb sp!, {r4 - r11, lr} |
| |
| ldrb r12, [r2] ; r12 = blimit (single byte) |
| ldr r3, [src, -pstep, lsl #1] ; p1: 4 pixels of the row at src - 2*pstep |
| ldr r4, [src, -pstep] ; p0: 4 pixels of the row at src - pstep |
| ldr r5, [src] ; q0: 4 pixels of the row at src |
| ldr r6, [src, pstep] ; q1: 4 pixels of the row at src + pstep |
| orr r12, r12, r12, lsl #8 ; blimit replicated into the low 2 bytes |
| ldr r2, c0x80808080 |
| orr r12, r12, r12, lsl #16 ; blimit replicated into all 4 bytes |
| mov r9, #4 ; r9 = loop count: 4 iterations of 4 pixels each |
| mov lr, #0 ; lr = 0: zero operand for the uhadd8/shadd8 halving steps and for sel |
| |
| |simple_hnext8| |
| ; vp8_simple_filter_mask(): per byte, mask = 0xFF when abs(p0 - q0)*2 + abs(p1 - q1)/2 <= blimit, else 0x00 |
| |
| uqsub8 r7, r3, r6 ; p1 - q1, saturating at 0 |
| uqsub8 r8, r6, r3 ; q1 - p1, saturating at 0 |
| uqsub8 r10, r4, r5 ; p0 - q0, saturating at 0 |
| uqsub8 r11, r5, r4 ; q0 - p0, saturating at 0 |
| orr r8, r8, r7 ; abs(p1 - q1): one of the two saturated differences is always 0 |
| orr r10, r10, r11 ; abs(p0 - q0) |
| uqadd8 r10, r10, r10 ; abs(p0 - q0) * 2, saturating at 255 |
| uhadd8 r8, r8, lr ; abs(p1 - q1) >> 1 |
| uqadd8 r10, r10, r8 ; abs(p0 - q0)*2 + abs(p1 - q1)/2, saturating at 255 |
| mvn r8, #0 |
| usub8 r10, r12, r10 ; blimit - sum per byte; usub8 sets a GE flag for each byte where blimit >= sum |
| sel r10, r8, lr ; filter mask: 0xFF where the GE flag is set, else 0x00 |
| cmp r10, #0 |
| beq simple_hskip_filter ; skip filtering if all masks are 0x00 |
| |
| ;vp8_simple_filter(): on entry r3 = p1, r4 = p0, r5 = q0, r6 = q1, r2 = 0x80808080, r10 = mask, lr = 0 |
| |
| eor r3, r3, r2 ; p1 offset to convert to a signed value |
| eor r6, r6, r2 ; q1 offset to convert to a signed value |
| eor r4, r4, r2 ; p0 offset to convert to a signed value |
| eor r5, r5, r2 ; q0 offset to convert to a signed value |
| |
| qsub8 r3, r3, r6 ; vp8_filter = p1 - q1 (signed saturating) |
| qsub8 r6, r5, r4 ; q0 - p0 (signed saturating) |
| qadd8 r3, r3, r6 ; += q0 - p0 |
| ldr r7, c0x04040404 |
| qadd8 r3, r3, r6 ; += q0 - p0 |
| ldr r8, c0x03030303 |
| qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0), saturating after every add |
| ;STALL -- presumably marks that the next instruction depends on the qadd8 result just above |
| and r3, r3, r10 ; vp8_filter &= mask |
| |
| qadd8 r7 , r3 , r7 ; Filter1 = vp8_filter + 4 |
| qadd8 r8 , r3 , r8 ; Filter2 = vp8_filter + 3 |
| |
| shadd8 r7 , r7 , lr |
| shadd8 r8 , r8 , lr |
| shadd8 r7 , r7 , lr |
| shadd8 r8 , r8 , lr |
| shadd8 r7 , r7 , lr ; Filter1 >>= 3 (three signed halving adds with lr = 0) |
| shadd8 r8 , r8 , lr ; Filter2 >>= 3 (three signed halving adds with lr = 0) |
| |
| qsub8 r5 ,r5, r7 ; u = q0 - Filter1 |
| qadd8 r4, r4, r8 ; u = p0 + Filter2 |
| eor r5, r5, r2 ; *oq0 = u^0x80 |
| str r5, [src] ; store oq0 result |
| eor r4, r4, r2 ; *op0 = u^0x80 |
| str r4, [src, -pstep] ; store op0 result |
| |
| |simple_hskip_filter| |
| subs r9, r9, #1 |
| addne src, src, #4 ; advance to the next 4 pixels along the edge |
| |
| ldrne r3, [src, -pstep, lsl #1] ; p1 for the next iteration (skipped after the last one) |
| ldrne r4, [src, -pstep] ; p0 |
| ldrne r5, [src] ; q0 |
| ldrne r6, [src, pstep] ; q1 |
| |
| bne simple_hnext8 |
| |
| ldmia sp!, {r4 - r11, pc} |
| ENDP ; |vp8_loop_filter_simple_horizontal_edge_armv6| |
| |
| |
| ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- |
| |vp8_loop_filter_simple_vertical_edge_armv6| PROC |
| ; Simple loop filter across a vertical edge: 4 iterations x 4 rows; each row's bytes at src-2 .. src+1 (p1 p0 q0 q1) are transposed so 4 rows are filtered at once, then p0 and q0 are stored back byte by byte |
| stmdb sp!, {r4 - r11, lr} |
| |
| ldrb r12, [r2] ; r12: blimit (single byte, replicated into all 4 bytes below) |
| ldr r2, c0x80808080 |
| orr r12, r12, r12, lsl #8 |
| |
| ; load source data to r7, r8, r9, r10: one row (bytes src-2 .. src+1) per register, src advancing by pstep after each row |
| ldrh r3, [src, #-2] |
| pld [src, #23] ; preload for next block |
| ldrh r4, [src], pstep |
| orr r12, r12, r12, lsl #16 |
| |
| ldrh r5, [src, #-2] |
| pld [src, #23] |
| ldrh r6, [src], pstep |
| |
| pkhbt r7, r3, r4, lsl #16 |
| |
| ldrh r3, [src, #-2] |
| pld [src, #23] |
| ldrh r4, [src], pstep |
| |
| pkhbt r8, r5, r6, lsl #16 |
| |
| ldrh r5, [src, #-2] |
| pld [src, #23] |
| ldrh r6, [src], pstep |
| mov r11, #4 ; r11 = loop count: 4 iterations of 4 rows each |
| |
| |simple_vnext8| |
| ; vp8_simple_filter_mask() function: per byte, mask = 0xFF when abs(p0 - q0)*2 + abs(p1 - q1)/2 <= blimit, else 0x00 |
| pkhbt r9, r3, r4, lsl #16 |
| pkhbt r10, r5, r6, lsl #16 |
| |
| ;transpose rows r7, r8, r9, r10 to columns r3 = p1, r4 = p0, r5 = q0, r6 = q1 (r7-r10 are clobbered by the macro) |
| TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6 |
| |
| uqsub8 r7, r3, r6 ; p1 - q1, saturating at 0 |
| uqsub8 r8, r6, r3 ; q1 - p1, saturating at 0 |
| uqsub8 r9, r4, r5 ; p0 - q0, saturating at 0 |
| uqsub8 r10, r5, r4 ; q0 - p0, saturating at 0 |
| orr r7, r7, r8 ; abs(p1 - q1): one of the two saturated differences is always 0 |
| orr r9, r9, r10 ; abs(p0 - q0) |
| mov r8, #0 |
| uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2, saturating at 255 |
| uhadd8 r7, r7, r8 ; abs(p1 - q1) / 2 (halving add with r8 = 0) |
| uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2, saturating at 255 |
| mvn r10, #0 ; r10 = 0xFFFFFFFF |
| |
| usub8 r7, r12, r7 ; blimit - sum per byte; sets a GE flag for each byte where blimit >= sum |
| sel lr, r10, r8 ; filter mask: 0xFF where the GE flag is set, else 0x00 |
| |
| cmp lr, #0 |
| beq simple_vskip_filter ; skip filtering if all masks are 0x00 (src already points at the next 4 rows) |
| |
| ;vp8_simple_filter() function: r3 = p1, r4 = p0, r5 = q0, r6 = q1, r2 = 0x80808080, lr = mask, r8 = 0 |
| eor r3, r3, r2 ; p1 offset to convert to a signed value |
| eor r6, r6, r2 ; q1 offset to convert to a signed value |
| eor r4, r4, r2 ; p0 offset to convert to a signed value |
| eor r5, r5, r2 ; q0 offset to convert to a signed value |
| |
| qsub8 r3, r3, r6 ; vp8_filter = p1 - q1 (signed saturating) |
| qsub8 r6, r5, r4 ; q0 - p0 (signed saturating) |
| |
| qadd8 r3, r3, r6 ; vp8_filter += q0 - p0 |
| ldr r9, c0x03030303 ; r9 = 3 in every byte |
| |
| qadd8 r3, r3, r6 ; vp8_filter += q0 - p0 |
| ldr r7, c0x04040404 |
| |
| qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0), saturating after every add |
| ;STALL -- presumably marks that the next instruction depends on the qadd8 result just above |
| and r3, r3, lr ; vp8_filter &= mask |
| |
| qadd8 r9 , r3 , r9 ; Filter2 = vp8_filter + 3 |
| qadd8 r3 , r3 , r7 ; Filter1 = vp8_filter + 4 |
| |
| shadd8 r9 , r9 , r8 |
| shadd8 r3 , r3 , r8 |
| shadd8 r9 , r9 , r8 |
| shadd8 r3 , r3 , r8 |
| shadd8 r9 , r9 , r8 ; Filter2 >>= 3 (three signed halving adds with r8 = 0) |
| shadd8 r3 , r3 , r8 ; Filter1 >>= 3 (three signed halving adds with r8 = 0) |
| |
| ;calculate output: first rewind src by the 4 rows the loads advanced it, so the results land in the rows just read |
| sub src, src, pstep, lsl #2 |
| |
| qadd8 r4, r4, r9 ; u = p0 + Filter2 |
| qsub8 r5, r5, r3 ; u = q0 - Filter1 |
| eor r4, r4, r2 ; *op0 = u^0x80 |
| eor r5, r5, r2 ; *oq0 = u^0x80 |
| |
| strb r4, [src, #-1] ; store the result: p0 of the first row at src - 1; q0 follows at src, then src moves to the next row and the next byte is shifted down |
| mov r4, r4, lsr #8 |
| strb r5, [src], pstep |
| mov r5, r5, lsr #8 |
| |
| strb r4, [src, #-1] |
| mov r4, r4, lsr #8 |
| strb r5, [src], pstep |
| mov r5, r5, lsr #8 |
| |
| strb r4, [src, #-1] |
| mov r4, r4, lsr #8 |
| strb r5, [src], pstep |
| mov r5, r5, lsr #8 |
| |
| strb r4, [src, #-1] |
| strb r5, [src], pstep |
| |
| |simple_vskip_filter| |
| subs r11, r11, #1 |
| |
| ; load source data for the next 4 rows; the ldrneh loads are skipped after the last iteration, while pld and pkhbt still execute |
| ldrneh r3, [src, #-2] |
| pld [src, #23] ; preload for next block |
| ldrneh r4, [src], pstep |
| |
| ldrneh r5, [src, #-2] |
| pld [src, #23] |
| ldrneh r6, [src], pstep |
| |
| pkhbt r7, r3, r4, lsl #16 |
| |
| ldrneh r3, [src, #-2] |
| pld [src, #23] |
| ldrneh r4, [src], pstep |
| |
| pkhbt r8, r5, r6, lsl #16 |
| |
| ldrneh r5, [src, #-2] |
| pld [src, #23] |
| ldrneh r6, [src], pstep |
| |
| bne simple_vnext8 |
| |
| ldmia sp!, {r4 - r11, pc} |
| ENDP ; |vp8_loop_filter_simple_vertical_edge_armv6| |
| |
| ; Constant Pool: 32-bit words holding the same value in every byte (0x80 sign offset, and the +3 / +4 filter rounding terms), loaded by label in both filters above |
| c0x80808080 DCD 0x80808080 |
| c0x03030303 DCD 0x03030303 |
| c0x04040404 DCD 0x04040404 |
| |
| END |