| /* |
| * Copyright © 2018, VideoLAN and dav1d authors |
| * Copyright © 2020, Martin Storsjo |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, this |
| * list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
| * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "src/arm/asm.S" |
| #include "util.S" |
| #include "cdef_tmpl.S" |
| |
// pad_top_bot_16: copy two source rows (\s1 first, then \s2) into two
// consecutive destination rows starting at r0, including a border of
// 2 pixels (4 bytes) to the left and to the right of each row.
//
// Macro arguments:
//   s1, s2  - core registers holding the addresses of the first and the
//             second source row
//   w       - row width in 16-bit pixels; 2*\w is the byte offset of the
//             first pixel to the right of the row
//   stride  - destination row pitch in 16-bit elements (r0 is advanced by
//             2*\stride bytes per row)
//   r1, r2  - NEON registers carrying the \w pixels of row one / row two
//   align   - alignment qualifier, in bits, applied to the vld1/vst1 of the
//             \w-pixel payload on both the source and the destination side
//   ret     - nonzero: end each path with "pop {r4-r8,pc}", i.e. return from
//             the enclosing function; zero: advance r0 past the second row
//             and continue after the macro (label 3)
//
// Implicit inputs:
//   r0  - destination address of pixel 0 of the first row
//   r7  - edge flags; bit 0 = CDEF_HAVE_LEFT, bit 1 = CDEF_HAVE_RIGHT
//   s12 - 32-bit pattern stored in place of a border that has no source
//         pixels (padding_func_16 sets q3, whose low word is s12, to 0x8000
//         in every 16-bit lane)
//
// Clobbers: \r1, \r2, r0, the condition flags and, depending on the path
// taken, s8-s11. With ret == 0, r0 ends up 4*\stride bytes (two rows) past
// its initial value.
| // r1 = d0/q0 |
| // r2 = d2/q1 |
| .macro pad_top_bot_16 s1, s2, w, stride, r1, r2, align, ret |
| tst r7, #1 // CDEF_HAVE_LEFT |
| beq 2f |
| // CDEF_HAVE_LEFT |
| tst r7, #2 // CDEF_HAVE_RIGHT |
| beq 1f |
| // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT |
// All loads for both rows are issued before the first store. s8/s10 take the
// 2 pixels left of each row, s9/s11 the 2 pixels right of each row.
| vldr s8, [\s1, #-4] |
| vld1.16 {\r1}, [\s1, :\align] |
| vldr s9, [\s1, #2*\w] |
| vldr s10, [\s2, #-4] |
| vld1.16 {\r2}, [\s2, :\align] |
| vldr s11, [\s2, #2*\w] |
| vstr s8, [r0, #-4] |
| vst1.16 {\r1}, [r0, :\align] |
| vstr s9, [r0, #2*\w] |
| add r0, r0, #2*\stride |
| vstr s10, [r0, #-4] |
| vst1.16 {\r2}, [r0, :\align] |
| vstr s11, [r0, #2*\w] |
| .if \ret |
| pop {r4-r8,pc} |
| .else |
| add r0, r0, #2*\stride |
| b 3f |
| .endif |
| |
| 1: |
| // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT |
// Left borders come from the source rows; right borders are the s12 fill.
| vldr s8, [\s1, #-4] |
| vld1.16 {\r1}, [\s1, :\align] |
| vldr s9, [\s2, #-4] |
| vld1.16 {\r2}, [\s2, :\align] |
| vstr s8, [r0, #-4] |
| vst1.16 {\r1}, [r0, :\align] |
| vstr s12, [r0, #2*\w] |
| add r0, r0, #2*\stride |
| vstr s9, [r0, #-4] |
| vst1.16 {\r2}, [r0, :\align] |
| vstr s12, [r0, #2*\w] |
| .if \ret |
| pop {r4-r8,pc} |
| .else |
| add r0, r0, #2*\stride |
| b 3f |
| .endif |
| |
| 2: |
| // !CDEF_HAVE_LEFT |
| tst r7, #2 // CDEF_HAVE_RIGHT |
// Numeric label 1 is defined a second time further down; "1f" refers to the
// nearest following definition, i.e. the no-left/no-right path.
| beq 1f |
| // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT |
// Left borders are the s12 fill; right borders come from the source rows.
| vld1.16 {\r1}, [\s1, :\align] |
| vldr s8, [\s1, #2*\w] |
| vld1.16 {\r2}, [\s2, :\align] |
| vldr s9, [\s2, #2*\w] |
| vstr s12, [r0, #-4] |
| vst1.16 {\r1}, [r0, :\align] |
| vstr s8, [r0, #2*\w] |
| add r0, r0, #2*\stride |
| vstr s12, [r0, #-4] |
| vst1.16 {\r2}, [r0, :\align] |
| vstr s9, [r0, #2*\w] |
| .if \ret |
| pop {r4-r8,pc} |
| .else |
| add r0, r0, #2*\stride |
| b 3f |
| .endif |
| |
| 1: |
| // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT |
// Only the \w-pixel payload is read; all four border words are the s12 fill.
| vld1.16 {\r1}, [\s1, :\align] |
| vld1.16 {\r2}, [\s2, :\align] |
| vstr s12, [r0, #-4] |
| vst1.16 {\r1}, [r0, :\align] |
| vstr s12, [r0, #2*\w] |
| add r0, r0, #2*\stride |
| vstr s12, [r0, #-4] |
| vst1.16 {\r2}, [r0, :\align] |
| vstr s12, [r0, #2*\w] |
| .if \ret |
| pop {r4-r8,pc} |
| .else |
// Last path: no branch needed, execution falls through to label 3.
| add r0, r0, #2*\stride |
| .endif |
// Common exit for the ret == 0 paths; unreferenced when ret != 0.
| 3: |
| .endm |
| |
| // void dav1d_cdef_paddingX_16bpc_neon(uint16_t *tmp, const pixel *src, |
| // ptrdiff_t src_stride, const pixel (*left)[2], |
| // const pixel *const top, |
| // const pixel *const bottom, int h, |
| // enum CdefEdgeFlags edges); |
| |
| // r1 = d0/q0 |
| // r2 = d2/q1 |
//
// padding_func_16: emits cdef_padding\w\()_16bpc_neon.
//
// Macro arguments (see the two instantiations following this macro):
//   w       - block width in pixels (8 or 4)
//   stride  - tmp row pitch in 16-bit elements (16 or 8)
//   r1, r2  - NEON registers holding one row of \w pixels (q0/q1 or d0/d2)
//   align   - alignment qualifier, in bits, for the \w-pixel loads/stores
//
// Register use after the prologue (argument order as in the prototype above):
//   r0  = tmp, used as the write cursor
//   r1  = src, used as the read cursor (advanced by r2 per row)
//   r2  = src_stride, used as a byte increment
//   r3  = left, read 4 bytes (2 pixels) per row
//   r4  = top, r5 = bottom, r6 = h (rows remaining), r7 = edges
//   r8  = address of the second top/bottom row (row pointer + src_stride)
//   r12 = scratch store cursor for the fill paths
//   q3  = 0x8000 in every 16-bit lane; its low word s12 is the border fill
//
// Each row written to tmp consists of 2 border pixels, \w pixels and 2 more
// border pixels. Two rows above and two rows below the h source rows are
// written as well. Any border or row whose CDEF_HAVE_* flag is clear receives
// the 0x8000 fill instead of source pixels.
| .macro padding_func_16 w, stride, r1, r2, align |
| function cdef_padding\w\()_16bpc_neon, export=1 |
// Six registers are pushed (24 bytes), so the stack arguments start at sp+24:
// r4 = top, r5 = bottom, r6 = h, r7 = edges.
| push {r4-r8,lr} |
| ldrd r4, r5, [sp, #24] |
| ldrd r6, r7, [sp, #32] |
| vmov.i16 q3, #0x8000 |
| tst r7, #4 // CDEF_HAVE_TOP |
| bne 1f |
| // !CDEF_HAVE_TOP |
// r12 = 2 rows above and 2 pixels left of tmp. Fill 32 bytes from there, or
// 64 bytes when w == 8; for the stride values used by the instantiations in
// this file that is exactly two tmp rows. r0 is left pointing at tmp.
| sub r12, r0, #2*(2*\stride+2) |
| vmov.i16 q2, #0x8000 |
| vst1.16 {q2,q3}, [r12]! |
| .if \w == 8 |
| vst1.16 {q2,q3}, [r12]! |
| .endif |
| b 3f |
| 1: |
| // CDEF_HAVE_TOP |
// r8 = second top row. r0 is moved two rows up; the macro (ret = 0) writes
// both rows and leaves r0 back at the first row of the middle section.
| add r8, r4, r2 |
| sub r0, r0, #2*(2*\stride) |
| pad_top_bot_16 r4, r8, \w, \stride, \r1, \r2, \align, 0 |
| |
| // Middle section |
// One of four loops runs r6 (h) times. In each loop the condition flags set
// by "subs" are still intact at "bgt", because the stores and the plain "add"
// in between do not modify them.
| 3: |
| tst r7, #1 // CDEF_HAVE_LEFT |
| beq 2f |
| // CDEF_HAVE_LEFT |
| tst r7, #2 // CDEF_HAVE_RIGHT |
| beq 1f |
| // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT |
| 0: |
// Load the next 2 left pixels into every 32-bit lane of d2 and advance r3;
// s4 is the low word of d2. s5 (the high word of d2) is then overwritten
// with the 2 pixels to the right of the current source row.
| vld1.32 {d2[]}, [r3, :32]! |
| vldr s5, [r1, #2*\w] |
| vld1.16 {\r1}, [r1, :\align], r2 |
| subs r6, r6, #1 |
| vstr s4, [r0, #-4] |
| vst1.16 {\r1}, [r0, :\align] |
| vstr s5, [r0, #2*\w] |
| add r0, r0, #2*\stride |
| bgt 0b |
| b 3f |
| 1: |
| // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT |
// Left border from r3 (via s4), right border is the s12 fill.
| vld1.32 {d2[]}, [r3, :32]! |
| vld1.16 {\r1}, [r1, :\align], r2 |
| subs r6, r6, #1 |
| vstr s4, [r0, #-4] |
| vst1.16 {\r1}, [r0, :\align] |
| vstr s12, [r0, #2*\w] |
| add r0, r0, #2*\stride |
| bgt 1b |
| b 3f |
| 2: |
| tst r7, #2 // CDEF_HAVE_RIGHT |
| beq 1f |
| // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT |
| 0: |
// Left border is the s12 fill, right border is read from the source row
// (before r1 is post-incremented by the payload load).
| vldr s4, [r1, #2*\w] |
| vld1.16 {\r1}, [r1, :\align], r2 |
| subs r6, r6, #1 |
| vstr s12, [r0, #-4] |
| vst1.16 {\r1}, [r0, :\align] |
| vstr s4, [r0, #2*\w] |
| add r0, r0, #2*\stride |
| bgt 0b |
| b 3f |
| 1: |
| // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT |
// Both borders are the s12 fill; r3 (left) is not read.
| vld1.16 {\r1}, [r1, :\align], r2 |
| subs r6, r6, #1 |
| vstr s12, [r0, #-4] |
| vst1.16 {\r1}, [r0, :\align] |
| vstr s12, [r0, #2*\w] |
| add r0, r0, #2*\stride |
| bgt 1b |
| |
// Bottom section: r0 now points at the row following the last middle row.
| 3: |
| tst r7, #8 // CDEF_HAVE_BOTTOM |
| bne 1f |
| // !CDEF_HAVE_BOTTOM |
// Fill starting 2 pixels left of r0: 32 bytes, or 64 bytes when w == 8
// (two tmp rows for the stride values instantiated in this file), then return.
| sub r12, r0, #4 |
| vmov.i16 q2, #0x8000 |
| vst1.16 {q2,q3}, [r12]! |
| .if \w == 8 |
| vst1.16 {q2,q3}, [r12]! |
| .endif |
| pop {r4-r8,pc} |
| 1: |
| // CDEF_HAVE_BOTTOM |
// r8 = second bottom row. With ret = 1 every path of the macro ends in
// "pop {r4-r8,pc}", so the function returns from inside the macro expansion.
| add r8, r5, r2 |
| pad_top_bot_16 r5, r8, \w, \stride, \r1, \r2, \align, 1 |
| endfunc |
| .endm |
| |
// Instantiate the padding routines. Arguments are: width in pixels, tmp row
// pitch in 16-bit elements, the two NEON row registers, and the alignment
// qualifier in bits. This yields cdef_padding8_16bpc_neon (one q register,
// i.e. 8 pixels, per row) and cdef_padding4_16bpc_neon (one d register,
// i.e. 4 pixels, per row).
| padding_func_16 8, 16, q0, q1, 128 |
| padding_func_16 4, 8, d0, d2, 64 |
| |
// NOTE(review): tables, filter and find_dir are not defined in the visible
// part of this file; presumably they are macros provided by cdef_tmpl.S,
// which is included at the top. Confirm there what each one emits and what
// its arguments mean before changing these lines.
| tables |
| |
| filter 8, 16 |
| filter 4, 16 |
| |
| find_dir 16 |