| /* |
| * Copyright © 2021, VideoLAN and dav1d authors |
| * Copyright © 2021, Martin Storsjo |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, this |
| * list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
| * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "src/arm/asm.S" |
| #include "util.S" |
| |
| // void dav1d_splat_mv_neon(refmvs_block **rr, const refmvs_block *rmv, |
| // int bx4, int bw4, int bh4) |
| |
| function splat_mv_neon, export=1 |
// Replicates the 12-byte pattern read from rmv into a rectangle: for each
// of bh4 row pointers fetched from rr, bw4 back-to-back copies are written
// starting at byte offset bx4 * 12 inside that row.
//
// Arguments (prototype above, AAPCS64 argument order):
//   x0 = rr   array of row base pointers; advanced by 8 per row
//   x1 = rmv  pattern source; reused as the store pointer inside the loop
//   w2 = bx4  starting column; turned into the byte offset bx4 * 12
//   w3 = bw4  copies per row; selects one of six store sequences
//   w4 = bh4  number of rows; counted down to zero
//
// Preconditions that this code relies on but does not check:
//   - bw4 is one of 1, 2, 4, 8, 16, 32. The table index is clz(bw4) - 26 and
//     the table has exactly six entries, so any other value indexes outside it.
//   - bh4 >= 1. The loop tests its counter only after storing a row, so one
//     row is always written.
//   - 16 bytes are readable at rmv. Only bytes 0..11 end up in the stored
//     pattern; bytes 12..15 are loaded and then discarded.
//
// Modifies x0-x5, v0-v3 and the condition flags. No stack access, no calls.
// Scalar and vector work is interleaved below (presumably for scheduling),
// so the two dependency chains are described separately:
//
// Scalar chain:
//   w3 = clz(bw4) - 26            table index: bw4 = 32,16,8,4,2,1 -> 0..5
//   w3 = 16-bit entry at x5 + 2 * index
//   x3 = x5 - entry               entries hold (table - label), so this is
//                                 the absolute address of the chosen label
//   w2 = bx4 * 3, then << 2       = bx4 * 12; written through w2, so the
//                                 upper half of x2 is zero when added below
//
// Vector chain (b0..b15 = the 16 bytes loaded into v3):
//   v2 = b12..b15, b0..b11        v3 rotated by 12 bytes (temporary)
//   v0 = b0..b11,  b0..b3
//   v1 = b4..b11,  b0..b7
//   v2 = b8..b11,  b0..b11
// Read back to back, v0:v1:v2 hold 48 bytes = four copies of b0..b11.
| ld1 {v3.16b}, [x1] |
| clz w3, w3 |
| adr x5, L(splat_tbl) |
| sub w3, w3, #26 |
| ext v2.16b, v3.16b, v3.16b, #12 |
| ldrh w3, [x5, w3, uxtw #1] |
| add w2, w2, w2, lsl #1 |
| ext v0.16b, v2.16b, v3.16b, #4 |
| sub x3, x5, w3, uxtw |
| ext v1.16b, v2.16b, v3.16b, #8 |
| lsl w2, w2, #2 |
| ext v2.16b, v2.16b, v3.16b, #12 |
// Per-row loop: fetch the next row base from rr (post-incrementing x0),
// decrement the row counter, add the column byte offset, then jump to the
// store sequence selected above. None of the store sequences alters the
// flags, so each trailing b.gt tests the result of this subs and loops
// while rows remain.
| 1: |
| ldr x1, [x0], #8 |
| subs w4, w4, #1 |
| add x1, x1, x2 |
| br x3 |
| |
// Numeric labels below are bw4 * 10.
// AARCH64_VALID_JUMP_TARGET is a macro defined outside this file; it is
// presumably the landing pad required for the indirect branch above when
// branch target identification is enabled - confirm in src/arm/asm.S.
//
// bw4 = 1: 8 bytes of v0 (b0..b7) + low 4 bytes of v2 (b8..b11) = 12 bytes.
| 10: |
| AARCH64_VALID_JUMP_TARGET |
| st1 {v0.8b}, [x1] |
| str s2, [x1, #8] |
| b.gt 1b |
| ret |
// bw4 = 2: all of v0 (b0..b11, b0..b3) + low 8 bytes of v1 (b4..b11)
// = 24 bytes, i.e. two copies.
| 20: |
| AARCH64_VALID_JUMP_TARGET |
| st1 {v0.16b}, [x1] |
| str d1, [x1, #16] |
| b.gt 1b |
| ret |
// bw4 = 32, 16, 8, 4 share one fall-through chain of 48-byte (four-copy)
// stores. Entering at 320 executes 4 + 2 + 1 + 1 = 8 stores (32 copies),
// at 160 executes 4 stores, at 80 executes 2 and at 40 executes 1. Every
// store except the last post-increments x1 by 48.
| 320: |
| AARCH64_VALID_JUMP_TARGET |
| st1 {v0.16b, v1.16b, v2.16b}, [x1], #48 |
| st1 {v0.16b, v1.16b, v2.16b}, [x1], #48 |
| st1 {v0.16b, v1.16b, v2.16b}, [x1], #48 |
| st1 {v0.16b, v1.16b, v2.16b}, [x1], #48 |
| 160: |
| AARCH64_VALID_JUMP_TARGET |
| st1 {v0.16b, v1.16b, v2.16b}, [x1], #48 |
| st1 {v0.16b, v1.16b, v2.16b}, [x1], #48 |
| 80: |
| AARCH64_VALID_JUMP_TARGET |
| st1 {v0.16b, v1.16b, v2.16b}, [x1], #48 |
| 40: |
| AARCH64_VALID_JUMP_TARGET |
| st1 {v0.16b, v1.16b, v2.16b}, [x1] |
| b.gt 1b |
| ret |
| |
// Dispatch table of 16-bit distances from the table back to each entry
// label, stored as (table - label). Entry order follows the index
// clz(bw4) - 26, so index 0 is bw4 = 32 and index 5 is bw4 = 1.
| L(splat_tbl): |
| .hword L(splat_tbl) - 320b |
| .hword L(splat_tbl) - 160b |
| .hword L(splat_tbl) - 80b |
| .hword L(splat_tbl) - 40b |
| .hword L(splat_tbl) - 20b |
| .hword L(splat_tbl) - 10b |
| endfunc |