| /* |
| * Copyright © 2018, VideoLAN and dav1d authors |
| * Copyright © 2018, Two Orioles, LLC |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, this |
| * list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
| * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| |
| #include <stdint.h> |
| #include <string.h> |
| |
| #include "common/intops.h" |
| |
| #include "src/wedge.h" |
| |
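// Wedge partitions split a block along one of six directions; the oblique
// variants are named after the approximate angle, in degrees, of the
// partition edge.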
| enum WedgeDirectionType { |
| WEDGE_HORIZONTAL = 0, |
| WEDGE_VERTICAL = 1, |
| WEDGE_OBLIQUE27 = 2, |
| WEDGE_OBLIQUE63 = 3, |
| WEDGE_OBLIQUE117 = 4, |
| WEDGE_OBLIQUE153 = 5, |
| N_WEDGE_DIRECTIONS |
| }; |
| |
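// One codebook entry: a split direction plus the position of the split
// point, in eighths of the block width/height (4 = centered).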
| typedef struct { |
| uint8_t /* enum WedgeDirectionType */ direction; |
| uint8_t x_offset; |
| uint8_t y_offset; |
| } wedge_code_type; |
| |
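// The three 16-entry codebooks below cover blocks with height greater than
// width (hgtw), height less than width (hltw) and height equal to width
// (heqw); they differ only in which axis gets the off-center straight splits.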
| static const wedge_code_type wedge_codebook_16_hgtw[16] = { |
| { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, |
| { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, |
| { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 }, |
| { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 }, |
| { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, |
| { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, |
| { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, |
| { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, |
| }; |
| |
| static const wedge_code_type wedge_codebook_16_hltw[16] = { |
| { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, |
| { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, |
| { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 }, |
| { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 }, |
| { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, |
| { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, |
| { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, |
| { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, |
| }; |
| |
| static const wedge_code_type wedge_codebook_16_heqw[16] = { |
| { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, |
| { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, |
| { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 }, |
| { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 }, |
| { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, |
| { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, |
| { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, |
| { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, |
| }; |
| |
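// Static mask storage: for each block size and chroma layout (4:4:4, 4:2:2,
// 4:2:0), 16 wedge masks plus their 16 complements, i.e. 2 * 16 * w * h
// bytes per buffer.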
| static uint8_t ALIGN(wedge_masks_444_32x32[2 * 16 * 32 * 32], 64); |
| static uint8_t ALIGN(wedge_masks_444_32x16[2 * 16 * 32 * 16], 64); |
| static uint8_t ALIGN(wedge_masks_444_32x8 [2 * 16 * 32 * 8], 64); |
| static uint8_t ALIGN(wedge_masks_444_16x32[2 * 16 * 16 * 32], 64); |
| static uint8_t ALIGN(wedge_masks_444_16x16[2 * 16 * 16 * 16], 64); |
| static uint8_t ALIGN(wedge_masks_444_16x8 [2 * 16 * 16 * 8], 64); |
| static uint8_t ALIGN(wedge_masks_444_8x32 [2 * 16 * 8 * 32], 64); |
| static uint8_t ALIGN(wedge_masks_444_8x16 [2 * 16 * 8 * 16], 64); |
| static uint8_t ALIGN(wedge_masks_444_8x8 [2 * 16 * 8 * 8], 64); |
| |
| static uint8_t ALIGN(wedge_masks_422_16x32[2 * 16 * 16 * 32], 64); |
| static uint8_t ALIGN(wedge_masks_422_16x16[2 * 16 * 16 * 16], 64); |
| static uint8_t ALIGN(wedge_masks_422_16x8 [2 * 16 * 16 * 8], 64); |
| static uint8_t ALIGN(wedge_masks_422_8x32 [2 * 16 * 8 * 32], 64); |
| static uint8_t ALIGN(wedge_masks_422_8x16 [2 * 16 * 8 * 16], 64); |
| static uint8_t ALIGN(wedge_masks_422_8x8 [2 * 16 * 8 * 8], 64); |
| static uint8_t ALIGN(wedge_masks_422_4x32 [2 * 16 * 4 * 32], 64); |
| static uint8_t ALIGN(wedge_masks_422_4x16 [2 * 16 * 4 * 16], 64); |
| static uint8_t ALIGN(wedge_masks_422_4x8 [2 * 16 * 4 * 8], 32); |
| |
| static uint8_t ALIGN(wedge_masks_420_16x16[2 * 16 * 16 * 16], 64); |
| static uint8_t ALIGN(wedge_masks_420_16x8 [2 * 16 * 16 * 8], 64); |
| static uint8_t ALIGN(wedge_masks_420_16x4 [2 * 16 * 16 * 4], 64); |
| static uint8_t ALIGN(wedge_masks_420_8x16 [2 * 16 * 8 * 16], 64); |
| static uint8_t ALIGN(wedge_masks_420_8x8 [2 * 16 * 8 * 8], 64); |
| static uint8_t ALIGN(wedge_masks_420_8x4 [2 * 16 * 8 * 4], 64); |
| static uint8_t ALIGN(wedge_masks_420_4x16 [2 * 16 * 4 * 16], 64); |
| static uint8_t ALIGN(wedge_masks_420_4x8 [2 * 16 * 4 * 8], 32); |
| static uint8_t ALIGN(wedge_masks_420_4x4 [2 * 16 * 4 * 4], 16); |
| |
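// Indexed as [block size][chroma layout (0=444, 1=422, 2=420)][mask sign]
// [wedge index]; each entry points at w * h blend weights in [0, 64].
// Illustrative lookup for a 4:2:0 chroma mask on a 16x16 block:
//   const uint8_t *mask = dav1d_wedge_masks[BS_16x16][2][sign][wedge_idx];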
| const uint8_t *dav1d_wedge_masks[N_BS_SIZES][3][2][16]; |
| |
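// Write one 64-sample row of a master mask: zeros left of the transition,
// the 8-sample ramp from src centered on ctr, and full weight (64) to the
// right, clipping the ramp at the row edges.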
| static void insert_border(uint8_t *const dst, const uint8_t *const src, |
| const int ctr) |
| { |
| if (ctr > 4) memset(dst, 0, ctr - 4); |
| memcpy(dst + imax(ctr, 4) - 4, src + imax(4 - ctr, 0), imin(64 - ctr, 8)); |
| if (ctr < 64 - 4) |
| memset(dst + ctr + 4, 64, 64 - 4 - ctr); |
| } |
| |
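// Transpose a 64x64 master mask (vertical -> horizontal, 63 -> 27 degrees).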
| static void transpose(uint8_t *const dst, const uint8_t *const src) { |
| for (int y = 0, y_off = 0; y < 64; y++, y_off += 64) |
| for (int x = 0, x_off = 0; x < 64; x++, x_off += 64) |
| dst[x_off + y] = src[y_off + x]; |
| } |
| |
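// Mirror a 64x64 master mask horizontally (63 -> 117, 27 -> 153 degrees).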
| static void hflip(uint8_t *const dst, const uint8_t *const src) { |
| for (int y = 0, y_off = 0; y < 64; y++, y_off += 64) |
| for (int x = 0; x < 64; x++) |
| dst[y_off + 64 - 1 - x] = src[y_off + x]; |
| } |
| |
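// Compute the complementary mask, 64 - m, for every sample.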
| static void invert(uint8_t *const dst, const uint8_t *const src, |
| const int w, const int h) |
| { |
| for (int y = 0, y_off = 0; y < h; y++, y_off += w) |
| for (int x = 0; x < w; x++) |
| dst[y_off + x] = 64 - src[y_off + x]; |
| } |
| |
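// Crop a w x h window at (x_off, y_off) out of a 64x64 master mask.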
| static void copy2d(uint8_t *dst, const uint8_t *src, |
| const int w, const int h, const int x_off, const int y_off) |
| { |
| src += y_off * 64 + x_off; |
| for (int y = 0; y < h; y++) { |
| memcpy(dst, src, w); |
| src += 64; |
| dst += w; |
| } |
| } |
| |
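// Derive a chroma mask by averaging 2 (4:2:2) or 4 (4:2:0) luma weights.
// The rounding is biased by `sign` so that the two complementary chroma
// masks still sum to exactly 64 at every position.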
| static COLD void init_chroma(uint8_t *chroma, const uint8_t *luma, |
| const int sign, const int w, const int h, |
| const int ss_ver) |
| { |
| for (int y = 0; y < h; y += 1 + ss_ver) { |
| for (int x = 0; x < w; x += 2) { |
| int sum = luma[x] + luma[x + 1] + 1; |
| if (ss_ver) sum += luma[w + x] + luma[w + x + 1] + 1; |
| chroma[x >> 1] = (sum - sign) >> (1 + ss_ver); |
| } |
| luma += w << ss_ver; |
| chroma += w >> 1; |
| } |
| } |
| |
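// Build all masks for one block size: crop the 16 luma masks out of the
// master templates, append their 16 complements, derive the 4:2:2 and
// 4:2:0 chroma masks, and publish pointers in dav1d_wedge_masks. Bit n of
// `signs` selects which of the two complementary masks acts as sign 0 for
// wedge index n.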
| static COLD void fill2d_16x2(uint8_t *dst, const int w, const int h, |
| const enum BlockSize bs, |
| const uint8_t (*const master)[64 * 64], |
| const wedge_code_type *const cb, |
| uint8_t *masks_444, uint8_t *masks_422, |
| uint8_t *masks_420, const unsigned signs) |
| { |
| uint8_t *ptr = dst; |
| for (int n = 0; n < 16; n++) { |
| copy2d(ptr, master[cb[n].direction], w, h, |
| 32 - (w * cb[n].x_offset >> 3), 32 - (h * cb[n].y_offset >> 3)); |
| ptr += w * h; |
| } |
| for (int n = 0, off = 0; n < 16; n++, off += w * h) |
| invert(ptr + off, dst + off, w, h); |
| |
| const int n_stride_444 = (w * h); |
| const int n_stride_422 = n_stride_444 >> 1; |
| const int n_stride_420 = n_stride_444 >> 2; |
| const int sign_stride_444 = 16 * n_stride_444; |
| const int sign_stride_422 = 16 * n_stride_422; |
| const int sign_stride_420 = 16 * n_stride_420; |
| // assign pointers in externally visible array |
| for (int n = 0; n < 16; n++) { |
| const int sign = (signs >> n) & 1; |
| dav1d_wedge_masks[bs][0][0][n] = &masks_444[ sign * sign_stride_444]; |
        // not using !sign is intentional here: 4:4:4 applies no chroma
        // subsampling, so there is no rounding whose direction would have
        // to differ between the two mask signs
| dav1d_wedge_masks[bs][0][1][n] = &masks_444[ sign * sign_stride_444]; |
| dav1d_wedge_masks[bs][1][0][n] = &masks_422[ sign * sign_stride_422]; |
| dav1d_wedge_masks[bs][1][1][n] = &masks_422[!sign * sign_stride_422]; |
| dav1d_wedge_masks[bs][2][0][n] = &masks_420[ sign * sign_stride_420]; |
| dav1d_wedge_masks[bs][2][1][n] = &masks_420[!sign * sign_stride_420]; |
| masks_444 += n_stride_444; |
| masks_422 += n_stride_422; |
| masks_420 += n_stride_420; |
| |
        // since the pointers point into our own static storage, we know
        // that casting away the const is safe here. Any other approach
        // means we would have to duplicate the sign correction logic in
        // two places, which isn't very nice, or mark the externally facing
        // table as non-const, which also sucks
| init_chroma((uint8_t *)dav1d_wedge_masks[bs][1][0][n], |
| dav1d_wedge_masks[bs][0][0][n], 0, w, h, 0); |
| init_chroma((uint8_t *)dav1d_wedge_masks[bs][1][1][n], |
| dav1d_wedge_masks[bs][0][0][n], 1, w, h, 0); |
| init_chroma((uint8_t *)dav1d_wedge_masks[bs][2][0][n], |
| dav1d_wedge_masks[bs][0][0][n], 0, w, h, 1); |
| init_chroma((uint8_t *)dav1d_wedge_masks[bs][2][1][n], |
| dav1d_wedge_masks[bs][0][0][n], 1, w, h, 1); |
| } |
| } |
| |
| COLD void dav1d_init_wedge_masks(void) { |
| // This function is guaranteed to be called only once |
| |
| enum WedgeMasterLineType { |
| WEDGE_MASTER_LINE_ODD, |
| WEDGE_MASTER_LINE_EVEN, |
| WEDGE_MASTER_LINE_VERT, |
| N_WEDGE_MASTER_LINES, |
| }; |
| static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = { |
| [WEDGE_MASTER_LINE_ODD] = { 1, 2, 6, 18, 37, 53, 60, 63 }, |
| [WEDGE_MASTER_LINE_EVEN] = { 1, 4, 11, 27, 46, 58, 62, 63 }, |
| [WEDGE_MASTER_LINE_VERT] = { 0, 2, 7, 21, 43, 57, 62, 64 }, |
| }; |
| uint8_t master[6][64 * 64]; |
| |
| // create master templates |
| for (int y = 0, off = 0; y < 64; y++, off += 64) |
| insert_border(&master[WEDGE_VERTICAL][off], |
| wedge_master_border[WEDGE_MASTER_LINE_VERT], 32); |
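    // the 63-degree master shifts the transition left by one sample every
    // two rows (a slope of ~2:1), alternating even- and odd-row ramps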
| for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--) |
| { |
| insert_border(&master[WEDGE_OBLIQUE63][off], |
| wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr); |
| insert_border(&master[WEDGE_OBLIQUE63][off + 64], |
| wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1); |
| } |
| |
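    // derive the remaining four masters by transposition and mirroring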
| transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]); |
| transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]); |
| hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]); |
| hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]); |
| |
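    // sz_422 and sz_420 name the chroma-mask buffers by their subsampled
    // dimensions: 4:2:2 halves the width, 4:2:0 halves width and height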
| #define fill(w, h, sz_422, sz_420, hvsw, signs) \ |
| fill2d_16x2((uint8_t *) wedge_masks_444_##w##x##h, w, h, BS_##w##x##h, \ |
| master, wedge_codebook_16_##hvsw, wedge_masks_444_##w##x##h, \ |
| wedge_masks_422_##sz_422, wedge_masks_420_##sz_420, signs) |
| |
| fill(32, 32, 16x32, 16x16, heqw, 0x7bfb); |
| fill(32, 16, 16x16, 16x8, hltw, 0x7beb); |
| fill(32, 8, 16x8, 16x4, hltw, 0x6beb); |
| fill(16, 32, 8x32, 8x16, hgtw, 0x7beb); |
| fill(16, 16, 8x16, 8x8, heqw, 0x7bfb); |
| fill(16, 8, 8x8, 8x4, hltw, 0x7beb); |
| fill( 8, 32, 4x32, 4x16, hgtw, 0x7aeb); |
| fill( 8, 16, 4x16, 4x8, hgtw, 0x7beb); |
| fill( 8, 8, 4x8, 4x4, heqw, 0x7bfb); |
| #undef fill |
| } |
| |
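// Inter-intra blending masks: one flat mask shared by DC prediction, plus
// per-size masks for the vertical, horizontal and smooth prediction modes.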
| #define N_II_PRED_MODES (N_INTER_INTRA_PRED_MODES - 1) |
| static uint8_t ALIGN(ii_dc_mask[32 * 32], 64); |
| static uint8_t ALIGN(ii_nondc_mask_32x32[N_II_PRED_MODES][32 * 32], 64); |
| static uint8_t ALIGN(ii_nondc_mask_16x32[N_II_PRED_MODES][16 * 32], 64); |
| static uint8_t ALIGN(ii_nondc_mask_16x16[N_II_PRED_MODES][16 * 16], 64); |
| static uint8_t ALIGN(ii_nondc_mask_8x32 [N_II_PRED_MODES][ 8 * 32], 64); |
| static uint8_t ALIGN(ii_nondc_mask_8x16 [N_II_PRED_MODES][ 8 * 16], 64); |
| static uint8_t ALIGN(ii_nondc_mask_8x8 [N_II_PRED_MODES][ 8 * 8], 64); |
| static uint8_t ALIGN(ii_nondc_mask_4x16 [N_II_PRED_MODES][ 4 * 16], 64); |
| static uint8_t ALIGN(ii_nondc_mask_4x8 [N_II_PRED_MODES][ 4 * 8], 32); |
| static uint8_t ALIGN(ii_nondc_mask_4x4 [N_II_PRED_MODES][ 4 * 4], 16); |
| #undef N_II_PRED_MODES |
| |
| #define set1(sz) \ |
| [II_DC_PRED] = ii_dc_mask, \ |
| [II_VERT_PRED] = ii_nondc_mask_##sz[II_VERT_PRED - 1], \ |
| [II_HOR_PRED] = ii_nondc_mask_##sz[II_HOR_PRED - 1], \ |
| [II_SMOOTH_PRED] = ii_nondc_mask_##sz[II_SMOOTH_PRED - 1] |
| #define set(sz_444, sz_422, sz_420) \ |
| { { set1(sz_444) }, { set1(sz_422) }, { set1(sz_420) } } |
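// Indexed as [block size][chroma layout (0=444, 1=422, 2=420)][interintra
// mode]. Chroma entries point at the mask matching the subsampled width;
// blocks shorter than the stored mask just read fewer rows.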
| const uint8_t *dav1d_ii_masks[N_BS_SIZES][3][N_INTER_INTRA_PRED_MODES] = { |
| [BS_8x8] = set( 8x8, 4x8, 4x4), |
| [BS_8x16] = set( 8x16, 4x16, 4x8), |
| [BS_16x8] = set(16x16, 8x8, 8x8), |
| [BS_16x16] = set(16x16, 8x16, 8x8), |
| [BS_16x32] = set(16x32, 8x32, 8x16), |
| [BS_32x16] = set(32x32, 16x16, 16x16), |
| [BS_32x32] = set(32x32, 16x32, 16x16), |
| }; |
| #undef set |
| #undef set1 |
| |
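// Fill the vertical, horizontal and smooth masks for one block size: the
// vertical weight depends only on the row, the horizontal weight only on
// the column, and the smooth weight on whichever of the two is closer to
// the intra edge.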
| static COLD void build_nondc_ii_masks(uint8_t *const mask_v, |
| uint8_t *const mask_h, |
| uint8_t *const mask_sm, |
| const int w, const int h, const int step) |
| { |
| static const uint8_t ii_weights_1d[] = { |
| 60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10, 8, 7, |
| 6, 6, 5, 4, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, |
| }; |
| |
| for (int y = 0, off = 0; y < h; y++, off += w) { |
| memset(&mask_v[off], ii_weights_1d[y * step], w); |
| for (int x = 0; x < w; x++) { |
| mask_sm[off + x] = ii_weights_1d[imin(x, y) * step]; |
| mask_h[off + x] = ii_weights_1d[x * step]; |
| } |
| } |
| } |
| |
| COLD void dav1d_init_interintra_masks(void) { |
| // This function is guaranteed to be called only once |
| |
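    // DC prediction blends intra and inter equally: weight 32 out of 64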
| memset(ii_dc_mask, 32, 32 * 32); |
| #define set(a) a[II_VERT_PRED - 1], a[II_HOR_PRED - 1], a[II_SMOOTH_PRED - 1] |
| build_nondc_ii_masks(set(ii_nondc_mask_32x32), 32, 32, 1); |
| build_nondc_ii_masks(set(ii_nondc_mask_16x32), 16, 32, 1); |
| build_nondc_ii_masks(set(ii_nondc_mask_16x16), 16, 16, 2); |
| build_nondc_ii_masks(set(ii_nondc_mask_8x32), 8, 32, 1); |
| build_nondc_ii_masks(set(ii_nondc_mask_8x16), 8, 16, 2); |
| build_nondc_ii_masks(set(ii_nondc_mask_8x8), 8, 8, 4); |
| build_nondc_ii_masks(set(ii_nondc_mask_4x16), 4, 16, 2); |
| build_nondc_ii_masks(set(ii_nondc_mask_4x8), 4, 8, 4); |
| build_nondc_ii_masks(set(ii_nondc_mask_4x4), 4, 4, 8); |
| #undef set |
| } |