| /* |
| * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include <float.h> |
| #include <limits.h> |
| #include <math.h> |
| #include <stdio.h> |
| |
| #include "./vp9_rtcd.h" |
| #include "./vpx_dsp_rtcd.h" |
| #include "./vpx_config.h" |
| |
| #include "vpx_dsp/vpx_dsp_common.h" |
| #include "vpx_ports/mem.h" |
| #include "vpx_ports/vpx_timer.h" |
| #include "vpx_ports/system_state.h" |
| |
| #if CONFIG_MISMATCH_DEBUG |
| #include "vpx_util/vpx_debug_util.h" |
| #endif // CONFIG_MISMATCH_DEBUG |
| |
| #include "vp9/common/vp9_common.h" |
| #include "vp9/common/vp9_entropy.h" |
| #include "vp9/common/vp9_entropymode.h" |
| #include "vp9/common/vp9_idct.h" |
| #include "vp9/common/vp9_mvref_common.h" |
| #include "vp9/common/vp9_pred_common.h" |
| #include "vp9/common/vp9_quant_common.h" |
| #include "vp9/common/vp9_reconintra.h" |
| #include "vp9/common/vp9_reconinter.h" |
| #include "vp9/common/vp9_seg_common.h" |
| #include "vp9/common/vp9_tile_common.h" |
| #if !CONFIG_REALTIME_ONLY |
| #include "vp9/encoder/vp9_aq_360.h" |
| #include "vp9/encoder/vp9_aq_complexity.h" |
| #endif |
| #include "vp9/encoder/vp9_aq_cyclicrefresh.h" |
| #if !CONFIG_REALTIME_ONLY |
| #include "vp9/encoder/vp9_aq_variance.h" |
| #endif |
| #include "vp9/encoder/vp9_encodeframe.h" |
| #include "vp9/encoder/vp9_encodemb.h" |
| #include "vp9/encoder/vp9_encodemv.h" |
| #include "vp9/encoder/vp9_ethread.h" |
| #include "vp9/encoder/vp9_extend.h" |
| #include "vp9/encoder/vp9_multi_thread.h" |
| #include "vp9/encoder/vp9_partition_models.h" |
| #include "vp9/encoder/vp9_pickmode.h" |
| #include "vp9/encoder/vp9_rd.h" |
| #include "vp9/encoder/vp9_rdopt.h" |
| #include "vp9/encoder/vp9_segmentation.h" |
| #include "vp9/encoder/vp9_tokenize.h" |
| |
| static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, |
| int output_enabled, int mi_row, int mi_col, |
| BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); |
| |
// Constant-valued (128) reference row handed to the variance functions when
// measuring source variance for activity masking.
// TODO: replace with dedicated no-reference routines, which would be faster.
static const uint8_t VP9_VAR_OFFS[64] = {
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};
| |
| #if CONFIG_VP9_HIGHBITDEPTH |
// 16-bit-sample counterpart of the flat 128 reference, used for bit depth 8.
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};
| |
// Flat reference for bit depth 10: 64 entries of 128 * 4.
#define VP9_HIGH_VAR_OFFS_10_ROW8 \
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
  VP9_HIGH_VAR_OFFS_10_ROW8, VP9_HIGH_VAR_OFFS_10_ROW8,
  VP9_HIGH_VAR_OFFS_10_ROW8, VP9_HIGH_VAR_OFFS_10_ROW8,
  VP9_HIGH_VAR_OFFS_10_ROW8, VP9_HIGH_VAR_OFFS_10_ROW8,
  VP9_HIGH_VAR_OFFS_10_ROW8, VP9_HIGH_VAR_OFFS_10_ROW8
};
#undef VP9_HIGH_VAR_OFFS_10_ROW8
| |
// Flat reference for bit depth 12: 64 entries of 128 * 16.
static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16
};
| #endif // CONFIG_VP9_HIGHBITDEPTH |
| |
| unsigned int vp9_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref, |
| BLOCK_SIZE bs) { |
| unsigned int sse; |
| const unsigned int var = |
| cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse); |
| return var; |
| } |
| |
| #if CONFIG_VP9_HIGHBITDEPTH |
| unsigned int vp9_high_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref, |
| BLOCK_SIZE bs, int bd) { |
| unsigned int var, sse; |
| switch (bd) { |
| case 10: |
| var = |
| cpi->fn_ptr[bs].vf(ref->buf, ref->stride, |
| CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), 0, &sse); |
| break; |
| case 12: |
| var = |
| cpi->fn_ptr[bs].vf(ref->buf, ref->stride, |
| CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), 0, &sse); |
| break; |
| case 8: |
| default: |
| var = |
| cpi->fn_ptr[bs].vf(ref->buf, ref->stride, |
| CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), 0, &sse); |
| break; |
| } |
| return var; |
| } |
| #endif // CONFIG_VP9_HIGHBITDEPTH |
| |
| unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi, |
| const struct buf_2d *ref, |
| BLOCK_SIZE bs) { |
| return ROUND_POWER_OF_TWO(vp9_get_sby_variance(cpi, ref, bs), |
| num_pels_log2_lookup[bs]); |
| } |
| |
| #if CONFIG_VP9_HIGHBITDEPTH |
| unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi, |
| const struct buf_2d *ref, |
| BLOCK_SIZE bs, int bd) { |
| return (unsigned int)ROUND64_POWER_OF_TWO( |
| (int64_t)vp9_high_get_sby_variance(cpi, ref, bs, bd), |
| num_pels_log2_lookup[bs]); |
| } |
| #endif // CONFIG_VP9_HIGHBITDEPTH |
| |
| #if !CONFIG_REALTIME_ONLY |
| static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi, |
| const struct buf_2d *ref, |
| int mi_row, int mi_col, |
| BLOCK_SIZE bs) { |
| unsigned int sse, var; |
| uint8_t *last_y; |
| const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME); |
| |
| assert(last != NULL); |
| last_y = |
| &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE]; |
| var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse); |
| return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); |
| } |
| |
| static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x, |
| int mi_row, int mi_col) { |
| unsigned int var = get_sby_perpixel_diff_variance( |
| cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64); |
| if (var < 8) |
| return BLOCK_64X64; |
| else if (var < 128) |
| return BLOCK_32X32; |
| else if (var < 2048) |
| return BLOCK_16X16; |
| else |
| return BLOCK_8X8; |
| } |
| #endif // !CONFIG_REALTIME_ONLY |
| |
| static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row, |
| int mi_col, BLOCK_SIZE bsize, int segment_index) { |
| VP9_COMMON *const cm = &cpi->common; |
| const struct segmentation *const seg = &cm->seg; |
| MACROBLOCKD *const xd = &x->e_mbd; |
| MODE_INFO *mi = xd->mi[0]; |
| |
| const AQ_MODE aq_mode = cpi->oxcf.aq_mode; |
| const uint8_t *const map = |
| seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
| |
| // Initialize the segmentation index as 0. |
| mi->segment_id = 0; |
| |
| // Skip the rest if AQ mode is disabled. |
| if (!seg->enabled) return; |
| |
| switch (aq_mode) { |
| case CYCLIC_REFRESH_AQ: |
| mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
| break; |
| #if !CONFIG_REALTIME_ONLY |
| case VARIANCE_AQ: |
| if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || |
| cpi->force_update_segmentation || |
| (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { |
| int min_energy; |
| int max_energy; |
| // Get sub block energy range |
| if (bsize >= BLOCK_32X32) { |
| vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, |
| &max_energy); |
| } else { |
| min_energy = bsize <= BLOCK_16X16 ? x->mb_energy |
| : vp9_block_energy(cpi, x, bsize); |
| } |
| mi->segment_id = vp9_vaq_segment_id(min_energy); |
| } else { |
| mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
| } |
| break; |
| case EQUATOR360_AQ: |
| if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation) |
| mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows); |
| else |
| mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
| break; |
| #endif |
| case LOOKAHEAD_AQ: |
| mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
| break; |
| case PSNR_AQ: mi->segment_id = segment_index; break; |
| case PERCEPTUAL_AQ: mi->segment_id = x->segment_id; break; |
| default: |
| // NO_AQ or PSNR_AQ |
| break; |
| } |
| |
| // Set segment index from ROI map if it's enabled. |
| if (cpi->roi.enabled) |
| mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
| |
| vp9_init_plane_quantizers(cpi, x); |
| } |
| |
| // Lighter version of set_offsets that only sets the mode info |
| // pointers. |
| static INLINE void set_mode_info_offsets(VP9_COMMON *const cm, |
| MACROBLOCK *const x, |
| MACROBLOCKD *const xd, int mi_row, |
| int mi_col) { |
| const int idx_str = xd->mi_stride * mi_row + mi_col; |
| xd->mi = cm->mi_grid_visible + idx_str; |
| xd->mi[0] = cm->mi + idx_str; |
| x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); |
| } |
| |
| static void set_ssim_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, |
| const BLOCK_SIZE bsize, const int mi_row, |
| const int mi_col, int *const rdmult) { |
| const VP9_COMMON *const cm = &cpi->common; |
| |
| const int bsize_base = BLOCK_16X16; |
| const int num_8x8_w = num_8x8_blocks_wide_lookup[bsize_base]; |
| const int num_8x8_h = num_8x8_blocks_high_lookup[bsize_base]; |
| const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w; |
| const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h; |
| const int num_bcols = |
| (num_8x8_blocks_wide_lookup[bsize] + num_8x8_w - 1) / num_8x8_w; |
| const int num_brows = |
| (num_8x8_blocks_high_lookup[bsize] + num_8x8_h - 1) / num_8x8_h; |
| int row, col; |
| double num_of_mi = 0.0; |
| double geom_mean_of_scale = 0.0; |
| |
| assert(cpi->oxcf.tuning == VP8_TUNE_SSIM); |
| |
| for (row = mi_row / num_8x8_w; |
| row < num_rows && row < mi_row / num_8x8_w + num_brows; ++row) { |
| for (col = mi_col / num_8x8_h; |
| col < num_cols && col < mi_col / num_8x8_h + num_bcols; ++col) { |
| const int index = row * num_cols + col; |
| geom_mean_of_scale += log(cpi->mi_ssim_rdmult_scaling_factors[index]); |
| num_of_mi += 1.0; |
| } |
| } |
| geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi); |
| |
| *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale); |
| *rdmult = VPXMAX(*rdmult, 0); |
| set_error_per_bit(x, *rdmult); |
| vpx_clear_system_state(); |
| } |
| |
| static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, |
| MACROBLOCK *const x, int mi_row, int mi_col, |
| BLOCK_SIZE bsize) { |
| VP9_COMMON *const cm = &cpi->common; |
| const VP9EncoderConfig *const oxcf = &cpi->oxcf; |
| MACROBLOCKD *const xd = &x->e_mbd; |
| const int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
| const int mi_height = num_8x8_blocks_high_lookup[bsize]; |
| MvLimits *const mv_limits = &x->mv_limits; |
| |
| set_skip_context(xd, mi_row, mi_col); |
| |
| set_mode_info_offsets(cm, x, xd, mi_row, mi_col); |
| |
| // Set up destination pointers. |
| vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); |
| |
| // Set up limit values for MV components. |
| // Mv beyond the range do not produce new/different prediction block. |
| mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); |
| mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); |
| mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND; |
| mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND; |
| |
| // Set up distance of MB to edge of frame in 1/8th pel units. |
| assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); |
| set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows, |
| cm->mi_cols); |
| |
| // Set up source buffers. |
| vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); |
| |
| // R/D setup. |
| x->rddiv = cpi->rd.RDDIV; |
| x->rdmult = cpi->rd.RDMULT; |
| if (oxcf->tuning == VP8_TUNE_SSIM) { |
| set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); |
| } |
| |
| // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs() |
| xd->tile = *tile; |
| } |
| |
| static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd, |
| int mi_row, int mi_col, |
| BLOCK_SIZE bsize) { |
| const int block_width = |
| VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col); |
| const int block_height = |
| VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row); |
| const int mi_stride = xd->mi_stride; |
| MODE_INFO *const src_mi = xd->mi[0]; |
| int i, j; |
| |
| for (j = 0; j < block_height; ++j) |
| for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi; |
| } |
| |
| static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x, |
| MACROBLOCKD *const xd, int mi_row, int mi_col, |
| BLOCK_SIZE bsize) { |
| if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { |
| set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col); |
| xd->mi[0]->sb_type = bsize; |
| } |
| } |
| |
// Running statistics for one node of the variance tree built in
// choose_partitioning().
//
// A superblock contributes at most 16x16 samples (with 4x4 averaging). With
// 12-bit input each squared error is below 2^24, so the sum of at most 2^8 of
// them stays below 2^32 and fits in uint32_t even for high bitdepth.
typedef struct {
  uint32_t sum_square_error;  // Sum of squared sample differences.
  int32_t sum_error;          // Sum of sample differences.
  int log2_count;             // log2 of the number of samples accumulated.
  int variance;               // Filled in by get_variance().
} var;

// Statistics of a block taken whole and as two horizontal or two vertical
// halves.
typedef struct {
  var none;
  var horz[2];
  var vert[2];
} partition_variance;

// Tree levels: each level carries its own partition statistics plus its four
// children. The leaves of a v4x4 are plain `var` entries.
typedef struct {
  partition_variance part_variances;
  var split[4];
} v4x4;

typedef struct {
  partition_variance part_variances;
  v4x4 split[4];
} v8x8;

typedef struct {
  partition_variance part_variances;
  v8x8 split[4];
} v16x16;

typedef struct {
  partition_variance part_variances;
  v16x16 split[4];
} v32x32;

typedef struct {
  partition_variance part_variances;
  v32x32 split[4];
} v64x64;

// Level-independent view of a tree node: its partition statistics and the
// whole-block statistics of each of its four children.
typedef struct {
  partition_variance *part_variances;
  var *split[4];
} variance_node;

typedef enum {
  V16X16,
  V32X32,
  V64X64,
} TREE_LEVEL;
| |
| static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { |
| int i; |
| node->part_variances = NULL; |
| switch (bsize) { |
| case BLOCK_64X64: { |
| v64x64 *vt = (v64x64 *)data; |
| node->part_variances = &vt->part_variances; |
| for (i = 0; i < 4; i++) |
| node->split[i] = &vt->split[i].part_variances.none; |
| break; |
| } |
| case BLOCK_32X32: { |
| v32x32 *vt = (v32x32 *)data; |
| node->part_variances = &vt->part_variances; |
| for (i = 0; i < 4; i++) |
| node->split[i] = &vt->split[i].part_variances.none; |
| break; |
| } |
| case BLOCK_16X16: { |
| v16x16 *vt = (v16x16 *)data; |
| node->part_variances = &vt->part_variances; |
| for (i = 0; i < 4; i++) |
| node->split[i] = &vt->split[i].part_variances.none; |
| break; |
| } |
| case BLOCK_8X8: { |
| v8x8 *vt = (v8x8 *)data; |
| node->part_variances = &vt->part_variances; |
| for (i = 0; i < 4; i++) |
| node->split[i] = &vt->split[i].part_variances.none; |
| break; |
| } |
| default: { |
| v4x4 *vt = (v4x4 *)data; |
| assert(bsize == BLOCK_4X4); |
| node->part_variances = &vt->part_variances; |
| for (i = 0; i < 4; i++) node->split[i] = &vt->split[i]; |
| break; |
| } |
| } |
| } |
| |
| // Set variance values given sum square error, sum error, count. |
| static void fill_variance(uint32_t s2, int32_t s, int c, var *v) { |
| v->sum_square_error = s2; |
| v->sum_error = s; |
| v->log2_count = c; |
| } |
| |
| static void get_variance(var *v) { |
| v->variance = |
| (int)(256 * (v->sum_square_error - |
| (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> |
| v->log2_count)) >> |
| v->log2_count); |
| } |
| |
| static void sum_2_variances(const var *a, const var *b, var *r) { |
| assert(a->log2_count == b->log2_count); |
| fill_variance(a->sum_square_error + b->sum_square_error, |
| a->sum_error + b->sum_error, a->log2_count + 1, r); |
| } |
| |
| static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { |
| variance_node node; |
| memset(&node, 0, sizeof(node)); |
| tree_to_node(data, bsize, &node); |
| sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); |
| sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); |
| sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); |
| sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); |
| sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], |
| &node.part_variances->none); |
| } |
| |
| static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x, |
| MACROBLOCKD *const xd, void *data, |
| BLOCK_SIZE bsize, int mi_row, int mi_col, |
| int64_t threshold, BLOCK_SIZE bsize_min, |
| int force_split) { |
| VP9_COMMON *const cm = &cpi->common; |
| variance_node vt; |
| const int block_width = num_8x8_blocks_wide_lookup[bsize]; |
| const int block_height = num_8x8_blocks_high_lookup[bsize]; |
| |
| assert(block_height == block_width); |
| tree_to_node(data, bsize, &vt); |
| |
| if (force_split == 1) return 0; |
| |
| // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if |
| // variance is below threshold, otherwise split will be selected. |
| // No check for vert/horiz split as too few samples for variance. |
| if (bsize == bsize_min) { |
| // Variance already computed to set the force_split. |
| if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); |
| if (mi_col + block_width / 2 < cm->mi_cols && |
| mi_row + block_height / 2 < cm->mi_rows && |
| vt.part_variances->none.variance < threshold) { |
| set_block_size(cpi, x, xd, mi_row, mi_col, bsize); |
| return 1; |
| } |
| return 0; |
| } else if (bsize > bsize_min) { |
| // Variance already computed to set the force_split. |
| if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); |
| // For key frame: take split for bsize above 32X32 or very high variance. |
| if (frame_is_intra_only(cm) && |
| (bsize > BLOCK_32X32 || |
| vt.part_variances->none.variance > (threshold << 4))) { |
| return 0; |
| } |
| // If variance is low, take the bsize (no split). |
| if (mi_col + block_width / 2 < cm->mi_cols && |
| mi_row + block_height / 2 < cm->mi_rows && |
| vt.part_variances->none.variance < threshold) { |
| set_block_size(cpi, x, xd, mi_row, mi_col, bsize); |
| return 1; |
| } |
| |
| // Check vertical split. |
| if (mi_row + block_height / 2 < cm->mi_rows) { |
| BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); |
| get_variance(&vt.part_variances->vert[0]); |
| get_variance(&vt.part_variances->vert[1]); |
| if (vt.part_variances->vert[0].variance < threshold && |
| vt.part_variances->vert[1].variance < threshold && |
| get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { |
| set_block_size(cpi, x, xd, mi_row, mi_col, subsize); |
| set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize); |
| return 1; |
| } |
| } |
| // Check horizontal split. |
| if (mi_col + block_width / 2 < cm->mi_cols) { |
| BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); |
| get_variance(&vt.part_variances->horz[0]); |
| get_variance(&vt.part_variances->horz[1]); |
| if (vt.part_variances->horz[0].variance < threshold && |
| vt.part_variances->horz[1].variance < threshold && |
| get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { |
| set_block_size(cpi, x, xd, mi_row, mi_col, subsize); |
| set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize); |
| return 1; |
| } |
| } |
| |
| return 0; |
| } |
| return 0; |
| } |
| |
| static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, |
| int width, int height, |
| int content_state) { |
| if (speed >= 8) { |
| if (width <= 640 && height <= 480) |
| return (5 * threshold_base) >> 2; |
| else if ((content_state == kLowSadLowSumdiff) || |
| (content_state == kHighSadLowSumdiff) || |
| (content_state == kLowVarHighSumdiff)) |
| return (5 * threshold_base) >> 2; |
| } else if (speed == 7) { |
| if ((content_state == kLowSadLowSumdiff) || |
| (content_state == kHighSadLowSumdiff) || |
| (content_state == kLowVarHighSumdiff)) { |
| return (5 * threshold_base) >> 2; |
| } |
| } |
| return threshold_base; |
| } |
| |
| // Set the variance split thresholds for following the block sizes: |
| // 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16, |
| // 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is |
| // currently only used on key frame. |
| static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, |
| int content_state) { |
| VP9_COMMON *const cm = &cpi->common; |
| const int is_key_frame = frame_is_intra_only(cm); |
| const int threshold_multiplier = |
| is_key_frame ? 20 : cpi->sf.variance_part_thresh_mult; |
| int64_t threshold_base = |
| (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]); |
| |
| if (is_key_frame) { |
| thresholds[0] = threshold_base; |
| thresholds[1] = threshold_base >> 2; |
| thresholds[2] = threshold_base >> 2; |
| thresholds[3] = threshold_base << 2; |
| } else { |
| // Increase base variance threshold based on estimated noise level. |
| if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) { |
| NOISE_LEVEL noise_level = |
| vp9_noise_estimate_extract_level(&cpi->noise_estimate); |
| if (noise_level == kHigh) |
| threshold_base = 3 * threshold_base; |
| else if (noise_level == kMedium) |
| threshold_base = threshold_base << 1; |
| else if (noise_level < kLow) |
| threshold_base = (7 * threshold_base) >> 3; |
| } |
| #if CONFIG_VP9_TEMPORAL_DENOISING |
| if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && |
| cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow) |
| threshold_base = |
| vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level, |
| content_state, cpi->svc.temporal_layer_id); |
| else |
| threshold_base = |
| scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width, |
| cm->height, content_state); |
| #else |
| // Increase base variance threshold based on content_state/sum_diff level. |
| threshold_base = scale_part_thresh_sumdiff( |
| threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state); |
| #endif |
| thresholds[0] = threshold_base; |
| thresholds[2] = threshold_base << cpi->oxcf.speed; |
| if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7) |
| thresholds[2] = thresholds[2] << 1; |
| if (cm->width <= 352 && cm->height <= 288) { |
| thresholds[0] = threshold_base >> 3; |
| thresholds[1] = threshold_base >> 1; |
| thresholds[2] = threshold_base << 3; |
| if (cpi->rc.avg_frame_qindex[INTER_FRAME] > 220) |
| thresholds[2] = thresholds[2] << 2; |
| else if (cpi->rc.avg_frame_qindex[INTER_FRAME] > 200) |
| thresholds[2] = thresholds[2] << 1; |
| } else if (cm->width < 1280 && cm->height < 720) { |
| thresholds[1] = (5 * threshold_base) >> 2; |
| } else if (cm->width < 1920 && cm->height < 1080) { |
| thresholds[1] = threshold_base << 1; |
| } else { |
| thresholds[1] = (5 * threshold_base) >> 1; |
| } |
| if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX; |
| } |
| } |
| |
| void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q, |
| int content_state) { |
| VP9_COMMON *const cm = &cpi->common; |
| SPEED_FEATURES *const sf = &cpi->sf; |
| const int is_key_frame = frame_is_intra_only(cm); |
| if (sf->partition_search_type != VAR_BASED_PARTITION && |
| sf->partition_search_type != REFERENCE_PARTITION) { |
| return; |
| } else { |
| set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state); |
| // The thresholds below are not changed locally. |
| if (is_key_frame) { |
| cpi->vbp_threshold_sad = 0; |
| cpi->vbp_threshold_copy = 0; |
| cpi->vbp_bsize_min = BLOCK_8X8; |
| } else { |
| if (cm->width <= 352 && cm->height <= 288) |
| cpi->vbp_threshold_sad = 10; |
| else |
| cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 |
| ? (cpi->y_dequant[q][1] << 1) |
| : 1000; |
| cpi->vbp_bsize_min = BLOCK_16X16; |
| if (cm->width <= 352 && cm->height <= 288) |
| cpi->vbp_threshold_copy = 4000; |
| else if (cm->width <= 640 && cm->height <= 360) |
| cpi->vbp_threshold_copy = 8000; |
| else |
| cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000 |
| ? (cpi->y_dequant[q][1] << 3) |
| : 8000; |
| if (cpi->rc.high_source_sad || |
| (cpi->use_svc && cpi->svc.high_source_sad_superframe)) { |
| cpi->vbp_threshold_sad = 0; |
| cpi->vbp_threshold_copy = 0; |
| } |
| } |
| cpi->vbp_threshold_minmax = 15 + (q >> 3); |
| } |
| } |
| |
// For the four 8x8 sub-blocks of the 16x16 block at (x16_idx, y16_idx),
// obtains each sub-block's (max - min) from the minmax routine and returns
// the spread between the largest and smallest of those ranges. Sub-blocks
// starting outside pixels_wide x pixels_high are skipped.
// NOTE(review): the smallest range starts at 255, which looks like an 8-bit
// assumption - confirm it is adequate for the high-bitdepth path.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int largest_range = 0;
  int smallest_range = 255;
  int k;

  for (k = 0; k < 4; ++k) {
    const int x8_idx = x16_idx + ((k & 1) << 3);
    const int y8_idx = y16_idx + ((k >> 1) << 3);
    const uint8_t *src;
    const uint8_t *ref;
    int min = 0;
    int max = 0;
    int range;

    if (x8_idx >= pixels_wide || y8_idx >= pixels_high) continue;

    src = s + y8_idx * sp + x8_idx;
    ref = d + y8_idx * dp + x8_idx;
#if CONFIG_VP9_HIGHBITDEPTH
    if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
      vpx_highbd_minmax_8x8(src, sp, ref, dp, &min, &max);
    } else {
      vpx_minmax_8x8(src, sp, ref, dp, &min, &max);
    }
#else
    vpx_minmax_8x8(src, sp, ref, dp, &min, &max);
#endif
    range = max - min;
    largest_range = VPXMAX(largest_range, range);
    smallest_range = VPXMIN(smallest_range, range);
  }
  return largest_range - smallest_range;
}
| |
| static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, |
| int dp, int x8_idx, int y8_idx, v8x8 *vst, |
| #if CONFIG_VP9_HIGHBITDEPTH |
| int highbd_flag, |
| #endif |
| int pixels_wide, int pixels_high, |
| int is_key_frame) { |
| int k; |
| for (k = 0; k < 4; k++) { |
| int x4_idx = x8_idx + ((k & 1) << 2); |
| int y4_idx = y8_idx + ((k >> 1) << 2); |
| unsigned int sse = 0; |
| int sum = 0; |
| if (x4_idx < pixels_wide && y4_idx < pixels_high) { |
| int s_avg; |
| int d_avg = 128; |
| #if CONFIG_VP9_HIGHBITDEPTH |
| if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { |
| s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp); |
| if (!is_key_frame) |
| d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp); |
| } else { |
| s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); |
| if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); |
| } |
| #else |
| s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); |
| if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); |
| #endif |
| sum = s_avg - d_avg; |
| sse = sum * sum; |
| } |
| fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); |
| } |
| } |
| |
| static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d, |
| int dp, int x16_idx, int y16_idx, v16x16 *vst, |
| #if CONFIG_VP9_HIGHBITDEPTH |
| int highbd_flag, |
| #endif |
| int pixels_wide, int pixels_high, |
| int is_key_frame) { |
| int k; |
| for (k = 0; k < 4; k++) { |
| int x8_idx = x16_idx + ((k & 1) << 3); |
| int y8_idx = y16_idx + ((k >> 1) << 3); |
| unsigned int sse = 0; |
| int sum = 0; |
| if (x8_idx < pixels_wide && y8_idx < pixels_high) { |
| int s_avg; |
| int d_avg = 128; |
| #if CONFIG_VP9_HIGHBITDEPTH |
| if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { |
| s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); |
| if (!is_key_frame) |
| d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); |
| } else { |
| s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); |
| if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); |
| } |
| #else |
| s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); |
| if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); |
| #endif |
| sum = s_avg - d_avg; |
| sse = sum * sum; |
| } |
| fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); |
| } |
| } |
| |
| // Check if most of the superblock is skin content, and if so, force split to |
| // 32x32, and set x->sb_is_skin for use in mode selection. |
| static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res, |
| int mi_row, int mi_col, int *force_split) { |
| VP9_COMMON *const cm = &cpi->common; |
| #if CONFIG_VP9_HIGHBITDEPTH |
| if (cm->use_highbitdepth) return 0; |
| #endif |
| // Avoid checking superblocks on/near boundary and avoid low resolutions. |
| // Note superblock may still pick 64X64 if y_sad is very small |
| // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is. |
| if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 && |
| mi_row + 8 < cm->mi_rows)) { |
| int num_16x16_skin = 0; |
| int num_16x16_nonskin = 0; |
| uint8_t *ysignal = x->plane[0].src.buf; |
| uint8_t *usignal = x->plane[1].src.buf; |
| uint8_t *vsignal = x->plane[2].src.buf; |
| int sp = x->plane[0].src.stride; |
| int spuv = x->plane[1].src.stride; |
| const int block_index = mi_row * cm->mi_cols + mi_col; |
| const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; |
| const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; |
| const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); |
| const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); |
| // Loop through the 16x16 sub-blocks. |
| int i, j; |
| for (i = 0; i < ymis; i += 2) { |
| for (j = 0; j < xmis; j += 2) { |
| int bl_index = block_index + i * cm->mi_cols + j; |
| int is_skin = cpi->skin_map[bl_index]; |
| num_16x16_skin += is_skin; |
| num_16x16_nonskin += (1 - is_skin); |
| if (num_16x16_nonskin > 3) { |
| // Exit loop if at least 4 of the 16x16 blocks are not skin. |
| i = ymis; |
| break; |
| } |
| ysignal += 16; |
| usignal += 8; |
| vsignal += 8; |
| } |
| ysignal += (sp << 4) - 64; |
| usignal += (spuv << 3) - 32; |
| vsignal += (spuv << 3) - 32; |
| } |
| if (num_16x16_skin > 12) { |
| *force_split = 1; |
| return 1; |
| } |
| } |
| return 0; |
| } |
| |
| static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, |
| v64x64 *vt, int64_t thresholds[], |
| MV_REFERENCE_FRAME ref_frame_partition, |
| int mi_col, int mi_row) { |
| int i, j; |
| VP9_COMMON *const cm = &cpi->common; |
| const int mv_thr = cm->width > 640 ? 8 : 4; |
| // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and |
| // int_pro mv is small. If the temporal variance is small set the flag |
| // variance_low for the block. The variance threshold can be adjusted, the |
| // higher the more aggressive. |
| if (ref_frame_partition == LAST_FRAME && |
| (cpi->sf.short_circuit_low_temp_var == 1 || |
| (xd->mi[0]->mv[0].as_mv.col < mv_thr && |
| xd->mi[0]->mv[0].as_mv.col > -mv_thr && |
| xd->mi[0]->mv[0].as_mv.row < mv_thr && |
| xd->mi[0]->mv[0].as_mv.row > -mv_thr))) { |
| if (xd->mi[0]->sb_type == BLOCK_64X64) { |
| if ((vt->part_variances).none.variance < (thresholds[0] >> 1)) |
| x->variance_low[0] = 1; |
| } else if (xd->mi[0]->sb_type == BLOCK_64X32) { |
| for (i = 0; i < 2; i++) { |
| if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2)) |
| x->variance_low[i + 1] = 1; |
| } |
| } else if (xd->mi[0]->sb_type == BLOCK_32X64) { |
| for (i = 0; i < 2; i++) { |
| if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2)) |
| x->variance_low[i + 3] = 1; |
| } |
| } else { |
| for (i = 0; i < 4; i++) { |
| const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } }; |
| const int idx_str = |
| cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1]; |
| MODE_INFO **this_mi = cm->mi_grid_visible + idx_str; |
| |
| if (cm->mi_cols <= mi_col + idx[i][1] || |
| cm->mi_rows <= mi_row + idx[i][0]) |
| continue; |
| |
| if ((*this_mi)->sb_type == BLOCK_32X32) { |
| int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 || |
| cpi->sf.short_circuit_low_temp_var == 3) |
| ? ((5 * thresholds[1]) >> 3) |
| : (thresholds[1] >> 1); |
| if (vt->split[i].part_variances.none.variance < threshold_32x32) |
| x->variance_low[i + 5] = 1; |
| } else if (cpi->sf.short_circuit_low_temp_var >= 2) { |
| // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block |
| // inside. |
| if ((*this_mi)->sb_type == BLOCK_16X16 || |
| (*this_mi)->sb_type == BLOCK_32X16 || |
| (*this_mi)->sb_type == BLOCK_16X32) { |
| for (j = 0; j < 4; j++) { |
| if (vt->split[i].split[j].part_variances.none.variance < |
| (thresholds[2] >> 8)) |
| x->variance_low[(i << 2) + j + 9] = 1; |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x, |
| MACROBLOCKD *xd, BLOCK_SIZE bsize, |
| int mi_row, int mi_col) { |
| VP9_COMMON *const cm = &cpi->common; |
| BLOCK_SIZE *prev_part = cpi->prev_partition; |
| int start_pos = mi_row * cm->mi_stride + mi_col; |
| |
| const int bsl = b_width_log2_lookup[bsize]; |
| const int bs = (1 << bsl) >> 2; |
| BLOCK_SIZE subsize; |
| PARTITION_TYPE partition; |
| |
| if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
| |
| partition = partition_lookup[bsl][prev_part[start_pos]]; |
| subsize = get_subsize(bsize, partition); |
| |
| if (subsize < BLOCK_8X8) { |
| set_block_size(cpi, x, xd, mi_row, mi_col, bsize); |
| } else { |
| switch (partition) { |
| case PARTITION_NONE: |
| set_block_size(cpi, x, xd, mi_row, mi_col, bsize); |
| break; |
| case PARTITION_HORZ: |
| set_block_size(cpi, x, xd, mi_row, mi_col, subsize); |
| set_block_size(cpi, x, xd, mi_row + bs, mi_col, subsize); |
| break; |
| case PARTITION_VERT: |
| set_block_size(cpi, x, xd, mi_row, mi_col, subsize); |
| set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize); |
| break; |
| default: |
| assert(partition == PARTITION_SPLIT); |
| copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col); |
| copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col); |
| copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs); |
| copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs); |
| break; |
| } |
| } |
| } |
| |
| static int copy_partitioning(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, |
| int mi_row, int mi_col, int segment_id, |
| int sb_offset) { |
| int svc_copy_allowed = 1; |
| int frames_since_key_thresh = 1; |
| if (cpi->use_svc) { |
| // For SVC, don't allow copy if base spatial layer is key frame, or if |
| // frame is not a temporal enhancement layer frame. |
| int layer = LAYER_IDS_TO_IDX(0, cpi->svc.temporal_layer_id, |
| cpi->svc.number_temporal_layers); |
| const LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; |
| if (lc->is_key_frame || !cpi->svc.non_reference_frame) svc_copy_allowed = 0; |
| frames_since_key_thresh = cpi->svc.number_spatial_layers << 1; |
| } |
| if (cpi->rc.frames_since_key > frames_since_key_thresh && svc_copy_allowed && |
| !cpi->resize_pending && segment_id == CR_SEGMENT_ID_BASE && |
| cpi->prev_segment_id[sb_offset] == CR_SEGMENT_ID_BASE && |
| cpi->copied_frame_cnt[sb_offset] < cpi->max_copied_frame) { |
| if (cpi->prev_partition != NULL) { |
| copy_partitioning_helper(cpi, x, xd, BLOCK_64X64, mi_row, mi_col); |
| cpi->copied_frame_cnt[sb_offset] += 1; |
| memcpy(x->variance_low, &(cpi->prev_variance_low[sb_offset * 25]), |
| sizeof(x->variance_low)); |
| return 1; |
| } |
| } |
| |
| return 0; |
| } |
| |
| static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, |
| BLOCK_SIZE bsize, int mi_row, int mi_col, |
| int mi_row_high, int mi_col_high) { |
| VP9_COMMON *const cm = &cpi->common; |
| SVC *const svc = &cpi->svc; |
| BLOCK_SIZE *prev_part = svc->prev_partition_svc; |
| // Variables with _high are for higher resolution. |
| int bsize_high = 0; |
| int subsize_high = 0; |
| const int bsl_high = b_width_log2_lookup[bsize]; |
| const int bs_high = (1 << bsl_high) >> 2; |
| const int has_rows = (mi_row_high + bs_high) < cm->mi_rows; |
| const int has_cols = (mi_col_high + bs_high) < cm->mi_cols; |
| |
| const int row_boundary_block_scale_factor[BLOCK_SIZES] = { 13, 13, 13, 1, 0, |
| 1, 1, 0, 1, 1, |
| 0, 1, 0 }; |
| const int col_boundary_block_scale_factor[BLOCK_SIZES] = { 13, 13, 13, 2, 2, |
| 0, 2, 2, 0, 2, |
| 2, 0, 0 }; |
| int start_pos; |
| BLOCK_SIZE bsize_low; |
| PARTITION_TYPE partition_high; |
| |
| if (mi_row_high >= cm->mi_rows || mi_col_high >= cm->mi_cols) return 0; |
| if (mi_row >= svc->mi_rows[svc->spatial_layer_id - 1] || |
| mi_col >= svc->mi_cols[svc->spatial_layer_id - 1]) |
| return 0; |
| |
| // Find corresponding (mi_col/mi_row) block down-scaled by 2x2. |
| start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col; |
| bsize_low = prev_part[start_pos]; |
| // The block size is too big for boundaries. Do variance based partitioning. |
| if ((!has_rows || !has_cols) && bsize_low > BLOCK_16X16) return 1; |
| |
| // For reference frames: return 1 (do variance-based partitioning) if the |
| // superblock is not low source sad and lower-resoln bsize is below 32x32. |
| if (!cpi->svc.non_reference_frame && !x->skip_low_source_sad && |
| bsize_low < BLOCK_32X32) |
| return 1; |
| |
| // Scale up block size by 2x2. Force 64x64 for size larger than 32x32. |
| if (bsize_low < BLOCK_32X32) { |
| bsize_high = bsize_low + 3; |
| } else if (bsize_low >= BLOCK_32X32) { |
| bsize_high = BLOCK_64X64; |
| } |
| // Scale up blocks on boundary. |
| if (!has_cols && has_rows) { |
| bsize_high = bsize_low + row_boundary_block_scale_factor[bsize_low]; |
| } else if (has_cols && !has_rows) { |
| bsize_high = bsize_low + col_boundary_block_scale_factor[bsize_low]; |
| } else if (!has_cols && !has_rows) { |
| bsize_high = bsize_low; |
| } |
| |
| partition_high = partition_lookup[bsl_high][bsize_high]; |
| subsize_high = get_subsize(bsize, partition_high); |
| |
| if (subsize_high < BLOCK_8X8) { |
| set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high); |
| } else { |
| const int bsl = b_width_log2_lookup[bsize]; |
| const int bs = (1 << bsl) >> 2; |
| switch (partition_high) { |
| case PARTITION_NONE: |
| set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high); |
| break; |
| case PARTITION_HORZ: |
| set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high); |
| if (subsize_high < BLOCK_64X64) |
| set_block_size(cpi, x, xd, mi_row_high + bs_high, mi_col_high, |
| subsize_high); |
| break; |
| case PARTITION_VERT: |
| set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high); |
| if (subsize_high < BLOCK_64X64) |
| set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs_high, |
| subsize_high); |
| break; |
| default: |
| assert(partition_high == PARTITION_SPLIT); |
| if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col, |
| mi_row_high, mi_col_high)) |
| return 1; |
| if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1), |
| mi_col, mi_row_high + bs_high, mi_col_high)) |
| return 1; |
| if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, |
| mi_col + (bs >> 1), mi_row_high, |
| mi_col_high + bs_high)) |
| return 1; |
| if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1), |
| mi_col + (bs >> 1), mi_row_high + bs_high, |
| mi_col_high + bs_high)) |
| return 1; |
| break; |
| } |
| } |
| |
| return 0; |
| } |
| |
| static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, |
| int mi_col) { |
| VP9_COMMON *const cm = &cpi->common; |
| BLOCK_SIZE *prev_part = cpi->svc.prev_partition_svc; |
| int start_pos = mi_row * cm->mi_stride + mi_col; |
| const int bsl = b_width_log2_lookup[bsize]; |
| const int bs = (1 << bsl) >> 2; |
| BLOCK_SIZE subsize; |
| PARTITION_TYPE partition; |
| const MODE_INFO *mi = NULL; |
| int xx, yy; |
| |
| if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
| |
| mi = cm->mi_grid_visible[start_pos]; |
| partition = partition_lookup[bsl][mi->sb_type]; |
| subsize = get_subsize(bsize, partition); |
| if (subsize < BLOCK_8X8) { |
| prev_part[start_pos] = bsize; |
| } else { |
| switch (partition) { |
| case PARTITION_NONE: |
| prev_part[start_pos] = bsize; |
| if (bsize == BLOCK_64X64) { |
| for (xx = 0; xx < 8; xx += 4) |
| for (yy = 0; yy < 8; yy += 4) { |
| if ((mi_row + xx < cm->mi_rows) && (mi_col + yy < cm->mi_cols)) |
| prev_part[start_pos + xx * cm->mi_stride + yy] = bsize; |
| } |
| } |
| break; |
| case PARTITION_HORZ: |
| prev_part[start_pos] = subsize; |
| if (mi_row + bs < cm->mi_rows) |
| prev_part[start_pos + bs * cm->mi_stride] = subsize; |
| break; |
| case PARTITION_VERT: |
| prev_part[start_pos] = subsize; |
| if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; |
| break; |
| default: |
| assert(partition == PARTITION_SPLIT); |
| update_partition_svc(cpi, subsize, mi_row, mi_col); |
| update_partition_svc(cpi, subsize, mi_row + bs, mi_col); |
| update_partition_svc(cpi, subsize, mi_row, mi_col + bs); |
| update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs); |
| break; |
| } |
| } |
| } |
| |
| static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize, |
| int mi_row, int mi_col) { |
| VP9_COMMON *const cm = &cpi->common; |
| BLOCK_SIZE *prev_part = cpi->prev_partition; |
| int start_pos = mi_row * cm->mi_stride + mi_col; |
| const int bsl = b_width_log2_lookup[bsize]; |
| const int bs = (1 << bsl) >> 2; |
| BLOCK_SIZE subsize; |
| PARTITION_TYPE partition; |
| const MODE_INFO *mi = NULL; |
| |
| if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
| |
| mi = cm->mi_grid_visible[start_pos]; |
| partition = partition_lookup[bsl][mi->sb_type]; |
| subsize = get_subsize(bsize, partition); |
| if (subsize < BLOCK_8X8) { |
| prev_part[start_pos] = bsize; |
| } else { |
| switch (partition) { |
| case PARTITION_NONE: prev_part[start_pos] = bsize; break; |
| case PARTITION_HORZ: |
| prev_part[start_pos] = subsize; |
| if (mi_row + bs < cm->mi_rows) |
| prev_part[start_pos + bs * cm->mi_stride] = subsize; |
| break; |
| case PARTITION_VERT: |
| prev_part[start_pos] = subsize; |
| if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; |
| break; |
| default: |
| assert(partition == PARTITION_SPLIT); |
| update_prev_partition_helper(cpi, subsize, mi_row, mi_col); |
| update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col); |
| update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs); |
| update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs); |
| break; |
| } |
| } |
| } |
| |
| static void update_prev_partition(VP9_COMP *cpi, MACROBLOCK *x, int segment_id, |
| int mi_row, int mi_col, int sb_offset) { |
| update_prev_partition_helper(cpi, BLOCK_64X64, mi_row, mi_col); |
| cpi->prev_segment_id[sb_offset] = segment_id; |
| memcpy(&(cpi->prev_variance_low[sb_offset * 25]), x->variance_low, |
| sizeof(x->variance_low)); |
| // Reset the counter for copy partitioning |
| cpi->copied_frame_cnt[sb_offset] = 0; |
| } |
| |
| static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize, |
| unsigned int y_sad, int is_key_frame, |
| int scene_change_detected) { |
| int i; |
| MACROBLOCKD *xd = &x->e_mbd; |
| int shift = 2; |
| |
| if (is_key_frame) return; |
| |
| // For speed > 8, avoid the chroma check if y_sad is above threshold. |
| if (cpi->oxcf.speed > 8) { |
| if (y_sad > cpi->vbp_thresholds[1] && |
| (!cpi->noise_estimate.enabled || |
| vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium)) |
| return; |
| } |
| |
| if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && scene_change_detected) |
| shift = 5; |
| |
| for (i = 1; i <= 2; ++i) { |
| unsigned int uv_sad = UINT_MAX; |
| struct macroblock_plane *p = &x->plane[i]; |
| struct macroblockd_plane *pd = &xd->plane[i]; |
| const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); |
| |
| if (bs != BLOCK_INVALID) |
| uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf, |
| pd->dst.stride); |
| |
| // TODO(marpan): Investigate if we should lower this threshold if |
| // superblock is detected as skin. |
| x->color_sensitivity[i - 1] = uv_sad > (y_sad >> shift); |
| } |
| } |
| |
| static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, |
| int sb_offset) { |
| unsigned int tmp_sse; |
| uint64_t tmp_sad; |
| unsigned int tmp_variance; |
| const BLOCK_SIZE bsize = BLOCK_64X64; |
| uint8_t *src_y = cpi->Source->y_buffer; |
| int src_ystride = cpi->Source->y_stride; |
| uint8_t *last_src_y = cpi->Last_Source->y_buffer; |
| int last_src_ystride = cpi->Last_Source->y_stride; |
| uint64_t avg_source_sad_threshold = 10000; |
| uint64_t avg_source_sad_threshold2 = 12000; |
| #if CONFIG_VP9_HIGHBITDEPTH |
| if (cpi->common.use_highbitdepth) return 0; |
| #endif |
| src_y += shift; |
| last_src_y += shift; |
| tmp_sad = |
| cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride); |
| tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y, |
| last_src_ystride, &tmp_sse); |
| // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12) |
| if (tmp_sad < avg_source_sad_threshold) |
| x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff |
| : kLowSadHighSumdiff; |
| else |
| x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff |
| : kHighSadHighSumdiff; |
| |
| // Detect large lighting change. |
| if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && |
| cpi->oxcf.rc_mode == VPX_CBR && tmp_variance < (tmp_sse >> 3) && |
| (tmp_sse - tmp_variance) > 10000) |
| x->content_state_sb = kLowVarHighSumdiff; |
| else if (tmp_sad > (avg_source_sad_threshold << 1)) |
| x->content_state_sb = kVeryHighSad; |
| |
| if (cpi->content_state_sb_fd != NULL) { |
| if (tmp_sad < avg_source_sad_threshold2) { |
| // Cap the increment to 255. |
| if (cpi->content_state_sb_fd[sb_offset] < 255) |
| cpi->content_state_sb_fd[sb_offset]++; |
| } else { |
| cpi->content_state_sb_fd[sb_offset] = 0; |
| } |
| } |
| if (tmp_sad == 0) x->zero_temp_sad_source = 1; |
| return tmp_sad; |
| } |
| |
| // This function chooses partitioning based on the variance between source and |
| // reconstructed last, where variance is computed for down-sampled inputs. |
| static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, |
| MACROBLOCK *x, int mi_row, int mi_col) { |
| VP9_COMMON *const cm = &cpi->common; |
| MACROBLOCKD *xd = &x->e_mbd; |
| int i, j, k, m; |
| v64x64 vt; |
| v16x16 *vt2 = NULL; |
| int force_split[21]; |
| int avg_32x32; |
| int max_var_32x32 = 0; |
| int min_var_32x32 = INT_MAX; |
| int var_32x32; |
| int avg_16x16[4]; |
| int maxvar_16x16[4]; |
| int minvar_16x16[4]; |
| int64_t threshold_4x4avg; |
| NOISE_LEVEL noise_level = kLow; |
| int content_state = 0; |
| uint8_t *s; |
| const uint8_t *d; |
| int sp; |
| int dp; |
| int compute_minmax_variance = 1; |
| unsigned int y_sad = UINT_MAX; |
| BLOCK_SIZE bsize = BLOCK_64X64; |
| // Ref frame used in partitioning. |
| MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME; |
| int pixels_wide = 64, pixels_high = 64; |
| int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], |
| cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] }; |
| int scene_change_detected = |
| cpi->rc.high_source_sad || |
| (cpi->use_svc && cpi->svc.high_source_sad_superframe); |
| int force_64_split = scene_change_detected || |
| (cpi->oxcf.content == VP9E_CONTENT_SCREEN && |
| cpi->compute_source_sad_onepass && |
| cpi->sf.use_source_sad && !x->zero_temp_sad_source); |
| |
| // For the variance computation under SVC mode, we treat the frame as key if |
| // the reference (base layer frame) is key frame (i.e., is_key_frame == 1). |
| int is_key_frame = |
| (frame_is_intra_only(cm) || |
| (is_one_pass_cbr_svc(cpi) && |
| cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)); |
| // Always use 4x4 partition for key frame. |
| const int use_4x4_partition = frame_is_intra_only(cm); |
| const int low_res = (cm->width <= 352 && cm->height <= 288); |
| int variance4x4downsample[16]; |
| int segment_id; |
| int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3); |
| |
| // For SVC: check if LAST frame is NULL or if the resolution of LAST is |
| // different than the current frame resolution, and if so, treat this frame |
| // as a key frame, for the purpose of the superblock partitioning. |
| // LAST == NULL can happen in some cases where enhancement spatial layers are |
| // enabled dynamically in the stream and the only reference is the spatial |
| // reference (GOLDEN). |
| if (cpi->use_svc) { |
| const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, LAST_FRAME); |
| if (ref == NULL || ref->y_crop_height != cm->height || |
| ref->y_crop_width != cm->width) |
| is_key_frame = 1; |
| } |
| |
| set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); |
| set_segment_index(cpi, x, mi_row, mi_col, BLOCK_64X64, 0); |
| segment_id = xd->mi[0]->segment_id; |
| |
| if (cpi->oxcf.speed >= 8 || (cpi->use_svc && cpi->svc.non_reference_frame)) |
| compute_minmax_variance = 0; |
| |
| memset(x->variance_low, 0, sizeof(x->variance_low)); |
| |
| if (cpi->sf.use_source_sad && !is_key_frame) { |
| int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); |
| content_state = x->content_state_sb; |
| x->skip_low_source_sad = (content_state == kLowSadLowSumdiff || |
| content_state == kLowSadHighSumdiff) |
| ? 1 |
| : 0; |
| x->lowvar_highsumdiff = (content_state == kLowVarHighSumdiff) ? 1 : 0; |
| if (cpi->content_state_sb_fd != NULL) |
| x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2]; |
| |
| // For SVC on top spatial layer: use/scale the partition from |
| // the lower spatial resolution if svc_use_lowres_part is enabled. |
| if (cpi->sf.svc_use_lowres_part && |
| cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 && |
| cpi->svc.prev_partition_svc != NULL && content_state != kVeryHighSad) { |
| if (!scale_partitioning_svc(cpi, x, xd, BLOCK_64X64, mi_row >> 1, |
| mi_col >> 1, mi_row, mi_col)) { |
| if (cpi->sf.copy_partition_flag) { |
| update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); |
| } |
| return 0; |
| } |
| } |
| // If source_sad is low copy the partition without computing the y_sad. |
| if (x->skip_low_source_sad && cpi->sf.copy_partition_flag && |
| !force_64_split && |
| copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) { |
| x->sb_use_mv_part = 1; |
| if (cpi->sf.svc_use_lowres_part && |
| cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) |
| update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); |
| return 0; |
| } |
| } |
| |
| if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && |
| cyclic_refresh_segment_id_boosted(segment_id)) { |
| int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); |
| set_vbp_thresholds(cpi, thresholds, q, content_state); |
| } else { |
| set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state); |
| } |
| // Decrease 32x32 split threshold for screen on base layer, for scene |
| // change/high motion frames. |
| if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && |
| cpi->svc.spatial_layer_id == 0 && force_64_split) |
| thresholds[1] = 3 * thresholds[1] >> 2; |
| |
| // For non keyframes, disable 4x4 average for low resolution when speed = 8 |
| threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX; |
| |
| if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); |
| if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3); |
| |
| s = x->plane[0].src.buf; |
| sp = x->plane[0].src.stride; |
| |
| // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, |
| // 5-20 for the 16x16 blocks. |
| force_split[0] = force_64_split; |
| |
| if (!is_key_frame) { |
| // In the case of spatial/temporal scalable coding, the assumption here is |
| // that the temporal reference frame will always be of type LAST_FRAME. |
| // TODO(marpan): If that assumption is broken, we need to revisit this code. |
| MODE_INFO *mi = xd->mi[0]; |
| YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); |
| |
| const YV12_BUFFER_CONFIG *yv12_g = NULL; |
| unsigned int y_sad_g, y_sad_thr, y_sad_last; |
| bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + |
| (mi_row + 4 < cm->mi_rows); |
| |
| assert(yv12 != NULL); |
| |
| if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) || |
| cpi->svc.use_gf_temporal_ref_current_layer) { |
| // For now, GOLDEN will not be used for non-zero spatial layers, since |
| // it may not be a temporal reference. |
| yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); |
| } |
| |
| // Only compute y_sad_g (sad for golden reference) for speed < 8. |
| if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 && |
| (cpi->ref_frame_flags & VP9_GOLD_FLAG)) { |
| vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, |
| &cm->frame_refs[GOLDEN_FRAME - 1].sf); |
| y_sad_g = cpi->fn_ptr[bsize].sdf( |
| x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, |
| xd->plane[0].pre[0].stride); |
| } else { |
| y_sad_g = UINT_MAX; |
| } |
| |
| if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && |
| cpi->rc.is_src_frame_alt_ref) { |
| yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME); |
| vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, |
| &cm->frame_refs[ALTREF_FRAME - 1].sf); |
| mi->ref_frame[0] = ALTREF_FRAME; |
| y_sad_g = UINT_MAX; |
| } else { |
| vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, |
| &cm->frame_refs[LAST_FRAME - 1].sf); |
| mi->ref_frame[0] = LAST_FRAME; |
| } |
| mi->ref_frame[1] = NONE; |
| mi->sb_type = BLOCK_64X64; |
| mi->mv[0].as_int = 0; |
| mi->interp_filter = BILINEAR; |
| |
| if (cpi->oxcf.speed >= 8 && !low_res && |
| x->content_state_sb != kVeryHighSad) { |
| y_sad = cpi->fn_ptr[bsize].sdf( |
| x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, |
| xd->plane[0].pre[0].stride); |
| } else { |
| const MV dummy_mv = { 0, 0 }; |
| y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col, |
| &dummy_mv); |
| x->sb_use_mv_part = 1; |
| x->sb_mvcol_part = mi->mv[0].as_mv.col; |
| x->sb_mvrow_part = mi->mv[0].as_mv.row; |
| if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && |
| cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode && |
| cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source && |
| cm->width > 640 && cm->height > 480) { |
| // Disable split below 16x16 block size when scroll motion (horz or |
| // vert) is detected. |
| // TODO(marpan/jianj): Improve this condition: issue is that search |
| // range is hard-coded/limited in vp9_int_pro_motion_estimation() so |
| // scroll motion may not be detected here. |
| if (((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) || |
| (abs(x->sb_mvcol_part) >= 48 && abs(x->sb_mvrow_part) <= 8)) && |
| y_sad < 100000) { |
| compute_minmax_variance = 0; |
| thresholds[2] = INT64_MAX; |
| } |
| } |
| } |
| |
| y_sad_last = y_sad; |
| // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad |
| // are close if short_circuit_low_temp_var is on. |
| y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad; |
| if (y_sad_g < y_sad_thr) { |
| vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, |
| &cm->frame_refs[GOLDEN_FRAME - 1].sf); |
| mi->ref_frame[0] = GOLDEN_FRAME; |
| mi->mv[0].as_int = 0; |
| y_sad = y_sad_g; |
| ref_frame_partition = GOLDEN_FRAME; |
| } else { |
| x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; |
| ref_frame_partition = LAST_FRAME; |
| } |
| |
| set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); |
| vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); |
| |
| if (cpi->use_skin_detection) |
| x->sb_is_skin = |
| skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split); |
| |
| d = xd->plane[0].dst.buf; |
| dp = xd->plane[0].dst.stride; |
| |
| // If the y_sad is very small, take 64x64 as partition and exit. |
| // Don't check on boosted segment for now, as 64x64 is suppressed there. |
| if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) { |
| const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64]; |
| const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64]; |
| if (mi_col + block_width / 2 < cm->mi_cols && |
| mi_row + block_height / 2 < cm->mi_rows) { |
| set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64); |
| x->variance_low[0] = 1; |
| chroma_check(cpi, x, bsize, y_sad, is_key_frame, scene_change_detected); |
| if (cpi->sf.svc_use_lowres_part && |
| cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) |
| update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); |
| if (cpi->sf.copy_partition_flag) { |
| update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); |
| } |
| return 0; |
| } |
| } |
| |
| // If the y_sad is small enough, copy the partition of the superblock in the |
| // last frame to current frame only if the last frame is not a keyframe. |
| // Stop the copy every cpi->max_copied_frame to refresh the partition. |
| // TODO(jianj) : tune the threshold. |
| if (cpi->sf.copy_partition_flag && y_sad_last < cpi->vbp_threshold_copy && |
| copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) { |
| chroma_check(cpi, x, bsize, y_sad, is_key_frame, scene_change_detected); |
| if (cpi->sf.svc_use_lowres_part && |
| cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) |
| update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); |
| return 0; |
| } |
| } else { |
| d = VP9_VAR_OFFS; |
| dp = 0; |
| #if CONFIG_VP9_HIGHBITDEPTH |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
| switch (xd->bd) { |
| case 10: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); break; |
| case 12: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); break; |
| case 8: |
| default: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); break; |
| } |
| } |
| #endif // CONFIG_VP9_HIGHBITDEPTH |
| } |
| |
| if (low_res && threshold_4x4avg < INT64_MAX) |
| CHECK_MEM_ERROR(cm, vt2, vpx_calloc(16, sizeof(*vt2))); |
| // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances |
| // for splits. |
| for (i = 0; i < 4; i++) { |
| const int x32_idx = ((i & 1) << 5); |
| const int y32_idx = ((i >> 1) << 5); |
| const int i2 = i << 2; |
| force_split[i + 1] = 0; |
| avg_16x16[i] = 0; |
| maxvar_16x16[i] = 0; |
| minvar_16x16[i] = INT_MAX; |
| for (j = 0; j < 4; j++) { |
| const int x16_idx = x32_idx + ((j & 1) << 4); |
| const int y16_idx = y32_idx + ((j >> 1) << 4); |
| const int split_index = 5 + i2 + j; |
| v16x16 *vst = &vt.split[i].split[j]; |
| force_split[split_index] = 0; |
| variance4x4downsample[i2 + j] = 0; |
| if (!is_key_frame) { |
| fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, |
| #if CONFIG_VP9_HIGHBITDEPTH |
| xd->cur_buf->flags, |
| #endif |
| pixels_wide, pixels_high, is_key_frame); |
| fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); |
| get_variance(&vt.split[i].split[j].part_variances.none); |
| avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance; |
| if (vt.split[i].split[j].part_variances.none.variance < minvar_16x16[i]) |
| minvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance; |
| if (vt.split[i].split[j].part_variances.none.variance > maxvar_16x16[i]) |
| maxvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance; |
| if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) { |
| // 16X16 variance is above threshold for split, so force split to 8x8 |
| // for this 16x16 block (this also forces splits for upper levels). |
| force_split[split_index] = 1; |
| force_split[i + 1] = 1; |
| force_split[0] = 1; |
| } else if (compute_minmax_variance && |
| vt.split[i].split[j].part_variances.none.variance > |
| thresholds[1] && |
| !cyclic_refresh_segment_id_boosted(segment_id)) { |
| // We have some nominal amount of 16x16 variance (based on average), |
| // compute the minmax over the 8x8 sub-blocks, and if above threshold, |
| // force split to 8x8 block for this 16x16 block. |
| int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx, |
| #if CONFIG_VP9_HIGHBITDEPTH |
| xd->cur_buf->flags, |
| #endif |
| pixels_wide, pixels_high); |
| int thresh_minmax = (int)cpi->vbp_threshold_minmax; |
| if (x->content_state_sb == kVeryHighSad) |
| thresh_minmax = thresh_minmax << 1; |
| if (minmax > thresh_minmax) { |
| force_split[split_index] = 1; |
| force_split[i + 1] = 1; |
| force_split[0] = 1; |
| } |
| } |
| } |
| if (is_key_frame || |
| (low_res && vt.split[i].split[j].part_variances.none.variance > |
| threshold_4x4avg)) { |
| force_split[split_index] = 0; |
| // Go down to 4x4 down-sampling for variance. |
| variance4x4downsample[i2 + j] = 1; |
| for (k = 0; k < 4; k++) { |
| int x8_idx = x16_idx + ((k & 1) << 3); |
| int y8_idx = y16_idx + ((k >> 1) << 3); |
| v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k]; |
| fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2, |
| #if CONFIG_VP9_HIGHBITDEPTH |
| xd->cur_buf->flags, |
| #endif |
| pixels_wide, pixels_high, is_key_frame); |
| } |
| } |
| } |
| } |
| if (cpi->noise_estimate.enabled) |
| noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate); |
| // Fill the rest of the variance tree by summing split partition values. |
| avg_32x32 = 0; |
| for (i = 0; i < 4; i++) { |
| const int i2 = i << 2; |
| for (j = 0; j < 4; j++) { |
| if (variance4x4downsample[i2 + j] == 1) { |
| v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : &vt.split[i].split[j]; |
| for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8); |
| fill_variance_tree(vtemp, BLOCK_16X16); |
| // If variance of this 16x16 block is above the threshold, force block |
| // to split. This also forces a split on the upper levels. |
| get_variance(&vtemp->part_variances.none); |
| if (vtemp->part_variances.none.variance > thresholds[2]) { |
| force_split[5 + i2 + j] = 1; |
| force_split[i + 1] = 1; |
| force_split[0] = 1; |
| } |
| } |
| } |
| fill_variance_tree(&vt.split[i], BLOCK_32X32); |
| // If variance of this 32x32 block is above the threshold, or if its above |
| // (some threshold of) the average variance over the sub-16x16 blocks, then |
| // force this block to split. This also forces a split on the upper |
| // (64x64) level. |
| if (!force_split[i + 1]) { |
| get_variance(&vt.split[i].part_variances.none); |
| var_32x32 = vt.split[i].part_variances.none.variance; |
| max_var_32x32 = VPXMAX(var_32x32, max_var_32x32); |
| min_var_32x32 = VPXMIN(var_32x32, min_var_32x32); |
| if (vt.split[i].part_variances.none.variance > thresholds[1] || |
| (!is_key_frame && |
| vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) && |
| vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) { |
| force_split[i + 1] = 1; |
| force_split[0] = 1; |
| } else if (!is_key_frame && noise_level < kLow && cm->height <= 360 && |
| (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[1] >> 1) && |
| maxvar_16x16[i] > thresholds[1]) { |
| force_split[i + 1] = 1; |
| force_split[0] = 1; |
| } |
| avg_32x32 += var_32x32; |
| } |
| } |
| if (!force_split[0]) { |
| fill_variance_tree(&vt, BLOCK_64X64); |
| get_variance(&vt.part_variances.none); |
| // If variance of this 64x64 block is above (some threshold of) the average |
| // variance over the sub-32x32 blocks, then force this block to split. |
| // Only checking this for noise level >= medium for now. |
| if (!is_key_frame && noise_level >= kMedium && |
| vt.part_variances.none.variance > (9 * avg_32x32) >> 5) |
| force_split[0] = 1; |
| // Else if the maximum 32x32 variance minus the minimum 32x32 variance in |
| // a 64x64 block is greater than threshold and the maximum 32x32 variance is |
| // above a minimum threshold, then force the split of a 64x64 block. |
| // Only check this for low noise. |
| else if (!is_key_frame && noise_level < kMedium && |
| (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) && |
| max_var_32x32 > thresholds[0] >> 1) |
| force_split[0] = 1; |
| } |
| |
| // Now go through the entire structure, splitting every block size until |
| // we get to one that's got a variance lower than our threshold. |
| if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || |
| !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col, |
| thresholds[0], BLOCK_16X16, force_split[0])) { |
| for (i = 0; i < 4; ++i) { |
| const int x32_idx = ((i & 1) << 2); |
| const int y32_idx = ((i >> 1) << 2); |
| const int i2 = i << 2; |
| if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32, |
| (mi_row + y32_idx), (mi_col + x32_idx), |
| thresholds[1], BLOCK_16X16, |
| force_split[i + 1])) { |
| for (j = 0; j < 4; ++j) { |
| const int x16_idx = ((j & 1) << 1); |
| const int y16_idx = ((j >> 1) << 1); |
| // For inter frames: if variance4x4downsample[] == 1 for this 16x16 |
| // block, then the variance is based on 4x4 down-sampling, so use vt2 |
| // in set_vt_partitioning(), otherwise use vt. |
| v16x16 *vtemp = (!is_key_frame && variance4x4downsample[i2 + j] == 1) |
| ? &vt2[i2 + j] |
| : &vt.split[i].split[j]; |
| if (!set_vt_partitioning( |
| cpi, x, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx, |
| mi_col + x32_idx + x16_idx, thresholds[2], cpi->vbp_bsize_min, |
| force_split[5 + i2 + j])) { |
| for (k = 0; k < 4; ++k) { |
| const int x8_idx = (k & 1); |
| const int y8_idx = (k >> 1); |
| if (use_4x4_partition) { |
| if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k], |
| BLOCK_8X8, |
| mi_row + y32_idx + y16_idx + y8_idx, |
| mi_col + x32_idx + x16_idx + x8_idx, |
| thresholds[3], BLOCK_8X8, 0)) { |
| set_block_size( |
| cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx), |
| (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_4X4); |
| } |
| } else { |
| set_block_size( |
| cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx), |
| (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8); |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| if (!frame_is_intra_only(cm) && cpi->sf.copy_partition_flag) { |
| update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); |
| } |
| |
| if (!frame_is_intra_only(cm) && cpi->sf.svc_use_lowres_part && |
| cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) |
| update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); |
| |
| if (cpi->sf.short_circuit_low_temp_var) { |
| set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition, |
| mi_col, mi_row); |
| } |
| |
| chroma_check(cpi, x, bsize, y_sad, is_key_frame, scene_change_detected); |
| if (vt2) vpx_free(vt2); |
| return 0; |
| } |
| |
| #if !CONFIG_REALTIME_ONLY |
| static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx, |
| int mi_row, int mi_col, BLOCK_SIZE bsize, |
| int output_enabled) { |
| int i, x_idx, y; |
| VP9_COMMON *const cm = &cpi->common; |
| RD_COUNTS *const rdc = &td->rd_counts; |
| MACROBLOCK *const x = &td->mb; |
| MACROBLOCKD *const xd = &x->e_mbd; |
| struct macroblock_plane *const p = x->plane; |
| struct macroblockd_plane *const pd = xd->plane; |
| MODE_INFO *mi = &ctx->mic; |
| MODE_INFO *const xdmi = xd->mi[0]; |
| MODE_INFO *mi_addr = xd->mi[0]; |
| const struct segmentation *const seg = &cm->seg; |
| const int bw = num_8x8_blocks_wide_lookup[mi->sb_type]; |
| const int bh = num_8x8_blocks_high_lookup[mi->sb_type]; |
| const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); |
| const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); |
| MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; |
| int w, h; |
| |
| const int mis = cm->mi_stride; |
| const int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
| const int mi_height = num_8x8_blocks_high_lookup[bsize]; |
| int max_plane; |
| |
| assert(mi->sb_type == bsize); |
| |
| *mi_addr = *mi; |
| *x->mbmi_ext = ctx->mbmi_ext; |
| |
| // If segmentation in use |
| if (seg->enabled) { |
| // For in frame complexity AQ copy the segment id from the segment map. |
| if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { |
| const uint8_t *const map = |
| seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
| mi_addr->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
| } |
| // Else for cyclic refresh mode update the segment map, set the segment id |
| // and then update the quantizer. |
| if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { |
| vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize, |
| ctx->rate, ctx->dist, x->skip, p); |
| } |
| } |
| |
| max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1; |
| for (i = 0; i < max_plane; ++i) { |
| p[i].coeff = ctx->coeff_pbuf[i][1]; |
| p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; |
| pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; |
| p[i].eobs = ctx->eobs_pbuf[i][1]; |
| } |
| |
| for (i = max_plane; i < MAX_MB_PLANE; ++i) { |
| p[i].coeff = ctx->coeff_pbuf[i][2]; |
| p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; |
| pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; |
| p[i].eobs = ctx->eobs_pbuf[i][2]; |
| } |
| |
| // Restore the coding context of the MB to that that was in place |
| // when the mode was picked for it |
| for (y = 0; y < mi_height; y++) |
| for (x_idx = 0; x_idx < mi_width; x_idx++) |
| if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx && |
| (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { |
| xd->mi[x_idx + y * mis] = mi_addr; |
| } |
| |
| if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x); |
| |
| if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) { |
| xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; |
| xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; |
| } |
| |
| x->skip = ctx->skip; |
| memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk, |
| sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); |
| |
| if (!output_enabled) return; |
| |
| #if CONFIG_INTERNAL_STATS |
| if (frame_is_intra_only(cm)) { |
| static const int kf_mode_index[] = { |
| THR_DC /*DC_PRED*/, THR_V_PRED /*V_PRED*/, |
| THR_H_PRED /*H_PRED*/, THR_D45_PRED /*D45_PRED*/, |
| THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/, |
| THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/, |
| THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/, |
| }; |
| ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]]; |
| } else { |
| // Note how often each mode chosen as best |
| ++cpi->mode_chosen_counts[ctx->best_mode_index]; |
| } |
| #endif |
| if (!frame_is_intra_only(cm)) { |
| if (is_inter_block(xdmi)) { |
| vp9_update_mv_count(td); |
| |
| if (cm->interp_filter == SWITCHABLE) { |
| const int ctx = get_pred_context_switchable_interp(xd); |
| ++td->counts->switchable_interp[ctx][xdmi->interp_filter]; |
| } |
| } |
| |
| rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; |
| rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; |
| rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; |
| |
| for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) |
| rdc->filter_diff[i] += ctx->best_filter_diff[i]; |
| } |
| |
| for (h = 0; h < y_mis; ++h) { |
| MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; |
| for (w = 0; w < x_mis; ++w) { |
| MV_REF *const mv = frame_mv + w; |
| mv->ref_frame[0] = mi->ref_frame[0]; |
| mv->ref_frame[1] = mi->ref_frame[1]; |
| mv->mv[0].as_int = mi->mv[0].as_int; |
| mv->mv[1].as_int = mi->mv[1].as_int; |
| } |
| } |
| } |
| #endif // !CONFIG_REALTIME_ONLY |
| |
| void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, |
| int mi_row, int mi_col) { |
| uint8_t *const buffers[3] = { src->y_buffer, src->u_buffer, src->v_buffer }; |
| const int strides[3] = { src->y_stride, src->uv_stride, src->uv_stride }; |
| int i; |
| |
| // Set current frame pointer. |
| x->e_mbd.cur_buf = src; |
| |
| for (i = 0; i < MAX_MB_PLANE; i++) |
| setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col, |
| NULL, x->e_mbd.plane[i].subsampling_x, |
| x->e_mbd.plane[i].subsampling_y); |
| } |
| |
| static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, |
| RD_COST *rd_cost, BLOCK_SIZE bsize) { |
| MACROBLOCKD *const xd = &x->e_mbd; |
| MODE_INFO *const mi = xd->mi[0]; |
| INTERP_FILTER filter_ref; |
| |
| filter_ref = get_pred_context_switchable_interp(xd); |
| if (filter_ref == SWITCHABLE_FILTERS) filter_ref = EIGHTTAP; |
| |
| mi->sb_type = bsize; |
| mi->mode = ZEROMV; |
| mi->tx_size = |
| VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]); |
| mi->skip = 1; |
| mi->uv_mode = DC_PRED; |
| mi->ref_frame[0] = LAST_FRAME; |
| mi->ref_frame[1] = NONE; |
| mi->mv[0].as_int = 0; |
| mi->interp_filter = filter_ref; |
| |
| xd->mi[0]->bmi[0].as_mv[0].as_int = 0; |
| x->skip = 1; |
| |
| vp9_rd_cost_init(rd_cost); |
| } |
| |
| #if !CONFIG_REALTIME_ONLY |
| static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, |
| int mi_row, int mi_col, BLOCK_SIZE bsize, |
| AQ_MODE aq_mode) { |
| VP9_COMMON *const cm = &cpi->common; |
| const VP9EncoderConfig *const oxcf = &cpi->oxcf; |
| const uint8_t *const map = |
| cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
| |
| vp9_init_plane_quantizers(cpi, x); |
| vpx_clear_system_state(); |
| |
| if (aq_mode == NO_AQ || aq_mode == PSNR_AQ) { |
| if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult; |
| } else if (aq_mode == PERCEPTUAL_AQ) { |
| x->rdmult = x->cb_rdmult; |
| } else if (aq_mode == CYCLIC_REFRESH_AQ) { |
| // If segment is boosted, use rdmult for that segment. |
| if (cyclic_refresh_segment_id_boosted( |
| get_segment_id(cm, map, bsize, mi_row, mi_col))) |
| x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); |
| } else { |
| x->rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); |
| } |
| |
| if (oxcf->tuning == VP8_TUNE_SSIM) { |
| set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); |
| } |
| } |
| |
| static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, |
| MACROBLOCK *const x, int mi_row, int mi_col, |
| RD_COST *rd_cost, BLOCK_SIZE bsize, |
| PICK_MODE_CONTEXT *ctx, int rate_in_best_rd, |
| int64_t dist_in_best_rd) { |
| VP9_COMMON *const cm = &cpi->common; |
| TileInfo *const tile_info = &tile_data->tile_info; |
| MACROBLOCKD *const xd = &x->e_mbd; |
| MODE_INFO *mi; |
| struct macroblock_plane *const p = x->plane; |
| struct macroblockd_plane *const pd = xd->plane; |
| const AQ_MODE aq_mode = cpi->oxcf.aq_mode; |
| int i, orig_rdmult; |
| int64_t best_rd = INT64_MAX; |
| |
| vpx_clear_system_state(); |
| |
| // Use the lower precision, but faster, 32x32 fdct for mode selection. |
| x->use_lp32x32fdct = 1; |
| |
| set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); |
| mi = xd->mi[0]; |
| mi->sb_type = bsize; |
| |
| for (i = 0; i < MAX_MB_PLANE; ++i) { |
| p[i].coeff = ctx->coeff_pbuf[i][0]; |
| p[i].qcoeff = ctx->qcoeff_pbuf[i][0]; |
| pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; |
| p[i].eobs = ctx->eobs_pbuf[i][0]; |
| } |
| ctx->is_coded = 0; |
| ctx->skippable = 0; |
| ctx->pred_pixel_ready = 0; |
| x->skip_recode = 0; |
| |
| // Set to zero to make sure we do not use the previous encoded frame stats |
| mi->skip = 0; |
| |
| #if CONFIG_VP9_HIGHBITDEPTH |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
| x->source_variance = vp9_high_get_sby_perpixel_variance( |
| cpi, &x->plane[0].src, bsize, xd->bd); |
| } else { |
| x->source_variance = |
| vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); |
| } |
| #else |
| x->source_variance = |
| vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); |
| #endif // CONFIG_VP9_HIGHBITDEPTH |
| |
| // Save rdmult before it might be changed, so it can be restored later. |
| orig_rdmult = x->rdmult; |
| |
| if ((cpi->sf.tx_domain_thresh > 0.0) || (cpi->sf.quant_opt_thresh > 0.0)) { |
| double logvar = vp9_log_block_var(cpi, x, bsize); |
| // Check block complexity as part of descision on using pixel or transform |
| // domain distortion in rd tests. |
| x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion && |
| (logvar >= cpi->sf.tx_domain_thresh); |
| |
| // Check block complexity as part of descision on using quantized |
| // coefficient optimisation inside the rd loop. |
| x->block_qcoeff_opt = |
| cpi->sf.allow_quant_coeff_opt && (logvar <= cpi->sf.quant_opt_thresh); |
| } else { |
| x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion; |
| x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt; |
| } |
| |
| set_segment_index(cpi, x, mi_row, mi_col, bsize, 0); |
| set_segment_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode); |
| if (rate_in_best_rd < INT_MAX && dist_in_best_rd < INT64_MAX) { |
| best_rd = vp9_calculate_rd_cost(x->rdmult, x->rddiv, rate_in_best_rd, |
| dist_in_best_rd); |
| } |
| |
| // Find best coding mode & reconstruct the MB so it is available |
| // as a predictor for MBs that follow in the SB |
| if (frame_is_intra_only(cm)) { |
| vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd); |
| } else { |
| if (bsize >= BLOCK_8X8) { |
| if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) |
| vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize, |
| ctx, best_rd); |
| else |
| vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost, |
| bsize, ctx, best_rd); |
| } else { |
| vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost, |
| bsize, ctx, best_rd); |
| } |
| } |
| |
| // Examine the resulting rate and for AQ mode 2 make a segment choice. |
| if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) && |
| (bsize >= BLOCK_16X16) && |
| (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || |
| (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) { |
| vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate); |
| } |
| |
| // TODO(jingning) The rate-distortion optimization flow needs to be |
| // refactored to provide proper exit/return handle. |
| if (rd_cost->rate == INT_MAX || rd_cost->dist == INT64_MAX) |
| rd_cost->rdcost = INT64_MAX; |
| else |
| rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); |
| |
| x->rdmult = orig_rdmult; |
| |
| ctx->rate = rd_cost->rate; |
| ctx->dist = rd_cost->dist; |
| } |
| #endif // !CONFIG_REALTIME_ONLY |
| |
| static void update_stats(VP9_COMMON *cm, ThreadData *td) { |
| const MACROBLOCK *x = &td->mb; |
| const MACROBLOCKD *const xd = &x->e_mbd; |
| const MODE_INFO *const mi = xd->mi[0]; |
| const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; |
| const BLOCK_SIZE bsize = mi->sb_type; |
| |
| if (!frame_is_intra_only(cm)) { |
| FRAME_COUNTS *const counts = td->counts; |
| const int inter_block = is_inter_block(mi); |
| const int seg_ref_active = |
| segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME); |
| if (!seg_ref_active) { |
| counts->intra_inter[get_intra_inter_context(xd)][inter_block]++; |
| // If the segment reference feature is enabled we have only a single |
| // reference frame allowed for the segment so exclude it from |
| // the reference frame counts used to work out probabilities. |
| if (inter_block) { |
| const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0]; |
| if (cm->reference_mode == REFERENCE_MODE_SELECT) |
| counts->comp_inter[vp9_get_reference_mode_context(cm, xd)] |
| [has_second_ref(mi)]++; |
| |
| if (has_second_ref(mi)) { |
| const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; |
| const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); |
| const int bit = mi->ref_frame[!idx] == cm->comp_var_ref[1]; |
| counts->comp_ref[ctx][bit]++; |
| } else { |
| counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0] |
| [ref0 != LAST_FRAME]++; |
| if (ref0 != LAST_FRAME) |
| counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1] |
| [ref0 != GOLDEN_FRAME]++; |
| } |
| } |
| } |
| if (inter_block && |
| !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) { |
| const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]]; |
| if (bsize >= BLOCK_8X8) { |
| const PREDICTION_MODE mode = mi->mode; |
| ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)]; |
| } else { |
| const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; |
| const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; |
| int idx, idy; |
| for (idy = 0; idy < 2; idy += num_4x4_h) { |
| for (idx = 0; idx < 2; idx += num_4x4_w) { |
| const int j = idy * 2 + idx; |
| const PREDICTION_MODE b_mode = mi->bmi[j].as_mode; |
| ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)]; |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| #if !CONFIG_REALTIME_ONLY |
| static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col, |
| ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], |
| ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], |
| PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], |
| BLOCK_SIZE bsize) { |
| MACROBLOCKD *const xd = &x->e_mbd; |
| int p; |
| const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; |
| const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; |
| int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
| int mi_height = num_8x8_blocks_high_lookup[bsize]; |
| for (p = 0; p < MAX_MB_PLANE; p++) { |
| memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), |
| a + num_4x4_blocks_wide * p, |
| (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> |
| xd->plane[p].subsampling_x); |
| memcpy(xd->left_context[p] + |
| ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), |
| l + num_4x4_blocks_high * p, |
| (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> |
| xd->plane[p].subsampling_y); |
| } |
| memcpy(xd->above_seg_context + mi_col, sa, |
| sizeof(*xd->above_seg_context) * mi_width); |
| memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, |
| sizeof(xd->left_seg_context[0]) * mi_height); |
| } |
| |
| static void save_context(MACROBLOCK *const x, int mi_row, int mi_col, |
| ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], |
| ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], |
| PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], |
| BLOCK_SIZE bsize) { |
| const MACROBLOCKD *const xd = &x->e_mbd; |
| int p; |
| const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; |
| const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; |
| int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
| int mi_height = num_8x8_blocks_high_lookup[bsize]; |
| |
| // buffer the above/left context information of the block in search. |
| for (p = 0; p < MAX_MB_PLANE; ++p) { |
| memcpy(a + num_4x4_blocks_wide * p, |
| xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), |
| (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> |
| xd->plane[p].subsampling_x); |
| memcpy(l + num_4x4_blocks_high * p, |
| xd->left_context[p] + |
| ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), |
| (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> |
| xd->plane[p].subsampling_y); |
| } |
| memcpy(sa, xd->above_seg_context + mi_col, |
| sizeof(*xd->above_seg_context) * mi_width); |
| memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), |
| sizeof(xd->left_seg_context[0]) * mi_height); |
| } |
| |
| static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td, |
| TOKENEXTRA **tp, int mi_row, int mi_col, |
| int output_enabled, BLOCK_SIZE bsize, |
| PICK_MODE_CONTEXT *ctx) { |
| MACROBLOCK *const x = &td->mb; |
| set_offsets(cpi, tile, x, mi_row, mi_col, bsize); |
| |
| if (cpi->sf.enable_tpl_model && |
| (cpi->oxcf.aq_mode == NO_AQ || cpi->oxcf.aq_mode == PERCEPTUAL_AQ)) { |
| const VP9EncoderConfig *const oxcf = &cpi->oxcf; |
| x->rdmult = x->cb_rdmult; |
| if (oxcf->tuning == VP8_TUNE_SSIM) { |
| set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); |
| } |
| } |
| |
| update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled); |
| encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); |
| |
| if (output_enabled) { |
| update_stats(&cpi->common, td); |
| |
| (*tp)->token = EOSB_TOKEN; |
| (*tp)++; |
| } |
| } |
| |
| static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile, |
| TOKENEXTRA **tp, int mi_row, int mi_col, |
| int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) { |
| VP9_COMMON *const cm = &cpi->common; |
| MACROBLOCK *const x = &td->mb; |
| MACROBLOCKD *const xd = &x->e_mbd; |
| |
| const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; |
| int ctx; |
| PARTITION_TYPE partition; |
| BLOCK_SIZE subsize = bsize; |
| |
| if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
| |
| if (bsize >= BLOCK_8X8) { |
| ctx = partition_plane_context(xd, mi_row, mi_col, bsize); |
| subsize = get_subsize(bsize, pc_tree->partitioning); |
| } else { |
| ctx = 0; |
| subsize = BLOCK_4X4; |
| } |
| |
| partition = partition_lookup[bsl][subsize]; |
| if (output_enabled && bsize != BLOCK_4X4) |
| td->counts->partition[ctx][partition]++; |
| |
| switch (partition) { |
| case PARTITION_NONE: |
| encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, |
| &pc_tree->none); |
| break; |
| case PARTITION_VERT: |
| encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, |
| &pc_tree->vertical[0]); |
| if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { |
| encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled, |
| subsize, &pc_tree->vertical[1]); |
| } |
| break; |
| case PARTITION_HORZ: |
| encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, |
| &pc_tree->horizontal[0]); |
| if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { |
| encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled, |
| subsize, &pc_tree->horizontal[1]); |
| } |
| break; |
| default: |
| assert(partition == PARTITION_SPLIT); |
| if (bsize == BLOCK_8X8) { |
| encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, |
| pc_tree->leaf_split[0]); |
| } else { |
| encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, |
| pc_tree->split[0]); |
| encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, |
| subsize, pc_tree->split[1]); |
| encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, |
| subsize, pc_tree->split[2]); |
| encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled, |
| subsize, pc_tree->split[3]); |
| } |
| break; |
| } |
| |
| if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) |
| update_partition_context(xd, mi_row, mi_col, subsize, bsize); |
| } |
| #endif // !CONFIG_REALTIME_ONLY |
| |
| // Check to see if the given partition size is allowed for a specified number |
| // of 8x8 block rows and columns remaining in the image. |
| // If not then return the largest allowed partition size |
| static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left, |
| int cols_left, int *bh, int *bw) { |
| if (rows_left <= 0 || cols_left <= 0) { |
| return VPXMIN(bsize, BLOCK_8X8); |
| } else { |
| for (; bsize > 0; bsize -= 3) { |
| *bh = num_8x8_blocks_high_lookup[bsize]; |
| *bw = num_8x8_blocks_wide_lookup[bsize]; |
| if ((*bh <= rows_left) && (*bw <= cols_left)) { |
| break; |
| |