Blame - src/third_party/libvpx/test/convolve_test.cc - cobalt

blob: 73b0edb99b59402b88c7533c60e9191b562ccd2b [file] [log] [blame]

David Ghandehari	8c5039b	2016-08-17 19:39:30 -0700	[diff] [blame]	1	/*
				2	* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
				11	#include <string.h>
				12
				13	#include "third_party/googletest/src/include/gtest/gtest.h"
				14
				15	#include "./vpx_config.h"
				16	#include "./vp9_rtcd.h"
				17	#include "./vpx_dsp_rtcd.h"
				18	#include "test/acm_random.h"
				19	#include "test/clear_system_state.h"
				20	#include "test/register_state_check.h"
				21	#include "test/util.h"
				22	#include "vp9/common/vp9_common.h"
				23	#include "vp9/common/vp9_filter.h"
				24	#include "vpx_dsp/vpx_dsp_common.h"
				25	#include "vpx_dsp/vpx_filter.h"
				26	#include "vpx_mem/vpx_mem.h"
				27	#include "vpx_ports/mem.h"
				28
				29	namespace {
				30
				31	static const unsigned int kMaxDimension = 64;  // largest block width/height; sizes the reference filters' scratch buffers
				32
				33	typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
				34	uint8_t *dst, ptrdiff_t dst_stride,
				35	const int16_t *filter_x, int filter_x_stride,
				36	const int16_t *filter_y, int filter_y_stride,
				37	int w, int h);
					// ConvolveFunc is the pointer-to-function type shared by every routine under
					// test: source/destination buffers with their strides, a horizontal and a
					// vertical 16-bit filter (each followed by an int argument), and the block
					// width and height.
				38
				39	struct ConvolveFunctions {
					// Bundles the full set of convolve routines belonging to one implementation
					// under test, together with the bit depth they operate on.
					//
					// The constructor takes the routines as (plain, averaging) pairs; the
					// initializer list below follows the member declaration order instead.
				40	ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg,
				41	ConvolveFunc h8, ConvolveFunc h8_avg,
				42	ConvolveFunc v8, ConvolveFunc v8_avg,
				43	ConvolveFunc hv8, ConvolveFunc hv8_avg,
				44	ConvolveFunc sh8, ConvolveFunc sh8_avg,
				45	ConvolveFunc sv8, ConvolveFunc sv8_avg,
				46	ConvolveFunc shv8, ConvolveFunc shv8_avg,
				47	int bd)
				48	: copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
				49	v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8),
				50	sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg),
				51	use_highbd_(bd) {}
				52
				53	ConvolveFunc copy_;  // plain copy
				54	ConvolveFunc avg_;  // average with the existing destination
				55	ConvolveFunc h8_;  // horiz
				56	ConvolveFunc v8_;  // vert
				57	ConvolveFunc hv8_;  // horiz/vert
				58	ConvolveFunc h8_avg_;  // avg horiz
				59	ConvolveFunc v8_avg_;  // avg vert
				60	ConvolveFunc hv8_avg_;  // avg horiz/vert
				61	ConvolveFunc sh8_; // scaled horiz
				62	ConvolveFunc sv8_; // scaled vert
				63	ConvolveFunc shv8_; // scaled horiz/vert
				64	ConvolveFunc sh8_avg_; // scaled avg horiz
				65	ConvolveFunc sv8_avg_; // scaled avg vert
				66	ConvolveFunc shv8_avg_; // scaled avg horiz/vert
				67	int use_highbd_; // 0 if high bitdepth not used, else the actual bit depth.
				68	};
				69
				70	typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;  // (block width, block height, routines under test)
				71
				72	#define ALL_SIZES(convolve_fn) \
				73	make_tuple(4, 4, &convolve_fn), \
				74	make_tuple(8, 4, &convolve_fn), \
				75	make_tuple(4, 8, &convolve_fn), \
				76	make_tuple(8, 8, &convolve_fn), \
				77	make_tuple(16, 8, &convolve_fn), \
				78	make_tuple(8, 16, &convolve_fn), \
				79	make_tuple(16, 16, &convolve_fn), \
				80	make_tuple(32, 16, &convolve_fn), \
				81	make_tuple(16, 32, &convolve_fn), \
				82	make_tuple(32, 32, &convolve_fn), \
				83	make_tuple(64, 32, &convolve_fn), \
				84	make_tuple(32, 64, &convolve_fn), \
				85	make_tuple(64, 64, &convolve_fn)
					// ALL_SIZES(fn) expands to a comma-separated list of 13
					// (width, height, &fn) tuples, one per block size from 4x4 up to 64x64.
				86
				87	// Reference 8-tap subpixel filter, slightly modified to fit into this test.
					// The reference filters below add VP9_FILTER_WEIGHT >> 1 for rounding and
					// then shift right by VP9_FILTER_SHIFT, so a tap value of 128 acts as 1.0.
				88	#define VP9_FILTER_WEIGHT 128
				89	#define VP9_FILTER_SHIFT 7
					// Clamps x to the 8-bit pixel range [0, 255].
				90	uint8_t clip_pixel(int x) {
				91	return x < 0 ? 0 :
				92	x > 255 ? 255 :
				93	x;
				94	}
				95
				96	void filter_block2d_8_c(const uint8_t *src_ptr,
				97	const unsigned int src_stride,
				98	const int16_t *HFilter,
				99	const int16_t *VFilter,
				100	uint8_t *dst_ptr,
				101	unsigned int dst_stride,
				102	unsigned int output_width,
				103	unsigned int output_height) {
					// Reference separable 8-tap filter for 8-bit pixels: applies HFilter along
					// each row, then VFilter along each column, writing output_width x
					// output_height pixels to dst_ptr.
					//
					// src_ptr points at the pixel matching the top-left output pixel; the
					// function reads 3 rows/columns before it and 4 after the block, so the
					// caller must provide that margin. output_width and output_height are not
					// checked here and must fit the fixed-size intermediate buffer.
					//
				104	// Between passes, we use an intermediate buffer whose height is extended to
				105	// have enough horizontally filtered values as input for the vertical pass.
				106	// This buffer is allocated to be big enough for the largest block type we
				107	// support.
				108	const int kInterp_Extend = 4;
				109	const unsigned int intermediate_height =
				110	(kInterp_Extend - 1) + output_height + kInterp_Extend;
				111	unsigned int i, j;
				112
				113	// Size of intermediate_buffer is max_intermediate_height * filter_max_width,
				114	// where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
				115	// + kInterp_Extend
				116	// = 3 + 64 + 4
				117	// = 71
				118	// and filter_max_width = kMaxDimension = 64
				119	//
				120	uint8_t intermediate_buffer[71 * kMaxDimension];
					// After finishing one source row, step back from the end of the transposed
					// buffer to the slot for the next row (one element further than the last).
				121	const int intermediate_next_stride =
				122	1 - static_cast<int>(intermediate_height * output_width);
				123
				124	// Horizontal pass (src -> transposed intermediate).
				125	uint8_t *output_ptr = intermediate_buffer;
				126	const int src_next_row_stride = src_stride - output_width;
					// Back up to the first row and column covered by the 8-tap window.
				127	src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
				128	for (i = 0; i < intermediate_height; ++i) {
				129	for (j = 0; j < output_width; ++j) {
				130	// Apply filter...
				131	const int temp = (src_ptr[0] * HFilter[0]) +
				132	(src_ptr[1] * HFilter[1]) +
				133	(src_ptr[2] * HFilter[2]) +
				134	(src_ptr[3] * HFilter[3]) +
				135	(src_ptr[4] * HFilter[4]) +
				136	(src_ptr[5] * HFilter[5]) +
				137	(src_ptr[6] * HFilter[6]) +
				138	(src_ptr[7] * HFilter[7]) +
				139	(VP9_FILTER_WEIGHT >> 1); // Rounding
				140
				141	// Normalize back to 0-255...
				142	*output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
				143	++src_ptr;
					// Transposed store: consecutive pixels of a row land intermediate_height
					// elements apart, so each source column becomes contiguous.
				144	output_ptr += intermediate_height;
				145	}
				146	src_ptr += src_next_row_stride;
				147	output_ptr += intermediate_next_stride;
				148	}
				149
				150	// Vertical pass (transposed intermediate -> dst).
				151	src_ptr = intermediate_buffer;
				152	const int dst_next_row_stride = dst_stride - output_width;
				153	for (i = 0; i < output_height; ++i) {
				154	for (j = 0; j < output_width; ++j) {
				155	// Apply filter...
					// The 8 taps read contiguous elements because the buffer is transposed.
				156	const int temp = (src_ptr[0] * VFilter[0]) +
				157	(src_ptr[1] * VFilter[1]) +
				158	(src_ptr[2] * VFilter[2]) +
				159	(src_ptr[3] * VFilter[3]) +
				160	(src_ptr[4] * VFilter[4]) +
				161	(src_ptr[5] * VFilter[5]) +
				162	(src_ptr[6] * VFilter[6]) +
				163	(src_ptr[7] * VFilter[7]) +
				164	(VP9_FILTER_WEIGHT >> 1); // Rounding
				165
				166	// Normalize back to 0-255...
				167	*dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
				168	src_ptr += intermediate_height;
				169	}
				170	src_ptr += intermediate_next_stride;
				171	dst_ptr += dst_next_row_stride;
				172	}
				173	}
				174
				175	void block2d_average_c(uint8_t *src,
				176	unsigned int src_stride,
				177	uint8_t *output_ptr,
				178	unsigned int output_stride,
				179	unsigned int output_width,
				180	unsigned int output_height) {
					// Replaces every pixel of the output_width x output_height block at
					// output_ptr with the rounded-up average of itself and the matching pixel
					// in src. src is only read.
				181	unsigned int i, j;
				182	for (i = 0; i < output_height; ++i) {
				183	for (j = 0; j < output_width; ++j) {
				184	output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
				185	}
				186	output_ptr += output_stride;
				187	}
				188	}
				189
				190	void filter_average_block2d_8_c(const uint8_t *src_ptr,
				191	const unsigned int src_stride,
				192	const int16_t *HFilter,
				193	const int16_t *VFilter,
				194	uint8_t *dst_ptr,
				195	unsigned int dst_stride,
				196	unsigned int output_width,
				197	unsigned int output_height) {
					// Reference for the averaging variants: filters the source into a scratch
					// block with filter_block2d_8_c, then averages that block into dst_ptr.
				198	uint8_t tmp[kMaxDimension * kMaxDimension];
				199
				200	assert(output_width <= kMaxDimension);
				201	assert(output_height <= kMaxDimension);
					// The literal 64 is the row stride of tmp and equals kMaxDimension.
				202	filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
				203	output_width, output_height);
				204	block2d_average_c(tmp, 64, dst_ptr, dst_stride,
				205	output_width, output_height);
				206	}
				207
				208	#if CONFIG_VP9_HIGHBITDEPTH
				209	void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
				210	const unsigned int src_stride,
				211	const int16_t *HFilter,
				212	const int16_t *VFilter,
				213	uint16_t *dst_ptr,
				214	unsigned int dst_stride,
				215	unsigned int output_width,
				216	unsigned int output_height,
				217	int bd) {
					// 16-bit counterpart of filter_block2d_8_c: same two-pass 8-tap filter, but
					// on uint16_t pixels and clipping through clip_pixel_highbd() with bit
					// depth bd. As there, src_ptr needs a margin of 3 rows/columns before and
					// 4 after the block, and the block size is not checked.
					//
				218	// Between passes, we use an intermediate buffer whose height is extended to
				219	// have enough horizontally filtered values as input for the vertical pass.
				220	// This buffer is allocated to be big enough for the largest block type we
				221	// support.
				222	const int kInterp_Extend = 4;
				223	const unsigned int intermediate_height =
				224	(kInterp_Extend - 1) + output_height + kInterp_Extend;
				225
				226	/* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
				227	* where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
				228	* + kInterp_Extend
				229	* = 3 + 64 + 4
				230	* = 71
				231	* and filter_max_width = kMaxDimension = 64
				232	*/
				233	uint16_t intermediate_buffer[71 * kMaxDimension];
					// Offset from the end of the transposed buffer back to the slot for the
					// next source row.
				234	const int intermediate_next_stride =
				235	1 - static_cast<int>(intermediate_height * output_width);
				236
				237	// Horizontal pass (src -> transposed intermediate).
				238	{
				239	uint16_t *output_ptr = intermediate_buffer;
				240	const int src_next_row_stride = src_stride - output_width;
				241	unsigned int i, j;
					// Back up to the first row and column covered by the 8-tap window.
				242	src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
				243	for (i = 0; i < intermediate_height; ++i) {
				244	for (j = 0; j < output_width; ++j) {
				245	// Apply filter...
				246	const int temp = (src_ptr[0] * HFilter[0]) +
				247	(src_ptr[1] * HFilter[1]) +
				248	(src_ptr[2] * HFilter[2]) +
				249	(src_ptr[3] * HFilter[3]) +
				250	(src_ptr[4] * HFilter[4]) +
				251	(src_ptr[5] * HFilter[5]) +
				252	(src_ptr[6] * HFilter[6]) +
				253	(src_ptr[7] * HFilter[7]) +
				254	(VP9_FILTER_WEIGHT >> 1); // Rounding
				255
				256	// Normalize and clip via clip_pixel_highbd() for bit depth bd...
				257	*output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
				258	++src_ptr;
				259	output_ptr += intermediate_height;
				260	}
				261	src_ptr += src_next_row_stride;
				262	output_ptr += intermediate_next_stride;
				263	}
				264	}
				265
				266	// Vertical pass (transposed intermediate -> dst).
				267	{
					// This local src_ptr shadows the parameter and walks the intermediate buffer.
				268	uint16_t *src_ptr = intermediate_buffer;
				269	const int dst_next_row_stride = dst_stride - output_width;
				270	unsigned int i, j;
				271	for (i = 0; i < output_height; ++i) {
				272	for (j = 0; j < output_width; ++j) {
				273	// Apply filter...
				274	const int temp = (src_ptr[0] * VFilter[0]) +
				275	(src_ptr[1] * VFilter[1]) +
				276	(src_ptr[2] * VFilter[2]) +
				277	(src_ptr[3] * VFilter[3]) +
				278	(src_ptr[4] * VFilter[4]) +
				279	(src_ptr[5] * VFilter[5]) +
				280	(src_ptr[6] * VFilter[6]) +
				281	(src_ptr[7] * VFilter[7]) +
				282	(VP9_FILTER_WEIGHT >> 1); // Rounding
				283
				284	// Normalize and clip via clip_pixel_highbd() for bit depth bd...
				285	*dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
				286	src_ptr += intermediate_height;
				287	}
				288	src_ptr += intermediate_next_stride;
				289	dst_ptr += dst_next_row_stride;
				290	}
				291	}
				292	}
				293
				294	void highbd_block2d_average_c(uint16_t *src,
				295	unsigned int src_stride,
				296	uint16_t *output_ptr,
				297	unsigned int output_stride,
				298	unsigned int output_width,
				299	unsigned int output_height) {
					// 16-bit version of block2d_average_c: each output pixel becomes the
					// rounded-up average of itself and the matching src pixel. src is only read.
				300	unsigned int i, j;
				301	for (i = 0; i < output_height; ++i) {
				302	for (j = 0; j < output_width; ++j) {
				303	output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
				304	}
				305	output_ptr += output_stride;
				306	}
				307	}
				308
				309	void highbd_filter_average_block2d_8_c(const uint16_t *src_ptr,
				310	const unsigned int src_stride,
				311	const int16_t *HFilter,
				312	const int16_t *VFilter,
				313	uint16_t *dst_ptr,
				314	unsigned int dst_stride,
				315	unsigned int output_width,
				316	unsigned int output_height,
				317	int bd) {
					// 16-bit reference for the averaging variants: filters into a scratch block
					// with highbd_filter_block2d_8_c, then averages that block into dst_ptr.
				318	uint16_t tmp[kMaxDimension * kMaxDimension];
				319
				320	assert(output_width <= kMaxDimension);
				321	assert(output_height <= kMaxDimension);
					// The literal 64 is the row stride of tmp and equals kMaxDimension.
				322	highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
				323	output_width, output_height, bd);
				324	highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
				325	output_width, output_height);
				326	}
				327	#endif // CONFIG_VP9_HIGHBITDEPTH
				328
				329	class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
					// Fixture for the convolve tests. Each parameter supplies a block size and
					// a set of routines (UUT_). The tested block sits inside a
					// kOuterBlockSize x kOuterBlockSize buffer; for the 8-bit output buffer the
					// surrounding area is filled with guard values so that out-of-bounds writes
					// can be detected. Buffers are shared by all tests of the test case.
				330	public:
				331	static void SetUpTestCase() {
				332	// Force input_ to be unaligned, output to be 16 byte aligned.
				333	input_ = reinterpret_cast<uint8_t*>(
				334	vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
				335	output_ = reinterpret_cast<uint8_t*>(
				336	vpx_memalign(kDataAlignment, kOutputBufferSize));
				337	output_ref_ = reinterpret_cast<uint8_t*>(
				338	vpx_memalign(kDataAlignment, kOutputBufferSize));
				339	#if CONFIG_VP9_HIGHBITDEPTH
					// Same layout for the 16-bit buffers: the input starts one element past
					// the aligned allocation.
				340	input16_ = reinterpret_cast<uint16_t*>(
				341	vpx_memalign(kDataAlignment,
				342	(kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
				343	output16_ = reinterpret_cast<uint16_t*>(
				344	vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
				345	output16_ref_ = reinterpret_cast<uint16_t*>(
				346	vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
				347	#endif
				348	}
				349
				350	virtual void TearDown() { libvpx_test::ClearSystemState(); }
				351
				352	static void TearDownTestCase() {
					// The input pointers were advanced by one element after allocation, so
					// step back before freeing.
				353	vpx_free(input_ - 1);
				354	input_ = NULL;
				355	vpx_free(output_);
				356	output_ = NULL;
				357	vpx_free(output_ref_);
				358	output_ref_ = NULL;
				359	#if CONFIG_VP9_HIGHBITDEPTH
				360	vpx_free(input16_ - 1);
				361	input16_ = NULL;
				362	vpx_free(output16_);
				363	output16_ = NULL;
				364	vpx_free(output16_ref_);
				365	output16_ref_ = NULL;
				366	#endif
				367	}
				368
				369	protected:
				370	static const int kDataAlignment = 16;
				371	static const int kOuterBlockSize = 256;
				372	static const int kInputStride = kOuterBlockSize;
				373	static const int kOutputStride = kOuterBlockSize;
					// Buffer sizes are counted in elements, not bytes.
				374	static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
				375	static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
				376
				377	int Width() const { return GET_PARAM(0); }
				378	int Height() const { return GET_PARAM(1); }
					// Column of the tested block inside the outer block: the centered position
					// rounded up to a multiple of kDataAlignment.
				379	int BorderLeft() const {
				380	const int center = (kOuterBlockSize - Width()) / 2;
				381	return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
				382	}
				383	int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
				384
					// True when linear index i of the outer block lies outside the tested block.
				385	bool IsIndexInBorder(int i) {
				386	return (i < BorderTop() * kOuterBlockSize ||
				387	i >= (BorderTop() + Height()) * kOuterBlockSize ||
				388	i % kOuterBlockSize < BorderLeft() ||
				389	i % kOuterBlockSize >= (BorderLeft() + Width()));
				390	}
				391
				392	virtual void SetUp() {
				393	UUT_ = GET_PARAM(2);
				394	#if CONFIG_VP9_HIGHBITDEPTH
					// mask_ is the largest pixel value for the bit depth under test.
				395	if (UUT_->use_highbd_ != 0)
				396	mask_ = (1 << UUT_->use_highbd_) - 1;
				397	else
				398	mask_ = 255;
				399	#endif
				400	/* Set up guard blocks for an inner block centered in the outer block */
					// NOTE(review): only the 8-bit output_ buffer receives guard values;
					// output16_ is not initialized here.
				401	for (int i = 0; i < kOutputBufferSize; ++i) {
				402	if (IsIndexInBorder(i))
				403	output_[i] = 255;
				404	else
				405	output_[i] = 0;
				406	}
				407
					// Odd input positions hold the maximum pixel value, even positions hold
					// pseudo-random values.
				408	::libvpx_test::ACMRandom prng;
				409	for (int i = 0; i < kInputBufferSize; ++i) {
				410	if (i & 1) {
				411	input_[i] = 255;
				412	#if CONFIG_VP9_HIGHBITDEPTH
				413	input16_[i] = mask_;
				414	#endif
				415	} else {
				416	input_[i] = prng.Rand8Extremes();
				417	#if CONFIG_VP9_HIGHBITDEPTH
				418	input16_[i] = prng.Rand16() & mask_;
				419	#endif
				420	}
				421	}
				422	}
				423
					// Fills both input buffers with a single pixel value.
				424	void SetConstantInput(int value) {
				425	memset(input_, value, kInputBufferSize);
				426	#if CONFIG_VP9_HIGHBITDEPTH
				427	vpx_memset16(input16_, value, kInputBufferSize);
				428	#endif
				429	}
				430
					// Snapshots the current output buffers into the reference buffers.
				431	void CopyOutputToRef() {
				432	memcpy(output_ref_, output_, kOutputBufferSize);
				433	#if CONFIG_VP9_HIGHBITDEPTH
					// memcpy counts bytes: scale by the element size so the whole 16-bit
					// buffer is copied rather than only its first half.
				434	memcpy(output16_ref_, output16_, kOutputBufferSize * sizeof(output16_ref_[0]));
				435	#endif
				436	}
				437
					// Verifies that the guard area of the 8-bit output buffer is untouched.
				438	void CheckGuardBlocks() {
				439	for (int i = 0; i < kOutputBufferSize; ++i) {
				440	if (IsIndexInBorder(i))
				441	EXPECT_EQ(255, output_[i]);
				442	}
				443	}
				444
					// input(), output() and output_ref() return the top-left pixel of the
					// tested block in the matching buffer. For high bit depth the 16-bit
					// buffer is returned, wrapped with CONVERT_TO_BYTEPTR.
				445	uint8_t *input() const {
				446	#if CONFIG_VP9_HIGHBITDEPTH
				447	if (UUT_->use_highbd_ == 0) {
				448	return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
				449	} else {
				450	return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize +
				451	BorderLeft());
				452	}
				453	#else
				454	return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
				455	#endif
				456	}
				457
				458	uint8_t *output() const {
				459	#if CONFIG_VP9_HIGHBITDEPTH
				460	if (UUT_->use_highbd_ == 0) {
				461	return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
				462	} else {
				463	return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize +
				464	BorderLeft());
				465	}
				466	#else
				467	return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
				468	#endif
				469	}
				470
				471	uint8_t *output_ref() const {
				472	#if CONFIG_VP9_HIGHBITDEPTH
				473	if (UUT_->use_highbd_ == 0) {
				474	return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
				475	} else {
				476	return CONVERT_TO_BYTEPTR(output16_ref_ + BorderTop() * kOuterBlockSize +
				477	BorderLeft());
				478	}
				479	#else
				480	return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
				481	#endif
				482	}
				483
					// Reads element index of list, treating list as 8-bit or 16-bit pixels
					// depending on the bit depth under test.
				484	uint16_t lookup(uint8_t *list, int index) const {
				485	#if CONFIG_VP9_HIGHBITDEPTH
				486	if (UUT_->use_highbd_ == 0) {
				487	return list[index];
				488	} else {
				489	return CONVERT_TO_SHORTPTR(list)[index];
				490	}
				491	#else
				492	return list[index];
				493	#endif
				494	}
				495
					// Writes val to element index of list; counterpart of lookup().
				496	void assign_val(uint8_t *list, int index, uint16_t val) const {
				497	#if CONFIG_VP9_HIGHBITDEPTH
				498	if (UUT_->use_highbd_ == 0) {
				499	list[index] = (uint8_t) val;
				500	} else {
				501	CONVERT_TO_SHORTPTR(list)[index] = val;
				502	}
				503	#else
				504	list[index] = (uint8_t) val;
				505	#endif
				506	}
				507
					// Dispatches to the 8-bit or high-bit-depth averaging reference filter.
				508	void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr,
				509	const unsigned int src_stride,
				510	const int16_t *HFilter,
				511	const int16_t *VFilter,
				512	uint8_t *dst_ptr,
				513	unsigned int dst_stride,
				514	unsigned int output_width,
				515	unsigned int output_height) {
				516	#if CONFIG_VP9_HIGHBITDEPTH
				517	if (UUT_->use_highbd_ == 0) {
				518	filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
				519	dst_ptr, dst_stride, output_width,
				520	output_height);
				521	} else {
				522	highbd_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr),
				523	src_stride, HFilter, VFilter,
				524	CONVERT_TO_SHORTPTR(dst_ptr),
				525	dst_stride, output_width, output_height,
				526	UUT_->use_highbd_);
				527	}
				528	#else
				529	filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
				530	dst_ptr, dst_stride, output_width,
				531	output_height);
				532	#endif
				533	}
				534
					// Dispatches to the 8-bit or high-bit-depth reference filter.
				535	void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
				536	const unsigned int src_stride,
				537	const int16_t *HFilter,
				538	const int16_t *VFilter,
				539	uint8_t *dst_ptr,
				540	unsigned int dst_stride,
				541	unsigned int output_width,
				542	unsigned int output_height) {
				543	#if CONFIG_VP9_HIGHBITDEPTH
				544	if (UUT_->use_highbd_ == 0) {
				545	filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
				546	dst_ptr, dst_stride, output_width, output_height);
				547	} else {
				548	highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
				549	HFilter, VFilter,
				550	CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
				551	output_width, output_height, UUT_->use_highbd_);
				552	}
				553	#else
				554	filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
				555	dst_ptr, dst_stride, output_width, output_height);
				556	#endif
				557	}
				558
				559	const ConvolveFunctions* UUT_;  // routines under test for the current parameter
				560	static uint8_t* input_;
				561	static uint8_t* output_;
				562	static uint8_t* output_ref_;
				563	#if CONFIG_VP9_HIGHBITDEPTH
				564	static uint16_t* input16_;
				565	static uint16_t* output16_;
				566	static uint16_t* output16_ref_;
				567	int mask_;  // largest pixel value for the current bit depth, set in SetUp()
				568	#endif
				569	};
				570
				571	uint8_t* ConvolveTest::input_ = NULL;  // shared buffers start out null
				572	uint8_t* ConvolveTest::output_ = NULL;
				573	uint8_t* ConvolveTest::output_ref_ = NULL;
				574	#if CONFIG_VP9_HIGHBITDEPTH
				575	uint16_t* ConvolveTest::input16_ = NULL;
				576	uint16_t* ConvolveTest::output16_ = NULL;
				577	uint16_t* ConvolveTest::output16_ref_ = NULL;
				578	#endif
				579
				580	TEST_P(ConvolveTest, GuardBlocks) {
					// Sanity check: the guard area must be intact right after SetUp().
				581	CheckGuardBlocks();
				582	}
				583
				584	TEST_P(ConvolveTest, Copy) {
					// copy_ must reproduce the input block exactly and leave the guard area
					// untouched.
				585	uint8_t* const in = input();
				586	uint8_t* const out = output();
				587
				588	ASM_REGISTER_STATE_CHECK(
				589	UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
				590	Width(), Height()));
				591
				592	CheckGuardBlocks();
				593
				594	for (int y = 0; y < Height(); ++y)
				595	for (int x = 0; x < Width(); ++x)
				596	ASSERT_EQ(lookup(out, y * kOutputStride + x),
				597	lookup(in, y * kInputStride + x))
				598	<< "(" << x << "," << y << ")";
				599	}
				600
				601	TEST_P(ConvolveTest, Avg) {
					// avg_ must leave in each output pixel the rounded average of the input
					// pixel and the previous output value.
				602	uint8_t* const in = input();
				603	uint8_t* const out = output();
				604	uint8_t* const out_ref = output_ref();
					// Keep the pre-call output so the expected average can be computed.
				605	CopyOutputToRef();
				606
				607	ASM_REGISTER_STATE_CHECK(
				608	UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
				609	Width(), Height()));
				610
				611	CheckGuardBlocks();
				612
				613	for (int y = 0; y < Height(); ++y)
				614	for (int x = 0; x < Width(); ++x)
				615	ASSERT_EQ(lookup(out, y * kOutputStride + x),
				616	ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
				617	lookup(out_ref, y * kOutputStride + x), 1))
				618	<< "(" << x << "," << y << ")";
				619	}
				620
				621	TEST_P(ConvolveTest, CopyHoriz) {
					// With a filter whose only non-zero tap is 128 at index 3, the scaled
					// horizontal routine is expected to reproduce the input block.
				622	uint8_t* const in = input();
				623	uint8_t* const out = output();
				624	DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
				625
					// NOTE(review): the 16 passed after each filter is presumably the
					// unscaled step - confirm against the convolve API.
				626	ASM_REGISTER_STATE_CHECK(
				627	UUT_->sh8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
				628	Width(), Height()));
				629
				630	CheckGuardBlocks();
				631
				632	for (int y = 0; y < Height(); ++y)
				633	for (int x = 0; x < Width(); ++x)
				634	ASSERT_EQ(lookup(out, y * kOutputStride + x),
				635	lookup(in, y * kInputStride + x))
				636	<< "(" << x << "," << y << ")";
				637	}
				638
				639	TEST_P(ConvolveTest, CopyVert) {
					// With a filter whose only non-zero tap is 128 at index 3, the scaled
					// vertical routine is expected to reproduce the input block.
				640	uint8_t* const in = input();
				641	uint8_t* const out = output();
				642	DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
				643
				644	ASM_REGISTER_STATE_CHECK(
				645	UUT_->sv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
				646	Width(), Height()));
				647
				648	CheckGuardBlocks();
				649
				650	for (int y = 0; y < Height(); ++y)
				651	for (int x = 0; x < Width(); ++x)
				652	ASSERT_EQ(lookup(out, y * kOutputStride + x),
				653	lookup(in, y * kInputStride + x))
				654	<< "(" << x << "," << y << ")";
				655	}
				656
				657	TEST_P(ConvolveTest, Copy2D) {
					// With a filter whose only non-zero tap is 128 at index 3 in both
					// directions, the scaled 2-D routine is expected to reproduce the input.
				658	uint8_t* const in = input();
				659	uint8_t* const out = output();
				660	DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
				661
				662	ASM_REGISTER_STATE_CHECK(
				663	UUT_->shv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8,
				664	16, Width(), Height()));
				665
				666	CheckGuardBlocks();
				667
				668	for (int y = 0; y < Height(); ++y)
				669	for (int x = 0; x < Width(); ++x)
				670	ASSERT_EQ(lookup(out, y * kOutputStride + x),
				671	lookup(in, y * kInputStride + x))
				672	<< "(" << x << "," << y << ")";
				673	}
				674
				675	const int kNumFilterBanks = 4;  // number of vp9_filter_kernels entries the tests iterate over
				676	const int kNumFilters = 16;  // number of kernels visited within each bank
				677
				678	TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
					// For every kernel in every bank, checks that adjacent tap pairs and the
					// partial sums formed from them never exceed 128, and that all eight taps
					// add up to exactly 128.
				679	for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
				680	const InterpKernel *filters =
				681	vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
				682	for (int i = 0; i < kNumFilters; i++) {
				683	const int p0 = filters[i][0] + filters[i][1];
				684	const int p1 = filters[i][2] + filters[i][3];
				685	const int p2 = filters[i][4] + filters[i][5];
				686	const int p3 = filters[i][6] + filters[i][7];
				687	EXPECT_LE(p0, 128);
				688	EXPECT_LE(p1, 128);
				689	EXPECT_LE(p2, 128);
				690	EXPECT_LE(p3, 128);
				691	EXPECT_LE(p0 + p3, 128);
				692	EXPECT_LE(p0 + p3 + p1, 128);
				693	EXPECT_LE(p0 + p3 + p1 + p2, 128);
				694	EXPECT_EQ(p0 + p1 + p2 + p3, 128);
				695	}
				696	}
				697	}
				698
				699	const int16_t kInvalidFilter[8] = { 0 };  // all-zero kernel passed for the direction a routine is not asked to filter
				700
				701	TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
					// For every bank and every (filter_x, filter_y) kernel pair, compares the
					// routine under test with the C reference filter, pixel by pixel.
				702	uint8_t* const in = input();
				703	uint8_t* const out = output();
				704	#if CONFIG_VP9_HIGHBITDEPTH
					// ref aliases whichever scratch array matches the bit depth under test.
				705	uint8_t ref8[kOutputStride * kMaxDimension];
				706	uint16_t ref16[kOutputStride * kMaxDimension];
				707	uint8_t* ref;
				708	if (UUT_->use_highbd_ == 0) {
				709	ref = ref8;
				710	} else {
				711	ref = CONVERT_TO_BYTEPTR(ref16);
				712	}
				713	#else
				714	uint8_t ref[kOutputStride * kMaxDimension];
				715	#endif
				716
				717	for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
				718	const InterpKernel *filters =
				719	vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
				720
				721	for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
				722	for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
				723	wrapper_filter_block2d_8_c(in, kInputStride,
				724	filters[filter_x], filters[filter_y],
				725	ref, kOutputStride,
				726	Width(), Height());
				727
					// Kernel index 0 selects the routine that skips filtering in that
					// direction (presumably kernel 0 is the identity - confirm in
					// vp9_filter_kernels); the skipped direction gets kInvalidFilter.
				728	if (filter_x && filter_y)
				729	ASM_REGISTER_STATE_CHECK(
				730	UUT_->hv8_(in, kInputStride, out, kOutputStride,
				731	filters[filter_x], 16, filters[filter_y], 16,
				732	Width(), Height()));
				733	else if (filter_y)
				734	ASM_REGISTER_STATE_CHECK(
				735	UUT_->v8_(in, kInputStride, out, kOutputStride,
				736	kInvalidFilter, 16, filters[filter_y], 16,
				737	Width(), Height()));
				738	else if (filter_x)
				739	ASM_REGISTER_STATE_CHECK(
				740	UUT_->h8_(in, kInputStride, out, kOutputStride,
				741	filters[filter_x], 16, kInvalidFilter, 16,
				742	Width(), Height()));
				743	else
				744	ASM_REGISTER_STATE_CHECK(
				745	UUT_->copy_(in, kInputStride, out, kOutputStride,
				746	kInvalidFilter, 0, kInvalidFilter, 0,
				747	Width(), Height()));
				748
				749	CheckGuardBlocks();
				750
				751	for (int y = 0; y < Height(); ++y)
				752	for (int x = 0; x < Width(); ++x)
				753	ASSERT_EQ(lookup(ref, y * kOutputStride + x),
				754	lookup(out, y * kOutputStride + x))
				755	<< "mismatch at (" << x << "," << y << "), "
				756	<< "filters (" << filter_bank << ","
				757	<< filter_x << "," << filter_y << ")";
				758	}
				759	}
				760	}
				761	}
				762
				763	TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
					// Averaging counterpart of MatchesReferenceSubpixelFilter: ref and out
					// start with identical random contents, and after each kernel pair the
					// averaging routine under test must match the averaging C reference.
				764	uint8_t* const in = input();
				765	uint8_t* const out = output();
				766	#if CONFIG_VP9_HIGHBITDEPTH
					// ref aliases whichever scratch array matches the bit depth under test.
				767	uint8_t ref8[kOutputStride * kMaxDimension];
				768	uint16_t ref16[kOutputStride * kMaxDimension];
				769	uint8_t* ref;
				770	if (UUT_->use_highbd_ == 0) {
				771	ref = ref8;
				772	} else {
				773	ref = CONVERT_TO_BYTEPTR(ref16);
				774	}
				775	#else
				776	uint8_t ref[kOutputStride * kMaxDimension];
				777	#endif
				778
				779	// Populate ref and out with some random data
				780	::libvpx_test::ACMRandom prng;
				781	for (int y = 0; y < Height(); ++y) {
				782	for (int x = 0; x < Width(); ++x) {
				783	uint16_t r;
				784	#if CONFIG_VP9_HIGHBITDEPTH
				785	if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
				786	r = prng.Rand8Extremes();
				787	} else {
				788	r = prng.Rand16() & mask_;
				789	}
				790	#else
				791	r = prng.Rand8Extremes();
				792	#endif
				793
				794	assign_val(out, y * kOutputStride + x, r);
				795	assign_val(ref, y * kOutputStride + x, r);
				796	}
				797	}
				798
					// Both buffers accumulate across iterations, so every pass averages into
					// the result of the previous one on both sides.
				799	for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
				800	const InterpKernel *filters =
				801	vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
				802
				803	for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
				804	for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
				805	wrapper_filter_average_block2d_8_c(in, kInputStride,
				806	filters[filter_x], filters[filter_y],
				807	ref, kOutputStride,
				808	Width(), Height());
				809
					// Kernel index 0 selects the routine that skips filtering in that
					// direction; the skipped direction gets kInvalidFilter.
				810	if (filter_x && filter_y)
				811	ASM_REGISTER_STATE_CHECK(
				812	UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
				813	filters[filter_x], 16, filters[filter_y], 16,
				814	Width(), Height()));
				815	else if (filter_y)
				816	ASM_REGISTER_STATE_CHECK(
				817	UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
				818	kInvalidFilter, 16, filters[filter_y], 16,
				819	Width(), Height()));
				820	else if (filter_x)
				821	ASM_REGISTER_STATE_CHECK(
				822	UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
				823	filters[filter_x], 16, kInvalidFilter, 16,
				824	Width(), Height()));
				825	else
				826	ASM_REGISTER_STATE_CHECK(
				827	UUT_->avg_(in, kInputStride, out, kOutputStride,
				828	kInvalidFilter, 0, kInvalidFilter, 0,
				829	Width(), Height()));
				830
				831	CheckGuardBlocks();
				832
				833	for (int y = 0; y < Height(); ++y)
				834	for (int x = 0; x < Width(); ++x)
				835	ASSERT_EQ(lookup(ref, y * kOutputStride + x),
				836	lookup(out, y * kOutputStride + x))
				837	<< "mismatch at (" << x << "," << y << "), "
				838	<< "filters (" << filter_bank << ","
				839	<< filter_x << "," << filter_y << ")";
				840	}
				841	}
				842	}
				843	}
				844
				845	TEST_P(ConvolveTest, FilterExtremes) {
					// Feeds the filters input patches made only of minimum and maximum pixel
					// values and checks the routines under test against the C reference for
					// every kernel pair.
				846	uint8_t *const in = input();
				847	uint8_t *const out = output();
				848	#if CONFIG_VP9_HIGHBITDEPTH
					// ref aliases whichever scratch array matches the bit depth under test.
				849	uint8_t ref8[kOutputStride * kMaxDimension];
				850	uint16_t ref16[kOutputStride * kMaxDimension];
				851	uint8_t *ref;
				852	if (UUT_->use_highbd_ == 0) {
				853	ref = ref8;
				854	} else {
				855	ref = CONVERT_TO_BYTEPTR(ref16);
				856	}
				857	#else
				858	uint8_t ref[kOutputStride * kMaxDimension];
				859	#endif
				860
				861	// Populate ref and out with some random data
				862	::libvpx_test::ACMRandom prng;
				863	for (int y = 0; y < Height(); ++y) {
				864	for (int x = 0; x < Width(); ++x) {
				865	uint16_t r;
				866	#if CONFIG_VP9_HIGHBITDEPTH
				867	if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
				868	r = prng.Rand8Extremes();
				869	} else {
				870	r = prng.Rand16() & mask_;
				871	}
				872	#else
				873	r = prng.Rand8Extremes();
				874	#endif
				875	assign_val(out, y * kOutputStride + x, r);
				876	assign_val(ref, y * kOutputStride + x, r);
				877	}
				878	}
				879
					// axis 0 lays the bits of seed_val out along x, axis 1 along y. Each pass
					// of the while loop fills an 8x8 input patch and consumes 8 consecutive
					// seed values, so all 256 bit patterns are covered per axis.
				880	for (int axis = 0; axis < 2; axis++) {
				881	int seed_val = 0;
				882	while (seed_val < 256) {
				883	for (int y = 0; y < 8; ++y) {
				884	for (int x = 0; x < 8; ++x) {
					// The patch is written into the input buffer, so index it with the
					// input stride.
				885	#if CONFIG_VP9_HIGHBITDEPTH
				886	assign_val(in, y * kInputStride + x - SUBPEL_TAPS / 2 + 1,
				887	((seed_val >> (axis ? y : x)) & 1) * mask_);
				888	#else
				889	assign_val(in, y * kInputStride + x - SUBPEL_TAPS / 2 + 1,
				890	((seed_val >> (axis ? y : x)) & 1) * 255);
				891	#endif
				892	if (axis) seed_val++;
				893	}
				894	if (axis)
				895	seed_val-= 8;
				896	else
				897	seed_val++;
				898	}
				899	if (axis) seed_val += 8;
				900
				901	for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
				902	const InterpKernel *filters =
				903	vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
				904	for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
				905	for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
				906	wrapper_filter_block2d_8_c(in, kInputStride,
				907	filters[filter_x], filters[filter_y],
				908	ref, kOutputStride,
				909	Width(), Height());
					// Kernel index 0 selects the routine that skips filtering in that
					// direction; the skipped direction gets kInvalidFilter.
				910	if (filter_x && filter_y)
				911	ASM_REGISTER_STATE_CHECK(
				912	UUT_->hv8_(in, kInputStride, out, kOutputStride,
				913	filters[filter_x], 16, filters[filter_y], 16,
				914	Width(), Height()));
				915	else if (filter_y)
				916	ASM_REGISTER_STATE_CHECK(
				917	UUT_->v8_(in, kInputStride, out, kOutputStride,
				918	kInvalidFilter, 16, filters[filter_y], 16,
				919	Width(), Height()));
				920	else if (filter_x)
				921	ASM_REGISTER_STATE_CHECK(
				922	UUT_->h8_(in, kInputStride, out, kOutputStride,
				923	filters[filter_x], 16, kInvalidFilter, 16,
				924	Width(), Height()));
				925	else
				926	ASM_REGISTER_STATE_CHECK(
				927	UUT_->copy_(in, kInputStride, out, kOutputStride,
				928	kInvalidFilter, 0, kInvalidFilter, 0,
				929	Width(), Height()));
				930
				931	for (int y = 0; y < Height(); ++y)
				932	for (int x = 0; x < Width(); ++x)
				933	ASSERT_EQ(lookup(ref, y * kOutputStride + x),
				934	lookup(out, y * kOutputStride + x))
				935	<< "mismatch at (" << x << "," << y << "), "
				936	<< "filters (" << filter_bank << ","
				937	<< filter_x << "," << filter_y << ")";
				938	}
				939	}
				940	}
				941	}
				942	}
				943	}
				944
				945	/* This test checks that enough rows and columns are filtered for every
				946	possible initial fractional position and scaling step. */
				947	TEST_P(ConvolveTest, CheckScalingFiltering) {
				948	uint8_t* const in = input();
				949	uint8_t* const out = output();
				950	const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP];
				951
				// The input is constant, and the assertions below require every output
				// pixel to equal the input pixel at the same position.
				952	SetConstantInput(127);
				953
				// frac indexes the eighttap kernel used for both directions; step is
				// passed as both the x and the y filter stride.
				954	for (int frac = 0; frac < 16; ++frac) {
				955	for (int step = 1; step <= 32; ++step) {
				956	/* Test the horizontal and vertical filters in combination. */
				957	ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
				958	eighttap[frac], step,
				959	eighttap[frac], step,
				960	Width(), Height()));
				961
				962	CheckGuardBlocks();
				963
				// `in` is indexed with kInputStride, `out` with kOutputStride.
				964	for (int y = 0; y < Height(); ++y) {
				965	for (int x = 0; x < Width(); ++x) {
				966	ASSERT_EQ(lookup(in, y * kInputStride + x),
				967	lookup(out, y * kOutputStride + x))
				968	<< "x == " << x << ", y == " << y
				969	<< ", frac == " << frac << ", step == " << step;
				970	}
				971	}
				972	}
				973	}
				974	}
				975
				// NOTE(review): make_tuple is not referenced directly in this part of the
				// file; presumably the ALL_SIZES macro relies on it -- confirm.
				976	using std::tr1::make_tuple;
				977
				// WRAP(func, bd) defines wrap_<func>_<bd>(), a function with the ConvolveFunc
				// parameter list that forwards all ten arguments to vpx_highbd_<func> and
				// appends the bit depth `bd` as the final argument.
				978	#if CONFIG_VP9_HIGHBITDEPTH
				979	#define WRAP(func, bd) \
				980	void wrap_ ## func ## _ ## bd(const uint8_t *src, ptrdiff_t src_stride, \
				981	uint8_t *dst, ptrdiff_t dst_stride, \
				982	const int16_t *filter_x, \
				983	int filter_x_stride, \
				984	const int16_t *filter_y, \
				985	int filter_y_stride, \
				986	int w, int h) { \
				987	vpx_highbd_ ## func(src, src_stride, dst, dst_stride, filter_x, \
				988	filter_x_stride, filter_y, filter_y_stride, \
				989	w, h, bd); \
				990	}
				// SSE2 wrappers for bit depths 8, 10 and 12. The copy/avg wrappers are only
				// generated when CONFIG_USE_X86INC is set.
				991	#if HAVE_SSE2 && ARCH_X86_64
				992	#if CONFIG_USE_X86INC
				993	WRAP(convolve_copy_sse2, 8)
				994	WRAP(convolve_avg_sse2, 8)
				995	WRAP(convolve_copy_sse2, 10)
				996	WRAP(convolve_avg_sse2, 10)
				997	WRAP(convolve_copy_sse2, 12)
				998	WRAP(convolve_avg_sse2, 12)
				999	#endif // CONFIG_USE_X86INC
				1000	WRAP(convolve8_horiz_sse2, 8)
				1001	WRAP(convolve8_avg_horiz_sse2, 8)
				1002	WRAP(convolve8_vert_sse2, 8)
				1003	WRAP(convolve8_avg_vert_sse2, 8)
				1004	WRAP(convolve8_sse2, 8)
				1005	WRAP(convolve8_avg_sse2, 8)
				1006	WRAP(convolve8_horiz_sse2, 10)
				1007	WRAP(convolve8_avg_horiz_sse2, 10)
				1008	WRAP(convolve8_vert_sse2, 10)
				1009	WRAP(convolve8_avg_vert_sse2, 10)
				1010	WRAP(convolve8_sse2, 10)
				1011	WRAP(convolve8_avg_sse2, 10)
				1012	WRAP(convolve8_horiz_sse2, 12)
				1013	WRAP(convolve8_avg_horiz_sse2, 12)
				1014	WRAP(convolve8_vert_sse2, 12)
				1015	WRAP(convolve8_avg_vert_sse2, 12)
				1016	WRAP(convolve8_sse2, 12)
				1017	WRAP(convolve8_avg_sse2, 12)
				1018	#endif // HAVE_SSE2 && ARCH_X86_64
				1019
				// C wrappers for bit depths 8, 10 and 12 (always generated in this build
				// configuration).
				1020	WRAP(convolve_copy_c, 8)
				1021	WRAP(convolve_avg_c, 8)
				1022	WRAP(convolve8_horiz_c, 8)
				1023	WRAP(convolve8_avg_horiz_c, 8)
				1024	WRAP(convolve8_vert_c, 8)
				1025	WRAP(convolve8_avg_vert_c, 8)
				1026	WRAP(convolve8_c, 8)
				1027	WRAP(convolve8_avg_c, 8)
				1028	WRAP(convolve_copy_c, 10)
				1029	WRAP(convolve_avg_c, 10)
				1030	WRAP(convolve8_horiz_c, 10)
				1031	WRAP(convolve8_avg_horiz_c, 10)
				1032	WRAP(convolve8_vert_c, 10)
				1033	WRAP(convolve8_avg_vert_c, 10)
				1034	WRAP(convolve8_c, 10)
				1035	WRAP(convolve8_avg_c, 10)
				1036	WRAP(convolve_copy_c, 12)
				1037	WRAP(convolve_avg_c, 12)
				1038	WRAP(convolve8_horiz_c, 12)
				1039	WRAP(convolve8_avg_horiz_c, 12)
				1040	WRAP(convolve8_vert_c, 12)
				1041	WRAP(convolve8_avg_vert_c, 12)
				1042	WRAP(convolve8_c, 12)
				1043	WRAP(convolve8_avg_c, 12)
				1044	#undef WRAP
				1045
				// C reference function tables. Arguments follow the ConvolveFunctions
				// constructor order: copy, avg, h8, h8_avg, v8, v8_avg, hv8, hv8_avg,
				// sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg, bd.
				// In this (high-bitdepth) branch the scaled slots (sh8 .. shv8_avg) are
				// filled with the same convolve8 wrappers as the unscaled slots.
				1046	const ConvolveFunctions convolve8_c(
				1047	wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
				1048	wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
				1049	wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
				1050	wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
				1051	wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
				1052	wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
				1053	wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
				1054	const ConvolveFunctions convolve10_c(
				1055	wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
				1056	wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
				1057	wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
				1058	wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
				1059	wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
				1060	wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
				1061	wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
				1062	const ConvolveFunctions convolve12_c(
				1063	wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
				1064	wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
				1065	wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
				1066	wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
				1067	wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
				1068	wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
				1069	wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
				1070	const ConvolveParam kArrayConvolve_c[] = {
				1071	ALL_SIZES(convolve8_c),
				1072	ALL_SIZES(convolve10_c),
				1073	ALL_SIZES(convolve12_c)
				1074	};
				1075
				// Non-high-bitdepth build: the vpx_*_c functions are used directly, the
				// scaled slots use the vpx_scaled_*_c functions, and bd is passed as 0.
				1076	#else  // !CONFIG_VP9_HIGHBITDEPTH
				1077	const ConvolveFunctions convolve8_c(
				1078	vpx_convolve_copy_c, vpx_convolve_avg_c,
				1079	vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c,
				1080	vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c,
				1081	vpx_convolve8_c, vpx_convolve8_avg_c,
				1082	vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
				1083	vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
				1084	vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
				1085	const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
				1086	#endif  // CONFIG_VP9_HIGHBITDEPTH
				// Instantiate ConvolveTest over every entry of kArrayConvolve_c.
				1087	INSTANTIATE_TEST_CASE_P(C, ConvolveTest,
				1088	::testing::ValuesIn(kArrayConvolve_c));
				1089
				// SSE2 function tables (x86-64 only). Arguments follow the ConvolveFunctions
				// constructor order: copy, avg, h8, h8_avg, v8, v8_avg, hv8, hv8_avg,
				// sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg, bd.
				// The copy/avg slots use the SSE2 routines only when CONFIG_USE_X86INC is
				// set and fall back to the C routines otherwise.
				1090	#if HAVE_SSE2 && ARCH_X86_64
				1091	#if CONFIG_VP9_HIGHBITDEPTH
				1092	const ConvolveFunctions convolve8_sse2(
				1093	#if CONFIG_USE_X86INC
				1094	wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
				1095	#else
				1096	wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
				1097	#endif // CONFIG_USE_X86INC
				1098	wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
				1099	wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
				1100	wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
				1101	wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
				1102	wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
				1103	wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
				1104	const ConvolveFunctions convolve10_sse2(
				1105	#if CONFIG_USE_X86INC
				1106	wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
				1107	#else
				1108	wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
				1109	#endif // CONFIG_USE_X86INC
				1110	wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
				1111	wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
				1112	wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
				1113	wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
				1114	wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
				1115	wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
				1116	const ConvolveFunctions convolve12_sse2(
				1117	#if CONFIG_USE_X86INC
				1118	wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
				1119	#else
				1120	wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
				1121	#endif // CONFIG_USE_X86INC
				1122	wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
				1123	wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
				1124	wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
				1125	wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
				1126	wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
				1127	wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
				1128	const ConvolveParam kArrayConvolve_sse2[] = {
				1129	ALL_SIZES(convolve8_sse2),
				1130	ALL_SIZES(convolve10_sse2),
				1131	ALL_SIZES(convolve12_sse2)
				1132	};
				// Non-high-bitdepth build: the scaled slots use the C routines and bd is 0.
				1133	#else  // !CONFIG_VP9_HIGHBITDEPTH
				1134	const ConvolveFunctions convolve8_sse2(
				1135	#if CONFIG_USE_X86INC
				1136	vpx_convolve_copy_sse2, vpx_convolve_avg_sse2,
				1137	#else
				1138	vpx_convolve_copy_c, vpx_convolve_avg_c,
				1139	#endif // CONFIG_USE_X86INC
				1140	vpx_convolve8_horiz_sse2, vpx_convolve8_avg_horiz_sse2,
				1141	vpx_convolve8_vert_sse2, vpx_convolve8_avg_vert_sse2,
				1142	vpx_convolve8_sse2, vpx_convolve8_avg_sse2,
				1143	vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
				1144	vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
				1145	vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
				1146
				1147	const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
				1148	#endif // CONFIG_VP9_HIGHBITDEPTH
				1149	INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
				1150	::testing::ValuesIn(kArrayConvolve_sse2));
				1151	#endif  // HAVE_SSE2 && ARCH_X86_64
				1152
				// SSSE3 function table. The copy/avg slots use the C routines; the
				// convolve8 slots use the SSSE3 routines. Of the scaled slots only shv8
				// (vpx_scaled_2d_ssse3) is SSSE3, the rest are C. bd is 0.
				1153	#if HAVE_SSSE3
				1154	const ConvolveFunctions convolve8_ssse3(
				1155	vpx_convolve_copy_c, vpx_convolve_avg_c,
				1156	vpx_convolve8_horiz_ssse3, vpx_convolve8_avg_horiz_ssse3,
				1157	vpx_convolve8_vert_ssse3, vpx_convolve8_avg_vert_ssse3,
				1158	vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
				1159	vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
				1160	vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
				1161	vpx_scaled_2d_ssse3, vpx_scaled_avg_2d_c, 0);
				1162
				1163	const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
				1164	INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
				1165	::testing::ValuesIn(kArrayConvolve8_ssse3));
				1166	#endif  // HAVE_SSSE3
				1167
				// AVX2 function table. Only the non-averaging convolve8 slots (h8, v8, hv8)
				// use AVX2 routines; the averaging slots use the SSSE3 routines, which is
				// why the guard also requires HAVE_SSSE3. Copy/avg and all scaled slots use
				// the C routines. bd is 0.
				1168	#if HAVE_AVX2 && HAVE_SSSE3
				1169	const ConvolveFunctions convolve8_avx2(
				1170	vpx_convolve_copy_c, vpx_convolve_avg_c,
				1171	vpx_convolve8_horiz_avx2, vpx_convolve8_avg_horiz_ssse3,
				1172	vpx_convolve8_vert_avx2, vpx_convolve8_avg_vert_ssse3,
				1173	vpx_convolve8_avx2, vpx_convolve8_avg_ssse3,
				1174	vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
				1175	vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
				1176	vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
				1177
				1178	const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
				1179	INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
				1180	::testing::ValuesIn(kArrayConvolve8_avx2));
				1181	#endif // HAVE_AVX2 && HAVE_SSSE3
				1182
				// NEON function table. Copy/avg and convolve8 slots use the NEON routines;
				// all scaled slots use the C routines. bd is 0.
				// NOTE(review): the HAVE_NEON_ASM and non-ASM branches below are textually
				// identical -- confirm whether the conditional is still needed.
				1183	#if HAVE_NEON
				1184	#if HAVE_NEON_ASM
				1185	const ConvolveFunctions convolve8_neon(
				1186	vpx_convolve_copy_neon, vpx_convolve_avg_neon,
				1187	vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
				1188	vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
				1189	vpx_convolve8_neon, vpx_convolve8_avg_neon,
				1190	vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
				1191	vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
				1192	vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
				1193	#else // !HAVE_NEON_ASM
				1194	const ConvolveFunctions convolve8_neon(
				1195	vpx_convolve_copy_neon, vpx_convolve_avg_neon,
				1196	vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
				1197	vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
				1198	vpx_convolve8_neon, vpx_convolve8_avg_neon,
				1199	vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
				1200	vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
				1201	vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
				1202	#endif // HAVE_NEON_ASM
				1203
				1204	const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES(convolve8_neon) };
				1205	INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
				1206	::testing::ValuesIn(kArrayConvolve8_neon));
				1207	#endif // HAVE_NEON
				1208
				// DSPR2 function table. Copy/avg and convolve8 slots use the DSPR2
				// routines; all scaled slots use the C routines. bd is 0.
				1209	#if HAVE_DSPR2
				1210	const ConvolveFunctions convolve8_dspr2(
				1211	vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2,
				1212	vpx_convolve8_horiz_dspr2, vpx_convolve8_avg_horiz_dspr2,
				1213	vpx_convolve8_vert_dspr2, vpx_convolve8_avg_vert_dspr2,
				1214	vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2,
				1215	vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
				1216	vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
				1217	vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
				1218
				1219	const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) };
				1220	INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
				1221	::testing::ValuesIn(kArrayConvolve8_dspr2));
				1222	#endif // HAVE_DSPR2
				1223
				// MSA function table. Copy/avg and convolve8 slots use the MSA routines;
				// all scaled slots use the C routines. bd is 0.
				1224	#if HAVE_MSA
				1225	const ConvolveFunctions convolve8_msa(
				1226	vpx_convolve_copy_msa, vpx_convolve_avg_msa,
				1227	vpx_convolve8_horiz_msa, vpx_convolve8_avg_horiz_msa,
				1228	vpx_convolve8_vert_msa, vpx_convolve8_avg_vert_msa,
				1229	vpx_convolve8_msa, vpx_convolve8_avg_msa,
				1230	vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
				1231	vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
				1232	vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
				1233
				1234	const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) };
				1235	INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
				1236	::testing::ValuesIn(kArrayConvolve8_msa));
				1237	#endif // HAVE_MSA
				1238	} // namespace