blob: 86ae50288a9bd4eac9a01af397cbe32fc865c409 [file] [log] [blame]
/*
* Copyright 2016 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
/*
ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q
*/
#ifndef SkBlend_opts_DEFINED
#define SkBlend_opts_DEFINED
#include "SkNx.h"
#include "SkPM4fPriv.h"
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
#include <immintrin.h>
#endif
namespace SK_OPTS_NS {
static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
if (src >= 0xFF000000) {
*dst = src;
return;
}
auto d = Sk4f_fromS32(*dst),
s = Sk4f_fromS32( src);
*dst = Sk4f_toS32(s + d * (1.0f - s[3]));
}
static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
srcover_srgb_srgb_1(dst++, *src++);
srcover_srgb_srgb_1(dst++, *src++);
srcover_srgb_srgb_1(dst++, *src++);
srcover_srgb_srgb_1(dst , *src );
}
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
static inline __m128i load(const uint32_t* p) {
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
}
static inline void store(uint32_t* p, __m128i v) {
_mm_storeu_si128(reinterpret_cast<__m128i*>(p), v);
}
static void srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) {
const __m128i alphaMask = _mm_set1_epi32(0xFF000000);
while (ndst > 0) {
int count = SkTMin(ndst, nsrc);
ndst -= count;
const uint32_t* src = srcStart;
const uint32_t* end = dst + (count & ~3);
while (dst < end) {
__m128i pixels = load(src);
if (_mm_testc_si128(pixels, alphaMask)) {
store(dst, pixels);
} else if (!_mm_testz_si128(pixels, alphaMask)) {
srcover_srgb_srgb_4(dst, src);
}
dst += 4;
src += 4;
}
count = count & 3;
while (count-- > 0) {
srcover_srgb_srgb_1(dst++, *src++);
}
}
}
#else
static void srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
while (ndst > 0) {
int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) {
srcover_srgb_srgb_1(dst++, src[i]);
}
ndst -= n;
}
}
#endif
} // namespace SK_OPTS_NS
#endif//SkBlend_opts_DEFINED