// third_party/libwebp/src/dsp/alpha_processing_mips_dsp_r2.c - cobalt - Git at Google

 // Copyright 2014 Google Inc. All Rights Reserved.
 //
 // Use of this source code is governed by a BSD-style license
 // that can be found in the COPYING file in the root of the source
 // tree. An additional intellectual property rights grant can be found
 // in the file PATENTS. All contributing project authors may
 // be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Utilities for processing transparent channel.
 //
 // Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
 //            Djordje Pesut  (djordje.pesut@imgtec.com)

 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_MIPS_DSP_R2)

 static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
                                    int width, int height,
                                    uint8_t* dst, int dst_stride) {
   uint32_t alpha_mask = 0xffffffff;
   int i, j, temp0;

   for (j = 0; j < height; ++j) {
     uint8_t* pdst = dst;
     const uint8_t* palpha = alpha;
     for (i = 0; i < (width >> 2); ++i) {
       int temp1, temp2, temp3;

       __asm__ volatile (
         "ulw    %[temp0],      0(%[palpha])                \n\t"
         "addiu  %[palpha],     %[palpha],     4            \n\t"
         "addiu  %[pdst],       %[pdst],       16           \n\t"
         "srl    %[temp1],      %[temp0],      8            \n\t"
         "srl    %[temp2],      %[temp0],      16           \n\t"
         "srl    %[temp3],      %[temp0],      24           \n\t"
         "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
         "sb     %[temp0],      -16(%[pdst])                \n\t"
         "sb     %[temp1],      -12(%[pdst])                \n\t"
         "sb     %[temp2],      -8(%[pdst])                 \n\t"
         "sb     %[temp3],      -4(%[pdst])                 \n\t"
         : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
           [temp3]"=&r"(temp3), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
           [alpha_mask]"+r"(alpha_mask)
         :
         : "memory"
       );
     }

     for (i = 0; i < (width & 3); ++i) {
       __asm__ volatile (
         "lbu    %[temp0],      0(%[palpha])                \n\t"
         "addiu  %[palpha],     %[palpha],     1            \n\t"
         "sb     %[temp0],      0(%[pdst])                  \n\t"
         "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
         "addiu  %[pdst],       %[pdst],       4            \n\t"
         : [temp0]"=&r"(temp0), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
           [alpha_mask]"+r"(alpha_mask)
         :
         : "memory"
       );
     }
     alpha += alpha_stride;
     dst += dst_stride;
   }

   __asm__ volatile (
     "ext    %[temp0],      %[alpha_mask], 0, 16            \n\t"
     "srl    %[alpha_mask], %[alpha_mask], 16               \n\t"
     "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
     "ext    %[temp0],      %[alpha_mask], 0, 8             \n\t"
     "srl    %[alpha_mask], %[alpha_mask], 8                \n\t"
     "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
     : [temp0]"=&r"(temp0), [alpha_mask]"+r"(alpha_mask)
     :
   );

   return (alpha_mask != 0xff);
 }

 // Scales, in place, the three low bytes of each 32-bit value in
 // ptr[0 .. width-1] by a factor derived from that value's top byte (alpha).
 //
 //   inverse == 0: factor = alpha * 0x010101
 //   inverse != 0: factor = 0xff000000 / alpha
 //
 // Each scaled byte is (byte * factor + 0x00800000) >> 24, i.e. the factor is
 // treated as 24-bit fixed point with rounding. Values whose top byte is 0xff
 // are left untouched, values whose top byte is 0 are set to 0, and the top
 // byte itself is always preserved.
 //
 // NOTE(review): with 'inverse' set, byte * factor is truncated to 32 bits
 // when a byte is larger than alpha -- presumably callers only pass
 // premultiplied input; confirm.
 static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
                                   int inverse) {
   int x;
   // Constants handed to the asm block as register operands.
   const uint32_t c_00ffffff = 0x00ffffffu;
   const uint32_t c_ff000000 = 0xff000000u;
   const uint32_t c_8000000  = 0x00800000u;
   const uint32_t c_8000080  = 0x00800080u;
   for (x = 0; x < width; ++x) {
     const uint32_t argb = ptr[x];
     if (argb < 0xff000000u) {      // alpha < 255
       if (argb <= 0x00ffffffu) {   // alpha == 0
         ptr[x] = 0;
       } else {
         int temp0, temp1, temp2, temp3, alpha;
         __asm__ volatile (
           // temp0 = alpha replicated into the three low bytes
           // (= alpha * 0x010101), the default (non-inverse) factor.
           "srl          %[alpha],   %[argb],       24                \n\t"
           "replv.qb     %[temp0],   %[alpha]                         \n\t"
           "and          %[temp0],   %[temp0],      %[c_00ffffff]     \n\t"
           // Inverse mode replaces the factor with 0xff000000 / alpha.
           "beqz         %[inverse], 0f                               \n\t"
           "divu         $zero,      %[c_ff000000], %[alpha]          \n\t"
           "mflo         %[temp0]                                     \n\t"
         "0:                                                          \n\t"
           // Extract bytes 0, 1 and 2 and multiply each by the factor.
           "andi         %[temp1],   %[argb],       0xff              \n\t"
           "ext          %[temp2],   %[argb],       8,             8  \n\t"
           "ext          %[temp3],   %[argb],       16,            8  \n\t"
           "mul          %[temp1],   %[temp1],      %[temp0]          \n\t"
           "mul          %[temp2],   %[temp2],      %[temp0]          \n\t"
           "mul          %[temp3],   %[temp3],      %[temp0]          \n\t"
           // Keep the upper halfword of each product, add the rounding
           // term, then keep the upper byte of each halfword. The top
           // halfword of argb supplies the unchanged alpha byte.
           "precrq.ph.w  %[temp1],   %[temp2],      %[temp1]          \n\t"
           "addu         %[temp3],   %[temp3],      %[c_8000000]      \n\t"
           "addu         %[temp1],   %[temp1],      %[c_8000080]      \n\t"
           "precrq.ph.w  %[temp3],   %[argb],       %[temp3]          \n\t"
           "precrq.qb.ph %[temp1],   %[temp3],      %[temp1]          \n\t"
           : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
             [temp3]"=&r"(temp3), [alpha]"=&r"(alpha)
           : [inverse]"r"(inverse), [c_00ffffff]"r"(c_00ffffff),
             [c_8000000]"r"(c_8000000), [c_8000080]"r"(c_8000080),
             [c_ff000000]"r"(c_ff000000), [argb]"r"(argb)
           : "memory", "hi", "lo"
         );
         // temp1 now holds alpha followed by the three scaled bytes.
         ptr[x] = temp1;
       }
     }
   }
 }

 #ifdef WORDS_BIGENDIAN
 // Packs four separate byte streams into 'len' 32-bit words written to 'out'.
 // Word i is (a << 24) | (r << 16) | (g << 8) | b, where each component is
 // read from its pointer at byte offset 4 * i (the stride is fixed by 'step').
 // Only compiled when WORDS_BIGENDIAN is defined.
 static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
                                const uint8_t* g, const uint8_t* b, int len,
                                uint32_t* out) {
   int temp0, temp1, temp2, temp3, offset;
   // The main loop stops at 'loop_end'; when 'len' is odd the last word is
   // produced by the code after label 0.
   const int rest = len & 1;
   const uint32_t* const loop_end = out + len - rest;
   const int step = 4;
   __asm__ volatile (
     // offset = 0; skip the loop entirely when there is nothing for it to do.
     "xor          %[offset],   %[offset], %[offset]    \n\t"
     "beq          %[loop_end], %[out],    0f           \n\t"
   "2:                                                  \n\t"
     // Load one byte from each stream at the current offset.
     "lbux         %[temp0],    %[offset](%[a])         \n\t"
     "lbux         %[temp1],    %[offset](%[r])         \n\t"
     "lbux         %[temp2],    %[offset](%[g])         \n\t"
     "lbux         %[temp3],    %[offset](%[b])         \n\t"
     // temp1 = (a << 16) | r, temp3 = (g << 16) | b.
     "ins          %[temp1],    %[temp0],  16,     16   \n\t"
     "ins          %[temp3],    %[temp2],  16,     16   \n\t"
     "addiu        %[out],      %[out],    4            \n\t"
     // Keep the low byte of each halfword: a, r, g, b from top to bottom.
     "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
     "sw           %[temp0],    -4(%[out])              \n\t"
     "addu         %[offset],   %[offset], %[step]      \n\t"
     "bne          %[loop_end], %[out],    2b           \n\t"
   "0:                                                  \n\t"
     // Trailing word for odd 'len'; same packing as the loop body.
     "beq          %[rest],     $zero,     1f           \n\t"
     "lbux         %[temp0],    %[offset](%[a])         \n\t"
     "lbux         %[temp1],    %[offset](%[r])         \n\t"
     "lbux         %[temp2],    %[offset](%[g])         \n\t"
     "lbux         %[temp3],    %[offset](%[b])         \n\t"
     "ins          %[temp1],    %[temp0],  16,     16   \n\t"
     "ins          %[temp3],    %[temp2],  16,     16   \n\t"
     "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
     "sw           %[temp0],    0(%[out])               \n\t"
   "1:                                                  \n\t"
     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
       [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
     : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
       [loop_end]"r"(loop_end), [rest]"r"(rest)
     : "memory"
   );
 }
 #endif  // WORDS_BIGENDIAN

 // Packs three separate byte streams into 'len' 32-bit words written to 'out'.
 // Word i is 0xff000000 | (r << 16) | (g << 8) | b, where each component is
 // read from its pointer at byte offset i * step.
 static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
                               const uint8_t* b, int len, int step,
                               uint32_t* out) {
   int temp0, temp1, temp2, offset;
   // The main loop stops at 'loop_end'; when 'len' is odd the last word is
   // produced by the code after label 0.
   const int rest = len & 1;
   // Constant value placed in the top byte of every output word.
   const int a = 0xff;
   const uint32_t* const loop_end = out + len - rest;
   __asm__ volatile (
     // offset = 0; skip the loop entirely when there is nothing for it to do.
     "xor          %[offset],   %[offset], %[offset]    \n\t"
     "beq          %[loop_end], %[out],    0f           \n\t"
   "2:                                                  \n\t"
     // Load one byte from each stream at the current offset.
     "lbux         %[temp0],    %[offset](%[r])         \n\t"
     "lbux         %[temp1],    %[offset](%[g])         \n\t"
     "lbux         %[temp2],    %[offset](%[b])         \n\t"
     // temp0 = (0xff << 16) | r, temp2 = (g << 16) | b.
     "ins          %[temp0],    %[a],      16,     16   \n\t"
     "ins          %[temp2],    %[temp1],  16,     16   \n\t"
     "addiu        %[out],      %[out],    4            \n\t"
     // Keep the low byte of each halfword: 0xff, r, g, b from top to bottom.
     "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
     "sw           %[temp0],    -4(%[out])              \n\t"
     "addu         %[offset],   %[offset], %[step]      \n\t"
     "bne          %[loop_end], %[out],    2b           \n\t"
   "0:                                                  \n\t"
     // Trailing word for odd 'len'; same packing as the loop body.
     "beq          %[rest],     $zero,     1f           \n\t"
     "lbux         %[temp0],    %[offset](%[r])         \n\t"
     "lbux         %[temp1],    %[offset](%[g])         \n\t"
     "lbux         %[temp2],    %[offset](%[b])         \n\t"
     "ins          %[temp0],    %[a],      16,     16   \n\t"
     "ins          %[temp2],    %[temp1],  16,     16   \n\t"
     "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
     "sw           %[temp0],    0(%[out])               \n\t"
   "1:                                                  \n\t"
     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
       [offset]"=&r"(offset), [out]"+&r"(out)
     : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
       [loop_end]"r"(loop_end), [rest]"r"(rest)
     : "memory"
   );
 }

 //------------------------------------------------------------------------------
 // Entry point

 extern void WebPInitAlphaProcessingMIPSdspR2(void);

 // Routes the alpha-processing function pointers to the MIPS DSP R2
 // implementations defined in this file.
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
   // Channel packers.
   WebPPackRGB = PackRGB_MIPSdspR2;
 #if defined(WORDS_BIGENDIAN)
   // The ARGB packer is only compiled for big-endian builds.
   WebPPackARGB = PackARGB_MIPSdspR2;
 #endif  // WORDS_BIGENDIAN

   // Alpha plane helpers.
   WebPMultARGBRow = MultARGBRow_MIPSdspR2;
   WebPDispatchAlpha = DispatchAlpha_MIPSdspR2;
 }

 #else  // !WEBP_USE_MIPS_DSP_R2

 // Build without MIPS DSP R2 support. WEBP_DSP_INIT_STUB is defined outside
 // this file; presumably it supplies a do-nothing
 // WebPInitAlphaProcessingMIPSdspR2 so the symbol still exists -- confirm
 // against src/dsp/dsp.h.
 WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingMIPSdspR2)

 #endif  // WEBP_USE_MIPS_DSP_R2
	// Copyright 2014 Google Inc. All Rights Reserved.
	//
	// Use of this source code is governed by a BSD-style license
	// that can be found in the COPYING file in the root of the source
	// tree. An additional intellectual property rights grant can be found
	// in the file PATENTS. All contributing project authors may
	// be found in the AUTHORS file in the root of the source tree.
	// -----------------------------------------------------------------------------
	//
	// Utilities for processing transparent channel.
	//
	// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
	// Djordje Pesut (djordje.pesut@imgtec.com)

	#include "src/dsp/dsp.h"

	#if defined(WEBP_USE_MIPS_DSP_R2)

	static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
	int width, int height,
	uint8_t* dst, int dst_stride) {
	uint32_t alpha_mask = 0xffffffff;
	int i, j, temp0;

	for (j = 0; j < height; ++j) {
	uint8_t* pdst = dst;
	const uint8_t* palpha = alpha;
	for (i = 0; i < (width >> 2); ++i) {
	int temp1, temp2, temp3;

	__asm__ volatile (
	"ulw %[temp0], 0(%[palpha]) \n\t"
	"addiu %[palpha], %[palpha], 4 \n\t"
	"addiu %[pdst], %[pdst], 16 \n\t"
	"srl %[temp1], %[temp0], 8 \n\t"
	"srl %[temp2], %[temp0], 16 \n\t"
	"srl %[temp3], %[temp0], 24 \n\t"
	"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
	"sb %[temp0], -16(%[pdst]) \n\t"
	"sb %[temp1], -12(%[pdst]) \n\t"
	"sb %[temp2], -8(%[pdst]) \n\t"
	"sb %[temp3], -4(%[pdst]) \n\t"
	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
	[temp3]"=&r"(temp3), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
	[alpha_mask]"+r"(alpha_mask)
	:
	: "memory"
	);
	}

	for (i = 0; i < (width & 3); ++i) {
	__asm__ volatile (
	"lbu %[temp0], 0(%[palpha]) \n\t"
	"addiu %[palpha], %[palpha], 1 \n\t"
	"sb %[temp0], 0(%[pdst]) \n\t"
	"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
	"addiu %[pdst], %[pdst], 4 \n\t"
	: [temp0]"=&r"(temp0), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
	[alpha_mask]"+r"(alpha_mask)
	:
	: "memory"
	);
	}
	alpha += alpha_stride;
	dst += dst_stride;
	}

	__asm__ volatile (
	"ext %[temp0], %[alpha_mask], 0, 16 \n\t"
	"srl %[alpha_mask], %[alpha_mask], 16 \n\t"
	"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
	"ext %[temp0], %[alpha_mask], 0, 8 \n\t"
	"srl %[alpha_mask], %[alpha_mask], 8 \n\t"
	"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
	: [temp0]"=&r"(temp0), [alpha_mask]"+r"(alpha_mask)
	:
	);

	return (alpha_mask != 0xff);
	}

	// Scales, in place, the three low bytes of each 32-bit value in
	// ptr[0 .. width-1] by a factor derived from that value's top byte (alpha).
	//
	//   inverse == 0: factor = alpha * 0x010101
	//   inverse != 0: factor = 0xff000000 / alpha
	//
	// Each scaled byte is (byte * factor + 0x00800000) >> 24, i.e. the factor is
	// treated as 24-bit fixed point with rounding. Values whose top byte is 0xff
	// are left untouched, values whose top byte is 0 are set to 0, and the top
	// byte itself is always preserved.
	//
	// NOTE(review): with 'inverse' set, byte * factor is truncated to 32 bits
	// when a byte is larger than alpha -- presumably callers only pass
	// premultiplied input; confirm.
	static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
	int inverse) {
	int x;
	// Constants handed to the asm block as register operands.
	const uint32_t c_00ffffff = 0x00ffffffu;
	const uint32_t c_ff000000 = 0xff000000u;
	const uint32_t c_8000000 = 0x00800000u;
	const uint32_t c_8000080 = 0x00800080u;
	for (x = 0; x < width; ++x) {
	const uint32_t argb = ptr[x];
	if (argb < 0xff000000u) { // alpha < 255
	if (argb <= 0x00ffffffu) { // alpha == 0
	ptr[x] = 0;
	} else {
	int temp0, temp1, temp2, temp3, alpha;
	__asm__ volatile (
	// temp0 = alpha replicated into the three low bytes
	// (= alpha * 0x010101), the default (non-inverse) factor.
	"srl %[alpha], %[argb], 24 \n\t"
	"replv.qb %[temp0], %[alpha] \n\t"
	"and %[temp0], %[temp0], %[c_00ffffff] \n\t"
	// Inverse mode replaces the factor with 0xff000000 / alpha.
	"beqz %[inverse], 0f \n\t"
	"divu $zero, %[c_ff000000], %[alpha] \n\t"
	"mflo %[temp0] \n\t"
	"0: \n\t"
	// Extract bytes 0, 1 and 2 and multiply each by the factor.
	"andi %[temp1], %[argb], 0xff \n\t"
	"ext %[temp2], %[argb], 8, 8 \n\t"
	"ext %[temp3], %[argb], 16, 8 \n\t"
	"mul %[temp1], %[temp1], %[temp0] \n\t"
	"mul %[temp2], %[temp2], %[temp0] \n\t"
	"mul %[temp3], %[temp3], %[temp0] \n\t"
	// Keep the upper halfword of each product, add the rounding
	// term, then keep the upper byte of each halfword. The top
	// halfword of argb supplies the unchanged alpha byte.
	"precrq.ph.w %[temp1], %[temp2], %[temp1] \n\t"
	"addu %[temp3], %[temp3], %[c_8000000] \n\t"
	"addu %[temp1], %[temp1], %[c_8000080] \n\t"
	"precrq.ph.w %[temp3], %[argb], %[temp3] \n\t"
	"precrq.qb.ph %[temp1], %[temp3], %[temp1] \n\t"
	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
	[temp3]"=&r"(temp3), [alpha]"=&r"(alpha)
	: [inverse]"r"(inverse), [c_00ffffff]"r"(c_00ffffff),
	[c_8000000]"r"(c_8000000), [c_8000080]"r"(c_8000080),
	[c_ff000000]"r"(c_ff000000), [argb]"r"(argb)
	: "memory", "hi", "lo"
	);
	// temp1 now holds alpha followed by the three scaled bytes.
	ptr[x] = temp1;
	}
	}
	}
	}

	#ifdef WORDS_BIGENDIAN
	// Packs four separate byte streams into 'len' 32-bit words written to 'out'.
	// Word i is (a << 24) | (r << 16) | (g << 8) | b, where each component is
	// read from its pointer at byte offset 4 * i (the stride is fixed by 'step').
	// Only compiled when WORDS_BIGENDIAN is defined.
	static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
	const uint8_t* g, const uint8_t* b, int len,
	uint32_t* out) {
	int temp0, temp1, temp2, temp3, offset;
	// The main loop stops at 'loop_end'; when 'len' is odd the last word is
	// produced by the code after label 0.
	const int rest = len & 1;
	const uint32_t* const loop_end = out + len - rest;
	const int step = 4;
	__asm__ volatile (
	// offset = 0; skip the loop entirely when there is nothing for it to do.
	"xor %[offset], %[offset], %[offset] \n\t"
	"beq %[loop_end], %[out], 0f \n\t"
	"2: \n\t"
	// Load one byte from each stream at the current offset.
	"lbux %[temp0], %[offset](%[a]) \n\t"
	"lbux %[temp1], %[offset](%[r]) \n\t"
	"lbux %[temp2], %[offset](%[g]) \n\t"
	"lbux %[temp3], %[offset](%[b]) \n\t"
	// temp1 = (a << 16) | r, temp3 = (g << 16) | b.
	"ins %[temp1], %[temp0], 16, 16 \n\t"
	"ins %[temp3], %[temp2], 16, 16 \n\t"
	"addiu %[out], %[out], 4 \n\t"
	// Keep the low byte of each halfword: a, r, g, b from top to bottom.
	"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
	"sw %[temp0], -4(%[out]) \n\t"
	"addu %[offset], %[offset], %[step] \n\t"
	"bne %[loop_end], %[out], 2b \n\t"
	"0: \n\t"
	// Trailing word for odd 'len'; same packing as the loop body.
	"beq %[rest], $zero, 1f \n\t"
	"lbux %[temp0], %[offset](%[a]) \n\t"
	"lbux %[temp1], %[offset](%[r]) \n\t"
	"lbux %[temp2], %[offset](%[g]) \n\t"
	"lbux %[temp3], %[offset](%[b]) \n\t"
	"ins %[temp1], %[temp0], 16, 16 \n\t"
	"ins %[temp3], %[temp2], 16, 16 \n\t"
	"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
	"sw %[temp0], 0(%[out]) \n\t"
	"1: \n\t"
	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
	[temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
	: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
	[loop_end]"r"(loop_end), [rest]"r"(rest)
	: "memory"
	);
	}
	#endif // WORDS_BIGENDIAN

	// Packs three separate byte streams into 'len' 32-bit words written to 'out'.
	// Word i is 0xff000000 | (r << 16) | (g << 8) | b, where each component is
	// read from its pointer at byte offset i * step.
	static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
	const uint8_t* b, int len, int step,
	uint32_t* out) {
	int temp0, temp1, temp2, offset;
	// The main loop stops at 'loop_end'; when 'len' is odd the last word is
	// produced by the code after label 0.
	const int rest = len & 1;
	// Constant value placed in the top byte of every output word.
	const int a = 0xff;
	const uint32_t* const loop_end = out + len - rest;
	__asm__ volatile (
	// offset = 0; skip the loop entirely when there is nothing for it to do.
	"xor %[offset], %[offset], %[offset] \n\t"
	"beq %[loop_end], %[out], 0f \n\t"
	"2: \n\t"
	// Load one byte from each stream at the current offset.
	"lbux %[temp0], %[offset](%[r]) \n\t"
	"lbux %[temp1], %[offset](%[g]) \n\t"
	"lbux %[temp2], %[offset](%[b]) \n\t"
	// temp0 = (0xff << 16) | r, temp2 = (g << 16) | b.
	"ins %[temp0], %[a], 16, 16 \n\t"
	"ins %[temp2], %[temp1], 16, 16 \n\t"
	"addiu %[out], %[out], 4 \n\t"
	// Keep the low byte of each halfword: 0xff, r, g, b from top to bottom.
	"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
	"sw %[temp0], -4(%[out]) \n\t"
	"addu %[offset], %[offset], %[step] \n\t"
	"bne %[loop_end], %[out], 2b \n\t"
	"0: \n\t"
	// Trailing word for odd 'len'; same packing as the loop body.
	"beq %[rest], $zero, 1f \n\t"
	"lbux %[temp0], %[offset](%[r]) \n\t"
	"lbux %[temp1], %[offset](%[g]) \n\t"
	"lbux %[temp2], %[offset](%[b]) \n\t"
	"ins %[temp0], %[a], 16, 16 \n\t"
	"ins %[temp2], %[temp1], 16, 16 \n\t"
	"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
	"sw %[temp0], 0(%[out]) \n\t"
	"1: \n\t"
	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
	[offset]"=&r"(offset), [out]"+&r"(out)
	: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
	[loop_end]"r"(loop_end), [rest]"r"(rest)
	: "memory"
	);
	}

	//------------------------------------------------------------------------------
	// Entry point

	extern void WebPInitAlphaProcessingMIPSdspR2(void);

	// Routes the alpha-processing function pointers to the MIPS DSP R2
	// implementations defined in this file.
	WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
	  // Channel packers.
	  WebPPackRGB = PackRGB_MIPSdspR2;
	#if defined(WORDS_BIGENDIAN)
	  // The ARGB packer is only compiled for big-endian builds.
	  WebPPackARGB = PackARGB_MIPSdspR2;
	#endif  // WORDS_BIGENDIAN

	  // Alpha plane helpers.
	  WebPMultARGBRow = MultARGBRow_MIPSdspR2;
	  WebPDispatchAlpha = DispatchAlpha_MIPSdspR2;
	}

	#else // !WEBP_USE_MIPS_DSP_R2

	// Build without MIPS DSP R2 support. WEBP_DSP_INIT_STUB is defined outside
	// this file; presumably it supplies a do-nothing
	// WebPInitAlphaProcessingMIPSdspR2 so the symbol still exists -- confirm
	// against src/dsp/dsp.h.
	WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingMIPSdspR2)

	#endif // WEBP_USE_MIPS_DSP_R2