third_party/skia/experimental/graphite/src/UniformManager.cpp - cobalt - Git at Google

 /*
  * Copyright 2021 Google LLC
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #include "experimental/graphite/src/UniformManager.h"

 #include "experimental/graphite/src/DrawTypes.h"
 #include "include/core/SkMatrix.h"
 #include "include/private/SkHalf.h"
 #include "include/private/SkTemplates.h"
 #include "src/core/SkUniform.h"

 // ensure that these types are the sizes the uniform data is expecting
 static_assert(sizeof(int32_t) == 4);
 static_assert(sizeof(float) == 4);
 static_assert(sizeof(int16_t) == 2);
 static_assert(sizeof(SkHalf) == 2);

 namespace skgpu {

 //////////////////////////////////////////////////////////////////////////////

 // Records the buffer layout. The layout selects the stride rules used by writeUniforms() and
 // decides whether getUniformTypeForLayout() widens 16-bit types.
 UniformManager::UniformManager(Layout layout)
         : fLayout(layout) {
 }

 // Returns the size in bytes of `vecLength` tightly packed components of BaseType, i.e. with no
 // padding between or after the components.
 template <typename BaseType>
 static constexpr size_t tight_vec_size(int vecLength) {
     constexpr size_t kComponentSize = sizeof(BaseType);
     return kComponentSize * vecLength;
 }

 /**
  * From Section 7.6.2.2 "Standard Uniform Block Layout":
  *  1. If the member is a scalar consuming N basic machine units, the base alignment is N.
  *  2. If the member is a two- or four-component vector with components consuming N basic machine
  *     units, the base alignment is 2N or 4N, respectively.
  *  3. If the member is a three-component vector with components consuming N
  *     basic machine units, the base alignment is 4N.
  *  4. If the member is an array of scalars or vectors, the base alignment and array
  *     stride are set to match the base alignment of a single array element, according
  *     to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The
  *     array may have padding at the end; the base offset of the member following
  *     the array is rounded up to the next multiple of the base alignment.
  *  5. If the member is a column-major matrix with C columns and R rows, the
  *     matrix is stored identically to an array of C column vectors with R components each,
  *     according to rule (4).
  *  6. If the member is an array of S column-major matrices with C columns and
  *     R rows, the matrix is stored identically to a row of S × C column vectors
  *     with R components each, according to rule (4).
  *  7. If the member is a row-major matrix with C columns and R rows, the matrix
  *     is stored identically to an array of R row vectors with C components each,
  *     according to rule (4).
  *  8. If the member is an array of S row-major matrices with C columns and R
  *     rows, the matrix is stored identically to a row of S × R row vectors with C
  *    components each, according to rule (4).
  *  9. If the member is a structure, the base alignment of the structure is N, where
  *     N is the largest base alignment value of any of its members, and rounded
  *     up to the base alignment of a vec4. The individual members of this substructure are then
  *     assigned offsets by applying this set of rules recursively,
  *     where the base offset of the first member of the sub-structure is equal to the
  *     aligned offset of the structure. The structure may have padding at the end;
  *     the base offset of the member following the sub-structure is rounded up to
  *     the next multiple of the base alignment of the structure.
  * 10. If the member is an array of S structures, the S elements of the array are laid
  *     out in order, according to rule (9).
  */
 template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
 struct Rules140 {
     /**
      * For an array of scalars or vectors this returns the stride between array elements. For
      * matrices or arrays of matrices this returns the stride between columns of the matrix. Note
      * that for single (non-array) scalars or vectors we don't require a stride, so the tightly
      * packed size is returned.
      *
      * @param count  number of array elements (>= 1), or SkUniform::kNonArray for a non-array.
      * @return       the stride in bytes.
      */
     static constexpr size_t Stride(int count) {
         SkASSERT(count >= 1 || count == SkUniform::kNonArray);
         static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
         static_assert(Cols >= 1 && Cols <= 4);

         if constexpr (Cols != 1) {
             // This is a matrix or array of matrices. Rule 5 stores it as an array of column
             // vectors, so the column stride is the array stride of a single column.
             SkASSERT(RowsOrVecLength > 1);
             return Rules140<BaseType, RowsOrVecLength>::Stride(1);
         } else {
             if (count == SkUniform::kNonArray) {
                 // Stride doesn't matter for a non-array.
                 return RowsOrVecLength * sizeof(BaseType);
             }

             // Rule 4.

             // Alignment of vec4 by Rule 2.
             constexpr size_t kVec4Alignment = tight_vec_size<float>(4);
             // Alignment of a single vector of BaseType by Rule 1, 2, or 3. A three-component
             // vector aligns like a four-component one.
             constexpr int kAlignedLength = (RowsOrVecLength == 3) ? 4 : RowsOrVecLength;
             constexpr size_t kElementAlignment = tight_vec_size<BaseType>(kAlignedLength);
             // Round the element alignment up to a multiple of the vec4 alignment.
             constexpr size_t kVec4Multiples =
                     (kElementAlignment + kVec4Alignment - 1) / kVec4Alignment;
             return kVec4Multiples * kVec4Alignment;
         }
     }
 };

 /**
  * When using the std430 storage layout, shader storage blocks will be laid out in buffer storage
  * identically to uniform and shader storage blocks using the std140 layout, except that the base
  * alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are
  * not rounded up a multiple of the base alignment of a vec4.
  */
 template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
 struct Rules430 {
     /**
      * Returns the stride in bytes between array elements (scalars/vectors) or between matrix
      * columns (matrices and arrays of matrices). For a non-array scalar or vector no stride is
      * needed, so the tightly packed size is returned.
      *
      * @param count  number of array elements (>= 1), or SkUniform::kNonArray for a non-array.
      */
     static constexpr size_t Stride(int count) {
         SkASSERT(count >= 1 || count == SkUniform::kNonArray);
         static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
         static_assert(Cols >= 1 && Cols <= 4);

         if constexpr (Cols != 1) {
             // This is a matrix or array of matrices. We return the stride between columns,
             // which is the array stride of one column vector.
             SkASSERT(RowsOrVecLength > 1);
             return Rules430<BaseType, RowsOrVecLength>::Stride(1);
         } else {
             if (count == SkUniform::kNonArray) {
                 // Stride doesn't matter for a non-array.
                 return RowsOrVecLength * sizeof(BaseType);
             }
             // Rule 4 without the round up to a multiple of align-of vec4. Three-component
             // vectors still take the stride of their four-component equivalent.
             constexpr int kAlignedLength = (RowsOrVecLength == 3) ? 4 : RowsOrVecLength;
             return tight_vec_size<BaseType>(kAlignedLength);
         }
     }
 };

 // The strides used here were derived from the rules we've imposed on ourselves in
 // GrMtlPipelineStateDataManager. Everything is tight except 3-component which have the stride of
 // their 4-component equivalents.
 template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
 struct RulesMetal {
     /**
      * Returns the stride in bytes between array elements (scalars/vectors) or between matrix
      * columns (matrices and arrays of matrices). For a non-array scalar or vector no stride is
      * needed, so the tightly packed size is returned.
      *
      * @param count  number of array elements (>= 1), or SkUniform::kNonArray for a non-array.
      */
     static constexpr size_t Stride(int count) {
         SkASSERT(count >= 1 || count == SkUniform::kNonArray);
         static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
         static_assert(Cols >= 1 && Cols <= 4);

         if constexpr (Cols != 1) {
             // This is a matrix or array of matrices. We return the stride between columns,
             // which is the array stride of one column vector.
             SkASSERT(RowsOrVecLength > 1);
             return RulesMetal<BaseType, RowsOrVecLength>::Stride(1);
         } else {
             if (count == SkUniform::kNonArray) {
                 // Stride doesn't matter for a non-array.
                 return RowsOrVecLength * sizeof(BaseType);
             }
             // Tight packing, except that three-component vectors take the stride of their
             // four-component equivalent.
             constexpr int kAlignedLength = (RowsOrVecLength == 3) ? 4 : RowsOrVecLength;
             return tight_vec_size<BaseType>(kAlignedLength);
         }
     }
 };

 /**
  * Serializes uniform values into a destination buffer. The `Rules` template parameter (Rules140,
  * Rules430 or RulesMetal) supplies the stride for each uniform type.
  */
 template<template<typename BaseType, int RowsOrVecLength, int Cols> class Rules>
 class Writer {
 private:
     /**
      * Copies `numUniforms` tightly packed values from `src` (elements of MemType) to `dst`
      * (elements of UniformType). Identical types are copied bitwise; float to SkHalf is
      * converted per element. Any other pairing aborts.
      */
     template <typename MemType, typename UniformType>
     static void CopyUniforms(void* dst, const void* src, int numUniforms) {
         if constexpr (std::is_same<MemType, UniformType>::value) {
             // Matching types--use memcpy.
             std::memcpy(dst, src, numUniforms * sizeof(MemType));
         } else if constexpr (std::is_same<MemType, float>::value &&
                              std::is_same<UniformType, SkHalf>::value) {
             // Convert floats to half.
             const float* floatBits = static_cast<const float*>(src);
             SkHalf* halfBits = static_cast<SkHalf*>(dst);
             for (int i = 0; i < numUniforms; ++i) {
                 halfBits[i] = SkFloatToHalf(floatBits[i]);
             }
         } else {
             SK_ABORT("implement conversion from MemType to UniformType");
         }
     }

     /**
      * Writes `n` uniforms (or one, when `n` is SkUniform::kNonArray) of RowsOrVecLength x Cols
      * components each from `src` to `dst`, spacing vectors/columns by the layout's stride.
      *
      * @param dst  destination, or null to only compute the size.
      * @param n    array count (>= 1) or SkUniform::kNonArray.
      * @param src  tightly packed source values; only read when `dst` is non-null.
      * @return     the number of bytes the uniform occupies in the destination.
      */
     template <typename MemType, typename UniformType, int RowsOrVecLength = 1, int Cols = 1>
     static uint32_t Write(void *dst, int n, const MemType src[]) {
         const size_t stride = Rules<UniformType, RowsOrVecLength, Cols>::Stride(n);
         n = (n == SkUniform::kNonArray) ? 1 : n;
         // From here on `n` counts vectors: every matrix contributes Cols column vectors.
         n *= Cols;

         if (dst) {
             if (stride == RowsOrVecLength * sizeof(UniformType)) {
                 // No padding between vectors, so everything can be copied in one pass.
                 CopyUniforms<MemType, UniformType>(dst, src, n * RowsOrVecLength);
             } else {
                 // Padded layout: copy one vector at a time and skip ahead by the stride.
                 for (int i = 0; i < n; ++i) {
                     CopyUniforms<MemType, UniformType>(dst, src, RowsOrVecLength);
                     src += RowsOrVecLength;
                     dst = SkTAddOffset<void>(dst, stride);
                 }
             }
         }

         return static_cast<uint32_t>(n * stride);
     }

     /**
      * Writes `n` SkMatrix values (or one, when `n` is less than 1) as 3x3 matrices of
      * UniformType.
      *
      * @param dst  destination, or null to only compute the size.
      * @return     the number of bytes the matrices occupy in the destination.
      */
     template <typename UniformType>
     static uint32_t WriteSkMatrices(void *dst, int n, const SkMatrix m[]) {
         // Stride() will give us the stride of each column, so mul by 3 to get matrix stride.
         const size_t stride = 3 * Rules<UniformType, 3, 3>::Stride(1);
         n = std::max(n, 1);

         if (dst) {
             size_t offset = 0;
             for (int i = 0; i < n; ++i) {
                 // Gather the nine entries in the order Write() consumes them, three
                 // consecutive floats per column.
                 float mt[] = {
                         m[i].get(SkMatrix::kMScaleX),
                         m[i].get(SkMatrix::kMSkewY),
                         m[i].get(SkMatrix::kMPersp0),
                         m[i].get(SkMatrix::kMSkewX),
                         m[i].get(SkMatrix::kMScaleY),
                         m[i].get(SkMatrix::kMPersp1),
                         m[i].get(SkMatrix::kMTransX),
                         m[i].get(SkMatrix::kMTransY),
                         m[i].get(SkMatrix::kMPersp2),
                 };
                 Write<float, UniformType, 3, 3>(SkTAddOffset<void>(dst, offset), 1, mt);
                 offset += stride;
             }
         }
         return static_cast<uint32_t>(n * stride);
     }

 public:
     /**
      * Writes one uniform (or uniform array) of the given SkSL type.
      *
      * @param type   SkSL type of the uniform as stored in the buffer.
      * @param ctype  how `src` is represented in memory; only consulted for 3x3 matrices.
      * @param dest   destination, or null to only compute the size.
      * @param n      array count (>= 1) or SkUniform::kNonArray.
      * @param src    source values; only read when `dest` is non-null.
      * @return       the number of bytes the uniform occupies in the destination.
      *
      * Aborts for types that have no writer.
      */
     static uint32_t WriteUniform(SkSLType type,
                                  CType ctype,
                                  void *dest,
                                  int n,
                                  const void *src) {
         SkASSERT(n >= 1 || n == SkUniform::kNonArray);
         switch (type) {
             case SkSLType::kInt:
                 return Write<int32_t, int32_t>(dest, n, static_cast<const int32_t *>(src));

             case SkSLType::kInt2:
                 return Write<int32_t, int32_t, 2>(dest, n, static_cast<const int32_t *>(src));

             case SkSLType::kInt3:
                 return Write<int32_t, int32_t, 3>(dest, n, static_cast<const int32_t *>(src));

             case SkSLType::kInt4:
                 return Write<int32_t, int32_t, 4>(dest, n, static_cast<const int32_t *>(src));

             // Unsigned 32-bit integers are copied bit-for-bit, exactly like the signed ones.
             // NOTE(review): assumes the source data for kUInt* is 32-bit per component.
             case SkSLType::kUInt:
                 return Write<uint32_t, uint32_t>(dest, n, static_cast<const uint32_t *>(src));

             case SkSLType::kUInt2:
                 return Write<uint32_t, uint32_t, 2>(dest, n,
                                                     static_cast<const uint32_t *>(src));

             case SkSLType::kUInt3:
                 return Write<uint32_t, uint32_t, 3>(dest, n,
                                                     static_cast<const uint32_t *>(src));

             case SkSLType::kUInt4:
                 return Write<uint32_t, uint32_t, 4>(dest, n,
                                                     static_cast<const uint32_t *>(src));

             case SkSLType::kHalf:
                 return Write<float, SkHalf>(dest, n, static_cast<const float *>(src));

             case SkSLType::kFloat:
                 return Write<float, float>(dest, n, static_cast<const float *>(src));

             case SkSLType::kHalf2:
                 return Write<float, SkHalf, 2>(dest, n, static_cast<const float *>(src));

             case SkSLType::kFloat2:
                 return Write<float, float, 2>(dest, n, static_cast<const float *>(src));

             case SkSLType::kHalf3:
                 return Write<float, SkHalf, 3>(dest, n, static_cast<const float *>(src));

             case SkSLType::kFloat3:
                 return Write<float, float, 3>(dest, n, static_cast<const float *>(src));

             case SkSLType::kHalf4:
                 return Write<float, SkHalf, 4>(dest, n, static_cast<const float *>(src));

             case SkSLType::kFloat4:
                 return Write<float, float, 4>(dest, n, static_cast<const float *>(src));

             case SkSLType::kHalf2x2:
                 return Write<float, SkHalf, 2, 2>(dest, n, static_cast<const float *>(src));

             case SkSLType::kFloat2x2:
                 return Write<float, float, 2, 2>(dest, n, static_cast<const float *>(src));

             case SkSLType::kHalf3x3:
                 switch (ctype) {
                     case CType::kDefault:
                         return Write<float, SkHalf, 3, 3>(dest, n, static_cast<const float *>(src));
                     case CType::kSkMatrix:
                         return WriteSkMatrices<SkHalf>(dest, n, static_cast<const SkMatrix *>(src));
                 }
                 SkUNREACHABLE;

             case SkSLType::kFloat3x3:
                 switch (ctype) {
                     case CType::kDefault:
                         return Write<float, float, 3, 3>(dest, n, static_cast<const float *>(src));
                     case CType::kSkMatrix:
                         return WriteSkMatrices<float>(dest, n, static_cast<const SkMatrix *>(src));
                 }
                 SkUNREACHABLE;

             case SkSLType::kHalf4x4:
                 return Write<float, SkHalf, 4, 4>(dest, n, static_cast<const float *>(src));

             case SkSLType::kFloat4x4:
                 return Write<float, float, 4, 4>(dest, n, static_cast<const float *>(src));

             default:
                 SK_ABORT("Unexpected uniform type");
         }
     }
 };

 #ifdef SK_DEBUG
 // Returns the alignment mask, i.e. (alignment - 1), for `type`. And-ing an offset with the mask
 // yields 0 when the offset is aligned; any other value is how many bytes past alignment the
 // offset is. This works since all alignments are powers of 2. Aborts for types that cannot be
 // queried.
 static uint32_t sksltype_to_alignment_mask(SkSLType type) {
     switch (type) {
         // 2-byte alignment.
         case SkSLType::kShort:
         case SkSLType::kUShort:
         case SkSLType::kHalf:
             return 0x1;

         // 4-byte alignment.
         case SkSLType::kInt:
         case SkSLType::kUInt:
         case SkSLType::kFloat:
         case SkSLType::kShort2:
         case SkSLType::kUShort2:
         case SkSLType::kHalf2:
         case SkSLType::kHalf2x2:
             return 0x3;

         // 8-byte alignment.
         case SkSLType::kInt2:
         case SkSLType::kUInt2:
         case SkSLType::kFloat2:
         case SkSLType::kFloat2x2:
         case SkSLType::kShort3:
         case SkSLType::kShort4:
         case SkSLType::kUShort3:
         case SkSLType::kUShort4:
         case SkSLType::kHalf3:
         case SkSLType::kHalf4:
         case SkSLType::kHalf3x3:
         case SkSLType::kHalf4x4:
             return 0x7;

         // 16-byte alignment.
         case SkSLType::kInt3:
         case SkSLType::kUInt3:
         case SkSLType::kFloat3:
         case SkSLType::kInt4:
         case SkSLType::kUInt4:
         case SkSLType::kFloat4:
         case SkSLType::kFloat3x3:
         case SkSLType::kFloat4x4:
             return 0xF;

         // This query is only valid for certain types.
         case SkSLType::kVoid:
         case SkSLType::kBool:
         case SkSLType::kBool2:
         case SkSLType::kBool3:
         case SkSLType::kBool4:
         case SkSLType::kTexture2DSampler:
         case SkSLType::kTextureExternalSampler:
         case SkSLType::kTexture2DRectSampler:
         case SkSLType::kSampler:
         case SkSLType::kTexture2D:
         case SkSLType::kInput:
             break;
     }
     SK_ABORT("Unexpected type");
 }

 /**
  * Returns the size in bytes taken up in Metal buffers for SkSLTypes. Three-component vectors
  * take the size of their four-component equivalents, and matrices are sized as that many
  * columns. Aborts for types that cannot be queried.
  *
  * File-local (static), like the other debug helpers in this file.
  */
 static uint32_t sksltype_to_mtl_size(SkSLType type) {
     switch (type) {
         case SkSLType::kShort:
         case SkSLType::kUShort:
         case SkSLType::kHalf:
             return 2;

         case SkSLType::kInt:
         case SkSLType::kUInt:
         case SkSLType::kFloat:
         case SkSLType::kShort2:
         case SkSLType::kUShort2:
         case SkSLType::kHalf2:
             return 4;

         case SkSLType::kInt2:
         case SkSLType::kUInt2:
         case SkSLType::kFloat2:
         case SkSLType::kShort3:
         case SkSLType::kShort4:
         case SkSLType::kUShort3:
         case SkSLType::kUShort4:
         case SkSLType::kHalf3:
         case SkSLType::kHalf4:
         case SkSLType::kHalf2x2:
             return 8;

         case SkSLType::kInt3:
         case SkSLType::kUInt3:
         case SkSLType::kFloat3:
         case SkSLType::kInt4:
         case SkSLType::kUInt4:
         case SkSLType::kFloat4:
         case SkSLType::kFloat2x2:
             return 16;

         case SkSLType::kHalf3x3:
             return 24;
         case SkSLType::kHalf4x4:
             return 32;
         case SkSLType::kFloat3x3:
             return 48;
         case SkSLType::kFloat4x4:
             return 64;

         // This query is only valid for certain types.
         case SkSLType::kVoid:
         case SkSLType::kBool:
         case SkSLType::kBool2:
         case SkSLType::kBool3:
         case SkSLType::kBool4:
         case SkSLType::kTexture2DSampler:
         case SkSLType::kTextureExternalSampler:
         case SkSLType::kTexture2DRectSampler:
         case SkSLType::kSampler:
         case SkSLType::kTexture2D:
         case SkSLType::kInput:
             break;
     }
     SK_ABORT("Unexpected type");
 }

 // Given the current offset into the ubo, calculates the offset for the uniform being added,
 // taking its alignment requirement into account. The aligned offset of the new uniform is
 // returned, *currentOffset is advanced to the end of the new uniform, and *maxAlignment is raised
 // to this uniform's alignment mask if that is larger than the value it held.
 static uint32_t get_ubo_aligned_offset(uint32_t* currentOffset,
                                        uint32_t* maxAlignment,
                                        SkSLType type,
                                        int arrayCount) {
     const uint32_t mask = sksltype_to_alignment_mask(type);
     if (*maxAlignment < mask) {
         *maxAlignment = mask;
     }

     // Bytes by which the current offset overshoots the previous aligned position.
     const uint32_t overshoot = *currentOffset & mask;
     const uint32_t padding = (overshoot == 0) ? 0 : (mask - overshoot + 1);
     const uint32_t alignedOffset = *currentOffset + padding;

     SkASSERT(sizeof(float) == 4);
     // A zero array count denotes a single, non-array uniform.
     const uint32_t elementCount = arrayCount ? arrayCount : 1;
     *currentOffset = alignedOffset + sksltype_to_mtl_size(type) * elementCount;
     return alignedOffset;
 }
 #endif // SK_DEBUG

 // Maps `type` to the type actually stored in the uniform buffer for this manager's layout.
 // Metal keeps every type unchanged; the other layouts widen 16-bit types to their 32-bit
 // equivalents and leave all remaining types untouched.
 SkSLType UniformManager::getUniformTypeForLayout(SkSLType type) {
     if (fLayout == Layout::kMetal) {
         return type;
     }

     // GL/Vk expect uniforms in 32-bit precision. Convert lower-precision types to 32-bit.
     switch (type) {
         // Signed 16-bit integers.
         case SkSLType::kShort:   return SkSLType::kInt;
         case SkSLType::kShort2:  return SkSLType::kInt2;
         case SkSLType::kShort3:  return SkSLType::kInt3;
         case SkSLType::kShort4:  return SkSLType::kInt4;

         // Unsigned 16-bit integers.
         case SkSLType::kUShort:  return SkSLType::kUInt;
         case SkSLType::kUShort2: return SkSLType::kUInt2;
         case SkSLType::kUShort3: return SkSLType::kUInt3;
         case SkSLType::kUShort4: return SkSLType::kUInt4;

         // Half-precision scalars and vectors.
         case SkSLType::kHalf:    return SkSLType::kFloat;
         case SkSLType::kHalf2:   return SkSLType::kFloat2;
         case SkSLType::kHalf3:   return SkSLType::kFloat3;
         case SkSLType::kHalf4:   return SkSLType::kFloat4;

         // Half-precision matrices.
         case SkSLType::kHalf2x2: return SkSLType::kFloat2x2;
         case SkSLType::kHalf3x3: return SkSLType::kFloat3x3;
         case SkSLType::kHalf4x4: return SkSLType::kFloat4x4;

         default:
             break;
     }
     return type;
 }

 /**
  * Writes the given uniforms back to back using this manager's layout.
  *
  * @param uniforms  descriptions (type and array count) of the uniforms to write.
  * @param srcs      one source pointer per uniform; may be null only when `dst` is null.
  * @param offsets   optional; when non-null, receives the byte offset of every uniform.
  * @param dst       destination buffer, or null to only compute offsets and the total size.
  * @return          the total number of bytes the uniforms occupy.
  */
 uint32_t UniformManager::writeUniforms(SkSpan<const SkUniform> uniforms,
                                        const void** srcs,
                                        uint32_t* offsets,
                                        char *dst) {
     // Writing needs something to read from.
     SkASSERT(!dst || srcs);

     // Pick the writer for the layout up front. Selecting it through a function that returns
     // from every case means the pointer can never be left uninitialized.
     using WriteFn = decltype(&Writer<Rules140>::WriteUniform);
     const WriteFn write = [layout = fLayout]() -> WriteFn {
         switch (layout) {
             case Layout::kStd140: return Writer<Rules140>::WriteUniform;
             case Layout::kStd430: return Writer<Rules430>::WriteUniform;
             case Layout::kMetal:  return Writer<RulesMetal>::WriteUniform;
         }
         SK_ABORT("Unexpected layout");
     }();

 #ifdef SK_DEBUG
     uint32_t curUBOOffset = 0;
     uint32_t curUBOMaxAlignment = 0;
 #endif // SK_DEBUG

     uint32_t offset = 0;

     for (size_t i = 0; i < uniforms.size(); ++i) {
         const SkUniform& u = uniforms[i];
         const SkSLType uniformType = this->getUniformTypeForLayout(u.type());

 #ifdef SK_DEBUG
         // Independently computed offset, used only to cross-check the running offset below.
         uint32_t debugOffset = get_ubo_aligned_offset(&curUBOOffset,
                                                       &curUBOMaxAlignment,
                                                       uniformType,
                                                       u.count());
 #endif // SK_DEBUG

         const uint32_t bytesWritten = write(uniformType,
                                             CType::kDefault,
                                             dst ? &dst[offset] : nullptr,
                                             u.count(),
                                             srcs ? srcs[i] : nullptr);
         SkASSERT(debugOffset == offset);

         if (offsets) {
             offsets[i] = offset;
         }
         offset += bytesWritten;
     }

     return offset;
 }

 } // namespace skgpu
	/*
	* Copyright 2021 Google LLC
	*
	* Use of this source code is governed by a BSD-style license that can be
	* found in the LICENSE file.
	*/

	#include "experimental/graphite/src/UniformManager.h"

	#include "experimental/graphite/src/DrawTypes.h"
	#include "include/core/SkMatrix.h"
	#include "include/private/SkHalf.h"
	#include "include/private/SkTemplates.h"
	#include "src/core/SkUniform.h"

	// ensure that these types are the sizes the uniform data is expecting
	static_assert(sizeof(int32_t) == 4);
	static_assert(sizeof(float) == 4);
	static_assert(sizeof(int16_t) == 2);
	static_assert(sizeof(SkHalf) == 2);

	namespace skgpu {

	//////////////////////////////////////////////////////////////////////////////

	// Records the buffer layout that the rest of the manager's work is based on.
	UniformManager::UniformManager(Layout layout)
	        : fLayout(layout) {
	}

	// Returns the size in bytes of `vecLength` tightly packed components of BaseType, i.e. with no
	// padding between or after the components.
	template <typename BaseType>
	static constexpr size_t tight_vec_size(int vecLength) {
	    constexpr size_t kComponentSize = sizeof(BaseType);
	    return kComponentSize * vecLength;
	}

	/**
	* From Section 7.6.2.2 "Standard Uniform Block Layout":
	* 1. If the member is a scalar consuming N basic machine units, the base alignment is N.
	* 2. If the member is a two- or four-component vector with components consuming N basic machine
	* units, the base alignment is 2N or 4N, respectively.
	* 3. If the member is a three-component vector with components consuming N
	* basic machine units, the base alignment is 4N.
	* 4. If the member is an array of scalars or vectors, the base alignment and array
	* stride are set to match the base alignment of a single array element, according
	* to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The
	* array may have padding at the end; the base offset of the member following
	* the array is rounded up to the next multiple of the base alignment.
	* 5. If the member is a column-major matrix with C columns and R rows, the
	* matrix is stored identically to an array of C column vectors with R components each,
	* according to rule (4).
	* 6. If the member is an array of S column-major matrices with C columns and
	* R rows, the matrix is stored identically to a row of S × C column vectors
	* with R components each, according to rule (4).
	* 7. If the member is a row-major matrix with C columns and R rows, the matrix
	* is stored identically to an array of R row vectors with C components each,
	* according to rule (4).
	* 8. If the member is an array of S row-major matrices with C columns and R
	* rows, the matrix is stored identically to a row of S × R row vectors with C
	* components each, according to rule (4).
	* 9. If the member is a structure, the base alignment of the structure is N, where
	* N is the largest base alignment value of any of its members, and rounded
	* up to the base alignment of a vec4. The individual members of this substructure are then
	* assigned offsets by applying this set of rules recursively,
	* where the base offset of the first member of the sub-structure is equal to the
	* aligned offset of the structure. The structure may have padding at the end;
	* the base offset of the member following the sub-structure is rounded up to
	* the next multiple of the base alignment of the structure.
	* 10. If the member is an array of S structures, the S elements of the array are laid
	* out in order, according to rule (9).
	*/
	template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
	struct Rules140 {
	    /**
	     * For an array of scalars or vectors this returns the stride between array elements. For
	     * matrices or arrays of matrices this returns the stride between columns of the matrix.
	     * Note that for single (non-array) scalars or vectors we don't require a stride, so the
	     * tightly packed size is returned.
	     *
	     * @param count  number of array elements (>= 1), or SkUniform::kNonArray for a non-array.
	     * @return       the stride in bytes.
	     */
	    static constexpr size_t Stride(int count) {
	        SkASSERT(count >= 1 || count == SkUniform::kNonArray);
	        static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
	        static_assert(Cols >= 1 && Cols <= 4);

	        if constexpr (Cols != 1) {
	            // This is a matrix or array of matrices. Rule 5 stores it as an array of column
	            // vectors, so the column stride is the array stride of a single column.
	            SkASSERT(RowsOrVecLength > 1);
	            return Rules140<BaseType, RowsOrVecLength>::Stride(1);
	        } else {
	            if (count == SkUniform::kNonArray) {
	                // Stride doesn't matter for a non-array.
	                return RowsOrVecLength * sizeof(BaseType);
	            }

	            // Rule 4.

	            // Alignment of vec4 by Rule 2.
	            constexpr size_t kVec4Alignment = tight_vec_size<float>(4);
	            // Alignment of a single vector of BaseType by Rule 1, 2, or 3. A three-component
	            // vector aligns like a four-component one.
	            constexpr int kAlignedLength = (RowsOrVecLength == 3) ? 4 : RowsOrVecLength;
	            constexpr size_t kElementAlignment = tight_vec_size<BaseType>(kAlignedLength);
	            // Round the element alignment up to a multiple of the vec4 alignment.
	            constexpr size_t kVec4Multiples =
	                    (kElementAlignment + kVec4Alignment - 1) / kVec4Alignment;
	            return kVec4Multiples * kVec4Alignment;
	        }
	    }
	};

	/**
	* When using the std430 storage layout, shader storage blocks will be laid out in buffer storage
	* identically to uniform and shader storage blocks using the std140 layout, except that the base
	* alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are
	* not rounded up a multiple of the base alignment of a vec4.
	*/
	template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
	struct Rules430 {
	    /**
	     * Returns the stride in bytes between array elements (scalars/vectors) or between matrix
	     * columns (matrices and arrays of matrices). For a non-array scalar or vector no stride is
	     * needed, so the tightly packed size is returned.
	     *
	     * @param count  number of array elements (>= 1), or SkUniform::kNonArray for a non-array.
	     */
	    static constexpr size_t Stride(int count) {
	        SkASSERT(count >= 1 || count == SkUniform::kNonArray);
	        static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
	        static_assert(Cols >= 1 && Cols <= 4);

	        if constexpr (Cols != 1) {
	            // This is a matrix or array of matrices. We return the stride between columns,
	            // which is the array stride of one column vector.
	            SkASSERT(RowsOrVecLength > 1);
	            return Rules430<BaseType, RowsOrVecLength>::Stride(1);
	        } else {
	            if (count == SkUniform::kNonArray) {
	                // Stride doesn't matter for a non-array.
	                return RowsOrVecLength * sizeof(BaseType);
	            }
	            // Rule 4 without the round up to a multiple of align-of vec4. Three-component
	            // vectors still take the stride of their four-component equivalent.
	            constexpr int kAlignedLength = (RowsOrVecLength == 3) ? 4 : RowsOrVecLength;
	            return tight_vec_size<BaseType>(kAlignedLength);
	        }
	    }
	};

	// The strides used here were derived from the rules we've imposed on ourselves in
	// GrMtlPipelineStateDataManager. Everything is tight except 3-component which have the stride of
	// their 4-component equivalents.
	template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
	struct RulesMetal {
	    /**
	     * Returns the stride in bytes between array elements (scalars/vectors) or between matrix
	     * columns (matrices and arrays of matrices). For a non-array scalar or vector no stride is
	     * needed, so the tightly packed size is returned.
	     *
	     * @param count  number of array elements (>= 1), or SkUniform::kNonArray for a non-array.
	     */
	    static constexpr size_t Stride(int count) {
	        SkASSERT(count >= 1 || count == SkUniform::kNonArray);
	        static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
	        static_assert(Cols >= 1 && Cols <= 4);

	        if constexpr (Cols != 1) {
	            // This is a matrix or array of matrices. We return the stride between columns,
	            // which is the array stride of one column vector.
	            SkASSERT(RowsOrVecLength > 1);
	            return RulesMetal<BaseType, RowsOrVecLength>::Stride(1);
	        } else {
	            if (count == SkUniform::kNonArray) {
	                // Stride doesn't matter for a non-array.
	                return RowsOrVecLength * sizeof(BaseType);
	            }
	            // Tight packing, except that three-component vectors take the stride of their
	            // four-component equivalent.
	            constexpr int kAlignedLength = (RowsOrVecLength == 3) ? 4 : RowsOrVecLength;
	            return tight_vec_size<BaseType>(kAlignedLength);
	        }
	    }
	};

	/**
	 * Serializes uniform values into a destination buffer. The `Rules` template parameter
	 * (Rules140, Rules430 or RulesMetal) supplies the stride for each uniform type.
	 */
	template<template<typename BaseType, int RowsOrVecLength, int Cols> class Rules>
	class Writer {
	private:
	    /**
	     * Copies `numUniforms` tightly packed values from `src` (elements of MemType) to `dst`
	     * (elements of UniformType). Identical types are copied bitwise; float to SkHalf is
	     * converted per element. Any other pairing aborts.
	     */
	    template <typename MemType, typename UniformType>
	    static void CopyUniforms(void* dst, const void* src, int numUniforms) {
	        if constexpr (std::is_same<MemType, UniformType>::value) {
	            // Matching types--use memcpy.
	            std::memcpy(dst, src, numUniforms * sizeof(MemType));
	        } else if constexpr (std::is_same<MemType, float>::value &&
	                             std::is_same<UniformType, SkHalf>::value) {
	            // Convert floats to half.
	            const float* floatBits = static_cast<const float*>(src);
	            SkHalf* halfBits = static_cast<SkHalf*>(dst);
	            for (int i = 0; i < numUniforms; ++i) {
	                halfBits[i] = SkFloatToHalf(floatBits[i]);
	            }
	        } else {
	            SK_ABORT("implement conversion from MemType to UniformType");
	        }
	    }

	    /**
	     * Writes `n` uniforms (or one, when `n` is SkUniform::kNonArray) of
	     * RowsOrVecLength x Cols components each from `src` to `dst`, spacing vectors/columns by
	     * the layout's stride.
	     *
	     * @param dst  destination, or null to only compute the size.
	     * @param n    array count (>= 1) or SkUniform::kNonArray.
	     * @param src  tightly packed source values; only read when `dst` is non-null.
	     * @return     the number of bytes the uniform occupies in the destination.
	     */
	    template <typename MemType, typename UniformType, int RowsOrVecLength = 1, int Cols = 1>
	    static uint32_t Write(void *dst, int n, const MemType src[]) {
	        const size_t stride = Rules<UniformType, RowsOrVecLength, Cols>::Stride(n);
	        n = (n == SkUniform::kNonArray) ? 1 : n;
	        // From here on `n` counts vectors: every matrix contributes Cols column vectors.
	        n *= Cols;

	        if (dst) {
	            if (stride == RowsOrVecLength * sizeof(UniformType)) {
	                // No padding between vectors, so everything can be copied in one pass.
	                CopyUniforms<MemType, UniformType>(dst, src, n * RowsOrVecLength);
	            } else {
	                // Padded layout: copy one vector at a time and skip ahead by the stride.
	                for (int i = 0; i < n; ++i) {
	                    CopyUniforms<MemType, UniformType>(dst, src, RowsOrVecLength);
	                    src += RowsOrVecLength;
	                    dst = SkTAddOffset<void>(dst, stride);
	                }
	            }
	        }

	        return static_cast<uint32_t>(n * stride);
	    }

	    /**
	     * Writes `n` SkMatrix values (or one, when `n` is less than 1) as 3x3 matrices of
	     * UniformType.
	     *
	     * @param dst  destination, or null to only compute the size.
	     * @return     the number of bytes the matrices occupy in the destination.
	     */
	    template <typename UniformType>
	    static uint32_t WriteSkMatrices(void *dst, int n, const SkMatrix m[]) {
	        // Stride() will give us the stride of each column, so mul by 3 to get matrix stride.
	        const size_t stride = 3 * Rules<UniformType, 3, 3>::Stride(1);
	        n = std::max(n, 1);

	        if (dst) {
	            size_t offset = 0;
	            for (int i = 0; i < n; ++i) {
	                // Gather the nine entries in the order Write() consumes them, three
	                // consecutive floats per column.
	                float mt[] = {
	                        m[i].get(SkMatrix::kMScaleX),
	                        m[i].get(SkMatrix::kMSkewY),
	                        m[i].get(SkMatrix::kMPersp0),
	                        m[i].get(SkMatrix::kMSkewX),
	                        m[i].get(SkMatrix::kMScaleY),
	                        m[i].get(SkMatrix::kMPersp1),
	                        m[i].get(SkMatrix::kMTransX),
	                        m[i].get(SkMatrix::kMTransY),
	                        m[i].get(SkMatrix::kMPersp2),
	                };
	                Write<float, UniformType, 3, 3>(SkTAddOffset<void>(dst, offset), 1, mt);
	                offset += stride;
	            }
	        }
	        return static_cast<uint32_t>(n * stride);
	    }

	public:
	    /**
	     * Writes one uniform (or uniform array) of the given SkSL type.
	     *
	     * @param type   SkSL type of the uniform as stored in the buffer.
	     * @param ctype  how `src` is represented in memory; only consulted for 3x3 matrices.
	     * @param dest   destination, or null to only compute the size.
	     * @param n      array count (>= 1) or SkUniform::kNonArray.
	     * @param src    source values; only read when `dest` is non-null.
	     * @return       the number of bytes the uniform occupies in the destination.
	     *
	     * Aborts for types that have no writer.
	     */
	    static uint32_t WriteUniform(SkSLType type,
	                                 CType ctype,
	                                 void *dest,
	                                 int n,
	                                 const void *src) {
	        SkASSERT(n >= 1 || n == SkUniform::kNonArray);
	        switch (type) {
	            case SkSLType::kInt:
	                return Write<int32_t, int32_t>(dest, n, static_cast<const int32_t *>(src));

	            case SkSLType::kInt2:
	                return Write<int32_t, int32_t, 2>(dest, n, static_cast<const int32_t *>(src));

	            case SkSLType::kInt3:
	                return Write<int32_t, int32_t, 3>(dest, n, static_cast<const int32_t *>(src));

	            case SkSLType::kInt4:
	                return Write<int32_t, int32_t, 4>(dest, n, static_cast<const int32_t *>(src));

	            // Unsigned 32-bit integers are copied bit-for-bit, exactly like the signed ones.
	            // NOTE(review): assumes the source data for kUInt* is 32-bit per component.
	            case SkSLType::kUInt:
	                return Write<uint32_t, uint32_t>(dest, n, static_cast<const uint32_t *>(src));

	            case SkSLType::kUInt2:
	                return Write<uint32_t, uint32_t, 2>(dest, n,
	                                                    static_cast<const uint32_t *>(src));

	            case SkSLType::kUInt3:
	                return Write<uint32_t, uint32_t, 3>(dest, n,
	                                                    static_cast<const uint32_t *>(src));

	            case SkSLType::kUInt4:
	                return Write<uint32_t, uint32_t, 4>(dest, n,
	                                                    static_cast<const uint32_t *>(src));

	            case SkSLType::kHalf:
	                return Write<float, SkHalf>(dest, n, static_cast<const float *>(src));

	            case SkSLType::kFloat:
	                return Write<float, float>(dest, n, static_cast<const float *>(src));

	            case SkSLType::kHalf2:
	                return Write<float, SkHalf, 2>(dest, n, static_cast<const float *>(src));

	            case SkSLType::kFloat2:
	                return Write<float, float, 2>(dest, n, static_cast<const float *>(src));

	            case SkSLType::kHalf3:
	                return Write<float, SkHalf, 3>(dest, n, static_cast<const float *>(src));

	            case SkSLType::kFloat3:
	                return Write<float, float, 3>(dest, n, static_cast<const float *>(src));

	            case SkSLType::kHalf4:
	                return Write<float, SkHalf, 4>(dest, n, static_cast<const float *>(src));

	            case SkSLType::kFloat4:
	                return Write<float, float, 4>(dest, n, static_cast<const float *>(src));

	            case SkSLType::kHalf2x2:
	                return Write<float, SkHalf, 2, 2>(dest, n, static_cast<const float *>(src));

	            case SkSLType::kFloat2x2:
	                return Write<float, float, 2, 2>(dest, n, static_cast<const float *>(src));

	            case SkSLType::kHalf3x3:
	                switch (ctype) {
	                    case CType::kDefault:
	                        return Write<float, SkHalf, 3, 3>(dest, n,
	                                                          static_cast<const float *>(src));
	                    case CType::kSkMatrix:
	                        return WriteSkMatrices<SkHalf>(dest, n,
	                                                       static_cast<const SkMatrix *>(src));
	                }
	                SkUNREACHABLE;

	            case SkSLType::kFloat3x3:
	                switch (ctype) {
	                    case CType::kDefault:
	                        return Write<float, float, 3, 3>(dest, n,
	                                                         static_cast<const float *>(src));
	                    case CType::kSkMatrix:
	                        return WriteSkMatrices<float>(dest, n,
	                                                      static_cast<const SkMatrix *>(src));
	                }
	                SkUNREACHABLE;

	            case SkSLType::kHalf4x4:
	                return Write<float, SkHalf, 4, 4>(dest, n, static_cast<const float *>(src));

	            case SkSLType::kFloat4x4:
	                return Write<float, float, 4, 4>(dest, n, static_cast<const float *>(src));

	            default:
	                SK_ABORT("Unexpected uniform type");
	        }
	    }
	};

	#ifdef SK_DEBUG
	// Returns the alignment mask, i.e. (alignment - 1), for the given type. Because every alignment
	// is a power of 2, and-ing an offset with this mask yields 0 when the offset is aligned and
	// otherwise the number of bytes the offset sits past the previous aligned position.
	// Cases are grouped by the mask they return. Types with no entry abort.
	static uint32_t sksltype_to_alignment_mask(SkSLType type) {
	    switch (type) {
	        // Mask for 2-byte alignment.
	        case SkSLType::kShort:
	        case SkSLType::kUShort:
	        case SkSLType::kHalf:
	            return 0x1;

	        // Mask for 4-byte alignment.
	        case SkSLType::kInt:
	        case SkSLType::kUInt:
	        case SkSLType::kFloat:
	        case SkSLType::kShort2:
	        case SkSLType::kUShort2:
	        case SkSLType::kHalf2:
	        case SkSLType::kHalf2x2:
	            return 0x3;

	        // Mask for 8-byte alignment.
	        case SkSLType::kInt2:
	        case SkSLType::kUInt2:
	        case SkSLType::kFloat2:
	        case SkSLType::kFloat2x2:
	        case SkSLType::kShort3:
	        case SkSLType::kShort4:
	        case SkSLType::kUShort3:
	        case SkSLType::kUShort4:
	        case SkSLType::kHalf3:
	        case SkSLType::kHalf4:
	        case SkSLType::kHalf3x3:
	        case SkSLType::kHalf4x4:
	            return 0x7;

	        // Mask for 16-byte alignment.
	        case SkSLType::kInt3:
	        case SkSLType::kUInt3:
	        case SkSLType::kFloat3:
	        case SkSLType::kInt4:
	        case SkSLType::kUInt4:
	        case SkSLType::kFloat4:
	        case SkSLType::kFloat3x3:
	        case SkSLType::kFloat4x4:
	            return 0xF;

	        // The query is not valid for the remaining types; fall out of the switch and abort.
	        case SkSLType::kVoid:
	        case SkSLType::kBool:
	        case SkSLType::kBool2:
	        case SkSLType::kBool3:
	        case SkSLType::kBool4:
	        case SkSLType::kTexture2DSampler:
	        case SkSLType::kTextureExternalSampler:
	        case SkSLType::kTexture2DRectSampler:
	        case SkSLType::kSampler:
	        case SkSLType::kTexture2D:
	        case SkSLType::kInput:
	            break;
	    }
	    SK_ABORT("Unexpected type");
	}

	/**
	 * Returns the number of bytes an SkSLType takes up in a Metal buffer. Cases are grouped by the
	 * size they return. Types with no entry abort.
	 */
	inline uint32_t sksltype_to_mtl_size(SkSLType type) {
	    switch (type) {
	        case SkSLType::kShort:
	        case SkSLType::kUShort:
	        case SkSLType::kHalf:
	            return 2;

	        case SkSLType::kInt:
	        case SkSLType::kUInt:
	        case SkSLType::kFloat:
	        case SkSLType::kShort2:
	        case SkSLType::kUShort2:
	        case SkSLType::kHalf2:
	            return 4;

	        case SkSLType::kInt2:
	        case SkSLType::kUInt2:
	        case SkSLType::kFloat2:
	        case SkSLType::kShort3:
	        case SkSLType::kShort4:
	        case SkSLType::kUShort3:
	        case SkSLType::kUShort4:
	        case SkSLType::kHalf3:
	        case SkSLType::kHalf4:
	        case SkSLType::kHalf2x2:
	            return 8;

	        case SkSLType::kInt3:
	        case SkSLType::kUInt3:
	        case SkSLType::kFloat3:
	        case SkSLType::kInt4:
	        case SkSLType::kUInt4:
	        case SkSLType::kFloat4:
	        case SkSLType::kFloat2x2:
	            return 16;

	        case SkSLType::kHalf3x3:
	            return 24;

	        case SkSLType::kHalf4x4:
	            return 32;

	        case SkSLType::kFloat3x3:
	            return 48;

	        case SkSLType::kFloat4x4:
	            return 64;

	        // The query is not valid for the remaining types; fall out of the switch and abort.
	        case SkSLType::kVoid:
	        case SkSLType::kBool:
	        case SkSLType::kBool2:
	        case SkSLType::kBool3:
	        case SkSLType::kBool4:
	        case SkSLType::kTexture2DSampler:
	        case SkSLType::kTextureExternalSampler:
	        case SkSLType::kTexture2DRectSampler:
	        case SkSLType::kSampler:
	        case SkSLType::kTexture2D:
	        case SkSLType::kInput:
	            break;
	    }
	    SK_ABORT("Unexpected type");
	}

	// Given the current offset into the ubo, calculate the offset for the uniform we're trying to add
	// taking into consideration all alignment requirements. The aligned offset for the new uniform is
	// returned, and *currentOffset is updated to be the offset to the end of the new uniform.
	// *maxAlignment is raised to this uniform's alignment mask when that mask is larger than the value
	// already stored there. A non-zero arrayCount multiplies the element size taken from
	// sksltype_to_mtl_size(); a zero arrayCount means a single element.
	static uint32_t get_ubo_aligned_offset(uint32_t* currentOffset,
	                                       uint32_t* maxAlignment,
	                                       SkSLType type,
	                                       int arrayCount) {
	    uint32_t alignmentMask = sksltype_to_alignment_mask(type);
	    if (alignmentMask > *maxAlignment) {
	        *maxAlignment = alignmentMask;
	    }
	    // Number of bytes the current offset sits past the previous aligned position (0 if aligned).
	    uint32_t offsetDiff = *currentOffset & alignmentMask;
	    if (offsetDiff != 0) {
	        // Turn that into the padding required to reach the next aligned position.
	        offsetDiff = alignmentMask - offsetDiff + 1;
	    }
	    uint32_t uniformOffset = *currentOffset + offsetDiff;
	    SkASSERT(sizeof(float) == 4);
	    if (arrayCount) {
	        // Write through the pointer, and scale the element size by the array count.
	        *currentOffset = uniformOffset + sksltype_to_mtl_size(type) * arrayCount;
	    } else {
	        *currentOffset = uniformOffset + sksltype_to_mtl_size(type);
	    }
	    return uniformOffset;
	}
	#endif // SK_DEBUG

	/**
	 * Maps a uniform's declared type to the type used for the current layout. The Metal layout
	 * keeps the type as is; every other layout widens short, ushort and half types (scalars,
	 * vectors and half matrices) to their int, uint and float counterparts. Any other type is
	 * returned unchanged.
	 */
	SkSLType UniformManager::getUniformTypeForLayout(SkSLType type) {
	    if (fLayout == Layout::kMetal) {
	        return type;
	    }

	    // GL/Vk expect uniforms in 32-bit precision. Convert lower-precision types to 32-bit.
	    switch (type) {
	        case SkSLType::kShort:   return SkSLType::kInt;
	        case SkSLType::kShort2:  return SkSLType::kInt2;
	        case SkSLType::kShort3:  return SkSLType::kInt3;
	        case SkSLType::kShort4:  return SkSLType::kInt4;

	        case SkSLType::kUShort:  return SkSLType::kUInt;
	        case SkSLType::kUShort2: return SkSLType::kUInt2;
	        case SkSLType::kUShort3: return SkSLType::kUInt3;
	        case SkSLType::kUShort4: return SkSLType::kUInt4;

	        case SkSLType::kHalf:    return SkSLType::kFloat;
	        case SkSLType::kHalf2:   return SkSLType::kFloat2;
	        case SkSLType::kHalf3:   return SkSLType::kFloat3;
	        case SkSLType::kHalf4:   return SkSLType::kFloat4;

	        case SkSLType::kHalf2x2: return SkSLType::kFloat2x2;
	        case SkSLType::kHalf3x3: return SkSLType::kFloat3x3;
	        case SkSLType::kHalf4x4: return SkSLType::kFloat4x4;

	        default:                 return type;
	    }
	}

	/**
	 * Passes every uniform in 'uniforms' to the WriteUniform() of the Rules matching fLayout,
	 * placing each one directly after the bytes reported for the previous one.
	 *
	 * srcs[i] is the source data for uniforms[i]; a null 'srcs' hands a null source to the writer.
	 * A null 'dst' hands a null destination to the writer. When 'offsets' is non-null, offsets[i]
	 * receives the byte offset at which uniforms[i] starts.
	 *
	 * Returns the sum of the byte counts reported by the writer. Debug builds additionally assert
	 * that each start offset matches the one computed by get_ubo_aligned_offset().
	 */
	uint32_t UniformManager::writeUniforms(SkSpan<const SkUniform> uniforms,
	                                       const void** srcs,
	                                       uint32_t* offsets,
	                                       char *dst) {
	    using WriteFn = decltype(&Writer<Rules140>::WriteUniform);

	    // Select the writer whose packing rules correspond to this manager's layout.
	    WriteFn writeFn;
	    switch (fLayout) {
	        case Layout::kStd140: writeFn = Writer<Rules140>::WriteUniform;   break;
	        case Layout::kStd430: writeFn = Writer<Rules430>::WriteUniform;   break;
	        case Layout::kMetal:  writeFn = Writer<RulesMetal>::WriteUniform; break;
	    }

	#ifdef SK_DEBUG
	    uint32_t trackedEnd = 0;
	    uint32_t trackedMaxAlignment = 0;
	#endif // SK_DEBUG

	    uint32_t totalBytes = 0;
	    const int uniformCount = static_cast<int>(uniforms.size());

	    for (int index = 0; index < uniformCount; ++index) {
	        const SkUniform& uniform = uniforms[index];
	        const SkSLType layoutType = this->getUniformTypeForLayout(uniform.type());

	#ifdef SK_DEBUG
	        uint32_t expectedOffset = get_ubo_aligned_offset(&trackedEnd,
	                                                         &trackedMaxAlignment,
	                                                         layoutType,
	                                                         uniform.count());
	#endif // SK_DEBUG

	        void* target = dst ? &dst[totalBytes] : nullptr;
	        const void* source = srcs ? srcs[index] : nullptr;
	        const uint32_t written = writeFn(layoutType,
	                                         CType::kDefault,
	                                         target,
	                                         uniform.count(),
	                                         source);
	        SkASSERT(expectedOffset == totalBytes);

	        if (offsets) {
	            offsets[index] = totalBytes;
	        }
	        totalBytes += written;
	    }

	    return totalBytes;
	}

	} // namespace skgpu