third_party/skia_next/third_party/skia/experimental/graphite/src/DrawPass.cpp - cobalt - Git at Google

 /*
  * Copyright 2021 Google LLC
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #include "experimental/graphite/src/DrawPass.h"

 #include "experimental/graphite/include/GraphiteTypes.h"
 #include "experimental/graphite/src/Buffer.h"
 #include "experimental/graphite/src/ContextUtils.h"
 #include "experimental/graphite/src/DrawBufferManager.h"
 #include "experimental/graphite/src/DrawContext.h"
 #include "experimental/graphite/src/DrawList.h"
 #include "experimental/graphite/src/ProgramCache.h"
 #include "experimental/graphite/src/Recorder.h"
 #include "experimental/graphite/src/Renderer.h"
 #include "experimental/graphite/src/TextureProxy.h"
 #include "experimental/graphite/src/UniformCache.h"
 #include "experimental/graphite/src/geom/BoundsManager.h"

 #include "src/core/SkMathPriv.h"
 #include "src/core/SkUtils.h"
 #include "src/gpu/BufferWriter.h"

 #include <algorithm>

 namespace {

 // Retrieve the program ID and uniformData ID
 std::tuple<uint32_t, uint32_t> get_ids_from_paint(skgpu::Recorder* recorder,
                                                   skgpu::PaintParams params) {
     // TODO: add an ExtractCombo that takes PaintParams directly?
     SkPaint p;

     p.setColor(params.color());
     p.setBlendMode(params.blendMode());
     p.setShader(params.refShader());

     // TODO: perhaps just return the ids here rather than the sk_sps?
     auto [ combo, uniformData] = ExtractCombo(recorder->uniformCache(), p);
     auto programInfo = recorder->programCache()->findOrCreateProgram(combo);

     return { programInfo->id(), uniformData->id() };
 }

 } // anonymous namespace

 namespace skgpu {

 /**
  * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's
  * Renderer), which can be sorted independently. Each (step, draw) pair produces its own SortKey.
  *
  * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic binds
  * within a pipeline, while still respecting the overall painter's order. This decreases the number
  * of low-level draw commands in a command buffer and increases the size of those, allowing the GPU
  * to operate more efficiently and have fewer bubbles within its own instruction stream.
  *
  * The Draw's CompressedPaintersOrder and DisjointStencilIndex represent the most significant bits
  * of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline
  * description is encoded in two steps:
  *  1. The index of the RenderStep packed in the high bits to ensure each step for a draw is
  *     ordered correctly.
  *  2. An index into a cache of pipeline descriptions is used to encode the identity of the
  *     pipeline (SortKeys that differ in the bits from #1 necessarily would have different
  *     descriptions, but then the specific ordering of the RenderSteps isn't enforced).
  * Last, the SortKey encodes an index into the set of uniform bindings accumulated for a DrawPass.
  * This allows the SortKey to cluster draw steps that have both a compatible pipeline and do not
  * require rebinding uniform data or other state (e.g. scissor). Since the uniform data index and
  * the pipeline description index are packed into indices and not actual pointers, a given SortKey
  * is only valid for a specific DrawList->DrawPass conversion.
  *
  * Implementation note: the key fields are packed with explicit shifts and masks into 32-bit
  * words rather than C++ bit-fields. The allocation order of bit-fields inside their storage
  * unit is implementation-defined, so reinterpreting a bit-field struct as an integer does not
  * guarantee that the first-declared field ends up most significant. Explicit packing makes the
  * significance order described above hold regardless of compiler or byte order, while keeping
  * the object at four 32-bit words plus one pointer.
  */
 class DrawPass::SortKey {
 public:
     /**
      * draw                - draw that produced this key; kept as a non-owning backpointer.
      * renderStep          - index of the RenderStep in the draw's Renderer (low 2 bits kept).
      * pipelineIndex       - pipeline description index (low 30 bits kept).
      * geomUniformIndex    - index identifying the geometry uniform binding.
      * shadingUniformIndex - index identifying the shading uniform binding.
      */
     SortKey(const DrawList::Draw* draw,
             int renderStep,
             uint32_t pipelineIndex,
             uint32_t geomUniformIndex,
             uint32_t shadingUniformIndex)
         : fOrderBits(PackOrder(draw->fOrder.paintOrder().bits(),
                                draw->fOrder.stencilIndex().bits()))
         , fStepAndPipeline(PackStepAndPipeline(static_cast<uint32_t>(renderStep),
                                                pipelineIndex))
         , fGeometryIndex(geomUniformIndex)
         , fShadingIndex(shadingUniformIndex)
         , fDraw(draw) {
     }

     // Orders by the 64-bit pipeline key first and by the 64-bit uniform key on ties. The
     // backpointer does not take part in the comparison.
     bool operator<(const SortKey& k) const {
         const uint64_t k1 = this->pipelineKey();
         const uint64_t k2 = k.pipelineKey();
         return k1 < k2 || (k1 == k2 && this->uniformKey() < k.uniformKey());
     }

     const DrawList::Draw* draw() const { return fDraw; }
     uint32_t pipeline() const { return fStepAndPipeline & kPipelineMask; }
     int renderStep() const { return static_cast<int>(fStepAndPipeline >> kPipelineBits); }

     uint32_t geometryUniforms() const { return fGeometryIndex; }
     uint32_t shadingUniforms() const { return fShadingIndex; }

 private:
     // Field widths, listed from most-significant to least-significant within the 128-bit value
     // formed by (pipelineKey, uniformKey).
     static constexpr int kColorDepthOrderBits = 16; // sizeof(CompressedPaintersOrder)
     static constexpr int kStencilOrderBits    = 16; // sizeof(DisjointStencilIndex)
     static constexpr int kRenderStepBits      = 2;  // bits >= log2(Renderer::kMaxRenderSteps)
     static constexpr int kPipelineBits        = 30; // bits >= log2(max steps * kMaxDraws)

     static constexpr uint32_t kColorDepthOrderMask = (1u << kColorDepthOrderBits) - 1;
     static constexpr uint32_t kStencilOrderMask    = (1u << kStencilOrderBits) - 1;
     static constexpr uint32_t kRenderStepMask      = (1u << kRenderStepBits) - 1;
     static constexpr uint32_t kPipelineMask        = (1u << kPipelineBits) - 1;

     // [31:16] painter's order, [15:0] stencil index. Values are masked to their field width.
     static constexpr uint32_t PackOrder(uint32_t paintOrder, uint32_t stencilIndex) {
         return ((paintOrder & kColorDepthOrderMask) << kStencilOrderBits) |
                (stencilIndex & kStencilOrderMask);
     }

     // [31:30] render step, [29:0] pipeline index. Values are masked to their field width.
     static constexpr uint32_t PackStepAndPipeline(uint32_t renderStep, uint32_t pipelineIndex) {
         return ((renderStep & kRenderStepMask) << kPipelineBits) |
                (pipelineIndex & kPipelineMask);
     }

     // High 32 bits: painter's order + stencil index. Low 32 bits: render step + pipeline.
     uint64_t pipelineKey() const {
         return (static_cast<uint64_t>(fOrderBits) << 32) | fStepAndPipeline;
     }

     // High 32 bits: geometry uniform index. Low 32 bits: shading uniform index.
     uint64_t uniformKey() const {
         return (static_cast<uint64_t>(fGeometryIndex) << 32) | fShadingIndex;
     }

     uint32_t fOrderBits;       // see PackOrder()
     uint32_t fStepAndPipeline; // see PackStepAndPipeline()
     uint32_t fGeometryIndex;   // bits >= log2(max steps * max draw count)
     uint32_t fShadingIndex;    //  ""

     // Backpointer to the draw that produced the sort key
     const DrawList::Draw* fDraw;

     // NOTE(review): the two sizeof() checks compare a bit count against a byte count, so they
     // are looser than the field widths they guard. Confirm whether 8 * sizeof(...) was meant.
     static_assert(kColorDepthOrderBits >= sizeof(CompressedPaintersOrder));
     static_assert(kStencilOrderBits >= sizeof(DisjointStencilIndex));
     static_assert(kRenderStepBits >= SkNextLog2_portable(Renderer::kMaxRenderSteps));
     static_assert(kPipelineBits >=
                   SkNextLog2_portable(Renderer::kMaxRenderSteps * DrawList::kMaxDraws));
     // Each packed word must be filled exactly by its two fields.
     static_assert(kColorDepthOrderBits + kStencilOrderBits == 32);
     static_assert(kRenderStepBits + kPipelineBits == 32);
 };

 ///////////////////////////////////////////////////////////////////////////////////////////////////

 namespace {

 // Fetches the UniformData stored under 'uniformID' in the recorder's uniform cache and hands
 // back a non-owning pointer to it. The sk_sp produced by the cache lookup is released before
 // this function returns, so the pointer stays usable only while some other owner still holds
 // a reference. NOTE(review): presumably the cache itself does; confirm.
 skgpu::UniformData* lookup(skgpu::Recorder* recorder, uint32_t uniformID) {
     // TODO: just return a raw 'UniformData*' here
     return recorder->uniformCache()->lookup(uniformID).get();
 }

 } // anonymous namespace

 // Captures the render target, the integer bounds of the pass, and the requiresStencil /
 // requiresMSAA flags. Nothing besides member initialization happens here.
 DrawPass::DrawPass(sk_sp<TextureProxy> target,
                    const SkIRect& bounds,
                    bool requiresStencil,
                    bool requiresMSAA)
         : fTarget(std::move(target))
         , fBounds(bounds)
         , fRequiresStencil(requiresStencil)
         , fRequiresMSAA(requiresMSAA) {
 }

 // Compiler-generated destruction of the members is sufficient.
 DrawPass::~DrawPass() = default;

 /**
  * Builds a DrawPass for 'target' from the draws held by 'draws'.
  *
  * Steps, in order:
  *  1. Every draw that 'occlusionCuller' (when provided) does not report as occluded contributes
  *     one SortKey per RenderStep of its Renderer, and is folded into the pass bounds and the
  *     stencil/MSAA requirements.
  *  2. The keys are sorted with SortKey::operator<.
  *  3. The sorted keys are walked in order: vertex/index space is requested from the recorder's
  *     DrawBufferManager and filled by the draw, and shading uniform data is copied into a
  *     uniform buffer whenever the shading uniform index changes. Recording of the actual
  *     bind/draw/scissor commands is still TODO.
  *
  * @param recorder         provides the uniform cache, program cache and draw buffer manager
  * @param draws            draws to convert; they are only read while this function runs
  * @param target           texture proxy the pass renders into; its dimensions seed the scissor
  * @param occlusionCuller  optional; when null no draw is culled
  * @return a new DrawPass. When no draw survives culling, its bounds are empty.
  */
 std::unique_ptr<DrawPass> DrawPass::Make(Recorder* recorder,
                                          std::unique_ptr<DrawList> draws,
                                          sk_sp<TextureProxy> target,
                                          const BoundsManager* occlusionCuller) {
     // NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change to
     // its size should be done with care and good reason. The performance of sorting the keys is
     // heavily tied to the total size.
     //
     // At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just
     // 16 bytes. There are several ways this could be done if necessary:
     //  - Restricting the max draw count to 16k (14-bits) and only using a single index to refer to
     //    the uniform data => 8 bytes of key, 8 bytes of pointer.
     //  - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal the
     //    4 low bits from the Draw* pointer since it's 16 byte aligned.
     //  - Compact the Draw* to an index into the original collection, although that has extra
     //    indirection and does not work as well with SkTBlockList.
     // In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower
     // than an 8 byte key and unmodified pointer.
     static_assert(sizeof(DrawPass::SortKey) == 16 + sizeof(void*));

     bool requiresStencil = false;
     bool requiresMSAA = false;
     // Set once at least one draw survives culling, i.e. once 'passBounds' holds real bounds.
     bool hasVisibleDraws = false;
     Rect passBounds = Rect::InfiniteInverted();

     std::vector<SortKey> keys;
     keys.reserve(draws->renderStepCount()); // will not exceed but may use less with occluded draws

     for (const DrawList::Draw& draw : draws->fDraws.items()) {
         if (occlusionCuller && occlusionCuller->isOccluded(draw.fClip.drawBounds(),
                                                            draw.fOrder.depth())) {
             continue;
         }

         // If we have two different descriptors, such that the uniforms from the PaintParams can be
         // bound independently of those used by the rest of the RenderStep, then we can upload now
         // and remember the location for re-use on any RenderStep that does shading.
         uint32_t programID = ProgramCache::kInvalidProgramID;
         uint32_t shadingUniformID = UniformData::kInvalidUniformID;
         if (draw.fPaintParams.has_value()) {
             std::tie(programID, shadingUniformID) = get_ids_from_paint(recorder,
                                                                        draw.fPaintParams.value());
         }

         for (int stepIndex = 0; stepIndex < draw.fRenderer.numRenderSteps(); ++stepIndex) {
             const RenderStep* const step = draw.fRenderer.steps()[stepIndex];

             // TODO ask step to generate a pipeline description based on the above shading code, and
             // have pipelineIndex point to that description in the accumulated list of descs
             uint32_t pipelineIndex = 0;
             // TODO step writes out geometry uniforms and have geomIndex point to that buffer data,
             // providing shape, transform, scissor, and paint depth to RenderStep
             uint32_t geometryIndex = 0;

             uint32_t shadingIndex = UniformData::kInvalidUniformID;

             const bool performsShading = draw.fPaintParams.has_value() && step->performsShading();
             if (performsShading) {
                 // TODO: we need to combine the 'programID' with the RenderPass info and the
                 // geometric rendering method to get the true 'pipelineIndex'
                 pipelineIndex = programID;
                 shadingIndex = shadingUniformID;
             } else {
                 // TODO: fill in 'pipelineIndex' for Chris' stencil/depth draws
             }

             keys.push_back({&draw, stepIndex, pipelineIndex, geometryIndex, shadingIndex});
         }

         passBounds.join(draw.fClip.drawBounds());
         requiresStencil |= draw.fRenderer.requiresStencil();
         requiresMSAA |= draw.fRenderer.requiresMSAA();
         hasVisibleDraws = true;
     }

     // TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so
     // algorithms that approach O(n) in that condition may be favorable. Alternatively, could
     // explore radix sort that is always O(n). Brief testing suggested std::sort was faster than
     // std::stable_sort and SkTQSort on my [ml]'s Windows desktop. Also worth considering in-place
     // vs. algorithms that require an extra O(n) storage.
     // TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding
     // bugs in the DrawOrder determination code?
     std::sort(keys.begin(), keys.end());

     DrawBufferManager* bufferMgr = recorder->drawBufferManager();

     // State of the most recently processed key, used to detect when a rebind would be needed.
     // NOTE: 'lastScissor', 'lastBoundVertexBuffer' and 'lastBoundIndexBuffer' are compared
     // below but never updated yet; they must be updated once the matching TODOs record
     // commands.
     uint32_t lastPipeline = 0;
     uint32_t lastShadingUniforms = UniformData::kInvalidUniformID;
     uint32_t lastGeometryUniforms = 0;
     SkIRect lastScissor = SkIRect::MakeSize(target->dimensions());
     Buffer* lastBoundVertexBuffer = nullptr;
     Buffer* lastBoundIndexBuffer = nullptr;

     for (const SortKey& key : keys) {
         const DrawList::Draw& draw = *key.draw();
         int renderStep = key.renderStep();

         size_t vertexSize = draw.requiredVertexSpace(renderStep);
         size_t indexSize = draw.requiredIndexSpace(renderStep);
         auto [vertexWriter, vertexInfo] = bufferMgr->getVertexWriter(vertexSize);
         auto [indexWriter, indexInfo] = bufferMgr->getIndexWriter(indexSize);
         // TODO: handle the case where we fail to get a vertex or index writer besides asserting
         SkASSERT(!vertexSize || (vertexWriter && vertexInfo.fBuffer));
         SkASSERT(!indexSize || (indexWriter && indexInfo.fBuffer));
         draw.writeVertices(std::move(vertexWriter), std::move(indexWriter), renderStep);

         if (vertexSize) {
             if (lastBoundVertexBuffer != vertexInfo.fBuffer) {
                 // TODO: Record a vertex bind call that stores the vertexInfo.fBuffer.
             }
             // TODO: Store the vertexInfo.fOffset so the draw will know its vertex offset when it
             // executes.
         }
         if (indexSize) {
             if (lastBoundIndexBuffer != indexInfo.fBuffer) {
                 // TODO: Record an index bind call that stores the indexInfo.fBuffer.
             }
             // TODO: Store the indexInfo.fOffset so the draw will know its index offset when it
             // executes.
         }

         // TODO: Have the render step write out vertices and figure out what draw call function and
         // primitive type it uses. The vertex buffer binding/offset and draw params will be examined
         // to determine if the active draw can be updated to include the new vertices, or if it has
         // to be ended and a new one begun for this step. In addition to checking this state, must
         // also check if pipeline, uniform, scissor etc. would require the active draw to end.
         //
         // const RenderStep* const step = draw.fRenderer.steps()[key.renderStep()];

         if (key.pipeline() != lastPipeline) {
             // TODO: Look up pipeline description from key's index and record binding it
             lastPipeline = key.pipeline();
             // A new pipeline invalidates whatever uniforms were considered bound.
             lastShadingUniforms = UniformData::kInvalidUniformID;
             lastGeometryUniforms = 0;
         }
         if (key.geometryUniforms() != lastGeometryUniforms) {
             // TODO: Look up uniform buffer binding info corresponding to key's index and record it
             lastGeometryUniforms = key.geometryUniforms();
         }
         if (key.shadingUniforms() != lastShadingUniforms) {
             // Keys of steps that do not shade carry kInvalidUniformID: there is nothing to
             // upload for them, so the cache is only consulted for real IDs, and the result is
             // checked before it is dereferenced.
             if (key.shadingUniforms() != UniformData::kInvalidUniformID) {
                 auto* ud = lookup(recorder, key.shadingUniforms());
                 SkASSERT(ud);
                 if (ud) {
                     auto [writer, bufferInfo] = bufferMgr->getUniformWriter(ud->dataSize());
                     writer.write(ud->data(), ud->dataSize());
                     // TODO: recording 'bufferInfo' somewhere to allow a later uniform bind call
                 }
             }

             lastShadingUniforms = key.shadingUniforms();
         }

         if (draw.fClip.scissor() != lastScissor) {
             // TODO: Record new scissor rectangle
         }

         // TODO: Write vertex and index data for the draw step
     }

     // if (currentDraw) {
         // TODO: End the current draw if it has pending vertices
     // }

     // With no surviving draw nothing was joined into 'passBounds', so it still holds the
     // InfiniteInverted() sentinel (presumably non-finite edges). Converting a non-finite float to
     // int is undefined behavior, so an empty pass reports empty bounds instead.
     SkIRect pxPassBounds = SkIRect::MakeEmpty();
     if (hasVisibleDraws) {
         passBounds.roundOut();
         pxPassBounds = SkIRect::MakeLTRB(static_cast<int>(passBounds.left()),
                                          static_cast<int>(passBounds.top()),
                                          static_cast<int>(passBounds.right()),
                                          static_cast<int>(passBounds.bot()));
     }
     return std::unique_ptr<DrawPass>(new DrawPass(std::move(target), pxPassBounds,
                                                   requiresStencil, requiresMSAA));
 }

 // Intended to replay this pass into 'buffer'. Not implemented yet: the body is a placeholder
 // and 'buffer' is currently unused.
 void DrawPass::execute(CommandBuffer* buffer) const {
     // TODO
 }

 } // namespace skgpu
	/*
	* Copyright 2021 Google LLC
	*
	* Use of this source code is governed by a BSD-style license that can be
	* found in the LICENSE file.
	*/

	#include "experimental/graphite/src/DrawPass.h"

	#include "experimental/graphite/include/GraphiteTypes.h"
	#include "experimental/graphite/src/Buffer.h"
	#include "experimental/graphite/src/ContextUtils.h"
	#include "experimental/graphite/src/DrawBufferManager.h"
	#include "experimental/graphite/src/DrawContext.h"
	#include "experimental/graphite/src/DrawList.h"
	#include "experimental/graphite/src/ProgramCache.h"
	#include "experimental/graphite/src/Recorder.h"
	#include "experimental/graphite/src/Renderer.h"
	#include "experimental/graphite/src/TextureProxy.h"
	#include "experimental/graphite/src/UniformCache.h"
	#include "experimental/graphite/src/geom/BoundsManager.h"

	#include "src/core/SkMathPriv.h"
	#include "src/core/SkUtils.h"
	#include "src/gpu/BufferWriter.h"

	#include <algorithm>

	namespace {

	// Retrieve the program ID and uniformData ID
	std::tuple<uint32_t, uint32_t> get_ids_from_paint(skgpu::Recorder* recorder,
	skgpu::PaintParams params) {
	// TODO: add an ExtractCombo that takes PaintParams directly?
	SkPaint p;

	p.setColor(params.color());
	p.setBlendMode(params.blendMode());
	p.setShader(params.refShader());

	// TODO: perhaps just return the ids here rather than the sk_sps?
	auto [ combo, uniformData] = ExtractCombo(recorder->uniformCache(), p);
	auto programInfo = recorder->programCache()->findOrCreateProgram(combo);

	return { programInfo->id(), uniformData->id() };
	}

	} // anonymous namespace

	namespace skgpu {

	/**
	 * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's
	 * Renderer), which can be sorted independently. Each (step, draw) pair produces its own SortKey.
	 *
	 * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic
	 * binds within a pipeline, while still respecting the overall painter's order. This decreases
	 * the number of low-level draw commands in a command buffer and increases the size of those,
	 * allowing the GPU to operate more efficiently and have fewer bubbles within its own
	 * instruction stream.
	 *
	 * The Draw's CompressedPaintersOrder and DisjointStencilIndex represent the most significant
	 * bits of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline
	 * description is encoded in two steps:
	 *  1. The index of the RenderStep packed in the high bits to ensure each step for a draw is
	 *     ordered correctly.
	 *  2. An index into a cache of pipeline descriptions is used to encode the identity of the
	 *     pipeline (SortKeys that differ in the bits from #1 necessarily would have different
	 *     descriptions, but then the specific ordering of the RenderSteps isn't enforced).
	 * Last, the SortKey encodes an index into the set of uniform bindings accumulated for a
	 * DrawPass. This allows the SortKey to cluster draw steps that have both a compatible pipeline
	 * and do not require rebinding uniform data or other state (e.g. scissor). Since the uniform
	 * data index and the pipeline description index are packed into indices and not actual
	 * pointers, a given SortKey is only valid for a specific DrawList->DrawPass conversion.
	 *
	 * Implementation note: the key fields are packed with explicit shifts and masks into 32-bit
	 * words rather than C++ bit-fields. The allocation order of bit-fields inside their storage
	 * unit is implementation-defined, so reinterpreting a bit-field struct as an integer does not
	 * guarantee that the first-declared field ends up most significant. Explicit packing makes the
	 * significance order described above hold regardless of compiler or byte order, while keeping
	 * the object at four 32-bit words plus one pointer.
	 */
	class DrawPass::SortKey {
	public:
	    /**
	     * draw                - draw that produced this key; kept as a non-owning backpointer.
	     * renderStep          - index of the RenderStep in the draw's Renderer (low 2 bits kept).
	     * pipelineIndex       - pipeline description index (low 30 bits kept).
	     * geomUniformIndex    - index identifying the geometry uniform binding.
	     * shadingUniformIndex - index identifying the shading uniform binding.
	     */
	    SortKey(const DrawList::Draw* draw,
	            int renderStep,
	            uint32_t pipelineIndex,
	            uint32_t geomUniformIndex,
	            uint32_t shadingUniformIndex)
	        : fOrderBits(PackOrder(draw->fOrder.paintOrder().bits(),
	                               draw->fOrder.stencilIndex().bits()))
	        , fStepAndPipeline(PackStepAndPipeline(static_cast<uint32_t>(renderStep),
	                                               pipelineIndex))
	        , fGeometryIndex(geomUniformIndex)
	        , fShadingIndex(shadingUniformIndex)
	        , fDraw(draw) {
	    }

	    // Orders by the 64-bit pipeline key first and by the 64-bit uniform key on ties. The
	    // backpointer does not take part in the comparison.
	    bool operator<(const SortKey& k) const {
	        const uint64_t k1 = this->pipelineKey();
	        const uint64_t k2 = k.pipelineKey();
	        return k1 < k2 || (k1 == k2 && this->uniformKey() < k.uniformKey());
	    }

	    const DrawList::Draw* draw() const { return fDraw; }
	    uint32_t pipeline() const { return fStepAndPipeline & kPipelineMask; }
	    int renderStep() const { return static_cast<int>(fStepAndPipeline >> kPipelineBits); }

	    uint32_t geometryUniforms() const { return fGeometryIndex; }
	    uint32_t shadingUniforms() const { return fShadingIndex; }

	private:
	    // Field widths, listed from most-significant to least-significant within the 128-bit
	    // value formed by (pipelineKey, uniformKey).
	    static constexpr int kColorDepthOrderBits = 16; // sizeof(CompressedPaintersOrder)
	    static constexpr int kStencilOrderBits    = 16; // sizeof(DisjointStencilIndex)
	    static constexpr int kRenderStepBits      = 2;  // bits >= log2(Renderer::kMaxRenderSteps)
	    static constexpr int kPipelineBits        = 30; // bits >= log2(max steps * kMaxDraws)

	    static constexpr uint32_t kColorDepthOrderMask = (1u << kColorDepthOrderBits) - 1;
	    static constexpr uint32_t kStencilOrderMask    = (1u << kStencilOrderBits) - 1;
	    static constexpr uint32_t kRenderStepMask      = (1u << kRenderStepBits) - 1;
	    static constexpr uint32_t kPipelineMask        = (1u << kPipelineBits) - 1;

	    // [31:16] painter's order, [15:0] stencil index. Values are masked to their field width.
	    static constexpr uint32_t PackOrder(uint32_t paintOrder, uint32_t stencilIndex) {
	        return ((paintOrder & kColorDepthOrderMask) << kStencilOrderBits) |
	               (stencilIndex & kStencilOrderMask);
	    }

	    // [31:30] render step, [29:0] pipeline index. Values are masked to their field width.
	    static constexpr uint32_t PackStepAndPipeline(uint32_t renderStep,
	                                                  uint32_t pipelineIndex) {
	        return ((renderStep & kRenderStepMask) << kPipelineBits) |
	               (pipelineIndex & kPipelineMask);
	    }

	    // High 32 bits: painter's order + stencil index. Low 32 bits: render step + pipeline.
	    uint64_t pipelineKey() const {
	        return (static_cast<uint64_t>(fOrderBits) << 32) | fStepAndPipeline;
	    }

	    // High 32 bits: geometry uniform index. Low 32 bits: shading uniform index.
	    uint64_t uniformKey() const {
	        return (static_cast<uint64_t>(fGeometryIndex) << 32) | fShadingIndex;
	    }

	    uint32_t fOrderBits;       // see PackOrder()
	    uint32_t fStepAndPipeline; // see PackStepAndPipeline()
	    uint32_t fGeometryIndex;   // bits >= log2(max steps * max draw count)
	    uint32_t fShadingIndex;    //  ""

	    // Backpointer to the draw that produced the sort key
	    const DrawList::Draw* fDraw;

	    // NOTE(review): the two sizeof() checks compare a bit count against a byte count, so they
	    // are looser than the field widths they guard. Confirm whether 8 * sizeof(...) was meant.
	    static_assert(kColorDepthOrderBits >= sizeof(CompressedPaintersOrder));
	    static_assert(kStencilOrderBits >= sizeof(DisjointStencilIndex));
	    static_assert(kRenderStepBits >= SkNextLog2_portable(Renderer::kMaxRenderSteps));
	    static_assert(kPipelineBits >=
	                  SkNextLog2_portable(Renderer::kMaxRenderSteps * DrawList::kMaxDraws));
	    // Each packed word must be filled exactly by its two fields.
	    static_assert(kColorDepthOrderBits + kStencilOrderBits == 32);
	    static_assert(kRenderStepBits + kPipelineBits == 32);
	};

	///////////////////////////////////////////////////////////////////////////////////////////////////

	namespace {

	// Fetches the UniformData stored under 'uniformID' in the recorder's uniform cache and hands
	// back a non-owning pointer to it. The sk_sp produced by the cache lookup is released before
	// this function returns, so the pointer stays usable only while some other owner still holds
	// a reference. NOTE(review): presumably the cache itself does; confirm.
	skgpu::UniformData* lookup(skgpu::Recorder* recorder, uint32_t uniformID) {
	    // TODO: just return a raw 'UniformData*' here
	    return recorder->uniformCache()->lookup(uniformID).get();
	}

	} // anonymous namespace

	// Captures the render target, the integer bounds of the pass, and the requiresStencil /
	// requiresMSAA flags. Nothing besides member initialization happens here.
	DrawPass::DrawPass(sk_sp<TextureProxy> target,
	                   const SkIRect& bounds,
	                   bool requiresStencil,
	                   bool requiresMSAA)
	        : fTarget(std::move(target))
	        , fBounds(bounds)
	        , fRequiresStencil(requiresStencil)
	        , fRequiresMSAA(requiresMSAA) {
	}

	// Compiler-generated destruction of the members is sufficient.
	DrawPass::~DrawPass() = default;

	/**
	 * Builds a DrawPass for 'target' from the draws held by 'draws'.
	 *
	 * Steps, in order:
	 *  1. Every draw that 'occlusionCuller' (when provided) does not report as occluded
	 *     contributes one SortKey per RenderStep of its Renderer, and is folded into the pass
	 *     bounds and the stencil/MSAA requirements.
	 *  2. The keys are sorted with SortKey::operator<.
	 *  3. The sorted keys are walked in order: vertex/index space is requested from the recorder's
	 *     DrawBufferManager and filled by the draw, and shading uniform data is copied into a
	 *     uniform buffer whenever the shading uniform index changes. Recording of the actual
	 *     bind/draw/scissor commands is still TODO.
	 *
	 * @param recorder         provides the uniform cache, program cache and draw buffer manager
	 * @param draws            draws to convert; they are only read while this function runs
	 * @param target           texture proxy the pass renders into; its dimensions seed the scissor
	 * @param occlusionCuller  optional; when null no draw is culled
	 * @return a new DrawPass. When no draw survives culling, its bounds are empty.
	 */
	std::unique_ptr<DrawPass> DrawPass::Make(Recorder* recorder,
	                                         std::unique_ptr<DrawList> draws,
	                                         sk_sp<TextureProxy> target,
	                                         const BoundsManager* occlusionCuller) {
	    // NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change
	    // to its size should be done with care and good reason. The performance of sorting the keys
	    // is heavily tied to the total size.
	    //
	    // At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into
	    // just 16 bytes. There are several ways this could be done if necessary:
	    //  - Restricting the max draw count to 16k (14-bits) and only using a single index to refer
	    //    to the uniform data => 8 bytes of key, 8 bytes of pointer.
	    //  - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal
	    //    the 4 low bits from the Draw* pointer since it's 16 byte aligned.
	    //  - Compact the Draw* to an index into the original collection, although that has extra
	    //    indirection and does not work as well with SkTBlockList.
	    // In pseudo tests, manipulating the pointer or having to mask out indices was about 15%
	    // slower than an 8 byte key and unmodified pointer.
	    static_assert(sizeof(DrawPass::SortKey) == 16 + sizeof(void*));

	    bool requiresStencil = false;
	    bool requiresMSAA = false;
	    // Set once at least one draw survives culling, i.e. once 'passBounds' holds real bounds.
	    bool hasVisibleDraws = false;
	    Rect passBounds = Rect::InfiniteInverted();

	    std::vector<SortKey> keys;
	    // Will not exceed but may use less with occluded draws.
	    keys.reserve(draws->renderStepCount());

	    for (const DrawList::Draw& draw : draws->fDraws.items()) {
	        if (occlusionCuller && occlusionCuller->isOccluded(draw.fClip.drawBounds(),
	                                                           draw.fOrder.depth())) {
	            continue;
	        }

	        // If we have two different descriptors, such that the uniforms from the PaintParams can
	        // be bound independently of those used by the rest of the RenderStep, then we can
	        // upload now and remember the location for re-use on any RenderStep that does shading.
	        uint32_t programID = ProgramCache::kInvalidProgramID;
	        uint32_t shadingUniformID = UniformData::kInvalidUniformID;
	        if (draw.fPaintParams.has_value()) {
	            std::tie(programID, shadingUniformID) =
	                    get_ids_from_paint(recorder, draw.fPaintParams.value());
	        }

	        for (int stepIndex = 0; stepIndex < draw.fRenderer.numRenderSteps(); ++stepIndex) {
	            const RenderStep* const step = draw.fRenderer.steps()[stepIndex];

	            // TODO ask step to generate a pipeline description based on the above shading code,
	            // and have pipelineIndex point to that description in the accumulated list of descs
	            uint32_t pipelineIndex = 0;
	            // TODO step writes out geometry uniforms and have geomIndex point to that buffer
	            // data, providing shape, transform, scissor, and paint depth to RenderStep
	            uint32_t geometryIndex = 0;

	            uint32_t shadingIndex = UniformData::kInvalidUniformID;

	            const bool performsShading =
	                    draw.fPaintParams.has_value() && step->performsShading();
	            if (performsShading) {
	                // TODO: we need to combine the 'programID' with the RenderPass info and the
	                // geometric rendering method to get the true 'pipelineIndex'
	                pipelineIndex = programID;
	                shadingIndex = shadingUniformID;
	            } else {
	                // TODO: fill in 'pipelineIndex' for Chris' stencil/depth draws
	            }

	            keys.push_back({&draw, stepIndex, pipelineIndex, geometryIndex, shadingIndex});
	        }

	        passBounds.join(draw.fClip.drawBounds());
	        requiresStencil |= draw.fRenderer.requiresStencil();
	        requiresMSAA |= draw.fRenderer.requiresMSAA();
	        hasVisibleDraws = true;
	    }

	    // TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already,
	    // so algorithms that approach O(n) in that condition may be favorable. Alternatively, could
	    // explore radix sort that is always O(n). Brief testing suggested std::sort was faster than
	    // std::stable_sort and SkTQSort on my [ml]'s Windows desktop. Also worth considering
	    // in-place vs. algorithms that require an extra O(n) storage.
	    // TODO: It's not strictly necessary, but would a stable sort be useful or just end up
	    // hiding bugs in the DrawOrder determination code?
	    std::sort(keys.begin(), keys.end());

	    DrawBufferManager* bufferMgr = recorder->drawBufferManager();

	    // State of the most recently processed key, used to detect when a rebind would be needed.
	    // NOTE: 'lastScissor', 'lastBoundVertexBuffer' and 'lastBoundIndexBuffer' are compared
	    // below but never updated yet; they must be updated once the matching TODOs record
	    // commands.
	    uint32_t lastPipeline = 0;
	    uint32_t lastShadingUniforms = UniformData::kInvalidUniformID;
	    uint32_t lastGeometryUniforms = 0;
	    SkIRect lastScissor = SkIRect::MakeSize(target->dimensions());
	    Buffer* lastBoundVertexBuffer = nullptr;
	    Buffer* lastBoundIndexBuffer = nullptr;

	    for (const SortKey& key : keys) {
	        const DrawList::Draw& draw = *key.draw();
	        int renderStep = key.renderStep();

	        size_t vertexSize = draw.requiredVertexSpace(renderStep);
	        size_t indexSize = draw.requiredIndexSpace(renderStep);
	        auto [vertexWriter, vertexInfo] = bufferMgr->getVertexWriter(vertexSize);
	        auto [indexWriter, indexInfo] = bufferMgr->getIndexWriter(indexSize);
	        // TODO: handle the case where we fail to get a vertex or index writer besides asserting
	        SkASSERT(!vertexSize || (vertexWriter && vertexInfo.fBuffer));
	        SkASSERT(!indexSize || (indexWriter && indexInfo.fBuffer));
	        draw.writeVertices(std::move(vertexWriter), std::move(indexWriter), renderStep);

	        if (vertexSize) {
	            if (lastBoundVertexBuffer != vertexInfo.fBuffer) {
	                // TODO: Record a vertex bind call that stores the vertexInfo.fBuffer.
	            }
	            // TODO: Store the vertexInfo.fOffset so the draw will know its vertex offset when
	            // it executes.
	        }
	        if (indexSize) {
	            if (lastBoundIndexBuffer != indexInfo.fBuffer) {
	                // TODO: Record an index bind call that stores the indexInfo.fBuffer.
	            }
	            // TODO: Store the indexInfo.fOffset so the draw will know its index offset when it
	            // executes.
	        }

	        // TODO: Have the render step write out vertices and figure out what draw call function
	        // and primitive type it uses. The vertex buffer binding/offset and draw params will be
	        // examined to determine if the active draw can be updated to include the new vertices,
	        // or if it has to be ended and a new one begun for this step. In addition to checking
	        // this state, must also check if pipeline, uniform, scissor etc. would require the
	        // active draw to end.
	        //
	        // const RenderStep* const step = draw.fRenderer.steps()[key.renderStep()];

	        if (key.pipeline() != lastPipeline) {
	            // TODO: Look up pipeline description from key's index and record binding it
	            lastPipeline = key.pipeline();
	            // A new pipeline invalidates whatever uniforms were considered bound.
	            lastShadingUniforms = UniformData::kInvalidUniformID;
	            lastGeometryUniforms = 0;
	        }
	        if (key.geometryUniforms() != lastGeometryUniforms) {
	            // TODO: Look up uniform buffer binding info corresponding to key's index and record
	            // it
	            lastGeometryUniforms = key.geometryUniforms();
	        }
	        if (key.shadingUniforms() != lastShadingUniforms) {
	            // Keys of steps that do not shade carry kInvalidUniformID: there is nothing to
	            // upload for them, so the cache is only consulted for real IDs, and the result is
	            // checked before it is dereferenced.
	            if (key.shadingUniforms() != UniformData::kInvalidUniformID) {
	                auto* ud = lookup(recorder, key.shadingUniforms());
	                SkASSERT(ud);
	                if (ud) {
	                    auto [writer, bufferInfo] = bufferMgr->getUniformWriter(ud->dataSize());
	                    writer.write(ud->data(), ud->dataSize());
	                    // TODO: recording 'bufferInfo' somewhere to allow a later uniform bind call
	                }
	            }

	            lastShadingUniforms = key.shadingUniforms();
	        }

	        if (draw.fClip.scissor() != lastScissor) {
	            // TODO: Record new scissor rectangle
	        }

	        // TODO: Write vertex and index data for the draw step
	    }

	    // if (currentDraw) {
	        // TODO: End the current draw if it has pending vertices
	    // }

	    // With no surviving draw nothing was joined into 'passBounds', so it still holds the
	    // InfiniteInverted() sentinel (presumably non-finite edges). Converting a non-finite float
	    // to int is undefined behavior, so an empty pass reports empty bounds instead.
	    SkIRect pxPassBounds = SkIRect::MakeEmpty();
	    if (hasVisibleDraws) {
	        passBounds.roundOut();
	        pxPassBounds = SkIRect::MakeLTRB(static_cast<int>(passBounds.left()),
	                                         static_cast<int>(passBounds.top()),
	                                         static_cast<int>(passBounds.right()),
	                                         static_cast<int>(passBounds.bot()));
	    }
	    return std::unique_ptr<DrawPass>(new DrawPass(std::move(target), pxPassBounds,
	                                                  requiresStencil, requiresMSAA));
	}

	// Intended to replay this pass into 'buffer'. Not implemented yet: the body is a placeholder
	// and 'buffer' is currently unused.
	void DrawPass::execute(CommandBuffer* buffer) const {
	// TODO
	}

	} // namespace skgpu