// blob: 3413f07c27e855a3df50c45207217e9d084433f8 [file] [log] [blame]
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// OverlayCull.comp: Cull overlay widgets. A maximum of 32 text widgets and 32 graph widgets is
// supported simultaneously.
#version 450 core
#extension GL_EXT_samplerless_texture_functions : require
// Enable the subgroup extension that matches the reduction strategy selected by the build.
// No extension is required for the remaining (shared-memory) strategy.
#if SupportsBallot
#extension GL_KHR_shader_subgroup_ballot: require
#elif SupportsArithmetic
#extension GL_KHR_shader_subgroup_arithmetic: require
#endif

// Per-variant geometry.  BLOCK_WIDTH x BLOCK_HEIGHT is the size of the block that one workgroup
// tests widgets against.
//
// NOTE(review): WORKGROUP_WIDTH is consumed by the local_size_x layout qualifier but had no
// visible definition.  The values below assume the workgroup width equals the subgroup size
// (32 for the 8x4 variant, 64 for the 8x8 variant), which is what the comments in
// accumulateWidgets() and main() describe -- confirm against the host-side dispatch code.
#if Is8x4
#define WORKGROUP_WIDTH 32
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 4
#elif Is8x8
#define WORKGROUP_WIDTH 64
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8
#else
#error "Not all subgroup sizes are accounted for"
#endif
// Limits:
// NOTE(review): values taken from the file header ("a maximum of 32 text widgets and 32 graph
// widgets"); they must match the limits used by the host code that fills WidgetCoordinates.
#define MAX_TEXT_WIDGETS 32
#define MAX_GRAPH_WIDGETS 32

// One invocation per widget slot; the workgroup is one-dimensional.
layout (local_size_x = WORKGROUP_WIDTH, local_size_y = 1, local_size_z = 1) in;

// Output image: main() stores one texel per workgroup, holding the two widget bitmasks in .xy.
layout(set = 0, binding = 0, rgba32ui) uniform writeonly uimage2D culledWidgetsOut;

// Per-widget coordinates, read by cullWidgets() as coordinates[offset + localId].
// NOTE(review): the member declaration is reconstructed from that usage (a uvec4 per widget,
// text widgets first, graph widgets starting at MAX_TEXT_WIDGETS) -- confirm the layout against
// the host-side structure.
layout (set = 0, binding = 1) uniform WidgetCoordinates
{
    uvec4 coordinates[MAX_TEXT_WIDGETS + MAX_GRAPH_WIDGETS];
};
#if SupportsNone
// Scratch storage for the manual reduction used when no subgroup operations are available.
// Each of the first 32 invocations publishes its single-bit contribution here.
shared uint intersectingWidgets[32];

// OR-reduces intersectingWidgets[0..31] into intersectingWidgets[0] with a halving tree
// (16, 8, 4, 2, 1).  Every invocation of the workgroup must call this; `localId` is the
// caller's gl_LocalInvocationID.x.  Invocations with localId >= 16 do nothing.
void accumulateWidgets(const uint localId)
{
    // Note: no barriers needed as the workgroup size is the same as hardware subgroup size.
    if (localId < 16)
    {
        intersectingWidgets[localId] |= intersectingWidgets[localId + 16];
    }
    if (localId < 8)
    {
        intersectingWidgets[localId] |= intersectingWidgets[localId + 8];
    }
    if (localId < 4)
    {
        intersectingWidgets[localId] |= intersectingWidgets[localId + 4];
    }
    if (localId < 2)
    {
        intersectingWidgets[localId] |= intersectingWidgets[localId + 2];
    }
    if (localId < 1)
    {
        intersectingWidgets[localId] |= intersectingWidgets[localId + 1];
    }
}
#endif
// Tests one widget per invocation against a block and returns the per-widget results packed
// into bitmasks.
//
//   offset         - index of the first widget in `coordinates` handled by this call; invocation
//                    `localId` tests widget `offset + localId`.
//   blockCoordLow  - inclusive lower corner of the block.
//   blockCoordHigh - exclusive upper corner of the block.
//
// Returns a uvec2 where bit i of .x is set if the widget tested by invocation i intersects the
// block.  In the 8x8 variant, .y holds the same information for invocations 32..63 (bit
// i - MAX_TEXT_WIDGETS); in the 8x4 arithmetic and shared-memory paths .y is 0.  In the
// shared-memory (SupportsNone) path only invocation 0 receives the reduced result; all other
// invocations get (0, 0).
uvec2 cullWidgets(const uint offset, const uvec2 blockCoordLow, const uvec2 blockCoordHigh)
{
    const uint localId = gl_LocalInvocationID.x;
    const uvec4 widgetCoords = coordinates[offset + localId];

    // A widget with x >= z never intersects (presumably how unused slots are encoded -- TODO
    // confirm).  Otherwise this is a rectangle overlap test with .xy as the widget's low corner
    // and .zw as its high corner.
    // NOTE(review): the first operand of greaterThanEqual() was missing; `.zw` is the only
    // operand that completes the overlap test -- confirm.
    const bool intersects = widgetCoords.x < widgetCoords.z &&
                            all(lessThan(widgetCoords.xy, blockCoordHigh)) &&
                            all(greaterThanEqual(widgetCoords.zw, blockCoordLow));

#if SupportsBallot
    // The ballot already has one bit per invocation in the layout described above.
    return subgroupBallot(intersects).xy;
#elif SupportsArithmetic
#if Is8x8
    // Invocations below MAX_TEXT_WIDGETS contribute to .x, the rest to .y.
    const uint textWidgetBit =
        localId < MAX_TEXT_WIDGETS ? uint(intersects) << localId : 0;
    const uint graphWidgetBit =
        localId >= MAX_TEXT_WIDGETS ? uint(intersects) << (localId - MAX_TEXT_WIDGETS) : 0;
    return uvec2(subgroupOr(textWidgetBit), subgroupOr(graphWidgetBit));
#elif Is8x4
    return uvec2(subgroupOr(uint(intersects) << localId), 0);
#else
#error "Not all subgroup sizes are accounted for"
#endif
#elif SupportsNone
    uvec2 ballot = uvec2(0, 0);
#if Is8x8
    // First pass: invocations [0, MAX_TEXT_WIDGETS) publish their bit, then reduce into slot 0.
    if (localId < MAX_TEXT_WIDGETS)
    {
        intersectingWidgets[localId] = uint(intersects) << localId;
    }
    accumulateWidgets(localId);
    if (localId == 0)
    {
        ballot.x = intersectingWidgets[0];
    }

    // Second pass: the remaining invocations reuse the same scratch array.  The guard keeps
    // graphLocalId from underflowing (and indexing out of bounds) for the lower invocations.
    if (localId >= MAX_TEXT_WIDGETS)
    {
        const uint graphLocalId = localId - MAX_TEXT_WIDGETS;
        intersectingWidgets[graphLocalId] = uint(intersects) << graphLocalId;
    }
    accumulateWidgets(localId);
    if (localId == 0)
    {
        ballot.y = intersectingWidgets[0];
    }
#elif Is8x4
    intersectingWidgets[localId] = uint(intersects) << localId;
    accumulateWidgets(localId);
    if (localId == 0)
    {
        ballot.x = intersectingWidgets[0];
    }
#else
#error "Not all subgroup sizes are accounted for"
#endif
    return ballot;
#else
#error "Not all subgroup operations are accounted for"
#endif
}
// Entry point.  Each workgroup handles one BLOCK_WIDTH x BLOCK_HEIGHT block and writes the
// bitmasks of intersecting widgets to the texel of culledWidgetsOut at the workgroup's
// coordinates: .x comes from the widgets starting at index 0, .y from those starting at
// MAX_TEXT_WIDGETS, and .zw are written as 0.
void main()
{
    // There is one workgroup invocation per pixel in culledWidgetsOut.  Depending on the subgroup
    // size, either all widgets and graphs are processed simultaneously (subgroup size 64) or
    // separately (subgroup size 32).
    const uvec2 outCoord = gl_WorkGroupID.xy;
    const uvec2 blockCoordLow = outCoord * uvec2(BLOCK_WIDTH, BLOCK_HEIGHT);
    const uvec2 blockCoordHigh = blockCoordLow + uvec2(BLOCK_WIDTH, BLOCK_HEIGHT);

    uvec2 culledWidgets;

#if Is8x8
    // A single pass covers both widget ranges.
    culledWidgets = cullWidgets(0, blockCoordLow, blockCoordHigh);
#elif Is8x4
    // Two passes over the coordinates array, one per widget range; each returns its mask in .x.
    culledWidgets.x = cullWidgets(0, blockCoordLow, blockCoordHigh).x;
    culledWidgets.y = cullWidgets(MAX_TEXT_WIDGETS, blockCoordLow, blockCoordHigh).x;
#else
#error "Not all subgroup sizes are accounted for"
#endif

    // A single invocation stores the result for the whole workgroup.
    if (gl_LocalInvocationID.x == 0)
    {
        imageStore(culledWidgetsOut, ivec2(outCoord), uvec4(culledWidgets, 0, 0));
    }
}