gl_device: Reserve base bindings on limited devices

SSBOs and other resources are limited per pipeline on Intel and AMD.
Heuristically reserve resources per stage having in mind the reported
OpenGL limits.
This commit is contained in:
ReinUsesLisp 2019-11-22 21:22:01 -03:00
parent e3d7334be9
commit dc2e83fa31
No known key found for this signature in database
GPG Key ID: 2DFC508897B39CFE

View File

@ -5,7 +5,9 @@
#include <algorithm>
#include <array>
#include <cstddef>
#include <optional>
#include <vector>
#include <glad/glad.h>
#include "common/logging/log.h"
@ -20,6 +22,27 @@ namespace {
// One uniform block is reserved for emulation purposes
constexpr u32 ReservedUniformBlocks = 1;
constexpr u32 NumStages = 5;
constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS,
GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS};
constexpr std::array LimitSSBOs = {
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS};
constexpr std::array LimitSamplers = {
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
GL_MAX_TEXTURE_IMAGE_UNITS};
constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS,
GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS,
GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS};
template <typename T>
T GetInteger(GLenum pname) {
GLint temporary;
@ -51,54 +74,71 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view
return std::find(images.begin(), images.end(), extension) != images.end();
}
constexpr Device::BaseBindings operator+(Device::BaseBindings lhs, Device::BaseBindings rhs) {
return Device::BaseBindings{lhs.uniform_buffer + rhs.uniform_buffer,
lhs.shader_storage_buffer + rhs.shader_storage_buffer,
lhs.sampler + rhs.sampler, lhs.image + rhs.image};
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
ASSERT(num >= amount);
if (limit) {
amount = std::min(amount, GetInteger<u32>(*limit));
}
num -= amount;
return std::exchange(base, base + amount);
}
Device::BaseBindings BuildBaseBindings(GLenum uniform_blocks, GLenum shader_storage_blocks,
GLenum texture_image_units, GLenum image_uniforms) noexcept {
return Device::BaseBindings{
GetInteger<u32>(uniform_blocks) - ReservedUniformBlocks,
GetInteger<u32>(shader_storage_blocks),
GetInteger<u32>(texture_image_units),
GetInteger<u32>(image_uniforms),
};
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
static std::array<std::size_t, 5> stage_swizzle = {0, 1, 2, 3, 4};
const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
u32 num_ubos = total_ubos - ReservedUniformBlocks;
u32 num_ssbos = total_ssbos;
u32 num_samplers = total_samplers;
u32 base_ubo = ReservedUniformBlocks;
u32 base_ssbo = 0;
u32 base_samplers = 0;
for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
bindings[stage] = {
Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]),
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]),
Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])};
}
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
u32 base_images = 0;
// Reserve more image bindings on fragment and vertex stages.
bindings[4].image =
Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]);
bindings[0].image =
Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]);
// Reserve the other image bindings.
const u32 total_extracted_images = num_images / (NumStages - 2);
for (std::size_t i = 2; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
bindings[stage].image =
Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
}
// Compute doesn't care about any of this.
bindings[5] = {0, 0, 0, 0};
return bindings;
}
} // Anonymous namespace
Device::Device() {
Device::Device() : base_bindings{BuildBaseBindings()} {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_intel = vendor == "Intel";
// Reserve the first UBO for emulation bindings
base_bindings[0] = BaseBindings{ReservedUniformBlocks, 0, 0, 0};
base_bindings[1] = base_bindings[0] + BuildBaseBindings(GL_MAX_VERTEX_UNIFORM_BLOCKS,
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS,
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
GL_MAX_VERTEX_IMAGE_UNIFORMS);
base_bindings[2] =
base_bindings[1] + BuildBaseBindings(GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS);
base_bindings[3] =
base_bindings[2] + BuildBaseBindings(GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS);
base_bindings[4] = base_bindings[3] + BuildBaseBindings(GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
GL_MAX_GEOMETRY_IMAGE_UNIFORMS);
// Compute doesn't need any of that
base_bindings[5] = BaseBindings{0, 0, 0, 0};
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);