From d621e96d0de212cc16897eadf71e8a1b2e1eb5dc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:28:34 -0300 Subject: [PATCH] shader: Initial OpenGL implementation --- .../frontend/ir/ir_emitter.cpp | 4 + .../frontend/ir/ir_emitter.h | 1 + .../translate/impl/move_special_register.cpp | 7 + src/video_core/CMakeLists.txt | 4 + src/video_core/buffer_cache/buffer_cache.h | 53 ++-- .../renderer_opengl/gl_buffer_cache.cpp | 37 ++- .../renderer_opengl/gl_buffer_cache.h | 40 ++- .../renderer_opengl/gl_compute_program.cpp | 178 +++++++++++ .../renderer_opengl/gl_compute_program.h | 83 +++++ src/video_core/renderer_opengl/gl_device.cpp | 89 ------ src/video_core/renderer_opengl/gl_device.h | 16 - .../renderer_opengl/gl_graphics_program.cpp | 296 ++++++++++++++++++ .../renderer_opengl/gl_graphics_program.h | 105 +++++++ .../renderer_opengl/gl_rasterizer.cpp | 23 +- .../renderer_opengl/gl_shader_cache.cpp | 275 +++++++++++++++- .../renderer_opengl/gl_shader_cache.h | 98 +++--- .../renderer_opengl/gl_shader_manager.cpp | 146 --------- .../renderer_opengl/gl_shader_manager.h | 73 +---- .../renderer_opengl/gl_texture_cache.cpp | 257 +++++---------- .../renderer_opengl/gl_texture_cache.h | 29 +- .../renderer_opengl/maxwell_to_gl.h | 108 +++++++ .../renderer_opengl/renderer_opengl.cpp | 17 +- .../renderer_opengl/renderer_opengl.h | 5 +- .../renderer_opengl/util_shaders.cpp | 13 +- .../renderer_vulkan/pipeline_helper.h | 17 - .../renderer_vulkan/vk_buffer_cache.h | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 22 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 22 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 23 +- .../renderer_vulkan/vk_rasterizer.cpp | 11 - src/video_core/shader_cache.cpp | 17 + src/video_core/shader_cache.h | 23 +- src/video_core/shader_environment.cpp | 4 +- src/video_core/shader_environment.h | 16 - src/video_core/texture_cache/formatter.cpp | 4 +- src/video_core/texture_cache/formatter.h | 3 +- src/video_core/textures/texture.h | 9 + .../vulkan_common/vulkan_device.cpp | 2 +- 38 files changed, 1427 insertions(+), 705 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_compute_program.cpp create mode 100644 src/video_core/renderer_opengl/gl_compute_program.h create mode 100644 src/video_core/renderer_opengl/gl_graphics_program.cpp create mode 100644 src/video_core/renderer_opengl/gl_graphics_program.h diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b3c9fe72a6..5913fdeffa 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -355,6 +355,10 @@ U32 IREmitter::WorkgroupIdZ() { return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; } +Value IREmitter::LocalInvocationId() { + return Inst(Opcode::LocalInvocationId); +} + U32 IREmitter::LocalInvocationIdX() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4441c495d3..a12919283f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -95,6 +95,7 @@ public: [[nodiscard]] U32 WorkgroupIdY(); [[nodiscard]] U32 WorkgroupIdZ(); + [[nodiscard]] Value LocalInvocationId(); [[nodiscard]] U32 LocalInvocationIdX(); [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index b0baff74be..01fb6f5e52 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -120,6 +120,13 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_INVOCATION_INFO: // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); return ir.Imm32(0x00ff'0000); + case SpecialRegister::SR_TID: { + const IR::Value tid{ir.LocalInvocationId()}; + return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, + IR::U32{ir.CompositeExtract(tid, 1)}, + ir.Imm32(16), ir.Imm32(8)), + IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); + } case SpecialRegister::SR_TID_X: return ir.LocalInvocationIdX(); case SpecialRegister::SR_TID_Y: diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6e0e4b8f51..b008c37c0c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -67,10 +67,14 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h + renderer_opengl/gl_compute_program.cpp + renderer_opengl/gl_compute_program.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h + renderer_opengl/gl_graphics_program.cpp + renderer_opengl/gl_graphics_program.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 29746f61df..6c92e4c301 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -70,8 +70,8 @@ class BufferCache { P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; + static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -154,7 +154,7 @@ public: void UnbindGraphicsTextureBuffers(size_t stage); void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written); + PixelFormat format, bool is_written, bool is_image); void UnbindComputeStorageBuffers(); @@ -164,7 +164,7 @@ public: void UnbindComputeTextureBuffers(); void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, - bool is_written); + bool is_written, bool is_image); void FlushCachedWrites(); @@ -197,6 +197,7 @@ public: [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); std::mutex mutex; + Runtime& runtime; private: template @@ -366,7 +367,6 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - Runtime& runtime; SlotVector slot_buffers; DelayedDestructionRing delayed_destruction_ring; @@ -394,8 +394,10 @@ private: std::array enabled_texture_buffers{}; std::array written_texture_buffers{}; + std::array image_texture_buffers{}; u32 enabled_compute_texture_buffers = 0; u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; std::array fast_bound_uniform_buffers{}; @@ -431,8 +433,8 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_) - : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, - gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); deletion_iterator = slot_buffers.end(); @@ -703,13 +705,18 @@ template void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { enabled_texture_buffers[stage] = 0; written_texture_buffers[stage] = 0; + image_texture_buffers[stage] = 0; } template void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, - u32 size, PixelFormat format, bool is_written) { + u32 size, PixelFormat format, bool is_written, + bool is_image) { enabled_texture_buffers[stage] |= 1U << tbo_index; written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; + } texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -717,6 +724,7 @@ template void BufferCache

::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; written_compute_storage_buffers = 0; + image_compute_texture_buffers = 0; } template @@ -737,13 +745,17 @@ template void BufferCache

::UnbindComputeTextureBuffers() { enabled_compute_texture_buffers = 0; written_compute_texture_buffers = 0; + image_compute_texture_buffers = 0; } template void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written) { + PixelFormat format, bool is_written, bool is_image) { enabled_compute_texture_buffers |= 1U << tbo_index; written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + } compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -1057,7 +1069,6 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { template void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { - u32 binding_index = 0; ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { const TextureBufferBinding& binding = texture_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1066,9 +1077,12 @@ void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_texture_buffers[stage] >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1139,7 +1153,6 @@ void BufferCache

::BindHostComputeStorageBuffers() { template void BufferCache

::BindHostComputeTextureBuffers() { - u32 binding_index = 0; ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { const TextureBufferBinding& binding = compute_texture_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1148,9 +1161,12 @@ void BufferCache

::BindHostComputeTextureBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_compute_texture_buffers >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1339,11 +1355,10 @@ void BufferCache

::UpdateComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer Binding& binding = compute_storage_buffers[index]; - const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); - binding.buffer_id = buffer_id; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed if (((written_compute_storage_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index c4189fb60e..2d0ef1307d 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -2,14 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL { namespace { +using VideoCore::Surface::PixelFormat; + struct BindlessSSBO { GLuint64EXT address; GLsizei length; @@ -62,6 +66,26 @@ void Buffer::MakeResident(GLenum access) noexcept { glMakeNamedBufferResidentNV(buffer.handle, access); } +GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return it->texture.handle; + } + OGLTexture texture; + texture.Create(GL_TEXTURE_BUFFER); + const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; + glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size); + views.push_back({ + .offset = offset, + .size = size, + .format = format, + .texture = std::move(texture), + }); + return views.back().texture.handle; +} + BufferCacheRuntime::BufferCacheRuntime(const Device& device_) : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, @@ -144,7 +168,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, static_cast(size)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), static_cast(offset), static_cast(size)); @@ -181,7 +205,7 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast(&ssbo)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; + const GLuint base_binding = graphics_base_storage_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), static_cast(offset), static_cast(size)); @@ -213,4 +237,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, static_cast(offset), static_cast(size)); } +void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + PixelFormat format) { + *texture_handles++ = buffer.View(offset, size, format); +} + +void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { + *image_handles++ = buffer.View(offset, size, format); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index ddcce5e97a..4986c65fd4 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -32,6 +32,8 @@ public: void MakeResident(GLenum access) noexcept; + [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { return address; } @@ -41,9 +43,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + OGLTexture texture; + }; + GLuint64EXT address = 0; OGLBuffer buffer; GLenum current_residency_access = GL_NONE; + std::vector views; }; class BufferCacheRuntime { @@ -75,13 +85,19 @@ public: void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + + void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { if (use_assembly_shaders) { const GLuint handle = fast_uniforms[stage][binding_index].handle; const GLsizeiptr gl_size = static_cast(size); glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, fast_uniforms[stage][binding_index].handle, 0, @@ -103,7 +119,7 @@ public: std::span BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { const auto [mapped_span, offset] = stream_buffer->Request(static_cast(size)); - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), static_cast(offset), static_cast(size)); @@ -118,6 +134,19 @@ public: return has_fast_buffer_sub_data; } + void SetBaseUniformBindings(const std::array& bindings) { + graphics_base_uniform_bindings = bindings; + } + + void SetBaseStorageBindings(const std::array& bindings) { + graphics_base_storage_bindings = bindings; + } + + void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) { + texture_handles = texture_handles_; + image_handles = image_handles_; + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -133,6 +162,11 @@ private: u32 max_attributes = 0; + std::array graphics_base_uniform_bindings{}; + std::array graphics_base_storage_bindings{}; + GLuint* texture_handles = nullptr; + GLuint* image_handles = nullptr; + std::optional stream_buffer; std::array, @@ -155,8 +189,8 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true; static constexpr bool USE_MEMORY_MAPS = false; + static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; }; using BufferCache = VideoCommon::BufferCache; diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp new file mode 100644 index 0000000000..d5ef654392 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -0,0 +1,178 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 16; + +size_t ComputeProgramKey::Hash() const noexcept { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof *this)); +} + +bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept { + return std::memcmp(this, &rhs, sizeof *this) == 0; +} + +ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, OGLProgram program_, + const Shader::Info& info_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + kepler_compute{kepler_compute_}, + program_manager{program_manager_}, program{std::move(program_)}, info{info_} { + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers += desc.count; + } + u32 num_textures = num_texture_buffers; + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + ASSERT(num_textures <= MAX_TEXTURES); + + u32 num_images = num_image_buffers; + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + ASSERT(num_images <= MAX_IMAGES); +} + +void ComputeProgram::Configure() { + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); + ++ssbo_index; + } + texture_cache.SynchronizeComputeDescriptors(); + + std::array image_view_ids; + boost::container::static_vector image_view_indices; + std::array samplers; + std::array textures; + std::array images; + GLsizei sampler_binding{}; + GLsizei texture_binding{}; + GLsizei image_binding{}; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + samplers[sampler_binding++] = 0; + } + } + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers[sampler_binding++] = sampler->Handle(); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + buffer_cache.UnbindComputeTextureBuffers(); + size_t texbuf_index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texbuf_index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + + buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); + buffer_cache.BindHostComputeBuffers(); + + const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + texture_binding += num_texture_buffers; + image_binding += num_image_buffers; + + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } + program_manager.BindProgram(program.handle); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h new file mode 100644 index 0000000000..64a75d44d2 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.h @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines { +class KeplerCompute; +} + +namespace Shader { +struct Info; +} + +namespace OpenGL { + +class ProgramManager; + +struct ComputeProgramKey { + u64 unique_hash; + u32 shared_memory_size; + std::array workgroup_size; + + size_t Hash() const noexcept; + + bool operator==(const ComputeProgramKey&) const noexcept; + + bool operator!=(const ComputeProgramKey& rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class ComputeProgram { +public: + explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, OGLProgram program_, + const Shader::Info& info_); + + void Configure(); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::KeplerCompute& kepler_compute; + ProgramManager& program_manager; + + OGLProgram program; + Shader::Info info; + + u32 num_texture_buffers{}; + u32 num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3b00614e70..18bbc4c1f1 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -22,34 +22,11 @@ namespace OpenGL { namespace { -// One uniform block is reserved for emulation purposes -constexpr u32 ReservedUniformBlocks = 1; - -constexpr u32 NumStages = 5; - constexpr std::array LIMIT_UBOS = { GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, }; -constexpr std::array LIMIT_SSBOS = { - GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, - GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, -}; -constexpr std::array LIMIT_SAMPLERS = { - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS, - GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, -}; -constexpr std::array LIMIT_IMAGES = { - GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, - GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, - GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, -}; template T GetInteger(GLenum pname) { @@ -82,15 +59,6 @@ bool HasExtension(std::span extensions, std::string_view return std::ranges::find(extensions, extension) != extensions.end(); } -u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { - ASSERT(num >= amount); - if (limit) { - amount = std::min(amount, GetInteger(*limit)); - } - num -= amount; - return std::exchange(base, base + amount); -} - std::array BuildMaxUniformBuffers() noexcept { std::array max; std::ranges::transform(LIMIT_UBOS, max.begin(), @@ -98,62 +66,6 @@ std::array BuildMaxUniformBuffers() noexcep return max; } -std::array BuildBaseBindings() noexcept { - std::array bindings; - - static constexpr std::array stage_swizzle{0, 1, 2, 3, 4}; - const u32 total_ubos = GetInteger(GL_MAX_UNIFORM_BUFFER_BINDINGS); - const u32 total_ssbos = GetInteger(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); - const u32 total_samplers = GetInteger(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); - - u32 num_ubos = total_ubos - ReservedUniformBlocks; - u32 num_ssbos = total_ssbos; - u32 num_samplers = total_samplers; - - u32 base_ubo = ReservedUniformBlocks; - u32 base_ssbo = 0; - u32 base_samplers = 0; - - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - bindings[stage] = { - Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), - Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), - Extract(base_samplers, num_samplers, total_samplers / NumStages, - LIMIT_SAMPLERS[stage])}; - } - - u32 num_images = GetInteger(GL_MAX_IMAGE_UNITS); - u32 base_images = 0; - - // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8. - // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the - // fragment stage, and at least 1 for the rest of the stages. - // So far games are observed to use 1 image binding on vertex and 4 on fragment stages. - - // Reserve at least 4 image bindings on the fragment stage. - bindings[4].image = - Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); - - // This is guaranteed to be at least 1. - const u32 total_extracted_images = num_images / (NumStages - 1); - - // Reserve the other image bindings. - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - if (stage == 4) { - continue; - } - bindings[stage].image = - Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); - } - - // Compute doesn't care about any of this. - bindings[5] = {0, 0, 0, 0}; - - return bindings; -} - bool IsASTCSupported() { static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; static constexpr std::array formats = { @@ -225,7 +137,6 @@ Device::Device() { } max_uniform_buffers = BuildMaxUniformBuffers(); - base_bindings = BuildBaseBindings(); uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2c2b13767a..152a3acd31 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -12,13 +12,6 @@ namespace OpenGL { class Device { public: - struct BaseBindings { - u32 uniform_buffer{}; - u32 shader_storage_buffer{}; - u32 sampler{}; - u32 image{}; - }; - explicit Device(); explicit Device(std::nullptr_t); @@ -28,14 +21,6 @@ public: return max_uniform_buffers[static_cast(shader_type)]; } - const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { - return base_bindings[stage_index]; - } - - const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { - return GetBaseBindings(static_cast(shader_type)); - } - size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } @@ -134,7 +119,6 @@ private: std::string vendor_name; std::array max_uniform_buffers{}; - std::array base_bindings{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp new file mode 100644 index 0000000000..fd09587198 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -0,0 +1,296 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_graphics_program.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 8; + +size_t GraphicsProgramKey::Hash() const noexcept { + return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); +} + +bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; +} + +GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + const std::array& infos) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, program{std::move(program_)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + + u32 num_textures{}; + u32 num_images{}; + for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { + const auto& info{stage_infos[stage]}; + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + for (const auto& desc : info.constant_buffer_descriptors) { + base_uniform_bindings[stage + 1] += desc.count; + } + for (const auto& desc : info.storage_buffers_descriptors) { + base_storage_bindings[stage + 1] += desc.count; + } + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers[stage] += desc.count; + num_textures += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers[stage] += desc.count; + num_images += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); +} + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +void GraphicsProgram::Configure(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_view_index{}; + GLsizei sampler_binding{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); + buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + } + }}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + samplers[sampler_binding++] = 0; + } + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_binding++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_view_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); + + const ImageId* views_it{image_view_ids.data()}; + GLsizei texture_binding = 0; + GLsizei image_binding = 0; + std::array textures; + std::array images; + const auto prepare_stage{[&](size_t stage) { + buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); + buffer_cache.BindHostStageBuffers(stage); + + texture_binding += num_texture_buffers[stage]; + image_binding += num_image_buffers[stage]; + + const auto& info{stage_infos[stage]}; + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } + texture_cache.UpdateRenderTargets(false); + + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + program_manager.BindProgram(program.handle); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h new file mode 100644 index 0000000000..5adf3f41e5 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace OpenGL { + +class ProgramManager; + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +struct GraphicsProgramKey { + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + }; + + std::array unique_hashes; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; + BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + std::array padding; + TransformFeedbackState xfb_state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsProgramKey&) const noexcept; + + bool operator!=(const GraphicsProgramKey& rhs) const noexcept { + return !operator==(rhs); + } + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsProgramKey); + } else { + return offsetof(GraphicsProgramKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class GraphicsProgram { +public: + explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, const std::array& infos); + + void Configure(bool is_indexed); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + ProgramManager& program_manager; + StateTracker& state_tracker; + + OGLProgram program; + std::array stage_infos{}; + std::array base_uniform_bindings{}; + std::array base_storage_bindings{}; + std::array num_texture_buffers{}; + std::array num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index dd1937863f..e527b76ba3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -98,7 +98,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), + shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, + buffer_cache, program_manager, state_tracker), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} @@ -246,12 +247,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); - // Setup shaders and their used resources. - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()}; - texture_cache.UpdateRenderTargets(false); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - program_manager.BindGraphicsPipeline(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + program->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); BeginTransformFeedback(primitive_mode); @@ -293,7 +292,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { num_instances, base_instance); } } - EndTransformFeedback(); ++num_queued_commands; @@ -302,7 +300,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute() { - UNREACHABLE_MSG("Not implemented"); + ComputeProgram* const program{shader_cache.CurrentComputeProgram()}; + if (!program) { + return; + } + program->Configure(); + const auto& qmd{kepler_compute.launch_description}; + glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); + ++num_queued_commands; } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -515,7 +520,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); - screen_info.display_texture = image_view->Handle(ImageViewType::e2D); + screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c3e490b40c..c9ca1f005f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -16,6 +16,11 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" @@ -25,17 +30,281 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" namespace OpenGL { +namespace { +// FIXME: Move this somewhere else +const Shader::Profile profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = true, + .support_viewport_mask = true, + .support_typeless_image_loads = true, + .support_demote_to_helper_invocation = false, + .warp_size_potentially_larger_than_guest = true, + .support_int64_atomics = false, + .lower_left_origin_mode = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + + .generic_input_types = {}, + .convert_depth_mode = false, + .force_early_z = false, + + .tess_primitive = {}, + .tess_spacing = {}, + .tess_clockwise = false, + + .input_topology = Shader::InputTopology::Triangles, + + .fixed_state_point_size = std::nullopt, + + .alpha_test_func = Shader::CompareFunction::Always, + .alpha_test_reference = 0.0f, + + .y_negate = false, + + .xfb_varyings = {}, +}; + +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::GraphicsEnvironment; + +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} + +void AddShader(GLenum stage, GLuint program, std::span code) { + OGLShader shader; + shader.handle = glCreateShader(stage); + + glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), + static_cast(code.size_bytes())); + glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); + glAttachShader(program, shader.handle); + if (!Settings::values.renderer_debug) { + return; + } + GLint shader_status{}; + glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "Failed to build shader"); + } + GLint log_length{}; + glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data()); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +void LinkProgram(GLuint program) { + glLinkProgram(program); + if (!Settings::values.renderer_debug) { + return; + } + GLint link_status{}; + glGetProgramiv(program, GL_LINK_STATUS, &link_status); + + GLint log_length{}; + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +GLenum Stage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_SHADER; + case 1: + return GL_TESS_CONTROL_SHADER; + case 2: + return GL_TESS_EVALUATION_SHADER; + case 3: + return GL_GEOMETRY_SHADER; + case 4: + return GL_FRAGMENT_SHADER; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} +} // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, - Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_) + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, - emu_window{emu_window_}, gpu{gpu_}, device{device_} {} + emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ + state_tracker_} {} ShaderCache::~ShaderCache() = default; +GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { + if (!RefreshStages(graphics_key.unique_hashes)) { + return nullptr; + } + const auto& regs{maxwell3d.regs}; + graphics_key.raw = 0; + graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 + ? regs.draw.topology.Value() + : Maxwell::PrimitiveTopology{}); + graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); + graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); + graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); + + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& program{pair->second}; + if (is_new) { + program = CreateGraphicsProgram(); + } + return program.get(); +} + +ComputeProgram* ShaderCache::CurrentComputeProgram() { + const VideoCommon::ShaderInfo* const shader{ComputeShader()}; + if (!shader) { + return nullptr; + } + const auto& qmd{kepler_compute.launch_description}; + const ComputeProgramKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return pipeline.get(); + } + pipeline = CreateComputeProgram(key, shader); + return pipeline.get(); +} + +std::unique_ptr ShaderCache::CreateGraphicsProgram() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); + + main_pools.ReleaseContents(); + return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true); +} + +std::unique_ptr ShaderCache::CreateGraphicsProgram( + ShaderPools& pools, const GraphicsProgramKey& key, std::span envs, + bool build_in_parallel) { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + size_t env_index{0}; + std::array programs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + } + std::array infos{}; + + OGLProgram gl_program; + gl_program.handle = glCreateProgram(); + + Shader::Backend::SPIRV::Bindings binding; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + Shader::IR::Program& program{programs[index]}; + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + + const std::vector code{EmitSPIRV(profile, program, binding)}; + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + AddShader(Stage(stage_index), gl_program.handle, code); + } + LinkProgram(gl_program.handle); + + return std::make_unique(texture_cache, buffer_cache, gpu_memory, maxwell3d, + program_manager, state_tracker, std::move(gl_program), + infos); +} + +std::unique_ptr ShaderCache::CreateComputeProgram( + const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + + main_pools.ReleaseContents(); + return CreateComputeProgram(main_pools, key, env, true); +} + +std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& pools, + const ComputeProgramKey& key, + Shader::Environment& env, + bool build_in_parallel) { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + Shader::Backend::SPIRV::Bindings binding; + const std::vector code{EmitSPIRV(profile, program, binding)}; + OGLProgram gl_program; + gl_program.handle = glCreateProgram(); + AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); + LinkProgram(gl_program.handle); + return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, + program_manager, std::move(gl_program), program.info); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 96520e17cb..b479d073ad 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,20 +5,18 @@ #pragma once #include -#include -#include -#include -#include -#include #include -#include -#include #include #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_graphics_program.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -32,64 +30,62 @@ class EmuWindow; namespace OpenGL { class Device; +class ProgramManager; class RasterizerOpenGL; -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsProgramKey { - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - }; - - std::array unique_hashes; - std::array color_formats; - union { - u32 raw; - BitField<0, 1, u32> xfb_enabled; - BitField<1, 1, u32> early_z; - BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; - BitField<6, 2, u32> tessellation_primitive; - BitField<8, 2, u32> tessellation_spacing; - BitField<10, 1, u32> tessellation_clockwise; - }; - u32 padding; - TransformFeedbackState xfb_state; - - [[nodiscard]] size_t Size() const noexcept { - if (xfb_enabled != 0) { - return sizeof(GraphicsProgramKey); - } else { - return offsetof(GraphicsProgramKey, padding); - } +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); -class GraphicsProgram { -public: -private: + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; }; class ShaderCache : public VideoCommon::ShaderCache { public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, - Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_); + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); + [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram(); + + [[nodiscard]] ComputeProgram* CurrentComputeProgram(); + private: + std::unique_ptr CreateGraphicsProgram(); + + std::unique_ptr CreateGraphicsProgram( + ShaderPools& pools, const GraphicsProgramKey& key, + std::span envs, bool build_in_parallel); + + std::unique_ptr CreateComputeProgram(const ComputeProgramKey& key, + const VideoCommon::ShaderInfo* shader); + + std::unique_ptr CreateComputeProgram(ShaderPools& pools, + const ComputeProgramKey& key, + Shader::Environment& env, + bool build_in_parallel); + Core::Frontend::EmuWindow& emu_window; - Tegra::GPU& gpu; const Device& device; + TextureCache& texture_cache; + BufferCache& buffer_cache; + ProgramManager& program_manager; + StateTracker& state_tracker; + + GraphicsProgramKey graphics_key{}; + + ShaderPools main_pools; + std::unordered_map> graphics_cache; + std::unordered_map> compute_cache; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 553e6e8d6e..399959afbc 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,149 +1,3 @@ // Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. - -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" - -namespace OpenGL { - -namespace { - -void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { - if (current == old) { - return; - } - if (current == 0) { - if (enabled) { - enabled = false; - glDisable(stage); - } - return; - } - if (!enabled) { - enabled = true; - glEnable(stage); - } - glBindProgramARB(stage, current); -} - -} // Anonymous namespace - -ProgramManager::ProgramManager(const Device& device) - : use_assembly_programs{device.UseAssemblyShaders()} { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - } else { - graphics_pipeline.Create(); - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -ProgramManager::~ProgramManager() = default; - -void ProgramManager::BindCompute(GLuint program) { - if (use_assembly_programs) { - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); - } else { - is_graphics_bound = false; - glUseProgram(program); - } -} - -void ProgramManager::BindGraphicsPipeline() { - if (!use_assembly_programs) { - UpdateSourcePrograms(); - } -} - -void ProgramManager::BindHostPipeline(GLuint pipeline) { - if (use_assembly_programs) { - if (geometry_enabled) { - geometry_enabled = false; - old_state.geometry = 0; - glDisable(GL_GEOMETRY_PROGRAM_NV); - } - } else { - if (!is_graphics_bound) { - glUseProgram(0); - } - } - glBindProgramPipeline(pipeline); -} - -void ProgramManager::RestoreGuestPipeline() { - if (use_assembly_programs) { - glBindProgramPipeline(0); - } else { - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -void ProgramManager::BindHostCompute(GLuint program) { - if (use_assembly_programs) { - glDisable(GL_COMPUTE_PROGRAM_NV); - } - glUseProgram(program); - is_graphics_bound = false; -} - -void ProgramManager::RestoreGuestCompute() { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - glUseProgram(0); - } -} - -void ProgramManager::UseVertexShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); - } - current_state.vertex = program; -} - -void ProgramManager::UseGeometryShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); - } - current_state.geometry = program; -} - -void ProgramManager::UseFragmentShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); - } - current_state.fragment = program; -} - -void ProgramManager::UpdateSourcePrograms() { - if (!is_graphics_bound) { - is_graphics_bound = true; - glUseProgram(0); - } - - const GLuint handle = graphics_pipeline.handle; - const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { - if (current == old) { - return; - } - glUseProgramStages(handle, stage, current); - }; - update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); - update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); - update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); - - old_state = current_state; -} - -void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { - const auto& regs = maxwell.regs; - - // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. - y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index ad42cce74d..70781d6f5f 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,79 +4,24 @@ #pragma once -#include - #include -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" - namespace OpenGL { -class Device; - -/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at -/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -/// Not following that rule will cause problems on some AMD drivers. -struct alignas(16) MaxwellUniformData { - void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); - - GLfloat y_direction; -}; -static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); -static_assert(sizeof(MaxwellUniformData) < 16384, - "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); - class ProgramManager { public: - explicit ProgramManager(const Device& device); - ~ProgramManager(); + void BindProgram(GLuint program) { + if (bound_program == program) { + return; + } + bound_program = program; + glUseProgram(program); + } - /// Binds a compute program - void BindCompute(GLuint program); - - /// Updates bound programs. - void BindGraphicsPipeline(); - - /// Binds an OpenGL pipeline object unsynchronized with the guest state. - void BindHostPipeline(GLuint pipeline); - - /// Rewinds BindHostPipeline state changes. - void RestoreGuestPipeline(); - - /// Binds an OpenGL GLSL program object unsynchronized with the guest state. - void BindHostCompute(GLuint program); - - /// Rewinds BindHostCompute state changes. - void RestoreGuestCompute(); - - void UseVertexShader(GLuint program); - void UseGeometryShader(GLuint program); - void UseFragmentShader(GLuint program); + void RestoreGuestCompute() {} private: - struct PipelineState { - GLuint vertex = 0; - GLuint geometry = 0; - GLuint fragment = 0; - }; - - /// Update GLSL programs. - void UpdateSourcePrograms(); - - OGLPipeline graphics_pipeline; - - PipelineState current_state; - PipelineState old_state; - - bool use_assembly_programs = false; - - bool is_graphics_bound = true; - - bool vertex_enabled = false; - bool geometry_enabled = false; - bool fragment_enabled = false; + GLuint bound_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a8bf842183..7053be1617 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -24,9 +24,7 @@ #include "video_core/textures/decoders.h" namespace OpenGL { - namespace { - using Tegra::Texture::SwizzleSource; using Tegra::Texture::TextureMipmapFilter; using Tegra::Texture::TextureType; @@ -59,107 +57,6 @@ struct CopyRegion { GLsizei depth; }; -struct FormatTuple { - GLenum internal_format; - GLenum format = GL_NONE; - GLenum type = GL_NONE; -}; - -constexpr std::array FORMAT_TABLE = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, - GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT -}}; - constexpr std::array ACCELERATED_FORMATS{ GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, @@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{ GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, }; -const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); - return FORMAT_TABLE[static_cast(pixel_format)]; -} - GLenum ImageTarget(const VideoCommon::ImageInfo& info) { switch (info.type) { case ImageType::e1D: @@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) { return GL_NONE; } -GLenum ImageTarget(ImageViewType type, int num_samples = 1) { +GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) { const bool is_multisampled = num_samples > 1; switch (type) { - case ImageViewType::e1D: + case Shader::TextureType::Color1D: return GL_TEXTURE_1D; - case ImageViewType::e2D: + case Shader::TextureType::Color2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; - case ImageViewType::Cube: + case Shader::TextureType::ColorCube: return GL_TEXTURE_CUBE_MAP; - case ImageViewType::e3D: + case Shader::TextureType::Color3D: return GL_TEXTURE_3D; - case ImageViewType::e1DArray: + case Shader::TextureType::ColorArray1D: return GL_TEXTURE_1D_ARRAY; - case ImageViewType::e2DArray: + case Shader::TextureType::ColorArray2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; - case ImageViewType::CubeArray: + case Shader::TextureType::ColorArrayCube: return GL_TEXTURE_CUBE_MAP_ARRAY; - case ImageViewType::Rect: - return GL_TEXTURE_RECTANGLE; - case ImageViewType::Buffer: + case Shader::TextureType::Buffer: return GL_TEXTURE_BUFFER; } UNREACHABLE_MSG("Invalid image view type={}", type); @@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::arrayflags & VideoCommon::ImageViewFlagBits::Slice)) { - const GLuint texture = image_view->DefaultHandle(); - glNamedFramebufferTexture(fbo, attachment, texture, 0); + glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0); return; } - const GLuint texture = image_view->Handle(ImageViewType::e3D); + const GLuint texture = image_view->Handle(Shader::TextureType::Color3D); if (image_view->range.extent.layers > 1) { // TODO: OpenGL doesn't support rendering to a fixed number of slices glNamedFramebufferTexture(fbo, attachment, texture, 0); @@ -453,7 +342,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; - for (const FormatTuple& tuple : FORMAT_TABLE) { + for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) { const GLenum format = tuple.internal_format; GLint compat_class; GLint compat_type; @@ -475,11 +364,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); null_image_3d.Create(GL_TEXTURE_3D); - null_image_rect.Create(GL_TEXTURE_RECTANGLE); glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); - glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); std::array new_handles; glGenTextures(static_cast(new_handles.size()), new_handles.data()); @@ -496,29 +383,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, GL_R8, 0, 1, 0, 6); const std::array texture_handles{ - null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, - null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, - null_image_view_2d_array.handle, null_image_view_cube.handle, + null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, + null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle, + null_image_view_cube.handle, }; for (const GLuint handle : texture_handles) { static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); } - const auto set_view = [this](ImageViewType type, GLuint handle) { + const auto set_view = [this](Shader::TextureType type, GLuint handle) { if (device.HasDebuggingToolAttached()) { const std::string name = fmt::format("NullImage {}", type); glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); } null_image_views[static_cast(type)] = handle; }; - set_view(ImageViewType::e1D, null_image_view_1d.handle); - set_view(ImageViewType::e2D, null_image_view_2d.handle); - set_view(ImageViewType::Cube, null_image_view_cube.handle); - set_view(ImageViewType::e3D, null_image_3d.handle); - set_view(ImageViewType::e1DArray, null_image_1d_array.handle); - set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); - set_view(ImageViewType::CubeArray, null_image_cube_array.handle); - set_view(ImageViewType::Rect, null_image_rect.handle); + set_view(Shader::TextureType::Color1D, null_image_view_1d.handle); + set_view(Shader::TextureType::Color2D, null_image_view_2d.handle); + set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle); + set_view(Shader::TextureType::Color3D, null_image_3d.handle); + set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); + set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); + set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); } TextureCacheRuntime::~TextureCacheRuntime() = default; @@ -710,7 +596,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; } else { - const auto& tuple = GetFormatTuple(info.format); + const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); gl_internal_format = tuple.internal_format; gl_format = tuple.format; gl_type = tuple.type; @@ -750,8 +636,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); break; case GL_TEXTURE_BUFFER: - buffer.Create(); - glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); + UNREACHABLE(); break; default: UNREACHABLE_MSG("Invalid target=0x{:x}", target); @@ -789,14 +674,6 @@ void Image::UploadMemory(const ImageBufferMap& map, } } -void Image::UploadMemory(const ImageBufferMap& map, - std::span copies) { - for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, - copy.dst_offset, copy.size); - } -} - void Image::DownloadMemory(ImageBufferMap& map, std::span copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API @@ -958,7 +835,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI if (True(image.flags & ImageFlagBits::Converted)) { internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; } else { - internal_format = GetFormatTuple(format).internal_format; + internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; } VideoCommon::SubresourceRange flatten_range = info.range; std::array handles; @@ -970,8 +847,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI [[fallthrough]]; case ImageViewType::e1D: glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); + SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range); break; case ImageViewType::e2DArray: flatten_range.extent.layers = 1; @@ -985,37 +862,65 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI .extent = {.levels = 1, .layers = 1}, }; glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); - break; + SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range); + } else { + glGenTextures(2, handles.data()); + SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info, + info.range); } - glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); break; case ImageViewType::e3D: glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); + SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range); break; case ImageViewType::CubeArray: flatten_range.extent.layers = 6; [[fallthrough]]; case ImageViewType::Cube: glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); + SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range); break; case ImageViewType::Rect: - glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); + UNIMPLEMENTED(); break; case ImageViewType::Buffer: - glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); - SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); + UNREACHABLE(); + break; + } + switch (info.type) { + case ImageViewType::e1D: + default_handle = Handle(Shader::TextureType::Color1D); + break; + case ImageViewType::e1DArray: + default_handle = Handle(Shader::TextureType::ColorArray1D); + break; + case ImageViewType::e2D: + default_handle = Handle(Shader::TextureType::Color2D); + break; + case ImageViewType::e2DArray: + default_handle = Handle(Shader::TextureType::ColorArray2D); + break; + case ImageViewType::e3D: + default_handle = Handle(Shader::TextureType::Color3D); + break; + case ImageViewType::Cube: + default_handle = Handle(Shader::TextureType::ColorCube); + break; + case ImageViewType::CubeArray: + default_handle = Handle(Shader::TextureType::ColorArrayCube); + break; + default: break; } - default_handle = Handle(info.type); } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info) : VideoCommon::ImageViewBase{info, view_info} {} @@ -1023,24 +928,18 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} -void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, +void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, const VideoCommon::ImageViewInfo& info, VideoCommon::SubresourceRange view_range) { - if (info.type == ImageViewType::Buffer) { - // TODO: Take offset from buffer cache - glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, - image.guest_size_bytes); - } else { - const GLuint parent = image.texture.handle; - const GLenum target = ImageTarget(view_type, image.info.num_samples); - glTextureView(handle, target, parent, internal_format, view_range.base.level, - view_range.extent.levels, view_range.base.layer, view_range.extent.layers); - if (!info.IsRenderTarget()) { - ApplySwizzle(handle, format, info.Swizzle()); - } + const GLuint parent = image.texture.handle; + const GLenum target = ImageTarget(view_type, image.info.num_samples); + glTextureView(handle, target, parent, internal_format, view_range.base.level, + view_range.extent.levels, view_range.base.layer, view_range.extent.layers); + if (!info.IsRenderTarget()) { + ApplySwizzle(handle, format, info.Swizzle()); } if (device.HasDebuggingToolAttached()) { - const std::string name = VideoCommon::Name(*this, view_type); + const std::string name = VideoCommon::Name(*this); glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); } stored_views.emplace_back().handle = handle; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 817b0e6503..2e3e02b795 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -9,6 +9,7 @@ #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/texture_cache.h" @@ -127,13 +128,12 @@ private: OGLTexture null_image_1d_array; OGLTexture null_image_cube_array; OGLTexture null_image_3d; - OGLTexture null_image_rect; OGLTextureView null_image_view_1d; OGLTextureView null_image_view_2d; OGLTextureView null_image_view_2d_array; OGLTextureView null_image_view_cube; - std::array null_image_views; + std::array null_image_views{}; }; class Image : public VideoCommon::ImageBase { @@ -154,8 +154,6 @@ public: void UploadMemory(const ImageBufferMap& map, std::span copies); - void UploadMemory(const ImageBufferMap& map, std::span copies); - void DownloadMemory(ImageBufferMap& map, std::span copies); GLuint StorageHandle() noexcept; @@ -170,7 +168,6 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); OGLTexture texture; - OGLBuffer buffer; OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; @@ -182,12 +179,14 @@ class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); - [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { - return views[static_cast(query_type)]; + [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { + return views[static_cast(handle_type)]; } [[nodiscard]] GLuint DefaultHandle() const noexcept { @@ -198,15 +197,25 @@ public: return internal_format; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: - void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, + void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, const VideoCommon::ImageViewInfo& info, VideoCommon::SubresourceRange view_range); - std::array views{}; + std::array views{}; std::vector stored_views; - GLuint default_handle = 0; GLenum internal_format = GL_NONE; + GLuint default_handle = 0; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index f7ad8f3708..672f94bfc3 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -5,12 +5,120 @@ #pragma once #include + #include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" namespace OpenGL::MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct FormatTuple { + GLenum internal_format; + GLenum format = GL_NONE; + GLenum type = GL_NONE; +}; + +constexpr std::array FORMAT_TABLE = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT +}}; + +inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) { + ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); + return FORMAT_TABLE[static_cast(pixel_format)]; +} + inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { case Maxwell::VertexAttribute::Type::UnsignedNorm: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c12929de6b..4e77ef8086 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -130,7 +130,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, std::unique_ptr context_) : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, - program_manager{device}, rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); @@ -236,12 +235,7 @@ void RendererOpenGL::InitOpenGLObjects() { OGLShader fragment_shader; fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - vertex_program.Create(true, false, vertex_shader.handle); - fragment_program.Create(true, false, fragment_shader.handle); - - pipeline.Create(); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); + present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle); // Generate presentation sampler present_sampler.Create(); @@ -342,8 +336,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, - std::data(ortho_matrix)); + program_manager.BindProgram(present_program.handle); + glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; @@ -404,8 +398,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyClipControl(); state_tracker.NotifyAlphaTest(); - program_manager.BindHostPipeline(pipeline.handle); - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { @@ -453,7 +445,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - program_manager.RestoreGuestPipeline(); + // TODO + // program_manager.RestoreGuestPipeline(); } void RendererOpenGL::RenderScreenshot() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 0b66f83322..b3ee556653 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -12,7 +12,6 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" namespace Core { @@ -111,9 +110,7 @@ private: // OpenGL object IDs OGLSampler present_sampler; OGLBuffer vertex_buffer; - OGLProgram vertex_program; - OGLProgram fragment_program; - OGLPipeline pipeline; + OGLProgram present_program; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8fb5be3935..51e72b705f 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -16,7 +16,6 @@ #include "video_core/host_shaders/opengl_copy_bc4_comp.h" #include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" @@ -86,7 +85,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; - program_manager.BindHostCompute(astc_decoder_program.handle); + program_manager.BindProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); @@ -134,7 +133,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); + program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); @@ -173,7 +172,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); + program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); @@ -222,7 +221,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), "Non-power of two images are not implemented"); - program_manager.BindHostCompute(pitch_unswizzle_program.handle); + program_manager.BindProgram(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); @@ -250,7 +249,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index feaace0c56..168ffa7e94 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,6 +18,9 @@ namespace Vulkan { +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; + ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, @@ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, secondary_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - return TextureHandle{lhs_raw | rhs_raw, via_header_index}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers.push_back(sampler->Handle()); } } @@ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++texture_buffer_ids; ++index; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9f5d30fe8f..e5f54a84f9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -19,7 +19,7 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" -#ifdef _MSC_VER +#if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] #else #define LAMBDA_FORCEINLINE @@ -30,6 +30,7 @@ namespace { using boost::container::small_vector; using boost::container::static_vector; using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; @@ -289,15 +290,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; const u32 raw{lhs_raw | rhs_raw}; - return TextureHandle{raw, via_header_index}; + return TexturePair(raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; } }}; if constexpr (Spec::has_texture_buffers) { @@ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; samplers[sampler_index++] = sampler->Handle(); } } @@ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++index; ++texture_buffer_index; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1334882b56..30b71bdbc9 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -342,28 +342,15 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); + main_pools.ReleaseContents(); - - std::array graphics_envs; - boost::container::static_vector envs; - - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == 0) { - continue; - } - const auto program{static_cast(index)}; - auto& env{graphics_envs[index]}; - const u32 start_address{maxwell3d.regs.shader_config[index].offset}; - env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; - env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; if (pipeline_cache_filename.empty()) { return pipeline; } - serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { + serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { boost::container::static_vector env_ptrs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0f15ad2f72..ef14e91e72 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index b8b8eace5c..78bf90c48f 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -91,6 +91,23 @@ const ShaderInfo* ShaderCache::ComputeShader() { return MakeShaderInfo(env, *cpu_shader_addr); } +void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array& unique_hashes) { + size_t env_index{}; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < NUM_PROGRAMS; ++index) { + if (unique_hashes[index] == 0) { + continue; + } + const auto program{static_cast(index)}; + auto& env{result.envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + env.SetCachedSize(shader_infos[index]->size_bytes); + result.env_ptrs[env_index++] = &env; + } +} + ShaderInfo* ShaderCache::TryGet(VAddr addr) const { std::scoped_lock lock{lookup_mutex}; diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 89a4bcc848..136fe294cb 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -4,14 +4,18 @@ #pragma once +#include +#include #include #include +#include #include #include #include #include "common/common_types.h" #include "video_core/rasterizer_interface.h" +#include "video_core/shader_environment.h" namespace Tegra { class MemoryManager; @@ -30,6 +34,8 @@ class ShaderCache { static constexpr u64 PAGE_BITS = 14; static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; + static constexpr size_t NUM_PROGRAMS = 6; + struct Entry { VAddr addr_start; VAddr addr_end; @@ -58,6 +64,15 @@ public: void SyncGuestHost(); protected: + struct GraphicsEnvironments { + std::array envs; + std::array env_ptrs; + + std::span Span() const noexcept { + return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); + } + }; + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_); @@ -65,17 +80,21 @@ protected: /// @brief Update the hashes and information of shader stages /// @param unique_hashes Shader hashes to store into when a stage is enabled /// @return True no success, false on error - bool RefreshStages(std::array& unique_hashes); + bool RefreshStages(std::array& unique_hashes); /// @brief Returns information about the current compute shader /// @return Pointer to a valid shader, nullptr on error const ShaderInfo* ComputeShader(); + /// @brief Collect the current graphics environments + void GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array& unique_hashes); + Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; - std::array shader_infos{}; + std::array shader_infos{}; bool last_shaders_valid = false; private: diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 5dccc00975..c93174519d 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -187,8 +187,8 @@ std::optional GenericEnvironment::TryFindSize() { Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, u32 raw) { - const TextureHandle handle{raw, via_header_index}; - const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; + const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); const Shader::TextureType result{ConvertType(entry)}; diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 37d7120459..d26dbfaab6 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -29,22 +29,6 @@ class Memorymanager; namespace VideoCommon { -struct TextureHandle { - explicit TextureHandle(u32 data, bool via_header_index) { - if (via_header_index) { - image = data; - sampler = data; - } else { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - } - - u32 image; - u32 sampler; -}; - class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index d10ba4ccde..249cc4d0fd 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) { return "Invalid"; } -std::string Name(const ImageViewBase& image_view, std::optional type) { +std::string Name(const ImageViewBase& image_view) { const u32 width = image_view.size.width; const u32 height = image_view.size.height; const u32 depth = image_view.size.depth; @@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional t const u32 num_layers = image_view.range.extent.layers; const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; - switch (type.value_or(image_view.type)) { + switch (image_view.type) { case ImageViewType::e1D: return fmt::format("ImageView 1D {}{}", width, level); case ImageViewType::e2D: diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index a484139835..c6cf0583fc 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -255,8 +255,7 @@ struct RenderTargets; [[nodiscard]] std::string Name(const ImageBase& image); -[[nodiscard]] std::string Name(const ImageViewBase& image_view, - std::optional type = std::nullopt); +[[nodiscard]] std::string Name(const ImageViewBase& image_view); [[nodiscard]] std::string Name(const RenderTargets& render_targets); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c1d14335ec..1a93994555 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -154,6 +154,15 @@ union TextureHandle { }; static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); +[[nodiscard]] inline std::pair TexturePair(u32 raw, bool via_header_index) { + if (via_header_index) { + return {raw, raw}; + } else { + const Tegra::Texture::TextureHandle handle{raw}; + return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id}; + } +} + struct TICEntry { union { struct { diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2318c1bda8..e27a2b51e1 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -282,7 +282,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, .pNext = nullptr, - .storageBuffer16BitAccess = false, + .storageBuffer16BitAccess = true, .uniformAndStorageBuffer16BitAccess = true, .storagePushConstant16 = false, .storageInputOutput16 = false,