From c4ac05c82c49e678ca78147b2716e0a26b103f8d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 26 Mar 2019 14:37:43 -0400 Subject: [PATCH 01/14] Implement Const Buffer Accessor --- src/video_core/CMakeLists.txt | 2 ++ src/video_core/const_buffer_accessor.cpp | 35 +++++++++++++++++++ src/video_core/const_buffer_accessor.h | 21 +++++++++++ .../renderer_opengl/gl_rasterizer.cpp | 6 ++-- .../renderer_opengl/gl_rasterizer.h | 3 ++ 5 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 src/video_core/const_buffer_accessor.cpp create mode 100644 src/video_core/const_buffer_accessor.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 242a0d1cd7..804395d388 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,4 +1,6 @@ add_library(video_core STATIC + const_buffer_accessor.cpp + const_buffer_accessor.h dma_pusher.cpp dma_pusher.h debug_utils/debug_utils.cpp diff --git a/src/video_core/const_buffer_accessor.cpp b/src/video_core/const_buffer_accessor.cpp new file mode 100644 index 0000000000..c89ab91c74 --- /dev/null +++ b/src/video_core/const_buffer_accessor.cpp @@ -0,0 +1,35 @@ +#pragma once + +#include + +#include "common/common_types.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/const_buffer_accessor.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +namespace Tegra { + +u32 ConstBufferAccessor::access32(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + u64 const_buffer, u64 offset) { + auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + const auto& shader_stage = maxwell3d.state.shader_stages[static_cast(stage)]; + const auto& buffer = shader_stage.const_buffers[const_buffer]; + u32 result; + std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset * 4), sizeof(u32)); + return result; +} + +u64 ConstBufferAccessor::access64(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + u64 const_buffer, u64 offset) { + auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + const auto& shader_stage = maxwell3d.state.shader_stages[static_cast(stage)]; + const auto& buffer = shader_stage.const_buffers[const_buffer]; + u64 result; + std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset * 4), sizeof(u64)); + return result; +} + +} // namespace Tegra diff --git a/src/video_core/const_buffer_accessor.h b/src/video_core/const_buffer_accessor.h new file mode 100644 index 0000000000..2410f14835 --- /dev/null +++ b/src/video_core/const_buffer_accessor.h @@ -0,0 +1,21 @@ +#pragma once + +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" + +namespace Tegra { + +class ConstBufferAccessor { +public: + ConstBufferAccessor(Tegra::Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} + ~ConstBufferAccessor() = default; + + u32 access32(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, u64 const_buffer, u64 offset); + + u64 access64(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, u64 const_buffer, u64 offset); + +private: + Tegra::Engines::Maxwell3D& maxwell3d; +}; + +} // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7ff1e67377..f2ac5382d9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -98,9 +98,11 @@ struct FramebufferCacheKey { } }; -RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) +RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, + ScreenInfo& info) : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system}, - screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { + screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE), + const_buffer_accessor(system.GPU().Maxwell3D()) { // Create sampler objects for (std::size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 54fbf48aa5..886e9c2b5f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -17,6 +17,7 @@ #include #include "common/common_types.h" +#include "video_core/const_buffer_accessor.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" @@ -229,6 +230,8 @@ private: PrimitiveAssembler primitive_assembler{buffer_cache}; GLint uniform_buffer_alignment; + Tegra::ConstBufferAccessor const_buffer_accessor; + std::size_t CalculateVertexArraysSize() const; std::size_t CalculateIndexBufferSize() const; From e28fd3d0a533695242d17350dd929ad3bb56c429 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 26 Mar 2019 17:05:23 -0400 Subject: [PATCH 02/14] Implement Bindless Samplers and TEX_B in the IR. --- src/video_core/engines/shader_bytecode.h | 2 + .../renderer_opengl/gl_shader_disk_cache.cpp | 2 +- src/video_core/shader/decode/texture.cpp | 58 +++++++++++++++++-- src/video_core/shader/shader_ir.h | 31 +++++++--- 4 files changed, 77 insertions(+), 16 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7f613370b6..2edd3245e1 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1309,6 +1309,7 @@ public: LDG, // Load from global memory STG, // Store in global memory TEX, + TEX_B, // Texture Load Bindless TXQ, // Texture Query TEXS, // Texture Fetch with scalar/non-vec4 source/destinations TLDS, // Texture Load with scalar/non-vec4 source/destinations @@ -1577,6 +1578,7 @@ private: INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), + INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 8a43eb1576..6a95af6f65 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -328,7 +328,7 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn } entry.entries.samplers.emplace_back( static_cast(offset), static_cast(index), - static_cast(type), is_array != 0, is_shadow != 0); + static_cast(type), is_array != 0, is_shadow != 0, false); } u32 global_memory_count{}; diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index a775b402b8..23f2ad9997 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -57,6 +57,23 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi)); break; } + case OpCode::Id::TEX_B: { + UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + + if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); + } + + const TextureType texture_type{instr.tex.texture_type}; + const bool is_array = instr.tex.array != 0; + const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.tex.GetTextureProcessMode(); + WriteTexInstructionFloat(bb, instr, + GetTexCode(instr, texture_type, process_mode, depth_compare, + is_array, true, instr.gpr20)); + break; + } case OpCode::Id::TEXS: { const TextureType texture_type{instr.texs.GetTextureType()}; const bool is_array{instr.texs.IsArrayTexture()}; @@ -250,10 +267,36 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu // Otherwise create a new mapping for this sampler const std::size_t next_index = used_samplers.size(); - const Sampler entry{offset, next_index, type, is_array, is_shadow}; + const Sampler entry{offset, next_index, type, is_array, is_shadow, false}; return *used_samplers.emplace(entry).first; } +const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, + TextureType type, bool is_array, bool is_shadow) { + + const Node sampler_register = GetRegister(reg); + const Node base_sampler = TrackCbuf(sampler_register, global_code, static_cast(global_code.size())); + const auto cbuf = std::get_if(base_sampler); + const auto cbuf_offset_imm = std::get_if(cbuf->GetOffset()); + ASSERT(cbuf_offset_imm != nullptr); + const auto cbuf_offset = cbuf_offset_imm->GetValue(); + const auto cbuf_index = cbuf->GetIndex(); + const std::pair cbuf_pair = {cbuf_index, cbuf_offset}; + + // If this sampler has already been used, return the existing mapping. + if (used_bindless_samplers.count(cbuf_pair) > 0) { + const auto& sampler = used_bindless_samplers[cbuf_pair]; + ASSERT(sampler.GetType() == type && sampler.IsArray() == is_array && + sampler.IsShadow() == is_shadow); + return sampler; + } + + // Otherwise create a new mapping for this sampler + const std::size_t next_index = used_bindless_samplers.size(); + const Sampler entry{0, next_index, type, is_array, is_shadow, true}; + return (*used_bindless_samplers.emplace(std::make_pair(cbuf_pair, entry)).first).second; +} + void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { u32 dest_elem = 0; for (u32 elem = 0; elem < 4; ++elem) { @@ -325,8 +368,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, std::vector coords, - Node array, Node depth_compare, u32 bias_offset, - std::vector aoffi) { + Node array, Node depth_compare, u32 bias_offset, std::vector aoffi, bool is_bindless, + Register bindless_reg) { const bool is_array = array; const bool is_shadow = depth_compare; @@ -334,7 +377,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); + const auto& sampler = !is_bindless + ? GetSampler(instr.sampler, texture_type, is_array, is_shadow) + : GetBindlessSampler(bindless_reg, texture_type, is_array, is_shadow); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || @@ -384,7 +429,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi) { + bool is_aoffi, bool is_bindless, Register bindless_reg) { const bool lod_bias_enabled{ (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; @@ -423,7 +468,8 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, dc = GetRegister(parameter_register++); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi, is_bindless, + bindless_reg); } Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 4888998d34..712dc3ddb3 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -196,9 +196,12 @@ enum class ExitMethod { class Sampler { public: + Sampler() = default; explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} + bool is_array, bool is_shadow, bool is_bindless) + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, is_bindless{is_bindless} {} + + ~Sampler() = default; std::size_t GetOffset() const { return offset; @@ -233,6 +236,7 @@ private: Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. }; class ConstBuffer { @@ -730,6 +734,10 @@ private: const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + // Accesses a texture sampler for a bindless texture. + const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow); + /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -741,9 +749,11 @@ private: void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); - Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, bool is_aoffi); + Node4 GetTexCode( + Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, + Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, bool is_array, + bool is_aoffi, bool is_bindless = false, + Tegra::Shader::Register bindless_reg = static_cast(0)); Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, @@ -760,10 +770,12 @@ private: bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); std::vector GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); - - Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, std::vector coords, - Node array, Node depth_compare, u32 bias_offset, std::vector aoffi); + + Node4 GetTextureCode( + Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, + Tegra::Shader::TextureProcessMode process_mode, std::vector coords, Node array, + Node depth_compare, u32 bias_offset, std::vector aoffi, bool is_bindless = false, + Tegra::Shader::Register bindless_reg = static_cast(0)); Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height); @@ -833,6 +845,7 @@ private: std::set used_output_attributes; std::map used_cbufs; std::set used_samplers; + std::map, Sampler> used_bindless_samplers; std::array used_clip_distances{}; std::set used_global_memory_bases; From fe392fff2425c10c9683a4058c779d352b9855ec Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 26 Mar 2019 17:56:16 -0400 Subject: [PATCH 03/14] Unify both sampler types. --- .../renderer_opengl/gl_shader_decompiler.h | 3 +- .../renderer_opengl/gl_shader_disk_cache.cpp | 9 +++-- src/video_core/shader/decode/texture.cpp | 22 ++++++------ src/video_core/shader/shader_ir.h | 36 ++++++++++++++----- 4 files changed, 48 insertions(+), 22 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 4e04ab2f8e..9f7b7272e3 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -58,6 +58,7 @@ private: struct ShaderEntries { std::vector const_buffers; std::vector samplers; + std::vector bindless_samplers; std::vector global_memory_entries; std::array clip_distances{}; std::size_t shader_length{}; @@ -68,4 +69,4 @@ std::string GetCommonDeclarations(); ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage, const std::string& suffix); -} // namespace OpenGL::GLShader \ No newline at end of file +} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 6a95af6f65..e277403832 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -319,16 +319,18 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn u32 type{}; u8 is_array{}; u8 is_shadow{}; + u8 is_bindless{}; if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) || file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) || file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) || file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) || - file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) { + file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) || + file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) { return {}; } entry.entries.samplers.emplace_back( static_cast(offset), static_cast(index), - static_cast(type), is_array != 0, is_shadow != 0, false); + static_cast(type), is_array != 0, is_shadow != 0, is_bindless != 0); } u32 global_memory_count{}; @@ -388,7 +390,8 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu file.WriteObject(static_cast(sampler.GetIndex())) != 1 || file.WriteObject(static_cast(sampler.GetType())) != 1 || file.WriteObject(static_cast(sampler.IsArray() ? 1 : 0)) != 1 || - file.WriteObject(static_cast(sampler.IsShadow() ? 1 : 0)) != 1) { + file.WriteObject(static_cast(sampler.IsShadow() ? 1 : 0)) != 1 || + file.WriteObject(static_cast(sampler.IsBindless() ? 1 : 0)) != 1) { return false; } } diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 23f2ad9997..3ac04f6b75 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -267,7 +267,7 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu // Otherwise create a new mapping for this sampler const std::size_t next_index = used_samplers.size(); - const Sampler entry{offset, next_index, type, is_array, is_shadow, false}; + const Sampler entry{offset, next_index, type, is_array, is_shadow}; return *used_samplers.emplace(entry).first; } @@ -281,20 +281,22 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, ASSERT(cbuf_offset_imm != nullptr); const auto cbuf_offset = cbuf_offset_imm->GetValue(); const auto cbuf_index = cbuf->GetIndex(); - const std::pair cbuf_pair = {cbuf_index, cbuf_offset}; + const u64 cbuf_key = (cbuf_index << 32) | cbuf_offset; // If this sampler has already been used, return the existing mapping. - if (used_bindless_samplers.count(cbuf_pair) > 0) { - const auto& sampler = used_bindless_samplers[cbuf_pair]; - ASSERT(sampler.GetType() == type && sampler.IsArray() == is_array && - sampler.IsShadow() == is_shadow); - return sampler; + const auto itr = + std::find_if(used_samplers.begin(), used_samplers.end(), + [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; }); + if (itr != used_samplers.end()) { + ASSERT(itr->GetType() == type && itr->IsArray() == is_array && + itr->IsShadow() == is_shadow); + return *itr; } // Otherwise create a new mapping for this sampler - const std::size_t next_index = used_bindless_samplers.size(); - const Sampler entry{0, next_index, type, is_array, is_shadow, true}; - return (*used_bindless_samplers.emplace(std::make_pair(cbuf_pair, entry)).first).second; + const std::size_t next_index = used_samplers.size(); + const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow}; + return *used_samplers.emplace(entry).first; } void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 712dc3ddb3..773c71fa53 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -196,12 +196,24 @@ enum class ExitMethod { class Sampler { public: - Sampler() = default; + // Use this constructor for binded Samplers + explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow) + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{false} {} + + // Use this constructor for bindless Samplers + explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, + Tegra::Shader::TextureType type, bool is_array, bool is_shadow) + : offset{(static_cast(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, is_array{is_array}, + is_shadow{is_shadow}, is_bindless{true} {} + + // Use this only for serialization/deserialization explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, bool is_array, bool is_shadow, bool is_bindless) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, is_bindless{is_bindless} {} + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{is_bindless} {} - ~Sampler() = default; std::size_t GetOffset() const { return offset; @@ -223,6 +235,14 @@ public: return is_shadow; } + bool IsBindless() const { + return is_bindless; + } + + std::pair GetBindlessCBuf() { + return {offset >> 32, offset & 0x00000000FFFFFFFFULL}; + } + bool operator<(const Sampler& rhs) const { return std::tie(offset, index, type, is_array, is_shadow) < std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow); @@ -234,8 +254,8 @@ private: std::size_t offset{}; std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. + bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. }; @@ -735,8 +755,9 @@ private: Tegra::Shader::TextureType type, bool is_array, bool is_shadow); // Accesses a texture sampler for a bindless texture. - const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow); + const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, + Tegra::Shader::TextureType type, bool is_array, + bool is_shadow); /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -845,7 +866,6 @@ private: std::set used_output_attributes; std::map used_cbufs; std::set used_samplers; - std::map, Sampler> used_bindless_samplers; std::array used_clip_distances{}; std::set used_global_memory_bases; From 7af82ca022fd6f02583e5686d5c69baf0b6a3611 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 26 Mar 2019 18:18:54 -0400 Subject: [PATCH 04/14] Implement Bindless Handling on SetupTexture --- src/video_core/engines/maxwell_3d.cpp | 31 +++++++++++-------- src/video_core/engines/maxwell_3d.h | 4 +++ .../renderer_opengl/gl_rasterizer.cpp | 10 +++++- src/video_core/shader/shader_ir.h | 7 ++--- 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 74403eed48..079132135f 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -482,19 +482,8 @@ std::vector Maxwell3D::GetStageTextures(Regs::ShaderSt return textures; } -Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, - std::size_t offset) const { - auto& shader = state.shader_stages[static_cast(stage)]; - auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; - ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); - - const GPUVAddr tex_info_address = - tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); - - ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); - - const Texture::TextureHandle tex_handle{memory_manager.Read(tex_info_address)}; - +Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle, + std::size_t offset) const { Texture::FullTextureInfo tex_info{}; tex_info.index = static_cast(offset); @@ -511,6 +500,22 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, return tex_info; } +Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, + std::size_t offset) const { + auto& shader = state.shader_stages[static_cast(stage)]; + auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; + ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); + + const GPUVAddr tex_info_address = + tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); + + ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); + + const Texture::TextureHandle tex_handle{memory_manager.Read(tex_info_address)}; + + return GetTextureInfo(tex_handle, offset); +} + u32 Maxwell3D::GetRegisterValue(u32 method) const { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); return regs.reg_array[method]; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 321af32974..fd2c35a01c 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1131,6 +1131,10 @@ public: /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); + /// Given a Texture Handle, returns the TSC and TIC entries. + Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, + std::size_t offset) const; + /// Returns a list of enabled textures for the specified shader stage. std::vector GetStageTextures(Regs::ShaderStage stage) const; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f2ac5382d9..a20acfe8e1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -985,7 +985,15 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + Tegra::Texture::FullTextureInfo texture; + if (!entry.IsBindless()) { + texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + } else { + const auto cbuf = entry.GetBindlessCBuf(); + Tegra::Texture::TextureHandle tex_handle; + tex_handle.raw = const_buffer_accessor.access32(stage, cbuf.first, cbuf.second); + texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); + } const u32 current_bindpoint = base_bindings.sampler + bindpoint; texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 773c71fa53..ed321cfe5d 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -205,8 +205,8 @@ public: // Use this constructor for bindless Samplers explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, Tegra::Shader::TextureType type, bool is_array, bool is_shadow) - : offset{(static_cast(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, is_array{is_array}, - is_shadow{is_shadow}, is_bindless{true} {} + : offset{(static_cast(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, + is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} // Use this only for serialization/deserialization explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, @@ -214,7 +214,6 @@ public: : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, is_bindless{is_bindless} {} - std::size_t GetOffset() const { return offset; } @@ -239,7 +238,7 @@ public: return is_bindless; } - std::pair GetBindlessCBuf() { + std::pair GetBindlessCBuf() const { return {offset >> 32, offset & 0x00000000FFFFFFFFULL}; } From 90d06acfedb4c58b7b62153059c97b05035fc979 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 26 Mar 2019 18:46:46 -0400 Subject: [PATCH 05/14] Fixes to Const Buffer Accessor and Formatting --- src/video_core/const_buffer_accessor.cpp | 12 ++++++++---- src/video_core/const_buffer_accessor.h | 5 +---- src/video_core/renderer_opengl/gl_rasterizer.cpp | 3 +-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/video_core/const_buffer_accessor.cpp b/src/video_core/const_buffer_accessor.cpp index c89ab91c74..0e613e1f92 100644 --- a/src/video_core/const_buffer_accessor.cpp +++ b/src/video_core/const_buffer_accessor.cpp @@ -14,21 +14,25 @@ namespace Tegra { u32 ConstBufferAccessor::access32(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, u64 const_buffer, u64 offset) { - auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + auto& gpu = Core::System::GetInstance().GPU(); + auto& memory_manager = gpu.MemoryManager(); + auto& maxwell3d = gpu.Maxwell3D(); const auto& shader_stage = maxwell3d.state.shader_stages[static_cast(stage)]; const auto& buffer = shader_stage.const_buffers[const_buffer]; u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset * 4), sizeof(u32)); + std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); return result; } u64 ConstBufferAccessor::access64(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, u64 const_buffer, u64 offset) { - auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + auto& gpu = Core::System::GetInstance().GPU(); + auto& memory_manager = gpu.MemoryManager(); + auto& maxwell3d = gpu.Maxwell3D(); const auto& shader_stage = maxwell3d.state.shader_stages[static_cast(stage)]; const auto& buffer = shader_stage.const_buffers[const_buffer]; u64 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset * 4), sizeof(u64)); + std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u64)); return result; } diff --git a/src/video_core/const_buffer_accessor.h b/src/video_core/const_buffer_accessor.h index 2410f14835..37d1ca7676 100644 --- a/src/video_core/const_buffer_accessor.h +++ b/src/video_core/const_buffer_accessor.h @@ -7,15 +7,12 @@ namespace Tegra { class ConstBufferAccessor { public: - ConstBufferAccessor(Tegra::Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} + ConstBufferAccessor() {} ~ConstBufferAccessor() = default; u32 access32(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, u64 const_buffer, u64 offset); u64 access64(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, u64 const_buffer, u64 offset); - -private: - Tegra::Engines::Maxwell3D& maxwell3d; }; } // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a20acfe8e1..e847b75fb5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -101,8 +101,7 @@ struct FramebufferCacheKey { RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, ScreenInfo& info) : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system}, - screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE), - const_buffer_accessor(system.GPU().Maxwell3D()) { + screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE), const_buffer_accessor() { // Create sampler objects for (std::size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); From ac3ba9a33e0d1e14061fc0f341b69cb85ea2e6a6 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 26 Mar 2019 19:02:24 -0400 Subject: [PATCH 06/14] Corrections to TEX_B --- src/video_core/engines/shader_bytecode.h | 32 ++++++++++++++++++++++++ src/video_core/shader/decode/texture.cpp | 9 ++++--- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 2edd3245e1..71c22aff00 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -966,6 +966,38 @@ union Instruction { } } tex; + union { + BitField<28, 1, u64> array; + BitField<29, 2, TextureType> texture_type; + BitField<31, 4, u64> component_mask; + BitField<49, 1, u64> nodep_flag; + BitField<50, 1, u64> dc_flag; + BitField<36, 1, u64> aoffi_flag; + BitField<37, 3, TextureProcessMode> process_mode; + + bool IsComponentEnabled(std::size_t component) const { + return ((1ull << component) & component_mask) != 0; + } + + TextureProcessMode GetTextureProcessMode() const { + return process_mode; + } + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::DC: + return dc_flag != 0; + case TextureMiscMode::NODEP: + return nodep_flag != 0; + case TextureMiscMode::AOFFI: + return aoffi_flag != 0; + default: + break; + } + return false; + } + } tex_b; + union { BitField<22, 6, TextureQueryType> query_type; BitField<31, 4, u64> component_mask; diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 3ac04f6b75..300f1abad3 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -65,10 +65,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); } - const TextureType texture_type{instr.tex.texture_type}; - const bool is_array = instr.tex.array != 0; - const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex.GetTextureProcessMode(); + const TextureType texture_type{instr.tex_b.texture_type}; + const bool is_array = instr.tex_b.array != 0; + const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.tex_b.GetTextureProcessMode(); WriteTexInstructionFloat(bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, true, instr.gpr20)); @@ -462,6 +462,7 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, if (is_aoffi) { aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); } + const u32 bindless_offset = (is_bindless ? 1 : 0); Node dc{}; if (depth_compare) { From 189bd1980cc405d512eb8b19307df76976344e6e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 26 Mar 2019 19:13:53 -0400 Subject: [PATCH 07/14] Implement TMML_B --- src/video_core/shader/decode/texture.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 300f1abad3..ddb7755b83 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -40,7 +40,7 @@ static std::size_t GetCoordCount(TextureType texture_type) { u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - + bool is_bindless = false; switch (opcode->get().GetId()) { case OpCode::Id::TEX: { if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { @@ -185,6 +185,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::TMML_B: + is_bindless = true; case OpCode::Id::TMML: { UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), "NDV is not implemented"); @@ -195,7 +197,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = !is_bindless + ? GetSampler(instr.sampler, texture_type, is_array, false) + : GetBindlessSampler(instr.gpr20, texture_type, is_array, false); std::vector coords; @@ -271,11 +275,12 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu return *used_samplers.emplace(entry).first; } -const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, - TextureType type, bool is_array, bool is_shadow) { +const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, + bool is_array, bool is_shadow) { const Node sampler_register = GetRegister(reg); - const Node base_sampler = TrackCbuf(sampler_register, global_code, static_cast(global_code.size())); + const Node base_sampler = + TrackCbuf(sampler_register, global_code, static_cast(global_code.size())); const auto cbuf = std::get_if(base_sampler); const auto cbuf_offset_imm = std::get_if(cbuf->GetOffset()); ASSERT(cbuf_offset_imm != nullptr); From 4841440382c72047d68bb2c0ce7a7defadab7d3d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 26 Mar 2019 19:46:11 -0400 Subject: [PATCH 08/14] Implement TXQ_B --- src/video_core/engines/shader_bytecode.h | 2 ++ src/video_core/shader/decode/texture.cpp | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 71c22aff00..f7ef9a32ac 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1343,6 +1343,7 @@ public: TEX, TEX_B, // Texture Load Bindless TXQ, // Texture Query + TXQ_B, // Texture Query Bindless TEXS, // Texture Fetch with scalar/non-vec4 source/destinations TLDS, // Texture Load with scalar/non-vec4 source/destinations TLD4, // Texture Load 4 @@ -1612,6 +1613,7 @@ private: INST("110000----111---", Id::TEX, Type::Texture, "TEX"), INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), + INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index ddb7755b83..3eac75bef6 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -151,6 +151,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { WriteTexsInstructionFloat(bb, instr, values); break; } + case OpCode::Id::TXQ_B: + is_bindless = true; case OpCode::Id::TXQ: { if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); @@ -160,7 +162,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + !is_bindless + ? GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false) + : GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, + false); u32 indexer = 0; switch (instr.txq.query_type) { @@ -171,7 +176,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); + Operation(OperationCode::TextureQueryDimensions, meta, + GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); SetTemporal(bb, indexer++, value); } for (u32 i = 0; i < indexer; ++i) { From fd4e994de3196dfdd2a3f2caf4ca8934e719c296 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 27 Mar 2019 07:11:50 -0400 Subject: [PATCH 09/14] Refactor GetTextureCode and GetTexCode to use an optional instead of optional parameters --- src/video_core/shader/decode/texture.cpp | 47 ++++++++++++------------ src/video_core/shader/shader_ir.h | 20 +++++----- 2 files changed, 33 insertions(+), 34 deletions(-) diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 3eac75bef6..5d670b24e2 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -54,7 +54,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto process_mode = instr.tex.GetTextureProcessMode(); WriteTexInstructionFloat( bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi)); + GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); break; } case OpCode::Id::TEX_B: { @@ -69,9 +69,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const bool is_array = instr.tex_b.array != 0; const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); const auto process_mode = instr.tex_b.GetTextureProcessMode(); - WriteTexInstructionFloat(bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, - is_array, true, instr.gpr20)); + WriteTexInstructionFloat( + bb, instr, + GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, {instr.gpr20})); break; } case OpCode::Id::TEXS: { @@ -162,10 +162,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - !is_bindless - ? GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false) - : GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, - false); + is_bindless + ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, + false) + : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); u32 indexer = 0; switch (instr.txq.query_type) { @@ -203,9 +203,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = !is_bindless - ? GetSampler(instr.sampler, texture_type, is_array, false) - : GetBindlessSampler(instr.gpr20, texture_type, is_array, false); + const auto& sampler = is_bindless + ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) + : GetSampler(instr.sampler, texture_type, is_array, false); std::vector coords; @@ -381,25 +381,26 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, std::vector coords, - Node array, Node depth_compare, u32 bias_offset, std::vector aoffi, bool is_bindless, - Register bindless_reg) { + Node array, Node depth_compare, u32 bias_offset, std::vector aoffi, std::optional bindless_reg) { const bool is_array = array; const bool is_shadow = depth_compare; + const bool is_bindless = bindless_reg.has_value(); UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = !is_bindless - ? GetSampler(instr.sampler, texture_type, is_array, is_shadow) - : GetBindlessSampler(bindless_reg, texture_type, is_array, is_shadow); + const auto& sampler = is_bindless + ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) + : GetSampler(instr.sampler, texture_type, is_array, is_shadow); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || process_mode == TextureProcessMode::LLA; - // LOD selection (either via bias or explicit textureLod) not supported in GL for - // sampler2DArrayShadow and samplerCubeArrayShadow. + // LOD selection (either via bias or explicit textureLod) not + // supported in GL for sampler2DArrayShadow and + // samplerCubeArrayShadow. const bool gl_lod_supported = !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); @@ -417,8 +418,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, lod = Immediate(0.0f); break; case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register indexed by the gpr20 - // field with an offset depending on the usage of the other registers + // If present, lod or bias are always stored in the register + // indexed by the gpr20 field with an offset depending on the + // usage of the other registers bias = GetRegister(instr.gpr20.Value() + bias_offset); break; case TextureProcessMode::LL: @@ -442,7 +444,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi, bool is_bindless, Register bindless_reg) { + bool is_aoffi, std::optional bindless_reg) { const bool lod_bias_enabled{ (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; @@ -482,8 +484,7 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, dc = GetRegister(parameter_register++); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi, is_bindless, - bindless_reg); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi, bindless_reg); } Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ed321cfe5d..11495799ff 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -769,11 +769,10 @@ private: void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); - Node4 GetTexCode( - Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi, bool is_bindless = false, - Tegra::Shader::Register bindless_reg = static_cast(0)); + Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, + Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, + bool is_array, bool is_aoffi, + std::optional bindless_reg); Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, @@ -790,12 +789,11 @@ private: bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); std::vector GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); - - Node4 GetTextureCode( - Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, std::vector coords, Node array, - Node depth_compare, u32 bias_offset, std::vector aoffi, bool is_bindless = false, - Tegra::Shader::Register bindless_reg = static_cast(0)); + + Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, + Tegra::Shader::TextureProcessMode process_mode, std::vector coords, + Node array, Node depth_compare, u32 bias_offset, std::vector aoffi, + std::optional bindless_reg); Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height); From a77e9a27b0e82192cd17eea255e6b04893ccafa7 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 28 Mar 2019 14:54:52 -0400 Subject: [PATCH 10/14] Simplify ConstBufferAccessor --- src/video_core/CMakeLists.txt | 1 - src/video_core/const_buffer_accessor.cpp | 39 ------------------- src/video_core/const_buffer_accessor.h | 26 +++++++++---- .../renderer_opengl/gl_rasterizer.cpp | 6 ++- .../renderer_opengl/gl_rasterizer.h | 3 -- 5 files changed, 22 insertions(+), 53 deletions(-) delete mode 100644 src/video_core/const_buffer_accessor.cpp diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 804395d388..c58f51f18d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,5 +1,4 @@ add_library(video_core STATIC - const_buffer_accessor.cpp const_buffer_accessor.h dma_pusher.cpp dma_pusher.h diff --git a/src/video_core/const_buffer_accessor.cpp b/src/video_core/const_buffer_accessor.cpp deleted file mode 100644 index 0e613e1f92..0000000000 --- a/src/video_core/const_buffer_accessor.cpp +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include - -#include "common/common_types.h" -#include "core/core.h" -#include "core/memory.h" -#include "video_core/const_buffer_accessor.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/gpu.h" -#include "video_core/memory_manager.h" - -namespace Tegra { - -u32 ConstBufferAccessor::access32(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - u64 const_buffer, u64 offset) { - auto& gpu = Core::System::GetInstance().GPU(); - auto& memory_manager = gpu.MemoryManager(); - auto& maxwell3d = gpu.Maxwell3D(); - const auto& shader_stage = maxwell3d.state.shader_stages[static_cast(stage)]; - const auto& buffer = shader_stage.const_buffers[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); - return result; -} - -u64 ConstBufferAccessor::access64(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - u64 const_buffer, u64 offset) { - auto& gpu = Core::System::GetInstance().GPU(); - auto& memory_manager = gpu.MemoryManager(); - auto& maxwell3d = gpu.Maxwell3D(); - const auto& shader_stage = maxwell3d.state.shader_stages[static_cast(stage)]; - const auto& buffer = shader_stage.const_buffers[const_buffer]; - u64 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u64)); - return result; -} - -} // namespace Tegra diff --git a/src/video_core/const_buffer_accessor.h b/src/video_core/const_buffer_accessor.h index 37d1ca7676..01524673b2 100644 --- a/src/video_core/const_buffer_accessor.h +++ b/src/video_core/const_buffer_accessor.h @@ -1,18 +1,28 @@ #pragma once +#include + #include "common/common_types.h" +#include "core/core.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" namespace Tegra { -class ConstBufferAccessor { -public: - ConstBufferAccessor() {} - ~ConstBufferAccessor() = default; +namespace ConstBufferAccessor { - u32 access32(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, u64 const_buffer, u64 offset); - - u64 access64(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, u64 const_buffer, u64 offset); -}; +template +T access(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, u64 const_buffer, u64 offset) { + auto& gpu = Core::System::GetInstance().GPU(); + auto& memory_manager = gpu.MemoryManager(); + auto& maxwell3d = gpu.Maxwell3D(); + const auto& shader_stage = maxwell3d.state.shader_stages[static_cast(stage)]; + const auto& buffer = shader_stage.const_buffers[const_buffer]; + T result; + std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(T)); + return result; +} +} // namespace ConstBufferAccessor } // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e847b75fb5..30cad484a7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -19,6 +19,7 @@ #include "core/core.h" #include "core/hle/kernel/process.h" #include "core/settings.h" +#include "video_core/const_buffer_accessor.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -101,7 +102,7 @@ struct FramebufferCacheKey { RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, ScreenInfo& info) : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system}, - screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE), const_buffer_accessor() { + screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { // Create sampler objects for (std::size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -990,7 +991,8 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s } else { const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = const_buffer_accessor.access32(stage, cbuf.first, cbuf.second); + tex_handle.raw = + Tegra::ConstBufferAccessor::access(stage, cbuf.first, cbuf.second); texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); } const u32 current_bindpoint = base_bindings.sampler + bindpoint; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 886e9c2b5f..54fbf48aa5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -17,7 +17,6 @@ #include #include "common/common_types.h" -#include "video_core/const_buffer_accessor.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" @@ -230,8 +229,6 @@ private: PrimitiveAssembler primitive_assembler{buffer_cache}; GLint uniform_buffer_alignment; - Tegra::ConstBufferAccessor const_buffer_accessor; - std::size_t CalculateVertexArraysSize() const; std::size_t CalculateIndexBufferSize() const; From c60b0b8432953fbbd7434578fd0858073b908392 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 28 Mar 2019 19:45:19 -0400 Subject: [PATCH 11/14] Fix TMML --- src/video_core/shader/decode/texture.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 5d670b24e2..99385c46ea 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -226,17 +226,19 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { coords.push_back(GetRegister(instr.gpr8.Value() + 1)); texture_type = TextureType::Texture2D; } - + u32 indexer = 0; for (u32 element = 0; element < 2; ++element) { + if (!instr.tmml.IsComponentEnabled(element)) { + continue; + } auto params = coords; MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); - SetTemporal(bb, element, value); + SetTemporal(bb, indexer++, value); } - for (u32 element = 0; element < 2; ++element) { - SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); } - break; } case OpCode::Id::TLDS: { From 797e351bf816f86f689ee2704eb95c63411d4002 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 5 Apr 2019 19:48:16 -0400 Subject: [PATCH 12/14] Fix bad rebase --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 30cad484a7..ed1e97a738 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -99,8 +99,7 @@ struct FramebufferCacheKey { } }; -RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, - ScreenInfo& info) +RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system}, screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { // Create sampler objects From 492040bd9ce40f86f9845699d68104d31d272155 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 7 Apr 2019 08:30:26 -0400 Subject: [PATCH 13/14] Move ConstBufferAccessor to Maxwell3d, correct mistakes and clang format. --- src/video_core/CMakeLists.txt | 1 - src/video_core/const_buffer_accessor.h | 28 ------------------- src/video_core/engines/maxwell_3d.cpp | 12 ++++++-- src/video_core/engines/maxwell_3d.h | 2 ++ src/video_core/engines/shader_bytecode.h | 2 +- .../renderer_opengl/gl_rasterizer.cpp | 10 +++---- .../renderer_opengl/gl_shader_disk_cache.cpp | 7 +++-- src/video_core/shader/decode/texture.cpp | 3 +- src/video_core/shader/shader_ir.h | 4 +-- 9 files changed, 25 insertions(+), 44 deletions(-) delete mode 100644 src/video_core/const_buffer_accessor.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c58f51f18d..242a0d1cd7 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,5 +1,4 @@ add_library(video_core STATIC - const_buffer_accessor.h dma_pusher.cpp dma_pusher.h debug_utils/debug_utils.cpp diff --git a/src/video_core/const_buffer_accessor.h b/src/video_core/const_buffer_accessor.h deleted file mode 100644 index 01524673b2..0000000000 --- a/src/video_core/const_buffer_accessor.h +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include - -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/gpu.h" -#include "video_core/memory_manager.h" - -namespace Tegra { - -namespace ConstBufferAccessor { - -template -T access(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, u64 const_buffer, u64 offset) { - auto& gpu = Core::System::GetInstance().GPU(); - auto& memory_manager = gpu.MemoryManager(); - auto& maxwell3d = gpu.Maxwell3D(); - const auto& shader_stage = maxwell3d.state.shader_stages[static_cast(stage)]; - const auto& buffer = shader_stage.const_buffers[const_buffer]; - T result; - std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(T)); - return result; -} - -} // namespace ConstBufferAccessor -} // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 079132135f..b198793bc3 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -502,8 +502,8 @@ Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const { - auto& shader = state.shader_stages[static_cast(stage)]; - auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; + const auto& shader = state.shader_stages[static_cast(stage)]; + const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); const GPUVAddr tex_info_address = @@ -529,4 +529,12 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer.Clear(); } +u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { + const auto& shader_stage = state.shader_stages[static_cast(stage)]; + const auto& buffer = shader_stage.const_buffers[const_buffer]; + u32 result; + std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); + return result; +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index fd2c35a01c..cc2424d389 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1141,6 +1141,8 @@ public: /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; + u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. using MacroMemory = std::array; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index f7ef9a32ac..a7ef5da9a0 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -976,7 +976,7 @@ union Instruction { BitField<37, 3, TextureProcessMode> process_mode; bool IsComponentEnabled(std::size_t component) const { - return ((1ull << component) & component_mask) != 0; + return ((1ULL << component) & component_mask) != 0; } TextureProcessMode GetTextureProcessMode() const { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ed1e97a738..6f3bcccec4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -19,7 +19,6 @@ #include "core/core.h" #include "core/hle/kernel/process.h" #include "core/settings.h" -#include "video_core/const_buffer_accessor.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -985,14 +984,13 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; Tegra::Texture::FullTextureInfo texture; - if (!entry.IsBindless()) { - texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); - } else { + if (entry.IsBindless()) { const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = - Tegra::ConstBufferAccessor::access(stage, cbuf.first, cbuf.second); + tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); + } else { + texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); } const u32 current_bindpoint = base_bindings.sampler + bindpoint; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index e277403832..08603b7a59 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -328,9 +328,10 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) { return {}; } - entry.entries.samplers.emplace_back( - static_cast(offset), static_cast(index), - static_cast(type), is_array != 0, is_shadow != 0, is_bindless != 0); + entry.entries.samplers.emplace_back(static_cast(offset), + static_cast(index), + static_cast(type), + is_array != 0, is_shadow != 0, is_bindless != 0); } u32 global_memory_count{}; diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 99385c46ea..dd5310c367 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -153,6 +153,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } case OpCode::Id::TXQ_B: is_bindless = true; + [[fallthrough]]; case OpCode::Id::TXQ: { if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); @@ -193,6 +194,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } case OpCode::Id::TMML_B: is_bindless = true; + [[fallthrough]]; case OpCode::Id::TMML: { UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), "NDV is not implemented"); @@ -285,7 +287,6 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, bool is_array, bool is_shadow) { - const Node sampler_register = GetRegister(reg); const Node base_sampler = TrackCbuf(sampler_register, global_code, static_cast(global_code.size())); diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 11495799ff..2490241671 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -196,7 +196,7 @@ enum class ExitMethod { class Sampler { public: - // Use this constructor for binded Samplers + // Use this constructor for bounded Samplers explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, bool is_array, bool is_shadow) : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, @@ -239,7 +239,7 @@ public: } std::pair GetBindlessCBuf() const { - return {offset >> 32, offset & 0x00000000FFFFFFFFULL}; + return {static_cast(offset >> 32), static_cast(offset)}; } bool operator<(const Sampler& rhs) const { From ef8be408d321f4f15f0731c46118834bb757be1a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 8 Apr 2019 12:07:56 -0400 Subject: [PATCH 14/14] Adapt Bindless to work with AOFFI --- src/video_core/shader/decode/texture.cpp | 25 +++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index dd5310c367..fa65ac9a9d 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -67,11 +67,12 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const TextureType texture_type{instr.tex_b.texture_type}; const bool is_array = instr.tex_b.array != 0; + const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); const auto process_mode = instr.tex_b.GetTextureProcessMode(); - WriteTexInstructionFloat( - bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, {instr.gpr20})); + WriteTexInstructionFloat(bb, instr, + GetTexCode(instr, texture_type, process_mode, depth_compare, + is_array, is_aoffi, {instr.gpr20})); break; } case OpCode::Id::TEXS: { @@ -384,7 +385,9 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, std::vector coords, - Node array, Node depth_compare, u32 bias_offset, std::vector aoffi, std::optional bindless_reg) { + Node array, Node depth_compare, u32 bias_offset, + std::vector aoffi, + std::optional bindless_reg) { const bool is_array = array; const bool is_shadow = depth_compare; const bool is_bindless = bindless_reg.has_value(); @@ -451,7 +454,14 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, const bool lod_bias_enabled{ (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; + const bool is_bindless = bindless_reg.has_value(); + u64 parameter_register = instr.gpr20.Value(); + if (is_bindless) { + ++parameter_register; + } + + const u32 bias_lod_offset = (is_bindless ? 1 : 0); if (lod_bias_enabled) { ++parameter_register; } @@ -478,7 +488,6 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, if (is_aoffi) { aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); } - const u32 bindless_offset = (is_bindless ? 1 : 0); Node dc{}; if (depth_compare) { @@ -487,7 +496,8 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, dc = GetRegister(parameter_register++); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi, bindless_reg); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, + aoffi, bindless_reg); } Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, @@ -523,7 +533,8 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, dc = GetRegister(depth_register); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, + {}); } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,