From 6233b1db089686e1ea0dde68278e9086981d6f76 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 1 Nov 2019 00:34:38 -0300 Subject: [PATCH] shader_ir/memory: Implement patch stores --- src/video_core/engines/shader_bytecode.h | 3 ++- src/video_core/shader/decode/memory.cpp | 34 +++++++++++++----------- src/video_core/shader/node.h | 20 +++++++++++--- src/video_core/shader/track.cpp | 1 + 4 files changed, 38 insertions(+), 20 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 8b7dcbe9d5..7703a76a36 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -98,10 +98,11 @@ union Attribute { BitField<20, 10, u64> immediate; BitField<22, 2, u64> element; BitField<24, 6, Index> index; + BitField<31, 1, u64> patch; BitField<47, 3, AttributeSize> size; bool IsPhysical() const { - return element == 0 && static_cast(index.Value()) == 0; + return patch == 0 && element == 0 && static_cast(index.Value()) == 0; } } fmt20; diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 335d781462..78e92f52e7 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -21,6 +21,7 @@ using Tegra::Shader::OpCode; using Tegra::Shader::Register; namespace { + u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { switch (uniform_type) { case Tegra::Shader::UniformType::Single: @@ -35,6 +36,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { return 1; } } + } // Anonymous namespace u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { @@ -196,28 +198,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, "Unaligned attribute loads are not supported"); - u64 next_element = instr.attribute.fmt20.element; - auto next_index = static_cast(instr.attribute.fmt20.index.Value()); + u64 element = instr.attribute.fmt20.element; + auto index = static_cast(instr.attribute.fmt20.index.Value()); - const auto StoreNextElement = [&](u32 reg_offset) { - const auto dest = GetOutputAttribute(static_cast(next_index), - next_element, GetRegister(instr.gpr39)); + const u32 num_words = static_cast(instr.attribute.fmt20.size.Value()) + 1; + for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { + Node dest; + if (instr.attribute.fmt20.patch) { + const u32 offset = static_cast(index) * 4 + static_cast(element); + dest = MakeNode(offset); + } else { + dest = GetOutputAttribute(static_cast(index), element, + GetRegister(instr.gpr39)); + } const auto src = GetRegister(instr.gpr0.Value() + reg_offset); bb.push_back(Operation(OperationCode::Assign, dest, src)); - // Load the next attribute element into the following register. If the element - // to load goes beyond the vec4 size, load the first element of the next - // attribute. - next_element = (next_element + 1) % 4; - next_index = next_index + (next_element == 0 ? 1 : 0); - }; - - const u32 num_words = static_cast(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - StoreNextElement(reg_offset); + // Load the next attribute element into the following register. If the element to load + // goes beyond the vec4 size, load the first element of the next attribute. + element = (element + 1) % 4; + index = index + (element == 0 ? 1 : 0); } - break; } case OpCode::Id::ST_L: diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index b2576bdd65..8f4811c889 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -213,13 +213,14 @@ class PredicateNode; class AbufNode; class CbufNode; class LmemNode; +class PatchNode; class SmemNode; class GmemNode; class CommentNode; -using NodeData = - std::variant; +using NodeData = std::variant; using Node = std::shared_ptr; using Node4 = std::array; using NodeBlock = std::vector; @@ -542,6 +543,19 @@ private: u32 element{}; }; +/// Patch memory (used to communicate tessellation stages). +class PatchNode final { +public: + explicit PatchNode(u32 offset) : offset{offset} {} + + u32 GetOffset() const { + return offset; + } + +private: + u32 offset{}; +}; + /// Constant buffer node, usually mapped to uniform buffers in GLSL class CbufNode final { public: diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 55f5949e4b..165c793308 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -7,6 +7,7 @@ #include #include "common/common_types.h" +#include "video_core/shader/node.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader {