shader: Implement VertexA stage

2024-07-15 19:31:07 +02:00 · 2021-04-19 01:03:38 +02:00 · 2021-04-19 01:03:38 +02:00 · b541f5e5e3
commit b541f5e5e3
parent da936d6ad8
12 changed files with 180 additions and 3 deletions
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -162,6 +162,7 @@ add_library(shader_recompiler STATIC
    ir_opt/collect_shader_info_pass.cpp
    ir_opt/constant_propagation_pass.cpp
    ir_opt/dead_code_elimination_pass.cpp
+    ir_opt/dual_vertex_pass.cpp
    ir_opt/global_memory_to_storage_buffer_pass.cpp
    ir_opt/identity_removal_pass.cpp
    ir_opt/lower_fp16_to_fp32.cpp
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -25,6 +25,7 @@ void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id fal
 void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label);
 void EmitSelectionMerge(EmitContext& ctx, Id merge_label);
 void EmitReturn(EmitContext& ctx);
+void EmitJoin(EmitContext& ctx);
 void EmitUnreachable(EmitContext& ctx);
 void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label);
 void EmitBarrier(EmitContext& ctx);
--- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -26,6 +26,10 @@ void EmitReturn(EmitContext& ctx) {
    ctx.OpReturn();
 }

+// SPIR-V emitter for IR::Opcode::Join. DualVertexJoinPass rewrites Join into a branch, so one
+// arriving here is treated as an error and the emitter always throws.
+void EmitJoin(EmitContext& /*ctx*/) {
+    throw NotImplementedException("Join shouldn't be emitted");
+}
+
 void EmitUnreachable(EmitContext& ctx) {
    ctx.OpUnreachable();
 }
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -61,6 +61,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
    case Opcode::LoopMerge:
    case Opcode::SelectionMerge:
    case Opcode::Return:
+    case Opcode::Join:
    case Opcode::Unreachable:
    case Opcode::DemoteToHelperInvocation:
    case Opcode::Barrier:
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -13,6 +13,7 @@ OPCODE(BranchConditional,                                   Void,           U1,
 OPCODE(LoopMerge,                                           Void,           Label,          Label,                                                          )
 OPCODE(SelectionMerge,                                      Void,           Label,                                                                          )
 OPCODE(Return,                                              Void,                                                                                           )
+OPCODE(Join,                                                Void,                                                                                           )
 OPCODE(Unreachable,                                         Void,                                                                                           )
 OPCODE(DemoteToHelperInvocation,                            Void,           Label,                                                                          )

--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -150,4 +150,32 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
    return program;
 }

+// Fuses a VertexA program and a VertexB program into one program whose stage is VertexB.
+//
+// vertex_a: its blocks are moved into the result (swapped out of vertex_a); its info is the
+//           starting point of the merged info.
+// vertex_b: its blocks are appended after vertex_a's; its generics usage, texture/image
+//           descriptors and storage buffer descriptors are folded into the merged info.
+// env2:     environment handed to CollectShaderInfoPass for the merged program.
+//
+// Both inputs are modified by the VertexA/VertexB transform passes before merging.
+IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+                                    Environment& env2) {
+    // Prepare each half: A gets its Return turned into a Join, B loses its Prologue.
+    Optimization::VertexATransformPass(vertex_a);
+    Optimization::VertexBTransformPass(vertex_b);
+
+    // Block list: all of A's blocks first, then all of B's, in their original order.
+    IR::Program merged{};
+    merged.blocks.swap(vertex_a.blocks);
+    merged.blocks.insert(merged.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end());
+
+    merged.stage = Stage::VertexB;
+    merged.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
+
+    // Shader info: start from A and OR in whatever B reads or writes.
+    merged.info = vertex_a.info;
+    for (size_t generic = 0; generic < 32; ++generic) {
+        merged.info.input_generics[generic].used |= vertex_b.info.input_generics[generic].used;
+        merged.info.stores_generics[generic] |= vertex_b.info.stores_generics[generic];
+    }
+    Optimization::JoinTextureInfo(merged.info, vertex_b.info);
+    Optimization::JoinStorageInfo(merged.info, vertex_b.info);
+
+    // Stitch the two halves together, then clean up and re-verify the combined program.
+    Optimization::DualVertexJoinPass(merged);
+    merged.post_order_blocks = PostOrder(merged.blocks);
+    Optimization::DeadCodeEliminationPass(merged);
+    Optimization::IdentityRemovalPass(merged);
+    Optimization::VerificationPass(merged);
+    Optimization::CollectShaderInfoPass(env2, merged);
+    return merged;
+}
+
 } // namespace Shader::Maxwell
--- a/src/shader_recompiler/frontend/maxwell/program.h
+++ b/src/shader_recompiler/frontend/maxwell/program.h
@@ -21,4 +21,6 @@ namespace Shader::Maxwell {
                                           ObjectPool<IR::Block>& block_pool, Environment& env,
                                           Flow::CFG& cfg);

+[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+                                                  Environment& env_vertex_b);
 } // namespace Shader::Maxwell
--- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
+++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
@@ -0,0 +1,74 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <ranges>
+#include <tuple>
+#include <type_traits>
+
+#include "common/bit_cast.h"
+#include "common/bit_util.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+// Prepares the VertexA program for merging: walking the blocks in post order, every Return met
+// is retagged as Join and every Epilogue met is invalidated. The walk stops as soon as at least
+// one of each has been handled.
+void VertexATransformPass(IR::Program& program) {
+    bool join_placed{false};
+    bool epilogue_removed{false};
+    for (IR::Block* const block : program.post_order_blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            // Read the opcode once; ReplaceOpcode below must not influence the second test.
+            const auto opcode = inst.GetOpcode();
+            if (opcode == IR::Opcode::Return) {
+                inst.ReplaceOpcode(IR::Opcode::Join);
+                join_placed = true;
+            } else if (opcode == IR::Opcode::Epilogue) {
+                inst.Invalidate();
+                epilogue_removed = true;
+            }
+            if (join_placed && epilogue_removed) {
+                return;
+            }
+        }
+    }
+}
+
+// Prepares the VertexB program for merging: walking the blocks in reverse post order, the first
+// Prologue instruction found is invalidated and the pass ends. Does nothing if none is found.
+void VertexBTransformPass(IR::Program& program) {
+    for (IR::Block* const block : program.post_order_blocks | std::views::reverse) {
+        for (IR::Inst& inst : block->Instructions()) {
+            if (inst.GetOpcode() != IR::Opcode::Prologue) {
+                continue;
+            }
+            inst.Invalidate();
+            return;
+        }
+    }
+}
+
+// Links the two halves of a merged dual-vertex program: the first Join found (scanning every
+// block except the last, in program.blocks order) gets a branch to the following block inserted
+// in front of it and is then invalidated.
+//
+// Throws NotImplementedException when the program has fewer than two blocks or when no Join is
+// found in the scanned blocks.
+void DualVertexJoinPass(IR::Program& program) {
+    const auto& blocks = program.blocks;
+    const s64 num_blocks{static_cast<s64>(blocks.size())};
+    if (num_blocks < 2) {
+        throw NotImplementedException("Dual Vertex Join pass failed, expected atleast 2 blocks!");
+    }
+    // `next` indexes the successor, so the block being scanned is always blocks[next - 1].
+    for (s64 next = 1; next < num_blocks; ++next) {
+        IR::Block* const block = blocks[next - 1];
+        for (IR::Inst& inst : block->Instructions()) {
+            if (inst.GetOpcode() != IR::Opcode::Join) {
+                continue;
+            }
+            IR::Block* const successor = blocks[next];
+            IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
+            ir.Branch(successor);
+            inst.Invalidate();
+            // Only one Join is expected, so stop after handling it.
+            return;
+        }
+    }
+    throw NotImplementedException("Dual Vertex Join pass failed, no join present!");
+}
+
+} // namespace Shader::Optimization
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -499,4 +499,30 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
    }
 }

+// Returns the index of the first element of `descriptors` satisfying `pred`. When no element
+// matches, `desc` is appended and the index of the new element is returned.
+template <typename Descriptors, typename Descriptor, typename Func>
+static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
+    // TODO: Handle arrays
+    const auto match{std::ranges::find_if(descriptors, pred)};
+    if (match == descriptors.end()) {
+        descriptors.push_back(desc);
+        return static_cast<u32>(descriptors.size()) - 1;
+    }
+    return static_cast<u32>(std::distance(descriptors.begin(), match));
+}
+
+// Folds the storage buffer descriptors of `source` into `base`. A source descriptor that matches
+// an existing one on cbuf_index, cbuf_offset and count only ORs its is_written flag into the
+// existing entry; any other source descriptor is appended to `base`.
+void JoinStorageInfo(Info& base, Info& source) {
+    auto& merged = base.storage_buffers_descriptors;
+    for (auto& incoming : source.storage_buffers_descriptors) {
+        const auto same_buffer = [&incoming](const auto& existing) {
+            return incoming.cbuf_index == existing.cbuf_index &&
+                   incoming.cbuf_offset == existing.cbuf_offset &&
+                   incoming.count == existing.count;
+        };
+        const auto match{std::ranges::find_if(merged, same_buffer)};
+        if (match == merged.end()) {
+            merged.push_back(incoming);
+        } else {
+            match->is_written |= incoming.is_written;
+        }
+    }
+}
+
 } // namespace Shader::Optimization
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -22,4 +22,11 @@ void SsaRewritePass(IR::Program& program);
 void TexturePass(Environment& env, IR::Program& program);
 void VerificationPass(const IR::Program& program);

+// Dual Vertex
+void VertexATransformPass(IR::Program& program);
+void VertexBTransformPass(IR::Program& program);
+void DualVertexJoinPass(IR::Program& program);
+void JoinTextureInfo(Info& base, Info& source);
+void JoinStorageInfo(Info& base, Info& source);
+
 } // namespace Shader::Optimization
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -426,4 +426,25 @@ void TexturePass(Environment& env, IR::Program& program) {
    }
 }

+// Folds the texture buffer, image buffer, texture and image descriptors of `source` into `base`
+// by passing each one, in that order, to Descriptors::Add on a Descriptors built over `base`.
+void JoinTextureInfo(Info& base, Info& source) {
+    Descriptors descriptors{
+        base.texture_buffer_descriptors,
+        base.image_buffer_descriptors,
+        base.texture_descriptors,
+        base.image_descriptors,
+    };
+    // Generic so the same loop serves all four descriptor types.
+    const auto add_all = [&descriptors](auto& list) {
+        for (auto& desc : list) {
+            descriptors.Add(desc);
+        }
+    };
+    add_all(source.texture_buffer_descriptors);
+    add_all(source.image_buffer_descriptors);
+    add_all(source.texture_descriptors);
+    add_all(source.image_descriptors);
+}
+
 } // namespace Shader::Optimization
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -47,6 +47,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache);

 namespace {
 using Shader::Backend::SPIRV::EmitSPIRV;
+using Shader::Maxwell::MergeDualVertexPrograms;
 using Shader::Maxwell::TranslateProgram;
 using VideoCommon::ComputeEnvironment;
 using VideoCommon::FileEnvironment;
@@ -287,22 +288,32 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
    LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
    size_t env_index{0};
    std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
+    bool uses_vertex_a{};
+    std::size_t start_value_processing{};
    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        if (key.unique_hashes[index] == 0) {
            continue;
        }
+        uses_vertex_a |= index == 0;
        Shader::Environment& env{*envs[env_index]};
        ++env_index;

        const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
-        Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
-        programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
+        Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+        if (!uses_vertex_a || index != 1) {
+            programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
+            continue;
+        }
+        Shader::IR::Program& program_va{programs[0]};
+        Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)};
+        programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
+        start_value_processing = 1;
    }
    std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
    std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;

    u32 binding{0};
-    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+    for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) {
        if (key.unique_hashes[index] == 0) {
            continue;
        }