From 1109db86b7259857762fc29b2ff8c9f95e92353e Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:22:40 -0400 Subject: [PATCH 01/10] video_core/shader/decode: Prevent sign-conversion warnings Makes it explicit that the conversions here are intentional. --- src/video_core/shader/decode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index afffd157fa..b547d83230 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -47,14 +47,14 @@ void ShaderIR::Decode() { if (shader_info.decompilable) { disable_flow_stack = true; const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == exit_branch) { + if (label == static_cast(exit_branch)) { return; } basic_blocks.insert({label, nodes}); }; const auto& blocks = shader_info.blocks; NodeBlock current_block; - u32 current_label = exit_branch; + u32 current_label = static_cast(exit_branch); for (auto& block : blocks) { if (shader_info.labels.count(block.start) != 0) { insert_block(current_block, current_label); From 3df9558593d41b8c9bda905dd745a67ff00db033 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:32:35 -0400 Subject: [PATCH 02/10] video_core/control_flow: Place all internally linked types/functions within an anonymous namespace Previously, quite a few functions were being linked with external linkage. --- src/video_core/shader/control_flow.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index fdcc970ff8..440729258c 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -15,7 +15,7 @@ #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { - +namespace { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -411,6 +411,7 @@ bool TryQuery(CFGRebuildState& state) { state.queries.push_back(conditional_query); return true; } +} // Anonymous namespace std::optional ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address) { From 47df844338a6ad59189cad060cbeced1bc306e73 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:35:33 -0400 Subject: [PATCH 03/10] video_core/control_flow: Make program_size for ScanFlow() a std::size_t Prevents a truncation warning from occurring with MSVC. Also the internal data structures already treat it as a size_t, so this is just a discrepancy in the interface. --- src/video_core/shader/control_flow.cpp | 4 ++-- src/video_core/shader/control_flow.h | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 440729258c..20f9a64809 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -413,8 +413,8 @@ bool TryQuery(CFGRebuildState& state) { } } // Anonymous namespace -std::optional ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address) { +std::optional ScanFlow(const ProgramCode& program_code, + std::size_t program_size, u32 start_address) { CFGRebuildState state{program_code, program_size, start_address}; // Inspect Code and generate blocks state.labels.clear(); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 5e8ea32716..728286d70c 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -57,7 +56,7 @@ struct ShaderCharacteristics { std::unordered_set labels{}; }; -std::optional ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address); +std::optional ScanFlow(const ProgramCode& program_code, + std::size_t program_size, u32 start_address); } // namespace VideoCommon::Shader From 45fa12a05c8ba20259979d4b9e40d7401d825502 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:38:48 -0400 Subject: [PATCH 04/10] video_core: Resolve -Wreorder warnings Ensures that the constructor members are always initialized in the order that they're declared in. --- src/video_core/shader/control_flow.cpp | 2 +- src/video_core/texture_cache/surface_base.cpp | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 20f9a64809..6a92c540ee 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -58,7 +58,7 @@ struct BlockInfo { struct CFGRebuildState { explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, const u32 start) - : program_code{program_code}, program_size{program_size}, start{start} {} + : start{start}, program_code{program_code}, program_size{program_size} {} u32 start{}; std::vector block_info{}; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 6af9044cad..683c492072 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -24,9 +24,8 @@ StagingCache::StagingCache() = default; StagingCache::~StagingCache() = default; SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) - : params{params}, mipmap_sizes(params.num_levels), - mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ - params.GetHostSizeInBytes()} { + : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr}, + mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) { std::size_t offset = 0; for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; From 6885e7e7ec4919cd66ee0294fdbd9aea8935c50d Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:40:58 -0400 Subject: [PATCH 05/10] video_core/control_flow: Use empty() member function for checking emptiness It's what it's there for. --- src/video_core/shader/control_flow.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 6a92c540ee..83549c082c 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -379,8 +379,8 @@ bool TryQuery(CFGRebuildState& state) { // consumes a label. Schedule new queries accordingly if (block.visited) { BlockStack& stack = state.stacks[q.address]; - const bool all_okay = (stack.ssy_stack.size() == 0 || q.ssy_stack == stack.ssy_stack) && - (stack.pbk_stack.size() == 0 || q.pbk_stack == stack.pbk_stack); + const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && + (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); state.queries.pop_front(); return all_okay; } From e7b39f47f86ee2c3656340b8af7747848ddc1f2e Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:42:05 -0400 Subject: [PATCH 06/10] video_core/control_flow: Use the prefix variant of operator++ for iterators Same thing, but potentially allows a standard library implementation to pick a more efficient codepath. --- src/video_core/shader/control_flow.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 83549c082c..c2243337cf 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -365,7 +365,7 @@ bool TryQuery(CFGRebuildState& state) { const auto gather_end = labels.upper_bound(block.end); while (gather_start != gather_end) { cc.push(gather_start->second); - gather_start++; + ++gather_start; } }; if (state.queries.empty()) { @@ -470,7 +470,7 @@ std::optional ScanFlow(const ProgramCode& program_code, continue; } back = next; - next++; + ++next; } return {result_out}; } From 56bc11d952fa228475d09891e01b3d1c6d32f015 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:50:36 -0400 Subject: [PATCH 07/10] video_core/control_flow: Use std::move where applicable Results in less work being done where avoidable. --- src/video_core/shader/control_flow.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index c2243337cf..4d500320a0 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -371,10 +371,11 @@ bool TryQuery(CFGRebuildState& state) { if (state.queries.empty()) { return false; } + Query& q = state.queries.front(); const u32 block_index = state.registered[q.address]; BlockInfo& block = state.block_info[block_index]; - // If the block is visted, check if the stacks match, else gather the ssy/pbk + // If the block is visited, check if the stacks match, else gather the ssy/pbk // labels into the current stack and look if the branch at the end of the block // consumes a label. Schedule new queries accordingly if (block.visited) { @@ -385,7 +386,8 @@ bool TryQuery(CFGRebuildState& state) { return all_okay; } block.visited = true; - state.stacks[q.address] = BlockStack{q}; + state.stacks.insert_or_assign(q.address, BlockStack{q}); + Query q2(q); state.queries.pop_front(); gather_labels(q2.ssy_stack, state.ssy_labels, block); @@ -394,6 +396,7 @@ bool TryQuery(CFGRebuildState& state) { q2.address = block.end + 1; state.queries.push_back(q2); } + Query conditional_query{q2}; if (block.branch.is_sync) { if (block.branch.address == unassigned_branch) { @@ -408,7 +411,7 @@ bool TryQuery(CFGRebuildState& state) { conditional_query.pbk_stack.pop(); } conditional_query.address = block.branch.address; - state.queries.push_back(conditional_query); + state.queries.push_back(std::move(conditional_query)); return true; } } // Anonymous namespace @@ -416,6 +419,7 @@ bool TryQuery(CFGRebuildState& state) { std::optional ScanFlow(const ProgramCode& program_code, std::size_t program_size, u32 start_address) { CFGRebuildState state{program_code, program_size, start_address}; + // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); @@ -425,10 +429,9 @@ std::optional ScanFlow(const ProgramCode& program_code, return {}; } } + // Decompile Stacks - Query start_query{}; - start_query.address = state.start; - state.queries.push_back(start_query); + state.queries.push_back(Query{state.start, {}, {}}); bool decompiled = true; while (!state.queries.empty()) { if (!TryQuery(state)) { @@ -436,14 +439,15 @@ std::optional ScanFlow(const ProgramCode& program_code, break; } } + // Sort and organize results std::sort(state.block_info.begin(), state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); + [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; }); ShaderCharacteristics result_out{}; result_out.decompilable = decompiled; result_out.start = start_address; result_out.end = start_address; - for (auto& block : state.block_info) { + for (const auto& block : state.block_info) { ShaderBlock new_block{}; new_block.start = block.start; new_block.end = block.end; @@ -458,8 +462,9 @@ std::optional ScanFlow(const ProgramCode& program_code, } if (result_out.decompilable) { result_out.labels = std::move(state.labels); - return {result_out}; + return {std::move(result_out)}; } + // If it's not decompilable, merge the unlabelled blocks together auto back = result_out.blocks.begin(); auto next = std::next(back); @@ -472,6 +477,6 @@ std::optional ScanFlow(const ProgramCode& program_code, back = next; ++next; } - return {result_out}; + return {std::move(result_out)}; } } // namespace VideoCommon::Shader From a162a844d2ede2b13d4a52f2dae37980be91cb1a Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:52:08 -0400 Subject: [PATCH 08/10] video_core/control_flow: Remove unnecessary BlockStack copy constructor This is the default behavior of the copy constructor, so it doesn't need to be specified. While we're at it we can make the other non-default constructor explicit. --- src/video_core/shader/control_flow.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 4d500320a0..37792d420e 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -29,8 +29,7 @@ struct Query { struct BlockStack { BlockStack() = default; - BlockStack(const BlockStack& b) = default; - BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} + explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} std::stack ssy_stack{}; std::stack pbk_stack{}; }; From 1780e0e3d04fe7d8edd0b7b691198f31f23140ce Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:56:37 -0400 Subject: [PATCH 09/10] video_core/control_flow: Prevent sign conversion in TryGetBlock() The return value is a u32, not an s32, so this would result in an implicit signedness conversion. --- src/video_core/shader/control_flow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 37792d420e..ec3a766900 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -84,7 +84,7 @@ std::pair TryGetBlock(CFGRebuildState& state, u32 address) return {BlockCollision::Inside, index}; } } - return {BlockCollision::None, -1}; + return {BlockCollision::None, 0xFFFFFFFF}; } struct ParseInfo { From c1c89411da173cf76de2a2ec19fd26247648cc3c Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:59:57 -0400 Subject: [PATCH 10/10] video_core/control_flow: Provide operator!= for types with operator== Provides operational symmetry for the respective structures. --- src/video_core/shader/control_flow.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 728286d70c..b0a5e4f8c9 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -25,27 +25,44 @@ struct Condition { bool IsUnconditional() const { return predicate == Pred::UnusedIndex && cc == ConditionCode::T; } + bool operator==(const Condition& other) const { return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); } + + bool operator!=(const Condition& other) const { + return !operator==(other); + } }; struct ShaderBlock { - u32 start{}; - u32 end{}; - bool ignore_branch{}; struct Branch { Condition cond{}; bool kills{}; s32 address{}; + bool operator==(const Branch& b) const { return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); } - } branch{}; + + bool operator!=(const Branch& b) const { + return !operator==(b); + } + }; + + u32 start{}; + u32 end{}; + bool ignore_branch{}; + Branch branch{}; + bool operator==(const ShaderBlock& sb) const { return std::tie(start, end, ignore_branch, branch) == std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); } + + bool operator!=(const ShaderBlock& sb) const { + return !operator==(sb); + } }; struct ShaderCharacteristics {