From d5237342668db88cc81fefbee81f468b5214e655 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 18:51:32 -0300 Subject: [PATCH 1/7] decode/register_set_predicate: Use move for shared pointers Avoid atomic counters used by shared pointers. --- .../shader/decode/register_set_predicate.cpp | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index 8d54cce34e..a6733255d2 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" @@ -10,6 +12,7 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -23,7 +26,7 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr); - const Node apply_mask = [&] { + Node apply_mask = [this, opcode, instr] { switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: case OpCode::Id::P2R_IMM: @@ -34,25 +37,23 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { } }(); - const auto offset = static_cast(instr.p2r_r2p.byte) * 8; + const u32 offset = static_cast(instr.p2r_r2p.byte) * 8; switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: { - const Node mask = GetRegister(instr.gpr8); + Node mask = GetRegister(instr.gpr8); for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { - const auto shift = static_cast(pred); + const u32 shift = static_cast(pred); - const Node apply_compare = BitfieldExtract(apply_mask, shift, 1); - const Node condition = - Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); + Node apply = BitfieldExtract(apply_mask, shift, 1); + Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); - const Node value_compare = BitfieldExtract(mask, offset + shift, 1); - const Node value = - Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); + Node compare = BitfieldExtract(mask, offset + shift, 1); + Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); - bb.push_back(Conditional(condition, {code})); + Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), move(value)); + bb.push_back(Conditional(condition, {move(code)})); } break; } @@ -61,12 +62,12 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred), Immediate(0)); - value = Operation(OperationCode::UBitwiseOr, std::move(value), std::move(bit)); + value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); } - value = Operation(OperationCode::UBitwiseAnd, std::move(value), apply_mask); - value = BitfieldInsert(GetRegister(instr.gpr8), std::move(value), offset, 8); + value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); + value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); - SetRegister(bb, instr.gpr0, std::move(value)); + SetRegister(bb, instr.gpr0, move(value)); break; } default: From ffc5ec6fa816b1bf56044b9d8cf5f1935abe77ee Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 19:05:34 -0300 Subject: [PATCH 2/7] decode/register_set_predicate: Implement CC P2R CC takes the state of condition codes and puts them into a register. We already have this implemented for PR (predicates). This commit implements CC over that. --- .../shader/decode/register_set_predicate.cpp | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index a6733255d2..6116c31aa4 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -17,15 +17,14 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; namespace { -constexpr u64 NUM_PROGRAMMABLE_PREDICATES = 7; -} +constexpr u64 NUM_CONDITION_CODES = 4; +constexpr u64 NUM_PREDICATES = 7; +} // namespace u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr); - Node apply_mask = [this, opcode, instr] { switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: @@ -39,12 +38,18 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { const u32 offset = static_cast(instr.p2r_r2p.byte) * 8; + const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; + const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; + const auto get_entry = [this, cc](u64 entry) { + return cc ? GetInternalFlag(static_cast(entry)) : GetPredicate(entry); + }; + switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: { Node mask = GetRegister(instr.gpr8); - for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { - const u32 shift = static_cast(pred); + for (u64 entry = 0; entry < num_entries; ++entry) { + const u32 shift = static_cast(entry); Node apply = BitfieldExtract(apply_mask, shift, 1); Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); @@ -52,15 +57,15 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { Node compare = BitfieldExtract(mask, offset + shift, 1); Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), move(value)); + Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); bb.push_back(Conditional(condition, {move(code)})); } break; } case OpCode::Id::P2R_IMM: { Node value = Immediate(0); - for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { - Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred), + for (u64 entry = 0; entry < num_entries; ++entry) { + Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), Immediate(0)); value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); } From 255197e64363f9286ed145cafdeb129c85c16621 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 21:56:11 -0300 Subject: [PATCH 3/7] shader/arithmetic_integer: Implement CC for IADD --- externals/sirit | 2 +- .../renderer_opengl/gl_shader_decompiler.cpp | 10 +++++++++ .../renderer_vulkan/vk_shader_decompiler.cpp | 11 ++++++++++ .../shader/decode/arithmetic_integer.cpp | 22 ++++++++++++++++--- src/video_core/shader/node.h | 2 ++ 5 files changed, 43 insertions(+), 4 deletions(-) diff --git a/externals/sirit b/externals/sirit index a712959f1e..414fc4dbd2 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit a712959f1e373a33b48042b5934e288a243d5954 +Subproject commit 414fc4dbd28d8fe48f735a0c389db8a234f733c0 diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0cd3ad7e13..3803a6f3ae 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1870,6 +1870,14 @@ private: return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type); } + Expression LogicalAddCarry(Operation operation) { + const std::string carry = code.GenerateTemporary(); + code.AddLine("uint {};", carry); + code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), + VisitOperand(operation, 1).AsUint(), carry); + return {fmt::format("({} != 0)", carry), Type::Bool}; + } + Expression LogicalFIsNan(Operation operation) { return GenerateUnary(operation, "isnan", Type::Bool, Type::Float); } @@ -2441,6 +2449,8 @@ private: &GLSLDecompiler::LogicalNotEqual, &GLSLDecompiler::LogicalGreaterEqual, + &GLSLDecompiler::LogicalAddCarry, + &GLSLDecompiler::Logical2HLessThan, &GLSLDecompiler::Logical2HEqual, &GLSLDecompiler::Logical2HLessEqual, diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index aaa138f527..20b6ca0ad7 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1584,6 +1584,15 @@ private: return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat}; } + Expression LogicalAddCarry(Operation operation) { + const Id op_a = AsUint(Visit(operation[0])); + const Id op_b = AsUint(Visit(operation[1])); + + const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); + const Id carry = OpCompositeExtract(t_uint, result, 1); + return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool}; + } + Expression LogicalAssign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; @@ -2518,6 +2527,8 @@ private: &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>, &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>, + &SPIRVDecompiler::LogicalAddCarry, + &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 9af8c606d6..99b4b6342f 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -40,10 +40,26 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); + Node value = Operation(OperationCode::IAdd, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); + if (instr.generates_cc) { + const Node i0 = Immediate(0); + + Node zero = Operation(OperationCode::LogicalIEqual, value, i0); + Node sign = Operation(OperationCode::LogicalILessThan, value, i0); + Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); + + Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); + Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); + Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); + Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); + + SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); + SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); + SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); + SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); + } + SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::IADD3_C: diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 3eee961f53..3f5a7bc7af 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -132,6 +132,8 @@ enum class OperationCode { LogicalUNotEqual, /// (uint a, uint b) -> bool LogicalUGreaterEqual, /// (uint a, uint b) -> bool + LogicalAddCarry, /// (uint a, uint b) -> bool + Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 From c788f9c0bd9cb0b0cb66f7424a65032cca3731cc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 22:41:20 -0300 Subject: [PATCH 4/7] shader/arithmetic_integer: Implement IADD.X IADD.X takes the carry flag and adds it to the result. This is generally used to emulate 64-bit operations with 32-bit registers. --- src/video_core/engines/shader_bytecode.h | 4 ++++ src/video_core/shader/decode/arithmetic_integer.cpp | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index cde3a26b9d..8dae754d49 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -813,6 +813,10 @@ union Instruction { BitField<49, 1, u64> negate_a; } alu_integer; + union { + BitField<43, 1, u64> x; + } iadd; + union { BitField<39, 1, u64> ftz; BitField<32, 1, u64> saturate; diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 99b4b6342f..2a3311cb8e 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -42,6 +42,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { Node value = Operation(OperationCode::IAdd, op_a, op_b); + if (instr.iadd.x) { + Node carry = GetInternalFlag(InternalFlag::Carry); + Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); + value = Operation(OperationCode::IAdd, std::move(value), std::move(x)); + } + if (instr.generates_cc) { const Node i0 = Immediate(0); From 2a96bea6a7efd9efaaa8d1d72f1eb8ca27cb81f8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 22:42:33 -0300 Subject: [PATCH 5/7] shader/arithmetic_integer: Change IAdd to UAdd to avoid signed overflow Signed integer addition overflow might be undefined behavior. It's free to change operations to UAdd and use unsigned integers to avoid potential bugs. --- src/video_core/shader/decode/arithmetic_integer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 2a3311cb8e..addd7f5333 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -40,12 +40,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - Node value = Operation(OperationCode::IAdd, op_a, op_b); + Node value = Operation(OperationCode::UAdd, op_a, op_b); if (instr.iadd.x) { Node carry = GetInternalFlag(InternalFlag::Carry); Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); - value = Operation(OperationCode::IAdd, std::move(value), std::move(x)); + value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); } if (instr.generates_cc) { From e895a4e2d7ebf29fbafda7d950301cd7d03efdbb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 22:53:54 -0300 Subject: [PATCH 6/7] shader/arithmetic_integer: Fix edge case and mark IADD.X Rd.CC as unimplemented IADD.X Rd.CC requires some extra logic that is not currently implemented. Abort when this is hit. --- src/video_core/shader/decode/arithmetic_integer.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index addd7f5333..ced5c3dc12 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -35,7 +35,8 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::IADD_C: case OpCode::Id::IADD_R: case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented"); + UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); + UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); @@ -49,6 +50,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { } if (instr.generates_cc) { + // Avoid changing result's carry flag + SetTemporary(bb, 0, std::move(value)); + value = GetTemporary(0); + const Node i0 = Immediate(0); Node zero = Operation(OperationCode::LogicalIEqual, value, i0); From 871aadbe36b7480f7dc318e2a8bebb064a1fbaaf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 28 Apr 2020 17:14:53 -0300 Subject: [PATCH 7/7] shader/arithmetic_integer: Fix tracking issue in temporary This temporary is not needed as we mark Rd.CC + IADD.X as unimplemented. It caused issues when tracking global buffers. --- src/video_core/shader/decode/arithmetic_integer.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index ced5c3dc12..a041519b74 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -50,10 +50,6 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { } if (instr.generates_cc) { - // Avoid changing result's carry flag - SetTemporary(bb, 0, std::move(value)); - value = GetTemporary(0); - const Node i0 = Immediate(0); Node zero = Operation(OperationCode::LogicalIEqual, value, i0);