From 3d65aa4caf88a440eeaf2082b1f5ca3e2c41317c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Oct 2018 02:08:31 -0300 Subject: [PATCH] gl_shader_decompiler: Implement HFMA2 instructions --- src/video_core/engines/shader_bytecode.h | 32 +++++++++++ .../renderer_opengl/gl_shader_decompiler.cpp | 53 +++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 3fbdd20b88..23bfd89885 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -614,6 +614,29 @@ union Instruction { } } half_imm; + union { + union { + BitField<37, 2, HalfPrecision> precision; + BitField<32, 1, u64> saturate; + + BitField<30, 1, u64> negate_c; + BitField<35, 2, HalfType> type_c; + } rr; + + BitField<57, 2, HalfPrecision> precision; + BitField<52, 1, u64> saturate; + + BitField<49, 2, HalfMerge> merge; + + BitField<47, 2, HalfType> type_a; + + BitField<56, 1, u64> negate_b; + BitField<28, 2, HalfType> type_b; + + BitField<51, 1, u64> negate_c; + BitField<53, 2, HalfType> type_reg39; + } hfma2; + union { BitField<40, 1, u64> invert; } popc; @@ -1212,6 +1235,10 @@ public: HMUL2_C, HMUL2_R, HMUL2_IMM, + HFMA2_CR, + HFMA2_RC, + HFMA2_RR, + HFMA2_IMM_R, POPC_C, POPC_R, POPC_IMM, @@ -1290,6 +1317,7 @@ public: Bfe, Shift, Ffma, + Hfma2, Flow, Synch, Memory, @@ -1464,6 +1492,10 @@ private: INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"), + INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"), + INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), + INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), + INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ab30aafc3a..ca2030e975 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1964,6 +1964,59 @@ private: instr.alu.saturate_d); break; } + case OpCode::Type::Hfma2: { + if (opcode->GetId() == OpCode::Id::HFMA2_RR) { + ASSERT_MSG(instr.hfma2.rr.precision == Tegra::Shader::HalfPrecision::None, + "Unimplemented"); + } else { + ASSERT_MSG(instr.hfma2.precision == Tegra::Shader::HalfPrecision::None, + "Unimplemented"); + } + const bool saturate = opcode->GetId() == OpCode::Id::HFMA2_RR + ? instr.hfma2.rr.saturate != 0 + : instr.hfma2.saturate != 0; + + const std::string op_a = + GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a); + std::string op_b, op_c; + + switch (opcode->GetId()) { + case OpCode::Id::HFMA2_CR: + op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::UnsignedInteger), + instr.hfma2.type_b, false, instr.hfma2.negate_b); + op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), + instr.hfma2.type_reg39, false, instr.hfma2.negate_c); + break; + case OpCode::Id::HFMA2_RC: + op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), + instr.hfma2.type_reg39, false, instr.hfma2.negate_b); + op_c = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::UnsignedInteger), + instr.hfma2.type_b, false, instr.hfma2.negate_c); + break; + case OpCode::Id::HFMA2_RR: + op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), + instr.hfma2.type_b, false, instr.hfma2.negate_b); + op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), + instr.hfma2.rr.type_c, false, instr.hfma2.rr.negate_c); + break; + case OpCode::Id::HFMA2_IMM_R: + op_b = UnpackHalfImmediate(instr, true); + op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), + instr.hfma2.type_reg39, false, instr.hfma2.negate_c); + break; + default: + UNREACHABLE(); + op_c = op_b = "vec2(0)"; + break; + } + + const std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; + + regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.hfma2.merge, 1, 1, saturate); + break; + } case OpCode::Type::Conversion: { switch (opcode->GetId()) { case OpCode::Id::I2I_R: {