Improved Shader accuracy on Vertex and Geometry Shaders with FFMA, FMUL and FADD

This commit is contained in:
FernandoS27 2018-10-27 20:09:26 -04:00
parent a9dc34ea5c
commit d8d557df86
2 changed files with 58 additions and 6 deletions

View File

@ -1510,8 +1510,25 @@ private:
ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented");
op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
shader.AddLine('{');
++shader.scope;
// This avoids constant-propagation optimizations and keeps the code as in the original.
// Sadly, using the precise keyword causes "linking" errors on fragment shaders.
if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
shader.AddLine("float tmp = " + op_a + " * " + op_b + ';');
} else {
shader.AddLine("precise float tmp = " + op_a + " * " + op_b + ';');
}
regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1,
instr.alu.saturate_d);
--shader.scope;
shader.AddLine('}');
break;
}
case OpCode::Id::FADD_C:
@ -1519,8 +1536,25 @@ private:
case OpCode::Id::FADD_IMM: {
op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
shader.AddLine('{');
++shader.scope;
// This avoids constant-propagation optimizations and keeps the code as in the original.
// Sadly, using the precise keyword causes "linking" errors on fragment shaders.
if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
shader.AddLine("float tmp = " + op_a + " + " + op_b + ';');
} else {
shader.AddLine("precise float tmp = " + op_a + " + " + op_b + ';');
}
regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1,
instr.alu.saturate_d);
--shader.scope;
shader.AddLine('}');
break;
}
case OpCode::Id::MUFU: {
@ -2087,8 +2121,23 @@ private:
}
}
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1,
shader.AddLine('{');
++shader.scope;
// This avoids constant-propagation optimizations and keeps the code as in the original.
// Sadly, using the precise keyword causes "linking" errors on fragment shaders.
if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
shader.AddLine("float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");");
} else {
shader.AddLine("precise float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");");
}
regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1,
instr.alu.saturate_d);
--shader.scope;
shader.AddLine('}');
break;
}
case OpCode::Type::Hfma2: {

View File

@ -15,7 +15,8 @@ static constexpr u32 PROGRAM_OFFSET{10};
ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n";
out += "#extension GL_ARB_gpu_shader5 : enable\n\n";
out += Decompiler::GetCommonDeclarations();
out += R"(
@ -83,7 +84,8 @@ void main() {
ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n";
out += "#extension GL_ARB_gpu_shader5 : enable\n\n";
out += Decompiler::GetCommonDeclarations();
out += "bool exec_geometry();\n";
@ -117,7 +119,8 @@ void main() {
ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n";
out += "#extension GL_ARB_gpu_shader5 : enable\n\n";
out += Decompiler::GetCommonDeclarations();
out += "bool exec_fragment();\n";