From e5d417213ce67bc23ac644132828d125a59c2455 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 19 Mar 2016 15:16:16 -0400 Subject: [PATCH 01/15] emitter: Support arbitrary FixupBranch targets. --- src/common/x64/emitter.cpp | 16 ++++++++++++++++ src/common/x64/emitter.h | 1 + 2 files changed, 17 insertions(+) diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp index 1dcf2416c..6c8d10ea7 100644 --- a/src/common/x64/emitter.cpp +++ b/src/common/x64/emitter.cpp @@ -531,6 +531,22 @@ void XEmitter::SetJumpTarget(const FixupBranch& branch) } } +void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) +{ + if (branch.type == 0) + { + s64 distance = (s64)(target - branch.ptr); + ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); + branch.ptr[-1] = (u8)(s8)distance; + } + else if (branch.type == 1) + { + s64 distance = (s64)(target - branch.ptr); + ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); + ((s32*)branch.ptr)[-1] = (s32)distance; + } +} + //Single byte opcodes //There is no PUSHAD/POPAD in 64-bit mode. void XEmitter::INT3() {Write8(0xCC);} diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h index 7c6548fb5..80dfa96d2 100644 --- a/src/common/x64/emitter.h +++ b/src/common/x64/emitter.h @@ -431,6 +431,7 @@ public: void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); void SetJumpTarget(const FixupBranch& branch); + void SetJumpTarget(const FixupBranch& branch, const u8* target); void SETcc(CCFlags flag, OpArg dest); // Note: CMOV brings small if any benefit on current cpus. From 135aec7beab9e484183565eea9d3cab03fe0b879 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 17 Mar 2016 19:51:43 -0400 Subject: [PATCH 02/15] shader_jit_x64: Fix strict memory aliasing issues. --- src/video_core/shader/shader_jit_x64.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index dffe051ef..d74b58d84 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -741,7 +741,9 @@ void JitCompiler::Compile_Block(unsigned end) { void JitCompiler::Compile_NextInstr(unsigned* offset) { offset_ptr = offset; - Instruction instr = *(Instruction*)&g_state.vs.program_code[(*offset_ptr)++]; + Instruction instr; + std::memcpy(&instr, &g_state.vs.program_code[(*offset_ptr)++], sizeof(Instruction)); + OpCode::Id opcode = instr.opcode.Value(); auto instr_func = instr_table[static_cast(opcode)]; From 4632791a40f8ec5af7e166ff90fd4f8cd69b2745 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 17 Mar 2016 19:45:09 -0400 Subject: [PATCH 03/15] shader_jit_x64: Rewrite flow control to support arbitrary CALL and JMP instructions. --- src/video_core/shader/shader_jit_x64.cpp | 128 +++++++++++++++++------ src/video_core/shader/shader_jit_x64.h | 32 +++++- 2 files changed, 122 insertions(+), 38 deletions(-) diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index d74b58d84..c798992ec 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -137,6 +137,15 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b; /// Raw constant for the destination register enable mask that indicates all components are enabled static const u8 NO_DEST_REG_MASK = 0xf; +/** + * Get the vertex shader instruction for a given offset in the current shader program + * @param offset Offset in the current shader program of the instruction + * @return Instruction at the specified offset + */ +static Instruction GetVertexShaderInstruction(size_t offset) { + return { g_state.vs.program_code[offset] }; +} + /** * Loads and swizzles a source register into the specified XMM register. * @param instr VS instruction, used for determining how to load the source register @@ -564,10 +573,23 @@ void JitCompiler::Compile_END(Instruction instr) { } void JitCompiler::Compile_CALL(Instruction instr) { - unsigned offset = instr.flow_control.dest_offset; - while (offset < (instr.flow_control.dest_offset + instr.flow_control.num_instructions)) { - Compile_NextInstr(&offset); - } + // Need to advance the return address past the proceeding instructions, this is the number of bytes to skip + constexpr unsigned SKIP = 21; + const uintptr_t start = reinterpret_cast(GetCodePtr()); + + // Push return address - not using CALL because we also want to push the offset of the return before jumping + MOV(64, R(RAX), ImmPtr(GetCodePtr() + SKIP)); + PUSH(RAX); + + // Push offset of the return + PUSH(32, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions)); + + // Jump + FixupBranch b = J(true); + fixup_branches.push_back({ b, instr.flow_control.dest_offset }); + + // Make sure that if the above code changes, SKIP gets updated + ASSERT(reinterpret_cast(GetCodePtr()) - start == SKIP); } void JitCompiler::Compile_CALLC(Instruction instr) { @@ -645,8 +667,8 @@ void JitCompiler::Compile_MAD(Instruction instr) { } void JitCompiler::Compile_IF(Instruction instr) { - ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported", - *offset_ptr, instr.flow_control.dest_offset.Value()); + ASSERT_MSG(instr.flow_control.dest_offset > last_program_counter, "Backwards if-statements (%d -> %d) not supported", + last_program_counter, instr.flow_control.dest_offset.Value()); // Evaluate the "IF" condition if (instr.opcode.Value() == OpCode::Id::IFU) { @@ -677,8 +699,8 @@ void JitCompiler::Compile_IF(Instruction instr) { } void JitCompiler::Compile_LOOP(Instruction instr) { - ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported", - *offset_ptr, instr.flow_control.dest_offset.Value()); + ASSERT_MSG(instr.flow_control.dest_offset > last_program_counter, "Backwards loops (%d -> %d) not supported", + last_program_counter, instr.flow_control.dest_offset.Value()); ASSERT_MSG(!looping, "Nested loops not supported"); looping = true; @@ -706,9 +728,6 @@ void JitCompiler::Compile_LOOP(Instruction instr) { } void JitCompiler::Compile_JMP(Instruction instr) { - ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported", - *offset_ptr, instr.flow_control.dest_offset.Value()); - if (instr.opcode.Value() == OpCode::Id::JMPC) Compile_EvaluateCondition(instr); else if (instr.opcode.Value() == OpCode::Id::JMPU) @@ -718,31 +737,42 @@ void JitCompiler::Compile_JMP(Instruction instr) { bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1); + FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); - - Compile_Block(instr.flow_control.dest_offset); - - SetJumpTarget(b); + fixup_branches.push_back({ b, instr.flow_control.dest_offset }); } void JitCompiler::Compile_Block(unsigned end) { - // Save current offset pointer - unsigned* prev_offset_ptr = offset_ptr; - unsigned offset = *prev_offset_ptr; - - while (offset < end) - Compile_NextInstr(&offset); - - // Restore current offset pointer - offset_ptr = prev_offset_ptr; - *offset_ptr = offset; + while (program_counter < end) { + Compile_NextInstr(); + } } -void JitCompiler::Compile_NextInstr(unsigned* offset) { - offset_ptr = offset; +void JitCompiler::Compile_Return() { + // Peek return offset on the stack and check if we're at that offset + MOV(64, R(RAX), MDisp(RSP, 0)); + CMP(32, R(RAX), Imm32(program_counter)); - Instruction instr; - std::memcpy(&instr, &g_state.vs.program_code[(*offset_ptr)++], sizeof(Instruction)); + // If so, jump back to before CALL + FixupBranch b = J_CC(CC_NZ, true); + ADD(64, R(RSP), Imm32(8)); // Ignore return offset that's on the stack + POP(RAX); // Pop off return address + JMPptr(R(RAX)); + SetJumpTarget(b); +} + +void JitCompiler::Compile_NextInstr() { + last_program_counter = program_counter; + + auto search = return_offsets.find(program_counter); + if (search != return_offsets.end()) { + Compile_Return(); + } + + ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!"); + code_ptr[program_counter] = GetCodePtr(); + + Instruction instr = GetVertexShaderInstruction(program_counter++); OpCode::Id opcode = instr.opcode.Value(); auto instr_func = instr_table[static_cast(opcode)]; @@ -757,9 +787,24 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) { } } +void JitCompiler::FindReturnOffsets() { + return_offsets.clear(); + + for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) { + Instruction instr = GetVertexShaderInstruction(offset); + + switch (instr.opcode.Value()) { + case OpCode::Id::CALL: + case OpCode::Id::CALLC: + case OpCode::Id::CALLU: + return_offsets.insert(instr.flow_control.dest_offset + instr.flow_control.num_instructions); + break; + } + } +} + CompiledShader* JitCompiler::Compile() { const u8* start = GetCodePtr(); - unsigned offset = g_state.regs.vs.main_offset; // The stack pointer is 8 modulo 16 at the entry of a procedure ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); @@ -782,10 +827,27 @@ CompiledShader* JitCompiler::Compile() { MOV(PTRBITS, R(RAX), ImmPtr(&neg)); MOVAPS(NEGBIT, MatR(RAX)); - looping = false; + // Find all `CALL` instructions and identify return locations + FindReturnOffsets(); - while (offset < g_state.vs.program_code.size()) { - Compile_NextInstr(&offset); + // Reset flow control state + last_program_counter = 0; + program_counter = 0; + looping = false; + code_ptr.fill(nullptr); + fixup_branches.clear(); + + // Jump to start of the shader program + if (g_state.regs.vs.main_offset != 0) { + fixup_branches.push_back({ J(true), g_state.regs.vs.main_offset }); + } + + // Compile entire program + Compile_Block(static_cast(g_state.vs.program_code.size())); + + // Set the target for any incomplete branches now that the entire shader program has been emitted + for (const auto& branch : fixup_branches) { + SetJumpTarget(branch.first, code_ptr[branch.second]); } return (CompiledShader*)start; diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 5357c964b..d6f03892d 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -4,6 +4,9 @@ #pragma once +#include +#include + #include #include "common/x64/emitter.h" @@ -66,8 +69,9 @@ public: void Compile_MAD(Instruction instr); private: + void Compile_Block(unsigned end); - void Compile_NextInstr(unsigned* offset); + void Compile_NextInstr(); void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); @@ -81,13 +85,31 @@ private: void Compile_EvaluateCondition(Instruction instr); void Compile_UniformCondition(Instruction instr); + /** + * Emits the code to conditionally return from a subroutine envoked by the `CALL` instruction. + */ + void Compile_Return(); + BitSet32 PersistentCallerSavedRegs(); - /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. - unsigned* offset_ptr = nullptr; + /** + * Analyzes the entire shader program for `CALL` instructions before emitting any code, + * identifying the locations where a return needs to be inserted. + */ + void FindReturnOffsets(); - /// Set to true if currently in a loop, used to check for the existence of nested loops - bool looping = false; + /// Mapping of Pica VS instructions to pointers in the emitted code + std::array code_ptr; + + /// Offsets in code where a return needs to be inserted + std::set return_offsets; + + unsigned last_program_counter; ///< Offset of the most recent instruction decoded + unsigned program_counter; ///< Offset of the next instruction to decode + bool looping = false; ///< True if compiling a loop, used to check for nested loops + + /// Branches that need to be fixed up once the entire shader program is compiled + std::vector> fixup_branches; }; } // Shader From c9d10de644078a29e2310791ee221f3bc916e923 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 20 Mar 2016 00:37:05 -0400 Subject: [PATCH 04/15] shader_jit_x64: Allocate each program independently and persist for emu session. --- src/video_core/shader/shader.cpp | 29 ++++++++---------------- src/video_core/shader/shader_jit_x64.cpp | 17 +++++++------- src/video_core/shader/shader_jit_x64.h | 20 ++++++++-------- 3 files changed, 28 insertions(+), 38 deletions(-) diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 78d295c76..e17368a4a 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -28,15 +28,8 @@ namespace Pica { namespace Shader { #ifdef ARCHITECTURE_x86_64 -static std::unordered_map shader_map; -static JitCompiler jit; -static CompiledShader* jit_shader; - -static void ClearCache() { - shader_map.clear(); - jit.Clear(); - LOG_INFO(HW_GPU, "Shader JIT cache cleared"); -} +static std::unordered_map> shader_map; +static const JitCompiler* jit_shader; #endif // ARCHITECTURE_x86_64 void Setup(UnitState& state) { @@ -48,16 +41,12 @@ void Setup(UnitState& state) { auto iter = shader_map.find(cache_key); if (iter != shader_map.end()) { - jit_shader = iter->second; + jit_shader = iter->second.get(); } else { - // Check if remaining JIT code space is enough for at least one more (massive) shader - if (jit.GetSpaceLeft() < jit_shader_size) { - // If not, clear the cache of all previously compiled shaders - ClearCache(); - } - - jit_shader = jit.Compile(); - shader_map.emplace(cache_key, jit_shader); + auto shader = std::make_unique(); + shader->Compile(); + jit_shader = shader.get(); + shader_map[cache_key] = std::move(shader); } } #endif // ARCHITECTURE_x86_64 @@ -65,7 +54,7 @@ void Setup(UnitState& state) { void Shutdown() { #ifdef ARCHITECTURE_x86_64 - ClearCache(); + shader_map.clear(); #endif // ARCHITECTURE_x86_64 } @@ -109,7 +98,7 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attr #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) - jit_shader(&state.registers); + jit_shader->Run(&state.registers); else RunInterpreter(state); #else diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index c798992ec..3da4e51fa 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -589,7 +589,7 @@ void JitCompiler::Compile_CALL(Instruction instr) { fixup_branches.push_back({ b, instr.flow_control.dest_offset }); // Make sure that if the above code changes, SKIP gets updated - ASSERT(reinterpret_cast(GetCodePtr()) - start == SKIP); + ASSERT(reinterpret_cast(GetCodePtr()) - start == SKIP); } void JitCompiler::Compile_CALLC(Instruction instr) { @@ -803,8 +803,8 @@ void JitCompiler::FindReturnOffsets() { } } -CompiledShader* JitCompiler::Compile() { - const u8* start = GetCodePtr(); +void JitCompiler::Compile() { + program = (CompiledShader*)GetCodePtr(); // The stack pointer is 8 modulo 16 at the entry of a procedure ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); @@ -850,15 +850,14 @@ CompiledShader* JitCompiler::Compile() { SetJumpTarget(branch.first, code_ptr[branch.second]); } - return (CompiledShader*)start; + uintptr_t size = reinterpret_cast(GetCodePtr()) - reinterpret_cast(program); + ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); + + LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size); } JitCompiler::JitCompiler() { - AllocCodeSpace(jit_cache_size); -} - -void JitCompiler::Clear() { - ClearCodeSpace(); + AllocCodeSpace(MAX_SHADER_SIZE); } } // namespace Shader diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index d6f03892d..19f9bdb56 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -22,10 +22,8 @@ namespace Pica { namespace Shader { -/// Memory needed to be available to compile the next shader (otherwise, clear the cache) -constexpr size_t jit_shader_size = 1024 * 512; -/// Memory allocated for the JIT code space cache -constexpr size_t jit_cache_size = 1024 * 1024 * 8; +/// Memory allocated for each compiled shader (64Kb) +constexpr size_t MAX_SHADER_SIZE = 1024 * 64; using CompiledShader = void(void* registers); @@ -37,9 +35,11 @@ class JitCompiler : public Gen::XCodeBlock { public: JitCompiler(); - CompiledShader* Compile(); + void Run(void* registers) const { + program(registers); + } - void Clear(); + void Compile(); void Compile_ADD(Instruction instr); void Compile_DP3(Instruction instr); @@ -104,12 +104,14 @@ private: /// Offsets in code where a return needs to be inserted std::set return_offsets; - unsigned last_program_counter; ///< Offset of the most recent instruction decoded - unsigned program_counter; ///< Offset of the next instruction to decode - bool looping = false; ///< True if compiling a loop, used to check for nested loops + unsigned last_program_counter = 0; ///< Offset of the most recent instruction decoded + unsigned program_counter = 0; ///< Offset of the next instruction to decode + bool looping = false; ///< True if compiling a loop, used to check for nested loops /// Branches that need to be fixed up once the entire shader program is compiled std::vector> fixup_branches; + + CompiledShader* program = nullptr; }; } // Shader From a5a74eb121e0586706c3196d450c088280f996a5 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 26 Mar 2016 21:02:15 -0400 Subject: [PATCH 05/15] shader_jit_x64: Specify shader main offset at runtime. --- src/video_core/shader/shader.cpp | 5 ++--- src/video_core/shader/shader_jit_x64.cpp | 4 +--- src/video_core/shader/shader_jit_x64.h | 7 +++---- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index e17368a4a..b35413488 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -36,8 +36,7 @@ void Setup(UnitState& state) { #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ - Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^ - g_state.regs.vs.main_offset); + Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); auto iter = shader_map.find(cache_key); if (iter != shader_map.end()) { @@ -98,7 +97,7 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attr #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) - jit_shader->Run(&state.registers); + jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); else RunInterpreter(state); #else diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 3da4e51fa..cbdc1e40f 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -838,9 +838,7 @@ void JitCompiler::Compile() { fixup_branches.clear(); // Jump to start of the shader program - if (g_state.regs.vs.main_offset != 0) { - fixup_branches.push_back({ J(true), g_state.regs.vs.main_offset }); - } + JMPptr(R(ABI_PARAM2)); // Compile entire program Compile_Block(static_cast(g_state.vs.program_code.size())); diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 19f9bdb56..1501d13bf 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -25,8 +25,6 @@ namespace Shader { /// Memory allocated for each compiled shader (64Kb) constexpr size_t MAX_SHADER_SIZE = 1024 * 64; -using CompiledShader = void(void* registers); - /** * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 * code that can be executed on the host machine directly. @@ -35,8 +33,8 @@ class JitCompiler : public Gen::XCodeBlock { public: JitCompiler(); - void Run(void* registers) const { - program(registers); + void Run(void* registers, unsigned offset) const { + program(registers, code_ptr[offset]); } void Compile(); @@ -111,6 +109,7 @@ private: /// Branches that need to be fixed up once the entire shader program is compiled std::vector> fixup_branches; + using CompiledShader = void(void* registers, const u8* start_addr); CompiledShader* program = nullptr; }; From ffcf7ecee9f0b2843783e3678edaffbe1dda8ca2 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 1 Apr 2016 23:33:03 -0400 Subject: [PATCH 06/15] shader: Remove unused 'state' argument from 'Setup' function. --- src/video_core/command_processor.cpp | 4 ++-- src/video_core/shader/shader.cpp | 2 +- src/video_core/shader/shader.h | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 08ec2907a..3abe79c09 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -140,7 +140,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { immediate_attribute_id = 0; Shader::UnitState shader_unit; - Shader::Setup(shader_unit); + Shader::Setup(); if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast(&immediate_input)); @@ -300,7 +300,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { vertex_cache_ids.fill(-1); Shader::UnitState shader_unit; - Shader::Setup(shader_unit); + Shader::Setup(); for (unsigned int index = 0; index < regs.num_vertices; ++index) { diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index b35413488..5214864ec 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -32,7 +32,7 @@ static std::unordered_map> shader_map; static const JitCompiler* jit_shader; #endif // ARCHITECTURE_x86_64 -void Setup(UnitState& state) { +void Setup() { #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 7af8f1fa1..9c5bd97bd 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -339,9 +339,8 @@ struct UnitState { /** * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per * vertex, which would happen within the `Run` function). - * @param state Shader unit state, must be setup per shader and per shader unit */ -void Setup(UnitState& state); +void Setup(); /// Performs any cleanup when the emulator is shutdown void Shutdown(); From f3afe24594bad11d7e0fd28902d1ce1e6e22e3a2 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 2 Apr 2016 00:02:03 -0400 Subject: [PATCH 07/15] shader_jit_x64: Execute certain asserts at runtime. - This is because we compile the full shader code space, and therefore its common to compile malformed instructions. --- src/video_core/shader/shader_jit_x64.cpp | 18 +++++++++++++----- src/video_core/shader/shader_jit_x64.h | 6 ++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index cbdc1e40f..dda9bcef7 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -146,6 +146,16 @@ static Instruction GetVertexShaderInstruction(size_t offset) { return { g_state.vs.program_code[offset] }; } +static void LogCritical(const char* msg) { + LOG_CRITICAL(HW_GPU, msg); +} + +void JitCompiler::RuntimeAssert(bool condition, const char* msg) { + if (!condition) { + ABI_CallFunctionP(reinterpret_cast(LogCritical), const_cast(msg)); + } +} + /** * Loads and swizzles a source register into the specified XMM register. * @param instr VS instruction, used for determining how to load the source register @@ -667,8 +677,7 @@ void JitCompiler::Compile_MAD(Instruction instr) { } void JitCompiler::Compile_IF(Instruction instr) { - ASSERT_MSG(instr.flow_control.dest_offset > last_program_counter, "Backwards if-statements (%d -> %d) not supported", - last_program_counter, instr.flow_control.dest_offset.Value()); + RuntimeAssert(instr.flow_control.dest_offset > last_program_counter, "Backwards if-statements not supported"); // Evaluate the "IF" condition if (instr.opcode.Value() == OpCode::Id::IFU) { @@ -699,9 +708,8 @@ void JitCompiler::Compile_IF(Instruction instr) { } void JitCompiler::Compile_LOOP(Instruction instr) { - ASSERT_MSG(instr.flow_control.dest_offset > last_program_counter, "Backwards loops (%d -> %d) not supported", - last_program_counter, instr.flow_control.dest_offset.Value()); - ASSERT_MSG(!looping, "Nested loops not supported"); + RuntimeAssert(instr.flow_control.dest_offset > last_program_counter, "Backwards loops not supported"); + RuntimeAssert(!looping, "Nested loops not supported"); looping = true; diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 1501d13bf..159b902b2 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -90,6 +90,12 @@ private: BitSet32 PersistentCallerSavedRegs(); + /** + * Assertion evaluated at compile-time, but only triggered if executed at runtime. + * @param msg Message to be logged if the assertion fails. + */ + void RuntimeAssert(bool condition, const char* msg); + /** * Analyzes the entire shader program for `CALL` instructions before emitting any code, * identifying the locations where a return needs to be inserted. From 6e0319eec91341101505b944a652e0b635a51b6e Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 9 Apr 2016 11:24:48 -0400 Subject: [PATCH 08/15] shader_jit_x64: Get rid of unnecessary last_program_counter variable. --- src/video_core/shader/shader_jit_x64.cpp | 7 ++----- src/video_core/shader/shader_jit_x64.h | 1 - 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index dda9bcef7..fae7e8b41 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -677,7 +677,7 @@ void JitCompiler::Compile_MAD(Instruction instr) { } void JitCompiler::Compile_IF(Instruction instr) { - RuntimeAssert(instr.flow_control.dest_offset > last_program_counter, "Backwards if-statements not supported"); + RuntimeAssert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); // Evaluate the "IF" condition if (instr.opcode.Value() == OpCode::Id::IFU) { @@ -708,7 +708,7 @@ void JitCompiler::Compile_IF(Instruction instr) { } void JitCompiler::Compile_LOOP(Instruction instr) { - RuntimeAssert(instr.flow_control.dest_offset > last_program_counter, "Backwards loops not supported"); + RuntimeAssert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); RuntimeAssert(!looping, "Nested loops not supported"); looping = true; @@ -770,8 +770,6 @@ void JitCompiler::Compile_Return() { } void JitCompiler::Compile_NextInstr() { - last_program_counter = program_counter; - auto search = return_offsets.find(program_counter); if (search != return_offsets.end()) { Compile_Return(); @@ -839,7 +837,6 @@ void JitCompiler::Compile() { FindReturnOffsets(); // Reset flow control state - last_program_counter = 0; program_counter = 0; looping = false; code_ptr.fill(nullptr); diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 159b902b2..920a269e2 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -108,7 +108,6 @@ private: /// Offsets in code where a return needs to be inserted std::set return_offsets; - unsigned last_program_counter = 0; ///< Offset of the most recent instruction decoded unsigned program_counter = 0; ///< Offset of the next instruction to decode bool looping = false; ///< True if compiling a loop, used to check for nested loops From 1d45b57939b10bc1bc13ee33ad74e968850af703 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 9 Apr 2016 11:39:56 -0400 Subject: [PATCH 09/15] shader_jit_x64: Separate initialization and code generation for readability. --- src/video_core/shader/shader_jit_x64.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index fae7e8b41..efea55811 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -810,7 +810,15 @@ void JitCompiler::FindReturnOffsets() { } void JitCompiler::Compile() { + // Reset flow control state program = (CompiledShader*)GetCodePtr(); + program_counter = 0; + looping = false; + code_ptr.fill(nullptr); + fixup_branches.clear(); + + // Find all `CALL` instructions and identify return locations + FindReturnOffsets(); // The stack pointer is 8 modulo 16 at the entry of a procedure ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); @@ -833,15 +841,6 @@ void JitCompiler::Compile() { MOV(PTRBITS, R(RAX), ImmPtr(&neg)); MOVAPS(NEGBIT, MatR(RAX)); - // Find all `CALL` instructions and identify return locations - FindReturnOffsets(); - - // Reset flow control state - program_counter = 0; - looping = false; - code_ptr.fill(nullptr); - fixup_branches.clear(); - // Jump to start of the shader program JMPptr(R(ABI_PARAM2)); From 507e0b59896779d0276456c780ad2aefc3dbc28a Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 9 Apr 2016 17:42:48 -0400 Subject: [PATCH 10/15] emitter: Add CALL that can be fixed up. --- src/common/x64/emitter.cpp | 12 ++++++++++++ src/common/x64/emitter.h | 1 + 2 files changed, 13 insertions(+) diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp index 6c8d10ea7..5662f7f86 100644 --- a/src/common/x64/emitter.cpp +++ b/src/common/x64/emitter.cpp @@ -455,6 +455,18 @@ void XEmitter::CALL(const void* fnptr) Write32(u32(distance)); } +FixupBranch XEmitter::CALL() +{ + FixupBranch branch; + branch.type = 1; + branch.ptr = code + 5; + + Write8(0xE8); + Write32(0); + + return branch; +} + FixupBranch XEmitter::J(bool force5bytes) { FixupBranch branch; diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h index 80dfa96d2..a33724146 100644 --- a/src/common/x64/emitter.h +++ b/src/common/x64/emitter.h @@ -425,6 +425,7 @@ public: #undef CALL #endif void CALL(const void* fnptr); + FixupBranch CALL(); void CALLptr(OpArg arg); FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); From 60749f2cda38f35a80a144f990d45c9b016ed0e2 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 9 Apr 2016 17:46:13 -0400 Subject: [PATCH 11/15] shader_jit_x64: Use CALL/RET instead of JMP for subroutines. --- src/video_core/shader/shader_jit_x64.cpp | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index efea55811..503fad158 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -583,23 +583,15 @@ void JitCompiler::Compile_END(Instruction instr) { } void JitCompiler::Compile_CALL(Instruction instr) { - // Need to advance the return address past the proceeding instructions, this is the number of bytes to skip - constexpr unsigned SKIP = 21; - const uintptr_t start = reinterpret_cast(GetCodePtr()); - - // Push return address - not using CALL because we also want to push the offset of the return before jumping - MOV(64, R(RAX), ImmPtr(GetCodePtr() + SKIP)); - PUSH(RAX); - // Push offset of the return - PUSH(32, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions)); + PUSH(64, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions)); - // Jump - FixupBranch b = J(true); + // Call the subroutine + FixupBranch b = CALL(); fixup_branches.push_back({ b, instr.flow_control.dest_offset }); - // Make sure that if the above code changes, SKIP gets updated - ASSERT(reinterpret_cast(GetCodePtr()) - start == SKIP); + // Skip over the return offset that's on the stack + ADD(64, R(RSP), Imm32(8)); } void JitCompiler::Compile_CALLC(Instruction instr) { @@ -758,14 +750,12 @@ void JitCompiler::Compile_Block(unsigned end) { void JitCompiler::Compile_Return() { // Peek return offset on the stack and check if we're at that offset - MOV(64, R(RAX), MDisp(RSP, 0)); + MOV(64, R(RAX), MDisp(RSP, 8)); CMP(32, R(RAX), Imm32(program_counter)); // If so, jump back to before CALL FixupBranch b = J_CC(CC_NZ, true); - ADD(64, R(RSP), Imm32(8)); // Ignore return offset that's on the stack - POP(RAX); // Pop off return address - JMPptr(R(RAX)); + RET(); SetJumpTarget(b); } From 60aa72e1177c436351c91be291ef869816df79e0 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 12 Apr 2016 23:24:34 -0400 Subject: [PATCH 12/15] shader_jit_x64: Use a sorted vector instead of a set for keeping track of return addresses. --- src/video_core/shader/shader_jit_x64.cpp | 9 ++++++--- src/video_core/shader/shader_jit_x64.h | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 503fad158..e32a4e720 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "common/x64/abi.h" @@ -760,8 +761,7 @@ void JitCompiler::Compile_Return() { } void JitCompiler::Compile_NextInstr() { - auto search = return_offsets.find(program_counter); - if (search != return_offsets.end()) { + if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) { Compile_Return(); } @@ -793,10 +793,13 @@ void JitCompiler::FindReturnOffsets() { case OpCode::Id::CALL: case OpCode::Id::CALLC: case OpCode::Id::CALLU: - return_offsets.insert(instr.flow_control.dest_offset + instr.flow_control.num_instructions); + return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); break; } } + + // Sort for efficient binary search later + std::sort(return_offsets.begin(), return_offsets.end()); } void JitCompiler::Compile() { diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 920a269e2..aa5060584 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -4,8 +4,8 @@ #pragma once -#include #include +#include #include @@ -106,7 +106,7 @@ private: std::array code_ptr; /// Offsets in code where a return needs to be inserted - std::set return_offsets; + std::vector return_offsets; unsigned program_counter = 0; ///< Offset of the next instruction to decode bool looping = false; ///< True if compiling a loop, used to check for nested loops From 847fb951e29bb9bfb2735cf6bb1186e0374f3654 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 12 Apr 2016 23:29:25 -0400 Subject: [PATCH 13/15] shader_jit_x64: Free memory that's no longer needed after compilation. --- src/video_core/shader/shader_jit_x64.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index e32a4e720..773542283 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -845,6 +845,12 @@ void JitCompiler::Compile() { SetJumpTarget(branch.first, code_ptr[branch.second]); } + // Free memory that's no longer needed + return_offsets.clear(); + return_offsets.shrink_to_fit(); + fixup_branches.clear(); + fixup_branches.shrink_to_fit(); + uintptr_t size = reinterpret_cast(GetCodePtr()) - reinterpret_cast(program); ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); From 3f623b2561eb829b5c9c3855cb24a612b12f7d6f Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 12 Apr 2016 23:34:03 -0400 Subject: [PATCH 14/15] shader_jit_x64.cpp: Rename JitCompiler to JitShader. --- src/video_core/shader/shader.cpp | 6 +- src/video_core/shader/shader_jit_x64.cpp | 174 +++++++++++------------ src/video_core/shader/shader_jit_x64.h | 4 +- 3 files changed, 92 insertions(+), 92 deletions(-) diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 5214864ec..75301accd 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -28,8 +28,8 @@ namespace Pica { namespace Shader { #ifdef ARCHITECTURE_x86_64 -static std::unordered_map> shader_map; -static const JitCompiler* jit_shader; +static std::unordered_map> shader_map; +static const JitShader* jit_shader; #endif // ARCHITECTURE_x86_64 void Setup() { @@ -42,7 +42,7 @@ void Setup() { if (iter != shader_map.end()) { jit_shader = iter->second.get(); } else { - auto shader = std::make_unique(); + auto shader = std::make_unique(); shader->Compile(); jit_shader = shader.get(); shader_map[cache_key] = std::move(shader); diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 773542283..9369d2fe5 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -20,73 +20,73 @@ namespace Shader { using namespace Gen; -typedef void (JitCompiler::*JitFunction)(Instruction instr); +typedef void (JitShader::*JitFunction)(Instruction instr); const JitFunction instr_table[64] = { - &JitCompiler::Compile_ADD, // add - &JitCompiler::Compile_DP3, // dp3 - &JitCompiler::Compile_DP4, // dp4 - &JitCompiler::Compile_DPH, // dph + &JitShader::Compile_ADD, // add + &JitShader::Compile_DP3, // dp3 + &JitShader::Compile_DP4, // dp4 + &JitShader::Compile_DPH, // dph nullptr, // unknown - &JitCompiler::Compile_EX2, // ex2 - &JitCompiler::Compile_LG2, // lg2 + &JitShader::Compile_EX2, // ex2 + &JitShader::Compile_LG2, // lg2 nullptr, // unknown - &JitCompiler::Compile_MUL, // mul - &JitCompiler::Compile_SGE, // sge - &JitCompiler::Compile_SLT, // slt - &JitCompiler::Compile_FLR, // flr - &JitCompiler::Compile_MAX, // max - &JitCompiler::Compile_MIN, // min - &JitCompiler::Compile_RCP, // rcp - &JitCompiler::Compile_RSQ, // rsq + &JitShader::Compile_MUL, // mul + &JitShader::Compile_SGE, // sge + &JitShader::Compile_SLT, // slt + &JitShader::Compile_FLR, // flr + &JitShader::Compile_MAX, // max + &JitShader::Compile_MIN, // min + &JitShader::Compile_RCP, // rcp + &JitShader::Compile_RSQ, // rsq nullptr, // unknown nullptr, // unknown - &JitCompiler::Compile_MOVA, // mova - &JitCompiler::Compile_MOV, // mov + &JitShader::Compile_MOVA, // mova + &JitShader::Compile_MOV, // mov nullptr, // unknown nullptr, // unknown nullptr, // unknown nullptr, // unknown - &JitCompiler::Compile_DPH, // dphi + &JitShader::Compile_DPH, // dphi nullptr, // unknown - &JitCompiler::Compile_SGE, // sgei - &JitCompiler::Compile_SLT, // slti + &JitShader::Compile_SGE, // sgei + &JitShader::Compile_SLT, // slti nullptr, // unknown nullptr, // unknown nullptr, // unknown nullptr, // unknown nullptr, // unknown - &JitCompiler::Compile_NOP, // nop - &JitCompiler::Compile_END, // end + &JitShader::Compile_NOP, // nop + &JitShader::Compile_END, // end nullptr, // break - &JitCompiler::Compile_CALL, // call - &JitCompiler::Compile_CALLC, // callc - &JitCompiler::Compile_CALLU, // callu - &JitCompiler::Compile_IF, // ifu - &JitCompiler::Compile_IF, // ifc - &JitCompiler::Compile_LOOP, // loop + &JitShader::Compile_CALL, // call + &JitShader::Compile_CALLC, // callc + &JitShader::Compile_CALLU, // callu + &JitShader::Compile_IF, // ifu + &JitShader::Compile_IF, // ifc + &JitShader::Compile_LOOP, // loop nullptr, // emit nullptr, // sete - &JitCompiler::Compile_JMP, // jmpc - &JitCompiler::Compile_JMP, // jmpu - &JitCompiler::Compile_CMP, // cmp - &JitCompiler::Compile_CMP, // cmp - &JitCompiler::Compile_MAD, // madi - &JitCompiler::Compile_MAD, // madi - &JitCompiler::Compile_MAD, // madi - &JitCompiler::Compile_MAD, // madi - &JitCompiler::Compile_MAD, // madi - &JitCompiler::Compile_MAD, // madi - &JitCompiler::Compile_MAD, // madi - &JitCompiler::Compile_MAD, // madi - &JitCompiler::Compile_MAD, // mad - &JitCompiler::Compile_MAD, // mad - &JitCompiler::Compile_MAD, // mad - &JitCompiler::Compile_MAD, // mad - &JitCompiler::Compile_MAD, // mad - &JitCompiler::Compile_MAD, // mad - &JitCompiler::Compile_MAD, // mad - &JitCompiler::Compile_MAD, // mad + &JitShader::Compile_JMP, // jmpc + &JitShader::Compile_JMP, // jmpu + &JitShader::Compile_CMP, // cmp + &JitShader::Compile_CMP, // cmp + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad }; // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can @@ -151,7 +151,7 @@ static void LogCritical(const char* msg) { LOG_CRITICAL(HW_GPU, msg); } -void JitCompiler::RuntimeAssert(bool condition, const char* msg) { +void JitShader::RuntimeAssert(bool condition, const char* msg) { if (!condition) { ABI_CallFunctionP(reinterpret_cast(LogCritical), const_cast(msg)); } @@ -164,7 +164,7 @@ void JitCompiler::RuntimeAssert(bool condition, const char* msg) { * @param src_reg SourceRegister object corresponding to the source register to load * @param dest Destination XMM register to store the loaded, swizzled source register */ -void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { +void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { X64Reg src_ptr; size_t src_offset; @@ -236,7 +236,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source } } -void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { +void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { DestRegister dest; unsigned operand_desc_id; if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || @@ -283,7 +283,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { } } -void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) { +void JitShader::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) { MOVAPS(scratch, R(src1)); CMPPS(scratch, R(src2), CMP_ORD); @@ -296,7 +296,7 @@ void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen:: ANDPS(src1, R(scratch)); } -void JitCompiler::Compile_EvaluateCondition(Instruction instr) { +void JitShader::Compile_EvaluateCondition(Instruction instr) { // Note: NXOR is used below to check for equality switch (instr.flow_control.op) { case Instruction::FlowControlType::Or: @@ -327,23 +327,23 @@ void JitCompiler::Compile_EvaluateCondition(Instruction instr) { } } -void JitCompiler::Compile_UniformCondition(Instruction instr) { +void JitShader::Compile_UniformCondition(Instruction instr) { int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); } -BitSet32 JitCompiler::PersistentCallerSavedRegs() { +BitSet32 JitShader::PersistentCallerSavedRegs() { return persistent_regs & ABI_ALL_CALLER_SAVED; } -void JitCompiler::Compile_ADD(Instruction instr) { +void JitShader::Compile_ADD(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); ADDPS(SRC1, R(SRC2)); Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_DP3(Instruction instr) { +void JitShader::Compile_DP3(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); @@ -362,7 +362,7 @@ void JitCompiler::Compile_DP3(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_DP4(Instruction instr) { +void JitShader::Compile_DP4(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); @@ -379,7 +379,7 @@ void JitCompiler::Compile_DP4(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_DPH(Instruction instr) { +void JitShader::Compile_DPH(Instruction instr) { if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); @@ -411,7 +411,7 @@ void JitCompiler::Compile_DPH(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_EX2(Instruction instr) { +void JitShader::Compile_EX2(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); MOVSS(XMM0, R(SRC1)); @@ -424,7 +424,7 @@ void JitCompiler::Compile_EX2(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_LG2(Instruction instr) { +void JitShader::Compile_LG2(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); MOVSS(XMM0, R(SRC1)); @@ -437,14 +437,14 @@ void JitCompiler::Compile_LG2(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_MUL(Instruction instr) { +void JitShader::Compile_MUL(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); Compile_SanitizedMul(SRC1, SRC2, SCRATCH); Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_SGE(Instruction instr) { +void JitShader::Compile_SGE(Instruction instr) { if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); @@ -459,7 +459,7 @@ void JitCompiler::Compile_SGE(Instruction instr) { Compile_DestEnable(instr, SRC2); } -void JitCompiler::Compile_SLT(Instruction instr) { +void JitShader::Compile_SLT(Instruction instr) { if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); @@ -474,7 +474,7 @@ void JitCompiler::Compile_SLT(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_FLR(Instruction instr) { +void JitShader::Compile_FLR(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); if (Common::GetCPUCaps().sse4_1) { @@ -487,7 +487,7 @@ void JitCompiler::Compile_FLR(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_MAX(Instruction instr) { +void JitShader::Compile_MAX(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. @@ -495,7 +495,7 @@ void JitCompiler::Compile_MAX(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_MIN(Instruction instr) { +void JitShader::Compile_MIN(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. @@ -503,7 +503,7 @@ void JitCompiler::Compile_MIN(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_MOVA(Instruction instr) { +void JitShader::Compile_MOVA(Instruction instr) { SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { @@ -548,12 +548,12 @@ void JitCompiler::Compile_MOVA(Instruction instr) { } } -void JitCompiler::Compile_MOV(Instruction instr) { +void JitShader::Compile_MOV(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_RCP(Instruction instr) { +void JitShader::Compile_RCP(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica @@ -564,7 +564,7 @@ void JitCompiler::Compile_RCP(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_RSQ(Instruction instr) { +void JitShader::Compile_RSQ(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica @@ -575,15 +575,15 @@ void JitCompiler::Compile_RSQ(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_NOP(Instruction instr) { +void JitShader::Compile_NOP(Instruction instr) { } -void JitCompiler::Compile_END(Instruction instr) { +void JitShader::Compile_END(Instruction instr) { ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); RET(); } -void JitCompiler::Compile_CALL(Instruction instr) { +void JitShader::Compile_CALL(Instruction instr) { // Push offset of the return PUSH(64, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions)); @@ -595,21 +595,21 @@ void JitCompiler::Compile_CALL(Instruction instr) { ADD(64, R(RSP), Imm32(8)); } -void JitCompiler::Compile_CALLC(Instruction instr) { +void JitShader::Compile_CALLC(Instruction instr) { Compile_EvaluateCondition(instr); FixupBranch b = J_CC(CC_Z, true); Compile_CALL(instr); SetJumpTarget(b); } -void JitCompiler::Compile_CALLU(Instruction instr) { +void JitShader::Compile_CALLU(Instruction instr) { Compile_UniformCondition(instr); FixupBranch b = J_CC(CC_Z, true); Compile_CALL(instr); SetJumpTarget(b); } -void JitCompiler::Compile_CMP(Instruction instr) { +void JitShader::Compile_CMP(Instruction instr) { using Op = Instruction::Common::CompareOpType::Op; Op op_x = instr.common.compare_op.x; Op op_y = instr.common.compare_op.y; @@ -652,7 +652,7 @@ void JitCompiler::Compile_CMP(Instruction instr) { SHR(64, R(COND1), Imm8(63)); } -void JitCompiler::Compile_MAD(Instruction instr) { +void JitShader::Compile_MAD(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1); if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { @@ -669,7 +669,7 @@ void JitCompiler::Compile_MAD(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_IF(Instruction instr) { +void JitShader::Compile_IF(Instruction instr) { RuntimeAssert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); // Evaluate the "IF" condition @@ -700,7 +700,7 @@ void JitCompiler::Compile_IF(Instruction instr) { SetJumpTarget(b2); } -void JitCompiler::Compile_LOOP(Instruction instr) { +void JitShader::Compile_LOOP(Instruction instr) { RuntimeAssert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); RuntimeAssert(!looping, "Nested loops not supported"); @@ -728,7 +728,7 @@ void JitCompiler::Compile_LOOP(Instruction instr) { looping = false; } -void JitCompiler::Compile_JMP(Instruction instr) { +void JitShader::Compile_JMP(Instruction instr) { if (instr.opcode.Value() == OpCode::Id::JMPC) Compile_EvaluateCondition(instr); else if (instr.opcode.Value() == OpCode::Id::JMPU) @@ -743,13 +743,13 @@ void JitCompiler::Compile_JMP(Instruction instr) { fixup_branches.push_back({ b, instr.flow_control.dest_offset }); } -void JitCompiler::Compile_Block(unsigned end) { +void JitShader::Compile_Block(unsigned end) { while (program_counter < end) { Compile_NextInstr(); } } -void JitCompiler::Compile_Return() { +void JitShader::Compile_Return() { // Peek return offset on the stack and check if we're at that offset MOV(64, R(RAX), MDisp(RSP, 8)); CMP(32, R(RAX), Imm32(program_counter)); @@ -760,7 +760,7 @@ void JitCompiler::Compile_Return() { SetJumpTarget(b); } -void JitCompiler::Compile_NextInstr() { +void JitShader::Compile_NextInstr() { if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) { Compile_Return(); } @@ -783,7 +783,7 @@ void JitCompiler::Compile_NextInstr() { } } -void JitCompiler::FindReturnOffsets() { +void JitShader::FindReturnOffsets() { return_offsets.clear(); for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) { @@ -802,7 +802,7 @@ void JitCompiler::FindReturnOffsets() { std::sort(return_offsets.begin(), return_offsets.end()); } -void JitCompiler::Compile() { +void JitShader::Compile() { // Reset flow control state program = (CompiledShader*)GetCodePtr(); program_counter = 0; @@ -857,7 +857,7 @@ void JitCompiler::Compile() { LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size); } -JitCompiler::JitCompiler() { +JitShader::JitShader() { AllocCodeSpace(MAX_SHADER_SIZE); } diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index aa5060584..005fbdbe3 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -29,9 +29,9 @@ constexpr size_t MAX_SHADER_SIZE = 1024 * 64; * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 * code that can be executed on the host machine directly. */ -class JitCompiler : public Gen::XCodeBlock { +class JitShader : public Gen::XCodeBlock { public: - JitCompiler(); + JitShader(); void Run(void* registers, unsigned offset) const { program(registers, code_ptr[offset]); From d7fe2784cca9c13d1f79f4063691fc4ced1c4759 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 12 Apr 2016 23:35:36 -0400 Subject: [PATCH 15/15] shader_jit_x64: Rename RuntimeAssert to Compile_Assert. --- src/video_core/shader/shader_jit_x64.cpp | 8 ++++---- src/video_core/shader/shader_jit_x64.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 9369d2fe5..b47d3beda 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -151,7 +151,7 @@ static void LogCritical(const char* msg) { LOG_CRITICAL(HW_GPU, msg); } -void JitShader::RuntimeAssert(bool condition, const char* msg) { +void JitShader::Compile_Assert(bool condition, const char* msg) { if (!condition) { ABI_CallFunctionP(reinterpret_cast(LogCritical), const_cast(msg)); } @@ -670,7 +670,7 @@ void JitShader::Compile_MAD(Instruction instr) { } void JitShader::Compile_IF(Instruction instr) { - RuntimeAssert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); + Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); // Evaluate the "IF" condition if (instr.opcode.Value() == OpCode::Id::IFU) { @@ -701,8 +701,8 @@ void JitShader::Compile_IF(Instruction instr) { } void JitShader::Compile_LOOP(Instruction instr) { - RuntimeAssert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); - RuntimeAssert(!looping, "Nested loops not supported"); + Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); + Compile_Assert(!looping, "Nested loops not supported"); looping = true; diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 005fbdbe3..cd6280ade 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -94,7 +94,7 @@ private: * Assertion evaluated at compile-time, but only triggered if executed at runtime. * @param msg Message to be logged if the assertion fails. */ - void RuntimeAssert(bool condition, const char* msg); + void Compile_Assert(bool condition, const char* msg); /** * Analyzes the entire shader program for `CALL` instructions before emitting any code,