diff --git a/externals/dynarmic b/externals/dynarmic index 19a423034e..f9696760db 160000 --- a/externals/dynarmic +++ b/externals/dynarmic @@ -1 +1 @@ -Subproject commit 19a423034e1abcaf1a61fa61ceffffebf45a0240 +Subproject commit f9696760db4f63a413093dedd185875da64dff58 diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 2810cec15f..877e0faa44 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -176,6 +176,7 @@ void RestoreGlobalState(bool is_powered_on) { values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true); values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true); values.cpuopt_unsafe_fastmem_check.SetGlobal(true); + values.cpuopt_unsafe_ignore_global_monitor.SetGlobal(true); // Renderer values.renderer_backend.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index d06b23a145..a37d83fb39 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -484,12 +484,15 @@ struct Values { BasicSetting cpuopt_misc_ir{true, "cpuopt_misc_ir"}; BasicSetting cpuopt_reduce_misalign_checks{true, "cpuopt_reduce_misalign_checks"}; BasicSetting cpuopt_fastmem{true, "cpuopt_fastmem"}; + BasicSetting cpuopt_fastmem_exclusives{true, "cpuopt_fastmem_exclusives"}; + BasicSetting cpuopt_recompile_exclusives{true, "cpuopt_recompile_exclusives"}; Setting cpuopt_unsafe_unfuse_fma{true, "cpuopt_unsafe_unfuse_fma"}; Setting cpuopt_unsafe_reduce_fp_error{true, "cpuopt_unsafe_reduce_fp_error"}; Setting cpuopt_unsafe_ignore_standard_fpcr{true, "cpuopt_unsafe_ignore_standard_fpcr"}; Setting cpuopt_unsafe_inaccurate_nan{true, "cpuopt_unsafe_inaccurate_nan"}; Setting cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"}; + Setting cpuopt_unsafe_ignore_global_monitor{true, "cpuopt_unsafe_ignore_global_monitor"}; // Renderer RangedSetting renderer_backend{ diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index b0d89c5398..286976623f 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -137,6 +137,8 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS; config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; config.only_detect_misalignment_via_page_table_on_page_boundary = true; + config.fastmem_exclusive_access = true; + config.recompile_on_exclusive_fastmem_failure = true; // Multi-process state config.processor_id = core_index; @@ -178,6 +180,12 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_fastmem) { config.fastmem_pointer = nullptr; } + if (!Settings::values.cpuopt_fastmem_exclusives) { + config.fastmem_exclusive_access = false; + } + if (!Settings::values.cpuopt_recompile_exclusives) { + config.recompile_on_exclusive_fastmem_failure = false; + } } // Unsafe optimizations @@ -195,6 +203,9 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* if (Settings::values.cpuopt_unsafe_inaccurate_nan) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; } + if (Settings::values.cpuopt_unsafe_ignore_global_monitor) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; + } } // Curated optimizations @@ -203,6 +214,7 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; } return std::make_unique(config); diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 56836bd051..d96226c41f 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -185,6 +185,9 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* config.fastmem_pointer = page_table->fastmem_arena; config.fastmem_address_space_bits = address_space_bits; config.silently_mirror_fastmem = false; + + config.fastmem_exclusive_access = true; + config.recompile_on_exclusive_fastmem_failure = true; } // Multi-process state @@ -237,6 +240,12 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_fastmem) { config.fastmem_pointer = nullptr; } + if (!Settings::values.cpuopt_fastmem_exclusives) { + config.fastmem_exclusive_access = false; + } + if (!Settings::values.cpuopt_recompile_exclusives) { + config.recompile_on_exclusive_fastmem_failure = false; + } } // Unsafe optimizations @@ -254,6 +263,9 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* if (Settings::values.cpuopt_unsafe_fastmem_check) { config.fastmem_address_space_bits = 64; } + if (Settings::values.cpuopt_unsafe_ignore_global_monitor) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; + } } // Curated optimizations @@ -262,6 +274,7 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; config.fastmem_address_space_bits = 64; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; } return std::make_shared(config); diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp index 397d054a89..ea6b224e09 100644 --- a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp @@ -37,8 +37,8 @@ u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr ad }); } -void DynarmicExclusiveMonitor::ClearExclusive() { - monitor.Clear(); +void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) { + monitor.ClearProcessor(core_index); } bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) { diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h index 265c4ecef3..5a15b43ef4 100644 --- a/src/core/arm/dynarmic/arm_exclusive_monitor.h +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h @@ -29,7 +29,7 @@ public: u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override; u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override; u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override; - void ClearExclusive() override; + void ClearExclusive(std::size_t core_index) override; bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override; bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override; diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h index 62f6e60231..9914ca3da6 100644 --- a/src/core/arm/exclusive_monitor.h +++ b/src/core/arm/exclusive_monitor.h @@ -23,7 +23,7 @@ public: virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0; virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0; virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0; - virtual void ClearExclusive() = 0; + virtual void ClearExclusive(std::size_t core_index) = 0; virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0; virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0; diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp index 783c698581..1d1f5e5f8c 100644 --- a/src/core/hle/kernel/k_address_arbiter.cpp +++ b/src/core/hle/kernel/k_address_arbiter.cpp @@ -49,7 +49,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu } } else { // Otherwise, clear our exclusive hold and finish - monitor.ClearExclusive(); + monitor.ClearExclusive(current_core); } // We're done. @@ -78,7 +78,7 @@ bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32 } } else { // Otherwise, clear our exclusive hold and finish. - monitor.ClearExclusive(); + monitor.ClearExclusive(current_core); } // We're done. diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index f915bd856e..c2b66ff149 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -609,6 +609,7 @@ void Config::ReadCpuValues() { ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr); ReadGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan); ReadGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check); + ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor); if (global) { ReadBasicSetting(Settings::values.cpu_debug_mode); @@ -621,6 +622,8 @@ void Config::ReadCpuValues() { ReadBasicSetting(Settings::values.cpuopt_misc_ir); ReadBasicSetting(Settings::values.cpuopt_reduce_misalign_checks); ReadBasicSetting(Settings::values.cpuopt_fastmem); + ReadBasicSetting(Settings::values.cpuopt_fastmem_exclusives); + ReadBasicSetting(Settings::values.cpuopt_recompile_exclusives); } qt_config->endGroup(); @@ -1139,6 +1142,7 @@ void Config::SaveCpuValues() { WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr); WriteGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan); WriteGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check); + WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor); if (global) { WriteBasicSetting(Settings::values.cpu_debug_mode); diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp index f66cab5d45..bf74ccc7c2 100644 --- a/src/yuzu/configuration/configure_cpu.cpp +++ b/src/yuzu/configuration/configure_cpu.cpp @@ -36,6 +36,7 @@ void ConfigureCpu::SetConfiguration() { ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock); ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock); + ui->cpuopt_unsafe_ignore_global_monitor->setEnabled(runtime_lock); ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()); ui->cpuopt_unsafe_reduce_fp_error->setChecked( @@ -46,6 +47,8 @@ void ConfigureCpu::SetConfiguration() { Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()); ui->cpuopt_unsafe_fastmem_check->setChecked( Settings::values.cpuopt_unsafe_fastmem_check.GetValue()); + ui->cpuopt_unsafe_ignore_global_monitor->setChecked( + Settings::values.cpuopt_unsafe_ignore_global_monitor.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->accuracy->setCurrentIndex(static_cast(Settings::values.cpu_accuracy.GetValue())); @@ -82,6 +85,9 @@ void ConfigureCpu::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check, ui->cpuopt_unsafe_fastmem_check, cpuopt_unsafe_fastmem_check); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_global_monitor, + ui->cpuopt_unsafe_ignore_global_monitor, + cpuopt_unsafe_ignore_global_monitor); } void ConfigureCpu::changeEvent(QEvent* event) { @@ -120,4 +126,7 @@ void ConfigureCpu::SetupPerGameUI() { ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check, Settings::values.cpuopt_unsafe_fastmem_check, cpuopt_unsafe_fastmem_check); + ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_global_monitor, + Settings::values.cpuopt_unsafe_ignore_global_monitor, + cpuopt_unsafe_ignore_global_monitor); } diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h index ed9af0e9fe..733e38be49 100644 --- a/src/yuzu/configuration/configure_cpu.h +++ b/src/yuzu/configuration/configure_cpu.h @@ -45,6 +45,7 @@ private: ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr; ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan; ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check; + ConfigurationShared::CheckState cpuopt_unsafe_ignore_global_monitor; const Core::System& system; }; diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index d8064db241..5d80a8c91a 100644 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui @@ -150,6 +150,18 @@ + + + + + <div>This option improves speed by relying only on the semantics of cmpxchg to ensure safety of exclusive access instructions. Please note this may result in deadlocks and other race conditions.</div> + + + + Ignore global monitor + + + diff --git a/src/yuzu/configuration/configure_cpu_debug.cpp b/src/yuzu/configuration/configure_cpu_debug.cpp index 05a90963df..616a0be756 100644 --- a/src/yuzu/configuration/configure_cpu_debug.cpp +++ b/src/yuzu/configuration/configure_cpu_debug.cpp @@ -44,6 +44,12 @@ void ConfigureCpuDebug::SetConfiguration() { Settings::values.cpuopt_reduce_misalign_checks.GetValue()); ui->cpuopt_fastmem->setEnabled(runtime_lock); ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem.GetValue()); + ui->cpuopt_fastmem_exclusives->setEnabled(runtime_lock); + ui->cpuopt_fastmem_exclusives->setChecked( + Settings::values.cpuopt_fastmem_exclusives.GetValue()); + ui->cpuopt_recompile_exclusives->setEnabled(runtime_lock); + ui->cpuopt_recompile_exclusives->setChecked( + Settings::values.cpuopt_recompile_exclusives.GetValue()); } void ConfigureCpuDebug::ApplyConfiguration() { @@ -56,6 +62,8 @@ void ConfigureCpuDebug::ApplyConfiguration() { Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked(); Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked(); Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked(); + Settings::values.cpuopt_fastmem_exclusives = ui->cpuopt_fastmem_exclusives->isChecked(); + Settings::values.cpuopt_recompile_exclusives = ui->cpuopt_recompile_exclusives->isChecked(); } void ConfigureCpuDebug::changeEvent(QEvent* event) { diff --git a/src/yuzu/configuration/configure_cpu_debug.ui b/src/yuzu/configuration/configure_cpu_debug.ui index 6e635bb2f4..2bc2688106 100644 --- a/src/yuzu/configuration/configure_cpu_debug.ui +++ b/src/yuzu/configuration/configure_cpu_debug.ui @@ -144,7 +144,34 @@ - Enable Host MMU Emulation + Enable Host MMU Emulation (general memory instructions) + + + + + + + + <div style="white-space: nowrap">This optimization speeds up exclusive memory accesses by the guest program.</div> + <div style="white-space: nowrap">Enabling it causes guest exclusive memory reads/writes to be done directly into memory and make use of Host's MMU.</div> + <div style="white-space: nowrap">Disabling this forces all exclusive memory accesses to use Software MMU Emulation.</div> + + + + Enable Host MMU Emulation (exclusive memory instructions) + + + + + + + + <div style="white-space: nowrap">This optimization speeds up exclusive memory accesses by the guest program.</div> + <div style="white-space: nowrap">Enabling it reduces the overhead of fastmem failure of exclusive memory accesses.</div> + + + + Enable recompilation of exclusive memory instructions diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 30963a8bbb..b74411c84f 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -280,11 +280,14 @@ void Config::ReadValues() { ReadSetting("Cpu", Settings::values.cpuopt_misc_ir); ReadSetting("Cpu", Settings::values.cpuopt_reduce_misalign_checks); ReadSetting("Cpu", Settings::values.cpuopt_fastmem); + ReadSetting("Cpu", Settings::values.cpuopt_fastmem_exclusives); + ReadSetting("Cpu", Settings::values.cpuopt_recompile_exclusives); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_unfuse_fma); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_reduce_fp_error); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_standard_fpcr); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_inaccurate_nan); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_fastmem_check); + ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_global_monitor); // Renderer ReadSetting("Renderer", Settings::values.renderer_backend); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 6d613bf7a1..3ac1440c9f 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -174,6 +174,14 @@ cpuopt_reduce_misalign_checks = # 0: Disabled, 1 (default): Enabled cpuopt_fastmem = +# Enable Host MMU Emulation for exclusive memory instructions (faster guest memory access) +# 0: Disabled, 1 (default): Enabled +cpuopt_fastmem_exclusives = + +# Enable fallback on failure of fastmem of exclusive memory instructions (faster guest memory access) +# 0: Disabled, 1 (default): Enabled +cpuopt_recompile_exclusives = + # Enable unfuse FMA (improve performance on CPUs without FMA) # Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select. # 0: Disabled, 1 (default): Enabled @@ -199,6 +207,11 @@ cpuopt_unsafe_inaccurate_nan = # 0: Disabled, 1 (default): Enabled cpuopt_unsafe_fastmem_check = +# Enable faster exclusive instructions +# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select. +# 0: Disabled, 1 (default): Enabled +cpuopt_unsafe_ignore_global_monitor = + [Renderer] # Which backend API to use. # 0 (default): OpenGL, 1: Vulkan