diff --git a/CMakeLists.txt b/CMakeLists.txt index f26a0c6b82..91ec50befe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -477,8 +477,8 @@ if (APPLE) find_library(COCOA_LIBRARY Cocoa) set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY}) elseif (WIN32) - # WSAPoll and SHGetKnownFolderPath (AppData/Roaming) didn't exist before WinNT 6.x (Vista) - add_definitions(-D_WIN32_WINNT=0x0600 -DWINVER=0x0600) + # Target Windows 10 + add_definitions(-D_WIN32_WINNT=0x0A00 -DWINVER=0x0A00) set(PLATFORM_LIBRARIES winmm ws2_32 iphlpapi) if (MINGW) # PSAPI is the Process Status API diff --git a/dist/yuzu.manifest b/dist/yuzu.manifest index 10a8df9b57..f2c8639a20 100644 --- a/dist/yuzu.manifest +++ b/dist/yuzu.manifest @@ -36,12 +36,6 @@ SPDX-License-Identifier: GPL-2.0-or-later - - - - - - +#else +#include +#endif + +#include "common/steady_clock.h" + +namespace Common { + +#ifdef _WIN32 +static s64 WindowsQueryPerformanceFrequency() { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + return frequency.QuadPart; +} + +static s64 WindowsQueryPerformanceCounter() { + LARGE_INTEGER counter; + QueryPerformanceCounter(&counter); + return counter.QuadPart; +} +#endif + +SteadyClock::time_point SteadyClock::Now() noexcept { +#if defined(_WIN32) + static const auto freq = WindowsQueryPerformanceFrequency(); + const auto counter = WindowsQueryPerformanceCounter(); + + // 10 MHz is a very common QPC frequency on modern PCs. + // Optimizing for this specific frequency can double the performance of + // this function by avoiding the expensive frequency conversion path. + static constexpr s64 TenMHz = 10'000'000; + + if (freq == TenMHz) [[likely]] { + static_assert(period::den % TenMHz == 0); + static constexpr s64 Multiplier = period::den / TenMHz; + return time_point{duration{counter * Multiplier}}; + } + + const auto whole = (counter / freq) * period::den; + const auto part = (counter % freq) * period::den / freq; + return time_point{duration{whole + part}}; +#elif defined(__APPLE__) + return time_point{duration{clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW)}}; +#else + timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return time_point{std::chrono::seconds{ts.tv_sec} + std::chrono::nanoseconds{ts.tv_nsec}}; +#endif +} + +}; // namespace Common diff --git a/src/common/steady_clock.h b/src/common/steady_clock.h new file mode 100644 index 0000000000..9497cf865d --- /dev/null +++ b/src/common/steady_clock.h @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "common/common_types.h" + +namespace Common { + +struct SteadyClock { + using rep = s64; + using period = std::nano; + using duration = std::chrono::nanoseconds; + using time_point = std::chrono::time_point; + + static constexpr bool is_steady = true; + + [[nodiscard]] static time_point Now() noexcept; +}; + +} // namespace Common diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index ae07f28116..817e71d52f 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/steady_clock.h" #include "common/uint128.h" #include "common/wall_clock.h" @@ -11,45 +12,32 @@ namespace Common { -using base_timer = std::chrono::steady_clock; -using base_time_point = std::chrono::time_point; - class StandardWallClock final : public WallClock { public: explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) - : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) { - start_time = base_timer::now(); - } + : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false}, + start_time{SteadyClock::Now()} {} std::chrono::nanoseconds GetTimeNS() override { - base_time_point current = base_timer::now(); - auto elapsed = current - start_time; - return std::chrono::duration_cast(elapsed); + return SteadyClock::Now() - start_time; } std::chrono::microseconds GetTimeUS() override { - base_time_point current = base_timer::now(); - auto elapsed = current - start_time; - return std::chrono::duration_cast(elapsed); + return std::chrono::duration_cast(GetTimeNS()); } std::chrono::milliseconds GetTimeMS() override { - base_time_point current = base_timer::now(); - auto elapsed = current - start_time; - return std::chrono::duration_cast(elapsed); + return std::chrono::duration_cast(GetTimeNS()); } u64 GetClockCycles() override { - std::chrono::nanoseconds time_now = GetTimeNS(); - const u128 temporary = - Common::Multiply64Into128(time_now.count(), emulated_clock_frequency); - return Common::Divide128On32(temporary, 1000000000).first; + const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); + return Common::Divide128On32(temp, NS_RATIO).first; } u64 GetCPUCycles() override { - std::chrono::nanoseconds time_now = GetTimeNS(); - const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency); - return Common::Divide128On32(temporary, 1000000000).first; + const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); + return Common::Divide128On32(temp, NS_RATIO).first; } void Pause([[maybe_unused]] bool is_paused) override { @@ -57,7 +45,7 @@ public: } private: - base_time_point start_time; + SteadyClock::time_point start_time; }; #ifdef ARCHITECTURE_x86_64 @@ -93,4 +81,9 @@ std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, #endif +std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency, + u64 emulated_clock_frequency) { + return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); +} + } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 828a523a86..157ec5eaea 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -55,4 +55,7 @@ private: [[nodiscard]] std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency); +[[nodiscard]] std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency, + u64 emulated_clock_frequency); + } // namespace Common diff --git a/src/common/windows/timer_resolution.cpp b/src/common/windows/timer_resolution.cpp new file mode 100644 index 0000000000..29c6e5c7e1 --- /dev/null +++ b/src/common/windows/timer_resolution.cpp @@ -0,0 +1,109 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "common/windows/timer_resolution.h" + +extern "C" { +// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtQueryTimerResolution.html +NTSYSAPI LONG NTAPI NtQueryTimerResolution(PULONG MinimumResolution, PULONG MaximumResolution, + PULONG CurrentResolution); + +// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtSetTimerResolution.html +NTSYSAPI LONG NTAPI NtSetTimerResolution(ULONG DesiredResolution, BOOLEAN SetResolution, + PULONG CurrentResolution); + +// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FNT%20Objects%2FThread%2FNtDelayExecution.html +NTSYSAPI LONG NTAPI NtDelayExecution(BOOLEAN Alertable, PLARGE_INTEGER DelayInterval); +} + +// Defines for compatibility with older Windows 10 SDKs. + +#ifndef PROCESS_POWER_THROTTLING_EXECUTION_SPEED +#define PROCESS_POWER_THROTTLING_EXECUTION_SPEED 0x1 +#endif +#ifndef PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION +#define PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION 0x4 +#endif + +namespace Common::Windows { + +namespace { + +using namespace std::chrono; + +constexpr nanoseconds ToNS(ULONG hundred_ns) { + return nanoseconds{hundred_ns * 100}; +} + +constexpr ULONG ToHundredNS(nanoseconds ns) { + return static_cast(ns.count()) / 100; +} + +struct TimerResolution { + std::chrono::nanoseconds minimum; + std::chrono::nanoseconds maximum; + std::chrono::nanoseconds current; +}; + +TimerResolution GetTimerResolution() { + ULONG MinimumTimerResolution; + ULONG MaximumTimerResolution; + ULONG CurrentTimerResolution; + NtQueryTimerResolution(&MinimumTimerResolution, &MaximumTimerResolution, + &CurrentTimerResolution); + return { + .minimum{ToNS(MinimumTimerResolution)}, + .maximum{ToNS(MaximumTimerResolution)}, + .current{ToNS(CurrentTimerResolution)}, + }; +} + +void SetHighQoS() { + // https://learn.microsoft.com/en-us/windows/win32/procthread/quality-of-service + PROCESS_POWER_THROTTLING_STATE PowerThrottling{ + .Version{PROCESS_POWER_THROTTLING_CURRENT_VERSION}, + .ControlMask{PROCESS_POWER_THROTTLING_EXECUTION_SPEED | + PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION}, + .StateMask{}, + }; + SetProcessInformation(GetCurrentProcess(), ProcessPowerThrottling, &PowerThrottling, + sizeof(PROCESS_POWER_THROTTLING_STATE)); +} + +} // Anonymous namespace + +nanoseconds GetMinimumTimerResolution() { + return GetTimerResolution().minimum; +} + +nanoseconds GetMaximumTimerResolution() { + return GetTimerResolution().maximum; +} + +nanoseconds GetCurrentTimerResolution() { + return GetTimerResolution().current; +} + +nanoseconds SetCurrentTimerResolution(nanoseconds timer_resolution) { + // Set the timer resolution, and return the current timer resolution. + const auto DesiredTimerResolution = ToHundredNS(timer_resolution); + ULONG CurrentTimerResolution; + NtSetTimerResolution(DesiredTimerResolution, TRUE, &CurrentTimerResolution); + return ToNS(CurrentTimerResolution); +} + +nanoseconds SetCurrentTimerResolutionToMaximum() { + SetHighQoS(); + return SetCurrentTimerResolution(GetMaximumTimerResolution()); +} + +void SleepForOneTick() { + LARGE_INTEGER DelayInterval{ + .QuadPart{-1}, + }; + NtDelayExecution(FALSE, &DelayInterval); +} + +} // namespace Common::Windows diff --git a/src/common/windows/timer_resolution.h b/src/common/windows/timer_resolution.h new file mode 100644 index 0000000000..e1e50a62dc --- /dev/null +++ b/src/common/windows/timer_resolution.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +namespace Common::Windows { + +/// Returns the minimum (least precise) supported timer resolution in nanoseconds. +std::chrono::nanoseconds GetMinimumTimerResolution(); + +/// Returns the maximum (most precise) supported timer resolution in nanoseconds. +std::chrono::nanoseconds GetMaximumTimerResolution(); + +/// Returns the current timer resolution in nanoseconds. +std::chrono::nanoseconds GetCurrentTimerResolution(); + +/** + * Sets the current timer resolution. + * + * @param timer_resolution Timer resolution in nanoseconds. + * + * @returns The current timer resolution. + */ +std::chrono::nanoseconds SetCurrentTimerResolution(std::chrono::nanoseconds timer_resolution); + +/** + * Sets the current timer resolution to the maximum supported timer resolution. + * + * @returns The current timer resolution. + */ +std::chrono::nanoseconds SetCurrentTimerResolutionToMaximum(); + +/// Sleep for one tick of the current timer resolution. +void SleepForOneTick(); + +} // namespace Common::Windows diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 8b08332ab4..bc1a973b06 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -6,6 +6,7 @@ #include #include "common/atomic_ops.h" +#include "common/steady_clock.h" #include "common/uint128.h" #include "common/x64/native_clock.h" @@ -39,6 +40,12 @@ static u64 FencedRDTSC() { } #endif +template +static u64 RoundToNearest(u64 value) { + const auto mod = value % Nearest; + return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +} + u64 EstimateRDTSCFrequency() { // Discard the first result measuring the rdtsc. FencedRDTSC(); @@ -46,18 +53,18 @@ u64 EstimateRDTSCFrequency() { FencedRDTSC(); // Get the current time. - const auto start_time = std::chrono::steady_clock::now(); + const auto start_time = Common::SteadyClock::Now(); const u64 tsc_start = FencedRDTSC(); - // Wait for 200 milliseconds. - std::this_thread::sleep_for(std::chrono::milliseconds{200}); - const auto end_time = std::chrono::steady_clock::now(); + // Wait for 250 milliseconds. + std::this_thread::sleep_for(std::chrono::milliseconds{250}); + const auto end_time = Common::SteadyClock::Now(); const u64 tsc_end = FencedRDTSC(); // Calculate differences. const u64 timer_diff = static_cast( std::chrono::duration_cast(end_time - start_time).count()); const u64 tsc_diff = tsc_end - tsc_start; const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - return tsc_freq; + return RoundToNearest<1000>(tsc_freq); } namespace X64 { diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 3a63b52e3f..742cfb9967 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -6,6 +6,10 @@ #include #include +#ifdef _WIN32 +#include "common/windows/timer_resolution.h" +#endif + #include "common/microprofile.h" #include "core/core_timing.h" #include "core/core_timing_util.h" @@ -38,7 +42,8 @@ struct CoreTiming::Event { }; CoreTiming::CoreTiming() - : clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {} + : cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)}, + event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {} CoreTiming::~CoreTiming() { Reset(); @@ -185,15 +190,15 @@ void CoreTiming::ResetTicks() { } u64 CoreTiming::GetCPUTicks() const { - if (is_multicore) { - return clock->GetCPUCycles(); + if (is_multicore) [[likely]] { + return cpu_clock->GetCPUCycles(); } return ticks; } u64 CoreTiming::GetClockTicks() const { - if (is_multicore) { - return clock->GetClockCycles(); + if (is_multicore) [[likely]] { + return cpu_clock->GetClockCycles(); } return CpuCyclesToClockCycles(ticks); } @@ -252,21 +257,20 @@ void CoreTiming::ThreadLoop() { const auto next_time = Advance(); if (next_time) { // There are more events left in the queue, wait until the next event. - const auto wait_time = *next_time - GetGlobalTimeNs().count(); + auto wait_time = *next_time - GetGlobalTimeNs().count(); if (wait_time > 0) { #ifdef _WIN32 - // Assume a timer resolution of 1ms. - static constexpr s64 TimerResolutionNS = 1000000; + const auto timer_resolution_ns = + Common::Windows::GetCurrentTimerResolution().count(); - // Sleep in discrete intervals of the timer resolution, and spin the rest. - const auto sleep_time = wait_time - (wait_time % TimerResolutionNS); - if (sleep_time > 0) { - event.WaitFor(std::chrono::nanoseconds(sleep_time)); - } + while (!paused && !event.IsSet() && wait_time > 0) { + wait_time = *next_time - GetGlobalTimeNs().count(); - while (!paused && !event.IsSet() && GetGlobalTimeNs().count() < *next_time) { - // Yield to reduce thread starvation. - std::this_thread::yield(); + if (wait_time >= timer_resolution_ns) { + Common::Windows::SleepForOneTick(); + } else { + std::this_thread::yield(); + } } if (event.IsSet()) { @@ -285,9 +289,9 @@ void CoreTiming::ThreadLoop() { } paused_set = true; - clock->Pause(true); + event_clock->Pause(true); pause_event.Wait(); - clock->Pause(false); + event_clock->Pause(false); } } @@ -303,16 +307,23 @@ void CoreTiming::Reset() { has_started = false; } +std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const { + if (is_multicore) [[likely]] { + return cpu_clock->GetTimeNS(); + } + return CyclesToNs(ticks); +} + std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { - if (is_multicore) { - return clock->GetTimeNS(); + if (is_multicore) [[likely]] { + return event_clock->GetTimeNS(); } return CyclesToNs(ticks); } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { - if (is_multicore) { - return clock->GetTimeUS(); + if (is_multicore) [[likely]] { + return event_clock->GetTimeUS(); } return CyclesToUs(ticks); } diff --git a/src/core/core_timing.h b/src/core/core_timing.h index da366637be..4b89c0c39b 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -122,6 +122,9 @@ public: /// Returns current time in emulated in Clock cycles u64 GetClockTicks() const; + /// Returns current time in nanoseconds. + std::chrono::nanoseconds GetCPUTimeNs() const; + /// Returns current time in microseconds. std::chrono::microseconds GetGlobalTimeUs() const; @@ -139,7 +142,8 @@ private: void Reset(); - std::unique_ptr clock; + std::unique_ptr cpu_clock; + std::unique_ptr event_clock; s64 global_timer = 0; diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h index 45567b8404..191c28bb46 100644 --- a/src/core/hardware_properties.h +++ b/src/core/hardware_properties.h @@ -13,11 +13,9 @@ namespace Core { namespace Hardware { -// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz -// The exact value used is of course unverified. -constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch cpu frequency is 1020MHz un/docked -constexpr u64 CNTFREQ = 19200000; // Switch's hardware clock speed -constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores +constexpr u64 BASE_CLOCK_RATE = 1'020'000'000; // Default CPU Frequency = 1020 MHz +constexpr u64 CNTFREQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz +constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores // Virtual to Physical core map. constexpr std::array()> VirtualToPhysicalCoreMap{ diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 7024a19cf8..2e7f9c5edb 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -197,7 +197,7 @@ struct GPU::Impl { constexpr u64 gpu_ticks_num = 384; constexpr u64 gpu_ticks_den = 625; - u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); + u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count(); if (Settings::values.use_fast_gpu_time.GetValue()) { nanoseconds /= 256; } diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index f233b065ea..c092507f41 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -91,6 +91,9 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "common/microprofile.h" #include "common/scm_rev.h" #include "common/scope_exit.h" +#ifdef _WIN32 +#include "common/windows/timer_resolution.h" +#endif #ifdef ARCHITECTURE_x86_64 #include "common/x64/cpu_detect.h" #endif @@ -377,6 +380,12 @@ GMainWindow::GMainWindow(std::unique_ptr config_, bool has_broken_vulkan LOG_INFO(Frontend, "Host RAM: {:.2f} GiB", Common::GetMemInfo().TotalPhysicalMemory / f64{1_GiB}); LOG_INFO(Frontend, "Host Swap: {:.2f} GiB", Common::GetMemInfo().TotalSwapMemory / f64{1_GiB}); +#ifdef _WIN32 + LOG_INFO(Frontend, "Host Timer Resolution: {:.4f} ms", + std::chrono::duration_cast>( + Common::Windows::SetCurrentTimerResolutionToMaximum()) + .count()); +#endif UpdateWindowTitle(); show(); diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 77edd58ca4..5f39ece324 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -42,6 +42,8 @@ #include #include + +#include "common/windows/timer_resolution.h" #endif #undef _UNICODE @@ -314,6 +316,8 @@ int main(int argc, char** argv) { #ifdef _WIN32 LocalFree(argv_w); + + Common::Windows::SetCurrentTimerResolutionToMaximum(); #endif MicroProfileOnThreadCreate("EmuThread");