From d567b7e841558a367e37d64b032b3492ed4d5cf4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 26 Oct 2020 21:45:08 -0700 Subject: [PATCH 01/10] hle: service: nvdrv: Implement SyncpointManager, to manage syncpoints. --- src/core/CMakeLists.txt | 2 + src/core/hle/service/nvdrv/nvdrv.cpp | 2 +- .../hle/service/nvdrv/syncpoint_manager.cpp | 39 +++++++++ .../hle/service/nvdrv/syncpoint_manager.h | 85 +++++++++++++++++++ 4 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 src/core/hle/service/nvdrv/syncpoint_manager.cpp create mode 100644 src/core/hle/service/nvdrv/syncpoint_manager.h diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e0f207f3ee..9a983e81d0 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -454,6 +454,8 @@ add_library(core STATIC hle/service/nvdrv/nvdrv.h hle/service/nvdrv/nvmemp.cpp hle/service/nvdrv/nvmemp.h + hle/service/nvdrv/syncpoint_manager.cpp + hle/service/nvdrv/syncpoint_manager.h hle/service/nvflinger/buffer_queue.cpp hle/service/nvflinger/buffer_queue.h hle/service/nvflinger/nvflinger.cpp diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 803c1a9848..e6a205c8e5 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -36,7 +36,7 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger nvflinger.SetNVDrvInstance(module_); } -Module::Module(Core::System& system) { +Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { auto& kernel = system.Kernel(); for (u32 i = 0; i < MaxNvEvents; i++) { std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/syncpoint_manager.cpp new file mode 100644 index 0000000000..0151a03b78 --- /dev/null +++ b/src/core/hle/service/nvdrv/syncpoint_manager.cpp @@ -0,0 +1,39 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h" +#include "video_core/gpu.h" + +namespace Service::Nvidia { + +SyncpointManager::SyncpointManager(Tegra::GPU& gpu) : gpu{gpu} {} + +SyncpointManager::~SyncpointManager() = default; + +u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) { + syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id); + return GetSyncpointMin(syncpoint_id); +} + +u32 SyncpointManager::AllocateSyncpoint() { + for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) { + if (!syncpoints[syncpoint_id].is_allocated) { + syncpoints[syncpoint_id].is_allocated = true; + return syncpoint_id; + } + } + UNREACHABLE_MSG("No more available syncpoints!"); + return {}; +} + +u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) { + for (u32 index = 0; index < value; ++index) { + syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed); + } + + return GetSyncpointMax(syncpoint_id); +} + +} // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.h b/src/core/hle/service/nvdrv/syncpoint_manager.h new file mode 100644 index 0000000000..4168b6c7ed --- /dev/null +++ b/src/core/hle/service/nvdrv/syncpoint_manager.h @@ -0,0 +1,85 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#pragma once
+
+#include <array>
+#include <atomic>
+
+#include "common/common_types.h"
+#include "core/hle/service/nvdrv/nvdata.h"
+
+namespace Tegra {
+class GPU;
+}
+
+namespace Service::Nvidia {
+
+class SyncpointManager final {
+public:
+    explicit SyncpointManager(Tegra::GPU& gpu);
+    ~SyncpointManager();
+
+    /**
+     * Returns true if the specified syncpoint is expired for the given value.
+     * @param syncpoint_id Syncpoint ID to check.
+     * @param value Value to check against the specified syncpoint.
+     * @returns True if the specified syncpoint is expired for the given value, otherwise False.
+     */
+    bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const {
+        return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value);
+    }
+
+    /**
+     * Gets the lower bound for the specified syncpoint.
+     * @param syncpoint_id Syncpoint ID to get the lower bound for.
+     * @returns The lower bound for the specified syncpoint.
+     */
+    u32 GetSyncpointMin(u32 syncpoint_id) const {
+        return syncpoints[syncpoint_id].min.load(std::memory_order_relaxed);
+    }
+
+    /**
+     * Gets the upper bound for the specified syncpoint.
+     * @param syncpoint_id Syncpoint ID to get the upper bound for.
+     * @returns The upper bound for the specified syncpoint.
+     */
+    u32 GetSyncpointMax(u32 syncpoint_id) const {
+        return syncpoints[syncpoint_id].max.load(std::memory_order_relaxed);
+    }
+
+    /**
+     * Refreshes the minimum value for the specified syncpoint.
+     * @param syncpoint_id Syncpoint ID to be refreshed.
+     * @returns The new syncpoint minimum value.
+     */
+    u32 RefreshSyncpoint(u32 syncpoint_id);
+
+    /**
+     * Allocates a new syncpoint.
+     * @returns The syncpoint ID for the newly allocated syncpoint.
+     */
+    u32 AllocateSyncpoint();
+
+    /**
+     * Increases the maximum value for the specified syncpoint.
+     * @param syncpoint_id Syncpoint ID to be increased.
+     * @param value Value to increase the specified syncpoint by.
+     * @returns The new syncpoint maximum value.
+     */
+    u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value);
+
+private:
+    struct Syncpoint {
+        std::atomic<u32> min;
+        std::atomic<u32> max;
+        std::atomic<bool> is_allocated;
+    };
+
+    std::array<Syncpoint, MaxSyncPoints> syncpoints{};
+
+    Tegra::GPU& gpu;
+};
+
+} // namespace Service::Nvidia

From 4a3fd97e4803a68ee260a0d9e38f293967b783b4 Mon Sep 17 00:00:00 2001
From: bunnei
Date: Mon, 26 Oct 2020 21:58:59 -0700
Subject: [PATCH 02/10] hle service: nvdrv: Update to instantiate SyncpointManager.
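
SyncpointManager models each syncpoint as a (min, max) pair: min tracks the value the
GPU has actually reached, while max tracks the value the CPU has promised to reach. The
expiry test relies on unsigned wraparound arithmetic. The following standalone sketch
(illustrative names only, not yuzu code) shows the same test in isolation:

    #include <cstdint>
    #include <iostream>

    struct ToySyncpoint {
        std::uint32_t min = 0; // last value observed from the GPU
        std::uint32_t max = 0; // highest value the CPU has promised to reach
    };

    // "Expired" means value lies outside the window (min, max] of still-pending
    // thresholds; unsigned subtraction keeps the test correct across wraparound.
    bool IsExpired(const ToySyncpoint& sp, std::uint32_t value) {
        return (sp.max - value) >= (sp.min - value);
    }

    int main() {
        ToySyncpoint sp{10, 15};
        std::cout << IsExpired(sp, 9) << '\n';  // 1: already passed
        std::cout << IsExpired(sp, 12) << '\n'; // 0: still pending
        std::cout << IsExpired(sp, 15) << '\n'; // 0: still pending
    }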
---
 src/core/hle/service/nvdrv/nvdrv.cpp |  9 +++++----
 src/core/hle/service/nvdrv/nvdrv.h   | 14 +++++++++++++-
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index e6a205c8e5..26c1bf7517 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -21,6 +21,7 @@
 #include "core/hle/service/nvdrv/interface.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/nvdrv/nvmemp.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/hle/service/nvflinger/nvflinger.h"
 
 namespace Service::Nvidia {
@@ -40,7 +41,7 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
     auto& kernel = system.Kernel();
     for (u32 i = 0; i < MaxNvEvents; i++) {
         std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
-        events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label);
+        events_interface.events[i] = {Kernel::WritableEvent::CreateEventPair(kernel, event_label)};
         events_interface.status[i] = EventState::Free;
         events_interface.registered[i] = false;
     }
@@ -95,17 +96,17 @@ void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
         if (events_interface.assigned_syncpt[i] == syncpoint_id &&
             events_interface.assigned_value[i] == value) {
             events_interface.LiberateEvent(i);
-            events_interface.events[i].writable->Signal();
+            events_interface.events[i].event.writable->Signal();
         }
     }
 }
 
 std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const {
-    return events_interface.events[event_id].readable;
+    return events_interface.events[event_id].event.readable;
 }
 
 std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const {
-    return events_interface.events[event_id].writable;
+    return events_interface.events[event_id].event.writable;
 }
 
 } // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 7706a5590f..f3d863dac4 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -10,6 +10,7 @@
 #include "common/common_types.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/nvdrv/nvdata.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/hle/service/service.h"
 
 namespace Core {
@@ -22,15 +23,23 @@ class NVFlinger;
 
 namespace Service::Nvidia {
 
+class SyncpointManager;
+
 namespace Devices {
 class nvdevice;
 }
 
+/// Represents an Nvidia event
+struct NvEvent {
+    Kernel::EventPair event;
+    Fence fence{};
+};
+
 struct EventInterface {
     // Mask representing currently busy events
     u64 events_mask{};
     // Each kernel event associated to an NV event
-    std::array<Kernel::EventPair, MaxNvEvents> events;
+    std::array<NvEvent, MaxNvEvents> events;
     // The status of the current NVEvent
     std::array<EventState, MaxNvEvents> status{};
     // Tells if an NVEvent is registered or not
@@ -119,6 +128,9 @@ public:
     std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const;
 
 private:
+    /// Manages syncpoints on the host
+    SyncpointManager syncpoint_manager;
+
     /// Id to use for the next open file descriptor.
     u32 next_fd = 1;

From 66edfd61c67182761dacadf3c45011702b47c071 Mon Sep 17 00:00:00 2001
From: bunnei
Date: Mon, 26 Oct 2020 22:04:13 -0700
Subject: [PATCH 03/10] hle service: nvdrv: nvhost_ctrl: Update to use SyncpointManager.
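
The point of routing nvhost_ctrl through SyncpointManager is the fast path it enables:
a wait can often be satisfied from the cached minimum, or after a single refresh from
the GPU, without ever blocking on an event. A rough standalone model of that two-stage
check (simplified, not the actual yuzu interfaces):

    #include <cstdint>
    #include <functional>
    #include <iostream>

    struct WaitResult {
        bool done;
        std::uint32_t value;
    };

    // 1) Test against the cached minimum; 2) refresh once from the real counter
    // and test again; 3) only then must the caller block on the event.
    WaitResult TryWait(std::uint32_t cached_min,
                       const std::function<std::uint32_t()>& refresh,
                       std::uint32_t threshold) {
        if (static_cast<std::int32_t>(cached_min - threshold) >= 0) {
            return {true, cached_min}; // already expired, no GPU round trip
        }
        const std::uint32_t fresh = refresh(); // one read of the real counter
        if (static_cast<std::int32_t>(fresh - threshold) >= 0) {
            return {true, fresh};
        }
        return {false, fresh}; // fall back to registering a syncpoint interrupt
    }

    int main() {
        const WaitResult r = TryWait(5, [] { return 12u; }, 10);
        std::cout << r.done << ' ' << r.value << '\n'; // 1 12: satisfied by refresh
    }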
---
 .../hle/service/nvdrv/devices/nvhost_ctrl.cpp | 33 +++++++++++++++----
 .../hle/service/nvdrv/devices/nvhost_ctrl.h   |  4 ++-
 src/core/hle/service/nvdrv/nvdrv.cpp          |  3 +-
 3 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index 75d9191ff2..8356a81392 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -15,8 +15,9 @@
 
 namespace Service::Nvidia::Devices {
 
-nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface)
-    : nvdevice(system), events_interface{events_interface} {}
+nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface,
+                         SyncpointManager& syncpoint_manager)
+    : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {}
 nvhost_ctrl::~nvhost_ctrl() = default;
 
 u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -70,19 +71,33 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
         return NvResult::BadParameter;
     }
 
+    if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
+        params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id);
+        std::memcpy(output.data(), &params, sizeof(params));
+        return NvResult::Success;
+    }
+
+    if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id);
+        syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
+        params.value = new_value;
+        std::memcpy(output.data(), &params, sizeof(params));
+        return NvResult::Success;
+    }
+
     auto event = events_interface.events[event_id];
     auto& gpu = system.GPU();
+
     // This is mostly to take into account unimplemented features. As synced
     // gpu is always synced.
     if (!gpu.IsAsync()) {
-        event.writable->Signal();
+        event.event.writable->Signal();
         return NvResult::Success;
     }
     auto lock = gpu.LockSync();
-    const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);
+    const u32 current_syncpoint_value = event.fence.value;
     const s32 diff = current_syncpoint_value - params.threshold;
     if (diff >= 0) {
-        event.writable->Signal();
+        event.event.writable->Signal();
         params.value = current_syncpoint_value;
         std::memcpy(output.data(), &params, sizeof(params));
         return NvResult::Success;
@@ -109,7 +124,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
             params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
         }
         params.value |= event_id;
-        event.writable->Clear();
+        event.event.writable->Clear();
         gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
         if (!is_async && ctrl.fresh_call) {
             ctrl.must_delay = true;
@@ -157,15 +172,19 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto
 
 u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
     IocCtrlEventSignalParams params{};
     std::memcpy(&params, input.data(), sizeof(params));
+
     u32 event_id = params.event_id & 0x00FF;
     LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id);
+
     if (event_id >= MaxNvEvents) {
         return NvResult::BadParameter;
     }
     if (events_interface.status[event_id] == EventState::Waiting) {
         events_interface.LiberateEvent(event_id);
-        events_interface.events[event_id].writable->Signal();
     }
+
+    syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id);
+
     return NvResult::Success;
 }
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index f7b04d9f17..24ad96cb95 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -14,7 +14,8 @@ namespace Service::Nvidia::Devices {
 
 class nvhost_ctrl final : public nvdevice {
 public:
-    explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface);
+    explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface,
+                         SyncpointManager& syncpoint_manager);
     ~nvhost_ctrl() override;
 
     u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -145,6 +146,7 @@ private:
     u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);
 
     EventInterface& events_interface;
+    SyncpointManager& syncpoint_manager;
 };
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 26c1bf7517..2e52f8bd6d 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -51,7 +51,8 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
     devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<nvhost_ctrl_gpu>(system);
     devices["/dev/nvmap"] = nvmap_dev;
     devices["/dev/nvdisp_disp0"] = std::make_shared<nvdisp_disp0>(system, nvmap_dev);
-    devices["/dev/nvhost-ctrl"] = std::make_shared<nvhost_ctrl>(system, events_interface);
+    devices["/dev/nvhost-ctrl"] =
+        std::make_shared<nvhost_ctrl>(system, events_interface, syncpoint_manager);
     devices["/dev/nvhost-nvdec"] = std::make_shared<nvhost_nvdec>(system, nvmap_dev);
     devices["/dev/nvhost-nvjpg"] = std::make_shared<nvhost_nvjpg>(system);
     devices["/dev/nvhost-vic"] = std::make_shared<nvhost_vic>(system, nvmap_dev);

From 6053b955525be69eb73a928a7bdd43ba8f5e69a7 Mon Sep 17 00:00:00 2001
From: bunnei
Date: Mon, 26 Oct 2020 22:11:41 -0700
Subject: [PATCH 04/10] video_core: gpu: Implement WaitFence and IncrementSyncPoint.
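
WaitFence and IncrementSyncPoint are driven by the FenceAction register, a packed word
holding the operation in bit 0 and the syncpoint ID in bits 8..31, matching the BitField
declarations in this patch. A small sketch of that packing, using plain shifts in place
of yuzu's BitField (names here are illustrative):

    #include <cassert>
    #include <cstdint>

    enum class FenceOp : std::uint32_t { Acquire = 0, Increment = 1 };

    // Bit 0: operation; bits 8..31: syncpoint id (24 bits).
    constexpr std::uint32_t BuildFenceAction(FenceOp op, std::uint32_t syncpoint_id) {
        return (static_cast<std::uint32_t>(op) & 0x1u) | ((syncpoint_id & 0xFFFFFFu) << 8);
    }

    int main() {
        constexpr std::uint32_t word = BuildFenceAction(FenceOp::Increment, 5);
        static_assert(word == ((5u << 8) | 1u));
        assert((word & 1u) == 1u); // decoded operation
        assert((word >> 8) == 5u); // decoded syncpoint id
    }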
---
 src/video_core/dma_pusher.h | 25 +++++++++++++++++++
 src/video_core/gpu.cpp      | 48 ++++++++++++++++++-------------------
 src/video_core/gpu.h        | 25 +++++++++++++++----
 3 files changed, 70 insertions(+), 28 deletions(-)

diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index efa90d1709..2026b78571 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -27,6 +27,31 @@ enum class SubmissionMode : u32 {
     IncreaseOnce = 5
 };
 
+// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
+// their numbers are written down multiplied by 4 in Docs. Here we are not multiplying by 4,
+// so the values you see in Docs might be multiplied by 4 relative to the values here.
+enum class BufferMethods : u32 {
+    BindObject = 0x0,
+    Nop = 0x2,
+    SemaphoreAddressHigh = 0x4,
+    SemaphoreAddressLow = 0x5,
+    SemaphoreSequence = 0x6,
+    SemaphoreTrigger = 0x7,
+    NotifyIntr = 0x8,
+    WrcacheFlush = 0x9,
+    Unk28 = 0xA,
+    UnkCacheFlush = 0xB,
+    RefCnt = 0x14,
+    SemaphoreAcquire = 0x1A,
+    SemaphoreRelease = 0x1B,
+    FenceValue = 0x1C,
+    FenceAction = 0x1D,
+    WaitForInterrupt = 0x1E,
+    Unk7c = 0x1F,
+    Yield = 0x20,
+    NonPullerMethods = 0x40,
+};
+
 struct CommandListHeader {
     union {
         u64 raw;
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 171f78183b..ebd149c3af 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -194,30 +194,6 @@ void GPU::SyncGuestHost() {
 void GPU::OnCommandListEnd() {
     renderer->Rasterizer().ReleaseFences();
 }
-// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
-// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
-// So the values you see in docs might be multiplied by 4.
-enum class BufferMethods {
-    BindObject = 0x0,
-    Nop = 0x2,
-    SemaphoreAddressHigh = 0x4,
-    SemaphoreAddressLow = 0x5,
-    SemaphoreSequence = 0x6,
-    SemaphoreTrigger = 0x7,
-    NotifyIntr = 0x8,
-    WrcacheFlush = 0x9,
-    Unk28 = 0xA,
-    UnkCacheFlush = 0xB,
-    RefCnt = 0x14,
-    SemaphoreAcquire = 0x1A,
-    SemaphoreRelease = 0x1B,
-    FenceValue = 0x1C,
-    FenceAction = 0x1D,
-    Unk78 = 0x1E,
-    Unk7c = 0x1F,
-    Yield = 0x20,
-    NonPullerMethods = 0x40,
-};
 
 enum class GpuSemaphoreOperation {
     AcquireEqual = 0x1,
@@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
     case BufferMethods::UnkCacheFlush:
     case BufferMethods::WrcacheFlush:
     case BufferMethods::FenceValue:
+        break;
     case BufferMethods::FenceAction:
+        ProcessFenceActionMethod();
+        break;
+    case BufferMethods::WaitForInterrupt:
+        ProcessWaitForInterruptMethod();
         break;
     case BufferMethods::SemaphoreTrigger: {
         ProcessSemaphoreTriggerMethod();
@@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
     }
 }
 
+void GPU::ProcessFenceActionMethod() {
+    switch (regs.fence_action.op) {
+    case FenceOperation::Acquire:
+        WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+        break;
+    case FenceOperation::Increment:
+        IncrementSyncPoint(regs.fence_action.syncpoint_id);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented operation {}",
+                          static_cast<u32>(regs.fence_action.op.Value()));
+    }
+}
+
+void GPU::ProcessWaitForInterruptMethod() {
+    // TODO(bunnei) ImplementMe
+    LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
 void GPU::ProcessSemaphoreTriggerMethod() {
     const auto semaphoreOperationMask = 0xF;
     const auto op =
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b8c613b11f..5444b49f35 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -263,6 +263,24 @@ public:
         return use_nvdec;
     }
 
+    enum class FenceOperation : u32 {
+        Acquire = 0,
+        Increment = 1,
+    };
+
+    union FenceAction {
+        u32 raw;
+        BitField<0, 1, FenceOperation> op;
+        BitField<8, 24, u32> syncpoint_id;
+
+        static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
+            FenceAction result{};
+            result.op.Assign(op);
+            result.syncpoint_id.Assign(syncpoint_id);
+            return {result.raw};
+        }
+    };
+
     struct Regs {
         static constexpr size_t NUM_REGS = 0x40;
 
@@ -291,10 +309,7 @@ public:
             u32 semaphore_acquire;
             u32 semaphore_release;
             u32 fence_value;
-            union {
-                BitField<4, 4, u32> operation;
-                BitField<8, 8, u32> id;
-            } fence_action;
+            FenceAction fence_action;
 
             INSERT_UNION_PADDING_WORDS(0xE2);
 
             // Puller state
@@ -342,6 +357,8 @@ protected:
 
 private:
     void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessFenceActionMethod();
+    void ProcessWaitForInterruptMethod();
     void ProcessSemaphoreTriggerMethod();
     void ProcessSemaphoreRelease();
     void ProcessSemaphoreAcquire();

From 1d4cbb92f2f2c1702fe5843c6ece471f53d2b2d9 Mon Sep 17 00:00:00 2001
From: bunnei
Date: Wed, 28 Oct 2020 17:17:38 -0700
Subject: [PATCH 05/10] service: hle: nvflinger: Fix potential shutdown crash when GPU is destroyed.

---
 src/core/hle/service/nvflinger/nvflinger.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index c64673dba5..44aa2bdaea 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -242,6 +242,10 @@ void NVFlinger::Compose() {
 
         const auto& igbp_buffer = buffer->get().igbp_buffer;
 
+        if (!system.IsPoweredOn()) {
+            return; // We are likely shutting down
+        }
+
         auto& gpu = system.GPU();
         const auto& multi_fence = buffer->get().multi_fence;
         guard->unlock();

From c64545d07ae57816bc658ca7c45559d0b0d49f89 Mon Sep 17 00:00:00 2001
From: bunnei
Date: Thu, 29 Oct 2020 21:13:04 -0700
Subject: [PATCH 06/10] video_core: dma_pusher: Add support for prefetched command lists.

---
 src/video_core/dma_pusher.cpp | 56 +++++++++++++++++++++--------------
 src/video_core/dma_pusher.h   | 21 +++++++++++--
 2 files changed, 52 insertions(+), 25 deletions(-)

diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index f2f96ac33a..9c49c61532 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -45,32 +45,42 @@ bool DmaPusher::Step() {
         return false;
     }
 
-    const CommandList& command_list{dma_pushbuffer.front()};
-    ASSERT_OR_EXECUTE(!command_list.empty(), {
-        // Somehow the command_list is empty, in order to avoid a crash
-        // We ignore it and assume its size is 0.
+    CommandList& command_list{dma_pushbuffer.front()};
+
+    ASSERT_OR_EXECUTE(
+        command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
+            // Somehow the command_list is empty, in order to avoid a crash
+            // We ignore it and assume its size is 0.
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+            return true;
+        });
+
+    if (command_list.prefetch_command_list.size()) {
+        // Prefetched command list from nvdrv, used for things like synchronization
+        command_headers = std::move(command_list.prefetch_command_list);
         dma_pushbuffer.pop();
-    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
-    const GPUVAddr dma_get = command_list_header.addr;
+    } else {
+        const CommandListHeader command_list_header{
+            command_list.command_lists[dma_pushbuffer_subindex]};
+        const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++];
+        const GPUVAddr dma_get = command_list_header.addr;
 
-    if (dma_pushbuffer_subindex >= command_list.size()) {
-        // We've gone through the current list, remove it from the queue
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-    }
-
-    if (command_list_header.size == 0) {
-        return true;
-    }
-
-    // Push buffer non-empty, read a word
-    command_headers.resize(command_list_header.size);
-    gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
-                                        command_list_header.size * sizeof(u32));
+        if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
+            // We've gone through the current list, remove it from the queue
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+        }
+
+        if (command_list_header.size == 0) {
+            return true;
+        }
+
+        // Push buffer non-empty, read a word
+        command_headers.resize(command_list_header.size);
+        gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+                                            command_list_header.size * sizeof(u32));
+    }
 
     for (std::size_t index = 0; index < command_headers.size();) {
         const CommandHeader& command_header = command_headers[index];
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 2026b78571..99b30ca0de 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -74,9 +74,26 @@ union CommandHeader {
 static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
 static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
 
+static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count,
+                                                  SubmissionMode mode) {
+    CommandHeader result{};
+    result.method.Assign(static_cast<u32>(method));
+    result.arg_count.Assign(arg_count);
+    result.mode.Assign(mode);
+    return result;
+}
+
 class GPU;
 
-using CommandList = std::vector<CommandListHeader>;
+struct CommandList final {
+    CommandList() = default;
+    explicit CommandList(std::size_t size) : command_lists(size) {}
+    explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list)
+        : prefetch_command_list{std::move(prefetch_command_list)} {}
+
+    std::vector<CommandListHeader> command_lists;
+    std::vector<CommandHeader> prefetch_command_list;
+};
 
 /**
  * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
@@ -85,7 +102,7 @@ using CommandList = std::vector<CommandListHeader>;
  * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
  * details on this implementation.
  */
-class DmaPusher {
+class DmaPusher final {
 public:
     explicit DmaPusher(Core::System& system, GPU& gpu);
     ~DmaPusher();

From c6e1c46ac70bf31b54f756f9611b1cf086b63fb0 Mon Sep 17 00:00:00 2001
From: bunnei
Date: Thu, 29 Oct 2020 21:13:48 -0700
Subject: [PATCH 07/10] video_core: dma_pusher: Add support for integrity checks.

- Log corrupted command lists, rather than crash.
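
The check works by hashing each guest command buffer when the list is submitted and
re-hashing it when the DMA pusher consumes it; a mismatch means guest memory was
overwritten in between, and the list is skipped instead of crashing. A dependency-free
model of the idea (yuzu uses Common::CityHash64; FNV-1a stands in here, and all names
are illustrative):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    std::uint64_t Fnv1a64(const void* data, std::size_t len) {
        const auto* p = static_cast<const unsigned char*>(data);
        std::uint64_t hash = 1469598103934665603ull;
        for (std::size_t i = 0; i < len; ++i) {
            hash = (hash ^ p[i]) * 1099511628211ull;
        }
        return hash;
    }

    int main() {
        std::vector<std::uint32_t> commands{0xDEADBEEF, 0x12345678};
        // At submit time: record the hash of the guest command buffer.
        const std::uint64_t expected = Fnv1a64(commands.data(), commands.size() * 4);
        // ... guest memory is (incorrectly) overwritten in between ...
        commands[1] = 0;
        // At consume time: a mismatch means the list is corrupt and gets skipped.
        const std::uint64_t actual = Fnv1a64(commands.data(), commands.size() * 4);
        std::cout << (actual == expected ? "ok" : "corrupt, skipping") << '\n';
    }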
---
 src/video_core/dma_pusher.cpp | 24 ++++++++++++++++++++++++
 src/video_core/dma_pusher.h   |  3 +++
 2 files changed, 27 insertions(+)

diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 9c49c61532..105b85a922 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/cityhash.h"
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/memory.h"
@@ -12,6 +13,20 @@
 
 namespace Tegra {
 
+void CommandList::RefreshIntegrityChecks(GPU& gpu) {
+    command_list_hashes.resize(command_lists.size());
+
+    for (std::size_t index = 0; index < command_lists.size(); ++index) {
+        const CommandListHeader command_list_header = command_lists[index];
+        std::vector<CommandHeader> command_headers(command_list_header.size);
+        gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(),
+                                            command_list_header.size * sizeof(u32));
+        command_list_hashes[index] =
+            Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+                               command_list_header.size * sizeof(u32));
+    }
+}
+
 DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
 
 DmaPusher::~DmaPusher() = default;
@@ -80,6 +95,15 @@ bool DmaPusher::Step() {
             command_headers.resize(command_list_header.size);
             gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
                                                 command_list_header.size * sizeof(u32));
+
+            // Integrity check
+            const u64 new_hash =
+                Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+                                   command_list_header.size * sizeof(u32));
+            if (new_hash != next_hash) {
+                LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get);
+                dma_pushbuffer.pop();
+                return true;
+            }
     }
 
     for (std::size_t index = 0; index < command_headers.size();) {
         const CommandHeader& command_header = command_headers[index];
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 99b30ca0de..8496ba2da7 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -91,7 +91,10 @@ struct CommandList final {
     explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list)
         : prefetch_command_list{std::move(prefetch_command_list)} {}
 
+    void RefreshIntegrityChecks(GPU& gpu);
+
     std::vector<CommandListHeader> command_lists;
+    std::vector<u64> command_list_hashes;
     std::vector<CommandHeader> prefetch_command_list;
 };

From e67b8678f8f261b582ff8449fff90058a9a7b901 Mon Sep 17 00:00:00 2001
From: bunnei
Date: Thu, 29 Oct 2020 21:16:24 -0700
Subject: [PATCH 08/10] hle service: nvdrv: nvhost_gpu: Update to use SyncpointManager and other improvements.

- Refactor so that SubmitGPFIFO and KickoffPB use shared functionality.
- Implement add_wait and add_increment flags.
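
With the flags implemented, a submission can expand into up to three command lists
pushed in order: a wait prologue (FenceAction Acquire) when add_wait is set and the
fence has not yet expired, the guest entries themselves, and an increment epilogue
(optionally preceded by a wait-for-interrupt) when add_increment is set. A toy model
of that ordering (illustrative only, not the yuzu API):

    #include <iostream>
    #include <string>
    #include <vector>

    std::vector<std::string> BuildSubmission(bool add_wait, bool fence_expired,
                                             bool add_increment, bool suppress_wfi) {
        std::vector<std::string> queue;
        if (add_wait && !fence_expired) {
            queue.push_back("wait(fence)");      // FenceAction Acquire prologue
        }
        queue.push_back("guest entries");        // the actual GPFIFO payload
        if (add_increment) {
            if (!suppress_wfi) {
                queue.push_back("wait-for-interrupt");
            }
            queue.push_back("increment(fence)"); // FenceAction Increment epilogue
        }
        return queue;
    }

    int main() {
        for (const auto& step : BuildSubmission(true, false, true, false)) {
            std::cout << step << '\n';
        }
    }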
---
 .../hle/service/nvdrv/devices/nvhost_gpu.cpp  | 134 ++++++++++++------
 .../hle/service/nvdrv/devices/nvhost_gpu.h    |  19 ++-
 src/core/hle/service/nvdrv/nvdrv.cpp          |   3 +-
 3 files changed, 108 insertions(+), 48 deletions(-)

diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index f1966ac0e5..2b9a8722cc 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -7,14 +7,17 @@
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/memory.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 
 namespace Service::Nvidia::Devices {
 
-nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
-    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
+nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+                       SyncpointManager& syncpoint_manager)
+    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} {}
+
 nvhost_gpu::~nvhost_gpu() = default;
 
 u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -126,10 +129,9 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
               params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
               params.unk3);
 
-    auto& gpu = system.GPU();
-    params.fence_out.id = assigned_syncpoints;
-    params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints);
-    assigned_syncpoints++;
+    params.fence_out.id = syncpoint_manager.AllocateSyncpoint();
+    params.fence_out.value = syncpoint_manager.RefreshSyncpoint(params.fence_out.id);
+
     std::memcpy(output.data(), &params, output.size());
     return 0;
 }
@@ -145,37 +147,95 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
     return 0;
 }
 
+static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) {
+    return {
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+                                  Tegra::SubmissionMode::Increasing),
+        {fence.value},
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+                                  Tegra::SubmissionMode::Increasing),
+        Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id),
+    };
+}
+
+static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence,
+                                                                   u32 add_increment) {
+    std::vector<Tegra::CommandHeader> result{
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+                                  Tegra::SubmissionMode::Increasing),
+        {}};
+
+    for (u32 count = 0; count < add_increment; ++count) {
+        result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+                                                      Tegra::SubmissionMode::Increasing));
+        result.emplace_back(
+            Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id));
+    }
+
+    return result;
+}
+
+static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence,
+                                                                          u32 add_increment) {
+    std::vector<Tegra::CommandHeader> result{
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
+                                  Tegra::SubmissionMode::Increasing),
+        {}};
+    const std::vector<Tegra::CommandHeader> increment{
+        BuildIncrementCommandList(fence, add_increment)};
+
+    result.insert(result.end(), increment.begin(), increment.end());
+
+    return result;
+}
+
+u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
+                                 Tegra::CommandList&& entries) {
+    LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
+              params.num_entries, params.flags.raw);
+
+    auto& gpu = system.GPU();
+
+    if (params.flags.add_wait.Value() &&
+        !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
+        gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
+    }
+
+    if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
+        const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0;
+        params.fence_out.value = syncpoint_manager.IncreaseSyncpoint(
+            params.fence_out.id, params.AddIncrementValue() + increment_value);
+    } else {
+        params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
+    }
+
+    entries.RefreshIntegrityChecks(gpu);
+    gpu.PushGPUEntries(std::move(entries));
+
+    if (params.flags.add_increment.Value()) {
+        if (params.flags.suppress_wfi) {
+            gpu.PushGPUEntries(Tegra::CommandList{
+                BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
+        } else {
+            gpu.PushGPUEntries(Tegra::CommandList{
+                BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
+        }
+    }
+
+    std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
+    return 0;
+}
+
 u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
     if (input.size() < sizeof(IoctlSubmitGpfifo)) {
         UNIMPLEMENTED();
     }
     IoctlSubmitGpfifo params{};
     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
-    LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
-              params.num_entries, params.flags.raw);
-
-    ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
-                                   params.num_entries * sizeof(Tegra::CommandListHeader),
-               "Incorrect input size");
 
     Tegra::CommandList entries(params.num_entries);
-    std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
+    std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)],
                 params.num_entries * sizeof(Tegra::CommandListHeader));
 
-    UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
-    UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
-
-    auto& gpu = system.GPU();
-    u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
-    if (params.flags.increment.Value()) {
-        params.fence_out.value += current_syncpoint_value;
-    } else {
-        params.fence_out.value = current_syncpoint_value;
-    }
-    gpu.PushGPUEntries(std::move(entries));
-
-    std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
-    return 0;
+    return SubmitGPFIFOImpl(params, output, std::move(entries));
 }
 
 u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
@@ -185,31 +245,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
     }
     IoctlSubmitGpfifo params{};
     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
-    LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
-              params.num_entries, params.flags.raw);
 
     Tegra::CommandList entries(params.num_entries);
     if (version == IoctlVersion::Version2) {
-        std::memcpy(entries.data(), input2.data(),
+        std::memcpy(entries.command_lists.data(), input2.data(),
                     params.num_entries * sizeof(Tegra::CommandListHeader));
     } else {
-        system.Memory().ReadBlock(params.address, entries.data(),
+        system.Memory().ReadBlock(params.address, entries.command_lists.data(),
                                   params.num_entries * sizeof(Tegra::CommandListHeader));
     }
-    UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
-    UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
 
-    auto& gpu = system.GPU();
-    u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
-    if (params.flags.increment.Value()) {
-        params.fence_out.value += current_syncpoint_value;
-    } else {
-        params.fence_out.value = current_syncpoint_value;
-    }
-    gpu.PushGPUEntries(std::move(entries));
-
-    std::memcpy(output.data(), &params, output.size());
-    return 0;
+    return SubmitGPFIFOImpl(params, output, std::move(entries));
 }
 
 u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 2ac74743ff..80054510f0 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -11,6 +11,11 @@
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 #include "core/hle/service/nvdrv/nvdata.h"
+#include "video_core/dma_pusher.h"
+
+namespace Service::Nvidia {
+class SyncpointManager;
+}
 
 namespace Service::Nvidia::Devices {
 
@@ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);
 
 class nvhost_gpu final : public nvdevice {
 public:
-    explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
+    explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+                        SyncpointManager& syncpoint_manager);
     ~nvhost_gpu() override;
 
     u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -162,10 +168,15 @@ private:
             u32_le raw;
             BitField<0, 1, u32_le> add_wait;      // append a wait sync_point to the list
            BitField<1, 1, u32_le> add_increment; // append an increment to the list
-            BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
+            BitField<2, 1, u32_le> new_hw_format; // mostly ignored
+            BitField<4, 1, u32_le> suppress_wfi;  // suppress wait for interrupt
             BitField<8, 1, u32_le> increment;     // increment the returned fence
         } flags;
         Fence fence_out; // returned new fence object for others to wait on
+
+        u32 AddIncrementValue() const {
+            return flags.add_increment.Value() << 1;
+        }
     };
     static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
                   "IoctlSubmitGpfifo is incorrect size");
@@ -190,6 +201,8 @@ private:
     u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
     u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
     u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
+                         Tegra::CommandList&& entries);
    u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
     u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
                   const std::vector<u8>& input2, IoctlVersion version);
@@ -198,7 +211,7 @@ private:
     u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
 
     std::shared_ptr<nvmap> nvmap_dev;
-    u32 assigned_syncpoints{};
+    SyncpointManager& syncpoint_manager;
 };
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 2e52f8bd6d..a46755cdc8 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -47,7 +47,8 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
     }
     auto nvmap_dev = std::make_shared<nvmap>(system);
     devices["/dev/nvhost-as-gpu"] = std::make_shared<nvhost_as_gpu>(system, nvmap_dev);
-    devices["/dev/nvhost-gpu"] = std::make_shared<nvhost_gpu>(system, nvmap_dev);
+    devices["/dev/nvhost-gpu"] =
+        std::make_shared<nvhost_gpu>(system, nvmap_dev, syncpoint_manager);
     devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<nvhost_ctrl_gpu>(system);
     devices["/dev/nvmap"] = nvmap_dev;
     devices["/dev/nvdisp_disp0"] = std::make_shared<nvdisp_disp0>(system, nvmap_dev);

From 7d2839d7a32fcefb66e1d5b1e0305da760c09eb0 Mon Sep 17 00:00:00 2001
From: bunnei
Date: Fri, 30 Oct 2020 23:16:35 -0700
Subject: [PATCH 09/10] core: Initialize GPU before services.

---
 src/core/core.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index fde2ccc096..2427960084 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -179,16 +179,18 @@ struct System::Impl {
         arp_manager.ResetAll();
 
         telemetry_session = std::make_unique<Core::TelemetrySession>();
+
+        gpu_core = VideoCore::CreateGPU(emu_window, system);
+        if (!gpu_core) {
+            return ResultStatus::ErrorVideoCore;
+        }
+
         service_manager = std::make_shared<Service::SM::ServiceManager>(kernel);
 
         Service::Init(service_manager, system);
         GDBStub::DeferStart();
 
         interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
-        gpu_core = VideoCore::CreateGPU(emu_window, system);
-        if (!gpu_core) {
-            return ResultStatus::ErrorVideoCore;
-        }
 
         // Initialize time manager, which must happen after kernel is created
         time_manager.Initialize();

From 848bdf8a40aa31f1525e7326225f0f9fc3126d4d Mon Sep 17 00:00:00 2001
From: bunnei
Date: Fri, 30 Oct 2020 23:17:05 -0700
Subject: [PATCH 10/10] fixup! hle service: nvdrv: nvhost_gpu: Update to use SyncpointManager and other improvements.

---
 src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 13 ++++++++++---
 src/core/hle/service/nvdrv/devices/nvhost_gpu.h   |  1 +
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 2b9a8722cc..1520195481 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -16,7 +16,10 @@ namespace Service::Nvidia::Devices {
 
 nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
                        SyncpointManager& syncpoint_manager)
-    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} {}
+    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} {
+    channel_fence.id = syncpoint_manager.AllocateSyncpoint();
+    channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
+}
 
 nvhost_gpu::~nvhost_gpu() = default;
 
@@ -129,8 +132,9 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
               params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
               params.unk3);
 
-    params.fence_out.id = syncpoint_manager.AllocateSyncpoint();
-    params.fence_out.value = syncpoint_manager.RefreshSyncpoint(params.fence_out.id);
+    channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
+
+    params.fence_out = channel_fence;
 
     std::memcpy(output.data(), &params, output.size());
     return 0;
@@ -194,6 +198,9 @@ u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& out
               params.num_entries, params.flags.raw);
 
     auto& gpu = system.GPU();
+
+    params.fence_out.id = channel_fence.id;
+
     if (params.flags.add_wait.Value() &&
         !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
         gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 80054510f0..a252fc06d1 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -212,6 +212,7 @@ private:
 
     std::shared_ptr<nvmap> nvmap_dev;
     SyncpointManager& syncpoint_manager;
+    Fence channel_fence;
 };
 
 } // namespace Service::Nvidia::Devices
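
Taken together with patch 08, the fixup gives each channel a single syncpoint allocated
at construction; AllocGPFIFOEx2 re-reads its current value, and every submission stamps
fence_out.id with the channel's syncpoint before building wait/increment lists. A
condensed model of that lifecycle (illustrative names, not the yuzu API):

    #include <cstdint>
    #include <iostream>

    struct ToyFence {
        std::uint32_t id = 0;
        std::uint32_t value = 0;
    };

    struct ToyChannel {
        ToyFence channel_fence;

        // Channel creation: allocate a syncpoint once and remember it.
        explicit ToyChannel(std::uint32_t allocated_id, std::uint32_t current_value)
            : channel_fence{allocated_id, current_value} {}

        // AllocGPFIFOEx2: hand the guest the channel fence at its current value.
        ToyFence AllocGpfifo(std::uint32_t current_value) {
            channel_fence.value = current_value;
            return channel_fence;
        }

        // Submission: every fence returned to the guest uses the channel's syncpoint id.
        ToyFence StampSubmission(std::uint32_t promised_value) const {
            return {channel_fence.id, promised_value};
        }
    };

    int main() {
        ToyChannel channel{/*allocated_id=*/1, /*current_value=*/0};
        const ToyFence f = channel.StampSubmission(42);
        std::cout << f.id << ' ' << f.value << '\n'; // 1 42
    }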