diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index 0d7d4ad03c..8314d1ec28 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -20,8 +20,12 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector& switch (command.group) { case 0x0: switch (command.cmd) { - case 0x1: - return Submit(input, output); + case 0x1: { + if (!fd_to_id.contains(fd)) { + fd_to_id[fd] = next_id++; + } + return Submit(fd, input, output); + } case 0x2: return GetSyncpoint(input, output); case 0x3: @@ -66,7 +70,10 @@ void nvhost_nvdec::OnOpen(DeviceFD fd) {} void nvhost_nvdec::OnClose(DeviceFD fd) { LOG_INFO(Service_NVDRV, "NVDEC video stream ended"); - system.GPU().ClearCdmaInstance(); + const auto iter = fd_to_id.find(fd); + if (iter != fd_to_id.end()) { + system.GPU().ClearCdmaInstance(iter->second); + } } } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 523d96e3a6..a507c4d0a1 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -24,6 +24,9 @@ public: void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; + +private: + u32 next_id{}; }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index e61261f983..8a05f06688 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -59,7 +59,8 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector& input) { return NvResult::Success; } -NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vector& output) { +NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector& input, + std::vector& output) { IoctlSubmit params{}; std::memcpy(¶ms, input.data(), sizeof(IoctlSubmit)); LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); @@ -93,7 +94,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vectoraddr + cmd_buffer.offset, cmdlist.data(), cmdlist.size() * sizeof(u32)); - gpu.PushCommandBuffer(cmdlist); + gpu.PushCommandBuffer(fd_to_id[fd], cmdlist); } std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit)); // Some games expect command_buffers to be written back diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index 351625c17c..e28c54df61 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -104,13 +104,14 @@ protected: /// Ioctl command implementations NvResult SetNVMAPfd(const std::vector& input); - NvResult Submit(const std::vector& input, std::vector& output); + NvResult Submit(DeviceFD fd, const std::vector& input, std::vector& output); NvResult GetSyncpoint(const std::vector& input, std::vector& output); NvResult GetWaitbase(const std::vector& input, std::vector& output); NvResult MapBuffer(const std::vector& input, std::vector& output); NvResult UnmapBuffer(const std::vector& input, std::vector& output); NvResult SetSubmitTimeout(const std::vector& input, std::vector& output); + std::unordered_map fd_to_id{}; s32_le nvmap_fd{}; u32_le submit_timeout{}; std::shared_ptr nvmap_dev; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index eac4dd5309..76b39806f4 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -21,7 +21,10 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, const std::vector& i case 0x0: switch (command.cmd) { case 0x1: - return Submit(input, output); + if (!fd_to_id.contains(fd)) { + fd_to_id[fd] = next_id++; + } + return Submit(fd, input, output); case 0x2: return GetSyncpoint(input, output); case 0x3: @@ -65,7 +68,10 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, const std::vector& i void nvhost_vic::OnOpen(DeviceFD fd) {} void nvhost_vic::OnClose(DeviceFD fd) { - system.GPU().ClearCdmaInstance(); + const auto iter = fd_to_id.find(fd); + if (iter != fd_to_id.end()) { + system.GPU().ClearCdmaInstance(iter->second); + } } } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index 6d7fda9d1d..c9732c0375 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -23,5 +23,8 @@ public: void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; + +private: + u32 next_id{}; }; } // namespace Service::Nvidia::Devices diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ab7c21a49b..8788f5148f 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -185,16 +185,6 @@ struct GPU::Impl { return *dma_pusher; } - /// Returns a reference to the GPU CDMA pusher. - [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() { - return *cdma_pusher; - } - - /// Returns a const reference to the GPU CDMA pusher. - [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const { - return *cdma_pusher; - } - /// Returns a reference to the underlying renderer. [[nodiscard]] VideoCore::RendererBase& Renderer() { return *renderer; @@ -338,25 +328,27 @@ struct GPU::Impl { } /// Push GPU command buffer entries to be processed - void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { + void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { if (!use_nvdec) { return; } - if (!cdma_pusher) { - cdma_pusher = std::make_unique(gpu); + if (!cdma_pushers.contains(id)) { + cdma_pushers.insert_or_assign(id, std::make_unique(gpu)); } // SubmitCommandBuffer would make the nvdec operations async, this is not currently working // TODO(ameerj): RE proper async nvdec operation // gpu_thread.SubmitCommandBuffer(std::move(entries)); - - cdma_pusher->ProcessEntries(std::move(entries)); + cdma_pushers[id]->ProcessEntries(std::move(entries)); } /// Frees the CDMAPusher instance to free up resources - void ClearCdmaInstance() { - cdma_pusher.reset(); + void ClearCdmaInstance(u32 id) { + const auto iter = cdma_pushers.find(id); + if (iter != cdma_pushers.end()) { + cdma_pushers.erase(iter); + } } /// Swap buffers (render frame) @@ -659,7 +651,7 @@ struct GPU::Impl { Core::System& system; std::unique_ptr memory_manager; std::unique_ptr dma_pusher; - std::unique_ptr cdma_pusher; + std::map> cdma_pushers; std::unique_ptr renderer; VideoCore::RasterizerInterface* rasterizer = nullptr; const bool use_nvdec; @@ -811,14 +803,6 @@ const Tegra::DmaPusher& GPU::DmaPusher() const { return impl->DmaPusher(); } -Tegra::CDmaPusher& GPU::CDmaPusher() { - return impl->CDmaPusher(); -} - -const Tegra::CDmaPusher& GPU::CDmaPusher() const { - return impl->CDmaPusher(); -} - VideoCore::RendererBase& GPU::Renderer() { return impl->Renderer(); } @@ -887,12 +871,12 @@ void GPU::PushGPUEntries(Tegra::CommandList&& entries) { impl->PushGPUEntries(std::move(entries)); } -void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { - impl->PushCommandBuffer(entries); +void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { + impl->PushCommandBuffer(id, entries); } -void GPU::ClearCdmaInstance() { - impl->ClearCdmaInstance(); +void GPU::ClearCdmaInstance(u32 id) { + impl->ClearCdmaInstance(id); } void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index c89a5d693c..500411176f 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -242,10 +242,10 @@ public: void PushGPUEntries(Tegra::CommandList&& entries); /// Push GPU command buffer entries to be processed - void PushCommandBuffer(Tegra::ChCommandHeaderList& entries); + void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries); /// Frees the CDMAPusher instance to free up resources - void ClearCdmaInstance(); + void ClearCdmaInstance(u32 id); /// Swap buffers (render frame) void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);