vk_scheduler: Delegate commands to a worker thread and state track

Introduce a worker thread approach for delegating Vulkan work derived from dxvk's approach. https://github.com/doitsujin/dxvk Now that the scheduler is what handles all Vulkan work related to command streaming, store state tracking in itself. This way we can know when to reupload Vulkan dynamic state to the queue (since this one is invalidated between command buffers unlike NVN). We can also store the renderpass state and graphics pipeline bound to avoid redundant binds and renderpass begins/ends.
2024-07-16 15:31:07 +02:00 · 2019-12-13 02:24:48 -03:00 · 2019-12-13 02:24:48 -03:00 · 2df9a2dcaf
commit 2df9a2dcaf
parent 8fc49a83b6
2 changed files with 315 additions and 41 deletions
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@ -3,7 +3,7 @@
 // Refer to the license.txt file included.

 #include "common/assert.h"
-#include "common/logging/log.h"
+#include "common/microprofile.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
@ -11,46 +11,172 @@

 namespace Vulkan {

-VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
-    : device{device}, resource_manager{resource_manager} {
-    next_fence = &resource_manager.CommitFence();
-    AllocateNewContext();
+MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
+
+void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
+                                           const vk::DispatchLoaderDynamic& dld) {
+    auto command = first;
+    while (command != nullptr) {
+        auto next = command->GetNext();
+        command->Execute(cmdbuf, dld);
+        command->~Command();
+        command = next;
+    }
+
+    command_offset = 0;
+    first = nullptr;
+    last = nullptr;
 }

-VKScheduler::~VKScheduler() = default;
+VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
+    : device{device}, resource_manager{resource_manager}, next_fence{
+                                                              &resource_manager.CommitFence()} {
+    AcquireNewChunk();
+    AllocateNewContext();
+    worker_thread = std::thread(&VKScheduler::WorkerThread, this);
+}
+
+VKScheduler::~VKScheduler() {
+    quit = true;
+    cv.notify_all();
+    worker_thread.join();
+}

 void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
    SubmitExecution(semaphore);
-    if (release_fence)
+    if (release_fence) {
        current_fence->Release();
+    }
    AllocateNewContext();
 }

 void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
    SubmitExecution(semaphore);
    current_fence->Wait();
-    if (release_fence)
+    if (release_fence) {
        current_fence->Release();
+    }
    AllocateNewContext();
 }

+void VKScheduler::WaitWorker() {
+    MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
+    DispatchWork();
+
+    bool finished = false;
+    do {
+        cv.notify_all();
+        std::unique_lock lock{mutex};
+        finished = chunk_queue.Empty();
+    } while (!finished);
+}
+
+void VKScheduler::DispatchWork() {
+    if (chunk->Empty()) {
+        return;
+    }
+    chunk_queue.Push(std::move(chunk));
+    cv.notify_all();
+    AcquireNewChunk();
+}
+
+void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) {
+    if (state.renderpass && renderpass_bi == *state.renderpass) {
+        return;
+    }
+    const bool end_renderpass = state.renderpass.has_value();
+    state.renderpass = renderpass_bi;
+    Record([renderpass_bi, end_renderpass](auto cmdbuf, auto& dld) {
+        if (end_renderpass) {
+            cmdbuf.endRenderPass(dld);
+        }
+        cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld);
+    });
+}
+
+void VKScheduler::RequestOutsideRenderPassOperationContext() {
+    EndRenderPass();
+}
+
+void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) {
+    if (state.graphics_pipeline == pipeline) {
+        return;
+    }
+    state.graphics_pipeline = pipeline;
+    Record([pipeline](auto cmdbuf, auto& dld) {
+        cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld);
+    });
+}
+
+void VKScheduler::WorkerThread() {
+    std::unique_lock lock{mutex};
+    do {
+        cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
+        if (quit) {
+            continue;
+        }
+        auto extracted_chunk = std::move(chunk_queue.Front());
+        chunk_queue.Pop();
+        extracted_chunk->ExecuteAll(current_cmdbuf, device.GetDispatchLoader());
+        chunk_reserve.Push(std::move(extracted_chunk));
+    } while (!quit);
+}
+
 void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
+    EndPendingOperations();
+    InvalidateState();
+    WaitWorker();
+
+    std::unique_lock lock{mutex};
+
+    const auto queue = device.GetGraphicsQueue();
    const auto& dld = device.GetDispatchLoader();
    current_cmdbuf.end(dld);

-    const auto queue = device.GetGraphicsQueue();
-    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
+    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1U : 0U,
                                     &semaphore);
-    queue.submit({submit_info}, *current_fence, dld);
+    queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld);
 }

 void VKScheduler::AllocateNewContext() {
+    std::unique_lock lock{mutex};
    current_fence = next_fence;
-    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
    next_fence = &resource_manager.CommitFence();

-    const auto& dld = device.GetDispatchLoader();
-    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
+    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
+    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
+                         device.GetDispatchLoader());
+}
+
+void VKScheduler::InvalidateState() {
+    state.graphics_pipeline = nullptr;
+    state.viewports = false;
+    state.scissors = false;
+    state.depth_bias = false;
+    state.blend_constants = false;
+    state.depth_bounds = false;
+    state.stencil_values = false;
+}
+
+void VKScheduler::EndPendingOperations() {
+    EndRenderPass();
+}
+
+void VKScheduler::EndRenderPass() {
+    if (!state.renderpass) {
+        return;
+    }
+    state.renderpass = std::nullopt;
+    Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); });
+}
+
+void VKScheduler::AcquireNewChunk() {
+    if (chunk_reserve.Empty()) {
+        chunk = std::make_unique<CommandChunk>();
+        return;
+    }
+    chunk = std::move(chunk_reserve.Front());
+    chunk_reserve.Pop();
 }

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@ -4,7 +4,14 @@

 #pragma once

+#include <condition_variable>
+#include <memory>
+#include <optional>
+#include <stack>
+#include <thread>
+#include <utility>
 #include "common/common_types.h"
+#include "common/threadsafe_queue.h"
 #include "video_core/renderer_vulkan/declarations.h"

 namespace Vulkan {
@ -30,23 +37,6 @@ private:
    VKFence* const& fence;
 };

-class VKCommandBufferView {
-public:
-    VKCommandBufferView() = default;
-    VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
-
-    const vk::CommandBuffer* operator->() const noexcept {
-        return &cmdbuf;
-    }
-
-    operator vk::CommandBuffer() const noexcept {
-        return cmdbuf;
-    }
-
-private:
-    const vk::CommandBuffer& cmdbuf;
-};
-
 /// The scheduler abstracts command buffer and fence management with an interface that's able to do
 /// OpenGL-like operations on Vulkan command buffers.
 class VKScheduler {
@ -54,32 +44,190 @@ public:
    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
    ~VKScheduler();

-    /// Gets a reference to the current fence.
-    VKFenceView GetFence() const {
-        return current_fence;
-    }
-
-    /// Gets a reference to the current command buffer.
-    VKCommandBufferView GetCommandBuffer() const {
-        return current_cmdbuf;
-    }
-
    /// Sends the current execution context to the GPU.
    void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);

    /// Sends the current execution context to the GPU and waits for it to complete.
    void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);

+    /// Waits for the worker thread to finish executing everything. After this function returns it's
+    /// safe to touch worker resources.
+    void WaitWorker();
+
+    /// Sends currently recorded work to the worker thread.
+    void DispatchWork();
+
+    /// Requests to begin a renderpass.
+    void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi);
+
+    /// Requests the current executino context to be able to execute operations only allowed outside
+    /// of a renderpass.
+    void RequestOutsideRenderPassOperationContext();
+
+    /// Binds a pipeline to the current execution context.
+    void BindGraphicsPipeline(vk::Pipeline pipeline);
+
+    /// Returns true when viewports have been set in the current command buffer.
+    bool TouchViewports() {
+        return std::exchange(state.viewports, true);
+    }
+
+    /// Returns true when scissors have been set in the current command buffer.
+    bool TouchScissors() {
+        return std::exchange(state.scissors, true);
+    }
+
+    /// Returns true when depth bias have been set in the current command buffer.
+    bool TouchDepthBias() {
+        return std::exchange(state.depth_bias, true);
+    }
+
+    /// Returns true when blend constants have been set in the current command buffer.
+    bool TouchBlendConstants() {
+        return std::exchange(state.blend_constants, true);
+    }
+
+    /// Returns true when depth bounds have been set in the current command buffer.
+    bool TouchDepthBounds() {
+        return std::exchange(state.depth_bounds, true);
+    }
+
+    /// Returns true when stencil values have been set in the current command buffer.
+    bool TouchStencilValues() {
+        return std::exchange(state.stencil_values, true);
+    }
+
+    /// Send work to a separate thread.
+    template <typename T>
+    void Record(T&& command) {
+        if (chunk->Record(command)) {
+            return;
+        }
+        DispatchWork();
+        (void)chunk->Record(command);
+    }
+
+    /// Gets a reference to the current fence.
+    VKFenceView GetFence() const {
+        return current_fence;
+    }
+
 private:
+    class Command {
+    public:
+        virtual ~Command() = default;
+
+        virtual void Execute(vk::CommandBuffer cmdbuf,
+                             const vk::DispatchLoaderDynamic& dld) const = 0;
+
+        Command* GetNext() const {
+            return next;
+        }
+
+        void SetNext(Command* next_) {
+            next = next_;
+        }
+
+    private:
+        Command* next = nullptr;
+    };
+
+    template <typename T>
+    class TypedCommand final : public Command {
+    public:
+        explicit TypedCommand(T&& command) : command{std::move(command)} {}
+        ~TypedCommand() override = default;
+
+        TypedCommand(TypedCommand&&) = delete;
+        TypedCommand& operator=(TypedCommand&&) = delete;
+
+        void Execute(vk::CommandBuffer cmdbuf,
+                     const vk::DispatchLoaderDynamic& dld) const override {
+            command(cmdbuf, dld);
+        }
+
+    private:
+        T command;
+    };
+
+    class CommandChunk final {
+    public:
+        void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld);
+
+        template <typename T>
+        bool Record(T& command) {
+            using FuncType = TypedCommand<T>;
+            static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
+
+            if (command_offset > sizeof(data) - sizeof(FuncType)) {
+                return false;
+            }
+
+            Command* current_last = last;
+
+            last = new (data.data() + command_offset) FuncType(std::move(command));
+
+            if (current_last) {
+                current_last->SetNext(last);
+            } else {
+                first = last;
+            }
+
+            command_offset += sizeof(FuncType);
+            return true;
+        }
+
+        bool Empty() const {
+            return command_offset == 0;
+        }
+
+    private:
+        Command* first = nullptr;
+        Command* last = nullptr;
+
+        std::size_t command_offset = 0;
+        std::array<u8, 0x8000> data{};
+    };
+
+    void WorkerThread();
+
    void SubmitExecution(vk::Semaphore semaphore);

    void AllocateNewContext();

+    void InvalidateState();
+
+    void EndPendingOperations();
+
+    void EndRenderPass();
+
+    void AcquireNewChunk();
+
    const VKDevice& device;
    VKResourceManager& resource_manager;
    vk::CommandBuffer current_cmdbuf;
    VKFence* current_fence = nullptr;
    VKFence* next_fence = nullptr;
+
+    struct State {
+        std::optional<vk::RenderPassBeginInfo> renderpass;
+        vk::Pipeline graphics_pipeline;
+        bool viewports = false;
+        bool scissors = false;
+        bool depth_bias = false;
+        bool blend_constants = false;
+        bool depth_bounds = false;
+        bool stencil_values = false;
+    } state;
+
+    std::unique_ptr<CommandChunk> chunk;
+    std::thread worker_thread;
+
+    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
+    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
+    std::mutex mutex;
+    std::condition_variable cv;
+    bool quit = false;
 };

 } // namespace Vulkan