From aa6587d854e4953876b02ca71278a665bcae8179 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 4 Aug 2023 13:38:49 +0200 Subject: [PATCH] QueryCache: Implement dependant queries. --- src/video_core/query_cache/query_base.h | 1 + src/video_core/query_cache/query_cache.h | 18 +- src/video_core/query_cache/query_stream.h | 6 +- .../renderer_vulkan/vk_query_cache.cpp | 160 +++++++++++++++++- 4 files changed, 180 insertions(+), 5 deletions(-) diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h index 485ed669c7..0ae23af9f2 100644 --- a/src/video_core/query_cache/query_base.h +++ b/src/video_core/query_cache/query_base.h @@ -18,6 +18,7 @@ enum class QueryFlagBits : u32 { IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. IsFence = 1 << 8, ///< Indicates the query is a fence. + IsQueuedForAsyncFlush = 1 <<9,///< Indicates that the query can be flushed at any moment }; DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index f6af48d14a..f1393d5c77 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -489,8 +489,22 @@ void QueryCacheBase::PopAsyncFlushes() { if (mask == 0) { return; } - impl->ForEachStreamerIn(mask, - [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); }); + u64 ran_mask = 0; + u64 next_phase = 0; + while (mask) { + impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) { + u64 dep_mask = streamer->GetDependenceMask(); + if ((dep_mask & ~ran_mask) != 0) { + next_phase |= dep_mask; + return; + } + u64 index = streamer->GetId(); + ran_mask |= (1ULL << index); + mask &= ~(1ULL << index); + streamer->PopUnsyncedQueries(); + }); + ran_mask |= next_phase; + } } // Invalidation diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h index dd5f95b3c6..0e92755657 100644 --- a/src/video_core/query_cache/query_stream.h +++ b/src/video_core/query_cache/query_stream.h @@ -70,6 +70,10 @@ public: return id; } + u64 GetDependenceMask() const { + return dependance_mask; + } + protected: const size_t id; const u64 dependance_mask; @@ -78,7 +82,7 @@ protected: template class SimpleStreamer : public StreamerInterface { public: - SimpleStreamer(size_t id_) : StreamerInterface{id_} {} + SimpleStreamer(size_t id_, u64 dependance_mask_ = 0) : StreamerInterface{id_, dependance_mask_} {} virtual ~SimpleStreamer() = default; protected: diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 42f5710072..ef891e26b5 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -525,6 +525,9 @@ private: vk::Buffer buffer; }; +template +class PrimitivesSucceededStreamer; + template class TFBCounterStreamer : public BaseStreamer { public: @@ -537,6 +540,7 @@ public: current_bank = nullptr; counter_buffers.fill(VK_NULL_HANDLE); offsets.fill(0); + last_queries.fill(0); const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -630,7 +634,7 @@ public: return index; } const size_t subreport = static_cast(*subreport_); - UpdateBuffers(); + last_queries[subreport] = address; if ((streams_mask & (1ULL << subreport)) == 0) { new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; return index; @@ -646,6 +650,13 @@ public: return index; } + std::optional GetLastQueryStream(size_t stream) { + if (last_queries[stream] != 0) { + return {last_queries[stream]}; + } + return std::nullopt; + } + bool HasUnsyncedQueries() override { return !pending_flush_queries.empty(); } @@ -657,6 +668,7 @@ public: size_t offset_base = staging_ref.offset; for (auto q : pending_flush_queries) { auto* query = GetQuery(q); + query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush; auto& bank = bank_pool.GetBank(query->start_bank_id); bank.Sync(staging_ref, offset_base, query->start_slot, 1); offset_base += TFBQueryBank::QUERY_SIZE; @@ -741,13 +753,15 @@ private: cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } else { - scheduler.Record([this, total = static_cast(buffers_count)](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, + total = static_cast(buffers_count)](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data()); }); } } void UpdateBuffers() { + last_queries.fill(0); runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { buffers_count = 0; for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; @@ -804,6 +818,9 @@ private: return {current_bank_id, slot}; } + template + friend class PrimitivesSucceededStreamer; + static constexpr size_t NUM_STREAMS = 4; static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL; @@ -833,9 +850,143 @@ private: size_t buffers_count{}; std::array counter_buffers{}; std::array offsets{}; + std::array last_queries; u64 streams_mask; }; +class PrimitivesQueryBase : public VideoCommon::QueryBase { +public: + // Default constructor + PrimitivesQueryBase() + : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, + dependant_index{}, dependant_manage{} {} + + // Parameterized constructor + PrimitivesQueryBase(bool is_long, VAddr address) + : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, + dependant_index{}, dependant_manage{} { + if (is_long) { + flags |= VideoCommon::QueryFlagBits::HasTimestamp; + } + } + + u64 stride; + VAddr dependant_address; + size_t dependant_index; + bool dependant_manage; +}; + +template +class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer { +public: + PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_, + TFBCounterStreamer& tfb_streamer_, Core::Memory::Memory& cpu_memory_) + : VideoCommon::SimpleStreamer( + id, 1ULL << static_cast(VideoCommon::QueryType::StreamingByteCount)), + runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {} + + size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, + std::optional subreport_) override { + auto index = BuildQuery(); + auto* new_query = GetQuery(index); + new_query->guest_address = address; + new_query->value = 0; + if (has_timestamp) { + new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; + } + if (!subreport_) { + new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; + return index; + } + const size_t subreport = static_cast(*subreport_); + auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport); + bool must_manage_dependance = false; + if (dependant_address_opt) { + new_query->dependant_address = *dependant_address_opt; + } else { + new_query->dependant_index = + tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_); + auto* dependant_query = tfb_streamer.GetQuery(new_query->dependant_index); + dependant_query->flags |= VideoCommon::QueryFlagBits::IsInvalidated; + must_manage_dependance = true; + if (True(dependant_query->flags & VideoCommon::QueryFlagBits::IsFinalValueSynced)) { + new_query->value = 0; + new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; + if (must_manage_dependance) { + tfb_streamer.Free(new_query->dependant_index); + } + return index; + } + } + + new_query->dependant_manage = must_manage_dependance; + runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) { + for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; + i++) { + const auto& tf = regs.transform_feedback; + if (tf.controls[i].stream != subreport) { + continue; + } + new_query->stride = tf.controls[i].stride; + break; + } + }); + pending_flush_queries.push_back(index); + return index; + } + + bool HasUnsyncedQueries() override { + return !pending_flush_queries.empty(); + } + + void PushUnsyncedQueries() override { + std::scoped_lock lk(flush_guard); + pending_flush_sets.emplace_back(std::move(pending_flush_queries)); + pending_flush_queries.clear(); + } + + void PopUnsyncedQueries() override { + std::vector flushed_queries; + { + std::scoped_lock lk(flush_guard); + flushed_queries = std::move(pending_flush_sets.front()); + pending_flush_sets.pop_front(); + } + + for (auto q : flushed_queries) { + auto* query = GetQuery(q); + if (True(query->flags & VideoCommon::QueryFlagBits::IsFinalValueSynced)) { + continue; + } + + query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; + if (query->dependant_manage) { + auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index); + query->value = dependant_query->value / query->stride; + tfb_streamer.Free(query->dependant_index); + } else { + u8* pointer = cpu_memory.GetPointer(query->dependant_address); + u32 result; + std::memcpy(&result, pointer, sizeof(u32)); + query->value = static_cast(result) / query->stride; + } + } + } + +private: + QueryCacheRuntime& runtime; + TFBCounterStreamer& tfb_streamer; + Core::Memory::Memory& cpu_memory; + + // syncing queue + std::vector pending_sync; + + // flush levels + std::vector pending_flush_queries; + std::deque> pending_flush_sets; + std::mutex flush_guard; +}; + } // namespace struct QueryCacheRuntimeImpl { @@ -853,6 +1004,8 @@ struct QueryCacheRuntimeImpl { scheduler, memory_allocator), tfb_streamer(static_cast(QueryType::StreamingByteCount), runtime, device, scheduler, memory_allocator, staging_pool), + primitives_succeeded_streamer( + static_cast(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_), hcr_setup{}, hcr_is_set{}, is_hcr_running{} { hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; @@ -889,6 +1042,7 @@ struct QueryCacheRuntimeImpl { VideoCommon::GuestStreamer guest_streamer; SamplesStreamer sample_streamer; TFBCounterStreamer tfb_streamer; + PrimitivesSucceededStreamer primitives_succeeded_streamer; std::vector> little_cache; std::vector> buffers_to_upload_to; @@ -1086,6 +1240,8 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp return &impl->sample_streamer; case QueryType::StreamingByteCount: return &impl->tfb_streamer; + case QueryType::StreamingPrimitivesSucceeded: + return &impl->primitives_succeeded_streamer; default: return nullptr; }