vulkan: Rework descriptor allocation algorithm

Create multiple descriptor pools on demand. There are some degrees of
freedom what is considered a compatible pool to avoid wasting large
pools on small descriptors.
This commit is contained in:
ReinUsesLisp 2021-04-25 00:15:32 -03:00 committed by ameerj
parent 5ed871398b
commit 2f3c3dfc10
15 changed files with 314 additions and 197 deletions

View File

@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA
.bindingCount = 1,
.pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
};
template <u32 num_textures>
inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{
.uniform_buffers = 0,
.storage_buffers = 0,
.texture_buffers = 0,
.image_buffers = 0,
.textures = num_textures,
.images = 0,
.score = 2,
};
constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@ -326,14 +336,16 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
} // Anonymous namespace
BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool)
StateTracker& state_tracker_, DescriptorPool& descriptor_pool)
: device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout),
two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout),
one_texture_descriptor_allocator{
descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)},
two_textures_descriptor_allocator{
descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)},
one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
@ -415,7 +427,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
}

View File

@ -31,7 +31,7 @@ struct BlitImagePipelineKey {
class BlitImageHelper {
public:
explicit BlitImageHelper(const Device& device, VKScheduler& scheduler,
StateTracker& state_tracker, VKDescriptorPool& descriptor_pool);
StateTracker& state_tracker, DescriptorPool& descriptor_pool);
~BlitImageHelper();
void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,

View File

@ -116,7 +116,7 @@ VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
VKDescriptorPool& descriptor_pool)
DescriptorPool& descriptor_pool)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),

View File

@ -16,7 +16,7 @@
namespace Vulkan {
class Device;
class VKDescriptorPool;
class DescriptorPool;
class VKScheduler;
class BufferCacheRuntime;
@ -61,7 +61,7 @@ public:
explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
VKDescriptorPool& descriptor_pool);
DescriptorPool& descriptor_pool);
void Finish();

View File

@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
constexpr size_t ASTC_NUM_BINDINGS = 4;
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
return {
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.offset = 0,
.size = static_cast<u32>(size),
};
}
template <size_t size>
inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.offset = 0,
.size = static_cast<u32>(size),
};
std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
return {{
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
}};
}
std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() {
return {{
{
.binding = ASTC_BINDING_INPUT_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_ENC_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_SWIZZLE_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_OUTPUT_IMAGE,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
}};
}
VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
return {
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 2,
constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = 0,
.stride = sizeof(DescriptorUpdateEntry),
};
}
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
}};
std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
BuildASTCPassDescriptorUpdateTemplateEntry() {
return {{
constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
.uniform_buffers = 0,
.storage_buffers = 2,
.texture_buffers = 0,
.image_buffers = 0,
.textures = 0,
.images = 0,
.score = 2,
};
constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{
{
.binding = ASTC_BINDING_INPUT_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_ENC_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_SWIZZLE_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_OUTPUT_IMAGE,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
}};
constexpr DescriptorBankInfo ASTC_BANK_INFO{
.uniform_buffers = 0,
.storage_buffers = 3,
.texture_buffers = 0,
.image_buffers = 0,
.textures = 0,
.images = 1,
.score = 4,
};
constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 2,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = 0,
.stride = sizeof(DescriptorUpdateEntry),
};
constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
{
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
.dstArrayElement = 0,
@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() {
.stride = sizeof(DescriptorUpdateEntry),
},
}};
}
struct AstcPushConstants {
std::array<u32, 2> blocks_dims;
@ -159,14 +170,13 @@ struct AstcPushConstants {
u32 block_height;
u32 block_height_mask;
};
} // Anonymous namespace
VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
vk::Span<VkPushConstantRange> push_constants,
std::span<const u32> code) {
ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
const DescriptorBankInfo& bank_info,
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code) {
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@ -196,8 +206,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
.pipelineLayout = *layout,
.set = 0,
});
descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info);
}
module = device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
@ -226,23 +235,23 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
});
}
VKComputePass::~VKComputePass() = default;
ComputePass::~ComputePass() = default;
VkDescriptorSet VKComputePass::CommitDescriptorSet(
VKUpdateDescriptorQueue& update_descriptor_queue) {
VkDescriptorSet ComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue) {
if (!descriptor_template) {
return nullptr;
}
const VkDescriptorSet set = descriptor_allocator->Commit();
const VkDescriptorSet set = descriptor_allocator.Commit();
update_descriptor_queue.Send(descriptor_template.address(), set);
return set;
}
Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV),
: ComputePass(device, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {},
VULKAN_UINT8_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
@ -277,12 +286,12 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
}
QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
BuildInputOutputDescriptorUpdateTemplate(),
BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV),
: ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
@ -337,14 +346,13 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
}
ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
MemoryAllocator& memory_allocator_)
: VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(),
BuildASTCPassDescriptorUpdateTemplateEntry(),
BuildComputePushConstantRange(sizeof(AstcPushConstants)),
ASTC_DECODER_COMP_SPV),
: ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}

View File

@ -4,7 +4,6 @@
#pragma once
#include <optional>
#include <span>
#include <utility>
@ -27,13 +26,14 @@ class VKUpdateDescriptorQueue;
class Image;
struct StagingBufferRef;
class VKComputePass {
class ComputePass {
public:
explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
~VKComputePass();
explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
const DescriptorBankInfo& bank_info,
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
~ComputePass();
protected:
VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue);
@ -44,14 +44,14 @@ protected:
private:
vk::DescriptorSetLayout descriptor_set_layout;
std::optional<DescriptorAllocator> descriptor_allocator;
DescriptorAllocator descriptor_allocator;
vk::ShaderModule module;
};
class Uint8Pass final : public VKComputePass {
class Uint8Pass final : public ComputePass {
public:
explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_);
~Uint8Pass();
@ -66,10 +66,10 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue;
};
class QuadIndexedPass final : public VKComputePass {
class QuadIndexedPass final : public ComputePass {
public:
explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_);
~QuadIndexedPass();
@ -84,10 +84,10 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue;
};
class ASTCDecoderPass final : public VKComputePass {
class ASTCDecoderPass final : public ComputePass {
public:
explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
MemoryAllocator& memory_allocator_);

View File

@ -18,7 +18,7 @@
namespace Vulkan {
ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
ComputePipeline::ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_,
Common::ThreadWorker* thread_worker, const Shader::Info& info_,
vk::ShaderModule spv_module_)
@ -30,7 +30,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip
descriptor_set_layout = builder.CreateDescriptorSetLayout();
pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout);
descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
auto func{[this, &device] {
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{

View File

@ -25,7 +25,7 @@ class VKScheduler;
class ComputePipeline {
public:
explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
Common::ThreadWorker* thread_worker, const Shader::Info& info,
vk::ShaderModule spv_module);

View File

@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <mutex>
#include <span>
#include <vector>
#include "common/common_types.h"
@ -13,77 +15,149 @@
namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
constexpr std::size_t SETS_GROW_RATE = 0x20;
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
constexpr size_t SETS_GROW_RATE = 16;
constexpr s32 SCORE_THRESHOLD = 3;
constexpr u32 SETS_PER_POOL = 64;
DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_,
VkDescriptorSetLayout layout_)
: ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE),
descriptor_pool{&descriptor_pool_}, layout{layout_} {}
struct DescriptorBank {
DescriptorBankInfo info;
std::vector<vk::DescriptorPool> pools;
};
VkDescriptorSet DescriptorAllocator::Commit() {
const std::size_t index = CommitResource();
return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept {
return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers &&
texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers &&
textures >= subset.textures && images >= subset.image_buffers;
}
void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin));
template <typename Descriptors>
static u32 Accumulate(const Descriptors& descriptors) {
u32 count = 0;
for (const auto& descriptor : descriptors) {
count += descriptor.count;
}
return count;
}
VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler)
: device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{
AllocateNewPool()} {}
static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
DescriptorBankInfo bank;
for (const Shader::Info& info : infos) {
bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors);
bank.storage_buffers += Accumulate(info.storage_buffers_descriptors);
bank.texture_buffers += Accumulate(info.texture_buffer_descriptors);
bank.image_buffers += Accumulate(info.image_buffer_descriptors);
bank.textures += Accumulate(info.texture_descriptors);
bank.images += Accumulate(info.image_descriptors);
}
bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers +
bank.image_buffers + bank.textures + bank.images;
return bank;
}
VKDescriptorPool::~VKDescriptorPool() = default;
vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
static constexpr u32 num_sets = 0x20000;
static constexpr VkDescriptorPoolSize pool_sizes[] = {
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90},
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40},
static void AllocatePool(const Device& device, DescriptorBank& bank) {
std::array<VkDescriptorPoolSize, 6> pool_sizes;
size_t pool_cursor{};
const auto add = [&](VkDescriptorType type, u32 count) {
if (count > 0) {
pool_sizes[pool_cursor++] = {
.type = type,
.descriptorCount = count * SETS_PER_POOL,
};
}
};
const VkDescriptorPoolCreateInfo ci{
const auto& info{bank.info};
add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers);
add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers);
add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers);
add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers);
add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures);
add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images);
bank.pools.push_back(device.GetLogical().CreateDescriptorPool({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
.maxSets = num_sets,
.poolSizeCount = static_cast<u32>(std::size(pool_sizes)),
.maxSets = SETS_PER_POOL,
.poolSizeCount = static_cast<u32>(pool_cursor),
.pPoolSizes = std::data(pool_sizes),
};
return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci));
}));
}
vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout,
std::size_t count) {
const std::vector layout_copies(count, layout);
VkDescriptorSetAllocateInfo ai{
DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
DescriptorBank& bank_, VkDescriptorSetLayout layout_)
: ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_},
layout{layout_} {}
VkDescriptorSet DescriptorAllocator::Commit() {
const size_t index = CommitResource();
return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
}
void DescriptorAllocator::Allocate(size_t begin, size_t end) {
sets.push_back(AllocateDescriptors(end - begin));
}
vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) {
const std::vector<VkDescriptorSetLayout> layouts(count, layout);
VkDescriptorSetAllocateInfo allocate_info{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.pNext = nullptr,
.descriptorPool = **active_pool,
.descriptorPool = *bank->pools.back(),
.descriptorSetCount = static_cast<u32>(count),
.pSetLayouts = layout_copies.data(),
.pSetLayouts = layouts.data(),
};
vk::DescriptorSets sets = active_pool->Allocate(ai);
if (!sets.IsOutOfPoolMemory()) {
return sets;
vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info);
if (!new_sets.IsOutOfPoolMemory()) {
return new_sets;
}
// Our current pool is out of memory. Allocate a new one and retry
active_pool = AllocateNewPool();
ai.descriptorPool = **active_pool;
sets = active_pool->Allocate(ai);
if (!sets.IsOutOfPoolMemory()) {
return sets;
AllocatePool(*device, *bank);
allocate_info.descriptorPool = *bank->pools.back();
new_sets = bank->pools.back().Allocate(allocate_info);
if (!new_sets.IsOutOfPoolMemory()) {
return new_sets;
}
// After allocating a new pool, we are out of memory again. We can't handle this from here.
throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY);
}
DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler)
: device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {}
DescriptorPool::~DescriptorPool() = default;
DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
std::span<const Shader::Info> infos) {
return Allocator(layout, MakeBankInfo(infos));
}
DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
const Shader::Info& info) {
return Allocator(layout, MakeBankInfo(std::array{info}));
}
DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
const DescriptorBankInfo& info) {
return DescriptorAllocator(device, master_semaphore, Bank(info), layout);
}
DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) {
std::shared_lock read_lock{banks_mutex};
const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) {
return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
});
if (it != bank_infos.end()) {
return *banks[std::distance(bank_infos.begin(), it)].get();
}
read_lock.unlock();
std::unique_lock write_lock{banks_mutex};
bank_infos.push_back(reqs);
auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>());
bank.info = reqs;
AllocatePool(device, bank);
return bank;
}
} // namespace Vulkan

View File

@ -4,21 +4,38 @@
#pragma once
#include <shared_mutex>
#include <span>
#include <vector>
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class VKDescriptorPool;
class VKScheduler;
struct DescriptorBank;
struct DescriptorBankInfo {
[[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept;
u32 uniform_buffers{}; ///< Number of uniform buffer descriptors
u32 storage_buffers{}; ///< Number of storage buffer descriptors
u32 texture_buffers{}; ///< Number of texture buffer descriptors
u32 image_buffers{}; ///< Number of image buffer descriptors
u32 textures{}; ///< Number of texture descriptors
u32 images{}; ///< Number of image descriptors
s32 score{}; ///< Number of descriptors in total
};
class DescriptorAllocator final : public ResourcePool {
friend class DescriptorPool;
public:
explicit DescriptorAllocator() = default;
explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
~DescriptorAllocator() override = default;
DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
@ -29,36 +46,43 @@ public:
VkDescriptorSet Commit();
protected:
void Allocate(std::size_t begin, std::size_t end) override;
private:
VKDescriptorPool* descriptor_pool{};
explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
DescriptorBank& bank_, VkDescriptorSetLayout layout_);
void Allocate(size_t begin, size_t end) override;
vk::DescriptorSets AllocateDescriptors(size_t count);
const Device* device{};
DescriptorBank* bank{};
VkDescriptorSetLayout layout{};
std::vector<vk::DescriptorSets> descriptors_allocations;
std::vector<vk::DescriptorSets> sets;
};
class VKDescriptorPool final {
friend DescriptorAllocator;
class DescriptorPool {
public:
explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler);
~VKDescriptorPool();
explicit DescriptorPool(const Device& device, VKScheduler& scheduler);
~DescriptorPool();
VKDescriptorPool(const VKDescriptorPool&) = delete;
VKDescriptorPool& operator=(const VKDescriptorPool&) = delete;
DescriptorPool& operator=(const DescriptorPool&) = delete;
DescriptorPool(const DescriptorPool&) = delete;
DescriptorAllocator Allocator(VkDescriptorSetLayout layout,
std::span<const Shader::Info> infos);
DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info);
DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info);
private:
vk::DescriptorPool* AllocateNewPool();
vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count);
DescriptorBank& Bank(const DescriptorBankInfo& reqs);
const Device& device;
MasterSemaphore& master_semaphore;
std::vector<vk::DescriptorPool> pools;
vk::DescriptorPool* active_pool;
std::shared_mutex banks_mutex;
std::vector<DescriptorBankInfo> bank_infos;
std::vector<std::unique_ptr<DescriptorBank>> banks;
};
} // namespace Vulkan

View File

@ -205,7 +205,7 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m
GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_,
BufferCache& buffer_cache_, TextureCache& texture_cache_,
const Device& device, VKDescriptorPool& descriptor_pool,
const Device& device, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_,
Common::ThreadWorker* worker_thread,
RenderPassCache& render_pass_cache,
@ -220,7 +220,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
descriptor_set_layout = builder.CreateDescriptorSetLayout();
descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
auto func{[this, &device, &render_pass_cache, builder] {
const VkDescriptorSetLayout set_layout{*descriptor_set_layout};

View File

@ -67,7 +67,7 @@ public:
explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler,
BufferCache& buffer_cache, TextureCache& texture_cache,
const Device& device, VKDescriptorPool& descriptor_pool,
const Device& device, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
Common::ThreadWorker* worker_thread,
RenderPassCache& render_pass_cache,

View File

@ -647,7 +647,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_,
VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
TextureCache& texture_cache_)

View File

@ -75,10 +75,10 @@ namespace Vulkan {
class ComputePipeline;
class Device;
class DescriptorPool;
class GenericEnvironment;
class RasterizerVulkan;
class RenderPassCache;
class VKDescriptorPool;
class VKScheduler;
class VKUpdateDescriptorQueue;
@ -105,7 +105,7 @@ public:
Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::KeplerCompute& kepler_compute,
Tegra::MemoryManager& gpu_memory, const Device& device,
VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
VKScheduler& scheduler, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
TextureCache& texture_cache);
@ -147,7 +147,7 @@ private:
const Device& device;
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
DescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
RenderPassCache& render_pass_cache;
BufferCache& buffer_cache;

View File

@ -147,7 +147,7 @@ private:
VKScheduler& scheduler;
StagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;
DescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
BlitImageHelper blit_image;
ASTCDecoderPass astc_decoder_pass;