Shader/Pipeline Cache: Use VAddr instead of physical memory for addressing.

This commit is contained in:
Fernando Sahmkow 2020-04-05 19:18:00 -04:00
parent 3dd5c07454
commit ea535d9470
7 changed files with 62 additions and 87 deletions

View File

@ -18,22 +18,14 @@
class RasterizerCacheObject {
public:
explicit RasterizerCacheObject(const u8* host_ptr)
: host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
virtual ~RasterizerCacheObject();
CacheAddr GetCacheAddr() const {
return cache_addr;
VAddr GetCpuAddr() const {
return cpu_addr;
}
const u8* GetHostPtr() const {
return host_ptr;
}
/// Gets the address of the shader in guest memory, required for cache management
virtual VAddr GetCpuAddr() const = 0;
/// Gets the size of the shader in guest memory, required for cache management
virtual std::size_t GetSizeInBytes() const = 0;
@ -68,8 +60,7 @@ private:
bool is_registered{}; ///< Whether the object is currently registered with the cache
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
};
template <class T>
@ -80,7 +71,7 @@ public:
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
/// Write any cached resources overlapping the specified region back to memory
void FlushRegion(CacheAddr addr, std::size_t size) {
void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@ -90,7 +81,7 @@ public:
}
/// Mark the specified region as being invalidated
void InvalidateRegion(CacheAddr addr, u64 size) {
void InvalidateRegion(VAddr addr, u64 size) {
std::lock_guard lock{mutex};
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@ -114,27 +105,20 @@ public:
protected:
/// Tries to get an object from the cache with the specified cache address
T TryGet(CacheAddr addr) const {
T TryGet(VAddr addr) const {
const auto iter = map_cache.find(addr);
if (iter != map_cache.end())
return iter->second;
return nullptr;
}
T TryGet(const void* addr) const {
const auto iter = map_cache.find(ToCacheAddr(addr));
if (iter != map_cache.end())
return iter->second;
return nullptr;
}
/// Register an object into the cache
virtual void Register(const T& object) {
std::lock_guard lock{mutex};
object->SetIsRegistered(true);
interval_cache.add({GetInterval(object), ObjectSet{object}});
map_cache.insert({object->GetCacheAddr(), object});
map_cache.insert({object->GetCpuAddr(), object});
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
}
@ -144,7 +128,7 @@ protected:
object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
const CacheAddr addr = object->GetCacheAddr();
const VAddr addr = object->GetCpuAddr();
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
map_cache.erase(addr);
}
@ -173,7 +157,7 @@ protected:
private:
/// Returns a list of cached objects from the specified memory region, ordered by access time
std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
if (size == 0) {
return {};
}
@ -197,13 +181,13 @@ private:
}
using ObjectSet = std::set<T>;
using ObjectCache = std::unordered_map<CacheAddr, T>;
using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
using ObjectCache = std::unordered_map<VAddr, T>;
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
using ObjectInterval = typename IntervalCache::interval_type;
static auto GetInterval(const T& object) {
return ObjectInterval::right_open(object->GetCacheAddr(),
object->GetCacheAddr() + object->GetSizeInBytes());
return ObjectInterval::right_open(object->GetCpuAddr(),
object->GetCpuAddr() + object->GetSizeInBytes());
}
ObjectCache map_cache;

View File

@ -671,9 +671,8 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
if (!addr || !size) {
return;
}
CacheAddr cache_addr = ToCacheAddr(system.Memory().GetPointer(addr));
texture_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(cache_addr, size);
shader_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
}

View File

@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() {
} // Anonymous namespace
CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program)
: RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
: RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
size_in_bytes{size_in_bytes}, program{std::move(program)} {}
CachedShader::~CachedShader() = default;
@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
size_in_bytes, std::move(registry),
MakeEntries(ir), std::move(program)));
return std::shared_ptr<CachedShader>(new CachedShader(
params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
}
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
@ -279,16 +278,15 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
size_in_bytes, std::move(registry),
MakeEntries(ir), std::move(program)));
return std::shared_ptr<CachedShader>(new CachedShader(
params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
}
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
const PrecompiledShader& precompiled_shader,
std::size_t size_in_bytes) {
return std::shared_ptr<CachedShader>(new CachedShader(
params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
return std::shared_ptr<CachedShader>(
new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
precompiled_shader.entries, precompiled_shader.program));
}
@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const GPUVAddr address{GetShaderAddress(system, program)};
// Look up shader in the cache based on address
const auto host_ptr{memory_manager.GetPointer(address)};
Shader shader{TryGet(host_ptr)};
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
if (shader) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
const auto host_ptr{memory_manager.GetPointer(address)};
// No shader found - create a new one
ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
ProgramCode code_b;
@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const auto unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};
*cpu_addr, host_ptr, unique_identifier};
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto& memory_manager{system.GPU().MemoryManager()};
const auto host_ptr{memory_manager.GetPointer(code_addr)};
auto kernel = TryGet(host_ptr);
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (kernel) {
return kernel;
}
const auto host_ptr{memory_manager.GetPointer(code_addr)};
// No kernel found, create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};
*cpu_addr, host_ptr, unique_identifier};
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {

View File

@ -65,11 +65,6 @@ public:
/// Gets the GL program handle for the shader
GLuint GetHandle() const;
/// Returns the guest CPU address of the shader
VAddr GetCpuAddr() const override {
return cpu_addr;
}
/// Returns the size in bytes of the shader
std::size_t GetSizeInBytes() const override {
return size_in_bytes;
@ -90,13 +85,12 @@ public:
std::size_t size_in_bytes);
private:
explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program);
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
VAddr cpu_addr = 0;
std::size_t size_in_bytes = 0;
std::shared_ptr<OGLProgram> program;
};

View File

@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
} // Anonymous namespace
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
ProgramCode program_code, u32 main_offset)
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
shader_ir{this->program_code, main_offset, compiler_settings, registry},
GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
u32 main_offset)
: RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
compiler_settings, registry},
entries{GenerateShaderEntries(shader_ir)} {}
CachedShader::~CachedShader() = default;
@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
auto& memory_manager{system.GPU().MemoryManager()};
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const auto host_ptr{memory_manager.GetPointer(program_addr)};
auto shader = TryGet(host_ptr);
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (!shader) {
const auto host_ptr{memory_manager.GetPointer(program_addr)};
// No shader found - create a new one
constexpr u32 stage_offset = 10;
const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
host_ptr, std::move(code), stage_offset);
std::move(code), stage_offset);
Register(shader);
}
shaders[index] = std::move(shader);
@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
auto& memory_manager = system.GPU().MemoryManager();
const auto program_addr = key.shader;
const auto host_ptr = memory_manager.GetPointer(program_addr);
auto shader = TryGet(host_ptr);
if (!shader) {
// No shader found - create a new one
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (!shader) {
// No shader found - create a new one
const auto host_ptr = memory_manager.GetPointer(program_addr);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
constexpr u32 kernel_main_offset = 0;
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
program_addr, *cpu_addr, host_ptr, std::move(code),
program_addr, *cpu_addr, std::move(code),
kernel_main_offset);
Register(shader);
}
@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
}
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
const auto shader = TryGet(host_ptr);
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
const auto shader = TryGet(*cpu_addr);
ASSERT(shader);
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5

View File

@ -113,17 +113,13 @@ namespace Vulkan {
class CachedShader final : public RasterizerCacheObject {
public:
explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
~CachedShader();
GPUVAddr GetGpuAddr() const {
return gpu_addr;
}
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return program_code.size() * sizeof(u64);
}
@ -149,7 +145,6 @@ private:
Tegra::Engines::ShaderType stage);
GPUVAddr gpu_addr{};
VAddr cpu_addr{};
ProgramCode program_code;
VideoCommon::Shader::Registry registry;
VideoCommon::Shader::ShaderIR shader_ir;

View File

@ -509,9 +509,8 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
if (!addr || !size) {
return;
}
CacheAddr cache_addr = ToCacheAddr(system.Memory().GetPointer(addr));
texture_cache.InvalidateRegion(addr, size);
pipeline_cache.InvalidateRegion(cache_addr, size);
pipeline_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
}