Texture Cache: Use vAddr instead of physical memory for caching.

This commit is contained in:
Fernando Sahmkow 2020-04-05 15:26:16 -04:00
parent 9c0f40a1f5
commit 6ee316cb8f
5 changed files with 81 additions and 130 deletions

View File

@ -662,7 +662,7 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
return; return;
} }
CacheAddr cache_addr = ToCacheAddr(system.Memory().GetPointer(addr)); CacheAddr cache_addr = ToCacheAddr(system.Memory().GetPointer(addr));
texture_cache.FlushRegion(cache_addr, size); texture_cache.FlushRegion(addr, size);
buffer_cache.FlushRegion(cache_addr, size); buffer_cache.FlushRegion(cache_addr, size);
query_cache.FlushRegion(cache_addr, size); query_cache.FlushRegion(cache_addr, size);
} }
@ -673,7 +673,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
return; return;
} }
CacheAddr cache_addr = ToCacheAddr(system.Memory().GetPointer(addr)); CacheAddr cache_addr = ToCacheAddr(system.Memory().GetPointer(addr));
texture_cache.InvalidateRegion(cache_addr, size); texture_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(cache_addr, size); shader_cache.InvalidateRegion(cache_addr, size);
buffer_cache.InvalidateRegion(cache_addr, size); buffer_cache.InvalidateRegion(cache_addr, size);
query_cache.InvalidateRegion(cache_addr, size); query_cache.InvalidateRegion(cache_addr, size);
@ -718,8 +718,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
const auto surface{ const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))};
if (!surface) { if (!surface) {
return {}; return {};
} }

View File

@ -500,7 +500,7 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
return; return;
} }
CacheAddr cache_addr = ToCacheAddr(system.Memory().GetPointer(addr)); CacheAddr cache_addr = ToCacheAddr(system.Memory().GetPointer(addr));
texture_cache.FlushRegion(cache_addr, size); texture_cache.FlushRegion(addr, size);
buffer_cache.FlushRegion(cache_addr, size); buffer_cache.FlushRegion(cache_addr, size);
query_cache.FlushRegion(cache_addr, size); query_cache.FlushRegion(cache_addr, size);
} }
@ -510,7 +510,7 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
return; return;
} }
CacheAddr cache_addr = ToCacheAddr(system.Memory().GetPointer(addr)); CacheAddr cache_addr = ToCacheAddr(system.Memory().GetPointer(addr));
texture_cache.InvalidateRegion(cache_addr, size); texture_cache.InvalidateRegion(addr, size);
pipeline_cache.InvalidateRegion(cache_addr, size); pipeline_cache.InvalidateRegion(cache_addr, size);
buffer_cache.InvalidateRegion(cache_addr, size); buffer_cache.InvalidateRegion(cache_addr, size);
query_cache.InvalidateRegion(cache_addr, size); query_cache.InvalidateRegion(cache_addr, size);
@ -548,8 +548,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return false; return false;
} }
const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
if (!surface) { if (!surface) {
return false; return false;
} }

View File

@ -190,22 +190,11 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
MICROPROFILE_SCOPE(GPU_Load_Texture); MICROPROFILE_SCOPE(GPU_Load_Texture);
auto& staging_buffer = staging_cache.GetBuffer(0); auto& staging_buffer = staging_cache.GetBuffer(0);
u8* host_ptr; u8* host_ptr;
is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);
// Handle continuouty
if (is_continuous) {
// Use physical memory directly
host_ptr = memory_manager.GetPointer(gpu_addr);
if (!host_ptr) {
return;
}
} else {
// Use an extra temporal buffer // Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1); auto& tmp_buffer = staging_cache.GetBuffer(1);
tmp_buffer.resize(guest_memory_size); tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data(); host_ptr = tmp_buffer.data();
memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
if (params.is_tiled) { if (params.is_tiled) {
ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
@ -257,19 +246,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
auto& staging_buffer = staging_cache.GetBuffer(0); auto& staging_buffer = staging_cache.GetBuffer(0);
u8* host_ptr; u8* host_ptr;
// Handle continuouty
if (is_continuous) {
// Use physical memory directly
host_ptr = memory_manager.GetPointer(gpu_addr);
if (!host_ptr) {
return;
}
} else {
// Use an extra temporal buffer // Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1); auto& tmp_buffer = staging_cache.GetBuffer(1);
tmp_buffer.resize(guest_memory_size); tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data(); host_ptr = tmp_buffer.data();
}
if (params.is_tiled) { if (params.is_tiled) {
ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
@ -300,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
} }
} }
} }
if (!is_continuous) {
memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
} }
}
} // namespace VideoCommon } // namespace VideoCommon

View File

@ -68,8 +68,8 @@ public:
return gpu_addr; return gpu_addr;
} }
bool Overlaps(const CacheAddr start, const CacheAddr end) const { bool Overlaps(const VAddr start, const VAddr end) const {
return (cache_addr < end) && (cache_addr_end > start); return (cpu_addr < end) && (cpu_addr_end > start);
} }
bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
@ -86,21 +86,13 @@ public:
return cpu_addr; return cpu_addr;
} }
VAddr GetCpuAddrEnd() const {
return cpu_addr_end;
}
void SetCpuAddr(const VAddr new_addr) { void SetCpuAddr(const VAddr new_addr) {
cpu_addr = new_addr; cpu_addr = new_addr;
} cpu_addr_end = new_addr + guest_memory_size;
CacheAddr GetCacheAddr() const {
return cache_addr;
}
CacheAddr GetCacheAddrEnd() const {
return cache_addr_end;
}
void SetCacheAddr(const CacheAddr new_addr) {
cache_addr = new_addr;
cache_addr_end = new_addr + guest_memory_size;
} }
const SurfaceParams& GetSurfaceParams() const { const SurfaceParams& GetSurfaceParams() const {
@ -119,14 +111,6 @@ public:
return mipmap_sizes[level]; return mipmap_sizes[level];
} }
void MarkAsContinuous(const bool is_continuous) {
this->is_continuous = is_continuous;
}
bool IsContinuous() const {
return is_continuous;
}
bool IsLinear() const { bool IsLinear() const {
return !params.is_tiled; return !params.is_tiled;
} }
@ -175,10 +159,8 @@ protected:
std::size_t guest_memory_size; std::size_t guest_memory_size;
std::size_t host_memory_size; std::size_t host_memory_size;
GPUVAddr gpu_addr{}; GPUVAddr gpu_addr{};
CacheAddr cache_addr{};
CacheAddr cache_addr_end{};
VAddr cpu_addr{}; VAddr cpu_addr{};
bool is_continuous{}; VAddr cpu_addr_end{};
bool is_converted{}; bool is_converted{};
std::vector<std::size_t> mipmap_sizes; std::vector<std::size_t> mipmap_sizes;

View File

@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
template <typename TSurface, typename TView> template <typename TSurface, typename TView>
class TextureCache { class TextureCache {
using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
using IntervalType = typename IntervalMap::interval_type;
public: public:
void InvalidateRegion(CacheAddr addr, std::size_t size) { void InvalidateRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
for (const auto& surface : GetSurfacesInRegion(addr, size)) { for (const auto& surface : GetSurfacesInRegion(addr, size)) {
@ -76,7 +74,7 @@ public:
guard_samplers = new_guard; guard_samplers = new_guard;
} }
void FlushRegion(CacheAddr addr, std::size_t size) { void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
auto surfaces = GetSurfacesInRegion(addr, size); auto surfaces = GetSurfacesInRegion(addr, size);
@ -99,9 +97,9 @@ public:
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const std::optional<VAddr> cpu_addr =
const auto cache_addr{ToCacheAddr(host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_addr) { if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
@ -110,7 +108,7 @@ public:
} }
const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
if (guard_samplers) { if (guard_samplers) {
sampled_textures.push_back(surface); sampled_textures.push_back(surface);
} }
@ -124,13 +122,13 @@ public:
if (!gpu_addr) { if (!gpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const std::optional<VAddr> cpu_addr =
const auto cache_addr{ToCacheAddr(host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_addr) { if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
if (guard_samplers) { if (guard_samplers) {
sampled_textures.push_back(surface); sampled_textures.push_back(surface);
} }
@ -159,14 +157,14 @@ public:
SetEmptyDepthBuffer(); SetEmptyDepthBuffer();
return {}; return {};
} }
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const std::optional<VAddr> cpu_addr =
const auto cache_addr{ToCacheAddr(host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_addr) { if (!cpu_addr) {
SetEmptyDepthBuffer(); SetEmptyDepthBuffer();
return {}; return {};
} }
const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
if (depth_buffer.target) if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT); depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
depth_buffer.target = surface_view.first; depth_buffer.target = surface_view.first;
@ -199,15 +197,15 @@ public:
return {}; return {};
} }
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const std::optional<VAddr> cpu_addr =
const auto cache_addr{ToCacheAddr(host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_addr) { if (!cpu_addr) {
SetEmptyColorBuffer(index); SetEmptyColorBuffer(index);
return {}; return {};
} }
auto surface_view = auto surface_view =
GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index), GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
preserve_contents, true); preserve_contents, true);
if (render_targets[index].target) if (render_targets[index].target)
render_targets[index].target->MarkAsRenderTarget(false, NO_RT); render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
@ -257,27 +255,26 @@ public:
const GPUVAddr src_gpu_addr = src_config.Address(); const GPUVAddr src_gpu_addr = src_config.Address();
const GPUVAddr dst_gpu_addr = dst_config.Address(); const GPUVAddr dst_gpu_addr = dst_config.Address();
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)}; const std::optional<VAddr> dst_cpu_addr =
const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)}; const std::optional<VAddr> src_cpu_addr =
const auto src_cache_addr{ToCacheAddr(src_host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
std::pair<TSurface, TView> dst_surface = std::pair<TSurface, TView> dst_surface =
GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false); GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
std::pair<TSurface, TView> src_surface = std::pair<TSurface, TView> src_surface =
GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false); GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
ImageBlit(src_surface.second, dst_surface.second, copy_config); ImageBlit(src_surface.second, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick()); dst_surface.first->MarkAsModified(true, Tick());
} }
TSurface TryFindFramebufferSurface(const u8* host_ptr) { TSurface TryFindFramebufferSurface(VAddr addr) {
const CacheAddr cache_addr = ToCacheAddr(host_ptr); if (!addr) {
if (!cache_addr) {
return nullptr; return nullptr;
} }
const CacheAddr page = cache_addr >> registry_page_bits; const VAddr page = addr >> registry_page_bits;
std::vector<TSurface>& list = registry[page]; std::vector<TSurface>& list = registry[page];
for (auto& surface : list) { for (auto& surface : list) {
if (surface->GetCacheAddr() == cache_addr) { if (surface->GetCpuAddr() == addr) {
return surface; return surface;
} }
} }
@ -338,18 +335,14 @@ protected:
void Register(TSurface surface) { void Register(TSurface surface) {
const GPUVAddr gpu_addr = surface->GetGpuAddr(); const GPUVAddr gpu_addr = surface->GetGpuAddr();
const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
const std::size_t size = surface->GetSizeInBytes(); const std::size_t size = surface->GetSizeInBytes();
const std::optional<VAddr> cpu_addr = const std::optional<VAddr> cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_ptr || !cpu_addr) { if (!cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
gpu_addr); gpu_addr);
return; return;
} }
const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
surface->MarkAsContinuous(continuous);
surface->SetCacheAddr(cache_ptr);
surface->SetCpuAddr(*cpu_addr); surface->SetCpuAddr(*cpu_addr);
RegisterInnerCache(surface); RegisterInnerCache(surface);
surface->MarkAsRegistered(true); surface->MarkAsRegistered(true);
@ -634,7 +627,7 @@ private:
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
const SurfaceParams& params, const SurfaceParams& params,
const GPUVAddr gpu_addr, const GPUVAddr gpu_addr,
const CacheAddr cache_addr, const VAddr cpu_addr,
bool preserve_contents) { bool preserve_contents) {
if (params.target == SurfaceTarget::Texture3D) { if (params.target == SurfaceTarget::Texture3D) {
bool failed = false; bool failed = false;
@ -659,7 +652,7 @@ private:
failed = true; failed = true;
break; break;
} }
const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
modified |= surface->IsModified(); modified |= surface->IsModified();
const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
@ -679,7 +672,7 @@ private:
} else { } else {
for (const auto& surface : overlaps) { for (const auto& surface : overlaps) {
if (!surface->MatchTarget(params.target)) { if (!surface->MatchTarget(params.target)) {
if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
if (Settings::values.use_accurate_gpu_emulation) { if (Settings::values.use_accurate_gpu_emulation) {
return std::nullopt; return std::nullopt;
} }
@ -688,7 +681,7 @@ private:
} }
return std::nullopt; return std::nullopt;
} }
if (surface->GetCacheAddr() != cache_addr) { if (surface->GetCpuAddr() != cpu_addr) {
continue; continue;
} }
if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
@ -722,13 +715,13 @@ private:
* left blank. * left blank.
* @param is_render Whether or not the surface is a render target. * @param is_render Whether or not the surface is a render target.
**/ **/
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
const SurfaceParams& params, bool preserve_contents, const SurfaceParams& params, bool preserve_contents,
bool is_render) { bool is_render) {
// Step 1 // Step 1
// Check Level 1 Cache for a fast structural match. If candidate surface // Check Level 1 Cache for a fast structural match. If candidate surface
// matches at certain level we are pretty much done. // matches at certain level we are pretty much done.
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
TSurface& current_surface = iter->second; TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params); const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) { if (topological_result != MatchTopologyResult::FullMatch) {
@ -755,7 +748,7 @@ private:
// Step 2 // Step 2
// Obtain all possible overlaps in the memory region // Obtain all possible overlaps in the memory region
const std::size_t candidate_size = params.GetGuestSizeInBytes(); const std::size_t candidate_size = params.GetGuestSizeInBytes();
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
// If none are found, we are done. we just load the surface and create it. // If none are found, we are done. we just load the surface and create it.
if (overlaps.empty()) { if (overlaps.empty()) {
@ -777,7 +770,7 @@ private:
// Check if it's a 3D texture // Check if it's a 3D texture
if (params.block_depth > 0) { if (params.block_depth > 0) {
auto surface = auto surface =
Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
if (surface) { if (surface) {
return *surface; return *surface;
} }
@ -852,16 +845,16 @@ private:
* @param params The parameters on the candidate surface. * @param params The parameters on the candidate surface.
**/ **/
Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const std::optional<VAddr> cpu_addr =
const auto cache_addr{ToCacheAddr(host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_addr) { if (!cpu_addr) {
Deduction result{}; Deduction result{};
result.type = DeductionType::DeductionFailed; result.type = DeductionType::DeductionFailed;
return result; return result;
} }
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
TSurface& current_surface = iter->second; TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params); const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) { if (topological_result != MatchTopologyResult::FullMatch) {
@ -880,7 +873,7 @@ private:
} }
const std::size_t candidate_size = params.GetGuestSizeInBytes(); const std::size_t candidate_size = params.GetGuestSizeInBytes();
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
if (overlaps.empty()) { if (overlaps.empty()) {
Deduction result{}; Deduction result{};
@ -1024,10 +1017,10 @@ private:
} }
void RegisterInnerCache(TSurface& surface) { void RegisterInnerCache(TSurface& surface) {
const CacheAddr cache_addr = surface->GetCacheAddr(); const VAddr cpu_addr = surface->GetCpuAddr();
CacheAddr start = cache_addr >> registry_page_bits; VAddr start = cpu_addr >> registry_page_bits;
const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
l1_cache[cache_addr] = surface; l1_cache[cpu_addr] = surface;
while (start <= end) { while (start <= end) {
registry[start].push_back(surface); registry[start].push_back(surface);
start++; start++;
@ -1035,10 +1028,10 @@ private:
} }
void UnregisterInnerCache(TSurface& surface) { void UnregisterInnerCache(TSurface& surface) {
const CacheAddr cache_addr = surface->GetCacheAddr(); const VAddr cpu_addr = surface->GetCpuAddr();
CacheAddr start = cache_addr >> registry_page_bits; VAddr start = cpu_addr >> registry_page_bits;
const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
l1_cache.erase(cache_addr); l1_cache.erase(cpu_addr);
while (start <= end) { while (start <= end) {
auto& reg{registry[start]}; auto& reg{registry[start]};
reg.erase(std::find(reg.begin(), reg.end(), surface)); reg.erase(std::find(reg.begin(), reg.end(), surface));
@ -1046,18 +1039,18 @@ private:
} }
} }
std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
if (size == 0) { if (size == 0) {
return {}; return {};
} }
const CacheAddr cache_addr_end = cache_addr + size; const VAddr cpu_addr_end = cpu_addr + size;
CacheAddr start = cache_addr >> registry_page_bits; VAddr start = cpu_addr >> registry_page_bits;
const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
std::vector<TSurface> surfaces; std::vector<TSurface> surfaces;
while (start <= end) { while (start <= end) {
std::vector<TSurface>& list = registry[start]; std::vector<TSurface>& list = registry[start];
for (auto& surface : list) { for (auto& surface : list) {
if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
surface->MarkAsPicked(true); surface->MarkAsPicked(true);
surfaces.push_back(surface); surfaces.push_back(surface);
} }
@ -1146,14 +1139,14 @@ private:
// large in size. // large in size.
static constexpr u64 registry_page_bits{20}; static constexpr u64 registry_page_bits{20};
static constexpr u64 registry_page_size{1 << registry_page_bits}; static constexpr u64 registry_page_size{1 << registry_page_bits};
std::unordered_map<CacheAddr, std::vector<TSurface>> registry; std::unordered_map<VAddr, std::vector<TSurface>> registry;
static constexpr u32 DEPTH_RT = 8; static constexpr u32 DEPTH_RT = 8;
static constexpr u32 NO_RT = 0xFFFFFFFF; static constexpr u32 NO_RT = 0xFFFFFFFF;
// The L1 Cache is used for fast texture lookup before checking the overlaps // The L1 Cache is used for fast texture lookup before checking the overlaps
// This avoids calculating size and other stuffs. // This avoids calculating size and other stuffs.
std::unordered_map<CacheAddr, TSurface> l1_cache; std::unordered_map<VAddr, TSurface> l1_cache;
/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and /// previously been used. This is to prevent surfaces from being constantly created and