From 8da16cf9fb6c2133faed1164a8573992e84a4297 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 26 Apr 2020 19:53:02 -0300 Subject: [PATCH] texture_cache: Reintroduce preserve_contents accurately This reverts commit 94b0e2e5dae4e0bd0021ac2d8fe1ff904a93ee69. preserve_contents proved to be a meaningful optimization. This commit reintroduces it but properly implemented on OpenGL. We have to make sure the clear removes all the previous contents of the image. It's not currently implemented on Vulkan because we can do smart things there that's preferred to be introduced in a separate commit. --- .../renderer_opengl/gl_rasterizer.cpp | 49 ++++++++++---- .../renderer_opengl/gl_rasterizer.h | 3 +- .../renderer_vulkan/vk_rasterizer.cpp | 4 +- src/video_core/texture_cache/texture_cache.h | 66 ++++++++++++------- 4 files changed, 81 insertions(+), 41 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6fe155bccd..69a74449cb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -348,7 +348,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() { texture_cache.GuardRenderTargets(true); - View depth_surface = texture_cache.GetDepthBufferSurface(); + View depth_surface = texture_cache.GetDepthBufferSurface(true); const auto& regs = gpu.regs; UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); @@ -357,7 +357,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() { FramebufferCacheKey key; const auto colors_count = static_cast(regs.rt_control.count); for (std::size_t index = 0; index < colors_count; ++index) { - View color_surface{texture_cache.GetColorBufferSurface(index)}; + View color_surface{texture_cache.GetColorBufferSurface(index, true)}; if (!color_surface) { continue; } @@ -381,28 +381,52 @@ void RasterizerOpenGL::ConfigureFramebuffers() { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); } -void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, - bool using_stencil_fb) { +void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) { auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; texture_cache.GuardRenderTargets(true); View color_surface; - if (using_color_fb) { + + if (using_color) { + // Determine if we have to preserve the contents. + // First we have to make sure all clear masks are enabled. + bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G || + !regs.clear_buffers.B || !regs.clear_buffers.A; const std::size_t index = regs.clear_buffers.RT; - color_surface = texture_cache.GetColorBufferSurface(index); + if (regs.clear_flags.scissor) { + // Then we have to confirm scissor testing clears the whole image. + const auto& scissor = regs.scissor_test[0]; + preserve_contents |= scissor.min_x > 0; + preserve_contents |= scissor.min_y > 0; + preserve_contents |= scissor.max_x < regs.rt[index].width; + preserve_contents |= scissor.max_y < regs.rt[index].height; + } + + color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents); texture_cache.MarkColorBufferInUse(index); } + View depth_surface; - if (using_depth_fb || using_stencil_fb) { - depth_surface = texture_cache.GetDepthBufferSurface(); + if (using_depth_stencil) { + bool preserve_contents = false; + if (regs.clear_flags.scissor) { + // For depth stencil clears we only have to confirm scissor test covers the whole image. + const auto& scissor = regs.scissor_test[0]; + preserve_contents |= scissor.min_x > 0; + preserve_contents |= scissor.min_y > 0; + preserve_contents |= scissor.max_x < regs.zeta_width; + preserve_contents |= scissor.max_y < regs.zeta_height; + } + + depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); texture_cache.MarkDepthBufferInUse(); } texture_cache.GuardRenderTargets(false); FramebufferCacheKey key; - key.colors[0] = color_surface; - key.zeta = depth_surface; + key.colors[0] = std::move(color_surface); + key.zeta = std::move(depth_surface); state_tracker.NotifyFramebuffer(); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); @@ -422,8 +446,7 @@ void RasterizerOpenGL::Clear() { if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A) { use_color = true; - } - if (use_color) { + state_tracker.NotifyColorMask0(); glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); @@ -461,7 +484,7 @@ void RasterizerOpenGL::Clear() { UNIMPLEMENTED_IF(regs.clear_flags.viewport); - ConfigureClearFramebuffer(use_color, use_depth, use_stencil); + ConfigureClearFramebuffer(use_color, use_depth || use_stencil); if (use_color) { glClearBufferfv(GL_COLOR, 0, regs.clear_color); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index ebd2173eba..87249fb6f3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -95,7 +95,8 @@ private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); - void ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, bool using_stencil_fb); + /// Configures the color and depth framebuffer for clearing. + void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); /// Configures the current constbuffers to use for the draw command. void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 68464e637e..51c3b0f770 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -652,7 +652,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { Texceptions texceptions; for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { if (update_rendertargets) { - color_attachments[rt] = texture_cache.GetColorBufferSurface(rt); + color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); } if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { texceptions[rt] = true; @@ -660,7 +660,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { } if (update_rendertargets) { - zeta_attachment = texture_cache.GetDepthBufferSurface(); + zeta_attachment = texture_cache.GetDepthBufferSurface(true); } if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { texceptions[ZETA_TEXCEPTION_INDEX] = true; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index cf6bd005aa..d2d2846e6a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -143,7 +143,7 @@ public: } const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false); + const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); if (guard_samplers) { sampled_textures.push_back(surface); } @@ -163,7 +163,7 @@ public: return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false); + const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); if (guard_samplers) { sampled_textures.push_back(surface); } @@ -178,7 +178,7 @@ public: return any_rt; } - TView GetDepthBufferSurface() { + TView GetDepthBufferSurface(bool preserve_contents) { std::lock_guard lock{mutex}; auto& maxwell3d = system.GPU().Maxwell3D(); if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { @@ -199,7 +199,7 @@ public: return {}; } const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; - auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, true); + auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); if (depth_buffer.target) depth_buffer.target->MarkAsRenderTarget(false, NO_RT); depth_buffer.target = surface_view.first; @@ -209,7 +209,7 @@ public: return surface_view.second; } - TView GetColorBufferSurface(std::size_t index) { + TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { std::lock_guard lock{mutex}; ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); auto& maxwell3d = system.GPU().Maxwell3D(); @@ -239,8 +239,9 @@ public: return {}; } - auto surface_view = GetSurface(gpu_addr, *cpu_addr, - SurfaceParams::CreateForFramebuffer(system, index), true); + auto surface_view = + GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + preserve_contents, true); if (render_targets[index].target) { auto& surface = render_targets[index].target; surface->MarkAsRenderTarget(false, NO_RT); @@ -300,9 +301,9 @@ public: const std::optional src_cpu_addr = system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); std::pair dst_surface = - GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, false); + GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); std::pair src_surface = - GetSurface(src_gpu_addr, *src_cpu_addr, src_params, false); + GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false); ImageBlit(src_surface.second, dst_surface.second, copy_config); dst_surface.first->MarkAsModified(true, Tick()); } @@ -532,18 +533,22 @@ private: * @param overlaps The overlapping surfaces registered in the cache. * @param params The parameters for the new surface. * @param gpu_addr The starting address of the new surface. + * @param preserve_contents Indicates that the new surface should be loaded from memory or left + * blank. * @param untopological Indicates to the recycler that the texture has no way to match the * overlaps due to topological reasons. **/ std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, + const bool preserve_contents, const MatchTopologyResult untopological) { + const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); for (auto& surface : overlaps) { Unregister(surface); } switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { case RecycleStrategy::Ignore: { - return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme()); + return InitializeSurface(gpu_addr, params, do_load); } case RecycleStrategy::Flush: { std::sort(overlaps.begin(), overlaps.end(), @@ -553,7 +558,7 @@ private: for (auto& surface : overlaps) { FlushSurface(surface); } - return InitializeSurface(gpu_addr, params); + return InitializeSurface(gpu_addr, params, preserve_contents); } case RecycleStrategy::BufferCopy: { auto new_surface = GetUncachedSurface(gpu_addr, params); @@ -562,7 +567,7 @@ private: } default: { UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); - return InitializeSurface(gpu_addr, params); + return InitializeSurface(gpu_addr, params, do_load); } } } @@ -700,11 +705,14 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. * @param cpu_addr The starting address of the new surface on physical memory. + * @param preserve_contents Indicates that the new surface should be loaded from memory or + * left blank. */ std::optional> Manage3DSurfaces(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, - const VAddr cpu_addr) { + const VAddr cpu_addr, + bool preserve_contents) { if (params.target == SurfaceTarget::Texture3D) { bool failed = false; if (params.num_levels > 1) { @@ -754,7 +762,7 @@ private: return std::nullopt; } Unregister(surface); - return InitializeSurface(gpu_addr, params); + return InitializeSurface(gpu_addr, params, preserve_contents); } return std::nullopt; } @@ -765,7 +773,7 @@ private: return {{surface, surface->GetMainView()}}; } } - return InitializeSurface(gpu_addr, params); + return InitializeSurface(gpu_addr, params, preserve_contents); } } @@ -788,10 +796,13 @@ private: * * @param gpu_addr The starting address of the candidate surface. * @param params The parameters on the candidate surface. + * @param preserve_contents Indicates that the new surface should be loaded from memory or + * left blank. * @param is_render Whether or not the surface is a render target. **/ std::pair GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, - const SurfaceParams& params, bool is_render) { + const SurfaceParams& params, bool preserve_contents, + bool is_render) { // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. @@ -800,7 +811,8 @@ private: const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, topological_result); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + topological_result); } const auto struct_result = current_surface->MatchesStructure(params); @@ -825,7 +837,7 @@ private: // If none are found, we are done. we just load the surface and create it. if (overlaps.empty()) { - return InitializeSurface(gpu_addr, params); + return InitializeSurface(gpu_addr, params, preserve_contents); } // Step 3 @@ -835,13 +847,15 @@ private: for (const auto& surface : overlaps) { const auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - return RecycleSurface(overlaps, params, gpu_addr, topological_result); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + topological_result); } } // Check if it's a 3D texture if (params.block_depth > 0) { - auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr); + auto surface = + Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); if (surface) { return *surface; } @@ -861,7 +875,8 @@ private: return *view; } } - return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = @@ -885,7 +900,7 @@ private: pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } return {current_surface, *view}; @@ -901,7 +916,8 @@ private: } } // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } /** @@ -1059,10 +1075,10 @@ private: } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, - bool do_load = true) { + bool preserve_contents) { auto new_surface{GetUncachedSurface(gpu_addr, params)}; Register(new_surface); - if (do_load) { + if (preserve_contents) { LoadSurface(new_surface); } return {new_surface, new_surface->GetMainView()};