From 425ab9ef4b982213f4ee0d53196f5474e255374f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 17 Oct 2021 18:01:18 +0200 Subject: [PATCH] Texture Cache: Fix downscaling and correct memory consumption. --- .../renderer_opengl/gl_texture_cache.cpp | 41 ++++++-- .../renderer_opengl/gl_texture_cache.h | 2 +- .../renderer_vulkan/vk_texture_cache.cpp | 93 +++++++++++++++++-- .../renderer_vulkan/vk_texture_cache.h | 3 + src/video_core/texture_cache/image_base.cpp | 4 +- src/video_core/texture_cache/image_base.h | 5 + src/video_core/texture_cache/texture_cache.h | 31 ++++--- .../texture_cache/texture_cache_base.h | 2 +- 8 files changed, 146 insertions(+), 35 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 944a3aa65d..34d3723e5b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -876,7 +876,7 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } } -bool Image::Scale() { +bool Image::Scale(bool up_scale) { const auto format_type = GetFormatType(info.format); const GLenum attachment = [format_type] { switch (format_type) { @@ -944,14 +944,25 @@ bool Image::Scale() { const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle; for (s32 layer = 0; layer < info.resources.layers; ++layer) { for (s32 level = 0; level < info.resources.levels; ++level) { - const u32 src_level_width = std::max(1u, original_width >> level); - const u32 src_level_height = std::max(1u, original_height >> level); - const u32 dst_level_width = std::max(1u, scaled_width >> level); - const u32 dst_level_height = std::max(1u, scaled_height >> level); + const u32 src_level_width = + std::max(1u, (up_scale ? original_width : scaled_width) >> level); + const u32 src_level_height = + std::max(1u, (up_scale ? 
original_height : scaled_height) >> level); + const u32 dst_level_width = + std::max(1u, (up_scale ? scaled_width : original_width) >> level); + const u32 dst_level_height = + std::max(1u, (up_scale ? scaled_height : original_height) >> level); + + if (up_scale) { + glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer); + glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level, + layer); + } else { + glNamedFramebufferTextureLayer(read_fbo, attachment, upscaled_backup.handle, level, + layer); + glNamedFramebufferTextureLayer(draw_fbo, attachment, texture.handle, level, layer); + } - glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer); - glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level, - layer); glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0, 0, dst_level_width, dst_level_height, mask, filter); } @@ -959,7 +970,12 @@ bool Image::Scale() { if (scissor_test != GL_FALSE) { glEnablei(GL_SCISSOR_TEST, 0); } - current_texture = upscaled_backup.handle; + if (up_scale) { + current_texture = upscaled_backup.handle; + } else { + current_texture = texture.handle; + } + return true; } @@ -981,6 +997,7 @@ bool Image::ScaleUp() { flags &= ~ImageFlagBits::Rescaled; return false; } + scale_count++; if (!Scale()) { flags &= ~ImageFlagBits::Rescaled; return false; @@ -996,7 +1013,11 @@ bool Image::ScaleDown() { if (!runtime->resolution.active) { return false; } - current_texture = texture.handle; + scale_count++; + if (!Scale(false)) { + flags &= ~ImageFlagBits::Rescaled; + return false; + } return true; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index f90dbfe9e9..81aaef3da3 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -205,7 +205,7 @@ private: void CopyImageToBuffer(const 
VideoCommon::BufferImageCopy& copy, size_t buffer_offset); - bool Scale(); + bool Scale(bool up_scale = true); OGLTexture texture; OGLTexture upscaled_backup; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 17c62e27d9..51367c01d6 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -592,7 +592,8 @@ struct RangedBarrierRange { } void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info, - VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution) { + VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution, + bool up_scaling = true) { const bool is_2d = info.type == ImageType::e2D; const auto resources = info.resources; const VkExtent2D extent{ @@ -605,14 +606,16 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d, - vk_filter](vk::CommandBuffer cmdbuf) { + vk_filter, up_scaling](vk::CommandBuffer cmdbuf) { const VkOffset2D src_size{ - .x = static_cast(extent.width), - .y = static_cast(extent.height), + .x = static_cast(up_scaling ? extent.width : resolution.ScaleUp(extent.width)), + .y = static_cast(is_2d && up_scaling ? extent.height + : resolution.ScaleUp(extent.height)), }; const VkOffset2D dst_size{ - .x = static_cast(resolution.ScaleUp(extent.width)), - .y = static_cast(is_2d ? resolution.ScaleUp(extent.height) : extent.height), + .x = static_cast(up_scaling ? resolution.ScaleUp(extent.width) : extent.width), + .y = static_cast(is_2d && up_scaling ? 
resolution.ScaleUp(extent.height) + : extent.height), }; boost::container::small_vector regions; regions.reserve(resources.levels); @@ -1134,6 +1137,7 @@ bool Image::ScaleUp() { if (!resolution.active) { return false; } + scale_count++; const auto& device = runtime->device; const bool is_2d = info.type == ImageType::e2D; const u32 scaled_width = resolution.ScaleUp(info.size.width); @@ -1161,8 +1165,10 @@ bool Image::ScaleUp() { using namespace VideoCommon; static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; - const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); - scale_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); + if (!scale_view) { + const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); + scale_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); + } auto* view_ptr = scale_view.get(); const Region2D src_region{ @@ -1178,7 +1184,10 @@ bool Image::ScaleUp() { .height = scaled_height, }; if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { - scale_framebuffer = std::make_unique(*runtime, view_ptr, nullptr, extent); + if (!scale_framebuffer) { + scale_framebuffer = + std::make_unique(*runtime, view_ptr, nullptr, extent); + } const auto color_view = scale_view->Handle(Shader::TextureType::Color2D); runtime->blit_image_helper.BlitColor( @@ -1186,7 +1195,10 @@ bool Image::ScaleUp() { Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION); } else if (!runtime->device.IsBlitDepthStencilSupported() && aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - scale_framebuffer = std::make_unique(*runtime, nullptr, view_ptr, extent); + if (!scale_framebuffer) { + scale_framebuffer = + std::make_unique(*runtime, view_ptr, nullptr, extent); + } runtime->blit_image_helper.BlitDepthStencil( scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(), dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION); @@ -1209,6 
+1221,67 @@ bool Image::ScaleDown() { if (!resolution.active) { return false; } + const auto& device = runtime->device; + const bool is_2d = info.type == ImageType::e2D; + const u32 scaled_width = resolution.ScaleUp(info.size.width); + const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; + if (aspect_mask == 0) { + aspect_mask = ImageAspectMask(info.format); + } + static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; + const PixelFormat format = StorageFormat(info.format); + const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; + const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { + BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false); + } else { + using namespace VideoCommon; + static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; + + if (!normal_view) { + const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); + normal_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); + } + auto* view_ptr = normal_view.get(); + + const Region2D src_region{ + .start = {0, 0}, + .end = {static_cast(scaled_width), static_cast(scaled_height)}, + }; + const Region2D dst_region{ + .start = {0, 0}, + .end = {static_cast(info.size.width), static_cast(info.size.height)}, + }; + const VkExtent2D extent{ + .width = scaled_width, + .height = scaled_height, + }; + if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { + if (!normal_framebuffer) { + normal_framebuffer = + std::make_unique(*runtime, view_ptr, nullptr, extent); + } + const auto color_view = normal_view->Handle(Shader::TextureType::Color2D); + + runtime->blit_image_helper.BlitColor( + normal_framebuffer.get(), color_view, dst_region, src_region, + Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION); + } else if 
(!runtime->device.IsBlitDepthStencilSupported() && + aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + if (!normal_framebuffer) { + normal_framebuffer = + std::make_unique(*runtime, view_ptr, nullptr, extent); + } + runtime->blit_image_helper.BlitDepthStencil( + normal_framebuffer.get(), normal_view->DepthView(), normal_view->StencilView(), + dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION); + } else { + // TODO: Use helper blits where applicable + flags &= ~ImageFlagBits::Rescaled; + LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", format); + return false; + } + } ASSERT(info.type != ImageType::Linear); current_image = *original_image; return true; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 6dc190632a..df854a20c1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -148,6 +148,9 @@ private: std::unique_ptr scale_framebuffer; std::unique_ptr scale_view; + + std::unique_ptr normal_framebuffer; + std::unique_ptr normal_view; }; class ImageView : public VideoCommon::ImageViewBase { diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 1909c9ecb1..3db2ec8257 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -60,8 +60,8 @@ namespace { ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, - converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, - scale_tick{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, + converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{}, + scale_count{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr 
+ guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} { if (info.type == ImageType::e3D) { slice_offsets = CalculateSliceOffsets(info); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index bab290ac73..cd4b5f6365 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -77,6 +77,10 @@ struct ImageBase { void CheckBadOverlapState(); void CheckAliasState(); + bool HasScaled() { + return scale_count > 0; + } + ImageInfo info; u32 guest_size_bytes = 0; @@ -84,6 +88,7 @@ struct ImageBase { u32 converted_size_bytes = 0; u32 scale_rating = 0; u64 scale_tick = 0; + u32 scale_count = 0; ImageFlagBits flags = ImageFlagBits::CpuModified; GPUVAddr gpu_addr = 0; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a035d2b187..cf0d33a45c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -854,8 +854,8 @@ void TextureCache

::InvalidateScale(Image& image) { } template -u64 TextureCache

::GetScaledImageSizeBytes(Image& image) { - const f32 add_to_size = Settings::values.resolution_info.up_factor - 1.0f; +u64 TextureCache

::GetScaledImageSizeBytes(ImageBase& image) { + const f32 add_to_size = Settings::values.resolution_info.up_factor; const bool sign = std::signbit(add_to_size); const u32 image_size_bytes = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); const u64 tentative_size = image_size_bytes * static_cast(std::abs(add_to_size)); @@ -865,11 +865,14 @@ u64 TextureCache

::GetScaledImageSizeBytes(Image& image) { template bool TextureCache

::ScaleUp(Image& image) { + const bool has_copy = image.HasScaled(); const bool rescaled = image.ScaleUp(); if (!rescaled) { return false; } - total_used_memory += GetScaledImageSizeBytes(image); + if (!has_copy) { + total_used_memory += GetScaledImageSizeBytes(image); + } InvalidateScale(image); return true; } @@ -880,7 +883,10 @@ bool TextureCache

::ScaleDown(Image& image) { if (!rescaled) { return false; } - total_used_memory -= GetScaledImageSizeBytes(image); + const bool has_copy = image.HasScaled(); + if (!has_copy) { + total_used_memory -= GetScaledImageSizeBytes(image); + } InvalidateScale(image); return true; } @@ -1391,13 +1397,6 @@ void TextureCache

::UnregisterImage(ImageId image_id) { "Trying to unregister an already registered image"); image.flags &= ~ImageFlagBits::Registered; image.flags &= ~ImageFlagBits::BadOverlap; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory -= Common::AlignUp(tentative_size, 1024); lru_cache.Free(image.lru_index); const auto& clear_page_table = [this, image_id]( @@ -1478,6 +1477,16 @@ template void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { ASSERT(False(image.flags & ImageFlagBits::Tracked)); image.flags |= ImageFlagBits::Tracked; + if (image.HasScaled()) { + total_used_memory -= GetScaledImageSizeBytes(image); + } + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); if (False(image.flags & ImageFlagBits::Sparse)) { rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); return; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4dbe050af3..e210393ba1 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -331,7 +331,7 @@ private: void InvalidateScale(Image& image); bool ScaleUp(Image& image); bool ScaleDown(Image& image); - u64 GetScaledImageSizeBytes(Image& image); + u64 GetScaledImageSizeBytes(ImageBase& image); Runtime& runtime; VideoCore::RasterizerInterface& rasterizer;