diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 1c91999d7e..fd3e414349 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -11,13 +11,9 @@ set(SHADER_FILES block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp convert_abgr8_to_d24s8.frag - convert_b10g11r11_to_d24s8.frag convert_d24s8_to_abgr8.frag - convert_d24s8_to_b10g11r11.frag - convert_d24s8_to_r16g16.frag convert_depth_to_float.frag convert_float_to_depth.frag - convert_r16g16_to_d24s8.frag full_screen_triangle.vert fxaa.frag fxaa.vert diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag index 4e4ab6a26b..ea055ddad9 100644 --- a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag @@ -9,9 +9,10 @@ layout(binding = 0) uniform sampler2D color_texture; void main() { ivec2 coord = ivec2(gl_FragCoord.xy); - uvec4 color = uvec4(texelFetch(color_texture, coord, 0).rgba * (exp2(8) - 1.0f)); - uint depth_unorm = (color.r << 16) | (color.g << 8) | color.b; + uvec4 color = uvec4(texelFetch(color_texture, coord, 0).abgr * (exp2(8) - 1.0f)); + uvec4 bytes = color << uvec4(24, 16, 8, 0); + uint depth_stencil_unorm = bytes.x | bytes.y | bytes.z | bytes.w; - gl_FragDepth = float(depth_unorm) / (exp2(24.0) - 1.0f); - gl_FragStencilRefARB = int(color.a); + gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f); + gl_FragStencilRefARB = int(depth_stencil_unorm >> 24); } diff --git a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag deleted file mode 100644 index 2999a84cf2..0000000000 --- a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer 
to the license.txt file included. - -#version 450 -#extension GL_ARB_shader_stencil_export : require - -layout(binding = 0) uniform sampler2D color_texture; - -void main() { - ivec2 coord = ivec2(gl_FragCoord.xy); - vec4 color = texelFetch(color_texture, coord, 0).rgba; - uint depth_stencil_unorm = (uint(color.b * (exp2(10) - 1.0f)) << 22) - | (uint(color.g * (exp2(11) - 1.0f)) << 11) - | (uint(color.r * (exp2(11) - 1.0f))); - - gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); - gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); -} diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag index ff3bf82091..94368fb593 100644 --- a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag +++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag @@ -14,8 +14,10 @@ void main() { uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); uint stencil = uint(textureLod(stencil_tex, coord, 0).r); - color.r = float(depth >> 16) / (exp2(8) - 1.0); - color.g = float((depth >> 8) & 0x00FF) / (exp2(8) - 1.0); - color.b = float(depth & 0x00FF) / (exp2(8) - 1.0); - color.a = float(stencil) / (exp2(8) - 1.0); + highp uint depth_val = + uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0)); + lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r; + highp uvec4 components = + uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); + color.abgr = vec4(components) / (exp2(8.0) - 1.0); } diff --git a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag deleted file mode 100644 index c743d3a138..0000000000 --- a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#version 450 - -layout(binding = 0) uniform sampler2D depth_tex; -layout(binding = 1) uniform isampler2D stencil_tex; - -layout(location = 0) out vec4 color; - -void main() { - ivec2 coord = ivec2(gl_FragCoord.xy); - uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); - uint stencil = uint(textureLod(stencil_tex, coord, 0).r); - - color.b = float(depth >> 22) / (exp2(10) - 1.0); - color.g = float((depth >> 11) & 0x00FF) / (exp2(11) - 1.0); - color.r = float(depth & 0x00FF) / (exp2(11) - 1.0); - color.a = 1.0f; -} diff --git a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag deleted file mode 100644 index 2a9443d3d6..0000000000 --- a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#version 450 - -layout(binding = 0) uniform sampler2D depth_tex; -layout(binding = 1) uniform isampler2D stencil_tex; - -layout(location = 0) out vec4 color; - -void main() { - ivec2 coord = ivec2(gl_FragCoord.xy); - uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); - uint stencil = uint(textureLod(stencil_tex, coord, 0).r); - - color.r = float(depth >> 16) / (exp2(16) - 1.0); - color.g = float((depth >> 16) & 0x00FF) / (exp2(16) - 1.0); - color.b = 0.0f; - color.a = 1.0f; -} diff --git a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag deleted file mode 100644 index 3df70575ec..0000000000 --- a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#version 450 -#extension GL_ARB_shader_stencil_export : require - -layout(binding = 0) uniform sampler2D color_texture; - -void main() { - ivec2 coord = ivec2(gl_FragCoord.xy); - vec4 color = texelFetch(color_texture, coord, 0).rgba; - uint depth_stencil_unorm = (uint(color.r * (exp2(16) - 1.0f)) << 16) - | (uint(color.g * (exp2(16) - 1.0f)) << 16); - - gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); - gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); -} diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index a63d4d222a..9a38b6b344 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -5,13 +5,9 @@ #include #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" -#include "video_core/host_shaders/convert_b10g11r11_to_d24s8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" -#include "video_core/host_shaders/convert_d24s8_to_b10g11r11_frag_spv.h" -#include "video_core/host_shaders/convert_d24s8_to_r16g16_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" -#include "video_core/host_shaders/convert_r16g16_to_d24s8_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" #include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" @@ -361,11 +357,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), - convert_b10g11r11_to_d24s8_frag(BuildShader(device, CONVERT_B10G11R11_TO_D24S8_FRAG_SPV)), - 
convert_r16g16_to_d24s8_frag(BuildShader(device, CONVERT_R16G16_TO_D24S8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), - convert_d24s8_to_b10g11r11_frag(BuildShader(device, CONVERT_D24S8_TO_B10G11R11_FRAG_SPV)), - convert_d24s8_to_r16g16_frag(BuildShader(device, CONVERT_D24S8_TO_R16G16_FRAG_SPV)), linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)), nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)) { if (device.IsExtShaderStencilExportSupported()) { @@ -461,30 +453,11 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, } void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { + ImageView& src_image_view, u32 up_scale, u32 down_shift) { ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(), convert_abgr8_to_d24s8_frag, true); - Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, - down_shift); -} - -void BlitImageHelper::ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { - ConvertPipelineDepthTargetEx(convert_b10g11r11_to_d24s8_pipeline, dst_framebuffer->RenderPass(), - convert_b10g11r11_to_d24s8_frag, true); - Convert(*convert_b10g11r11_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, - down_shift); -} - -void BlitImageHelper::ConvertR16G16ToD24S8(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { - ConvertPipelineDepthTargetEx(convert_r16g16_to_d24s8_pipeline, dst_framebuffer->RenderPass(), - convert_r16g16_to_d24s8_frag, true); - Convert(*convert_r16g16_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, - down_shift); + ConvertColor(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, + down_shift); } void 
BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, @@ -495,24 +468,6 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, down_shift); } -void BlitImageHelper::ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, - u32 down_shift) { - ConvertPipelineColorTargetEx(convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer->RenderPass(), - convert_d24s8_to_b10g11r11_frag, false); - ConvertDepthStencil(*convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer, src_image_view, - up_scale, down_shift); -} - -void BlitImageHelper::ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, - u32 down_shift) { - ConvertPipelineColorTargetEx(convert_d24s8_to_r16g16_pipeline, dst_framebuffer->RenderPass(), - convert_d24s8_to_r16g16_frag, false); - ConvertDepthStencil(*convert_d24s8_to_r16g16_pipeline, dst_framebuffer, src_image_view, - up_scale, down_shift); -} - void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift) { const VkPipelineLayout layout = *one_texture_pipeline_layout; @@ -560,6 +515,53 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb scheduler.InvalidateState(); } +void BlitImageHelper::ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, u32 down_shift) { + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkImageView src_view = src_image_view.ColorView(); + const VkSampler sampler = *nearest_sampler; + const VkExtent2D extent{ + .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), + .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), + }; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift, + 
this](vk::CommandBuffer cmdbuf) { + const VkOffset2D offset{ + .x = 0, + .y = 0, + }; + const VkViewport viewport{ + .x = 0.0f, + .y = 0.0f, + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .minDepth = 0.0f, + .maxDepth = 0.0f, + }; + const VkRect2D scissor{ + .offset = offset, + .extent = extent, + }; + const PushConstants push_constants{ + .tex_scale = {viewport.width, viewport.height}, + .tex_offset = {0.0f, 0.0f}, + }; + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); + + // TODO: Barriers + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift) { const VkPipelineLayout layout = *two_textures_pipeline_layout; diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 3455c75f40..b1a717090b 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -56,28 +56,19 @@ public: void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); - void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift); - void ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, u32 down_shift); - - void 
ConvertR16G16ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, - u32 up_scale, u32 down_shift); - void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift); - void ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer, ImageView& src_image_view, - u32 up_scale, u32 down_shift); - - void ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer, ImageView& src_image_view, - u32 up_scale, u32 down_shift); - private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift); @@ -114,11 +105,7 @@ private: vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; - vk::ShaderModule convert_b10g11r11_to_d24s8_frag; - vk::ShaderModule convert_r16g16_to_d24s8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; - vk::ShaderModule convert_d24s8_to_b10g11r11_frag; - vk::ShaderModule convert_d24s8_to_r16g16_frag; vk::Sampler linear_sampler; vk::Sampler nearest_sampler; @@ -131,11 +118,7 @@ private: vk::Pipeline convert_d16_to_r16_pipeline; vk::Pipeline convert_r16_to_d16_pipeline; vk::Pipeline convert_abgr8_to_d24s8_pipeline; - vk::Pipeline convert_b10g11r11_to_d24s8_pipeline; - vk::Pipeline convert_r16g16_to_d24s8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; - vk::Pipeline convert_d24s8_to_b10g11r11_pipeline; - vk::Pipeline convert_d24s8_to_r16g16_pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index c72f0c8978..197cba8e34 100644 --- 
a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -775,8 +775,18 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { if (VideoCore::Surface::GetFormatType(dst.info.format) == - VideoCore::Surface::SurfaceType::DepthStencil) { - return !device.IsExtShaderStencilExportSupported(); + VideoCore::Surface::SurfaceType::DepthStencil && + !device.IsExtShaderStencilExportSupported()) { + return true; + } + if (VideoCore::Surface::GetFormatType(src.info.format) == + VideoCore::Surface::SurfaceType::DepthStencil && + !device.IsExtShaderStencilExportSupported()) { + return true; + } + if (dst.info.format == PixelFormat::D32_FLOAT_S8_UINT || + src.info.format == PixelFormat::D32_FLOAT_S8_UINT) { + return true; } return false; } @@ -1058,21 +1068,10 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im } break; case PixelFormat::A8B8G8R8_UNORM: - case PixelFormat::B8G8R8A8_UNORM: if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift); } break; - case PixelFormat::B10G11R11_FLOAT: - if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { - return blit_image_helper.ConvertD24S8ToB10G11R11(dst, src_view, up_scale, down_shift); - } - break; - case PixelFormat::R16G16_UNORM: - if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { - return blit_image_helper.ConvertD24S8ToR16G16(dst, src_view, up_scale, down_shift); - } - break; case PixelFormat::R32_FLOAT: if (src_view.format == PixelFormat::D32_FLOAT) { return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); @@ -1084,16 +1083,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im } break; case PixelFormat::S8_UINT_D24_UNORM: - if (src_view.format == PixelFormat::A8B8G8R8_UNORM || - src_view.format == 
PixelFormat::B8G8R8A8_UNORM) { - return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift); - } - if (src_view.format == PixelFormat::B10G11R11_FLOAT) { - return blit_image_helper.ConvertB10G11R11ToD24S8(dst, src_view, up_scale, down_shift); - } - if (src_view.format == PixelFormat::R16G16_UNORM) { - return blit_image_helper.ConvertR16G16ToD24S8(dst, src_view, up_scale, down_shift); - } + return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift); break; case PixelFormat::D32_FLOAT: if (src_view.format == PixelFormat::R32_FLOAT) { @@ -1590,6 +1580,14 @@ VkImageView ImageView::StencilView() { return *stencil_view; } +VkImageView ImageView::ColorView() { + if (color_view) { + return *color_view; + } + color_view = MakeView(VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_ASPECT_COLOR_BIT); + return *color_view; +} + VkImageView ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { if (image_format == Shader::ImageFormat::Typeless) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 44e9dcee42..753e3e8a1e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -184,6 +184,8 @@ public: [[nodiscard]] VkImageView StencilView(); + [[nodiscard]] VkImageView ColorView(); + [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format); @@ -224,6 +226,7 @@ private: std::unique_ptr storage_views; vk::ImageView depth_view; vk::ImageView stencil_view; + vk::ImageView color_view; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 44a0d42ba7..565b992541 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ 
b/src/video_core/texture_cache/texture_cache.h @@ -472,7 +472,7 @@ template void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy) { - const BlitImages images = GetBlitImages(dst, src); + const BlitImages images = GetBlitImages(dst, src, copy); const ImageId dst_id = images.dst_id; const ImageId src_id = images.src_id; @@ -762,12 +762,15 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, const bool broken_views = runtime.HasBrokenTextureViewFormats() || True(options & RelaxedOptions::ForceBrokenViews); const bool native_bgr = runtime.HasNativeBgr(); - ImageId image_id; + const bool flexible_formats = True(options & RelaxedOptions::Format); + ImageId image_id{}; + boost::container::small_vector image_ids; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (True(existing_image.flags & ImageFlagBits::Remapped)) { return false; } - if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { + if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) + [[unlikely]] { const bool strict_size = False(options & RelaxedOptions::Size) && True(existing_image.flags & ImageFlagBits::Strong); const ImageInfo& existing = existing_image.info; @@ -776,17 +779,27 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, IsPitchLinearSameSize(existing, info, strict_size) && IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { image_id = existing_image_id; - return true; + image_ids.push_back(existing_image_id); + return !flexible_formats && existing.format == info.format; } } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, native_bgr)) { image_id = existing_image_id; - return true; + image_ids.push_back(existing_image_id); + return !flexible_formats && existing_image.info.format == info.format; } return false; }; ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); - return image_id; + if (image_ids.size() <= 1) [[likely]] { + return image_id; + } + auto image_ids_compare = [this](ImageId a, ImageId b) { + auto& image_a = slot_images[a]; + auto& image_b = slot_images[b]; + return image_a.modification_tick < image_b.modification_tick; + }; + return *std::ranges::max_element(image_ids, image_ids_compare); } template @@ -1078,32 +1091,58 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA template typename TextureCache

::BlitImages TextureCache

::GetBlitImages( - const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { - static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy) { + + static constexpr auto FIND_OPTIONS = RelaxedOptions::Samples; const GPUVAddr dst_addr = dst.Address(); const GPUVAddr src_addr = src.Address(); ImageInfo dst_info(dst); ImageInfo src_info(src); + const bool can_be_depth_blit = + dst_info.format == src_info.format && copy.filter == Tegra::Engines::Fermi2D::Filter::Point; ImageId dst_id; ImageId src_id; + RelaxedOptions try_options = FIND_OPTIONS; + if (can_be_depth_blit) { + try_options |= RelaxedOptions::Format; + } do { has_deleted_images = false; - dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); - src_id = FindImage(src_info, src_addr, FIND_OPTIONS); - const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; + src_id = FindImage(src_info, src_addr, try_options); + dst_id = FindImage(dst_info, dst_addr, try_options); const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; - DeduceBlitImages(dst_info, src_info, dst_image, src_image); - if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { - continue; + if (src_image && src_image->info.num_samples > 1) { + RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; + src_id = FindOrInsertImage(src_info, src_addr, find_options); + dst_id = FindOrInsertImage(dst_info, dst_addr, find_options); + if (has_deleted_images) { + continue; + } + break; } - RelaxedOptions find_options{}; - if (src_info.num_samples > 1) { - // it's a resolve, we must enforce the same format. - find_options = RelaxedOptions::ForceBrokenViews; + if (can_be_depth_blit) { + const ImageBase* const dst_image = dst_id ? 
&slot_images[dst_id] : nullptr; + DeduceBlitImages(dst_info, src_info, dst_image, src_image); + if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { + continue; + } + } + if (!src_id) { + src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); + } + if (!dst_id) { + dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); } - src_id = FindOrInsertImage(src_info, src_addr, find_options); - dst_id = FindOrInsertImage(dst_info, dst_addr, find_options); } while (has_deleted_images); + if (GetFormatType(dst_info.format) != SurfaceType::ColorTexture) { + // Make sure the images are depth and/or stencil textures. + do { + has_deleted_images = false; + src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{}); + dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); + } while (has_deleted_images); + } return BlitImages{ .dst_id = dst_id, .src_id = src_id, @@ -1160,7 +1199,14 @@ template ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, bool is_clear) { const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; - const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); + ImageId image_id{}; + bool delete_state = has_deleted_images; + do { + has_deleted_images = false; + image_id = FindOrInsertImage(info, gpu_addr, options); + delete_state |= has_deleted_images; + } while (has_deleted_images); + has_deleted_images = delete_state; if (!image_id) { return NULL_IMAGE_VIEW_ID; } @@ -1783,7 +1829,13 @@ void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::vectorinfo.format) != SurfaceType::ColorTexture) { - src_info.format = src->info.format; - } - is_resolve = src->info.num_samples > 1; - src_info.num_samples = src->info.num_samples; - src_info.size = src->info.size; + if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + src_info.format = src->info.format; } if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { dst_info.format = dst->info.format; } if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { - if (dst) { - if (GetFormatType(dst->info.format) == SurfaceType::ColorTexture) { - src_info.format = original_src_format; - } - } else { - dst_info.format = src->info.format; - } + dst_info.format = src->info.format; } if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { if (src) { @@ -1183,7 +1170,6 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* src_info.format = dst->info.format; } } - ASSERT(!is_resolve || dst_info.format == src_info.format); } u32 MapSizeBytes(const ImageBase& image) {