From de1c8c5c2c3131bb122351e676014cdc7c442e78 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 29 Oct 2021 17:02:57 +0200 Subject: [PATCH] Texture Cahe/Shader decompiler: Resize PointSize on rescaling, refactor and make reaper more agressive on 4Gb GPUs. --- .../ir_opt/rescaling_pass.cpp | 21 +++++++++++++++++++ .../renderer_opengl/gl_rasterizer.cpp | 5 +++-- src/video_core/texture_cache/image_base.h | 1 - src/video_core/texture_cache/image_info.cpp | 6 +++--- src/video_core/texture_cache/texture_cache.h | 18 ++-------------- 5 files changed, 29 insertions(+), 22 deletions(-) diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index a5fa4ee83a..81098c038f 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -75,6 +75,14 @@ void PatchFragCoord(IR::Block& block, IR::Inst& inst) { inst.ReplaceUsesWith(downscaled_frag_coord); } +void PatchPointSize(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::F32 point_value{inst.Arg(1)}; + const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())}; + const IR::F32 upscaled_point_value{ir.FPMul(point_value, up_factor)}; + inst.SetArg(1, upscaled_point_value); +} + [[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) { IR::U32 scaled_value{value}; if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { @@ -253,6 +261,19 @@ void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { } break; } + case IR::Opcode::SetAttribute: { + const IR::Attribute attr{inst.Arg(0).Attribute()}; + switch (attr) { + case IR::Attribute::PointSize: + if (inst.Flags() != 0xDEADBEEF) { + PatchPointSize(block, inst); + } + break; + default: + break; + } + break; + } case IR::Opcode::ImageQueryDimensions: PatchImageQueryDimensions(block, inst); break; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d8ac46d2a5..9b516c64ff 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -976,8 +976,9 @@ void RasterizerOpenGL::SyncPointState() { oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); - - glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); + const bool is_rescaling{texture_cache.IsRescaling()}; + const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; + glPointSize(std::max(1.0f, maxwell3d.regs.point_size * scale)); } void RasterizerOpenGL::SyncLineState() { diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 02c6697663..89c111c006 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -38,7 +38,6 @@ enum class ImageFlagBits : u32 { Rescaled = 1 << 12, CheckingRescalable = 1 << 13, IsRescalable = 1 << 14, - Blacklisted = 1 << 15, }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index d8e414247e..015a2d33d3 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -135,7 +135,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) type = ImageType::e3D; size.depth = rt.depth; } else { - rescaleable = block.depth == 0 && size.height > 256; + rescaleable = block.depth == 0; downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = rt.depth; @@ -165,7 +165,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { type = ImageType::e3D; size.depth = regs.zeta_depth; } else { - rescaleable = block.depth == 0 && size.height > 256; + rescaleable = block.depth == 0; downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = regs.zeta_depth; @@ -199,7 +199,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { .height = config.height, .depth = 1, }; - rescaleable = block.depth == 0 && size.height > 256; + rescaleable = block.depth == 0; downscaleable = size.height > 512; } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c8031b6952..aec130a32c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -53,8 +53,8 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& const auto device_memory = runtime.GetDeviceLocalMemory(); const u64 possible_expected_memory = (device_memory * 4) / 10; const u64 possible_critical_memory = (device_memory * 7) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); minimum_memory = 0; } else { // On OpenGL we can be more conservatives as the driver takes care. @@ -355,7 +355,6 @@ void TextureCache

::FillImageViews(DescriptorTable& table, if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { const ImageViewBase& image_view{slot_image_views[view.id]}; auto& image = slot_images[image_view.image_id]; - image.flags |= ImageFlagBits::Blacklisted; has_blacklisted |= ScaleDown(image); image.scale_rating = 0; } @@ -985,7 +984,6 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA bool can_rescale = info.rescaleable; bool any_rescaled = false; - bool any_blacklisted = false; for (const ImageId sibling_id : all_siblings) { if (!can_rescale) { break; @@ -993,7 +991,6 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA Image& sibling = slot_images[sibling_id]; can_rescale &= ImageCanRescale(sibling); any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); - any_blacklisted |= True(sibling.flags & ImageFlagBits::Blacklisted); } can_rescale &= any_rescaled; @@ -1007,9 +1004,6 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA for (const ImageId sibling_id : all_siblings) { Image& sibling = slot_images[sibling_id]; ScaleDown(sibling); - if (any_blacklisted) { - sibling.flags |= ImageFlagBits::Blacklisted; - } } } @@ -1644,7 +1638,6 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { boost::container::small_vector aliased_images; Image& image = slot_images[image_id]; bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); - bool any_blacklisted = True(image.flags & ImageFlagBits::Blacklisted); u64 most_recent_tick = image.modification_tick; for (const AliasedImage& aliased : image.aliased_images) { ImageBase& aliased_image = slot_images[aliased.id]; @@ -1652,7 +1645,6 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); - any_blacklisted |= True(aliased_image.flags & ImageFlagBits::Blacklisted); } } if (aliased_images.empty()) { @@ -1664,9 +1656,6 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { ScaleUp(image); } else { ScaleDown(image); - if (any_blacklisted) { - image.flags |= ImageFlagBits::Blacklisted; - } } } image.modification_tick = most_recent_tick; @@ -1684,9 +1673,6 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { Image& aliased_image = slot_images[aliased->id]; if (!can_rescale) { ScaleDown(aliased_image); - if (any_blacklisted) { - aliased_image.flags |= ImageFlagBits::Blacklisted; - } CopyImage(image_id, aliased->id, aliased->copies); continue; }