From 80de01a5b4a7f57ec7850079fbd38fac76b9d08f Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 3 Jan 2024 22:46:59 -0500 Subject: [PATCH 01/15] video_core: simplify accelerated surface fetch and crop handling between APIs --- src/video_core/CMakeLists.txt | 1 + src/video_core/framebuffer_config.cpp | 55 ++++ src/video_core/framebuffer_config.h | 3 + src/video_core/rasterizer_interface.h | 6 - .../renderer_null/null_rasterizer.cpp | 4 - .../renderer_null/null_rasterizer.h | 2 - src/video_core/renderer_opengl/gl_fsr.cpp | 21 +- src/video_core/renderer_opengl/gl_fsr.h | 2 +- .../renderer_opengl/gl_rasterizer.cpp | 28 +-- .../renderer_opengl/gl_rasterizer.h | 13 +- .../renderer_opengl/renderer_opengl.cpp | 235 ++++++++---------- .../renderer_opengl/renderer_opengl.h | 32 +-- .../renderer_vulkan/renderer_vulkan.cpp | 22 +- .../renderer_vulkan/renderer_vulkan.h | 4 +- .../renderer_vulkan/vk_blit_screen.cpp | 89 ++----- .../renderer_vulkan/vk_blit_screen.h | 20 +- .../renderer_vulkan/vk_rasterizer.cpp | 27 +- .../renderer_vulkan/vk_rasterizer.h | 14 +- 18 files changed, 262 insertions(+), 316 deletions(-) create mode 100644 src/video_core/framebuffer_config.cpp diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 0755ba772a..36aa7bb66f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -55,6 +55,7 @@ add_library(video_core STATIC engines/maxwell_dma.h engines/puller.cpp engines/puller.h + framebuffer_config.cpp framebuffer_config.h fsr.cpp fsr.h diff --git a/src/video_core/framebuffer_config.cpp b/src/video_core/framebuffer_config.cpp new file mode 100644 index 0000000000..e28d41f84c --- /dev/null +++ b/src/video_core/framebuffer_config.cpp @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/assert.h" +#include "video_core/framebuffer_config.h" + +namespace Tegra { + +Common::Rectangle NormalizeCrop(const 
FramebufferConfig& framebuffer, u32 texture_width, + u32 texture_height) { + f32 left, top, right, bottom; + + if (!framebuffer.crop_rect.IsEmpty()) { + // If crop rectangle is not empty, apply properties from rectangle. + left = static_cast(framebuffer.crop_rect.left); + top = static_cast(framebuffer.crop_rect.top); + right = static_cast(framebuffer.crop_rect.right); + bottom = static_cast(framebuffer.crop_rect.bottom); + } else { + // Otherwise, fall back to framebuffer dimensions. + left = 0; + top = 0; + right = static_cast(framebuffer.width); + bottom = static_cast(framebuffer.height); + } + + // Apply transformation flags. + auto framebuffer_transform_flags = framebuffer.transform_flags; + + if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipH)) { + // Switch left and right. + std::swap(left, right); + } + if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipV)) { + // Switch top and bottom. + std::swap(top, bottom); + } + + framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipH; + framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipV; + if (True(framebuffer_transform_flags)) { + UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}", + static_cast(framebuffer_transform_flags)); + } + + // Normalize coordinate space. 
+ left /= static_cast(texture_width); + top /= static_cast(texture_height); + right /= static_cast(texture_width); + bottom /= static_cast(texture_height); + + return Common::Rectangle(left, top, right, bottom); +} + +} // namespace Tegra diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h index 856f4bd529..10ddc75a71 100644 --- a/src/video_core/framebuffer_config.h +++ b/src/video_core/framebuffer_config.h @@ -24,4 +24,7 @@ struct FramebufferConfig { Common::Rectangle crop_rect; }; +Common::Rectangle NormalizeCrop(const FramebufferConfig& framebuffer, u32 texture_width, + u32 texture_height); + } // namespace Tegra diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 8fa4e4d9a2..6e2eccfbf0 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -155,12 +155,6 @@ public: virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, std::span memory) = 0; - /// Attempt to use a faster method to display the framebuffer to screen - [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, - DAddr framebuffer_addr, u32 pixel_stride) { - return false; - } - /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const DiskResourceLoadCallback& callback) {} diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index abfabb65bb..a5cda0f389 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -92,10 +92,6 @@ bool RasterizerNull::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surfac } void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, std::span memory) {} -bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, - DAddr framebuffer_addr, u32 
pixel_stride) { - return true; -} void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, const VideoCore::DiskResourceLoadCallback& callback) {} void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) { diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index a5789604f9..c7f5849c75 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -77,8 +77,6 @@ public: Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, std::span memory) override; - bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, - u32 pixel_stride) override; void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; void InitializeChannel(Tegra::Control::ChannelState& channel) override; diff --git a/src/video_core/renderer_opengl/gl_fsr.cpp b/src/video_core/renderer_opengl/gl_fsr.cpp index 77262dcf16..429dcdc6ca 100644 --- a/src/video_core/renderer_opengl/gl_fsr.cpp +++ b/src/video_core/renderer_opengl/gl_fsr.cpp @@ -25,7 +25,7 @@ FSR::~FSR() = default; void FSR::Draw(ProgramManager& program_manager, const Common::Rectangle& screen, u32 input_image_width, u32 input_image_height, - const Common::Rectangle& crop_rect) { + const Common::Rectangle& crop_rect) { const auto output_image_width = screen.GetWidth(); const auto output_image_height = screen.GetHeight(); @@ -57,14 +57,19 @@ void FSR::Draw(ProgramManager& program_manager, const Common::Rectangle& sc glViewportIndexedf(0, 0.0f, 0.0f, static_cast(output_image_width), static_cast(output_image_height)); - FsrConstants constants; - FsrEasuConOffset( - constants.data() + 0, constants.data() + 4, constants.data() + 8, constants.data() + 12, + const f32 input_width = static_cast(input_image_width); + 
const f32 input_height = static_cast(input_image_height); + const f32 output_width = static_cast(screen.GetWidth()); + const f32 output_height = static_cast(screen.GetHeight()); + const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_width; + const f32 viewport_x = crop_rect.left * input_width; + const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_height; + const f32 viewport_y = crop_rect.top * input_height; - static_cast(crop_rect.GetWidth()), static_cast(crop_rect.GetHeight()), - static_cast(input_image_width), static_cast(input_image_height), - static_cast(output_image_width), static_cast(output_image_height), - static_cast(crop_rect.left), static_cast(crop_rect.top)); + FsrConstants constants; + FsrEasuConOffset(constants.data() + 0, constants.data() + 4, constants.data() + 8, + constants.data() + 12, viewport_width, viewport_height, input_width, + input_height, output_width, output_height, viewport_x, viewport_y); glProgramUniform4uiv(fsr_easu_frag.handle, 0, sizeof(constants), std::data(constants)); diff --git a/src/video_core/renderer_opengl/gl_fsr.h b/src/video_core/renderer_opengl/gl_fsr.h index 1f6ae3115f..a5092e3969 100644 --- a/src/video_core/renderer_opengl/gl_fsr.h +++ b/src/video_core/renderer_opengl/gl_fsr.h @@ -22,7 +22,7 @@ public: void Draw(ProgramManager& program_manager, const Common::Rectangle& screen, u32 input_image_width, u32 input_image_height, - const Common::Rectangle& crop_rect); + const Common::Rectangle& crop_rect); void InitBuffers(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d5354ef2d6..050a74cca1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -71,10 +71,10 @@ std::optional MaxwellToVideoCoreQuery(VideoCommon::QueryTy RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MaxwellDeviceMemoryManager& 
device_memory_, - const Device& device_, ScreenInfo& screen_info_, - ProgramManager& program_manager_, StateTracker& state_tracker_) - : gpu(gpu_), device_memory(device_memory_), device(device_), screen_info(screen_info_), - program_manager(program_manager_), state_tracker(state_tracker_), + const Device& device_, ProgramManager& program_manager_, + StateTracker& state_tracker_) + : gpu(gpu_), device_memory(device_memory_), device(device_), program_manager(program_manager_), + state_tracker(state_tracker_), texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), texture_cache(texture_cache_runtime, device_memory_), buffer_cache_runtime(device, staging_buffer_pool), @@ -739,10 +739,10 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si query_cache.InvalidateRegion(*cpu_addr, copy_size); } -bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, - DAddr framebuffer_addr, u32 pixel_stride) { +std::optional RasterizerOpenGL::AccelerateDisplay( + const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, u32 pixel_stride) { if (framebuffer_addr == 0) { - return false; + return {}; } MICROPROFILE_SCOPE(OpenGL_CacheManagement); @@ -750,16 +750,14 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, ImageView* const image_view{ texture_cache.TryFindFramebufferImageView(config, framebuffer_addr)}; if (!image_view) { - return false; + return {}; } - // Verify that the cached surface is the same size and format as the requested framebuffer - // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); - // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); - screen_info.texture.width = image_view->size.width; - screen_info.texture.height = image_view->size.height; - screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); - return true; + FramebufferTextureInfo info{}; 
+ info.display_texture = image_view->Handle(Shader::TextureType::Color2D); + info.width = image_view->size.width; + info.height = image_view->size.height; + return info; } void RasterizerOpenGL::SyncState() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 34aa735263..ee82d9f3a6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -37,7 +37,7 @@ class MemoryManager; namespace OpenGL { -struct ScreenInfo; +struct FramebufferTextureInfo; struct ShaderEntries; struct BindlessSSBO { @@ -76,8 +76,8 @@ class RasterizerOpenGL : public VideoCore::RasterizerInterface, public: explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MaxwellDeviceMemoryManager& device_memory_, - const Device& device_, ScreenInfo& screen_info_, - ProgramManager& program_manager_, StateTracker& state_tracker_); + const Device& device_, ProgramManager& program_manager_, + StateTracker& state_tracker_); ~RasterizerOpenGL() override; void Draw(bool is_indexed, u32 instance_count) override; @@ -122,8 +122,6 @@ public: Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, std::span memory) override; - bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, - u32 pixel_stride) override; void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; @@ -144,6 +142,10 @@ public: return true; } + std::optional AccelerateDisplay(const Tegra::FramebufferConfig& config, + VAddr framebuffer_addr, + u32 pixel_stride); + private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; @@ -237,7 +239,6 @@ private: Tegra::MaxwellDeviceMemoryManager& device_memory; const Device& device; - ScreenInfo& screen_info; ProgramManager& 
program_manager; StateTracker& state_tracker; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b75376fdbc..ea5ed3e2fc 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -148,8 +148,7 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, device_memory{device_memory_}, gpu{gpu_}, device{emu_window_}, state_tracker{}, program_manager{device}, - rasterizer(emu_window, gpu, device_memory, device, screen_info, program_manager, - state_tracker) { + rasterizer(emu_window, gpu, device_memory, device, program_manager, state_tracker) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); @@ -184,11 +183,11 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - PrepareRendertarget(framebuffer); - RenderScreenshot(); + + RenderScreenshot(*framebuffer); state_tracker.BindFramebuffer(0); - DrawScreen(emu_window.GetFramebufferLayout()); + DrawScreen(*framebuffer, emu_window.GetFramebufferLayout()); ++m_current_frame; @@ -199,41 +198,37 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.OnFrameDisplayed(); } -void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { - if (!framebuffer) { - return; - } +FramebufferTextureInfo RendererOpenGL::PrepareRenderTarget( + const Tegra::FramebufferConfig& framebuffer) { // If framebuffer is provided, reload it from memory to a texture - if (screen_info.texture.width != static_cast(framebuffer->width) || - screen_info.texture.height != static_cast(framebuffer->height) || - screen_info.texture.pixel_format != framebuffer->pixel_format || + if 
(framebuffer_texture.width != static_cast(framebuffer.width) || + framebuffer_texture.height != static_cast(framebuffer.height) || + framebuffer_texture.pixel_format != framebuffer.pixel_format || gl_framebuffer_data.empty()) { // Reallocate texture if the framebuffer size has changed. // This is expected to not happen very often and hence should not be a // performance problem. - ConfigureFramebufferTexture(screen_info.texture, *framebuffer); + ConfigureFramebufferTexture(framebuffer); } - // Load the framebuffer from memory, draw it to the screen, and swap buffers - LoadFBToScreenInfo(*framebuffer); + // Load the framebuffer from memory if needed + return LoadFBToScreenInfo(framebuffer); } -void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { - // Framebuffer orientation handling - framebuffer_transform_flags = framebuffer.transform_flags; - framebuffer_crop_rect = framebuffer.crop_rect; - framebuffer_width = framebuffer.width; - framebuffer_height = framebuffer.height; - +FramebufferTextureInfo RendererOpenGL::LoadFBToScreenInfo( + const Tegra::FramebufferConfig& framebuffer) { const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; - screen_info.was_accelerated = + const auto accelerated_info = rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride); - if (screen_info.was_accelerated) { - return; + if (accelerated_info) { + return *accelerated_info; } // Reset the screen info's display texture to its own permanent texture - screen_info.display_texture = screen_info.texture.resource.handle; + FramebufferTextureInfo info{}; + info.display_texture = framebuffer_texture.resource.handle; + info.width = framebuffer.width; + info.height = framebuffer.height; // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; @@ -256,17 +251,13 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf // they differ from the LCD resolution. 
// TODO: Applications could theoretically crash yuzu here by specifying too large // framebuffer sizes. We should make sure that this cannot happen. - glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width, - framebuffer.height, screen_info.texture.gl_format, - screen_info.texture.gl_type, gl_framebuffer_data.data()); + glTextureSubImage2D(framebuffer_texture.resource.handle, 0, 0, 0, framebuffer.width, + framebuffer.height, framebuffer_texture.gl_format, + framebuffer_texture.gl_type, gl_framebuffer_data.data()); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); -} -void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, - const TextureInfo& texture) { - const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r}; - glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data); + return info; } void RendererOpenGL::InitOpenGLObjects() { @@ -343,15 +334,15 @@ void RendererOpenGL::InitOpenGLObjects() { glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); // Allocate textures for the screen - screen_info.texture.resource.Create(GL_TEXTURE_2D); + framebuffer_texture.resource.Create(GL_TEXTURE_2D); - const GLuint texture = screen_info.texture.resource.handle; + const GLuint texture = framebuffer_texture.resource.handle; glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1); - screen_info.display_texture = screen_info.texture.resource.handle; - // Clear screen to black - LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + const u8 framebuffer_data[4] = {0, 0, 0, 0}; + glClearTexImage(framebuffer_texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, + framebuffer_data); aa_framebuffer.Create(); @@ -380,60 +371,65 @@ void RendererOpenGL::AddTelemetryFields() { telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); } -void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, - const 
Tegra::FramebufferConfig& framebuffer) { - texture.width = framebuffer.width; - texture.height = framebuffer.height; - texture.pixel_format = framebuffer.pixel_format; +void RendererOpenGL::ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer) { + framebuffer_texture.width = framebuffer.width; + framebuffer_texture.height = framebuffer.height; + framebuffer_texture.pixel_format = framebuffer.pixel_format; const auto pixel_format{ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; - gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); + gl_framebuffer_data.resize(framebuffer_texture.width * framebuffer_texture.height * + bytes_per_pixel); GLint internal_format; switch (framebuffer.pixel_format) { case Service::android::PixelFormat::Rgba8888: internal_format = GL_RGBA8; - texture.gl_format = GL_RGBA; - texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; + framebuffer_texture.gl_format = GL_RGBA; + framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; break; case Service::android::PixelFormat::Rgb565: internal_format = GL_RGB565; - texture.gl_format = GL_RGB; - texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; + framebuffer_texture.gl_format = GL_RGB; + framebuffer_texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; break; default: internal_format = GL_RGBA8; - texture.gl_format = GL_RGBA; - texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; + framebuffer_texture.gl_format = GL_RGBA; + framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", // static_cast(framebuffer.pixel_format)); break; } - texture.resource.Release(); - texture.resource.Create(GL_TEXTURE_2D); - glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); + framebuffer_texture.resource.Release(); + framebuffer_texture.resource.Create(GL_TEXTURE_2D); + 
glTextureStorage2D(framebuffer_texture.resource.handle, 1, internal_format, + framebuffer_texture.width, framebuffer_texture.height); aa_texture.Release(); aa_texture.Create(GL_TEXTURE_2D); glTextureStorage2D(aa_texture.handle, 1, GL_RGBA16F, - Settings::values.resolution_info.ScaleUp(screen_info.texture.width), - Settings::values.resolution_info.ScaleUp(screen_info.texture.height)); + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, aa_texture.handle, 0); smaa_edges_tex.Release(); smaa_edges_tex.Create(GL_TEXTURE_2D); glTextureStorage2D(smaa_edges_tex.handle, 1, GL_RG16F, - Settings::values.resolution_info.ScaleUp(screen_info.texture.width), - Settings::values.resolution_info.ScaleUp(screen_info.texture.height)); + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); smaa_blend_tex.Release(); smaa_blend_tex.Create(GL_TEXTURE_2D); glTextureStorage2D(smaa_blend_tex.handle, 1, GL_RGBA16F, - Settings::values.resolution_info.ScaleUp(screen_info.texture.width), - Settings::values.resolution_info.ScaleUp(screen_info.texture.height)); + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); } -void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { +void RendererOpenGL::DrawScreen(const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout) { + FramebufferTextureInfo info = PrepareRenderTarget(framebuffer); + const auto crop = Tegra::NormalizeCrop(framebuffer, info.width, info.height); + // TODO: Signal state tracker about these changes state_tracker.NotifyScreenDrawVertexArray(); state_tracker.NotifyPolygonModes(); @@ -469,7 +465,7 @@ void RendererOpenGL::DrawScreen(const 
Layout::FramebufferLayout& layout) { glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthRangeIndexed(0, 0.0, 0.0); - glBindTextureUnit(0, screen_info.display_texture); + glBindTextureUnit(0, info.display_texture); auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); if (anti_aliasing >= Settings::AntiAliasing::MaxEnum) { @@ -480,22 +476,22 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { if (anti_aliasing != Settings::AntiAliasing::None) { glEnablei(GL_SCISSOR_TEST, 0); - auto viewport_width = screen_info.texture.width; - auto scissor_width = framebuffer_crop_rect.GetWidth(); + auto viewport_width = info.width; + auto scissor_width = static_cast(crop.GetWidth()); if (scissor_width <= 0) { scissor_width = viewport_width; } - auto viewport_height = screen_info.texture.height; - auto scissor_height = framebuffer_crop_rect.GetHeight(); + auto viewport_height = info.height; + auto scissor_height = static_cast(crop.GetHeight()); if (scissor_height <= 0) { scissor_height = viewport_height; } - if (screen_info.was_accelerated) { - viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width); - scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width); - viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height); - scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height); - } + + viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width); + scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width); + viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height); + scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height); + glScissorIndexed(0, 0, 0, scissor_width, scissor_height); glViewportIndexedf(0, 0.0f, 0.0f, static_cast(viewport_width), static_cast(viewport_height)); @@ -536,7 +532,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { 
smaa_blending_weight_calculation_frag.handle); glDrawArrays(GL_TRIANGLES, 0, 3); - glBindTextureUnit(0, screen_info.display_texture); + glBindTextureUnit(0, info.display_texture); glBindTextureUnit(1, smaa_blend_tex.handle); glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, aa_texture.handle, 0); @@ -561,18 +557,10 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { fsr->InitBuffers(); } - auto crop_rect = framebuffer_crop_rect; - if (crop_rect.GetWidth() == 0) { - crop_rect.right = framebuffer_width; - } - if (crop_rect.GetHeight() == 0) { - crop_rect.bottom = framebuffer_height; - } - crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); - const auto fsr_input_width = Settings::values.resolution_info.ScaleUp(framebuffer_width); - const auto fsr_input_height = Settings::values.resolution_info.ScaleUp(framebuffer_height); + const auto fsr_input_width = Settings::values.resolution_info.ScaleUp(info.width); + const auto fsr_input_height = Settings::values.resolution_info.ScaleUp(info.height); glBindSampler(0, present_sampler.handle); - fsr->Draw(program_manager, layout.screen, fsr_input_width, fsr_input_height, crop_rect); + fsr->Draw(program_manager, layout.screen, fsr_input_width, fsr_input_height, crop); } else { if (fsr->AreBuffersInitialized()) { fsr->ReleaseBuffers(); @@ -603,61 +591,34 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); - const auto& texcoords = screen_info.display_texcoords; - auto left = texcoords.left; - auto right = texcoords.right; - if (framebuffer_transform_flags != Service::android::BufferTransformFlags::Unset) { - if (framebuffer_transform_flags == Service::android::BufferTransformFlags::FlipV) { - // Flip the framebuffer vertically - left = texcoords.right; - right = texcoords.left; - } else { - // Other transformations are 
unsupported - LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}", - framebuffer_transform_flags); - UNIMPLEMENTED(); - } - } - - ASSERT_MSG(framebuffer_crop_rect.left == 0, "Unimplemented"); - - f32 left_start{}; - if (framebuffer_crop_rect.Top() > 0) { - left_start = static_cast(framebuffer_crop_rect.Top()) / - static_cast(framebuffer_crop_rect.Bottom()); - } - f32 scale_u = static_cast(framebuffer_width) / static_cast(screen_info.texture.width); - f32 scale_v = - static_cast(framebuffer_height) / static_cast(screen_info.texture.height); - - if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::Fsr) { - // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering - // (e.g. handheld mode) on a 1920x1080 framebuffer. - if (framebuffer_crop_rect.GetWidth() > 0) { - scale_u = static_cast(framebuffer_crop_rect.GetWidth()) / - static_cast(screen_info.texture.width); - } - if (framebuffer_crop_rect.GetHeight() > 0) { - scale_v = static_cast(framebuffer_crop_rect.GetHeight()) / - static_cast(screen_info.texture.height); - } - } - if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa && - !screen_info.was_accelerated) { - scale_u /= Settings::values.resolution_info.up_factor; - scale_v /= Settings::values.resolution_info.up_factor; + f32 left, top, right, bottom; + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + // FSR has already applied the crop, so we just want to render the image + // it has produced. + left = 0; + top = 0; + right = 1; + bottom = 1; + } else { + // Apply the precomputed crop. + left = crop.left; + top = crop.top; + right = crop.right; + bottom = crop.bottom; } + // Map the coordinates to the screen. 
const auto& screen = layout.screen; + const auto x = screen.left; + const auto y = screen.top; + const auto w = screen.GetWidth(); + const auto h = screen.GetHeight(); + const std::array vertices = { - ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u, - left_start + left * scale_v), - ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u, - left_start + left * scale_v), - ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u, - left_start + right * scale_v), - ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u, - left_start + right * scale_v), + ScreenRectVertex(x, y, left, top), + ScreenRectVertex(x + w, y, right, top), + ScreenRectVertex(x, y + h, left, bottom), + ScreenRectVertex(x + w, y + h, right, bottom), }; glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); @@ -701,7 +662,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // program_manager.RestoreGuestPipeline(); } -void RendererOpenGL::RenderScreenshot() { +void RendererOpenGL::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer) { if (!renderer_settings.screenshot_requested) { return; } @@ -723,7 +684,7 @@ void RendererOpenGL::RenderScreenshot() { glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, layout.width, layout.height); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); - DrawScreen(layout); + DrawScreen(framebuffer, layout); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glPixelStorei(GL_PACK_ROW_LENGTH, 0); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 18699610a0..cde8c57026 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -50,11 +50,10 @@ struct TextureInfo { }; /// Structure used for storing information about the display target for the Switch screen -struct ScreenInfo { +struct 
FramebufferTextureInfo { GLuint display_texture{}; - bool was_accelerated = false; - const Common::Rectangle display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; - TextureInfo texture; + u32 width; + u32 height; }; class RendererOpenGL final : public VideoCore::RendererBase { @@ -81,23 +80,18 @@ private: void AddTelemetryFields(); - void ConfigureFramebufferTexture(TextureInfo& texture, - const Tegra::FramebufferConfig& framebuffer); + void ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer); /// Draws the emulated screens to the emulator window. - void DrawScreen(const Layout::FramebufferLayout& layout); + void DrawScreen(const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout); - void RenderScreenshot(); + void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer); /// Loads framebuffer from emulated memory into the active OpenGL texture. - void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); + FramebufferTextureInfo LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); - /// Fills active OpenGL texture with the given RGB color.Since the color is solid, the texture - /// can be 1x1 but will stretch across whatever it's rendered on. 
- void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, - const TextureInfo& texture); - - void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer); + FramebufferTextureInfo PrepareRenderTarget(const Tegra::FramebufferConfig& framebuffer); Core::TelemetrySession& telemetry_session; Core::Frontend::EmuWindow& emu_window; @@ -126,7 +120,7 @@ private: GLuint64EXT vertex_buffer_address = 0; /// Display information for Switch screen - ScreenInfo screen_info; + TextureInfo framebuffer_texture; OGLTexture aa_texture; OGLFramebuffer aa_framebuffer; @@ -145,12 +139,6 @@ private: /// OpenGL framebuffer data std::vector gl_framebuffer_data; - - /// Used for transforming the framebuffer orientation - Service::android::BufferTransformFlags framebuffer_transform_flags{}; - Common::Rectangle framebuffer_crop_rect; - u32 framebuffer_width; - u32 framebuffer_height; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 1631276c6d..e1fe53bbd9 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -98,9 +98,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, surface), blit_screen(device_memory, render_window, device, memory_allocator, swapchain, - present_manager, scheduler, screen_info), - rasterizer(render_window, gpu, device_memory, screen_info, device, memory_allocator, - state_tracker, scheduler) { + present_manager, scheduler), + rasterizer(render_window, gpu, device_memory, device, memory_allocator, state_tracker, + scheduler) { if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { turbo_mode.emplace(instance, dld); scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); }); @@ -124,17 +124,10 @@ void 
RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!render_window.IsShown()) { return; } - // Update screen info if the framebuffer size has changed. - screen_info.width = framebuffer->width; - screen_info.height = framebuffer->height; - - const DAddr framebuffer_addr = framebuffer->address + framebuffer->offset; - const bool use_accelerated = - rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); - RenderScreenshot(*framebuffer, use_accelerated); + RenderScreenshot(*framebuffer); Frame* frame = present_manager.GetRenderFrame(); - blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated); + blit_screen.DrawToSwapchain(rasterizer, frame, *framebuffer); scheduler.Flush(*frame->render_ready); present_manager.Present(frame); @@ -168,8 +161,7 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } -void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated) { +void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer) { if (!renderer_settings.screenshot_requested) { return; } @@ -221,7 +213,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr }); const VkExtent2D render_area{.width = layout.width, .height = layout.height}; const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area); - blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated); + blit_screen.Draw(rasterizer, framebuffer, *screenshot_fb, layout, render_area); const auto buffer_size = static_cast(layout.width * layout.height * 4); const VkBufferCreateInfo dst_buffer_info{ diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 11c52287ad..d7d006b202 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ 
b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -59,7 +59,7 @@ public: private: void Report() const; - void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated); + void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer); Core::TelemetrySession& telemetry_session; Tegra::MaxwellDeviceMemoryManager& device_memory; @@ -72,8 +72,6 @@ private: vk::DebugUtilsMessenger debug_messenger; vk::SurfaceKHR surface; - ScreenInfo screen_info; - Device device; MemoryAllocator memory_allocator; StateTracker state_tracker; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 610f27c846..c21a9c8fe7 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -124,11 +124,10 @@ struct BlitScreen::BufferData { BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, Core::Frontend::EmuWindow& render_window_, const Device& device_, MemoryAllocator& memory_allocator_, Swapchain& swapchain_, - PresentManager& present_manager_, Scheduler& scheduler_, - const ScreenInfo& screen_info_) + PresentManager& present_manager_, Scheduler& scheduler_) : device_memory{device_memory_}, render_window{render_window_}, device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, - scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { + scheduler{scheduler_}, image_count{swapchain.GetImageCount()} { resource_ticks.resize(image_count); swapchain_view_format = swapchain.GetImageViewFormat(); @@ -138,56 +137,6 @@ BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, BlitScreen::~BlitScreen() = default; -static Common::Rectangle NormalizeCrop(const Tegra::FramebufferConfig& framebuffer, - const ScreenInfo& screen_info) { - f32 left, top, right, bottom; - - if (!framebuffer.crop_rect.IsEmpty()) { - // If crop 
rectangle is not empty, apply properties from rectangle. - left = static_cast(framebuffer.crop_rect.left); - top = static_cast(framebuffer.crop_rect.top); - right = static_cast(framebuffer.crop_rect.right); - bottom = static_cast(framebuffer.crop_rect.bottom); - } else { - // Otherwise, fall back to framebuffer dimensions. - left = 0; - top = 0; - right = static_cast(framebuffer.width); - bottom = static_cast(framebuffer.height); - } - - // Apply transformation flags. - auto framebuffer_transform_flags = framebuffer.transform_flags; - - if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipH)) { - // Switch left and right. - std::swap(left, right); - } - if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipV)) { - // Switch top and bottom. - std::swap(top, bottom); - } - - framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipH; - framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipV; - if (True(framebuffer_transform_flags)) { - UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}", - static_cast(framebuffer_transform_flags)); - } - - // Get the screen properties. - const f32 screen_width = static_cast(screen_info.width); - const f32 screen_height = static_cast(screen_info.height); - - // Normalize coordinate space. 
- left /= screen_width; - top /= screen_height; - right /= screen_width; - bottom /= screen_height; - - return Common::Rectangle(left, top, right, bottom); -} - void BlitScreen::Recreate() { present_manager.WaitPresent(); scheduler.Finish(); @@ -195,9 +144,16 @@ void BlitScreen::Recreate() { CreateDynamicResources(); } -void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, +void BlitScreen::Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout, - VkExtent2D render_area, bool use_accelerated) { + VkExtent2D render_area) { + + const auto texture_info = rasterizer.AccelerateDisplay( + framebuffer, framebuffer.address + framebuffer.offset, framebuffer.stride); + const u32 texture_width = texture_info ? texture_info->width : framebuffer.width; + const u32 texture_height = texture_info ? texture_info->height : framebuffer.height; + const bool use_accelerated = texture_info.has_value(); + RefreshResources(framebuffer); // Finish any pending renderpass @@ -206,13 +162,13 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); - VkImage source_image = use_accelerated ? screen_info.image : *raw_images[image_index]; + VkImage source_image = texture_info ? texture_info->image : *raw_images[image_index]; VkImageView source_image_view = - use_accelerated ? screen_info.image_view : *raw_image_views[image_index]; + texture_info ? 
texture_info->image_view : *raw_image_views[image_index]; BufferData data; SetUniformData(data, layout); - SetVertexData(data, framebuffer, layout); + SetVertexData(data, framebuffer, layout, texture_width, texture_height); const std::span mapped_span = buffer.Mapped(); std::memcpy(mapped_span.data(), &data, sizeof(data)); @@ -405,10 +361,10 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, source_image_view = smaa->Draw(scheduler, image_index, source_image, source_image_view); } if (fsr) { - const auto crop_rect = NormalizeCrop(framebuffer, screen_info); + const auto crop_rect = Tegra::NormalizeCrop(framebuffer, texture_width, texture_height); const VkExtent2D fsr_input_size{ - .width = Settings::values.resolution_info.ScaleUp(screen_info.width), - .height = Settings::values.resolution_info.ScaleUp(screen_info.height), + .width = Settings::values.resolution_info.ScaleUp(texture_width), + .height = Settings::values.resolution_info.ScaleUp(texture_height), }; VkImageView fsr_image_view = fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); @@ -480,8 +436,8 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, }); } -void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated) { +void BlitScreen::DrawToSwapchain(RasterizerVulkan& rasterizer, Frame* frame, + const Tegra::FramebufferConfig& framebuffer) { // Recreate dynamic resources if the the image count or input format changed const VkFormat current_framebuffer_format = std::exchange(framebuffer_view_format, GetFormat(framebuffer)); @@ -500,7 +456,7 @@ void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& f } const VkExtent2D render_area{frame->width, frame->height}; - Draw(framebuffer, *frame->framebuffer, layout, render_area, use_accelerated); + Draw(rasterizer, framebuffer, *frame->framebuffer, layout, render_area); if (++image_index >= image_count) { image_index = 0; } 
@@ -1434,7 +1390,8 @@ void BlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayou } void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout layout) const { + const Layout::FramebufferLayout layout, u32 texture_width, + u32 texture_height) const { f32 left, top, right, bottom; if (fsr) { @@ -1446,7 +1403,7 @@ void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& bottom = 1; } else { // Get the normalized crop rectangle. - const auto crop = NormalizeCrop(framebuffer, screen_info); + const auto crop = Tegra::NormalizeCrop(framebuffer, texture_width, texture_height); // Apply the crop. left = crop.left; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 3eff760092..40338886a2 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -32,8 +32,6 @@ enum class PixelFormat : u32; namespace Vulkan { -struct ScreenInfo; - class Device; class FSR; class RasterizerVulkan; @@ -44,7 +42,7 @@ class PresentManager; struct Frame; -struct ScreenInfo { +struct FramebufferTextureInfo { VkImage image{}; VkImageView image_view{}; u32 width{}; @@ -56,17 +54,17 @@ public: explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, Core::Frontend::EmuWindow& render_window, const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, - PresentManager& present_manager, Scheduler& scheduler, - const ScreenInfo& screen_info); + PresentManager& present_manager, Scheduler& scheduler); ~BlitScreen(); void Recreate(); - void Draw(const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer, - const Layout::FramebufferLayout layout, VkExtent2D render_area, bool use_accelerated); + void Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConfig& framebuffer, + const VkFramebuffer& host_framebuffer, 
const Layout::FramebufferLayout layout, + VkExtent2D render_area); - void DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated); + void DrawToSwapchain(RasterizerVulkan& rasterizer, Frame* frame, + const Tegra::FramebufferConfig& framebuffer); [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent); @@ -99,7 +97,8 @@ private: void UpdateAADescriptorSet(VkImageView image_view, bool nn) const; void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout layout) const; + const Layout::FramebufferLayout layout, u32 texture_width, + u32 texture_height) const; void CreateSMAA(VkExtent2D smaa_size); void CreateFSR(); @@ -116,7 +115,6 @@ private: Scheduler& scheduler; std::size_t image_count; std::size_t image_index{}; - const ScreenInfo& screen_info; vk::ShaderModule vertex_shader; vk::ShaderModule fxaa_vertex_shader; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5bf41b81f6..e593d7225d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -165,10 +165,9 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MaxwellDeviceMemoryManager& device_memory_, - ScreenInfo& screen_info_, const Device& device_, - MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, - Scheduler& scheduler_) - : gpu{gpu_}, device_memory{device_memory_}, screen_info{screen_info_}, device{device_}, + const Device& device_, MemoryAllocator& memory_allocator_, + StateTracker& state_tracker_, Scheduler& scheduler_) + : gpu{gpu_}, device_memory{device_memory_}, device{device_}, 
memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler), @@ -783,23 +782,25 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si query_cache.InvalidateRegion(*cpu_addr, copy_size); } -bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, - DAddr framebuffer_addr, u32 pixel_stride) { +std::optional RasterizerVulkan::AccelerateDisplay( + const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, u32 pixel_stride) { if (!framebuffer_addr) { - return false; + return {}; } std::scoped_lock lock{texture_cache.mutex}; ImageView* const image_view = texture_cache.TryFindFramebufferImageView(config, framebuffer_addr); if (!image_view) { - return false; + return {}; } query_cache.NotifySegment(false); - screen_info.image = image_view->ImageHandle(); - screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); - screen_info.width = image_view->size.width; - screen_info.height = image_view->size.height; - return true; + + FramebufferTextureInfo info{}; + info.image = image_view->ImageHandle(); + info.image_view = image_view->Handle(Shader::TextureType::Color2D); + info.width = image_view->size.width; + info.height = image_view->size.height; + return info; } void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 881ee0993e..0617b37f05 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -43,7 +43,7 @@ class Maxwell3D; namespace Vulkan { -struct ScreenInfo; +struct FramebufferTextureInfo; class StateTracker; @@ -78,9 +78,8 @@ class RasterizerVulkan final : public 
VideoCore::RasterizerInterface, public: explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MaxwellDeviceMemoryManager& device_memory_, - ScreenInfo& screen_info_, const Device& device_, - MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, - Scheduler& scheduler_); + const Device& device_, MemoryAllocator& memory_allocator_, + StateTracker& state_tracker_, Scheduler& scheduler_); ~RasterizerVulkan() override; void Draw(bool is_indexed, u32 instance_count) override; @@ -126,8 +125,6 @@ public: Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, std::span memory) override; - bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, - u32 pixel_stride) override; void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; @@ -137,6 +134,10 @@ public: void ReleaseChannel(s32 channel_id) override; + std::optional AccelerateDisplay(const Tegra::FramebufferConfig& config, + VAddr framebuffer_addr, + u32 pixel_stride); + private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; @@ -182,7 +183,6 @@ private: Tegra::GPU& gpu; Tegra::MaxwellDeviceMemoryManager& device_memory; - ScreenInfo& screen_info; const Device& device; MemoryAllocator& memory_allocator; StateTracker& state_tracker; From 453091f61100effba637950dc840da41d95be477 Mon Sep 17 00:00:00 2001 From: Liam Date: Fri, 26 Jan 2024 22:27:34 -0500 Subject: [PATCH 02/15] video_core: consistently account for resolution scaling when rendering --- .../renderer_opengl/gl_rasterizer.cpp | 8 ++++-- .../renderer_opengl/gl_texture_cache.cpp | 4 +++ .../renderer_opengl/gl_texture_cache.h | 2 ++ .../renderer_opengl/renderer_opengl.cpp | 28 ++++++------------- .../renderer_opengl/renderer_opengl.h | 2 ++ .../renderer_vulkan/vk_blit_screen.cpp | 6 ++-- 
.../renderer_vulkan/vk_blit_screen.h | 2 ++ .../renderer_vulkan/vk_rasterizer.cpp | 6 +++- src/video_core/texture_cache/texture_cache.h | 9 +++--- .../texture_cache/texture_cache_base.h | 4 +-- 10 files changed, 40 insertions(+), 31 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 050a74cca1..b42fb110c7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -747,16 +747,20 @@ std::optional RasterizerOpenGL::AccelerateDisplay( MICROPROFILE_SCOPE(OpenGL_CacheManagement); std::scoped_lock lock{texture_cache.mutex}; - ImageView* const image_view{ - texture_cache.TryFindFramebufferImageView(config, framebuffer_addr)}; + const auto [image_view, scaled] = + texture_cache.TryFindFramebufferImageView(config, framebuffer_addr); if (!image_view) { return {}; } + const auto& resolution = Settings::values.resolution_info; + FramebufferTextureInfo info{}; info.display_texture = image_view->Handle(Shader::TextureType::Color2D); info.width = image_view->size.width; info.height = image_view->size.height; + info.scaled_width = scaled ? resolution.ScaleUp(info.width) : info.width; + info.scaled_height = scaled ? 
resolution.ScaleUp(info.height) : info.height; return info; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 66a5ca03e9..be14494ca5 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1051,6 +1051,10 @@ void Image::Scale(bool up_scale) { state_tracker.NotifyScissor0(); } +bool Image::IsRescaled() const { + return True(flags & ImageFlagBits::Rescaled); +} + bool Image::ScaleUp(bool ignore) { const auto& resolution = runtime->resolution; if (!resolution.active) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 34870c81fa..3e54edcc21 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -217,6 +217,8 @@ public: return gl_type; } + bool IsRescaled() const; + bool ScaleUp(bool ignore = false); bool ScaleDown(bool ignore = false); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index ea5ed3e2fc..2b9ebff92a 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -229,6 +229,8 @@ FramebufferTextureInfo RendererOpenGL::LoadFBToScreenInfo( info.display_texture = framebuffer_texture.resource.handle; info.width = framebuffer.width; info.height = framebuffer.height; + info.scaled_width = framebuffer.width; + info.scaled_height = framebuffer.height; // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; @@ -476,25 +478,13 @@ void RendererOpenGL::DrawScreen(const Tegra::FramebufferConfig& framebuffer, if (anti_aliasing != Settings::AntiAliasing::None) { glEnablei(GL_SCISSOR_TEST, 0); - auto viewport_width = info.width; - auto scissor_width = static_cast(crop.GetWidth()); - if (scissor_width <= 0) { - scissor_width = viewport_width; - } - 
auto viewport_height = info.height; - auto scissor_height = static_cast(crop.GetHeight()); - if (scissor_height <= 0) { - scissor_height = viewport_height; - } - - viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width); - scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width); - viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height); - scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height); + auto scissor_width = Settings::values.resolution_info.ScaleUp(framebuffer_texture.width); + auto viewport_width = static_cast(scissor_width); + auto scissor_height = Settings::values.resolution_info.ScaleUp(framebuffer_texture.height); + auto viewport_height = static_cast(scissor_height); glScissorIndexed(0, 0, 0, scissor_width, scissor_height); - glViewportIndexedf(0, 0.0f, 0.0f, static_cast(viewport_width), - static_cast(viewport_height)); + glViewportIndexedf(0, 0.0f, 0.0f, viewport_width, viewport_height); glBindSampler(0, present_sampler.handle); GLint old_read_fb; @@ -557,10 +547,8 @@ void RendererOpenGL::DrawScreen(const Tegra::FramebufferConfig& framebuffer, fsr->InitBuffers(); } - const auto fsr_input_width = Settings::values.resolution_info.ScaleUp(info.width); - const auto fsr_input_height = Settings::values.resolution_info.ScaleUp(info.height); glBindSampler(0, present_sampler.handle); - fsr->Draw(program_manager, layout.screen, fsr_input_width, fsr_input_height, crop); + fsr->Draw(program_manager, layout.screen, info.scaled_width, info.scaled_height, crop); } else { if (fsr->AreBuffersInitialized()) { fsr->ReleaseBuffers(); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index cde8c57026..3a83a9b78b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -54,6 +54,8 @@ struct FramebufferTextureInfo { GLuint display_texture{}; u32 width; u32 height; + u32 
scaled_width; + u32 scaled_height; }; class RendererOpenGL final : public VideoCore::RendererBase { diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index c21a9c8fe7..24781860bc 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -152,6 +152,8 @@ void BlitScreen::Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConf framebuffer, framebuffer.address + framebuffer.offset, framebuffer.stride); const u32 texture_width = texture_info ? texture_info->width : framebuffer.width; const u32 texture_height = texture_info ? texture_info->height : framebuffer.height; + const u32 scaled_width = texture_info ? texture_info->scaled_width : texture_width; + const u32 scaled_height = texture_info ? texture_info->scaled_height : texture_height; const bool use_accelerated = texture_info.has_value(); RefreshResources(framebuffer); @@ -363,8 +365,8 @@ void BlitScreen::Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConf if (fsr) { const auto crop_rect = Tegra::NormalizeCrop(framebuffer, texture_width, texture_height); const VkExtent2D fsr_input_size{ - .width = Settings::values.resolution_info.ScaleUp(texture_width), - .height = Settings::values.resolution_info.ScaleUp(texture_height), + .width = scaled_width, + .height = scaled_height, }; VkImageView fsr_image_view = fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 40338886a2..56ac47f089 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -47,6 +47,8 @@ struct FramebufferTextureInfo { VkImageView image_view{}; u32 width{}; u32 height{}; + u32 scaled_width{}; + u32 scaled_height{}; }; class BlitScreen { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp 
b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e593d7225d..aa0a027bbd 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -788,18 +788,22 @@ std::optional RasterizerVulkan::AccelerateDisplay( return {}; } std::scoped_lock lock{texture_cache.mutex}; - ImageView* const image_view = + const auto [image_view, scaled] = texture_cache.TryFindFramebufferImageView(config, framebuffer_addr); if (!image_view) { return {}; } query_cache.NotifySegment(false); + const auto& resolution = Settings::values.resolution_info; + FramebufferTextureInfo info{}; info.image = image_view->ImageHandle(); info.image_view = image_view->Handle(Shader::TextureType::Color2D); info.width = image_view->size.width; info.height = image_view->size.height; + info.scaled_width = scaled ? resolution.ScaleUp(info.width) : info.width; + info.scaled_height = scaled ? resolution.ScaleUp(info.height) : info.height; return info; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a7400adfab..a20c956ffc 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -713,12 +713,12 @@ bool TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, } template -typename P::ImageView* TextureCache

::TryFindFramebufferImageView( +std::pair TextureCache

::TryFindFramebufferImageView( const Tegra::FramebufferConfig& config, DAddr cpu_addr) { // TODO: Properly implement this const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS); if (it == page_table.end()) { - return nullptr; + return {}; } const auto& image_map_ids = it->second; boost::container::small_vector valid_image_ids; @@ -747,7 +747,8 @@ typename P::ImageView* TextureCache

::TryFindFramebufferImageView( const auto GetImageViewForFramebuffer = [&](ImageId image_id) { const ImageViewInfo info{ImageViewType::e2D, view_format}; - return &slot_image_views[FindOrEmplaceImageView(image_id, info)]; + return std::make_pair(&slot_image_views[FindOrEmplaceImageView(image_id, info)], + slot_images[image_id].IsRescaled()); }; if (valid_image_ids.size() == 1) [[likely]] { @@ -761,7 +762,7 @@ typename P::ImageView* TextureCache

::TryFindFramebufferImageView( return GetImageViewForFramebuffer(*most_recent); } - return nullptr; + return {}; } template diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index f9aebb293e..e7b9101215 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -212,8 +212,8 @@ public: const Tegra::Engines::Fermi2D::Config& copy); /// Try to find a cached image view in the given CPU address - [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config, - DAddr cpu_addr); + [[nodiscard]] std::pair TryFindFramebufferImageView( + const Tegra::FramebufferConfig& config, DAddr cpu_addr); /// Return true when there are uncommitted images to be downloaded [[nodiscard]] bool HasUncommittedFlushes() const noexcept; From 2b1dd3bef511806aa479ec93e3d9b414db80d4a9 Mon Sep 17 00:00:00 2001 From: Liam Date: Fri, 12 Jan 2024 00:46:17 -0500 Subject: [PATCH 03/15] renderer_opengl: isolate core presentation code --- src/video_core/CMakeLists.txt | 2 + .../renderer_opengl/gl_blit_screen.cpp | 519 ++++++++++++++++++ .../renderer_opengl/gl_blit_screen.h | 110 ++++ .../renderer_opengl/gl_rasterizer.h | 1 + .../renderer_opengl/renderer_opengl.cpp | 506 +---------------- .../renderer_opengl/renderer_opengl.h | 78 +-- 6 files changed, 639 insertions(+), 577 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_blit_screen.cpp create mode 100644 src/video_core/renderer_opengl/gl_blit_screen.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 36aa7bb66f..c158970f2e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -118,6 +118,8 @@ add_library(video_core STATIC renderer_null/renderer_null.h renderer_opengl/blit_image.cpp renderer_opengl/blit_image.h + renderer_opengl/gl_blit_screen.cpp + renderer_opengl/gl_blit_screen.h renderer_opengl/gl_buffer_cache_base.cpp 
renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h diff --git a/src/video_core/renderer_opengl/gl_blit_screen.cpp b/src/video_core/renderer_opengl/gl_blit_screen.cpp new file mode 100644 index 0000000000..88757ba388 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_blit_screen.cpp @@ -0,0 +1,519 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/framebuffer_config.h" +#include "video_core/host_shaders/ffx_a_h.h" +#include "video_core/host_shaders/ffx_fsr1_h.h" +#include "video_core/host_shaders/full_screen_triangle_vert.h" +#include "video_core/host_shaders/fxaa_frag.h" +#include "video_core/host_shaders/fxaa_vert.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h" +#include "video_core/host_shaders/opengl_present_frag.h" +#include "video_core/host_shaders/opengl_present_scaleforce_frag.h" +#include "video_core/host_shaders/opengl_present_vert.h" +#include "video_core/host_shaders/opengl_smaa_glsl.h" +#include "video_core/host_shaders/present_bicubic_frag.h" +#include "video_core/host_shaders/present_gaussian_frag.h" +#include "video_core/host_shaders/smaa_blending_weight_calculation_frag.h" +#include "video_core/host_shaders/smaa_blending_weight_calculation_vert.h" +#include "video_core/host_shaders/smaa_edge_detection_frag.h" +#include "video_core/host_shaders/smaa_edge_detection_vert.h" +#include "video_core/host_shaders/smaa_neighborhood_blending_frag.h" +#include "video_core/host_shaders/smaa_neighborhood_blending_vert.h" +#include "video_core/renderer_opengl/gl_blit_screen.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include 
"video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/smaa_area_tex.h" +#include "video_core/smaa_search_tex.h" +#include "video_core/textures/decoders.h" + +namespace OpenGL { + +namespace { +constexpr GLint PositionLocation = 0; +constexpr GLint TexCoordLocation = 1; +constexpr GLint ModelViewMatrixLocation = 0; + +struct ScreenRectVertex { + constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v) + : position{{static_cast(x), static_cast(y)}}, tex_coord{{u, v}} {} + + std::array position; + std::array tex_coord; +}; + +/** + * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left + * corner and (width, height) on the lower-bottom. + * + * The projection part of the matrix is trivial, hence these operations are represented + * by a 3x2 matrix. + */ +std::array MakeOrthographicMatrix(float width, float height) { + std::array matrix; // Laid out in column-major order + + // clang-format off + matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; + matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; + // Last matrix row is implicitly assumed to be [0, 0, 1]. 
+ // clang-format on + + return matrix; +} +} // namespace + +BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, + Tegra::MaxwellDeviceMemoryManager& device_memory_, + StateTracker& state_tracker_, ProgramManager& program_manager_, + Device& device_) + : rasterizer(rasterizer_), device_memory(device_memory_), state_tracker(state_tracker_), + program_manager(program_manager_), device(device_) { + // Create shader programs + fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER); + fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); + + const auto replace_include = [](std::string& shader_source, std::string_view include_name, + std::string_view include_content) { + const std::string include_string = fmt::format("#include \"{}\"", include_name); + const std::size_t pos = shader_source.find(include_string); + ASSERT(pos != std::string::npos); + shader_source.replace(pos, include_string.size(), include_content); + }; + + const auto SmaaShader = [&](std::string_view specialized_source, GLenum stage) { + std::string shader_source{specialized_source}; + replace_include(shader_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL); + return CreateProgram(shader_source, stage); + }; + + smaa_edge_detection_vert = SmaaShader(HostShaders::SMAA_EDGE_DETECTION_VERT, GL_VERTEX_SHADER); + smaa_edge_detection_frag = + SmaaShader(HostShaders::SMAA_EDGE_DETECTION_FRAG, GL_FRAGMENT_SHADER); + smaa_blending_weight_calculation_vert = + SmaaShader(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_VERT, GL_VERTEX_SHADER); + smaa_blending_weight_calculation_frag = + SmaaShader(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_FRAG, GL_FRAGMENT_SHADER); + smaa_neighborhood_blending_vert = + SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_VERT, GL_VERTEX_SHADER); + smaa_neighborhood_blending_frag = + SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_FRAG, GL_FRAGMENT_SHADER); + + present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, 
GL_VERTEX_SHADER); + present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); + present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); + present_gaussian_fragment = + CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); + present_scaleforce_fragment = + CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), + GL_FRAGMENT_SHADER); + + std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG}; + replace_include(fsr_source, "ffx_a.h", HostShaders::FFX_A_H); + replace_include(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H); + + std::string fsr_easu_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG}; + std::string fsr_rcas_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG}; + replace_include(fsr_easu_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); + replace_include(fsr_rcas_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); + + fsr = std::make_unique(HostShaders::FULL_SCREEN_TRIANGLE_VERT, fsr_easu_frag_source, + fsr_rcas_frag_source); + + // Generate presentation sampler + present_sampler.Create(); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); + + present_sampler_nn.Create(); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + 
glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); + + // Generate VBO handle for drawing + vertex_buffer.Create(); + + // Attach vertex data to VAO + glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); + + // Allocate textures for the screen + framebuffer_texture.resource.Create(GL_TEXTURE_2D); + + const GLuint texture = framebuffer_texture.resource.handle; + glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1); + + // Clear screen to black + const u8 framebuffer_data[4] = {0, 0, 0, 0}; + glClearTexImage(framebuffer_texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, + framebuffer_data); + + aa_framebuffer.Create(); + + smaa_area_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(smaa_area_tex.handle, 1, GL_RG8, AREATEX_WIDTH, AREATEX_HEIGHT); + glTextureSubImage2D(smaa_area_tex.handle, 0, 0, 0, AREATEX_WIDTH, AREATEX_HEIGHT, GL_RG, + GL_UNSIGNED_BYTE, areaTexBytes); + smaa_search_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(smaa_search_tex.handle, 1, GL_R8, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT); + glTextureSubImage2D(smaa_search_tex.handle, 0, 0, 0, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT, GL_RED, + GL_UNSIGNED_BYTE, searchTexBytes); + + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } +} + +FramebufferTextureInfo BlitScreen::PrepareRenderTarget( + const Tegra::FramebufferConfig& framebuffer) { + // If framebuffer is provided, reload it from memory to a texture + if (framebuffer_texture.width != static_cast(framebuffer.width) || + framebuffer_texture.height != static_cast(framebuffer.height) || + 
framebuffer_texture.pixel_format != framebuffer.pixel_format || + gl_framebuffer_data.empty()) { + // Reallocate texture if the framebuffer size has changed. + // This is expected to not happen very often and hence should not be a + // performance problem. + ConfigureFramebufferTexture(framebuffer); + } + + // Load the framebuffer from memory if needed + return LoadFBToScreenInfo(framebuffer); +} + +FramebufferTextureInfo BlitScreen::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { + const DAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; + const auto accelerated_info = + rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride); + if (accelerated_info) { + return *accelerated_info; + } + + // Reset the screen info's display texture to its own permanent texture + FramebufferTextureInfo info{}; + info.display_texture = framebuffer_texture.resource.handle; + info.width = framebuffer.width; + info.height = framebuffer.height; + info.scaled_width = framebuffer.width; + info.scaled_height = framebuffer.height; + + // TODO(Rodrigo): Read this from HLE + constexpr u32 block_height_log2 = 4; + const auto pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; + const u64 size_in_bytes{Tegra::Texture::CalculateSize( + true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; + const u8* const host_ptr{device_memory.GetPointer(framebuffer_addr)}; + const std::span input_data(host_ptr, size_in_bytes); + Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, + framebuffer.width, framebuffer.height, 1, block_height_log2, + 0); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(framebuffer.stride)); + + // Update existing texture + // TODO: Test what happens on hardware when you change the framebuffer 
dimensions so that + // they differ from the LCD resolution. + // TODO: Applications could theoretically crash yuzu here by specifying too large + // framebuffer sizes. We should make sure that this cannot happen. + glTextureSubImage2D(framebuffer_texture.resource.handle, 0, 0, 0, framebuffer.width, + framebuffer.height, framebuffer_texture.gl_format, + framebuffer_texture.gl_type, gl_framebuffer_data.data()); + + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + return info; +} + +void BlitScreen::ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer) { + framebuffer_texture.width = framebuffer.width; + framebuffer_texture.height = framebuffer.height; + framebuffer_texture.pixel_format = framebuffer.pixel_format; + + const auto pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; + gl_framebuffer_data.resize(framebuffer_texture.width * framebuffer_texture.height * + bytes_per_pixel); + + GLint internal_format; + switch (framebuffer.pixel_format) { + case Service::android::PixelFormat::Rgba8888: + internal_format = GL_RGBA8; + framebuffer_texture.gl_format = GL_RGBA; + framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; + break; + case Service::android::PixelFormat::Rgb565: + internal_format = GL_RGB565; + framebuffer_texture.gl_format = GL_RGB; + framebuffer_texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; + break; + default: + internal_format = GL_RGBA8; + framebuffer_texture.gl_format = GL_RGBA; + framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; + // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", + // static_cast(framebuffer.pixel_format)); + break; + } + + framebuffer_texture.resource.Release(); + framebuffer_texture.resource.Create(GL_TEXTURE_2D); + glTextureStorage2D(framebuffer_texture.resource.handle, 1, internal_format, + framebuffer_texture.width, framebuffer_texture.height); + aa_texture.Release(); + 
aa_texture.Create(GL_TEXTURE_2D); + glTextureStorage2D(aa_texture.handle, 1, GL_RGBA16F, + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); + glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, aa_texture.handle, 0); + smaa_edges_tex.Release(); + smaa_edges_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(smaa_edges_tex.handle, 1, GL_RG16F, + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); + smaa_blend_tex.Release(); + smaa_blend_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(smaa_blend_tex.handle, 1, GL_RGBA16F, + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); +} + +void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout) { + FramebufferTextureInfo info = PrepareRenderTarget(framebuffer); + const auto crop = Tegra::NormalizeCrop(framebuffer, info.width, info.height); + + // TODO: Signal state tracker about these changes + state_tracker.NotifyScreenDrawVertexArray(); + state_tracker.NotifyPolygonModes(); + state_tracker.NotifyViewport0(); + state_tracker.NotifyScissor0(); + state_tracker.NotifyColorMask(0); + state_tracker.NotifyBlend0(); + state_tracker.NotifyFramebuffer(); + state_tracker.NotifyFrontFace(); + state_tracker.NotifyCullTest(); + state_tracker.NotifyDepthTest(); + state_tracker.NotifyStencilTest(); + state_tracker.NotifyPolygonOffset(); + state_tracker.NotifyRasterizeEnable(); + state_tracker.NotifyFramebufferSRGB(); + state_tracker.NotifyLogicOp(); + state_tracker.NotifyClipControl(); + state_tracker.NotifyAlphaTest(); + + state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); + + glEnable(GL_CULL_FACE); + glDisable(GL_COLOR_LOGIC_OP); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); 
+ glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_RASTERIZER_DISCARD); + glDisable(GL_ALPHA_TEST); + glDisablei(GL_BLEND, 0); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glCullFace(GL_BACK); + glFrontFace(GL_CW); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDepthRangeIndexed(0, 0.0, 0.0); + + glBindTextureUnit(0, info.display_texture); + + auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); + if (anti_aliasing >= Settings::AntiAliasing::MaxEnum) { + LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing); + anti_aliasing = Settings::AntiAliasing::None; + Settings::values.anti_aliasing.SetValue(anti_aliasing); + } + + if (anti_aliasing != Settings::AntiAliasing::None) { + glEnablei(GL_SCISSOR_TEST, 0); + auto scissor_width = Settings::values.resolution_info.ScaleUp(framebuffer_texture.width); + auto viewport_width = static_cast(scissor_width); + auto scissor_height = Settings::values.resolution_info.ScaleUp(framebuffer_texture.height); + auto viewport_height = static_cast(scissor_height); + + glScissorIndexed(0, 0, 0, scissor_width, scissor_height); + glViewportIndexedf(0, 0.0f, 0.0f, viewport_width, viewport_height); + + glBindSampler(0, present_sampler.handle); + GLint old_read_fb; + GLint old_draw_fb; + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); + + switch (anti_aliasing) { + case Settings::AntiAliasing::Fxaa: { + program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, aa_framebuffer.handle); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + } break; + case Settings::AntiAliasing::Smaa: { + glClearColor(0, 0, 0, 0); + glFrontFace(GL_CCW); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, aa_framebuffer.handle); + glBindSampler(1, present_sampler.handle); + glBindSampler(2, present_sampler.handle); + + glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, + 
smaa_edges_tex.handle, 0); + glClear(GL_COLOR_BUFFER_BIT); + program_manager.BindPresentPrograms(smaa_edge_detection_vert.handle, + smaa_edge_detection_frag.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glBindTextureUnit(0, smaa_edges_tex.handle); + glBindTextureUnit(1, smaa_area_tex.handle); + glBindTextureUnit(2, smaa_search_tex.handle); + glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, + smaa_blend_tex.handle, 0); + glClear(GL_COLOR_BUFFER_BIT); + program_manager.BindPresentPrograms(smaa_blending_weight_calculation_vert.handle, + smaa_blending_weight_calculation_frag.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glBindTextureUnit(0, info.display_texture); + glBindTextureUnit(1, smaa_blend_tex.handle); + glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, + aa_texture.handle, 0); + program_manager.BindPresentPrograms(smaa_neighborhood_blending_vert.handle, + smaa_neighborhood_blending_frag.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + glFrontFace(GL_CW); + } break; + default: + UNREACHABLE(); + } + + glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); + + glBindTextureUnit(0, aa_texture.handle); + } + glDisablei(GL_SCISSOR_TEST, 0); + + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + if (!fsr->AreBuffersInitialized()) { + fsr->InitBuffers(); + } + + glBindSampler(0, present_sampler.handle); + fsr->Draw(program_manager, layout.screen, info.scaled_width, info.scaled_height, crop); + } else { + if (fsr->AreBuffersInitialized()) { + fsr->ReleaseBuffers(); + } + } + + const std::array ortho_matrix = + MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); + + const auto fragment_handle = [this]() { + switch (Settings::values.scaling_filter.GetValue()) { + case Settings::ScalingFilter::NearestNeighbor: + case Settings::ScalingFilter::Bilinear: + return present_bilinear_fragment.handle; + case 
Settings::ScalingFilter::Bicubic: + return present_bicubic_fragment.handle; + case Settings::ScalingFilter::Gaussian: + return present_gaussian_fragment.handle; + case Settings::ScalingFilter::ScaleForce: + return present_scaleforce_fragment.handle; + case Settings::ScalingFilter::Fsr: + return fsr->GetPresentFragmentProgram().handle; + default: + return present_bilinear_fragment.handle; + } + }(); + program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle); + glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, + ortho_matrix.data()); + + f32 left, top, right, bottom; + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + // FSR has already applied the crop, so we just want to render the image + // it has produced. + left = 0; + top = 0; + right = 1; + bottom = 1; + } else { + // Apply the precomputed crop. + left = crop.left; + top = crop.top; + right = crop.right; + bottom = crop.bottom; + } + + // Map the coordinates to the screen. 
+ const auto& screen = layout.screen; + const auto x = screen.left; + const auto y = screen.top; + const auto w = screen.GetWidth(); + const auto h = screen.GetHeight(); + + const std::array vertices = { + ScreenRectVertex(x, y, left, top), + ScreenRectVertex(x + w, y, right, top), + ScreenRectVertex(x, y + h, left, bottom), + ScreenRectVertex(x + w, y + h, right, bottom), + }; + glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); + + glDisable(GL_FRAMEBUFFER_SRGB); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast(layout.width), + static_cast(layout.height)); + + glEnableVertexAttribArray(PositionLocation); + glEnableVertexAttribArray(TexCoordLocation); + glVertexAttribDivisor(PositionLocation, 0); + glVertexAttribDivisor(TexCoordLocation, 0); + glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE, + offsetof(ScreenRectVertex, position)); + glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE, + offsetof(ScreenRectVertex, tex_coord)); + glVertexAttribBinding(PositionLocation, 0); + glVertexAttribBinding(TexCoordLocation, 0); + if (device.HasVertexBufferUnifiedMemory()) { + glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, + sizeof(vertices)); + } else { + glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + } + + if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { + glBindSampler(0, present_sampler.handle); + } else { + glBindSampler(0, present_sampler_nn.handle); + } + + // Update background color before drawing + glClearColor(Settings::values.bg_red.GetValue() / 255.0f, + Settings::values.bg_green.GetValue() / 255.0f, + Settings::values.bg_blue.GetValue() / 255.0f, 1.0f); + + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + // TODO + // program_manager.RestoreGuestPipeline(); +} + +} // namespace OpenGL diff --git 
a/src/video_core/renderer_opengl/gl_blit_screen.h b/src/video_core/renderer_opengl/gl_blit_screen.h new file mode 100644 index 0000000000..13d769958c --- /dev/null +++ b/src/video_core/renderer_opengl/gl_blit_screen.h @@ -0,0 +1,110 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "core/hle/service/nvnflinger/pixel_format.h" +#include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/renderer_opengl/gl_fsr.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace Tegra { +struct FramebufferConfig; +} + +namespace OpenGL { + +class Device; +class RasterizerOpenGL; +class StateTracker; + +/// Structure used for storing information about the textures for the Switch screen +struct TextureInfo { + OGLTexture resource; + GLsizei width; + GLsizei height; + GLenum gl_format; + GLenum gl_type; + Service::android::PixelFormat pixel_format; +}; + +/// Structure used for storing information about the display target for the Switch screen +struct FramebufferTextureInfo { + GLuint display_texture{}; + u32 width; + u32 height; + u32 scaled_width; + u32 scaled_height; +}; + +class BlitScreen { +public: + explicit BlitScreen(RasterizerOpenGL& rasterizer, + Tegra::MaxwellDeviceMemoryManager& device_memory, + StateTracker& state_tracker, ProgramManager& program_manager, + Device& device); + + void ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer); + + /// Draws the emulated screens to the emulator window. + void DrawScreen(const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout); + + void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer); + + /// Loads framebuffer from emulated memory into the active OpenGL texture. 
+ FramebufferTextureInfo LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); + + FramebufferTextureInfo PrepareRenderTarget(const Tegra::FramebufferConfig& framebuffer); + +private: + RasterizerOpenGL& rasterizer; + Tegra::MaxwellDeviceMemoryManager& device_memory; + StateTracker& state_tracker; + ProgramManager& program_manager; + Device& device; + + OGLSampler present_sampler; + OGLSampler present_sampler_nn; + OGLBuffer vertex_buffer; + OGLProgram fxaa_vertex; + OGLProgram fxaa_fragment; + OGLProgram present_vertex; + OGLProgram present_bilinear_fragment; + OGLProgram present_bicubic_fragment; + OGLProgram present_gaussian_fragment; + OGLProgram present_scaleforce_fragment; + + /// Display information for Switch screen + TextureInfo framebuffer_texture; + OGLTexture aa_texture; + OGLFramebuffer aa_framebuffer; + + OGLProgram smaa_edge_detection_vert; + OGLProgram smaa_blending_weight_calculation_vert; + OGLProgram smaa_neighborhood_blending_vert; + OGLProgram smaa_edge_detection_frag; + OGLProgram smaa_blending_weight_calculation_frag; + OGLProgram smaa_neighborhood_blending_frag; + OGLTexture smaa_area_tex; + OGLTexture smaa_search_tex; + OGLTexture smaa_edges_tex; + OGLTexture smaa_blend_tex; + + std::unique_ptr fsr; + + /// OpenGL framebuffer data + std::vector gl_framebuffer_data; + + // GPU address of the vertex buffer + GLuint64EXT vertex_buffer_address = 0; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index ee82d9f3a6..6eae51ff7d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -16,6 +16,7 @@ #include "video_core/engines/maxwell_dma.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/blit_image.h" +#include "video_core/renderer_opengl/gl_blit_screen.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include 
"video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_fence_manager.h" diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 2b9ebff92a..38b0aacf47 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -16,68 +16,16 @@ #include "core/core_timing.h" #include "core/frontend/emu_window.h" #include "core/telemetry_session.h" -#include "video_core/host_shaders/ffx_a_h.h" -#include "video_core/host_shaders/ffx_fsr1_h.h" -#include "video_core/host_shaders/full_screen_triangle_vert.h" -#include "video_core/host_shaders/fxaa_frag.h" -#include "video_core/host_shaders/fxaa_vert.h" -#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h" -#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h" -#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h" -#include "video_core/host_shaders/opengl_present_frag.h" -#include "video_core/host_shaders/opengl_present_scaleforce_frag.h" -#include "video_core/host_shaders/opengl_present_vert.h" -#include "video_core/host_shaders/opengl_smaa_glsl.h" -#include "video_core/host_shaders/present_bicubic_frag.h" -#include "video_core/host_shaders/present_gaussian_frag.h" -#include "video_core/host_shaders/smaa_blending_weight_calculation_frag.h" -#include "video_core/host_shaders/smaa_blending_weight_calculation_vert.h" -#include "video_core/host_shaders/smaa_edge_detection_frag.h" -#include "video_core/host_shaders/smaa_edge_detection_vert.h" -#include "video_core/host_shaders/smaa_neighborhood_blending_frag.h" -#include "video_core/host_shaders/smaa_neighborhood_blending_vert.h" +#include "video_core/renderer_opengl/gl_blit_screen.h" #include "video_core/renderer_opengl/gl_fsr.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" 
#include "video_core/renderer_opengl/renderer_opengl.h" -#include "video_core/smaa_area_tex.h" -#include "video_core/smaa_search_tex.h" #include "video_core/textures/decoders.h" namespace OpenGL { namespace { -constexpr GLint PositionLocation = 0; -constexpr GLint TexCoordLocation = 1; -constexpr GLint ModelViewMatrixLocation = 0; - -struct ScreenRectVertex { - constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v) - : position{{static_cast(x), static_cast(y)}}, tex_coord{{u, v}} {} - - std::array position; - std::array tex_coord; -}; - -/** - * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left - * corner and (width, height) on the lower-bottom. - * - * The projection part of the matrix is trivial, hence these operations are represented - * by a 3x2 matrix. - */ -std::array MakeOrthographicMatrix(float width, float height) { - std::array matrix; // Laid out in column-major order - - // clang-format off - matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; - matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; - // Last matrix row is implicitly assumed to be [0, 0, 1]. 
- // clang-format on - - return matrix; -} - const char* GetSource(GLenum source) { switch (source) { case GL_DEBUG_SOURCE_API: @@ -155,7 +103,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, glDebugMessageCallback(DebugHandler, nullptr); } AddTelemetryFields(); - InitOpenGLObjects(); // Initialize default attributes to match hardware's disabled attributes GLint max_attribs{}; @@ -167,14 +114,8 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); } - // Enable unified vertex attributes and query vertex buffer address when the driver supports it - if (device.HasVertexBufferUnifiedMemory()) { - glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); - glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); - glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); - glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, - &vertex_buffer_address); - } + blit_screen = std::make_unique(rasterizer, device_memory, state_tracker, + program_manager, device); } RendererOpenGL::~RendererOpenGL() = default; @@ -187,7 +128,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { RenderScreenshot(*framebuffer); state_tracker.BindFramebuffer(0); - DrawScreen(*framebuffer, emu_window.GetFramebufferLayout()); + blit_screen->DrawScreen(*framebuffer, emu_window.GetFramebufferLayout()); ++m_current_frame; @@ -198,166 +139,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.OnFrameDisplayed(); } -FramebufferTextureInfo RendererOpenGL::PrepareRenderTarget( - const Tegra::FramebufferConfig& framebuffer) { - // If framebuffer is provided, reload it from memory to a texture - if (framebuffer_texture.width != static_cast(framebuffer.width) || - framebuffer_texture.height != 
static_cast(framebuffer.height) || - framebuffer_texture.pixel_format != framebuffer.pixel_format || - gl_framebuffer_data.empty()) { - // Reallocate texture if the framebuffer size has changed. - // This is expected to not happen very often and hence should not be a - // performance problem. - ConfigureFramebufferTexture(framebuffer); - } - - // Load the framebuffer from memory if needed - return LoadFBToScreenInfo(framebuffer); -} - -FramebufferTextureInfo RendererOpenGL::LoadFBToScreenInfo( - const Tegra::FramebufferConfig& framebuffer) { - const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; - const auto accelerated_info = - rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride); - if (accelerated_info) { - return *accelerated_info; - } - - // Reset the screen info's display texture to its own permanent texture - FramebufferTextureInfo info{}; - info.display_texture = framebuffer_texture.resource.handle; - info.width = framebuffer.width; - info.height = framebuffer.height; - info.scaled_width = framebuffer.width; - info.scaled_height = framebuffer.height; - - // TODO(Rodrigo): Read this from HLE - constexpr u32 block_height_log2 = 4; - const auto pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; - const u64 size_in_bytes{Tegra::Texture::CalculateSize( - true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; - const u8* const host_ptr{device_memory.GetPointer(framebuffer_addr)}; - const std::span input_data(host_ptr, size_in_bytes); - Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, - framebuffer.width, framebuffer.height, 1, block_height_log2, - 0); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(framebuffer.stride)); - - // Update existing texture - // TODO: Test what happens on 
hardware when you change the framebuffer dimensions so that - // they differ from the LCD resolution. - // TODO: Applications could theoretically crash yuzu here by specifying too large - // framebuffer sizes. We should make sure that this cannot happen. - glTextureSubImage2D(framebuffer_texture.resource.handle, 0, 0, 0, framebuffer.width, - framebuffer.height, framebuffer_texture.gl_format, - framebuffer_texture.gl_type, gl_framebuffer_data.data()); - - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - - return info; -} - -void RendererOpenGL::InitOpenGLObjects() { - // Create shader programs - fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER); - fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); - - const auto replace_include = [](std::string& shader_source, std::string_view include_name, - std::string_view include_content) { - const std::string include_string = fmt::format("#include \"{}\"", include_name); - const std::size_t pos = shader_source.find(include_string); - ASSERT(pos != std::string::npos); - shader_source.replace(pos, include_string.size(), include_content); - }; - - const auto SmaaShader = [&](std::string_view specialized_source, GLenum stage) { - std::string shader_source{specialized_source}; - replace_include(shader_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL); - return CreateProgram(shader_source, stage); - }; - - smaa_edge_detection_vert = SmaaShader(HostShaders::SMAA_EDGE_DETECTION_VERT, GL_VERTEX_SHADER); - smaa_edge_detection_frag = - SmaaShader(HostShaders::SMAA_EDGE_DETECTION_FRAG, GL_FRAGMENT_SHADER); - smaa_blending_weight_calculation_vert = - SmaaShader(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_VERT, GL_VERTEX_SHADER); - smaa_blending_weight_calculation_frag = - SmaaShader(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_FRAG, GL_FRAGMENT_SHADER); - smaa_neighborhood_blending_vert = - SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_VERT, GL_VERTEX_SHADER); - 
smaa_neighborhood_blending_frag = - SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_FRAG, GL_FRAGMENT_SHADER); - - present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); - present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); - present_gaussian_fragment = - CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); - present_scaleforce_fragment = - CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), - GL_FRAGMENT_SHADER); - - std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG}; - replace_include(fsr_source, "ffx_a.h", HostShaders::FFX_A_H); - replace_include(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H); - - std::string fsr_easu_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG}; - std::string fsr_rcas_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG}; - replace_include(fsr_easu_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); - replace_include(fsr_rcas_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); - - fsr = std::make_unique(HostShaders::FULL_SCREEN_TRIANGLE_VERT, fsr_easu_frag_source, - fsr_rcas_frag_source); - - // Generate presentation sampler - present_sampler.Create(); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); - - present_sampler_nn.Create(); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - 
glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); - - // Generate VBO handle for drawing - vertex_buffer.Create(); - - // Attach vertex data to VAO - glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); - - // Allocate textures for the screen - framebuffer_texture.resource.Create(GL_TEXTURE_2D); - - const GLuint texture = framebuffer_texture.resource.handle; - glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1); - - // Clear screen to black - const u8 framebuffer_data[4] = {0, 0, 0, 0}; - glClearTexImage(framebuffer_texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, - framebuffer_data); - - aa_framebuffer.Create(); - - smaa_area_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(smaa_area_tex.handle, 1, GL_RG8, AREATEX_WIDTH, AREATEX_HEIGHT); - glTextureSubImage2D(smaa_area_tex.handle, 0, 0, 0, AREATEX_WIDTH, AREATEX_HEIGHT, GL_RG, - GL_UNSIGNED_BYTE, areaTexBytes); - smaa_search_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(smaa_search_tex.handle, 1, GL_R8, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT); - glTextureSubImage2D(smaa_search_tex.handle, 0, 0, 0, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT, GL_RED, - GL_UNSIGNED_BYTE, searchTexBytes); -} - void RendererOpenGL::AddTelemetryFields() { const char* const gl_version{reinterpret_cast(glGetString(GL_VERSION))}; const char* const gpu_vendor{reinterpret_cast(glGetString(GL_VENDOR))}; @@ -373,283 +154,6 @@ void RendererOpenGL::AddTelemetryFields() { telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); } -void RendererOpenGL::ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer) { - framebuffer_texture.width = framebuffer.width; - framebuffer_texture.height = framebuffer.height; - framebuffer_texture.pixel_format = 
framebuffer.pixel_format; - - const auto pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; - gl_framebuffer_data.resize(framebuffer_texture.width * framebuffer_texture.height * - bytes_per_pixel); - - GLint internal_format; - switch (framebuffer.pixel_format) { - case Service::android::PixelFormat::Rgba8888: - internal_format = GL_RGBA8; - framebuffer_texture.gl_format = GL_RGBA; - framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - break; - case Service::android::PixelFormat::Rgb565: - internal_format = GL_RGB565; - framebuffer_texture.gl_format = GL_RGB; - framebuffer_texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; - break; - default: - internal_format = GL_RGBA8; - framebuffer_texture.gl_format = GL_RGBA; - framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", - // static_cast(framebuffer.pixel_format)); - break; - } - - framebuffer_texture.resource.Release(); - framebuffer_texture.resource.Create(GL_TEXTURE_2D); - glTextureStorage2D(framebuffer_texture.resource.handle, 1, internal_format, - framebuffer_texture.width, framebuffer_texture.height); - aa_texture.Release(); - aa_texture.Create(GL_TEXTURE_2D); - glTextureStorage2D(aa_texture.handle, 1, GL_RGBA16F, - Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), - Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); - glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, aa_texture.handle, 0); - smaa_edges_tex.Release(); - smaa_edges_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(smaa_edges_tex.handle, 1, GL_RG16F, - Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), - Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); - smaa_blend_tex.Release(); - smaa_blend_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(smaa_blend_tex.handle, 1, 
GL_RGBA16F, - Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), - Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); -} - -void RendererOpenGL::DrawScreen(const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout& layout) { - FramebufferTextureInfo info = PrepareRenderTarget(framebuffer); - const auto crop = Tegra::NormalizeCrop(framebuffer, info.width, info.height); - - // TODO: Signal state tracker about these changes - state_tracker.NotifyScreenDrawVertexArray(); - state_tracker.NotifyPolygonModes(); - state_tracker.NotifyViewport0(); - state_tracker.NotifyScissor0(); - state_tracker.NotifyColorMask(0); - state_tracker.NotifyBlend0(); - state_tracker.NotifyFramebuffer(); - state_tracker.NotifyFrontFace(); - state_tracker.NotifyCullTest(); - state_tracker.NotifyDepthTest(); - state_tracker.NotifyStencilTest(); - state_tracker.NotifyPolygonOffset(); - state_tracker.NotifyRasterizeEnable(); - state_tracker.NotifyFramebufferSRGB(); - state_tracker.NotifyLogicOp(); - state_tracker.NotifyClipControl(); - state_tracker.NotifyAlphaTest(); - - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); - - glEnable(GL_CULL_FACE); - glDisable(GL_COLOR_LOGIC_OP); - glDisable(GL_DEPTH_TEST); - glDisable(GL_STENCIL_TEST); - glDisable(GL_POLYGON_OFFSET_FILL); - glDisable(GL_RASTERIZER_DISCARD); - glDisable(GL_ALPHA_TEST); - glDisablei(GL_BLEND, 0); - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - glCullFace(GL_BACK); - glFrontFace(GL_CW); - glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - glDepthRangeIndexed(0, 0.0, 0.0); - - glBindTextureUnit(0, info.display_texture); - - auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); - if (anti_aliasing >= Settings::AntiAliasing::MaxEnum) { - LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing); - anti_aliasing = Settings::AntiAliasing::None; - Settings::values.anti_aliasing.SetValue(anti_aliasing); - } - - if (anti_aliasing != 
Settings::AntiAliasing::None) { - glEnablei(GL_SCISSOR_TEST, 0); - auto scissor_width = Settings::values.resolution_info.ScaleUp(framebuffer_texture.width); - auto viewport_width = static_cast(scissor_width); - auto scissor_height = Settings::values.resolution_info.ScaleUp(framebuffer_texture.height); - auto viewport_height = static_cast(scissor_height); - - glScissorIndexed(0, 0, 0, scissor_width, scissor_height); - glViewportIndexedf(0, 0.0f, 0.0f, viewport_width, viewport_height); - - glBindSampler(0, present_sampler.handle); - GLint old_read_fb; - GLint old_draw_fb; - glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); - - switch (anti_aliasing) { - case Settings::AntiAliasing::Fxaa: { - program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, aa_framebuffer.handle); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - } break; - case Settings::AntiAliasing::Smaa: { - glClearColor(0, 0, 0, 0); - glFrontFace(GL_CCW); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, aa_framebuffer.handle); - glBindSampler(1, present_sampler.handle); - glBindSampler(2, present_sampler.handle); - - glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, - smaa_edges_tex.handle, 0); - glClear(GL_COLOR_BUFFER_BIT); - program_manager.BindPresentPrograms(smaa_edge_detection_vert.handle, - smaa_edge_detection_frag.handle); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glBindTextureUnit(0, smaa_edges_tex.handle); - glBindTextureUnit(1, smaa_area_tex.handle); - glBindTextureUnit(2, smaa_search_tex.handle); - glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, - smaa_blend_tex.handle, 0); - glClear(GL_COLOR_BUFFER_BIT); - program_manager.BindPresentPrograms(smaa_blending_weight_calculation_vert.handle, - smaa_blending_weight_calculation_frag.handle); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glBindTextureUnit(0, info.display_texture); - 
glBindTextureUnit(1, smaa_blend_tex.handle); - glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, - aa_texture.handle, 0); - program_manager.BindPresentPrograms(smaa_neighborhood_blending_vert.handle, - smaa_neighborhood_blending_frag.handle); - glDrawArrays(GL_TRIANGLES, 0, 3); - glFrontFace(GL_CW); - } break; - default: - UNREACHABLE(); - } - - glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); - - glBindTextureUnit(0, aa_texture.handle); - } - glDisablei(GL_SCISSOR_TEST, 0); - - if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { - if (!fsr->AreBuffersInitialized()) { - fsr->InitBuffers(); - } - - glBindSampler(0, present_sampler.handle); - fsr->Draw(program_manager, layout.screen, info.scaled_width, info.scaled_height, crop); - } else { - if (fsr->AreBuffersInitialized()) { - fsr->ReleaseBuffers(); - } - } - - const std::array ortho_matrix = - MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - - const auto fragment_handle = [this]() { - switch (Settings::values.scaling_filter.GetValue()) { - case Settings::ScalingFilter::NearestNeighbor: - case Settings::ScalingFilter::Bilinear: - return present_bilinear_fragment.handle; - case Settings::ScalingFilter::Bicubic: - return present_bicubic_fragment.handle; - case Settings::ScalingFilter::Gaussian: - return present_gaussian_fragment.handle; - case Settings::ScalingFilter::ScaleForce: - return present_scaleforce_fragment.handle; - case Settings::ScalingFilter::Fsr: - return fsr->GetPresentFragmentProgram().handle; - default: - return present_bilinear_fragment.handle; - } - }(); - program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle); - glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, - ortho_matrix.data()); - - f32 left, top, right, bottom; - if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) 
{ - // FSR has already applied the crop, so we just want to render the image - // it has produced. - left = 0; - top = 0; - right = 1; - bottom = 1; - } else { - // Apply the precomputed crop. - left = crop.left; - top = crop.top; - right = crop.right; - bottom = crop.bottom; - } - - // Map the coordinates to the screen. - const auto& screen = layout.screen; - const auto x = screen.left; - const auto y = screen.top; - const auto w = screen.GetWidth(); - const auto h = screen.GetHeight(); - - const std::array vertices = { - ScreenRectVertex(x, y, left, top), - ScreenRectVertex(x + w, y, right, top), - ScreenRectVertex(x, y + h, left, bottom), - ScreenRectVertex(x + w, y + h, right, bottom), - }; - glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); - - glDisable(GL_FRAMEBUFFER_SRGB); - glViewportIndexedf(0, 0.0f, 0.0f, static_cast(layout.width), - static_cast(layout.height)); - - glEnableVertexAttribArray(PositionLocation); - glEnableVertexAttribArray(TexCoordLocation); - glVertexAttribDivisor(PositionLocation, 0); - glVertexAttribDivisor(TexCoordLocation, 0); - glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE, - offsetof(ScreenRectVertex, position)); - glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE, - offsetof(ScreenRectVertex, tex_coord)); - glVertexAttribBinding(PositionLocation, 0); - glVertexAttribBinding(TexCoordLocation, 0); - if (device.HasVertexBufferUnifiedMemory()) { - glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); - glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, - sizeof(vertices)); - } else { - glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); - } - - if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { - glBindSampler(0, present_sampler.handle); - } else { - glBindSampler(0, present_sampler_nn.handle); - } - - // Update background color before drawing - 
glClearColor(Settings::values.bg_red.GetValue() / 255.0f, - Settings::values.bg_green.GetValue() / 255.0f, - Settings::values.bg_blue.GetValue() / 255.0f, 1.0f); - - glClear(GL_COLOR_BUFFER_BIT); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - // TODO - // program_manager.RestoreGuestPipeline(); -} - void RendererOpenGL::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer) { if (!renderer_settings.screenshot_requested) { return; @@ -672,7 +176,7 @@ void RendererOpenGL::RenderScreenshot(const Tegra::FramebufferConfig& framebuffe glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, layout.width, layout.height); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); - DrawScreen(framebuffer, layout); + blit_screen->DrawScreen(framebuffer, layout); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glPixelStorei(GL_PACK_ROW_LENGTH, 0); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 3a83a9b78b..23aff055aa 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -25,38 +25,13 @@ namespace Core::Frontend { class EmuWindow; } -namespace Core::Memory { -class Memory; -} - -namespace Layout { -struct FramebufferLayout; -} - namespace Tegra { class GPU; } namespace OpenGL { -/// Structure used for storing information about the textures for the Switch screen -struct TextureInfo { - OGLTexture resource; - GLsizei width; - GLsizei height; - GLenum gl_format; - GLenum gl_type; - Service::android::PixelFormat pixel_format; -}; - -/// Structure used for storing information about the display target for the Switch screen -struct FramebufferTextureInfo { - GLuint display_texture{}; - u32 width; - u32 height; - u32 scaled_width; - u32 scaled_height; -}; +class BlitScreen; class RendererOpenGL final : public VideoCore::RendererBase { public: @@ -77,24 +52,9 @@ public: } private: - /// Initializes the OpenGL state and 
creates persistent objects. - void InitOpenGLObjects(); - void AddTelemetryFields(); - - void ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer); - - /// Draws the emulated screens to the emulator window. - void DrawScreen(const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout& layout); - void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer); - /// Loads framebuffer from emulated memory into the active OpenGL texture. - FramebufferTextureInfo LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); - - FramebufferTextureInfo PrepareRenderTarget(const Tegra::FramebufferConfig& framebuffer); - Core::TelemetrySession& telemetry_session; Core::Frontend::EmuWindow& emu_window; Tegra::MaxwellDeviceMemoryManager& device_memory; @@ -104,43 +64,9 @@ private: StateTracker state_tracker; ProgramManager program_manager; RasterizerOpenGL rasterizer; - - // OpenGL object IDs - OGLSampler present_sampler; - OGLSampler present_sampler_nn; - OGLBuffer vertex_buffer; - OGLProgram fxaa_vertex; - OGLProgram fxaa_fragment; - OGLProgram present_vertex; - OGLProgram present_bilinear_fragment; - OGLProgram present_bicubic_fragment; - OGLProgram present_gaussian_fragment; - OGLProgram present_scaleforce_fragment; OGLFramebuffer screenshot_framebuffer; - // GPU address of the vertex buffer - GLuint64EXT vertex_buffer_address = 0; - - /// Display information for Switch screen - TextureInfo framebuffer_texture; - OGLTexture aa_texture; - OGLFramebuffer aa_framebuffer; - - OGLProgram smaa_edge_detection_vert; - OGLProgram smaa_blending_weight_calculation_vert; - OGLProgram smaa_neighborhood_blending_vert; - OGLProgram smaa_edge_detection_frag; - OGLProgram smaa_blending_weight_calculation_frag; - OGLProgram smaa_neighborhood_blending_frag; - OGLTexture smaa_area_tex; - OGLTexture smaa_search_tex; - OGLTexture smaa_edges_tex; - OGLTexture smaa_blend_tex; - - std::unique_ptr fsr; - - /// OpenGL framebuffer data - std::vector 
gl_framebuffer_data; + std::unique_ptr blit_screen; }; } // namespace OpenGL From 9568b310befe19154511afe14709188f641e4951 Mon Sep 17 00:00:00 2001 From: Liam Date: Sat, 13 Jan 2024 22:52:04 -0500 Subject: [PATCH 04/15] renderer_vulkan: isolate FXAA from blit screen --- src/video_core/CMakeLists.txt | 12 +- .../renderer_vulkan/present/anti_alias_pass.h | 27 ++ .../{vk_fsr.cpp => present/fsr.cpp} | 2 +- .../{vk_fsr.h => present/fsr.h} | 0 .../renderer_vulkan/present/fxaa.cpp | 144 +++++++ src/video_core/renderer_vulkan/present/fxaa.h | 63 +++ .../renderer_vulkan/present/smaa.cpp | 270 +++++++++++++ .../{vk_smaa.h => present/smaa.h} | 7 +- .../{vk_smaa.cpp => present/util.cpp} | 282 +------------ src/video_core/renderer_vulkan/present/util.h | 46 +++ .../renderer_vulkan/vk_blit_screen.cpp | 372 +----------------- .../renderer_vulkan/vk_blit_screen.h | 16 +- 12 files changed, 590 insertions(+), 651 deletions(-) create mode 100644 src/video_core/renderer_vulkan/present/anti_alias_pass.h rename src/video_core/renderer_vulkan/{vk_fsr.cpp => present/fsr.cpp} (99%) rename src/video_core/renderer_vulkan/{vk_fsr.h => present/fsr.h} (100%) create mode 100644 src/video_core/renderer_vulkan/present/fxaa.cpp create mode 100644 src/video_core/renderer_vulkan/present/fxaa.h create mode 100644 src/video_core/renderer_vulkan/present/smaa.cpp rename src/video_core/renderer_vulkan/{vk_smaa.h => present/smaa.h} (92%) rename src/video_core/renderer_vulkan/{vk_smaa.cpp => present/util.cpp} (58%) create mode 100644 src/video_core/renderer_vulkan/present/util.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c158970f2e..240b80c6e5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -158,6 +158,14 @@ add_library(video_core STATIC renderer_opengl/renderer_opengl.h renderer_opengl/util_shaders.cpp renderer_opengl/util_shaders.h + renderer_vulkan/present/fsr.cpp + renderer_vulkan/present/fsr.h + renderer_vulkan/present/fxaa.cpp + 
renderer_vulkan/present/fxaa.h
+    renderer_vulkan/present/smaa.cpp
+    renderer_vulkan/present/smaa.h
+    renderer_vulkan/present/util.cpp
+    renderer_vulkan/present/util.h
     renderer_vulkan/blit_image.cpp
     renderer_vulkan/blit_image.h
     renderer_vulkan/fixed_pipeline_state.cpp
@@ -184,8 +192,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_descriptor_pool.h
     renderer_vulkan/vk_fence_manager.cpp
     renderer_vulkan/vk_fence_manager.h
-    renderer_vulkan/vk_fsr.cpp
-    renderer_vulkan/vk_fsr.h
     renderer_vulkan/vk_graphics_pipeline.cpp
     renderer_vulkan/vk_graphics_pipeline.h
     renderer_vulkan/vk_master_semaphore.cpp
@@ -206,8 +212,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_scheduler.h
     renderer_vulkan/vk_shader_util.cpp
     renderer_vulkan/vk_shader_util.h
-    renderer_vulkan/vk_smaa.cpp
-    renderer_vulkan/vk_smaa.h
     renderer_vulkan/vk_staging_buffer_pool.cpp
     renderer_vulkan/vk_staging_buffer_pool.h
     renderer_vulkan/vk_state_tracker.cpp
diff --git a/src/video_core/renderer_vulkan/present/anti_alias_pass.h b/src/video_core/renderer_vulkan/present/anti_alias_pass.h
new file mode 100644
index 0000000000..c1ec0b9a0d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/present/anti_alias_pass.h
@@ -0,0 +1,31 @@
+// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+class Scheduler;
+
+/// Interface for an anti-aliasing pass that maps a source image to an output image view.
+class AntiAliasPass {
+public:
+    virtual ~AntiAliasPass() = default;
+
+    /// Processes the source image and returns the image view to use as the pass result.
+    virtual VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image,
+                             VkImageView source_image_view) = 0;
+};
+
+/// Pass-through implementation: does no work and returns the source image view unchanged.
+class NoAA final : public AntiAliasPass {
+public:
+    VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image,
+                     VkImageView source_image_view) override {
+        return source_image_view;
+    }
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/present/fsr.cpp
similarity index 99%
rename from src/video_core/renderer_vulkan/vk_fsr.cpp
rename to src/video_core/renderer_vulkan/present/fsr.cpp
index f7a05fbc0d..30a16a785a 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/present/fsr.cpp
@@ -10,7 +10,7 @@
 #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
 #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h"
 #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h"
-#include "video_core/renderer_vulkan/vk_fsr.h"
+#include "video_core/renderer_vulkan/present/fsr.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/vulkan_common/vulkan_device.h"
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/present/fsr.h
similarity index 100%
rename from src/video_core/renderer_vulkan/vk_fsr.h
rename to src/video_core/renderer_vulkan/present/fsr.h
diff --git a/src/video_core/renderer_vulkan/present/fxaa.cpp b/src/video_core/renderer_vulkan/present/fxaa.cpp
new file mode 100644
index 0000000000..6f87ddebb6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/present/fxaa.cpp
@@ -0,0 +1,159 @@
+// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/common_types.h"
+
+#include "video_core/host_shaders/fxaa_frag_spv.h"
+#include "video_core/host_shaders/fxaa_vert_spv.h"
+#include "video_core/renderer_vulkan/present/fxaa.h"
+#include "video_core/renderer_vulkan/present/util.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+
+namespace Vulkan {
+
+// Creates all Vulkan objects used by the pass. Later steps consume the results of earlier ones
+// (e.g. framebuffers use the images and the render pass), so the call order must be preserved.
+FXAA::FXAA(const Device& device, MemoryAllocator& allocator, size_t image_count, VkExtent2D extent)
+    : m_device(device), m_allocator(allocator), m_extent(extent),
+      m_image_count(static_cast<u32>(image_count)) {
+    CreateImages();
+    CreateRenderPasses();
+    CreateSampler();
+    CreateShaders();
+    CreateDescriptorPool();
+    CreateDescriptorSetLayouts();
+    CreateDescriptorSets();
+    CreatePipelineLayouts();
+    CreatePipelines();
+}
+
+FXAA::~FXAA() = default;
+
+// Creates one R16G16B16A16_SFLOAT output image and matching image view per image index.
+void FXAA::CreateImages() {
+    for (u32 i = 0; i < m_image_count; i++) {
+        Image& image = m_dynamic_images.emplace_back();
+
+        image.image = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
+        image.image_view =
+            CreateWrappedImageView(m_device, image.image, VK_FORMAT_R16G16B16A16_SFLOAT);
+    }
+}
+
+// Creates the shared render pass and one framebuffer per output image.
+void FXAA::CreateRenderPasses() {
+    m_renderpass = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT);
+
+    for (auto& image : m_dynamic_images) {
+        image.framebuffer =
+            CreateWrappedFramebuffer(m_device, m_renderpass, image.image_view, m_extent);
+    }
+}
+
+void FXAA::CreateSampler() {
+    m_sampler = CreateWrappedSampler(m_device);
+}
+
+void FXAA::CreateShaders() {
+    m_vertex_shader = CreateWrappedShaderModule(m_device, FXAA_VERT_SPV);
+    m_fragment_shader = CreateWrappedShaderModule(m_device, FXAA_FRAG_SPV);
+}
+
+void FXAA::CreateDescriptorPool() {
+    // 2 descriptors, 1 descriptor set per image
+    m_descriptor_pool = CreateWrappedDescriptorPool(m_device, 2 * m_image_count, m_image_count);
+}
+
+void FXAA::CreateDescriptorSetLayouts() {
+    m_descriptor_set_layout = CreateWrappedDescriptorSetLayout(m_device, 2);
+}
+
+// Creates the descriptor sets of every output image from m_descriptor_pool and the shared layout.
+void FXAA::CreateDescriptorSets() {
+    VkDescriptorSetLayout layout = *m_descriptor_set_layout;
+
+    for (auto& images : m_dynamic_images) {
+        images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, {layout});
+    }
+}
+
+void FXAA::CreatePipelineLayouts() {
+    m_pipeline_layout = CreateWrappedPipelineLayout(m_device, m_descriptor_set_layout);
+}
+
+void FXAA::CreatePipelines() {
+    m_pipeline = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout,
+                                       std::tie(m_vertex_shader, m_fragment_shader));
+}
+
+// Writes image_view (sampled with m_sampler) into two descriptors of the image's descriptor set.
+void FXAA::UpdateDescriptorSets(VkImageView image_view, size_t image_index) {
+    Image& image = m_dynamic_images[image_index];
+    std::vector<VkDescriptorImageInfo> image_infos;
+    std::vector<VkWriteDescriptorSet> updates;
+    // NOTE(review): CreateWriteDescriptorSet presumably appends to image_infos and keeps a
+    // pointer to the new element, so capacity is reserved up front - confirm in present/util.
+    image_infos.reserve(2);
+    updates.reserve(2);
+
+    updates.push_back(
+        CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, image.descriptor_sets[0], 0));
+    updates.push_back(
+        CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, image.descriptor_sets[0], 1));
+
+    m_device.GetLogical().UpdateDescriptorSets(updates, {});
+}
+
+// Records ClearColorImage for every output image on the first call; later calls return at once.
+void FXAA::UploadImages(Scheduler& scheduler) {
+    if (m_images_ready) {
+        return;
+    }
+
+    // NOTE(review): the lambda captures by reference; this presumably relies on Finish() below
+    // completing the recorded work before returning - confirm against Scheduler.
+    scheduler.Record([&](vk::CommandBuffer cmdbuf) {
+        for (auto& image : m_dynamic_images) {
+            ClearColorImage(cmdbuf, *image.image);
+        }
+    });
+    scheduler.Finish();
+
+    m_images_ready = true;
+}
+
+// Records the FXAA draw that samples source_image_view into the output image for image_index and
+// returns that output image's view.
+VkImageView FXAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image,
+                       VkImageView source_image_view) {
+    // Copy the handles into locals so the recorded lambda can capture them by value.
+    const Image& image{m_dynamic_images[image_index]};
+    const VkImage output_image{*image.image};
+    const VkDescriptorSet descriptor_set{image.descriptor_sets[0]};
+    const VkFramebuffer framebuffer{*image.framebuffer};
+    const VkRenderPass renderpass{*m_renderpass};
+    const VkPipeline pipeline{*m_pipeline};
+    const VkPipelineLayout layout{*m_pipeline_layout};
+    const VkExtent2D extent{m_extent};
+
+    UploadImages(scheduler);
+    UpdateDescriptorSets(source_image_view, image_index);
+
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([=](vk::CommandBuffer cmdbuf) {
+        TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
+        TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL);
+        BeginRenderPass(cmdbuf, renderpass, framebuffer, extent);
+        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {});
+        cmdbuf.Draw(4, 1, 0, 0);
+        cmdbuf.EndRenderPass();
+        TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL);
+    });
+
+    return *image.image_view;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/present/fxaa.h b/src/video_core/renderer_vulkan/present/fxaa.h
new file mode 100644
index 0000000000..c083f3ff09
--- /dev/null
+++ b/src/video_core/renderer_vulkan/present/fxaa.h
@@ -0,0 +1,81 @@
+// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <vector>
+
+#include "video_core/renderer_vulkan/present/anti_alias_pass.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+class Device;
+class Scheduler;
+class StagingBufferPool;
+
+/// FXAA anti-aliasing pass used during presentation.
+///
+/// Keeps one render target, framebuffer and descriptor set per image index. Draw() renders
+/// into the target selected by image_index and returns a view of that target.
+class FXAA final : public AntiAliasPass {
+public:
+    /// @param device      Device every Vulkan object of the pass is created on.
+    /// @param allocator   Allocator backing the render targets.
+    /// @param image_count Number of per-image resource sets to create.
+    /// @param extent      Size of the render targets and of the render area.
+    explicit FXAA(const Device& device, MemoryAllocator& allocator, size_t image_count,
+                  VkExtent2D extent);
+    ~FXAA() override;
+
+    /// Records the FXAA pass for image_index on the scheduler. The first call also clears the
+    /// render targets and calls scheduler.Finish().
+    /// @return View of the render target the pass draws into.
+    VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image,
+                     VkImageView source_image_view) override;
+
+private:
+    // One-time setup steps, declared in the order the constructor runs them.
+    void CreateImages();
+    void CreateRenderPasses();
+    void CreateSampler();
+    void CreateShaders();
+    void CreateDescriptorPool();
+    void CreateDescriptorSetLayouts();
+    void CreateDescriptorSets();
+    void CreatePipelineLayouts();
+    void CreatePipelines();
+
+    // Per-draw helpers, both called from Draw().
+    void UpdateDescriptorSets(VkImageView image_view, size_t image_index);
+    void UploadImages(Scheduler& scheduler);
+
+    const Device& m_device;
+    MemoryAllocator& m_allocator;
+    const VkExtent2D m_extent;
+    const u32 m_image_count;
+
+    vk::ShaderModule m_vertex_shader{};
+    vk::ShaderModule m_fragment_shader{};
+    vk::DescriptorPool m_descriptor_pool{};
+    vk::DescriptorSetLayout m_descriptor_set_layout{};
+    vk::PipelineLayout m_pipeline_layout{};
+    vk::Pipeline m_pipeline{};
+    vk::RenderPass m_renderpass{};
+
+    /// Resources owned for a single image index.
+    struct Image {
+        vk::DescriptorSets descriptor_sets{};
+        vk::Framebuffer framebuffer{};
+        vk::Image image{};
+        vk::ImageView image_view{};
+    };
+    std::vector<Image> m_dynamic_images{};
+    /// Set once UploadImages() has cleared the render targets.
+    bool m_images_ready{};
+
+    vk::Sampler m_sampler{};
+};
+
+} // namespace Vulkan
diff --git 
a/src/video_core/renderer_vulkan/present/smaa.cpp b/src/video_core/renderer_vulkan/present/smaa.cpp new file mode 100644 index 0000000000..68cd22b08e --- /dev/null +++ b/src/video_core/renderer_vulkan/present/smaa.cpp @@ -0,0 +1,270 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "common/assert.h" +#include "common/polyfill_ranges.h" + +#include "video_core/renderer_vulkan/present/smaa.h" +#include "video_core/renderer_vulkan/present/util.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/smaa_area_tex.h" +#include "video_core/smaa_search_tex.h" +#include "video_core/vulkan_common/vulkan_device.h" + +#include "video_core/host_shaders/smaa_blending_weight_calculation_frag_spv.h" +#include "video_core/host_shaders/smaa_blending_weight_calculation_vert_spv.h" +#include "video_core/host_shaders/smaa_edge_detection_frag_spv.h" +#include "video_core/host_shaders/smaa_edge_detection_vert_spv.h" +#include "video_core/host_shaders/smaa_neighborhood_blending_frag_spv.h" +#include "video_core/host_shaders/smaa_neighborhood_blending_vert_spv.h" + +namespace Vulkan { + +SMAA::SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count, VkExtent2D extent) + : m_device(device), m_allocator(allocator), m_extent(extent), + m_image_count(static_cast(image_count)) { + CreateImages(); + CreateRenderPasses(); + CreateSampler(); + CreateShaders(); + CreateDescriptorPool(); + CreateDescriptorSetLayouts(); + CreateDescriptorSets(); + CreatePipelineLayouts(); + CreatePipelines(); +} + +SMAA::~SMAA() = default; + +void SMAA::CreateImages() { + static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; + static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; + + m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); + 
m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM); + + m_static_image_views[Area] = + CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM); + m_static_image_views[Search] = + CreateWrappedImageView(m_device, m_static_images[Search], VK_FORMAT_R8_UNORM); + + for (u32 i = 0; i < m_image_count; i++) { + Images& images = m_dynamic_images.emplace_back(); + + images.images[Blend] = + CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT); + images.images[Output] = + CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + + images.image_views[Blend] = + CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT); + images.image_views[Edges] = + CreateWrappedImageView(m_device, images.images[Edges], VK_FORMAT_R16G16_SFLOAT); + images.image_views[Output] = + CreateWrappedImageView(m_device, images.images[Output], VK_FORMAT_R16G16B16A16_SFLOAT); + } +} + +void SMAA::CreateRenderPasses() { + m_renderpasses[EdgeDetection] = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16_SFLOAT); + m_renderpasses[BlendingWeightCalculation] = + CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); + m_renderpasses[NeighborhoodBlending] = + CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); + + for (auto& images : m_dynamic_images) { + images.framebuffers[EdgeDetection] = CreateWrappedFramebuffer( + m_device, m_renderpasses[EdgeDetection], images.image_views[Edges], m_extent); + + images.framebuffers[BlendingWeightCalculation] = + CreateWrappedFramebuffer(m_device, m_renderpasses[BlendingWeightCalculation], + images.image_views[Blend], m_extent); + + images.framebuffers[NeighborhoodBlending] = CreateWrappedFramebuffer( + m_device, m_renderpasses[NeighborhoodBlending], images.image_views[Output], m_extent); + } +} + +void SMAA::CreateSampler() { 
+ m_sampler = CreateWrappedSampler(m_device); +} + +void SMAA::CreateShaders() { + // These match the order of the SMAAStage enum + static constexpr std::array vert_shader_sources{ + ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_VERT_SPV), + ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_VERT_SPV), + ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_VERT_SPV), + }; + static constexpr std::array frag_shader_sources{ + ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_FRAG_SPV), + ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_FRAG_SPV), + ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_FRAG_SPV), + }; + + for (size_t i = 0; i < MaxSMAAStage; i++) { + m_vertex_shaders[i] = CreateWrappedShaderModule(m_device, vert_shader_sources[i]); + m_fragment_shaders[i] = CreateWrappedShaderModule(m_device, frag_shader_sources[i]); + } +} + +void SMAA::CreateDescriptorPool() { + // Edge detection: 1 descriptor + // Blending weight calculation: 3 descriptors + // Neighborhood blending: 2 descriptors + + // 6 descriptors, 3 descriptor sets per image + m_descriptor_pool = CreateWrappedDescriptorPool(m_device, 6 * m_image_count, 3 * m_image_count); +} + +void SMAA::CreateDescriptorSetLayouts() { + m_descriptor_set_layouts[EdgeDetection] = CreateWrappedDescriptorSetLayout(m_device, 1); + m_descriptor_set_layouts[BlendingWeightCalculation] = + CreateWrappedDescriptorSetLayout(m_device, 3); + m_descriptor_set_layouts[NeighborhoodBlending] = CreateWrappedDescriptorSetLayout(m_device, 2); +} + +void SMAA::CreateDescriptorSets() { + std::vector layouts(m_descriptor_set_layouts.size()); + std::ranges::transform(m_descriptor_set_layouts, layouts.begin(), + [](auto& layout) { return *layout; }); + + for (auto& images : m_dynamic_images) { + images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layouts); + } +} + +void SMAA::CreatePipelineLayouts() { + for (size_t i = 0; i < MaxSMAAStage; i++) { + m_pipeline_layouts[i] = CreateWrappedPipelineLayout(m_device, m_descriptor_set_layouts[i]); + } +} + +void 
SMAA::CreatePipelines() { + for (size_t i = 0; i < MaxSMAAStage; i++) { + m_pipelines[i] = + CreateWrappedPipeline(m_device, m_renderpasses[i], m_pipeline_layouts[i], + std::tie(m_vertex_shaders[i], m_fragment_shaders[i])); + } +} + +void SMAA::UpdateDescriptorSets(VkImageView image_view, size_t image_index) { + Images& images = m_dynamic_images[image_index]; + std::vector image_infos; + std::vector updates; + image_infos.reserve(6); + + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, + images.descriptor_sets[EdgeDetection], 0)); + + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Edges], + images.descriptor_sets[BlendingWeightCalculation], + 0)); + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *m_static_image_views[Area], + images.descriptor_sets[BlendingWeightCalculation], + 1)); + updates.push_back( + CreateWriteDescriptorSet(image_infos, *m_sampler, *m_static_image_views[Search], + images.descriptor_sets[BlendingWeightCalculation], 2)); + + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, + images.descriptor_sets[NeighborhoodBlending], 0)); + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Blend], + images.descriptor_sets[NeighborhoodBlending], 1)); + + m_device.GetLogical().UpdateDescriptorSets(updates, {}); +} + +void SMAA::UploadImages(Scheduler& scheduler) { + if (m_images_ready) { + return; + } + + static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; + static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; + + UploadImage(m_device, m_allocator, scheduler, m_static_images[Area], area_extent, + VK_FORMAT_R8G8_UNORM, ARRAY_TO_SPAN(areaTexBytes)); + UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, + VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); + + scheduler.Record([&](vk::CommandBuffer cmdbuf) { + for (auto& 
images : m_dynamic_images) { + for (size_t i = 0; i < MaxDynamicImage; i++) { + ClearColorImage(cmdbuf, *images.images[i]); + } + } + }); + scheduler.Finish(); + + m_images_ready = true; +} + +VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, + VkImageView source_image_view) { + Images& images = m_dynamic_images[image_index]; + + VkImage output_image = *images.images[Output]; + VkImage edges_image = *images.images[Edges]; + VkImage blend_image = *images.images[Blend]; + + VkDescriptorSet edge_detection_descriptor_set = images.descriptor_sets[EdgeDetection]; + VkDescriptorSet blending_weight_calculation_descriptor_set = + images.descriptor_sets[BlendingWeightCalculation]; + VkDescriptorSet neighborhood_blending_descriptor_set = + images.descriptor_sets[NeighborhoodBlending]; + + VkFramebuffer edge_detection_framebuffer = *images.framebuffers[EdgeDetection]; + VkFramebuffer blending_weight_calculation_framebuffer = + *images.framebuffers[BlendingWeightCalculation]; + VkFramebuffer neighborhood_blending_framebuffer = *images.framebuffers[NeighborhoodBlending]; + + UploadImages(scheduler); + UpdateDescriptorSets(source_image_view, image_index); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([=, this](vk::CommandBuffer cmdbuf) { + TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); + BeginRenderPass(cmdbuf, *m_renderpasses[EdgeDetection], edge_detection_framebuffer, + m_extent); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipelines[EdgeDetection]); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, + *m_pipeline_layouts[EdgeDetection], 0, + edge_detection_descriptor_set, {}); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + + TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, blend_image, VK_IMAGE_LAYOUT_GENERAL); + 
BeginRenderPass(cmdbuf, *m_renderpasses[BlendingWeightCalculation], + blending_weight_calculation_framebuffer, m_extent); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, + *m_pipelines[BlendingWeightCalculation]); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, + *m_pipeline_layouts[BlendingWeightCalculation], 0, + blending_weight_calculation_descriptor_set, {}); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + + TransitionImageLayout(cmdbuf, blend_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); + BeginRenderPass(cmdbuf, *m_renderpasses[NeighborhoodBlending], + neighborhood_blending_framebuffer, m_extent); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipelines[NeighborhoodBlending]); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, + *m_pipeline_layouts[NeighborhoodBlending], 0, + neighborhood_blending_descriptor_set, {}); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); + }); + + return *images.image_views[Output]; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_smaa.h b/src/video_core/renderer_vulkan/present/smaa.h similarity index 92% rename from src/video_core/renderer_vulkan/vk_smaa.h rename to src/video_core/renderer_vulkan/present/smaa.h index 0e214258a7..3d6707d485 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.h +++ b/src/video_core/renderer_vulkan/present/smaa.h @@ -4,6 +4,7 @@ #pragma once #include +#include "video_core/renderer_vulkan/present/anti_alias_pass.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -13,12 +14,14 @@ class Device; class Scheduler; class StagingBufferPool; -class SMAA { +class SMAA final : public AntiAliasPass { public: explicit SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count, VkExtent2D extent); + ~SMAA() override; + 
VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, - VkImageView source_image_view); + VkImageView source_image_view) override; private: enum SMAAStage { diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/present/util.cpp similarity index 58% rename from src/video_core/renderer_vulkan/vk_smaa.cpp rename to src/video_core/renderer_vulkan/present/util.cpp index 70644ea82e..a445b213e6 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -1,29 +1,11 @@ -// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include - #include "common/assert.h" #include "common/polyfill_ranges.h" - -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" -#include "video_core/renderer_vulkan/vk_smaa.h" -#include "video_core/smaa_area_tex.h" -#include "video_core/smaa_search_tex.h" -#include "video_core/vulkan_common/vulkan_device.h" - -#include "video_core/host_shaders/smaa_blending_weight_calculation_frag_spv.h" -#include "video_core/host_shaders/smaa_blending_weight_calculation_vert_spv.h" -#include "video_core/host_shaders/smaa_edge_detection_frag_spv.h" -#include "video_core/host_shaders/smaa_edge_detection_vert_spv.h" -#include "video_core/host_shaders/smaa_neighborhood_blending_frag_spv.h" -#include "video_core/host_shaders/smaa_neighborhood_blending_vert_spv.h" +#include "video_core/renderer_vulkan/present/util.h" namespace Vulkan { -namespace { - -#define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0]))) vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) { const VkImageCreateInfo image_ci{ @@ -48,7 +30,7 @@ vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, } void 
TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, - VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL) { + VkImageLayout source_layout) { constexpr VkFlags flags{VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT}; const VkImageMemoryBarrier barrier{ @@ -75,7 +57,7 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler, vk::Image& image, VkExtent2D dimensions, VkFormat format, - std::span initial_contents = {}) { + std::span initial_contents) { const VkBufferCreateInfo upload_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -200,13 +182,13 @@ vk::Framebuffer CreateWrappedFramebuffer(const Device& device, vk::RenderPass& r }); } -vk::Sampler CreateWrappedSampler(const Device& device) { +vk::Sampler CreateWrappedSampler(const Device& device, VkFilter filter) { return device.GetLogical().CreateSampler(VkSamplerCreateInfo{ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .magFilter = VK_FILTER_LINEAR, - .minFilter = VK_FILTER_LINEAR, + .magFilter = filter, + .minFilter = filter, .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, @@ -471,12 +453,12 @@ void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image) { cmdbuf.ClearColorImage(image, VK_IMAGE_LAYOUT_GENERAL, {}, subresources); } -void BeginRenderPass(vk::CommandBuffer& cmdbuf, vk::RenderPass& render_pass, - VkFramebuffer framebuffer, VkExtent2D extent) { +void BeginRenderPass(vk::CommandBuffer& cmdbuf, VkRenderPass render_pass, VkFramebuffer framebuffer, + VkExtent2D extent) { const VkRenderPassBeginInfo renderpass_bi{ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .pNext = nullptr, - .renderPass = *render_pass, + .renderPass = 
render_pass, .framebuffer = framebuffer, .renderArea{ .offset{}, @@ -503,248 +485,4 @@ void BeginRenderPass(vk::CommandBuffer& cmdbuf, vk::RenderPass& render_pass, cmdbuf.SetScissor(0, scissor); } -} // Anonymous namespace - -SMAA::SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count, VkExtent2D extent) - : m_device(device), m_allocator(allocator), m_extent(extent), - m_image_count(static_cast(image_count)) { - CreateImages(); - CreateRenderPasses(); - CreateSampler(); - CreateShaders(); - CreateDescriptorPool(); - CreateDescriptorSetLayouts(); - CreateDescriptorSets(); - CreatePipelineLayouts(); - CreatePipelines(); -} - -void SMAA::CreateImages() { - static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; - static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; - - m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); - m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM); - - m_static_image_views[Area] = - CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM); - m_static_image_views[Search] = - CreateWrappedImageView(m_device, m_static_images[Search], VK_FORMAT_R8_UNORM); - - for (u32 i = 0; i < m_image_count; i++) { - Images& images = m_dynamic_images.emplace_back(); - - images.images[Blend] = - CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); - images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT); - images.images[Output] = - CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); - - images.image_views[Blend] = - CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT); - images.image_views[Edges] = - CreateWrappedImageView(m_device, images.images[Edges], VK_FORMAT_R16G16_SFLOAT); - images.image_views[Output] = - CreateWrappedImageView(m_device, images.images[Output], 
VK_FORMAT_R16G16B16A16_SFLOAT); - } -} - -void SMAA::CreateRenderPasses() { - m_renderpasses[EdgeDetection] = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16_SFLOAT); - m_renderpasses[BlendingWeightCalculation] = - CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); - m_renderpasses[NeighborhoodBlending] = - CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); - - for (auto& images : m_dynamic_images) { - images.framebuffers[EdgeDetection] = CreateWrappedFramebuffer( - m_device, m_renderpasses[EdgeDetection], images.image_views[Edges], m_extent); - - images.framebuffers[BlendingWeightCalculation] = - CreateWrappedFramebuffer(m_device, m_renderpasses[BlendingWeightCalculation], - images.image_views[Blend], m_extent); - - images.framebuffers[NeighborhoodBlending] = CreateWrappedFramebuffer( - m_device, m_renderpasses[NeighborhoodBlending], images.image_views[Output], m_extent); - } -} - -void SMAA::CreateSampler() { - m_sampler = CreateWrappedSampler(m_device); -} - -void SMAA::CreateShaders() { - // These match the order of the SMAAStage enum - static constexpr std::array vert_shader_sources{ - ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_VERT_SPV), - ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_VERT_SPV), - ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_VERT_SPV), - }; - static constexpr std::array frag_shader_sources{ - ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_FRAG_SPV), - ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_FRAG_SPV), - ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_FRAG_SPV), - }; - - for (size_t i = 0; i < MaxSMAAStage; i++) { - m_vertex_shaders[i] = CreateWrappedShaderModule(m_device, vert_shader_sources[i]); - m_fragment_shaders[i] = CreateWrappedShaderModule(m_device, frag_shader_sources[i]); - } -} - -void SMAA::CreateDescriptorPool() { - // Edge detection: 1 descriptor - // Blending weight calculation: 3 descriptors - // Neighborhood blending: 2 descriptors - - // 6 descriptors, 3 descriptor sets per image - m_descriptor_pool = 
CreateWrappedDescriptorPool(m_device, 6 * m_image_count, 3 * m_image_count); -} - -void SMAA::CreateDescriptorSetLayouts() { - m_descriptor_set_layouts[EdgeDetection] = CreateWrappedDescriptorSetLayout(m_device, 1); - m_descriptor_set_layouts[BlendingWeightCalculation] = - CreateWrappedDescriptorSetLayout(m_device, 3); - m_descriptor_set_layouts[NeighborhoodBlending] = CreateWrappedDescriptorSetLayout(m_device, 2); -} - -void SMAA::CreateDescriptorSets() { - std::vector layouts(m_descriptor_set_layouts.size()); - std::ranges::transform(m_descriptor_set_layouts, layouts.begin(), - [](auto& layout) { return *layout; }); - - for (auto& images : m_dynamic_images) { - images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layouts); - } -} - -void SMAA::CreatePipelineLayouts() { - for (size_t i = 0; i < MaxSMAAStage; i++) { - m_pipeline_layouts[i] = CreateWrappedPipelineLayout(m_device, m_descriptor_set_layouts[i]); - } -} - -void SMAA::CreatePipelines() { - for (size_t i = 0; i < MaxSMAAStage; i++) { - m_pipelines[i] = - CreateWrappedPipeline(m_device, m_renderpasses[i], m_pipeline_layouts[i], - std::tie(m_vertex_shaders[i], m_fragment_shaders[i])); - } -} - -void SMAA::UpdateDescriptorSets(VkImageView image_view, size_t image_index) { - Images& images = m_dynamic_images[image_index]; - std::vector image_infos; - std::vector updates; - image_infos.reserve(6); - - updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, - images.descriptor_sets[EdgeDetection], 0)); - - updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Edges], - images.descriptor_sets[BlendingWeightCalculation], - 0)); - updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *m_static_image_views[Area], - images.descriptor_sets[BlendingWeightCalculation], - 1)); - updates.push_back( - CreateWriteDescriptorSet(image_infos, *m_sampler, *m_static_image_views[Search], - images.descriptor_sets[BlendingWeightCalculation], 
2)); - - updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, - images.descriptor_sets[NeighborhoodBlending], 0)); - updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Blend], - images.descriptor_sets[NeighborhoodBlending], 1)); - - m_device.GetLogical().UpdateDescriptorSets(updates, {}); -} - -void SMAA::UploadImages(Scheduler& scheduler) { - if (m_images_ready) { - return; - } - - static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; - static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; - - UploadImage(m_device, m_allocator, scheduler, m_static_images[Area], area_extent, - VK_FORMAT_R8G8_UNORM, ARRAY_TO_SPAN(areaTexBytes)); - UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, - VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); - - scheduler.Record([&](vk::CommandBuffer cmdbuf) { - for (auto& images : m_dynamic_images) { - for (size_t i = 0; i < MaxDynamicImage; i++) { - ClearColorImage(cmdbuf, *images.images[i]); - } - } - }); - scheduler.Finish(); - - m_images_ready = true; -} - -VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, - VkImageView source_image_view) { - Images& images = m_dynamic_images[image_index]; - - VkImage output_image = *images.images[Output]; - VkImage edges_image = *images.images[Edges]; - VkImage blend_image = *images.images[Blend]; - - VkDescriptorSet edge_detection_descriptor_set = images.descriptor_sets[EdgeDetection]; - VkDescriptorSet blending_weight_calculation_descriptor_set = - images.descriptor_sets[BlendingWeightCalculation]; - VkDescriptorSet neighborhood_blending_descriptor_set = - images.descriptor_sets[NeighborhoodBlending]; - - VkFramebuffer edge_detection_framebuffer = *images.framebuffers[EdgeDetection]; - VkFramebuffer blending_weight_calculation_framebuffer = - *images.framebuffers[BlendingWeightCalculation]; - VkFramebuffer 
neighborhood_blending_framebuffer = *images.framebuffers[NeighborhoodBlending]; - - UploadImages(scheduler); - UpdateDescriptorSets(source_image_view, image_index); - - scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([=, this](vk::CommandBuffer cmdbuf) { - TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); - TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); - BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer, - m_extent); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipelines[EdgeDetection]); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, - *m_pipeline_layouts[EdgeDetection], 0, - edge_detection_descriptor_set, {}); - cmdbuf.Draw(3, 1, 0, 0); - cmdbuf.EndRenderPass(); - - TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); - TransitionImageLayout(cmdbuf, blend_image, VK_IMAGE_LAYOUT_GENERAL); - BeginRenderPass(cmdbuf, m_renderpasses[BlendingWeightCalculation], - blending_weight_calculation_framebuffer, m_extent); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, - *m_pipelines[BlendingWeightCalculation]); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, - *m_pipeline_layouts[BlendingWeightCalculation], 0, - blending_weight_calculation_descriptor_set, {}); - cmdbuf.Draw(3, 1, 0, 0); - cmdbuf.EndRenderPass(); - - TransitionImageLayout(cmdbuf, blend_image, VK_IMAGE_LAYOUT_GENERAL); - TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); - BeginRenderPass(cmdbuf, m_renderpasses[NeighborhoodBlending], - neighborhood_blending_framebuffer, m_extent); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipelines[NeighborhoodBlending]); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, - *m_pipeline_layouts[NeighborhoodBlending], 0, - neighborhood_blending_descriptor_set, {}); - cmdbuf.Draw(3, 1, 0, 0); - cmdbuf.EndRenderPass(); - TransitionImageLayout(cmdbuf, output_image, 
VK_IMAGE_LAYOUT_GENERAL); - }); - - return *images.image_views[Output]; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/util.h b/src/video_core/renderer_vulkan/present/util.h new file mode 100644 index 0000000000..93cfdd16bd --- /dev/null +++ b/src/video_core/renderer_vulkan/present/util.h @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +#define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0]))) + +vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format); +void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, + VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL); +void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler, + vk::Image& image, VkExtent2D dimensions, VkFormat format, + std::span initial_contents = {}); +void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image); + +vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format); +vk::RenderPass CreateWrappedRenderPass(const Device& device, VkFormat format); +vk::Framebuffer CreateWrappedFramebuffer(const Device& device, vk::RenderPass& render_pass, + vk::ImageView& dest_image, VkExtent2D extent); +vk::Sampler CreateWrappedSampler(const Device& device, VkFilter filter = VK_FILTER_LINEAR); +vk::ShaderModule CreateWrappedShaderModule(const Device& device, std::span code); +vk::DescriptorPool CreateWrappedDescriptorPool(const Device& device, u32 max_sampler_bindings, + u32 max_sets); +vk::DescriptorSetLayout CreateWrappedDescriptorSetLayout(const Device& device, + u32 max_sampler_bindings); +vk::DescriptorSets 
CreateWrappedDescriptorSets(vk::DescriptorPool& pool, + vk::Span layouts); +vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device, + vk::DescriptorSetLayout& layout); +vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass, + vk::PipelineLayout& layout, + std::tuple shaders); +VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector& images, + VkSampler sampler, VkImageView view, + VkDescriptorSet set, u32 binding); + +void BeginRenderPass(vk::CommandBuffer& cmdbuf, VkRenderPass render_pass, VkFramebuffer framebuffer, + VkExtent2D extent); + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 24781860bc..fe1a7b0cd3 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -16,20 +16,19 @@ #include "core/frontend/emu_window.h" #include "video_core/gpu.h" #include "video_core/host1x/gpu_device_memory_manager.h" -#include "video_core/host_shaders/fxaa_frag_spv.h" -#include "video_core/host_shaders/fxaa_vert_spv.h" #include "video_core/host_shaders/present_bicubic_frag_spv.h" #include "video_core/host_shaders/present_gaussian_frag_spv.h" #include "video_core/host_shaders/vulkan_present_frag_spv.h" #include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h" #include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h" #include "video_core/host_shaders/vulkan_present_vert_spv.h" +#include "video_core/renderer_vulkan/present/fsr.h" +#include "video_core/renderer_vulkan/present/fxaa.h" +#include "video_core/renderer_vulkan/present/smaa.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" -#include "video_core/renderer_vulkan/vk_fsr.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" -#include 
"video_core/renderer_vulkan/vk_smaa.h" #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/surface.h" #include "video_core/textures/decoders.h" @@ -252,103 +251,17 @@ void BlitScreen::Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConf const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) { - UpdateAADescriptorSet(source_image_view, false); - const u32 up_scale = Settings::values.resolution_info.up_scale; - const u32 down_shift = Settings::values.resolution_info.down_shift; - VkExtent2D size{ - .width = (up_scale * framebuffer.width) >> down_shift, - .height = (up_scale * framebuffer.height) >> down_shift, - }; - scheduler.Record([this, index = image_index, size, - anti_alias_pass](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = {}, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, + if (!fxaa) { + const u32 up_scale = Settings::values.resolution_info.up_scale; + const u32 down_shift = Settings::values.resolution_info.down_shift; + const VkExtent2D fxaa_size{ + .width = (up_scale * framebuffer.width) >> down_shift, + .height = (up_scale * framebuffer.height) >> down_shift, }; + fxaa = std::make_unique(device, memory_allocator, image_count, fxaa_size); + } - { - VkImageMemoryBarrier fsr_write_barrier = base_barrier; - fsr_write_barrier.image = *aa_image; - fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 
fsr_write_barrier); - } - - const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; - const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; - const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; - const VkClearValue clear_color{ - .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, - }; - const VkRenderPassBeginInfo renderpass_bi{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .pNext = nullptr, - .renderPass = *aa_renderpass, - .framebuffer = *aa_framebuffer, - .renderArea = - { - .offset = {0, 0}, - .extent = size, - }, - .clearValueCount = 1, - .pClearValues = &clear_color, - }; - const VkViewport viewport{ - .x = 0.0f, - .y = 0.0f, - .width = static_cast(size.width), - .height = static_cast(size.height), - .minDepth = 0.0f, - .maxDepth = 1.0f, - }; - const VkRect2D scissor{ - .offset = {0, 0}, - .extent = size, - }; - cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - switch (anti_alias_pass) { - case Settings::AntiAliasing::Fxaa: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline); - break; - default: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline); - break; - } - cmdbuf.SetViewport(0, viewport); - cmdbuf.SetScissor(0, scissor); - - cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0, - aa_descriptor_sets[index], {}); - cmdbuf.Draw(4, 1, 0, 0); - cmdbuf.EndRenderPass(); - - { - VkImageMemoryBarrier blit_read_barrier = base_barrier; - blit_read_barrier.image = *aa_image; - blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier); - } - }); - source_image_view = *aa_image_view; + source_image_view = fxaa->Draw(scheduler, image_index, source_image, 
source_image_view); } if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Smaa) { if (!smaa) { @@ -496,6 +409,7 @@ void BlitScreen::CreateDynamicResources() { CreateRenderPass(); CreateGraphicsPipeline(); fsr.reset(); + fxaa.reset(); smaa.reset(); if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { CreateFSR(); @@ -520,6 +434,7 @@ void BlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { raw_height = framebuffer.height; pixel_format = framebuffer.pixel_format; + fxaa.reset(); smaa.reset(); ReleaseRawImages(); @@ -529,8 +444,6 @@ void BlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { void BlitScreen::CreateShaders() { vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); - fxaa_vertex_shader = BuildShader(device, FXAA_VERT_SPV); - fxaa_fragment_shader = BuildShader(device, FXAA_FRAG_SPV); bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); @@ -553,13 +466,6 @@ void BlitScreen::CreateDescriptorPool() { }, }}; - const std::array pool_sizes_aa{{ - { - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = static_cast(image_count * 2), - }, - }}; - const VkDescriptorPoolCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, @@ -569,16 +475,6 @@ void BlitScreen::CreateDescriptorPool() { .pPoolSizes = pool_sizes.data(), }; descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); - - const VkDescriptorPoolCreateInfo ci_aa{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .maxSets = static_cast(image_count), - .poolSizeCount = static_cast(pool_sizes_aa.size()), - .pPoolSizes = pool_sizes_aa.data(), - }; - aa_descriptor_pool = device.GetLogical().CreateDescriptorPool(ci_aa); } void 
BlitScreen::CreateRenderPass() { @@ -659,23 +555,6 @@ void BlitScreen::CreateDescriptorSetLayout() { }, }}; - const std::array layout_bindings_aa{{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = nullptr, - }, - }}; - const VkDescriptorSetLayoutCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -684,21 +563,11 @@ void BlitScreen::CreateDescriptorSetLayout() { .pBindings = layout_bindings.data(), }; - const VkDescriptorSetLayoutCreateInfo ci_aa{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(layout_bindings_aa.size()), - .pBindings = layout_bindings_aa.data(), - }; - descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); - aa_descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci_aa); } void BlitScreen::CreateDescriptorSets() { const std::vector layouts(image_count, *descriptor_set_layout); - const std::vector layouts_aa(image_count, *aa_descriptor_set_layout); const VkDescriptorSetAllocateInfo ai{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, @@ -708,16 +577,7 @@ void BlitScreen::CreateDescriptorSets() { .pSetLayouts = layouts.data(), }; - const VkDescriptorSetAllocateInfo ai_aa{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .pNext = nullptr, - .descriptorPool = *aa_descriptor_pool, - .descriptorSetCount = static_cast(image_count), - .pSetLayouts = layouts_aa.data(), - }; - descriptor_sets = descriptor_pool.Allocate(ai); - aa_descriptor_sets = aa_descriptor_pool.Allocate(ai_aa); } void BlitScreen::CreatePipelineLayout() { @@ -730,17 +590,7 @@ void 
BlitScreen::CreatePipelineLayout() { .pushConstantRangeCount = 0, .pPushConstantRanges = nullptr, }; - const VkPipelineLayoutCreateInfo ci_aa{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = aa_descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); - aa_pipeline_layout = device.GetLogical().CreatePipelineLayout(ci_aa); } void BlitScreen::CreateGraphicsPipeline() { @@ -1068,8 +918,6 @@ void BlitScreen::ReleaseRawImages() { scheduler.Wait(tick); } raw_images.clear(); - aa_image_view.reset(); - aa_image.reset(); buffer.reset(); } @@ -1150,198 +998,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { raw_images[i] = create_image(); raw_image_views[i] = create_image_view(raw_images[i]); } - - // AA Resources - const u32 up_scale = Settings::values.resolution_info.up_scale; - const u32 down_shift = Settings::values.resolution_info.down_shift; - aa_image = create_image(true, up_scale, down_shift); - aa_image_view = create_image_view(aa_image, true); - VkExtent2D size{ - .width = (up_scale * framebuffer.width) >> down_shift, - .height = (up_scale * framebuffer.height) >> down_shift, - }; - if (aa_renderpass) { - aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); - return; - } - aa_renderpass = CreateRenderPassImpl(VK_FORMAT_R16G16B16A16_SFLOAT); - aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); - - const std::array fxaa_shader_stages{{ - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = *fxaa_vertex_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, 
- .module = *fxaa_fragment_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - }}; - - const auto vertex_binding_description = ScreenRectVertex::GetDescription(); - const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); - - const VkPipelineVertexInputStateCreateInfo vertex_input_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = &vertex_binding_description, - .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}, - .pVertexAttributeDescriptions = vertex_attrs_description.data(), - }; - - const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = VK_FALSE, - }; - - const VkPipelineViewportStateCreateInfo viewport_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = 1, - .pViewports = nullptr, - .scissorCount = 1, - .pScissors = nullptr, - }; - - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthClampEnable = VK_FALSE, - .rasterizerDiscardEnable = VK_FALSE, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CLOCKWISE, - .depthBiasEnable = VK_FALSE, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - }; - - const VkPipelineMultisampleStateCreateInfo multisampling_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, - .sampleShadingEnable = VK_FALSE, - .minSampleShading = 0.0f, - .pSampleMask = 
nullptr, - .alphaToCoverageEnable = VK_FALSE, - .alphaToOneEnable = VK_FALSE, - }; - - const VkPipelineColorBlendAttachmentState color_blend_attachment{ - .blendEnable = VK_FALSE, - .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, - .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, - .colorBlendOp = VK_BLEND_OP_ADD, - .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .alphaBlendOp = VK_BLEND_OP_ADD, - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, - }; - - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .logicOpEnable = VK_FALSE, - .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = 1, - .pAttachments = &color_blend_attachment, - .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, - }; - - static constexpr std::array dynamic_states{ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }; - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .dynamicStateCount = static_cast(dynamic_states.size()), - .pDynamicStates = dynamic_states.data(), - }; - - const VkGraphicsPipelineCreateInfo fxaa_pipeline_ci{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(fxaa_shader_stages.size()), - .pStages = fxaa_shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = nullptr, - .pViewportState = &viewport_state_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisampling_ci, - .pDepthStencilState = nullptr, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *aa_pipeline_layout, - .renderPass = *aa_renderpass, - .subpass = 0, - .basePipelineHandle = 0, 
- .basePipelineIndex = 0, - }; - - // AA - aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci); -} - -void BlitScreen::UpdateAADescriptorSet(VkImageView image_view, bool nn) const { - const VkDescriptorImageInfo image_info{ - .sampler = nn ? *nn_sampler : *sampler, - .imageView = image_view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - - const VkWriteDescriptorSet sampler_write{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = aa_descriptor_sets[image_index], - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = &image_info, - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - - const VkWriteDescriptorSet sampler_write_2{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = aa_descriptor_sets[image_index], - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = &image_info, - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - - device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {}); } void BlitScreen::UpdateDescriptorSet(VkImageView image_view, bool nn) const { diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 56ac47f089..d7f8effa23 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -34,6 +34,7 @@ namespace Vulkan { class Device; class FSR; +class FXAA; class RasterizerVulkan; class Scheduler; class SMAA; @@ -96,7 +97,6 @@ private: void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); void UpdateDescriptorSet(VkImageView image_view, bool nn) const; - void UpdateAADescriptorSet(VkImageView image_view, bool nn) const; void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) 
const; void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, const Layout::FramebufferLayout layout, u32 texture_width, @@ -119,8 +119,6 @@ private: std::size_t image_index{}; vk::ShaderModule vertex_shader; - vk::ShaderModule fxaa_vertex_shader; - vk::ShaderModule fxaa_fragment_shader; vk::ShaderModule bilinear_fragment_shader; vk::ShaderModule bicubic_fragment_shader; vk::ShaderModule gaussian_fragment_shader; @@ -128,7 +126,6 @@ private: vk::DescriptorPool descriptor_pool; vk::DescriptorSetLayout descriptor_set_layout; vk::PipelineLayout pipeline_layout; - vk::Pipeline nearest_neighbor_pipeline; vk::Pipeline bilinear_pipeline; vk::Pipeline bicubic_pipeline; vk::Pipeline gaussian_pipeline; @@ -145,16 +142,6 @@ private: std::vector raw_images; std::vector raw_image_views; - vk::DescriptorPool aa_descriptor_pool; - vk::DescriptorSetLayout aa_descriptor_set_layout; - vk::PipelineLayout aa_pipeline_layout; - vk::Pipeline aa_pipeline; - vk::RenderPass aa_renderpass; - vk::Framebuffer aa_framebuffer; - vk::DescriptorSets aa_descriptor_sets; - vk::Image aa_image; - vk::ImageView aa_image_view; - u32 raw_width = 0; u32 raw_height = 0; Service::android::PixelFormat pixel_format{}; @@ -163,6 +150,7 @@ private: std::unique_ptr fsr; std::unique_ptr smaa; + std::unique_ptr fxaa; }; } // namespace Vulkan From 0c2e5b64c9fb985a40e5afec898d1f370cbad23e Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 14 Jan 2024 01:46:19 -0500 Subject: [PATCH 05/15] renderer_vulkan: split up blit screen resources into separate antialias and window adapt passes --- src/video_core/CMakeLists.txt | 5 + .../renderer_vulkan/present/filters.cpp | 70 ++ .../renderer_vulkan/present/filters.h | 30 + .../renderer_vulkan/present/util.cpp | 50 + src/video_core/renderer_vulkan/present/util.h | 2 + .../present/window_adapt_pass.cpp | 512 ++++++++++ .../present/window_adapt_pass.h | 71 ++ .../renderer_vulkan/renderer_vulkan.cpp | 119 ++- .../renderer_vulkan/renderer_vulkan.h | 3 +- 
.../renderer_vulkan/vk_blit_screen.cpp | 949 +++--------------- .../renderer_vulkan/vk_blit_screen.h | 93 +- 11 files changed, 988 insertions(+), 916 deletions(-) create mode 100644 src/video_core/renderer_vulkan/present/filters.cpp create mode 100644 src/video_core/renderer_vulkan/present/filters.h create mode 100644 src/video_core/renderer_vulkan/present/window_adapt_pass.cpp create mode 100644 src/video_core/renderer_vulkan/present/window_adapt_pass.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 240b80c6e5..825815ebd1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -158,6 +158,9 @@ add_library(video_core STATIC renderer_opengl/renderer_opengl.h renderer_opengl/util_shaders.cpp renderer_opengl/util_shaders.h + renderer_vulkan/present/anti_alias_pass.h + renderer_vulkan/present/filters.cpp + renderer_vulkan/present/filters.h renderer_vulkan/present/fsr.cpp renderer_vulkan/present/fsr.h renderer_vulkan/present/fxaa.cpp @@ -166,6 +169,8 @@ add_library(video_core STATIC renderer_vulkan/present/smaa.h renderer_vulkan/present/util.cpp renderer_vulkan/present/util.h + renderer_vulkan/present/window_adapt_pass.cpp + renderer_vulkan/present/window_adapt_pass.h renderer_vulkan/blit_image.cpp renderer_vulkan/blit_image.h renderer_vulkan/fixed_pipeline_state.cpp diff --git a/src/video_core/renderer_vulkan/present/filters.cpp b/src/video_core/renderer_vulkan/present/filters.cpp new file mode 100644 index 0000000000..ee6239cc41 --- /dev/null +++ b/src/video_core/renderer_vulkan/present/filters.cpp @@ -0,0 +1,70 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/common_types.h" + +#include "video_core/host_shaders/present_bicubic_frag_spv.h" +#include "video_core/host_shaders/present_gaussian_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_frag_spv.h" +#include 
"video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h" +#include "video_core/renderer_vulkan/present/filters.h" +#include "video_core/renderer_vulkan/present/util.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { + +namespace { + +vk::ShaderModule SelectScaleForceShader(const Device& device) { + if (device.IsFloat16Supported()) { + return BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV); + } else { + return BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV); + } +} + +} // Anonymous namespace + +std::unique_ptr MakeNearestNeighbor(const Device& device, + const MemoryAllocator& memory_allocator, + size_t image_count, VkFormat frame_format) { + return std::make_unique(device, memory_allocator, image_count, frame_format, + CreateNearestNeighborSampler(device), + BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); +} + +std::unique_ptr MakeBilinear(const Device& device, + const MemoryAllocator& memory_allocator, + size_t image_count, VkFormat frame_format) { + return std::make_unique(device, memory_allocator, image_count, frame_format, + CreateBilinearSampler(device), + BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); +} + +std::unique_ptr MakeBicubic(const Device& device, + const MemoryAllocator& memory_allocator, + size_t image_count, VkFormat frame_format) { + return std::make_unique(device, memory_allocator, image_count, frame_format, + CreateBilinearSampler(device), + BuildShader(device, PRESENT_BICUBIC_FRAG_SPV)); +} + +std::unique_ptr MakeGaussian(const Device& device, + const MemoryAllocator& memory_allocator, + size_t image_count, VkFormat frame_format) { + return std::make_unique(device, memory_allocator, image_count, frame_format, + CreateBilinearSampler(device), + BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV)); +} + +std::unique_ptr MakeScaleForce(const Device& device, 
+ const MemoryAllocator& memory_allocator, + size_t image_count, VkFormat frame_format) { + return std::make_unique(device, memory_allocator, image_count, frame_format, + CreateBilinearSampler(device), + SelectScaleForceShader(device)); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/filters.h b/src/video_core/renderer_vulkan/present/filters.h new file mode 100644 index 0000000000..42d7052da7 --- /dev/null +++ b/src/video_core/renderer_vulkan/present/filters.h @@ -0,0 +1,30 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/renderer_vulkan/present/window_adapt_pass.h" + +namespace Vulkan { + +std::unique_ptr MakeNearestNeighbor(const Device& device, + const MemoryAllocator& memory_allocator, + size_t image_count, VkFormat frame_format); + +std::unique_ptr MakeBilinear(const Device& device, + const MemoryAllocator& memory_allocator, + size_t image_count, VkFormat frame_format); + +std::unique_ptr MakeBicubic(const Device& device, + const MemoryAllocator& memory_allocator, + size_t image_count, VkFormat frame_format); + +std::unique_ptr MakeGaussian(const Device& device, + const MemoryAllocator& memory_allocator, + size_t image_count, VkFormat frame_format); + +std::unique_ptr MakeScaleForce(const Device& device, + const MemoryAllocator& memory_allocator, + size_t image_count, VkFormat frame_format); + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp index a445b213e6..cd60611014 100644 --- a/src/video_core/renderer_vulkan/present/util.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -441,6 +441,56 @@ VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector }; } +vk::Sampler CreateBilinearSampler(const Device& device) { + const VkSamplerCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + 
.magFilter = VK_FILTER_LINEAR, + .minFilter = VK_FILTER_LINEAR, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, + .unnormalizedCoordinates = VK_FALSE, + }; + + return device.GetLogical().CreateSampler(ci); +} + +vk::Sampler CreateNearestNeighborSampler(const Device& device) { + const VkSamplerCreateInfo ci_nn{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, + .unnormalizedCoordinates = VK_FALSE, + }; + + return device.GetLogical().CreateSampler(ci_nn); +} + void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image) { static constexpr std::array subresources{{{ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, diff --git a/src/video_core/renderer_vulkan/present/util.h b/src/video_core/renderer_vulkan/present/util.h index 93cfdd16bd..ea9a26c3db 100644 --- a/src/video_core/renderer_vulkan/present/util.h +++ b/src/video_core/renderer_vulkan/present/util.h @@ -39,6 +39,8 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector& images, 
VkSampler sampler, VkImageView view, VkDescriptorSet set, u32 binding); +vk::Sampler CreateBilinearSampler(const Device& device); +vk::Sampler CreateNearestNeighborSampler(const Device& device); void BeginRenderPass(vk::CommandBuffer& cmdbuf, VkRenderPass render_pass, VkFramebuffer framebuffer, VkExtent2D extent); diff --git a/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp b/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp new file mode 100644 index 0000000000..7fd9ecd22d --- /dev/null +++ b/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp @@ -0,0 +1,512 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "core/frontend/framebuffer_layout.h" +#include "video_core/host_shaders/vulkan_present_vert_spv.h" +#include "video_core/renderer_vulkan/present/util.h" +#include "video_core/renderer_vulkan/present/window_adapt_pass.h" +#include "video_core/renderer_vulkan/vk_present_manager.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" + +namespace Vulkan { + +namespace { + +struct ScreenRectVertex { + ScreenRectVertex() = default; + explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} + + std::array position; + std::array tex_coord; + + static VkVertexInputBindingDescription GetDescription() { + return { + .binding = 0, + .stride = sizeof(ScreenRectVertex), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }; + } + + static std::array GetAttributes() { + return {{ + { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(ScreenRectVertex, position), + }, + { + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(ScreenRectVertex, tex_coord), + }, + }}; + } +}; + +std::array MakeOrthographicMatrix(f32 width, f32 height) { 
+ // clang-format off + return { 2.f / width, 0.f, 0.f, 0.f, + 0.f, 2.f / height, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + -1.f, -1.f, 0.f, 1.f}; + // clang-format on +} + +} // Anonymous namespace + +struct WindowAdaptPass::BufferData { + struct { + std::array modelview_matrix; + } uniform; + + std::array vertices; +}; + +WindowAdaptPass::WindowAdaptPass(const Device& device_, const MemoryAllocator& memory_allocator, + size_t num_images, VkFormat frame_format, vk::Sampler&& sampler_, + vk::ShaderModule&& fragment_shader_) + : device(device_), sampler(std::move(sampler_)), fragment_shader(std::move(fragment_shader_)) { + CreateDescriptorPool(num_images); + CreateDescriptorSetLayout(); + CreateDescriptorSets(num_images); + CreatePipelineLayout(); + CreateVertexShader(); + CreateRenderPass(frame_format); + CreatePipeline(); + CreateBuffer(memory_allocator); +} + +WindowAdaptPass::~WindowAdaptPass() = default; + +void WindowAdaptPass::Draw(Scheduler& scheduler, size_t image_index, VkImageView src_image_view, + VkExtent2D src_image_extent, const Common::Rectangle& crop_rect, + const Layout::FramebufferLayout& layout, Frame* dst) { + ConfigureLayout(image_index, src_image_view, layout, crop_rect); + + const VkFramebuffer host_framebuffer{*dst->framebuffer}; + const VkRenderPass renderpass{*render_pass}; + const VkPipeline graphics_pipeline{*pipeline}; + const VkDescriptorSet descriptor_set{descriptor_sets[image_index]}; + const VkExtent2D render_area{ + .width = dst->width, + .height = dst->height, + }; + + scheduler.Record([=](vk::CommandBuffer cmdbuf) { + const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; + const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; + const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; + const VkClearValue clear_color{ + .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, + }; + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + 
.renderPass = renderpass, + .framebuffer = host_framebuffer, + .renderArea = + { + .offset = {0, 0}, + .extent = render_area, + }, + .clearValueCount = 1, + .pClearValues = &clear_color, + }; + const VkViewport viewport{ + .x = 0.0f, + .y = 0.0f, + .width = static_cast(render_area.width), + .height = static_cast(render_area.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + const VkRect2D scissor{ + .offset = {0, 0}, + .extent = render_area, + }; + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, {}); + cmdbuf.Draw(4, 1, 0, 0); + cmdbuf.EndRenderPass(); + }); +} + +VkRenderPass WindowAdaptPass::GetRenderPass() { + return *render_pass; +} + +void WindowAdaptPass::CreateDescriptorPool(size_t num_images) { + const std::array pool_sizes{{ + { + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = static_cast(num_images), + }, + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = static_cast(num_images), + }, + }}; + + const VkDescriptorPoolCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .maxSets = static_cast(num_images), + .poolSizeCount = static_cast(pool_sizes.size()), + .pPoolSizes = pool_sizes.data(), + }; + descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); +} + +void WindowAdaptPass::CreateDescriptorSetLayout() { + const std::array layout_bindings{{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + 
.descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = nullptr, + }, + }}; + + const VkDescriptorSetLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(layout_bindings.size()), + .pBindings = layout_bindings.data(), + }; + + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); +} + +void WindowAdaptPass::CreateDescriptorSets(size_t num_images) { + const std::vector layouts(num_images, *descriptor_set_layout); + descriptor_sets = CreateWrappedDescriptorSets(descriptor_pool, layouts); +} + +void WindowAdaptPass::CreateBuffer(const MemoryAllocator& memory_allocator) { + const VkBufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = sizeof(BufferData), + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + + buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload); +} + +void WindowAdaptPass::CreateRenderPass(VkFormat frame_format) { + const VkAttachmentDescription color_attachment{ + .flags = 0, + .format = frame_format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + + const VkAttachmentReference color_attachment_ref{ + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; + + const VkSubpassDescription subpass_description{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + 
.colorAttachmentCount = 1, + .pColorAttachments = &color_attachment_ref, + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = nullptr, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + + const VkSubpassDependency dependency{ + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + .dependencyFlags = 0, + }; + + const VkRenderPassCreateInfo renderpass_ci{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = 1, + .pAttachments = &color_attachment, + .subpassCount = 1, + .pSubpasses = &subpass_description, + .dependencyCount = 1, + .pDependencies = &dependency, + }; + + render_pass = device.GetLogical().CreateRenderPass(renderpass_ci); +} + +void WindowAdaptPass::CreateVertexShader() { + vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); +} + +void WindowAdaptPass::CreatePipelineLayout() { + const VkPipelineLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; + pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); +} + +void WindowAdaptPass::SetUniformData(BufferData& data, + const Layout::FramebufferLayout& layout) const { + data.uniform.modelview_matrix = + MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); +} + +void WindowAdaptPass::SetVertexData(BufferData& data, const Layout::FramebufferLayout& layout, + const Common::Rectangle& crop) const { + // Map the coordinates to the screen. 
+ const auto& screen = layout.screen; + const auto x = static_cast(screen.left); + const auto y = static_cast(screen.top); + const auto w = static_cast(screen.GetWidth()); + const auto h = static_cast(screen.GetHeight()); + + data.vertices[0] = ScreenRectVertex(x, y, crop.left, crop.top); + data.vertices[1] = ScreenRectVertex(x + w, y, crop.right, crop.top); + data.vertices[2] = ScreenRectVertex(x, y + h, crop.left, crop.bottom); + data.vertices[3] = ScreenRectVertex(x + w, y + h, crop.right, crop.bottom); +} + +void WindowAdaptPass::UpdateDescriptorSet(size_t image_index, VkImageView image_view) { + const VkDescriptorBufferInfo buffer_info{ + .buffer = *buffer, + .offset = offsetof(BufferData, uniform), + .range = sizeof(BufferData::uniform), + }; + + const VkWriteDescriptorSet ubo_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_sets[image_index], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .pImageInfo = nullptr, + .pBufferInfo = &buffer_info, + .pTexelBufferView = nullptr, + }; + + const VkDescriptorImageInfo image_info{ + .sampler = *sampler, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + + const VkWriteDescriptorSet sampler_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_sets[image_index], + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + + device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {}); +} + +void WindowAdaptPass::ConfigureLayout(size_t image_index, VkImageView image_view, + const Layout::FramebufferLayout& layout, + const Common::Rectangle& crop_rect) { + BufferData data; + SetUniformData(data, layout); + SetVertexData(data, layout, 
crop_rect); + + const std::span mapped_span = buffer.Mapped(); + std::memcpy(mapped_span.data(), &data, sizeof(data)); + + UpdateDescriptorSet(image_index, image_view); +} + +void WindowAdaptPass::CreatePipeline() { + const std::array shader_stages{{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = *vertex_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = *fragment_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + }}; + + const auto vertex_binding_description = ScreenRectVertex::GetDescription(); + const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); + + const VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &vertex_binding_description, + .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}, + .pVertexAttributeDescriptions = vertex_attrs_description.data(), + }; + + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = VK_FALSE, + }; + + const VkPipelineViewportStateCreateInfo viewport_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = 1, + .pViewports = nullptr, + .scissorCount = 1, + .pScissors = nullptr, + }; + + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + 
.depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + + const VkPipelineMultisampleStateCreateInfo multisampling_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + + const VkPipelineColorBlendAttachmentState color_blend_attachment{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = 1, + .pAttachments = &color_blend_attachment, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, + }; + + static constexpr std::array dynamic_states{ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }; + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + + const VkGraphicsPipelineCreateInfo pipeline_ci{ + .sType = 
VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = nullptr, + .pViewportState = &viewport_state_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisampling_ci, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = *render_pass, + .subpass = 0, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }; + + pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/window_adapt_pass.h b/src/video_core/renderer_vulkan/present/window_adapt_pass.h new file mode 100644 index 0000000000..5309233a2c --- /dev/null +++ b/src/video_core/renderer_vulkan/present/window_adapt_pass.h @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/math_util.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace Tegra { +struct FramebufferConfig; +} + +namespace Vulkan { + +class Device; +struct Frame; +class MemoryAllocator; +class Scheduler; + +class WindowAdaptPass final { +public: + explicit WindowAdaptPass(const Device& device, const MemoryAllocator& memory_allocator, + size_t num_images, VkFormat frame_format, vk::Sampler&& sampler, + vk::ShaderModule&& fragment_shader); + ~WindowAdaptPass(); + + void Draw(Scheduler& scheduler, size_t image_index, VkImageView src_image_view, + VkExtent2D src_image_extent, const Common::Rectangle& crop_rect, + const Layout::FramebufferLayout& layout, Frame* dst); + + VkRenderPass GetRenderPass(); + +private: + struct BufferData; + + void 
SetUniformData(BufferData& data, const Layout::FramebufferLayout& layout) const; + void SetVertexData(BufferData& data, const Layout::FramebufferLayout& layout, + const Common::Rectangle& crop_rect) const; + void UpdateDescriptorSet(size_t image_index, VkImageView image_view); + void ConfigureLayout(size_t image_index, VkImageView image_view, + const Layout::FramebufferLayout& layout, + const Common::Rectangle& crop_rect); + + void CreateDescriptorPool(size_t num_images); + void CreateDescriptorSetLayout(); + void CreateDescriptorSets(size_t num_images); + void CreatePipelineLayout(); + void CreateVertexShader(); + void CreateRenderPass(VkFormat frame_format); + void CreatePipeline(); + void CreateBuffer(const MemoryAllocator& memory_allocator); + +private: + const Device& device; + vk::DescriptorPool descriptor_pool; + vk::DescriptorSetLayout descriptor_set_layout; + vk::DescriptorSets descriptor_sets; + vk::PipelineLayout pipeline_layout; + vk::Sampler sampler; + vk::ShaderModule vertex_shader; + vk::ShaderModule fragment_shader; + vk::RenderPass render_pass; + vk::Pipeline pipeline; + vk::Buffer buffer; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index e1fe53bbd9..2912aaff6a 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,8 +97,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, render_window.GetFramebufferLayout().height), present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, surface), - blit_screen(device_memory, render_window, device, memory_allocator, swapchain, - present_manager, scheduler), + blit_swapchain(device_memory, device, memory_allocator, present_manager, scheduler), + blit_screenshot(device_memory, device, memory_allocator, present_manager, scheduler), rasterizer(render_window, gpu, device_memory, device, 
memory_allocator, state_tracker, scheduler) { if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { @@ -127,7 +127,9 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { RenderScreenshot(*framebuffer); Frame* frame = present_manager.GetRenderFrame(); - blit_screen.DrawToSwapchain(rasterizer, frame, *framebuffer); + blit_swapchain.DrawToFrame(rasterizer, frame, *framebuffer, + render_window.GetFramebufferLayout(), swapchain.GetImageCount(), + swapchain.GetImageViewFormat()); scheduler.Flush(*frame->render_ready); present_manager.Present(frame); @@ -166,54 +168,65 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr return; } const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; - vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = nullptr, - .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, - .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_B8G8R8A8_UNORM, - .extent = - { - .width = layout.width, - .height = layout.height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }); + auto frame = [&]() { + vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_B8G8R8A8_UNORM, + .extent = + { + .width = layout.width, + .height = layout.height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 
VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }); - const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = *staging_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = VK_FORMAT_B8G8R8A8_UNORM, - .components{ - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }); - const VkExtent2D render_area{.width = layout.width, .height = layout.height}; - const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area); - blit_screen.Draw(rasterizer, framebuffer, *screenshot_fb, layout, render_area); + vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = *staging_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_B8G8R8A8_UNORM, + .components{ + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }); + vk::Framebuffer screenshot_fb = + blit_screenshot.CreateFramebuffer(layout, *dst_view, VK_FORMAT_B8G8R8A8_UNORM); + return Frame{ + .width = 
layout.width, + .height = layout.height, + .image = std::move(staging_image), + .image_view = std::move(dst_view), + .framebuffer = std::move(screenshot_fb), + }; + }(); + + blit_screenshot.DrawToFrame(rasterizer, &frame, framebuffer, layout, 1, + VK_FORMAT_B8G8R8A8_UNORM); const auto buffer_size = static_cast(layout.width * layout.height * 4); const VkBufferCreateInfo dst_buffer_info{ @@ -240,7 +253,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = *staging_image, + .image = *frame.image, .subresourceRange{ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, @@ -258,7 +271,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr .newLayout = VK_IMAGE_LAYOUT_GENERAL, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = *staging_image, + .image = *frame.image, .subresourceRange{ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, @@ -292,7 +305,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, read_barrier); - cmdbuf.CopyImageToBuffer(*staging_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer, + cmdbuf.CopyImageToBuffer(*frame.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer, copy); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, memory_write_barrier, nullptr, image_write_barrier); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index d7d006b202..5b0560e684 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -78,7 +78,8 @@ private: Scheduler scheduler; 
Swapchain swapchain; PresentManager present_manager; - BlitScreen blit_screen; + BlitScreen blit_swapchain; + BlitScreen blit_screenshot; RasterizerVulkan rasterizer; std::optional turbo_mode; }; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index fe1a7b0cd3..fd7c287791 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -9,19 +9,12 @@ #include "common/assert.h" #include "common/common_types.h" -#include "common/math_util.h" -#include "common/polyfill_ranges.h" #include "common/settings.h" #include "core/core.h" #include "core/frontend/emu_window.h" #include "video_core/gpu.h" #include "video_core/host1x/gpu_device_memory_manager.h" -#include "video_core/host_shaders/present_bicubic_frag_spv.h" -#include "video_core/host_shaders/present_gaussian_frag_spv.h" -#include "video_core/host_shaders/vulkan_present_frag_spv.h" -#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h" -#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h" -#include "video_core/host_shaders/vulkan_present_vert_spv.h" +#include "video_core/renderer_vulkan/present/filters.h" #include "video_core/renderer_vulkan/present/fsr.h" #include "video_core/renderer_vulkan/present/fxaa.h" #include "video_core/renderer_vulkan/present/smaa.h" @@ -29,7 +22,6 @@ #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" -#include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/surface.h" #include "video_core/textures/decoders.h" #include "video_core/vulkan_common/vulkan_device.h" @@ -40,48 +32,6 @@ namespace Vulkan { namespace { -struct ScreenRectVertex { - ScreenRectVertex() = default; - explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} - - std::array position; - 
std::array tex_coord; - - static VkVertexInputBindingDescription GetDescription() { - return { - .binding = 0, - .stride = sizeof(ScreenRectVertex), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, - }; - } - - static std::array GetAttributes() { - return {{ - { - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(ScreenRectVertex, position), - }, - { - .location = 1, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(ScreenRectVertex, tex_coord), - }, - }}; - } -}; - -std::array MakeOrthographicMatrix(f32 width, f32 height) { - // clang-format off - return { 2.f / width, 0.f, 0.f, 0.f, - 0.f, 2.f / height, 0.f, 0.f, - 0.f, 0.f, 1.f, 0.f, - -1.f, -1.f, 0.f, 1.f}; - // clang-format on -} - u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { using namespace VideoCore::Surface; return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); @@ -110,43 +60,82 @@ VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { } // Anonymous namespace -struct BlitScreen::BufferData { - struct { - std::array modelview_matrix; - } uniform; - - std::array vertices; - - // Unaligned image data goes here -}; - -BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, - Core::Frontend::EmuWindow& render_window_, const Device& device_, - MemoryAllocator& memory_allocator_, Swapchain& swapchain_, - PresentManager& present_manager_, Scheduler& scheduler_) - : device_memory{device_memory_}, render_window{render_window_}, device{device_}, - memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, - scheduler{scheduler_}, image_count{swapchain.GetImageCount()} { - resource_ticks.resize(image_count); - swapchain_view_format = swapchain.GetImageViewFormat(); - - CreateStaticResources(); - CreateDynamicResources(); -} +BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_, + MemoryAllocator& 
memory_allocator_, PresentManager& present_manager_, + Scheduler& scheduler_) + : device_memory{device_memory_}, device{device_}, memory_allocator{memory_allocator_}, + present_manager{present_manager_}, scheduler{scheduler_}, image_count{1}, + swapchain_view_format{VK_FORMAT_B8G8R8A8_UNORM} {} BlitScreen::~BlitScreen() = default; -void BlitScreen::Recreate() { +void BlitScreen::WaitIdle() { present_manager.WaitPresent(); scheduler.Finish(); device.GetLogical().WaitIdle(); - CreateDynamicResources(); +} + +void BlitScreen::SetWindowAdaptPass(const Layout::FramebufferLayout& layout) { + scaling_filter = Settings::values.scaling_filter.GetValue(); + + const VkExtent2D adapt_size{ + .width = layout.screen.GetWidth(), + .height = layout.screen.GetHeight(), + }; + + fsr.reset(); + + switch (scaling_filter) { + case Settings::ScalingFilter::NearestNeighbor: + window_adapt = + MakeNearestNeighbor(device, memory_allocator, image_count, swapchain_view_format); + break; + case Settings::ScalingFilter::Bicubic: + window_adapt = MakeBicubic(device, memory_allocator, image_count, swapchain_view_format); + break; + case Settings::ScalingFilter::Gaussian: + window_adapt = MakeGaussian(device, memory_allocator, image_count, swapchain_view_format); + break; + case Settings::ScalingFilter::ScaleForce: + window_adapt = MakeScaleForce(device, memory_allocator, image_count, swapchain_view_format); + break; + case Settings::ScalingFilter::Fsr: + fsr = std::make_unique(device, memory_allocator, image_count, adapt_size); + [[fallthrough]]; + case Settings::ScalingFilter::Bilinear: + default: + window_adapt = MakeBilinear(device, memory_allocator, image_count, swapchain_view_format); + break; + } +} + +void BlitScreen::SetAntiAliasPass() { + if (anti_alias && anti_aliasing == Settings::values.anti_aliasing.GetValue()) { + return; + } + + anti_aliasing = Settings::values.anti_aliasing.GetValue(); + + const VkExtent2D render_area{ + .width = 
Settings::values.resolution_info.ScaleUp(raw_width), + .height = Settings::values.resolution_info.ScaleUp(raw_height), + }; + + switch (anti_aliasing) { + case Settings::AntiAliasing::Fxaa: + anti_alias = std::make_unique(device, memory_allocator, image_count, render_area); + break; + case Settings::AntiAliasing::Smaa: + anti_alias = std::make_unique(device, memory_allocator, image_count, render_area); + break; + default: + anti_alias = std::make_unique(); + break; + } } void BlitScreen::Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConfig& framebuffer, - const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout, - VkExtent2D render_area) { - + const Layout::FramebufferLayout& layout, Frame* dst) { const auto texture_info = rasterizer.AccelerateDisplay( framebuffer, framebuffer.address + framebuffer.offset, framebuffer.stride); const u32 texture_width = texture_info ? texture_info->width : framebuffer.width; @@ -156,23 +145,19 @@ void BlitScreen::Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConf const bool use_accelerated = texture_info.has_value(); RefreshResources(framebuffer); + SetAntiAliasPass(); // Finish any pending renderpass scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Wait(resource_ticks[image_index]); - resource_ticks[image_index] = scheduler.CurrentTick(); + SCOPE_EXIT({ resource_ticks[image_index] = scheduler.CurrentTick(); }); VkImage source_image = texture_info ? texture_info->image : *raw_images[image_index]; VkImageView source_image_view = texture_info ? 
texture_info->image_view : *raw_image_views[image_index]; - BufferData data; - SetUniformData(data, layout); - SetVertexData(data, framebuffer, layout, texture_width, texture_height); - const std::span mapped_span = buffer.Mapped(); - std::memcpy(mapped_span.data(), &data, sizeof(data)); if (!use_accelerated) { const u64 image_offset = GetRawImageOffset(framebuffer); @@ -249,145 +234,109 @@ void BlitScreen::Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConf }); } - const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); - if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) { - if (!fxaa) { - const u32 up_scale = Settings::values.resolution_info.up_scale; - const u32 down_shift = Settings::values.resolution_info.down_shift; - const VkExtent2D fxaa_size{ - .width = (up_scale * framebuffer.width) >> down_shift, - .height = (up_scale * framebuffer.height) >> down_shift, - }; - fxaa = std::make_unique(device, memory_allocator, image_count, fxaa_size); - } + source_image_view = anti_alias->Draw(scheduler, image_index, source_image, source_image_view); + + const auto crop_rect = Tegra::NormalizeCrop(framebuffer, texture_width, texture_height); + const VkExtent2D render_extent{ + .width = scaled_width, + .height = scaled_height, + }; - source_image_view = fxaa->Draw(scheduler, image_index, source_image, source_image_view); - } - if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Smaa) { - if (!smaa) { - const u32 up_scale = Settings::values.resolution_info.up_scale; - const u32 down_shift = Settings::values.resolution_info.down_shift; - const VkExtent2D smaa_size{ - .width = (up_scale * framebuffer.width) >> down_shift, - .height = (up_scale * framebuffer.height) >> down_shift, - }; - CreateSMAA(smaa_size); - } - source_image_view = smaa->Draw(scheduler, image_index, source_image, source_image_view); - } if (fsr) { - const auto crop_rect = Tegra::NormalizeCrop(framebuffer, texture_width, texture_height); - 
const VkExtent2D fsr_input_size{ - .width = scaled_width, - .height = scaled_height, + const VkExtent2D adapt_size{ + .width = layout.screen.GetWidth(), + .height = layout.screen.GetHeight(), }; - VkImageView fsr_image_view = - fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); - UpdateDescriptorSet(fsr_image_view, true); + + source_image_view = + fsr->Draw(scheduler, image_index, source_image_view, render_extent, crop_rect); + + const Common::Rectangle output_crop{0, 0, 1, 1}; + window_adapt->Draw(scheduler, image_index, source_image_view, adapt_size, output_crop, + layout, dst); } else { - const bool is_nn = - Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; - UpdateDescriptorSet(source_image_view, is_nn); + window_adapt->Draw(scheduler, image_index, source_image_view, render_extent, crop_rect, + layout, dst); } - - scheduler.Record([this, host_framebuffer, index = image_index, - size = render_area](vk::CommandBuffer cmdbuf) { - const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; - const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; - const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; - const VkClearValue clear_color{ - .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, - }; - const VkRenderPassBeginInfo renderpass_bi{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .pNext = nullptr, - .renderPass = *renderpass, - .framebuffer = host_framebuffer, - .renderArea = - { - .offset = {0, 0}, - .extent = size, - }, - .clearValueCount = 1, - .pClearValues = &clear_color, - }; - const VkViewport viewport{ - .x = 0.0f, - .y = 0.0f, - .width = static_cast(size.width), - .height = static_cast(size.height), - .minDepth = 0.0f, - .maxDepth = 1.0f, - }; - const VkRect2D scissor{ - .offset = {0, 0}, - .extent = size, - }; - cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - auto graphics_pipeline = [this]() { - switch 
(Settings::values.scaling_filter.GetValue()) { - case Settings::ScalingFilter::NearestNeighbor: - case Settings::ScalingFilter::Bilinear: - return *bilinear_pipeline; - case Settings::ScalingFilter::Bicubic: - return *bicubic_pipeline; - case Settings::ScalingFilter::Gaussian: - return *gaussian_pipeline; - case Settings::ScalingFilter::ScaleForce: - return *scaleforce_pipeline; - default: - return *bilinear_pipeline; - } - }(); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); - cmdbuf.SetViewport(0, viewport); - cmdbuf.SetScissor(0, scissor); - - cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, - descriptor_sets[index], {}); - cmdbuf.Draw(4, 1, 0, 0); - cmdbuf.EndRenderPass(); - }); } -void BlitScreen::DrawToSwapchain(RasterizerVulkan& rasterizer, Frame* frame, - const Tegra::FramebufferConfig& framebuffer) { +void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, + const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout, size_t swapchain_images, + VkFormat current_swapchain_view_format) { + bool resource_update_required = false; + bool presentation_recreate_required = false; + + // Recreate dynamic resources if the adapting filter changed + if (!window_adapt || scaling_filter != Settings::values.scaling_filter.GetValue()) { + resource_update_required = true; + } + // Recreate dynamic resources if the the image count or input format changed - const VkFormat current_framebuffer_format = + const VkFormat old_framebuffer_format = std::exchange(framebuffer_view_format, GetFormat(framebuffer)); - if (const std::size_t swapchain_images = swapchain.GetImageCount(); - swapchain_images != image_count || current_framebuffer_format != framebuffer_view_format) { + if (swapchain_images != image_count || old_framebuffer_format != framebuffer_view_format) { image_count = swapchain_images; - Recreate(); + 
resource_update_required = true; } - // Recreate the presentation frame if the dimensions of the window changed - const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); - if (layout.width != frame->width || layout.height != frame->height) { - Recreate(); - present_manager.RecreateFrame(frame, layout.width, layout.height, swapchain_view_format, - *renderpass); + // Recreate the presentation frame if the format or dimensions of the window changed + const VkFormat old_swapchain_view_format = + std::exchange(swapchain_view_format, current_swapchain_view_format); + if (old_swapchain_view_format != current_swapchain_view_format || + layout.width != frame->width || layout.height != frame->height) { + resource_update_required = true; + presentation_recreate_required = true; } - const VkExtent2D render_area{frame->width, frame->height}; - Draw(rasterizer, framebuffer, *frame->framebuffer, layout, render_area); + // If we have a pending resource update, perform it + if (resource_update_required) { + // Wait for idle to ensure no resources are in use + WaitIdle(); + + // Set new number of resource ticks + resource_ticks.resize(swapchain_images); + + // Update window adapt pass + SetWindowAdaptPass(layout); + + // Update frame format if needed + if (presentation_recreate_required) { + present_manager.RecreateFrame(frame, layout.width, layout.height, swapchain_view_format, + window_adapt->GetRenderPass()); + } + } + + Draw(rasterizer, framebuffer, layout, frame); if (++image_index >= image_count) { image_index = 0; } } -vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { - return CreateFramebuffer(image_view, extent, renderpass); +vk::Framebuffer BlitScreen::CreateFramebuffer(const Layout::FramebufferLayout& layout, + const VkImageView& image_view, + VkFormat current_view_format) { + const bool format_updated = + std::exchange(swapchain_view_format, current_view_format) != current_view_format; + if 
(!window_adapt || scaling_filter != Settings::values.scaling_filter.GetValue() || + format_updated) { + WaitIdle(); + SetWindowAdaptPass(layout); + } + const VkExtent2D extent{ + .width = layout.width, + .height = layout.height, + }; + return CreateFramebuffer(image_view, extent, window_adapt->GetRenderPass()); } vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent, - vk::RenderPass& rd) { + VkRenderPass render_pass) { return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .renderPass = *rd, + .renderPass = render_pass, .attachmentCount = 1, .pAttachments = &image_view, .width = extent.width, @@ -396,35 +345,7 @@ vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkE }); } -void BlitScreen::CreateStaticResources() { - CreateShaders(); - CreateSampler(); -} - -void BlitScreen::CreateDynamicResources() { - CreateDescriptorPool(); - CreateDescriptorSetLayout(); - CreateDescriptorSets(); - CreatePipelineLayout(); - CreateRenderPass(); - CreateGraphicsPipeline(); - fsr.reset(); - fxaa.reset(); - smaa.reset(); - if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { - CreateFSR(); - } -} - void BlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { - if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { - if (!fsr) { - CreateFSR(); - } - } else { - fsr.reset(); - } - if (framebuffer.width == raw_width && framebuffer.height == raw_height && framebuffer.pixel_format == pixel_format && !raw_images.empty()) { return; @@ -433,486 +354,13 @@ void BlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { raw_width = framebuffer.width; raw_height = framebuffer.height; pixel_format = framebuffer.pixel_format; + anti_alias.reset(); - fxaa.reset(); - smaa.reset(); ReleaseRawImages(); - CreateStagingBuffer(framebuffer); 
CreateRawImages(framebuffer); } -void BlitScreen::CreateShaders() { - vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); - bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); - bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); - gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); - if (device.IsFloat16Supported()) { - scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV); - } else { - scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV); - } -} - -void BlitScreen::CreateDescriptorPool() { - const std::array pool_sizes{{ - { - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = static_cast(image_count), - }, - { - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = static_cast(image_count), - }, - }}; - - const VkDescriptorPoolCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .maxSets = static_cast(image_count), - .poolSizeCount = static_cast(pool_sizes.size()), - .pPoolSizes = pool_sizes.data(), - }; - descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); -} - -void BlitScreen::CreateRenderPass() { - renderpass = CreateRenderPassImpl(swapchain_view_format); -} - -vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format) { - const VkAttachmentDescription color_attachment{ - .flags = 0, - .format = format, - .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - - const VkAttachmentReference color_attachment_ref{ - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }; - - const VkSubpassDescription subpass_description{ - .flags = 0, - 
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .pInputAttachments = nullptr, - .colorAttachmentCount = 1, - .pColorAttachments = &color_attachment_ref, - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = nullptr, - .preserveAttachmentCount = 0, - .pPreserveAttachments = nullptr, - }; - - const VkSubpassDependency dependency{ - .srcSubpass = VK_SUBPASS_EXTERNAL, - .dstSubpass = 0, - .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - .dependencyFlags = 0, - }; - - const VkRenderPassCreateInfo renderpass_ci{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = 1, - .pAttachments = &color_attachment, - .subpassCount = 1, - .pSubpasses = &subpass_description, - .dependencyCount = 1, - .pDependencies = &dependency, - }; - - return device.GetLogical().CreateRenderPass(renderpass_ci); -} - -void BlitScreen::CreateDescriptorSetLayout() { - const std::array layout_bindings{{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = nullptr, - }, - }}; - - const VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(layout_bindings.size()), - .pBindings = layout_bindings.data(), - }; - - descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); -} - -void BlitScreen::CreateDescriptorSets() { - const std::vector layouts(image_count, *descriptor_set_layout); - - 
const VkDescriptorSetAllocateInfo ai{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .pNext = nullptr, - .descriptorPool = *descriptor_pool, - .descriptorSetCount = static_cast(image_count), - .pSetLayouts = layouts.data(), - }; - - descriptor_sets = descriptor_pool.Allocate(ai); -} - -void BlitScreen::CreatePipelineLayout() { - const VkPipelineLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); -} - -void BlitScreen::CreateGraphicsPipeline() { - const std::array bilinear_shader_stages{{ - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = *vertex_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = *bilinear_fragment_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - }}; - - const std::array bicubic_shader_stages{{ - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = *vertex_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = *bicubic_fragment_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - }}; - - const std::array gaussian_shader_stages{{ - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = *vertex_shader, - .pName = 
"main", - .pSpecializationInfo = nullptr, - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = *gaussian_fragment_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - }}; - - const std::array scaleforce_shader_stages{{ - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = *vertex_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = *scaleforce_fragment_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - }}; - - const auto vertex_binding_description = ScreenRectVertex::GetDescription(); - const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); - - const VkPipelineVertexInputStateCreateInfo vertex_input_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = &vertex_binding_description, - .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}, - .pVertexAttributeDescriptions = vertex_attrs_description.data(), - }; - - const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = VK_FALSE, - }; - - const VkPipelineViewportStateCreateInfo viewport_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = 1, - .pViewports = nullptr, - .scissorCount = 1, - .pScissors = nullptr, - }; - - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ - .sType = 
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthClampEnable = VK_FALSE, - .rasterizerDiscardEnable = VK_FALSE, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CLOCKWISE, - .depthBiasEnable = VK_FALSE, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - }; - - const VkPipelineMultisampleStateCreateInfo multisampling_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, - .sampleShadingEnable = VK_FALSE, - .minSampleShading = 0.0f, - .pSampleMask = nullptr, - .alphaToCoverageEnable = VK_FALSE, - .alphaToOneEnable = VK_FALSE, - }; - - const VkPipelineColorBlendAttachmentState color_blend_attachment{ - .blendEnable = VK_FALSE, - .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, - .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, - .colorBlendOp = VK_BLEND_OP_ADD, - .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .alphaBlendOp = VK_BLEND_OP_ADD, - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, - }; - - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .logicOpEnable = VK_FALSE, - .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = 1, - .pAttachments = &color_blend_attachment, - .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, - }; - - static constexpr std::array dynamic_states{ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }; - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .dynamicStateCount = static_cast(dynamic_states.size()), - .pDynamicStates = 
dynamic_states.data(), - }; - - const VkGraphicsPipelineCreateInfo bilinear_pipeline_ci{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(bilinear_shader_stages.size()), - .pStages = bilinear_shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = nullptr, - .pViewportState = &viewport_state_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisampling_ci, - .pDepthStencilState = nullptr, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *pipeline_layout, - .renderPass = *renderpass, - .subpass = 0, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }; - - const VkGraphicsPipelineCreateInfo bicubic_pipeline_ci{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(bicubic_shader_stages.size()), - .pStages = bicubic_shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = nullptr, - .pViewportState = &viewport_state_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisampling_ci, - .pDepthStencilState = nullptr, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *pipeline_layout, - .renderPass = *renderpass, - .subpass = 0, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }; - - const VkGraphicsPipelineCreateInfo gaussian_pipeline_ci{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(gaussian_shader_stages.size()), - .pStages = gaussian_shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = nullptr, - .pViewportState = &viewport_state_ci, - .pRasterizationState = &rasterization_ci, - 
.pMultisampleState = &multisampling_ci, - .pDepthStencilState = nullptr, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *pipeline_layout, - .renderPass = *renderpass, - .subpass = 0, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }; - - const VkGraphicsPipelineCreateInfo scaleforce_pipeline_ci{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(scaleforce_shader_stages.size()), - .pStages = scaleforce_shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = nullptr, - .pViewportState = &viewport_state_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisampling_ci, - .pDepthStencilState = nullptr, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *pipeline_layout, - .renderPass = *renderpass, - .subpass = 0, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }; - - bilinear_pipeline = device.GetLogical().CreateGraphicsPipeline(bilinear_pipeline_ci); - bicubic_pipeline = device.GetLogical().CreateGraphicsPipeline(bicubic_pipeline_ci); - gaussian_pipeline = device.GetLogical().CreateGraphicsPipeline(gaussian_pipeline_ci); - scaleforce_pipeline = device.GetLogical().CreateGraphicsPipeline(scaleforce_pipeline_ci); -} - -void BlitScreen::CreateSampler() { - const VkSamplerCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .magFilter = VK_FILTER_LINEAR, - .minFilter = VK_FILTER_LINEAR, - .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, - .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - .mipLodBias = 0.0f, - .anisotropyEnable = VK_FALSE, - .maxAnisotropy = 0.0f, - .compareEnable = VK_FALSE, - .compareOp = VK_COMPARE_OP_NEVER, - .minLod = 
0.0f, - .maxLod = 0.0f, - .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, - .unnormalizedCoordinates = VK_FALSE, - }; - - const VkSamplerCreateInfo ci_nn{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .magFilter = VK_FILTER_NEAREST, - .minFilter = VK_FILTER_NEAREST, - .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, - .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - .mipLodBias = 0.0f, - .anisotropyEnable = VK_FALSE, - .maxAnisotropy = 0.0f, - .compareEnable = VK_FALSE, - .compareOp = VK_COMPARE_OP_NEVER, - .minLod = 0.0f, - .maxLod = 0.0f, - .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, - .unnormalizedCoordinates = VK_FALSE, - }; - - sampler = device.GetLogical().CreateSampler(ci); - nn_sampler = device.GetLogical().CreateSampler(ci_nn); -} - void BlitScreen::ReleaseRawImages() { for (const u64 tick : resource_ticks) { scheduler.Wait(tick); @@ -1000,109 +448,12 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { } } -void BlitScreen::UpdateDescriptorSet(VkImageView image_view, bool nn) const { - const VkDescriptorBufferInfo buffer_info{ - .buffer = *buffer, - .offset = offsetof(BufferData, uniform), - .range = sizeof(BufferData::uniform), - }; - - const VkWriteDescriptorSet ubo_write{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = descriptor_sets[image_index], - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .pImageInfo = nullptr, - .pBufferInfo = &buffer_info, - .pTexelBufferView = nullptr, - }; - - const VkDescriptorImageInfo image_info{ - .sampler = nn ? 
*nn_sampler : *sampler, - .imageView = image_view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - - const VkWriteDescriptorSet sampler_write{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = descriptor_sets[image_index], - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = &image_info, - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - - device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {}); -} - -void BlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const { - data.uniform.modelview_matrix = - MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); -} - -void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout layout, u32 texture_width, - u32 texture_height) const { - f32 left, top, right, bottom; - - if (fsr) { - // FSR has already applied the crop, so we just want to render the image - // it has produced. - left = 0; - top = 0; - right = 1; - bottom = 1; - } else { - // Get the normalized crop rectangle. - const auto crop = Tegra::NormalizeCrop(framebuffer, texture_width, texture_height); - - // Apply the crop. - left = crop.left; - top = crop.top; - right = crop.right; - bottom = crop.bottom; - } - - // Map the coordinates to the screen. 
- const auto& screen = layout.screen; - const auto x = static_cast(screen.left); - const auto y = static_cast(screen.top); - const auto w = static_cast(screen.GetWidth()); - const auto h = static_cast(screen.GetHeight()); - - data.vertices[0] = ScreenRectVertex(x, y, left, top); - data.vertices[1] = ScreenRectVertex(x + w, y, right, top); - data.vertices[2] = ScreenRectVertex(x, y + h, left, bottom); - data.vertices[3] = ScreenRectVertex(x + w, y + h, right, bottom); -} - -void BlitScreen::CreateSMAA(VkExtent2D smaa_size) { - smaa = std::make_unique(device, memory_allocator, image_count, smaa_size); -} - -void BlitScreen::CreateFSR() { - const auto& layout = render_window.GetFramebufferLayout(); - const VkExtent2D fsr_size{ - .width = layout.screen.GetWidth(), - .height = layout.screen.GetHeight(), - }; - fsr = std::make_unique(device, memory_allocator, image_count, fsr_size); -} - u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const { - return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; + return GetSizeInBytes(framebuffer) * image_count; } u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const { - constexpr auto first_image_offset = static_cast(sizeof(BufferData)); - return first_image_offset + GetSizeInBytes(framebuffer) * image_index; + return GetSizeInBytes(framebuffer) * image_index; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index d7f8effa23..555b3d82ef 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -30,16 +30,20 @@ namespace Service::android { enum class PixelFormat : u32; } +namespace Settings { +enum class AntiAliasing : u32; +enum class ScalingFilter : u32; +} // namespace Settings + namespace Vulkan { +class AntiAliasPass; class Device; class FSR; -class FXAA; class RasterizerVulkan; class Scheduler; -class 
SMAA; -class Swapchain; class PresentManager; +class WindowAdaptPass; struct Frame; @@ -54,103 +58,66 @@ struct FramebufferTextureInfo { class BlitScreen { public: - explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, - Core::Frontend::EmuWindow& render_window, const Device& device, - MemoryAllocator& memory_manager, Swapchain& swapchain, - PresentManager& present_manager, Scheduler& scheduler); + explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, const Device& device, + MemoryAllocator& memory_allocator, PresentManager& present_manager, + Scheduler& scheduler); ~BlitScreen(); - void Recreate(); + void DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, + const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout, size_t swapchain_images, + VkFormat current_swapchain_view_format); - void Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConfig& framebuffer, - const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout, - VkExtent2D render_area); - - void DrawToSwapchain(RasterizerVulkan& rasterizer, Frame* frame, - const Tegra::FramebufferConfig& framebuffer); - - [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, - VkExtent2D extent); - - [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, - VkExtent2D extent, vk::RenderPass& rd); + [[nodiscard]] vk::Framebuffer CreateFramebuffer(const Layout::FramebufferLayout& layout, + const VkImageView& image_view, + VkFormat current_view_format); private: - struct BufferData; + void WaitIdle(); + void SetWindowAdaptPass(const Layout::FramebufferLayout& layout); + void SetAntiAliasPass(); - void CreateStaticResources(); - void CreateShaders(); - void CreateDescriptorPool(); - void CreateRenderPass(); - vk::RenderPass CreateRenderPassImpl(VkFormat format); - void CreateDescriptorSetLayout(); - void CreateDescriptorSets(); - void CreatePipelineLayout(); - void 
CreateGraphicsPipeline(); - void CreateSampler(); + void Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout, Frame* dst); - void CreateDynamicResources(); + vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent, + VkRenderPass render_pass); void RefreshResources(const Tegra::FramebufferConfig& framebuffer); void ReleaseRawImages(); void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); - void UpdateDescriptorSet(VkImageView image_view, bool nn) const; - void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; - void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout layout, u32 texture_width, - u32 texture_height) const; - - void CreateSMAA(VkExtent2D smaa_size); - void CreateFSR(); - u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; Tegra::MaxwellDeviceMemoryManager& device_memory; - Core::Frontend::EmuWindow& render_window; const Device& device; MemoryAllocator& memory_allocator; - Swapchain& swapchain; PresentManager& present_manager; Scheduler& scheduler; std::size_t image_count; std::size_t image_index{}; - vk::ShaderModule vertex_shader; - vk::ShaderModule bilinear_fragment_shader; - vk::ShaderModule bicubic_fragment_shader; - vk::ShaderModule gaussian_fragment_shader; - vk::ShaderModule scaleforce_fragment_shader; - vk::DescriptorPool descriptor_pool; - vk::DescriptorSetLayout descriptor_set_layout; - vk::PipelineLayout pipeline_layout; - vk::Pipeline bilinear_pipeline; - vk::Pipeline bicubic_pipeline; - vk::Pipeline gaussian_pipeline; - vk::Pipeline scaleforce_pipeline; - vk::RenderPass renderpass; - vk::DescriptorSets descriptor_sets; - vk::Sampler nn_sampler; - vk::Sampler sampler; - 
vk::Buffer buffer; std::vector resource_ticks; std::vector raw_images; std::vector raw_image_views; - u32 raw_width = 0; u32 raw_height = 0; + Service::android::PixelFormat pixel_format{}; VkFormat framebuffer_view_format; VkFormat swapchain_view_format; + Settings::AntiAliasing anti_aliasing{}; + Settings::ScalingFilter scaling_filter{}; + std::unique_ptr fsr; - std::unique_ptr smaa; - std::unique_ptr fxaa; + std::unique_ptr anti_alias; + std::unique_ptr window_adapt; }; } // namespace Vulkan From b90eff4bc666548a77eb58ac152408c80ff952b3 Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 14 Jan 2024 21:11:28 -0500 Subject: [PATCH 06/15] renderer_opengl: split out SMAA --- src/video_core/CMakeLists.txt | 7 +- .../renderer_opengl/gl_blit_screen.cpp | 90 ++------------- .../renderer_opengl/gl_blit_screen.h | 17 +-- .../{gl_fsr.cpp => present/fsr.cpp} | 2 +- .../{gl_fsr.h => present/fsr.h} | 0 .../renderer_opengl/present/smaa.cpp | 108 ++++++++++++++++++ src/video_core/renderer_opengl/present/smaa.h | 35 ++++++ src/video_core/renderer_opengl/present/util.h | 32 ++++++ .../renderer_opengl/renderer_opengl.cpp | 1 - .../renderer_opengl/renderer_opengl.h | 1 - 10 files changed, 197 insertions(+), 96 deletions(-) rename src/video_core/renderer_opengl/{gl_fsr.cpp => present/fsr.cpp} (98%) rename src/video_core/renderer_opengl/{gl_fsr.h => present/fsr.h} (100%) create mode 100644 src/video_core/renderer_opengl/present/smaa.cpp create mode 100644 src/video_core/renderer_opengl/present/smaa.h create mode 100644 src/video_core/renderer_opengl/present/util.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 825815ebd1..524e2cae81 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -116,6 +116,11 @@ add_library(video_core STATIC renderer_null/null_rasterizer.h renderer_null/renderer_null.cpp renderer_null/renderer_null.h + renderer_opengl/present/fsr.cpp + renderer_opengl/present/fsr.h + renderer_opengl/present/smaa.cpp + 
renderer_opengl/present/smaa.h + renderer_opengl/present/util.h renderer_opengl/blit_image.cpp renderer_opengl/blit_image.h renderer_opengl/gl_blit_screen.cpp @@ -129,8 +134,6 @@ add_library(video_core STATIC renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h - renderer_opengl/gl_fsr.cpp - renderer_opengl/gl_fsr.h renderer_opengl/gl_graphics_pipeline.cpp renderer_opengl/gl_graphics_pipeline.h renderer_opengl/gl_rasterizer.cpp diff --git a/src/video_core/renderer_opengl/gl_blit_screen.cpp b/src/video_core/renderer_opengl/gl_blit_screen.cpp index 88757ba388..cc343f1713 100644 --- a/src/video_core/renderer_opengl/gl_blit_screen.cpp +++ b/src/video_core/renderer_opengl/gl_blit_screen.cpp @@ -13,22 +13,16 @@ #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_scaleforce_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" -#include "video_core/host_shaders/opengl_smaa_glsl.h" #include "video_core/host_shaders/present_bicubic_frag.h" #include "video_core/host_shaders/present_gaussian_frag.h" -#include "video_core/host_shaders/smaa_blending_weight_calculation_frag.h" -#include "video_core/host_shaders/smaa_blending_weight_calculation_vert.h" -#include "video_core/host_shaders/smaa_edge_detection_frag.h" -#include "video_core/host_shaders/smaa_edge_detection_vert.h" -#include "video_core/host_shaders/smaa_neighborhood_blending_frag.h" -#include "video_core/host_shaders/smaa_neighborhood_blending_vert.h" + #include "video_core/renderer_opengl/gl_blit_screen.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/smaa_area_tex.h" -#include "video_core/smaa_search_tex.h" +#include "video_core/renderer_opengl/present/fsr.h" +#include 
"video_core/renderer_opengl/present/smaa.h" #include "video_core/textures/decoders.h" namespace OpenGL { @@ -84,24 +78,6 @@ BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, shader_source.replace(pos, include_string.size(), include_content); }; - const auto SmaaShader = [&](std::string_view specialized_source, GLenum stage) { - std::string shader_source{specialized_source}; - replace_include(shader_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL); - return CreateProgram(shader_source, stage); - }; - - smaa_edge_detection_vert = SmaaShader(HostShaders::SMAA_EDGE_DETECTION_VERT, GL_VERTEX_SHADER); - smaa_edge_detection_frag = - SmaaShader(HostShaders::SMAA_EDGE_DETECTION_FRAG, GL_FRAGMENT_SHADER); - smaa_blending_weight_calculation_vert = - SmaaShader(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_VERT, GL_VERTEX_SHADER); - smaa_blending_weight_calculation_frag = - SmaaShader(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_FRAG, GL_FRAGMENT_SHADER); - smaa_neighborhood_blending_vert = - SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_VERT, GL_VERTEX_SHADER); - smaa_neighborhood_blending_frag = - SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_FRAG, GL_FRAGMENT_SHADER); - present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); @@ -157,15 +133,6 @@ BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, aa_framebuffer.Create(); - smaa_area_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(smaa_area_tex.handle, 1, GL_RG8, AREATEX_WIDTH, AREATEX_HEIGHT); - glTextureSubImage2D(smaa_area_tex.handle, 0, 0, 0, AREATEX_WIDTH, AREATEX_HEIGHT, GL_RG, - GL_UNSIGNED_BYTE, areaTexBytes); - smaa_search_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(smaa_search_tex.handle, 1, GL_R8, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT); - 
glTextureSubImage2D(smaa_search_tex.handle, 0, 0, 0, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT, GL_RED, - GL_UNSIGNED_BYTE, searchTexBytes); - // Enable unified vertex attributes and query vertex buffer address when the driver supports it if (device.HasVertexBufferUnifiedMemory()) { glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); @@ -176,6 +143,8 @@ BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, } } +BlitScreen::~BlitScreen() = default; + FramebufferTextureInfo BlitScreen::PrepareRenderTarget( const Tegra::FramebufferConfig& framebuffer) { // If framebuffer is provided, reload it from memory to a texture @@ -281,16 +250,10 @@ void BlitScreen::ConfigureFramebufferTexture(const Tegra::FramebufferConfig& fra Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, aa_texture.handle, 0); - smaa_edges_tex.Release(); - smaa_edges_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(smaa_edges_tex.handle, 1, GL_RG16F, - Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), - Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); - smaa_blend_tex.Release(); - smaa_blend_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(smaa_blend_tex.handle, 1, GL_RGBA16F, - Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), - Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); + + smaa = std::make_unique( + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); } void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, @@ -363,39 +326,10 @@ void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, aa_framebuffer.handle); 
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + glBindTextureUnit(0, aa_texture.handle); } break; case Settings::AntiAliasing::Smaa: { - glClearColor(0, 0, 0, 0); - glFrontFace(GL_CCW); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, aa_framebuffer.handle); - glBindSampler(1, present_sampler.handle); - glBindSampler(2, present_sampler.handle); - - glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, - smaa_edges_tex.handle, 0); - glClear(GL_COLOR_BUFFER_BIT); - program_manager.BindPresentPrograms(smaa_edge_detection_vert.handle, - smaa_edge_detection_frag.handle); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glBindTextureUnit(0, smaa_edges_tex.handle); - glBindTextureUnit(1, smaa_area_tex.handle); - glBindTextureUnit(2, smaa_search_tex.handle); - glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, - smaa_blend_tex.handle, 0); - glClear(GL_COLOR_BUFFER_BIT); - program_manager.BindPresentPrograms(smaa_blending_weight_calculation_vert.handle, - smaa_blending_weight_calculation_frag.handle); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glBindTextureUnit(0, info.display_texture); - glBindTextureUnit(1, smaa_blend_tex.handle); - glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, - aa_texture.handle, 0); - program_manager.BindPresentPrograms(smaa_neighborhood_blending_vert.handle, - smaa_neighborhood_blending_frag.handle); - glDrawArrays(GL_TRIANGLES, 0, 3); - glFrontFace(GL_CW); + glBindTextureUnit(0, smaa->Draw(program_manager, info.display_texture)); } break; default: UNREACHABLE(); @@ -403,8 +337,6 @@ void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); - - glBindTextureUnit(0, aa_texture.handle); } glDisablei(GL_SCISSOR_TEST, 0); diff --git a/src/video_core/renderer_opengl/gl_blit_screen.h b/src/video_core/renderer_opengl/gl_blit_screen.h index 13d769958c..945c7226ac 100644 --- 
a/src/video_core/renderer_opengl/gl_blit_screen.h +++ b/src/video_core/renderer_opengl/gl_blit_screen.h @@ -8,7 +8,6 @@ #include "core/hle/service/nvnflinger/pixel_format.h" #include "video_core/host1x/gpu_device_memory_manager.h" -#include "video_core/renderer_opengl/gl_fsr.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace Layout { @@ -22,7 +21,10 @@ struct FramebufferConfig; namespace OpenGL { class Device; +class FSR; +class ProgramManager; class RasterizerOpenGL; +class SMAA; class StateTracker; /// Structure used for storing information about the textures for the Switch screen @@ -50,6 +52,7 @@ public: Tegra::MaxwellDeviceMemoryManager& device_memory, StateTracker& state_tracker, ProgramManager& program_manager, Device& device); + ~BlitScreen(); void ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer); @@ -87,18 +90,8 @@ private: OGLTexture aa_texture; OGLFramebuffer aa_framebuffer; - OGLProgram smaa_edge_detection_vert; - OGLProgram smaa_blending_weight_calculation_vert; - OGLProgram smaa_neighborhood_blending_vert; - OGLProgram smaa_edge_detection_frag; - OGLProgram smaa_blending_weight_calculation_frag; - OGLProgram smaa_neighborhood_blending_frag; - OGLTexture smaa_area_tex; - OGLTexture smaa_search_tex; - OGLTexture smaa_edges_tex; - OGLTexture smaa_blend_tex; - std::unique_ptr fsr; + std::unique_ptr smaa; /// OpenGL framebuffer data std::vector gl_framebuffer_data; diff --git a/src/video_core/renderer_opengl/gl_fsr.cpp b/src/video_core/renderer_opengl/present/fsr.cpp similarity index 98% rename from src/video_core/renderer_opengl/gl_fsr.cpp rename to src/video_core/renderer_opengl/present/fsr.cpp index 429dcdc6ca..e5945b80bb 100644 --- a/src/video_core/renderer_opengl/gl_fsr.cpp +++ b/src/video_core/renderer_opengl/present/fsr.cpp @@ -3,9 +3,9 @@ #include "common/settings.h" #include "video_core/fsr.h" -#include "video_core/renderer_opengl/gl_fsr.h" #include "video_core/renderer_opengl/gl_shader_manager.h" 
#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/present/fsr.h" namespace OpenGL { using namespace FSR; diff --git a/src/video_core/renderer_opengl/gl_fsr.h b/src/video_core/renderer_opengl/present/fsr.h similarity index 100% rename from src/video_core/renderer_opengl/gl_fsr.h rename to src/video_core/renderer_opengl/present/fsr.h diff --git a/src/video_core/renderer_opengl/present/smaa.cpp b/src/video_core/renderer_opengl/present/smaa.cpp new file mode 100644 index 0000000000..a9a0eb6c69 --- /dev/null +++ b/src/video_core/renderer_opengl/present/smaa.cpp @@ -0,0 +1,108 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/host_shaders/opengl_smaa_glsl.h" +#include "video_core/host_shaders/smaa_blending_weight_calculation_frag.h" +#include "video_core/host_shaders/smaa_blending_weight_calculation_vert.h" +#include "video_core/host_shaders/smaa_edge_detection_frag.h" +#include "video_core/host_shaders/smaa_edge_detection_vert.h" +#include "video_core/host_shaders/smaa_neighborhood_blending_frag.h" +#include "video_core/host_shaders/smaa_neighborhood_blending_vert.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/present/smaa.h" +#include "video_core/renderer_opengl/present/util.h" +#include "video_core/smaa_area_tex.h" +#include "video_core/smaa_search_tex.h" + +namespace OpenGL { + +SMAA::SMAA(u32 width, u32 height) { + const auto SmaaShader = [&](std::string_view specialized_source, GLenum stage) { + std::string shader_source{specialized_source}; + ReplaceInclude(shader_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL); + return CreateProgram(shader_source, stage); + }; + + edge_detection_vert = SmaaShader(HostShaders::SMAA_EDGE_DETECTION_VERT, GL_VERTEX_SHADER); + edge_detection_frag = 
SmaaShader(HostShaders::SMAA_EDGE_DETECTION_FRAG, GL_FRAGMENT_SHADER); + blending_weight_calculation_vert = + SmaaShader(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_VERT, GL_VERTEX_SHADER); + blending_weight_calculation_frag = + SmaaShader(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_FRAG, GL_FRAGMENT_SHADER); + neighborhood_blending_vert = + SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_VERT, GL_VERTEX_SHADER); + neighborhood_blending_frag = + SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_FRAG, GL_FRAGMENT_SHADER); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); + glPixelStorei(GL_UNPACK_LSB_FIRST, GL_FALSE); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0); + glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0); + glPixelStorei(GL_UNPACK_SKIP_ROWS, 0); + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + + area_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(area_tex.handle, 1, GL_RG8, AREATEX_WIDTH, AREATEX_HEIGHT); + glTextureSubImage2D(area_tex.handle, 0, 0, 0, AREATEX_WIDTH, AREATEX_HEIGHT, GL_RG, + GL_UNSIGNED_BYTE, areaTexBytes); + search_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(search_tex.handle, 1, GL_R8, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT); + glTextureSubImage2D(search_tex.handle, 0, 0, 0, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT, GL_RED, + GL_UNSIGNED_BYTE, searchTexBytes); + + edges_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(edges_tex.handle, 1, GL_RG16F, width, height); + + blend_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(blend_tex.handle, 1, GL_RGBA16F, width, height); + + sampler = CreateBilinearSampler(); + + framebuffer.Create(); + + texture.Create(GL_TEXTURE_2D); + glTextureStorage2D(texture.handle, 1, GL_RGBA16F, width, height); + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, texture.handle, 0); +} + +SMAA::~SMAA() = default; + +GLuint SMAA::Draw(ProgramManager& program_manager, GLuint input_texture) { + glClearColor(0, 0, 0, 0); + 
glFrontFace(GL_CCW); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); + glBindSampler(0, sampler.handle); + glBindSampler(1, sampler.handle); + glBindSampler(2, sampler.handle); + + glBindTextureUnit(0, input_texture); + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, edges_tex.handle, 0); + glClear(GL_COLOR_BUFFER_BIT); + program_manager.BindPresentPrograms(edge_detection_vert.handle, edge_detection_frag.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glBindTextureUnit(0, edges_tex.handle); + glBindTextureUnit(1, area_tex.handle); + glBindTextureUnit(2, search_tex.handle); + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, blend_tex.handle, 0); + glClear(GL_COLOR_BUFFER_BIT); + program_manager.BindPresentPrograms(blending_weight_calculation_vert.handle, + blending_weight_calculation_frag.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glBindTextureUnit(0, input_texture); + glBindTextureUnit(1, blend_tex.handle); + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, texture.handle, 0); + program_manager.BindPresentPrograms(neighborhood_blending_vert.handle, + neighborhood_blending_frag.handle); + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLES, 0, 3); + glFrontFace(GL_CW); + + return texture.handle; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/smaa.h b/src/video_core/renderer_opengl/present/smaa.h new file mode 100644 index 0000000000..a48cb4fa9c --- /dev/null +++ b/src/video_core/renderer_opengl/present/smaa.h @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +class ProgramManager; + +class SMAA { +public: + explicit SMAA(u32 width, u32 height); + ~SMAA(); + + GLuint Draw(ProgramManager& program_manager, GLuint input_texture); + +private: + OGLProgram edge_detection_vert; + 
OGLProgram blending_weight_calculation_vert; + OGLProgram neighborhood_blending_vert; + OGLProgram edge_detection_frag; + OGLProgram blending_weight_calculation_frag; + OGLProgram neighborhood_blending_frag; + OGLTexture area_tex; + OGLTexture search_tex; + OGLTexture edges_tex; + OGLTexture blend_tex; + OGLSampler sampler; + OGLFramebuffer framebuffer; + OGLTexture texture; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/util.h b/src/video_core/renderer_opengl/present/util.h new file mode 100644 index 0000000000..0aa8b110c1 --- /dev/null +++ b/src/video_core/renderer_opengl/present/util.h @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "common/assert.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +static inline void ReplaceInclude(std::string& shader_source, std::string_view include_name, + std::string_view include_content) { + const std::string include_string = fmt::format("#include \"{}\"", include_name); + const std::size_t pos = shader_source.find(include_string); + ASSERT(pos != std::string::npos); + shader_source.replace(pos, include_string.size(), include_content); +}; + +static inline OGLSampler CreateBilinearSampler() { + OGLSampler sampler; + sampler.Create(); + glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glSamplerParameteri(sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); + return sampler; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 38b0aacf47..3d75fd17a3 100644 --- 
a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -17,7 +17,6 @@ #include "core/frontend/emu_window.h" #include "core/telemetry_session.h" #include "video_core/renderer_opengl/gl_blit_screen.h" -#include "video_core/renderer_opengl/gl_fsr.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 23aff055aa..7ab1633722 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -10,7 +10,6 @@ #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_fsr.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" From 60ee29aac386c31b9a9add6a12f051273fa45dae Mon Sep 17 00:00:00 2001 From: Liam Date: Mon, 15 Jan 2024 00:09:34 -0500 Subject: [PATCH 07/15] renderer_opengl: split out FXAA --- src/video_core/CMakeLists.txt | 2 + src/video_core/host_shaders/fxaa.vert | 4 +- .../renderer_opengl/gl_blit_screen.cpp | 22 +++------- .../renderer_opengl/gl_blit_screen.h | 6 +-- .../renderer_opengl/present/fxaa.cpp | 40 +++++++++++++++++++ src/video_core/renderer_opengl/present/fxaa.h | 27 +++++++++++++ .../renderer_vulkan/present/fxaa.cpp | 2 +- 7 files changed, 79 insertions(+), 24 deletions(-) create mode 100644 src/video_core/renderer_opengl/present/fxaa.cpp create mode 100644 src/video_core/renderer_opengl/present/fxaa.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 524e2cae81..9879c3ad70 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -118,6 +118,8 @@ add_library(video_core 
STATIC renderer_null/renderer_null.h renderer_opengl/present/fsr.cpp renderer_opengl/present/fsr.h + renderer_opengl/present/fxaa.cpp + renderer_opengl/present/fxaa.h renderer_opengl/present/smaa.cpp renderer_opengl/present/smaa.h renderer_opengl/present/util.h diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert index c2717d90d0..223ab785e0 100644 --- a/src/video_core/host_shaders/fxaa.vert +++ b/src/video_core/host_shaders/fxaa.vert @@ -7,8 +7,8 @@ out gl_PerVertex { vec4 gl_Position; }; -const vec2 vertices[4] = - vec2[4](vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0)); +const vec2 vertices[3] = + vec2[3](vec2(-1,-1), vec2(3,-1), vec2(-1, 3)); layout (location = 0) out vec4 posPos; diff --git a/src/video_core/renderer_opengl/gl_blit_screen.cpp b/src/video_core/renderer_opengl/gl_blit_screen.cpp index cc343f1713..44f6a0922d 100644 --- a/src/video_core/renderer_opengl/gl_blit_screen.cpp +++ b/src/video_core/renderer_opengl/gl_blit_screen.cpp @@ -5,8 +5,6 @@ #include "video_core/host_shaders/ffx_a_h.h" #include "video_core/host_shaders/ffx_fsr1_h.h" #include "video_core/host_shaders/full_screen_triangle_vert.h" -#include "video_core/host_shaders/fxaa_frag.h" -#include "video_core/host_shaders/fxaa_vert.h" #include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h" #include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h" #include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h" @@ -22,6 +20,7 @@ #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/present/fsr.h" +#include "video_core/renderer_opengl/present/fxaa.h" #include "video_core/renderer_opengl/present/smaa.h" #include "video_core/textures/decoders.h" @@ -67,9 +66,6 @@ BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, : rasterizer(rasterizer_), device_memory(device_memory_), state_tracker(state_tracker_), 
program_manager(program_manager_), device(device_) { // Create shader programs - fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER); - fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); - const auto replace_include = [](std::string& shader_source, std::string_view include_name, std::string_view include_content) { const std::string include_string = fmt::format("#include \"{}\"", include_name); @@ -131,8 +127,6 @@ BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, glClearTexImage(framebuffer_texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data); - aa_framebuffer.Create(); - // Enable unified vertex attributes and query vertex buffer address when the driver supports it if (device.HasVertexBufferUnifiedMemory()) { glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); @@ -244,13 +238,10 @@ void BlitScreen::ConfigureFramebufferTexture(const Tegra::FramebufferConfig& fra framebuffer_texture.resource.Create(GL_TEXTURE_2D); glTextureStorage2D(framebuffer_texture.resource.handle, 1, internal_format, framebuffer_texture.width, framebuffer_texture.height); - aa_texture.Release(); - aa_texture.Create(GL_TEXTURE_2D); - glTextureStorage2D(aa_texture.handle, 1, GL_RGBA16F, - Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), - Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); - glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, aa_texture.handle, 0); + fxaa = std::make_unique( + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); smaa = std::make_unique( Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); @@ -323,10 +314,7 @@ void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, switch (anti_aliasing) { case Settings::AntiAliasing::Fxaa: { - 
program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, aa_framebuffer.handle); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - glBindTextureUnit(0, aa_texture.handle); + glBindTextureUnit(0, fxaa->Draw(program_manager, info.display_texture)); } break; case Settings::AntiAliasing::Smaa: { glBindTextureUnit(0, smaa->Draw(program_manager, info.display_texture)); diff --git a/src/video_core/renderer_opengl/gl_blit_screen.h b/src/video_core/renderer_opengl/gl_blit_screen.h index 945c7226ac..2cb9a50159 100644 --- a/src/video_core/renderer_opengl/gl_blit_screen.h +++ b/src/video_core/renderer_opengl/gl_blit_screen.h @@ -22,6 +22,7 @@ namespace OpenGL { class Device; class FSR; +class FXAA; class ProgramManager; class RasterizerOpenGL; class SMAA; @@ -77,8 +78,6 @@ private: OGLSampler present_sampler; OGLSampler present_sampler_nn; OGLBuffer vertex_buffer; - OGLProgram fxaa_vertex; - OGLProgram fxaa_fragment; OGLProgram present_vertex; OGLProgram present_bilinear_fragment; OGLProgram present_bicubic_fragment; @@ -87,10 +86,9 @@ private: /// Display information for Switch screen TextureInfo framebuffer_texture; - OGLTexture aa_texture; - OGLFramebuffer aa_framebuffer; std::unique_ptr fsr; + std::unique_ptr fxaa; std::unique_ptr smaa; /// OpenGL framebuffer data diff --git a/src/video_core/renderer_opengl/present/fxaa.cpp b/src/video_core/renderer_opengl/present/fxaa.cpp new file mode 100644 index 0000000000..9425c42fad --- /dev/null +++ b/src/video_core/renderer_opengl/present/fxaa.cpp @@ -0,0 +1,40 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/host_shaders/fxaa_frag.h" +#include "video_core/host_shaders/fxaa_vert.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/present/fxaa.h" +#include 
"video_core/renderer_opengl/present/util.h" + +namespace OpenGL { + +FXAA::FXAA(u32 width, u32 height) { + vert_shader = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER); + frag_shader = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); + + sampler = CreateBilinearSampler(); + + framebuffer.Create(); + + texture.Create(GL_TEXTURE_2D); + glTextureStorage2D(texture.handle, 1, GL_RGBA16F, width, height); + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, texture.handle, 0); +} + +FXAA::~FXAA() = default; + +GLuint FXAA::Draw(ProgramManager& program_manager, GLuint input_texture) { + glFrontFace(GL_CCW); + + program_manager.BindPresentPrograms(vert_shader.handle, frag_shader.handle); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); + glBindTextureUnit(0, input_texture); + glDrawArrays(GL_TRIANGLES, 0, 3); + glFrontFace(GL_CW); + + return texture.handle; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/fxaa.h b/src/video_core/renderer_opengl/present/fxaa.h new file mode 100644 index 0000000000..b898198f1e --- /dev/null +++ b/src/video_core/renderer_opengl/present/fxaa.h @@ -0,0 +1,27 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +class ProgramManager; + +class FXAA { +public: + explicit FXAA(u32 width, u32 height); + ~FXAA(); + + GLuint Draw(ProgramManager& program_manager, GLuint input_texture); + +private: + OGLProgram vert_shader; + OGLProgram frag_shader; + OGLSampler sampler; + OGLFramebuffer framebuffer; + OGLTexture texture; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/present/fxaa.cpp b/src/video_core/renderer_vulkan/present/fxaa.cpp index 6f87ddebb6..6c772ada3e 100644 --- a/src/video_core/renderer_vulkan/present/fxaa.cpp +++ b/src/video_core/renderer_vulkan/present/fxaa.cpp @@ -133,7 
+133,7 @@ VkImageView FXAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_ BeginRenderPass(cmdbuf, renderpass, framebuffer, extent); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); - cmdbuf.Draw(4, 1, 0, 0); + cmdbuf.Draw(3, 1, 0, 0); cmdbuf.EndRenderPass(); TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); }); From b78900e9567a4c98c44ef06b5088ccb507464d66 Mon Sep 17 00:00:00 2001 From: Liam Date: Mon, 15 Jan 2024 00:36:54 -0500 Subject: [PATCH 08/15] renderer_opengl: move out FSR shader source construction --- .../renderer_opengl/gl_blit_screen.cpp | 20 +------------- .../renderer_opengl/present/fsr.cpp | 26 +++++++++++++++---- src/video_core/renderer_opengl/present/fsr.h | 3 +-- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_blit_screen.cpp b/src/video_core/renderer_opengl/gl_blit_screen.cpp index 44f6a0922d..4e9d80d10b 100644 --- a/src/video_core/renderer_opengl/gl_blit_screen.cpp +++ b/src/video_core/renderer_opengl/gl_blit_screen.cpp @@ -66,14 +66,6 @@ BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, : rasterizer(rasterizer_), device_memory(device_memory_), state_tracker(state_tracker_), program_manager(program_manager_), device(device_) { // Create shader programs - const auto replace_include = [](std::string& shader_source, std::string_view include_name, - std::string_view include_content) { - const std::string include_string = fmt::format("#include \"{}\"", include_name); - const std::size_t pos = shader_source.find(include_string); - ASSERT(pos != std::string::npos); - shader_source.replace(pos, include_string.size(), include_content); - }; - present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); present_bicubic_fragment = 
CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); @@ -83,17 +75,7 @@ BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), GL_FRAGMENT_SHADER); - std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG}; - replace_include(fsr_source, "ffx_a.h", HostShaders::FFX_A_H); - replace_include(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H); - - std::string fsr_easu_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG}; - std::string fsr_rcas_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG}; - replace_include(fsr_easu_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); - replace_include(fsr_rcas_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); - - fsr = std::make_unique(HostShaders::FULL_SCREEN_TRIANGLE_VERT, fsr_easu_frag_source, - fsr_rcas_frag_source); + fsr = std::make_unique(); // Generate presentation sampler present_sampler.Create(); diff --git a/src/video_core/renderer_opengl/present/fsr.cpp b/src/video_core/renderer_opengl/present/fsr.cpp index e5945b80bb..a5540bb0c2 100644 --- a/src/video_core/renderer_opengl/present/fsr.cpp +++ b/src/video_core/renderer_opengl/present/fsr.cpp @@ -3,20 +3,36 @@ #include "common/settings.h" #include "video_core/fsr.h" +#include "video_core/host_shaders/ffx_a_h.h" +#include "video_core/host_shaders/ffx_fsr1_h.h" +#include "video_core/host_shaders/full_screen_triangle_vert.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/present/fsr.h" +#include "video_core/renderer_opengl/present/util.h" namespace OpenGL { using namespace FSR; using FsrConstants = std::array; -FSR::FSR(std::string_view 
fsr_vertex_source, std::string_view fsr_easu_source, - std::string_view fsr_rcas_source) - : fsr_vertex{CreateProgram(fsr_vertex_source, GL_VERTEX_SHADER)}, - fsr_easu_frag{CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER)}, - fsr_rcas_frag{CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER)} { +FSR::FSR() { + std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG}; + ReplaceInclude(fsr_source, "ffx_a.h", HostShaders::FFX_A_H); + ReplaceInclude(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H); + + std::string fsr_easu_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG}; + std::string fsr_rcas_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG}; + ReplaceInclude(fsr_easu_source, "opengl_fidelityfx_fsr.frag", fsr_source); + ReplaceInclude(fsr_rcas_source, "opengl_fidelityfx_fsr.frag", fsr_source); + + fsr_vertex = CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER); + fsr_easu_frag = CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER); + fsr_rcas_frag = CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER); + glProgramUniform2f(fsr_vertex.handle, 0, 1.0f, 1.0f); glProgramUniform2f(fsr_vertex.handle, 1, 0.0f, 0.0f); } diff --git a/src/video_core/renderer_opengl/present/fsr.h b/src/video_core/renderer_opengl/present/fsr.h index a5092e3969..fa57c6f004 100644 --- a/src/video_core/renderer_opengl/present/fsr.h +++ b/src/video_core/renderer_opengl/present/fsr.h @@ -16,8 +16,7 @@ class ProgramManager; class FSR { public: - explicit FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source, - std::string_view fsr_rcas_source); + explicit FSR(); ~FSR(); void Draw(ProgramManager& program_manager, const Common::Rectangle& screen, From 2ed9586130a7b1de6aefc2aede464c4d3430d484 Mon Sep 17 00:00:00 2001 From: Liam Date: Mon, 15 Jan 2024 10:19:02 -0500 Subject: [PATCH 09/15] renderer_vulkan: convert FSR to graphics pipeline --- src/video_core/host_shaders/CMakeLists.txt | 11 +- ...idelityfx_fsr.comp => fidelityfx_fsr.frag} | 33 +- 
.../host_shaders/vulkan_fidelityfx_fsr.vert | 13 + ...p => vulkan_fidelityfx_fsr_easu_fp16.frag} | 2 +- ...p => vulkan_fidelityfx_fsr_easu_fp32.frag} | 2 +- ...p => vulkan_fidelityfx_fsr_rcas_fp16.frag} | 2 +- ...p => vulkan_fidelityfx_fsr_rcas_fp32.frag} | 2 +- .../renderer_vulkan/present/anti_alias_pass.h | 10 +- .../renderer_vulkan/present/fsr.cpp | 534 ++++++------------ src/video_core/renderer_vulkan/present/fsr.h | 65 ++- .../renderer_vulkan/present/fxaa.cpp | 16 +- src/video_core/renderer_vulkan/present/fxaa.h | 4 +- .../renderer_vulkan/present/smaa.cpp | 23 +- src/video_core/renderer_vulkan/present/smaa.h | 4 +- .../renderer_vulkan/present/util.cpp | 37 +- src/video_core/renderer_vulkan/present/util.h | 10 +- .../renderer_vulkan/vk_blit_screen.cpp | 6 +- 17 files changed, 311 insertions(+), 463 deletions(-) rename src/video_core/host_shaders/{fidelityfx_fsr.comp => fidelityfx_fsr.frag} (79%) create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr.vert rename src/video_core/host_shaders/{vulkan_fidelityfx_fsr_easu_fp16.comp => vulkan_fidelityfx_fsr_easu_fp16.frag} (87%) rename src/video_core/host_shaders/{vulkan_fidelityfx_fsr_easu_fp32.comp => vulkan_fidelityfx_fsr_easu_fp32.frag} (86%) rename src/video_core/host_shaders/{vulkan_fidelityfx_fsr_rcas_fp16.comp => vulkan_fidelityfx_fsr_rcas_fp16.frag} (87%) rename src/video_core/host_shaders/{vulkan_fidelityfx_fsr_rcas_fp32.comp => vulkan_fidelityfx_fsr_rcas_fp32.frag} (86%) diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index cd25492328..969f21d509 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -9,7 +9,7 @@ set(FIDELITYFX_FILES ) set(GLSL_INCLUDES - fidelityfx_fsr.comp + fidelityfx_fsr.frag ${FIDELITYFX_FILES} ) @@ -56,10 +56,11 @@ set(SHADER_FILES vulkan_color_clear.frag vulkan_color_clear.vert vulkan_depthstencil_clear.frag - vulkan_fidelityfx_fsr_easu_fp16.comp - 
vulkan_fidelityfx_fsr_easu_fp32.comp - vulkan_fidelityfx_fsr_rcas_fp16.comp - vulkan_fidelityfx_fsr_rcas_fp32.comp + vulkan_fidelityfx_fsr.vert + vulkan_fidelityfx_fsr_easu_fp16.frag + vulkan_fidelityfx_fsr_easu_fp32.frag + vulkan_fidelityfx_fsr_rcas_fp16.frag + vulkan_fidelityfx_fsr_rcas_fp32.frag vulkan_present.frag vulkan_present.vert vulkan_present_scaleforce_fp16.frag diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.frag similarity index 79% rename from src/video_core/host_shaders/fidelityfx_fsr.comp rename to src/video_core/host_shaders/fidelityfx_fsr.frag index f91b1aa9f0..a266e1c4eb 100644 --- a/src/video_core/host_shaders/fidelityfx_fsr.comp +++ b/src/video_core/host_shaders/fidelityfx_fsr.frag @@ -34,7 +34,6 @@ layout( push_constant ) uniform constants { }; layout(set=0,binding=0) uniform sampler2D InputTexture; -layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; #define A_GPU 1 #define A_GLSL 1 @@ -72,44 +71,40 @@ layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; #include "ffx_fsr1.h" -void CurrFilter(AU2 pos) { -#if USE_BILINEAR - AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw); - imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0)); +#if USE_RCAS + layout(location = 0) in vec2 frag_texcoord; #endif +layout (location = 0) out vec4 frag_color; + +void CurrFilter(AU2 pos) { #if USE_EASU #ifndef YUZU_USE_FP16 AF3 c; FsrEasuF(c, pos, Const0, Const1, Const2, Const3); - imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + frag_color = AF4(c, 1.0); #else AH3 c; FsrEasuH(c, pos, Const0, Const1, Const2, Const3); - imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + frag_color = AH4(c, 1.0); #endif #endif #if USE_RCAS #ifndef YUZU_USE_FP16 AF3 c; FsrRcasF(c.r, c.g, c.b, pos, Const0); - imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + frag_color = AF4(c, 1.0); #else AH3 c; FsrRcasH(c.r, c.g, 
c.b, pos, Const0); - imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + frag_color = AH4(c, 1.0); #endif #endif } -layout(local_size_x=64) in; void main() { - // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. - AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); - CurrFilter(gxy); - gxy.x += 8u; - CurrFilter(gxy); - gxy.y += 8u; - CurrFilter(gxy); - gxy.x -= 8u; - CurrFilter(gxy); +#if USE_RCAS + CurrFilter(AU2(frag_texcoord * vec2(textureSize(InputTexture, 0)))); +#else + CurrFilter(AU2(gl_FragCoord.xy)); +#endif } diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr.vert b/src/video_core/host_shaders/vulkan_fidelityfx_fsr.vert new file mode 100644 index 0000000000..6a87a7cac0 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr.vert @@ -0,0 +1,13 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +layout(location = 0) out vec2 texcoord; + +void main() { + float x = float((gl_VertexIndex & 1) << 2); + float y = float((gl_VertexIndex & 2) << 1); + gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); + texcoord = vec2(x, y) / 2.0; +} diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag similarity index 87% rename from src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp rename to src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag index 00af13726b..d369bef069 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag @@ -7,4 +7,4 @@ #define YUZU_USE_FP16 #define USE_EASU 1 -#include "fidelityfx_fsr.comp" +#include "fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag similarity 
index 86% rename from src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp rename to src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag index 13d783fa86..6f25ef00f6 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag @@ -6,4 +6,4 @@ #define USE_EASU 1 -#include "fidelityfx_fsr.comp" +#include "fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag similarity index 87% rename from src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp rename to src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag index 331549d96f..0c953a9009 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag @@ -7,4 +7,4 @@ #define YUZU_USE_FP16 #define USE_RCAS 1 -#include "fidelityfx_fsr.comp" +#include "fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag similarity index 86% rename from src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp rename to src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag index 013ca0014b..02e9a27c65 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag @@ -6,4 +6,4 @@ #define USE_RCAS 1 -#include "fidelityfx_fsr.comp" +#include "fidelityfx_fsr.frag" diff --git a/src/video_core/renderer_vulkan/present/anti_alias_pass.h b/src/video_core/renderer_vulkan/present/anti_alias_pass.h index c1ec0b9a0d..1f20fbd7f0 100644 --- a/src/video_core/renderer_vulkan/present/anti_alias_pass.h +++ b/src/video_core/renderer_vulkan/present/anti_alias_pass.h @@ -12,16 +12,14 @@ class Scheduler; class AntiAliasPass 
{ public: virtual ~AntiAliasPass() = default; - virtual VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, - VkImageView source_image_view) = 0; + virtual void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, + VkImageView* inout_image_view) = 0; }; class NoAA final : public AntiAliasPass { public: - virtual VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, - VkImageView source_image_view) { - return source_image_view; - } + void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, + VkImageView* inout_image_view) override {} }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/fsr.cpp b/src/video_core/renderer_vulkan/present/fsr.cpp index 30a16a785a..3f708be704 100644 --- a/src/video_core/renderer_vulkan/present/fsr.cpp +++ b/src/video_core/renderer_vulkan/present/fsr.cpp @@ -6,11 +6,13 @@ #include "common/settings.h" #include "video_core/fsr.h" -#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h" -#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h" -#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" -#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_frag_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_frag_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_frag_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_frag_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_vert_spv.h" #include "video_core/renderer_vulkan/present/fsr.h" +#include "video_core/renderer_vulkan/present/util.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/vulkan_common/vulkan_device.h" @@ -18,403 +20,207 @@ namespace Vulkan { using 
namespace FSR; -FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, - VkExtent2D output_size_) - : device{device_}, memory_allocator{memory_allocator_}, image_count{image_count_}, - output_size{output_size_} { +using PushConstants = std::array; + +FSR::FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, + VkExtent2D extent) + : m_device{device}, m_memory_allocator{memory_allocator}, + m_image_count{image_count}, m_extent{extent} { CreateImages(); + CreateRenderPasses(); CreateSampler(); CreateShaders(); CreateDescriptorPool(); CreateDescriptorSetLayout(); CreateDescriptorSets(); - CreatePipelineLayout(); - CreatePipeline(); -} - -VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view, - VkExtent2D input_image_extent, const Common::Rectangle& crop_rect) { - - UpdateDescriptorSet(image_index, image_view); - - scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = {}, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); - - const f32 input_image_width = static_cast(input_image_extent.width); - const f32 input_image_height = static_cast(input_image_extent.height); - const f32 output_image_width = static_cast(output_size.width); - const f32 output_image_height = static_cast(output_size.height); - const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width; - const f32 viewport_x = 
crop_rect.left * input_image_width; - const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height; - const f32 viewport_y = crop_rect.top * input_image_height; - - std::array push_constants; - FsrEasuConOffset(push_constants.data() + 0, push_constants.data() + 4, - push_constants.data() + 8, push_constants.data() + 12, - - viewport_width, viewport_height, input_image_width, input_image_height, - output_image_width, output_image_height, viewport_x, viewport_y); - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); - - { - VkImageMemoryBarrier fsr_write_barrier = base_barrier; - fsr_write_barrier.image = *images[image_index]; - fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, fsr_write_barrier); - } - - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, - descriptor_sets[image_index * 2], {}); - cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u), - Common::DivCeil(output_size.height, 16u), 1); - - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline); - - const float sharpening = - static_cast(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f; - - FsrRcasCon(push_constants.data(), sharpening); - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); - - { - std::array barriers; - auto& fsr_read_barrier = barriers[0]; - auto& blit_write_barrier = barriers[1]; - - fsr_read_barrier = base_barrier; - fsr_read_barrier.image = *images[image_index]; - fsr_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - fsr_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - blit_write_barrier = base_barrier; - blit_write_barrier.image = *images[image_count + image_index]; - blit_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - blit_write_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - - 
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, {}, {}, barriers); - } - - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, - descriptor_sets[image_index * 2 + 1], {}); - cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u), - Common::DivCeil(output_size.height, 16u), 1); - - { - std::array barriers; - auto& blit_read_barrier = barriers[0]; - - blit_read_barrier = base_barrier; - blit_read_barrier.image = *images[image_count + image_index]; - blit_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, {}, {}, barriers); - } - }); - - return *image_views[image_count + image_index]; -} - -void FSR::CreateDescriptorPool() { - const std::array pool_sizes{{ - { - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = static_cast(image_count * 2), - }, - { - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = static_cast(image_count * 2), - }, - }}; - - const VkDescriptorPoolCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .maxSets = static_cast(image_count * 2), - .poolSizeCount = static_cast(pool_sizes.size()), - .pPoolSizes = pool_sizes.data(), - }; - descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); -} - -void FSR::CreateDescriptorSetLayout() { - const std::array layout_bindings{{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = sampler.address(), - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = sampler.address(), - }, - }}; - - const 
VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(layout_bindings.size()), - .pBindings = layout_bindings.data(), - }; - - descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); -} - -void FSR::CreateDescriptorSets() { - const u32 sets = static_cast(image_count * 2); - const std::vector layouts(sets, *descriptor_set_layout); - - const VkDescriptorSetAllocateInfo ai{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .pNext = nullptr, - .descriptorPool = *descriptor_pool, - .descriptorSetCount = sets, - .pSetLayouts = layouts.data(), - }; - - descriptor_sets = descriptor_pool.Allocate(ai); + CreatePipelineLayouts(); + CreatePipelines(); } void FSR::CreateImages() { - images.resize(image_count * 2); - image_views.resize(image_count * 2); - - for (size_t i = 0; i < image_count * 2; ++i) { - images[i] = memory_allocator.CreateImage(VkImageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_R16G16B16A16_SFLOAT, - .extent = - { - .width = output_size.width, - .height = output_size.height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }); - image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = *images[i], - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = VK_FORMAT_R16G16B16A16_SFLOAT, - .components = - { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - 
.b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }); + m_dynamic_images.resize(m_image_count); + for (auto& images : m_dynamic_images) { + images.images[Easu] = + CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.images[Rcas] = + CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.image_views[Easu] = + CreateWrappedImageView(m_device, images.images[Easu], VK_FORMAT_R16G16B16A16_SFLOAT); + images.image_views[Rcas] = + CreateWrappedImageView(m_device, images.images[Rcas], VK_FORMAT_R16G16B16A16_SFLOAT); } } -void FSR::CreatePipelineLayout() { - VkPushConstantRange push_const{ - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, +void FSR::CreateRenderPasses() { + m_renderpass = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); + + for (auto& images : m_dynamic_images) { + images.framebuffers[Easu] = + CreateWrappedFramebuffer(m_device, m_renderpass, images.image_views[Easu], m_extent); + images.framebuffers[Rcas] = + CreateWrappedFramebuffer(m_device, m_renderpass, images.image_views[Rcas], m_extent); + } +} + +void FSR::CreateSampler() { + m_sampler = CreateBilinearSampler(m_device); +} + +void FSR::CreateShaders() { + m_vert_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_VERT_SPV); + + if (m_device.IsFloat16Supported()) { + m_easu_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_EASU_FP16_FRAG_SPV); + m_rcas_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_FRAG_SPV); + } else { + m_easu_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_EASU_FP32_FRAG_SPV); + m_rcas_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_FRAG_SPV); + } +} + +void FSR::CreateDescriptorPool() { + // EASU: 1 descriptor + // RCAS: 1 descriptor + // 2 descriptors, 2 descriptor sets per 
invocation + m_descriptor_pool = CreateWrappedDescriptorPool(m_device, 2 * m_image_count, 2 * m_image_count); +} + +void FSR::CreateDescriptorSetLayout() { + m_descriptor_set_layout = + CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); +} + +void FSR::CreateDescriptorSets() { + std::vector layouts(MaxFsrStage, *m_descriptor_set_layout); + + for (auto& images : m_dynamic_images) { + images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layouts); + } +} + +void FSR::CreatePipelineLayouts() { + const VkPushConstantRange range{ + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, .offset = 0, - .size = sizeof(std::array), + .size = sizeof(PushConstants), }; VkPipelineLayoutCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), + .pSetLayouts = m_descriptor_set_layout.address(), .pushConstantRangeCount = 1, - .pPushConstantRanges = &push_const, + .pPushConstantRanges = &range, }; - pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); + m_pipeline_layout = m_device.GetLogical().CreatePipelineLayout(ci); } -void FSR::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { - const auto fsr_image_view = *image_views[image_index]; - const auto blit_image_view = *image_views[image_count + image_index]; - - const VkDescriptorImageInfo image_info{ - .sampler = VK_NULL_HANDLE, - .imageView = image_view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - const VkDescriptorImageInfo fsr_image_info{ - .sampler = VK_NULL_HANDLE, - .imageView = fsr_image_view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - const VkDescriptorImageInfo blit_image_info{ - .sampler = VK_NULL_HANDLE, - .imageView = blit_image_view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - - VkWriteDescriptorSet sampler_write{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = 
descriptor_sets[image_index * 2], - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = &image_info, - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - - VkWriteDescriptorSet output_write{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = descriptor_sets[image_index * 2], - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = &fsr_image_info, - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - - device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {}); - - sampler_write.dstSet = descriptor_sets[image_index * 2 + 1]; - sampler_write.pImageInfo = &fsr_image_info; - output_write.dstSet = descriptor_sets[image_index * 2 + 1]; - output_write.pImageInfo = &blit_image_info; - - device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {}); +void FSR::CreatePipelines() { + m_easu_pipeline = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout, + std::tie(m_vert_shader, m_easu_shader)); + m_rcas_pipeline = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout, + std::tie(m_vert_shader, m_rcas_shader)); } -void FSR::CreateSampler() { - const VkSamplerCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .magFilter = VK_FILTER_LINEAR, - .minFilter = VK_FILTER_LINEAR, - .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, - .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .mipLodBias = 0.0f, - .anisotropyEnable = VK_FALSE, - .maxAnisotropy = 0.0f, - .compareEnable = VK_FALSE, - .compareOp = VK_COMPARE_OP_NEVER, - .minLod = 0.0f, - .maxLod = 0.0f, - .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, - 
.unnormalizedCoordinates = VK_FALSE, - }; +void FSR::UpdateDescriptorSets(VkImageView image_view, size_t image_index) { + Images& images = m_dynamic_images[image_index]; + std::vector image_infos; + std::vector updates; + image_infos.reserve(2); - sampler = device.GetLogical().CreateSampler(ci); + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, + images.descriptor_sets[Easu], 0)); + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Easu], + images.descriptor_sets[Rcas], 0)); + + m_device.GetLogical().UpdateDescriptorSets(updates, {}); } -void FSR::CreateShaders() { - if (device.IsFloat16Supported()) { - easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV); - rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV); - } else { - easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV); - rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV); +void FSR::UploadImages(Scheduler& scheduler) { + if (m_images_ready) { + return; } + + scheduler.Record([&](vk::CommandBuffer cmdbuf) { + for (auto& image : m_dynamic_images) { + ClearColorImage(cmdbuf, *image.images[Easu]); + ClearColorImage(cmdbuf, *image.images[Rcas]); + } + }); + scheduler.Finish(); + + m_images_ready = true; } -void FSR::CreatePipeline() { - VkPipelineShaderStageCreateInfo shader_stage_easu{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *easu_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }; +VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, + VkImageView source_image_view, VkExtent2D input_image_extent, + const Common::Rectangle& crop_rect) { + Images& images = m_dynamic_images[image_index]; - VkPipelineShaderStageCreateInfo shader_stage_rcas{ - .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *rcas_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }; + VkImage easu_image = *images.images[Easu]; + VkImage rcas_image = *images.images[Rcas]; + VkDescriptorSet easu_descriptor_set = images.descriptor_sets[Easu]; + VkDescriptorSet rcas_descriptor_set = images.descriptor_sets[Rcas]; + VkFramebuffer easu_framebuffer = *images.framebuffers[Easu]; + VkFramebuffer rcas_framebuffer = *images.framebuffers[Rcas]; + VkPipeline easu_pipeline = *m_easu_pipeline; + VkPipeline rcas_pipeline = *m_rcas_pipeline; + VkPipelineLayout pipeline_layout = *m_pipeline_layout; + VkRenderPass renderpass = *m_renderpass; + VkExtent2D extent = m_extent; - VkComputePipelineCreateInfo pipeline_ci_easu{ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = shader_stage_easu, - .layout = *pipeline_layout, - .basePipelineHandle = VK_NULL_HANDLE, - .basePipelineIndex = 0, - }; + const f32 input_image_width = static_cast(input_image_extent.width); + const f32 input_image_height = static_cast(input_image_extent.height); + const f32 output_image_width = static_cast(extent.width); + const f32 output_image_height = static_cast(extent.height); + const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width; + const f32 viewport_x = crop_rect.left * input_image_width; + const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height; + const f32 viewport_y = crop_rect.top * input_image_height; - VkComputePipelineCreateInfo pipeline_ci_rcas{ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = shader_stage_rcas, - .layout = *pipeline_layout, - .basePipelineHandle = VK_NULL_HANDLE, - .basePipelineIndex = 0, - }; + PushConstants easu_con{}; + PushConstants rcas_con{}; + FsrEasuConOffset(easu_con.data() + 0, 
easu_con.data() + 4, easu_con.data() + 8, + easu_con.data() + 12, viewport_width, viewport_height, input_image_width, + input_image_height, output_image_width, output_image_height, viewport_x, + viewport_y); - easu_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_easu); - rcas_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_rcas); + const float sharpening = + static_cast(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f; + FsrRcasCon(rcas_con.data(), sharpening); + + UploadImages(scheduler); + UpdateDescriptorSets(source_image_view, image_index); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([=](vk::CommandBuffer cmdbuf) { + TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, easu_image, VK_IMAGE_LAYOUT_GENERAL); + BeginRenderPass(cmdbuf, renderpass, easu_framebuffer, extent); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, easu_pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, + easu_descriptor_set, {}); + cmdbuf.PushConstants(pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, easu_con); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + + TransitionImageLayout(cmdbuf, easu_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, rcas_image, VK_IMAGE_LAYOUT_GENERAL); + BeginRenderPass(cmdbuf, renderpass, rcas_framebuffer, extent); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, rcas_pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, + rcas_descriptor_set, {}); + cmdbuf.PushConstants(pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, rcas_con); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + + TransitionImageLayout(cmdbuf, rcas_image, VK_IMAGE_LAYOUT_GENERAL); + }); + + return *images.image_views[Rcas]; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/fsr.h b/src/video_core/renderer_vulkan/present/fsr.h index 
3505c14164..8602e81465 100644 --- a/src/video_core/renderer_vulkan/present/fsr.h +++ b/src/video_core/renderer_vulkan/present/fsr.h @@ -15,38 +15,55 @@ class Scheduler; class FSR { public: explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, - VkExtent2D output_size); - VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view, - VkExtent2D input_image_extent, const Common::Rectangle& crop_rect); + VkExtent2D extent); + VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, + VkImageView source_image_view, VkExtent2D input_image_extent, + const Common::Rectangle& crop_rect); private: + void CreateImages(); + void CreateRenderPasses(); + void CreateSampler(); + void CreateShaders(); void CreateDescriptorPool(); void CreateDescriptorSetLayout(); void CreateDescriptorSets(); - void CreateImages(); - void CreateSampler(); - void CreateShaders(); - void CreatePipeline(); - void CreatePipelineLayout(); + void CreatePipelineLayouts(); + void CreatePipelines(); - void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const; + void UploadImages(Scheduler& scheduler); + void UpdateDescriptorSets(VkImageView image_view, size_t image_index); - const Device& device; - MemoryAllocator& memory_allocator; - size_t image_count; - VkExtent2D output_size; + const Device& m_device; + MemoryAllocator& m_memory_allocator; + const size_t m_image_count; + const VkExtent2D m_extent; - vk::DescriptorPool descriptor_pool; - vk::DescriptorSetLayout descriptor_set_layout; - vk::DescriptorSets descriptor_sets; - vk::PipelineLayout pipeline_layout; - vk::ShaderModule easu_shader; - vk::ShaderModule rcas_shader; - vk::Pipeline easu_pipeline; - vk::Pipeline rcas_pipeline; - vk::Sampler sampler; - std::vector images; - std::vector image_views; + enum FsrStage { + Easu, + Rcas, + MaxFsrStage, + }; + + vk::DescriptorPool m_descriptor_pool; + vk::DescriptorSetLayout m_descriptor_set_layout; + 
vk::PipelineLayout m_pipeline_layout; + vk::ShaderModule m_vert_shader; + vk::ShaderModule m_easu_shader; + vk::ShaderModule m_rcas_shader; + vk::Pipeline m_easu_pipeline; + vk::Pipeline m_rcas_pipeline; + vk::RenderPass m_renderpass; + vk::Sampler m_sampler; + + struct Images { + vk::DescriptorSets descriptor_sets; + std::array images; + std::array image_views; + std::array framebuffers; + }; + std::vector m_dynamic_images; + bool m_images_ready{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/fxaa.cpp b/src/video_core/renderer_vulkan/present/fxaa.cpp index 6c772ada3e..bdafd1f4d0 100644 --- a/src/video_core/renderer_vulkan/present/fxaa.cpp +++ b/src/video_core/renderer_vulkan/present/fxaa.cpp @@ -63,7 +63,9 @@ void FXAA::CreateDescriptorPool() { } void FXAA::CreateDescriptorSetLayouts() { - m_descriptor_set_layout = CreateWrappedDescriptorSetLayout(m_device, 2); + m_descriptor_set_layout = + CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); } void FXAA::CreateDescriptorSets() { @@ -112,9 +114,10 @@ void FXAA::UploadImages(Scheduler& scheduler) { m_images_ready = true; } -VkImageView FXAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, - VkImageView source_image_view) { +void FXAA::Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, + VkImageView* inout_image_view) { const Image& image{m_dynamic_images[image_index]}; + const VkImage input_image{*inout_image}; const VkImage output_image{*image.image}; const VkDescriptorSet descriptor_set{image.descriptor_sets[0]}; const VkFramebuffer framebuffer{*image.framebuffer}; @@ -124,11 +127,11 @@ VkImageView FXAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_ const VkExtent2D extent{m_extent}; UploadImages(scheduler); - UpdateDescriptorSets(source_image_view, image_index); + UpdateDescriptorSets(*inout_image_view, image_index); 
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([=](vk::CommandBuffer cmdbuf) { - TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, input_image, VK_IMAGE_LAYOUT_GENERAL); TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); BeginRenderPass(cmdbuf, renderpass, framebuffer, extent); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); @@ -138,7 +141,8 @@ VkImageView FXAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_ TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); }); - return *image.image_view; + *inout_image = *image.image; + *inout_image_view = *image.image_view; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/fxaa.h b/src/video_core/renderer_vulkan/present/fxaa.h index c083f3ff09..97a2e5c1cb 100644 --- a/src/video_core/renderer_vulkan/present/fxaa.h +++ b/src/video_core/renderer_vulkan/present/fxaa.h @@ -19,8 +19,8 @@ public: VkExtent2D extent); ~FXAA() override; - VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, - VkImageView source_image_view) override; + void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, + VkImageView* inout_image_view) override; private: void CreateImages(); diff --git a/src/video_core/renderer_vulkan/present/smaa.cpp b/src/video_core/renderer_vulkan/present/smaa.cpp index 68cd22b08e..39645fd1d7 100644 --- a/src/video_core/renderer_vulkan/present/smaa.cpp +++ b/src/video_core/renderer_vulkan/present/smaa.cpp @@ -122,10 +122,15 @@ void SMAA::CreateDescriptorPool() { } void SMAA::CreateDescriptorSetLayouts() { - m_descriptor_set_layouts[EdgeDetection] = CreateWrappedDescriptorSetLayout(m_device, 1); + m_descriptor_set_layouts[EdgeDetection] = + CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); m_descriptor_set_layouts[BlendingWeightCalculation] = - 
CreateWrappedDescriptorSetLayout(m_device, 3); - m_descriptor_set_layouts[NeighborhoodBlending] = CreateWrappedDescriptorSetLayout(m_device, 2); + CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); + m_descriptor_set_layouts[NeighborhoodBlending] = + CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); } void SMAA::CreateDescriptorSets() { @@ -204,10 +209,11 @@ void SMAA::UploadImages(Scheduler& scheduler) { m_images_ready = true; } -VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, - VkImageView source_image_view) { +void SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, + VkImageView* inout_image_view) { Images& images = m_dynamic_images[image_index]; + VkImage input_image = *inout_image; VkImage output_image = *images.images[Output]; VkImage edges_image = *images.images[Edges]; VkImage blend_image = *images.images[Blend]; @@ -224,11 +230,11 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_ VkFramebuffer neighborhood_blending_framebuffer = *images.framebuffers[NeighborhoodBlending]; UploadImages(scheduler); - UpdateDescriptorSets(source_image_view, image_index); + UpdateDescriptorSets(*inout_image_view, image_index); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([=, this](vk::CommandBuffer cmdbuf) { - TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, input_image, VK_IMAGE_LAYOUT_GENERAL); TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); BeginRenderPass(cmdbuf, *m_renderpasses[EdgeDetection], edge_detection_framebuffer, m_extent); @@ -264,7 +270,8 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_ TransitionImageLayout(cmdbuf, 
output_image, VK_IMAGE_LAYOUT_GENERAL); }); - return *images.image_views[Output]; + *inout_image = *images.images[Output]; + *inout_image_view = *images.image_views[Output]; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/smaa.h b/src/video_core/renderer_vulkan/present/smaa.h index 3d6707d485..fdf6def070 100644 --- a/src/video_core/renderer_vulkan/present/smaa.h +++ b/src/video_core/renderer_vulkan/present/smaa.h @@ -20,8 +20,8 @@ public: VkExtent2D extent); ~SMAA() override; - VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, - VkImageView source_image_view) override; + void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, + VkImageView* inout_image_view) override; private: enum SMAAStage { diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp index cd60611014..9c08ac6134 100644 --- a/src/video_core/renderer_vulkan/present/util.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -215,32 +215,37 @@ vk::ShaderModule CreateWrappedShaderModule(const Device& device, std::span(max_descriptors), - }; +vk::DescriptorPool CreateWrappedDescriptorPool(const Device& device, size_t max_descriptors, + size_t max_sets, + std::initializer_list types) { + std::vector pool_sizes(types.size()); + for (u32 i = 0; i < types.size(); i++) { + pool_sizes[i] = VkDescriptorPoolSize{ + .type = std::data(types)[i], + .descriptorCount = static_cast(max_descriptors), + }; + } return device.GetLogical().CreateDescriptorPool(VkDescriptorPoolCreateInfo{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, .flags = 0, - .maxSets = max_sets, - .poolSizeCount = 1, - .pPoolSizes = &pool_size, + .maxSets = static_cast(max_sets), + .poolSizeCount = static_cast(pool_sizes.size()), + .pPoolSizes = pool_sizes.data(), }); } -vk::DescriptorSetLayout CreateWrappedDescriptorSetLayout(const Device& device, - u32 max_sampler_bindings) { - 
std::vector bindings(max_sampler_bindings); - for (u32 i = 0; i < max_sampler_bindings; i++) { +vk::DescriptorSetLayout CreateWrappedDescriptorSetLayout( + const Device& device, std::initializer_list types) { + std::vector bindings(types.size()); + for (size_t i = 0; i < types.size(); i++) { bindings[i] = { - .binding = i, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .binding = static_cast(i), + .descriptorType = std::data(types)[i], .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | + VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = nullptr, }; } diff --git a/src/video_core/renderer_vulkan/present/util.h b/src/video_core/renderer_vulkan/present/util.h index ea9a26c3db..2f3a538faf 100644 --- a/src/video_core/renderer_vulkan/present/util.h +++ b/src/video_core/renderer_vulkan/present/util.h @@ -25,10 +25,12 @@ vk::Framebuffer CreateWrappedFramebuffer(const Device& device, vk::RenderPass& r vk::ImageView& dest_image, VkExtent2D extent); vk::Sampler CreateWrappedSampler(const Device& device, VkFilter filter = VK_FILTER_LINEAR); vk::ShaderModule CreateWrappedShaderModule(const Device& device, std::span code); -vk::DescriptorPool CreateWrappedDescriptorPool(const Device& device, u32 max_sampler_bindings, - u32 max_sets); -vk::DescriptorSetLayout CreateWrappedDescriptorSetLayout(const Device& device, - u32 max_sampler_bindings); +vk::DescriptorPool CreateWrappedDescriptorPool(const Device& device, size_t max_descriptors, + size_t max_sets, + std::initializer_list types = { + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); +vk::DescriptorSetLayout CreateWrappedDescriptorSetLayout( + const Device& device, std::initializer_list types); vk::DescriptorSets CreateWrappedDescriptorSets(vk::DescriptorPool& pool, vk::Span layouts); vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device, diff --git 
a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index fd7c287791..8d01ec9fce 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -234,7 +234,7 @@ void BlitScreen::Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConf }); } - source_image_view = anti_alias->Draw(scheduler, image_index, source_image, source_image_view); + anti_alias->Draw(scheduler, image_index, &source_image, &source_image_view); const auto crop_rect = Tegra::NormalizeCrop(framebuffer, texture_width, texture_height); const VkExtent2D render_extent{ @@ -248,8 +248,8 @@ void BlitScreen::Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConf .height = layout.screen.GetHeight(), }; - source_image_view = - fsr->Draw(scheduler, image_index, source_image_view, render_extent, crop_rect); + source_image_view = fsr->Draw(scheduler, image_index, source_image, source_image_view, + render_extent, crop_rect); const Common::Rectangle output_crop{0, 0, 1, 1}; window_adapt->Draw(scheduler, image_index, source_image_view, adapt_size, output_crop, From dd2918efd83b586861ebc463dfee20c35e9d3bb3 Mon Sep 17 00:00:00 2001 From: Liam Date: Mon, 15 Jan 2024 14:28:03 -0500 Subject: [PATCH 10/15] renderer_opengl: move out ownership of FSR resources --- .../renderer_opengl/gl_blit_screen.cpp | 35 +++--- .../renderer_opengl/present/fsr.cpp | 116 +++++++----------- src/video_core/renderer_opengl/present/fsr.h | 29 ++--- 3 files changed, 77 insertions(+), 103 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_blit_screen.cpp b/src/video_core/renderer_opengl/gl_blit_screen.cpp index 4e9d80d10b..5f6221b9be 100644 --- a/src/video_core/renderer_opengl/gl_blit_screen.cpp +++ b/src/video_core/renderer_opengl/gl_blit_screen.cpp @@ -75,8 +75,6 @@ BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, CreateProgram(fmt::format("#version 460\n{}", 
HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), GL_FRAGMENT_SHADER); - fsr = std::make_unique(); - // Generate presentation sampler present_sampler.Create(); glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); @@ -269,7 +267,7 @@ void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthRangeIndexed(0, 0.0, 0.0); - glBindTextureUnit(0, info.display_texture); + GLuint texture = info.display_texture; auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); if (anti_aliasing >= Settings::AntiAliasing::MaxEnum) { @@ -296,10 +294,10 @@ void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, switch (anti_aliasing) { case Settings::AntiAliasing::Fxaa: { - glBindTextureUnit(0, fxaa->Draw(program_manager, info.display_texture)); + texture = fxaa->Draw(program_manager, info.display_texture); } break; case Settings::AntiAliasing::Smaa: { - glBindTextureUnit(0, smaa->Draw(program_manager, info.display_texture)); + texture = smaa->Draw(program_manager, info.display_texture); } break; default: UNREACHABLE(); @@ -311,34 +309,37 @@ void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, glDisablei(GL_SCISSOR_TEST, 0); if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { - if (!fsr->AreBuffersInitialized()) { - fsr->InitBuffers(); + GLint old_read_fb; + GLint old_draw_fb; + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); + + if (!fsr || fsr->NeedsRecreation(layout.screen)) { + fsr = std::make_unique(layout.screen.GetWidth(), layout.screen.GetHeight()); } - glBindSampler(0, present_sampler.handle); - fsr->Draw(program_manager, layout.screen, info.scaled_width, info.scaled_height, crop); - } else { - if (fsr->AreBuffersInitialized()) { - fsr->ReleaseBuffers(); - } + texture = fsr->Draw(program_manager, texture, info.scaled_width, 
info.scaled_height, crop); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); } + glBindTextureUnit(0, texture); + const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); const auto fragment_handle = [this]() { switch (Settings::values.scaling_filter.GetValue()) { - case Settings::ScalingFilter::NearestNeighbor: - case Settings::ScalingFilter::Bilinear: - return present_bilinear_fragment.handle; case Settings::ScalingFilter::Bicubic: return present_bicubic_fragment.handle; case Settings::ScalingFilter::Gaussian: return present_gaussian_fragment.handle; case Settings::ScalingFilter::ScaleForce: return present_scaleforce_fragment.handle; + case Settings::ScalingFilter::NearestNeighbor: + case Settings::ScalingFilter::Bilinear: case Settings::ScalingFilter::Fsr: - return fsr->GetPresentFragmentProgram().handle; default: return present_bilinear_fragment.handle; } diff --git a/src/video_core/renderer_opengl/present/fsr.cpp b/src/video_core/renderer_opengl/present/fsr.cpp index a5540bb0c2..b764aadae8 100644 --- a/src/video_core/renderer_opengl/present/fsr.cpp +++ b/src/video_core/renderer_opengl/present/fsr.cpp @@ -19,7 +19,7 @@ using namespace FSR; using FsrConstants = std::array; -FSR::FSR() { +FSR::FSR(u32 output_width_, u32 output_height_) : width(output_width_), height(output_height_) { std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG}; ReplaceInclude(fsr_source, "ffx_a.h", HostShaders::FFX_A_H); ReplaceInclude(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H); @@ -29,94 +29,70 @@ FSR::FSR() { ReplaceInclude(fsr_easu_source, "opengl_fidelityfx_fsr.frag", fsr_source); ReplaceInclude(fsr_rcas_source, "opengl_fidelityfx_fsr.frag", fsr_source); - fsr_vertex = CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER); - fsr_easu_frag = CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER); - fsr_rcas_frag = 
CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER); + vert = CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER); + easu_frag = CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER); + rcas_frag = CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER); - glProgramUniform2f(fsr_vertex.handle, 0, 1.0f, 1.0f); - glProgramUniform2f(fsr_vertex.handle, 1, 0.0f, 0.0f); + glProgramUniform2f(vert.handle, 0, 1.0f, -1.0f); + glProgramUniform2f(vert.handle, 1, 0.0f, 1.0f); + + sampler = CreateBilinearSampler(); + framebuffer.Create(); + + easu_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(easu_tex.handle, 1, GL_RGBA16F, width, height); + + rcas_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(rcas_tex.handle, 1, GL_RGBA16F, width, height); } FSR::~FSR() = default; -void FSR::Draw(ProgramManager& program_manager, const Common::Rectangle& screen, - u32 input_image_width, u32 input_image_height, - const Common::Rectangle& crop_rect) { - - const auto output_image_width = screen.GetWidth(); - const auto output_image_height = screen.GetHeight(); - - if (fsr_intermediate_tex.handle) { - GLint fsr_tex_width, fsr_tex_height; - glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_WIDTH, - &fsr_tex_width); - glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_HEIGHT, - &fsr_tex_height); - if (static_cast(fsr_tex_width) != output_image_width || - static_cast(fsr_tex_height) != output_image_height) { - fsr_intermediate_tex.Release(); - } - } - if (!fsr_intermediate_tex.handle) { - fsr_intermediate_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(fsr_intermediate_tex.handle, 1, GL_RGB16F, output_image_width, - output_image_height); - glNamedFramebufferTexture(fsr_framebuffer.handle, GL_COLOR_ATTACHMENT0, - fsr_intermediate_tex.handle, 0); - } - - GLint old_draw_fb; - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); - - glFrontFace(GL_CW); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fsr_framebuffer.handle); - glViewportIndexedf(0, 
0.0f, 0.0f, static_cast(output_image_width), - static_cast(output_image_height)); - +GLuint FSR::Draw(ProgramManager& program_manager, GLuint texture, u32 input_image_width, + u32 input_image_height, const Common::Rectangle& crop_rect) { const f32 input_width = static_cast(input_image_width); const f32 input_height = static_cast(input_image_height); - const f32 output_width = static_cast(screen.GetWidth()); - const f32 output_height = static_cast(screen.GetHeight()); + const f32 output_width = static_cast(width); + const f32 output_height = static_cast(height); const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_width; const f32 viewport_x = crop_rect.left * input_width; const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_height; const f32 viewport_y = crop_rect.top * input_height; - FsrConstants constants; - FsrEasuConOffset(constants.data() + 0, constants.data() + 4, constants.data() + 8, - constants.data() + 12, viewport_width, viewport_height, input_width, + FsrConstants easu_con{}; + FsrConstants rcas_con{}; + + FsrEasuConOffset(easu_con.data() + 0, easu_con.data() + 4, easu_con.data() + 8, + easu_con.data() + 12, viewport_width, viewport_height, input_width, input_height, output_width, output_height, viewport_x, viewport_y); - glProgramUniform4uiv(fsr_easu_frag.handle, 0, sizeof(constants), std::data(constants)); - - program_manager.BindPresentPrograms(fsr_vertex.handle, fsr_easu_frag.handle); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); - glBindTextureUnit(0, fsr_intermediate_tex.handle); - const float sharpening = static_cast(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f; - FsrRcasCon(constants.data(), sharpening); - glProgramUniform4uiv(fsr_rcas_frag.handle, 0, sizeof(constants), std::data(constants)); + FsrRcasCon(rcas_con.data(), sharpening); + + glProgramUniform4uiv(easu_frag.handle, 0, sizeof(easu_con), easu_con.data()); + 
glProgramUniform4uiv(rcas_frag.handle, 0, sizeof(rcas_con), rcas_con.data()); + + glFrontFace(GL_CW); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, easu_tex.handle, 0); + glViewportIndexedf(0, 0.0f, 0.0f, output_width, output_height); + program_manager.BindPresentPrograms(vert.handle, easu_frag.handle); + glBindTextureUnit(0, texture); + glBindSampler(0, sampler.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, rcas_tex.handle, 0); + program_manager.BindPresentPrograms(vert.handle, rcas_frag.handle); + glBindTextureUnit(0, easu_tex.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + + return rcas_tex.handle; } -void FSR::InitBuffers() { - fsr_framebuffer.Create(); -} - -void FSR::ReleaseBuffers() { - fsr_framebuffer.Release(); - fsr_intermediate_tex.Release(); -} - -const OGLProgram& FSR::GetPresentFragmentProgram() const noexcept { - return fsr_rcas_frag; -} - -bool FSR::AreBuffersInitialized() const noexcept { - return fsr_framebuffer.handle; +bool FSR::NeedsRecreation(const Common::Rectangle& screen) { + return screen.GetWidth() != width || screen.GetHeight() != height; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/fsr.h b/src/video_core/renderer_opengl/present/fsr.h index fa57c6f004..606935a012 100644 --- a/src/video_core/renderer_opengl/present/fsr.h +++ b/src/video_core/renderer_opengl/present/fsr.h @@ -16,27 +16,24 @@ class ProgramManager; class FSR { public: - explicit FSR(); + explicit FSR(u32 output_width, u32 output_height); ~FSR(); - void Draw(ProgramManager& program_manager, const Common::Rectangle& screen, - u32 input_image_width, u32 input_image_height, - const Common::Rectangle& crop_rect); + GLuint Draw(ProgramManager& program_manager, GLuint texture, u32 input_image_width, + u32 input_image_height, const Common::Rectangle& crop_rect); - void InitBuffers(); - - void 
ReleaseBuffers(); - - [[nodiscard]] const OGLProgram& GetPresentFragmentProgram() const noexcept; - - [[nodiscard]] bool AreBuffersInitialized() const noexcept; + bool NeedsRecreation(const Common::Rectangle& screen); private: - OGLFramebuffer fsr_framebuffer; - OGLProgram fsr_vertex; - OGLProgram fsr_easu_frag; - OGLProgram fsr_rcas_frag; - OGLTexture fsr_intermediate_tex; + const u32 width; + const u32 height; + OGLFramebuffer framebuffer; + OGLSampler sampler; + OGLProgram vert; + OGLProgram easu_frag; + OGLProgram rcas_frag; + OGLTexture easu_tex; + OGLTexture rcas_tex; }; } // namespace OpenGL From d4de04584f14f3ea8fde4cd79102b887c084fbc2 Mon Sep 17 00:00:00 2001 From: Liam Date: Mon, 15 Jan 2024 15:08:21 -0500 Subject: [PATCH 11/15] renderer_opengl: split up blit screen resources into antialias and window adapt passes --- src/video_core/CMakeLists.txt | 4 + .../renderer_opengl/gl_blit_screen.cpp | 278 +++++------------- .../renderer_opengl/gl_blit_screen.h | 26 +- .../renderer_opengl/present/filters.cpp | 39 +++ .../renderer_opengl/present/filters.h | 17 ++ .../renderer_opengl/present/fxaa.cpp | 1 + .../renderer_opengl/present/smaa.cpp | 6 - src/video_core/renderer_opengl/present/util.h | 11 + .../present/window_adapt_pass.cpp | 128 ++++++++ .../present/window_adapt_pass.h | 39 +++ .../renderer_opengl/renderer_opengl.cpp | 6 + .../present/window_adapt_pass.cpp | 6 +- .../renderer_vulkan/renderer_vulkan.cpp | 3 + 13 files changed, 332 insertions(+), 232 deletions(-) create mode 100644 src/video_core/renderer_opengl/present/filters.cpp create mode 100644 src/video_core/renderer_opengl/present/filters.h create mode 100644 src/video_core/renderer_opengl/present/window_adapt_pass.cpp create mode 100644 src/video_core/renderer_opengl/present/window_adapt_pass.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 9879c3ad70..c6b0d628d1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -116,6 +116,8 @@ 
add_library(video_core STATIC renderer_null/null_rasterizer.h renderer_null/renderer_null.cpp renderer_null/renderer_null.h + renderer_opengl/present/filters.cpp + renderer_opengl/present/filters.h renderer_opengl/present/fsr.cpp renderer_opengl/present/fsr.h renderer_opengl/present/fxaa.cpp @@ -123,6 +125,8 @@ add_library(video_core STATIC renderer_opengl/present/smaa.cpp renderer_opengl/present/smaa.h renderer_opengl/present/util.h + renderer_opengl/present/window_adapt_pass.cpp + renderer_opengl/present/window_adapt_pass.h renderer_opengl/blit_image.cpp renderer_opengl/blit_image.h renderer_opengl/gl_blit_screen.cpp diff --git a/src/video_core/renderer_opengl/gl_blit_screen.cpp b/src/video_core/renderer_opengl/gl_blit_screen.cpp index 5f6221b9be..f9dbef0fcd 100644 --- a/src/video_core/renderer_opengl/gl_blit_screen.cpp +++ b/src/video_core/renderer_opengl/gl_blit_screen.cpp @@ -2,100 +2,26 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "video_core/framebuffer_config.h" -#include "video_core/host_shaders/ffx_a_h.h" -#include "video_core/host_shaders/ffx_fsr1_h.h" -#include "video_core/host_shaders/full_screen_triangle_vert.h" -#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h" -#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h" -#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h" -#include "video_core/host_shaders/opengl_present_frag.h" -#include "video_core/host_shaders/opengl_present_scaleforce_frag.h" -#include "video_core/host_shaders/opengl_present_vert.h" -#include "video_core/host_shaders/present_bicubic_frag.h" -#include "video_core/host_shaders/present_gaussian_frag.h" - #include "video_core/renderer_opengl/gl_blit_screen.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" +#include 
"video_core/renderer_opengl/present/filters.h" #include "video_core/renderer_opengl/present/fsr.h" #include "video_core/renderer_opengl/present/fxaa.h" #include "video_core/renderer_opengl/present/smaa.h" +#include "video_core/renderer_opengl/present/window_adapt_pass.h" #include "video_core/textures/decoders.h" namespace OpenGL { -namespace { -constexpr GLint PositionLocation = 0; -constexpr GLint TexCoordLocation = 1; -constexpr GLint ModelViewMatrixLocation = 0; - -struct ScreenRectVertex { - constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v) - : position{{static_cast(x), static_cast(y)}}, tex_coord{{u, v}} {} - - std::array position; - std::array tex_coord; -}; - -/** - * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left - * corner and (width, height) on the lower-bottom. - * - * The projection part of the matrix is trivial, hence these operations are represented - * by a 3x2 matrix. - */ -std::array MakeOrthographicMatrix(float width, float height) { - std::array matrix; // Laid out in column-major order - - // clang-format off - matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; - matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; - // Last matrix row is implicitly assumed to be [0, 0, 1]. 
- // clang-format on - - return matrix; -} -} // namespace - BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_, StateTracker& state_tracker_, ProgramManager& program_manager_, Device& device_) : rasterizer(rasterizer_), device_memory(device_memory_), state_tracker(state_tracker_), program_manager(program_manager_), device(device_) { - // Create shader programs - present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); - present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); - present_gaussian_fragment = - CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); - present_scaleforce_fragment = - CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), - GL_FRAGMENT_SHADER); - - // Generate presentation sampler - present_sampler.Create(); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); - - present_sampler_nn.Create(); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); - - // Generate VBO handle for drawing - vertex_buffer.Create(); - - // Attach vertex data to VAO - 
glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); - // Allocate textures for the screen framebuffer_texture.resource.Create(GL_TEXTURE_2D); @@ -106,15 +32,6 @@ BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, const u8 framebuffer_data[4] = {0, 0, 0, 0}; glClearTexImage(framebuffer_texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data); - - // Enable unified vertex attributes and query vertex buffer address when the driver supports it - if (device.HasVertexBufferUnifiedMemory()) { - glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); - glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); - glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); - glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, - &vertex_buffer_address); - } } BlitScreen::~BlitScreen() = default; @@ -219,18 +136,14 @@ void BlitScreen::ConfigureFramebufferTexture(const Tegra::FramebufferConfig& fra glTextureStorage2D(framebuffer_texture.resource.handle, 1, internal_format, framebuffer_texture.width, framebuffer_texture.height); - fxaa = std::make_unique( - Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), - Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); - smaa = std::make_unique( - Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), - Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); + fxaa.reset(); + smaa.reset(); } void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, const Layout::FramebufferLayout& layout) { FramebufferTextureInfo info = PrepareRenderTarget(framebuffer); - const auto crop = Tegra::NormalizeCrop(framebuffer, info.width, info.height); + auto crop = Tegra::NormalizeCrop(framebuffer, info.width, info.height); // TODO: Signal state tracker about these changes state_tracker.NotifyScreenDrawVertexArray(); @@ -267,15 +180,14 @@ void BlitScreen::DrawScreen(const 
Tegra::FramebufferConfig& framebuffer, glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthRangeIndexed(0, 0.0, 0.0); + GLint old_read_fb; + GLint old_draw_fb; + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); + GLuint texture = info.display_texture; auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); - if (anti_aliasing >= Settings::AntiAliasing::MaxEnum) { - LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing); - anti_aliasing = Settings::AntiAliasing::None; - Settings::values.anti_aliasing.SetValue(anti_aliasing); - } - if (anti_aliasing != Settings::AntiAliasing::None) { glEnablei(GL_SCISSOR_TEST, 0); auto scissor_width = Settings::values.resolution_info.ScaleUp(framebuffer_texture.width); @@ -286,137 +198,83 @@ void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, glScissorIndexed(0, 0, 0, scissor_width, scissor_height); glViewportIndexedf(0, 0.0f, 0.0f, viewport_width, viewport_height); - glBindSampler(0, present_sampler.handle); - GLint old_read_fb; - GLint old_draw_fb; - glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); - switch (anti_aliasing) { - case Settings::AntiAliasing::Fxaa: { + case Settings::AntiAliasing::Fxaa: + CreateFXAA(); texture = fxaa->Draw(program_manager, info.display_texture); - } break; - case Settings::AntiAliasing::Smaa: { - texture = smaa->Draw(program_manager, info.display_texture); - } break; + break; + case Settings::AntiAliasing::Smaa: default: - UNREACHABLE(); + CreateSMAA(); + texture = smaa->Draw(program_manager, info.display_texture); + break; } - - glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); } + glDisablei(GL_SCISSOR_TEST, 0); if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { - GLint old_read_fb; - GLint old_draw_fb; - 
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); - if (!fsr || fsr->NeedsRecreation(layout.screen)) { fsr = std::make_unique(layout.screen.GetWidth(), layout.screen.GetHeight()); } texture = fsr->Draw(program_manager, texture, info.scaled_width, info.scaled_height, crop); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); + crop = {0, 0, 1, 1}; } - glBindTextureUnit(0, texture); + glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); - const std::array ortho_matrix = - MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - - const auto fragment_handle = [this]() { - switch (Settings::values.scaling_filter.GetValue()) { - case Settings::ScalingFilter::Bicubic: - return present_bicubic_fragment.handle; - case Settings::ScalingFilter::Gaussian: - return present_gaussian_fragment.handle; - case Settings::ScalingFilter::ScaleForce: - return present_scaleforce_fragment.handle; - case Settings::ScalingFilter::NearestNeighbor: - case Settings::ScalingFilter::Bilinear: - case Settings::ScalingFilter::Fsr: - default: - return present_bilinear_fragment.handle; - } - }(); - program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle); - glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, - ortho_matrix.data()); - - f32 left, top, right, bottom; - if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { - // FSR has already applied the crop, so we just want to render the image - // it has produced. - left = 0; - top = 0; - right = 1; - bottom = 1; - } else { - // Apply the precomputed crop. - left = crop.left; - top = crop.top; - right = crop.right; - bottom = crop.bottom; - } - - // Map the coordinates to the screen. 
- const auto& screen = layout.screen; - const auto x = screen.left; - const auto y = screen.top; - const auto w = screen.GetWidth(); - const auto h = screen.GetHeight(); - - const std::array vertices = { - ScreenRectVertex(x, y, left, top), - ScreenRectVertex(x + w, y, right, top), - ScreenRectVertex(x, y + h, left, bottom), - ScreenRectVertex(x + w, y + h, right, bottom), - }; - glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); - - glDisable(GL_FRAMEBUFFER_SRGB); - glViewportIndexedf(0, 0.0f, 0.0f, static_cast(layout.width), - static_cast(layout.height)); - - glEnableVertexAttribArray(PositionLocation); - glEnableVertexAttribArray(TexCoordLocation); - glVertexAttribDivisor(PositionLocation, 0); - glVertexAttribDivisor(TexCoordLocation, 0); - glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE, - offsetof(ScreenRectVertex, position)); - glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE, - offsetof(ScreenRectVertex, tex_coord)); - glVertexAttribBinding(PositionLocation, 0); - glVertexAttribBinding(TexCoordLocation, 0); - if (device.HasVertexBufferUnifiedMemory()) { - glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); - glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, - sizeof(vertices)); - } else { - glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); - } - - if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { - glBindSampler(0, present_sampler.handle); - } else { - glBindSampler(0, present_sampler_nn.handle); - } - - // Update background color before drawing - glClearColor(Settings::values.bg_red.GetValue() / 255.0f, - Settings::values.bg_green.GetValue() / 255.0f, - Settings::values.bg_blue.GetValue() / 255.0f, 1.0f); - - glClear(GL_COLOR_BUFFER_BIT); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + CreateWindowAdapt(); + window_adapt->DrawToFramebuffer(program_manager, texture, layout, crop); // TODO // 
program_manager.RestoreGuestPipeline(); } +void BlitScreen::CreateFXAA() { + smaa.reset(); + if (!fxaa) { + fxaa = std::make_unique( + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); + } +} + +void BlitScreen::CreateSMAA() { + fxaa.reset(); + if (!smaa) { + smaa = std::make_unique( + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); + } +} + +void BlitScreen::CreateWindowAdapt() { + if (window_adapt && Settings::values.scaling_filter.GetValue() == current_window_adapt) { + return; + } + + current_window_adapt = Settings::values.scaling_filter.GetValue(); + switch (current_window_adapt) { + case Settings::ScalingFilter::NearestNeighbor: + window_adapt = MakeNearestNeighbor(device); + break; + case Settings::ScalingFilter::Bicubic: + window_adapt = MakeBicubic(device); + break; + case Settings::ScalingFilter::Gaussian: + window_adapt = MakeGaussian(device); + break; + case Settings::ScalingFilter::ScaleForce: + window_adapt = MakeScaleForce(device); + break; + case Settings::ScalingFilter::Fsr: + case Settings::ScalingFilter::Bilinear: + default: + window_adapt = MakeBilinear(device); + break; + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_blit_screen.h b/src/video_core/renderer_opengl/gl_blit_screen.h index 2cb9a50159..f42f89dee4 100644 --- a/src/video_core/renderer_opengl/gl_blit_screen.h +++ b/src/video_core/renderer_opengl/gl_blit_screen.h @@ -18,6 +18,10 @@ namespace Tegra { struct FramebufferConfig; } +namespace Settings { +enum class ScalingFilter : u32; +} + namespace OpenGL { class Device; @@ -27,6 +31,7 @@ class ProgramManager; class RasterizerOpenGL; class SMAA; class StateTracker; +class WindowAdaptPass; /// Structure used for storing information about the textures for the Switch screen struct TextureInfo { @@ -61,29 +66,22 @@ public: void 
DrawScreen(const Tegra::FramebufferConfig& framebuffer, const Layout::FramebufferLayout& layout); - void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer); - /// Loads framebuffer from emulated memory into the active OpenGL texture. FramebufferTextureInfo LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); FramebufferTextureInfo PrepareRenderTarget(const Tegra::FramebufferConfig& framebuffer); private: + void CreateFXAA(); + void CreateSMAA(); + void CreateWindowAdapt(); + RasterizerOpenGL& rasterizer; Tegra::MaxwellDeviceMemoryManager& device_memory; StateTracker& state_tracker; ProgramManager& program_manager; Device& device; - OGLSampler present_sampler; - OGLSampler present_sampler_nn; - OGLBuffer vertex_buffer; - OGLProgram present_vertex; - OGLProgram present_bilinear_fragment; - OGLProgram present_bicubic_fragment; - OGLProgram present_gaussian_fragment; - OGLProgram present_scaleforce_fragment; - /// Display information for Switch screen TextureInfo framebuffer_texture; @@ -91,11 +89,11 @@ private: std::unique_ptr fxaa; std::unique_ptr smaa; + Settings::ScalingFilter current_window_adapt{}; + std::unique_ptr window_adapt; + /// OpenGL framebuffer data std::vector gl_framebuffer_data; - - // GPU address of the vertex buffer - GLuint64EXT vertex_buffer_address = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/filters.cpp b/src/video_core/renderer_opengl/present/filters.cpp new file mode 100644 index 0000000000..819e5d77f4 --- /dev/null +++ b/src/video_core/renderer_opengl/present/filters.cpp @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/host_shaders/opengl_present_frag.h" +#include "video_core/host_shaders/opengl_present_scaleforce_frag.h" +#include "video_core/host_shaders/present_bicubic_frag.h" +#include "video_core/host_shaders/present_gaussian_frag.h" +#include 
"video_core/renderer_opengl/present/filters.h" +#include "video_core/renderer_opengl/present/util.h" + +namespace OpenGL { + +std::unique_ptr MakeNearestNeighbor(const Device& device) { + return std::make_unique(device, CreateNearestNeighborSampler(), + HostShaders::OPENGL_PRESENT_FRAG); +} + +std::unique_ptr MakeBilinear(const Device& device) { + return std::make_unique(device, CreateBilinearSampler(), + HostShaders::OPENGL_PRESENT_FRAG); +} + +std::unique_ptr MakeBicubic(const Device& device) { + return std::make_unique(device, CreateBilinearSampler(), + HostShaders::PRESENT_BICUBIC_FRAG); +} + +std::unique_ptr MakeGaussian(const Device& device) { + return std::make_unique(device, CreateBilinearSampler(), + HostShaders::PRESENT_GAUSSIAN_FRAG); +} + +std::unique_ptr MakeScaleForce(const Device& device) { + return std::make_unique( + device, CreateBilinearSampler(), + fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG)); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/filters.h b/src/video_core/renderer_opengl/present/filters.h new file mode 100644 index 0000000000..122ab74365 --- /dev/null +++ b/src/video_core/renderer_opengl/present/filters.h @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "video_core/renderer_opengl/present/window_adapt_pass.h" + +namespace OpenGL { + +std::unique_ptr MakeNearestNeighbor(const Device& device); +std::unique_ptr MakeBilinear(const Device& device); +std::unique_ptr MakeBicubic(const Device& device); +std::unique_ptr MakeGaussian(const Device& device); +std::unique_ptr MakeScaleForce(const Device& device); + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/fxaa.cpp b/src/video_core/renderer_opengl/present/fxaa.cpp index 9425c42fad..d9b58512de 100644 --- a/src/video_core/renderer_opengl/present/fxaa.cpp +++ 
b/src/video_core/renderer_opengl/present/fxaa.cpp @@ -31,6 +31,7 @@ GLuint FXAA::Draw(ProgramManager& program_manager, GLuint input_texture) { program_manager.BindPresentPrograms(vert_shader.handle, frag_shader.handle); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); glBindTextureUnit(0, input_texture); + glBindSampler(0, sampler.handle); glDrawArrays(GL_TRIANGLES, 0, 3); glFrontFace(GL_CW); diff --git a/src/video_core/renderer_opengl/present/smaa.cpp b/src/video_core/renderer_opengl/present/smaa.cpp index a9a0eb6c69..de7f6e5021 100644 --- a/src/video_core/renderer_opengl/present/smaa.cpp +++ b/src/video_core/renderer_opengl/present/smaa.cpp @@ -36,13 +36,7 @@ SMAA::SMAA(u32 width, u32 height) { SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_FRAG, GL_FRAGMENT_SHADER); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); - glPixelStorei(GL_UNPACK_LSB_FIRST, GL_FALSE); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0); - glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0); - glPixelStorei(GL_UNPACK_SKIP_ROWS, 0); - glPixelStorei(GL_UNPACK_ALIGNMENT, 4); area_tex.Create(GL_TEXTURE_2D); glTextureStorage2D(area_tex.handle, 1, GL_RG8, AREATEX_WIDTH, AREATEX_HEIGHT); diff --git a/src/video_core/renderer_opengl/present/util.h b/src/video_core/renderer_opengl/present/util.h index 0aa8b110c1..67f03aa275 100644 --- a/src/video_core/renderer_opengl/present/util.h +++ b/src/video_core/renderer_opengl/present/util.h @@ -29,4 +29,15 @@ static inline OGLSampler CreateBilinearSampler() { return sampler; } +static inline OGLSampler CreateNearestNeighborSampler() { + OGLSampler sampler; + sampler.Create(); + glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glSamplerParameteri(sampler.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + 
glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); + return sampler; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/window_adapt_pass.cpp b/src/video_core/renderer_opengl/present/window_adapt_pass.cpp new file mode 100644 index 0000000000..168fa1aea3 --- /dev/null +++ b/src/video_core/renderer_opengl/present/window_adapt_pass.cpp @@ -0,0 +1,128 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/settings.h" +#include "video_core/host_shaders/opengl_present_vert.h" +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/present/window_adapt_pass.h" + +namespace OpenGL { + +namespace { +constexpr GLint PositionLocation = 0; +constexpr GLint TexCoordLocation = 1; +constexpr GLint ModelViewMatrixLocation = 0; + +struct ScreenRectVertex { + constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v) + : position{{static_cast(x), static_cast(y)}}, tex_coord{{u, v}} {} + + std::array position; + std::array tex_coord; +}; + +/** + * Defines a 1:1 pixel orthographic projection matrix with (0,0) on the top-left + * corner and (width, height) on the lower-bottom. + * + * The projection part of the matrix is trivial, hence these operations are represented + * by a 3x2 matrix. + */ +std::array MakeOrthographicMatrix(float width, float height) { + std::array matrix; // Laid out in column-major order + + // clang-format off + matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; + matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; + // Last matrix row is implicitly assumed to be [0, 0, 1]. 
+ // clang-format on + + return matrix; +} +} // namespace + +WindowAdaptPass::WindowAdaptPass(const Device& device_, OGLSampler&& sampler_, + std::string_view frag_source) + : device(device_), sampler(std::move(sampler_)) { + vert = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); + frag = CreateProgram(frag_source, GL_FRAGMENT_SHADER); + + // Generate VBO handle for drawing + vertex_buffer.Create(); + + // Attach vertex data to VAO + glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); + + // Query vertex buffer address when the driver supports unified vertex attributes + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } +} + +WindowAdaptPass::~WindowAdaptPass() = default; + +void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, GLuint texture, + const Layout::FramebufferLayout& layout, + const Common::Rectangle& crop) { + glBindTextureUnit(0, texture); + + const std::array ortho_matrix = + MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); + + program_manager.BindPresentPrograms(vert.handle, frag.handle); + glProgramUniformMatrix3x2fv(vert.handle, ModelViewMatrixLocation, 1, GL_FALSE, + ortho_matrix.data()); + + // Map the coordinates to the screen. 
+ const auto& screen = layout.screen; + const auto x = screen.left; + const auto y = screen.top; + const auto w = screen.GetWidth(); + const auto h = screen.GetHeight(); + + const std::array vertices = { + ScreenRectVertex(x, y, crop.left, crop.top), + ScreenRectVertex(x + w, y, crop.right, crop.top), + ScreenRectVertex(x, y + h, crop.left, crop.bottom), + ScreenRectVertex(x + w, y + h, crop.right, crop.bottom), + }; + glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); + + glDisable(GL_FRAMEBUFFER_SRGB); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast(layout.width), + static_cast(layout.height)); + + glEnableVertexAttribArray(PositionLocation); + glEnableVertexAttribArray(TexCoordLocation); + glVertexAttribDivisor(PositionLocation, 0); + glVertexAttribDivisor(TexCoordLocation, 0); + glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE, + offsetof(ScreenRectVertex, position)); + glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE, + offsetof(ScreenRectVertex, tex_coord)); + glVertexAttribBinding(PositionLocation, 0); + glVertexAttribBinding(TexCoordLocation, 0); + if (device.HasVertexBufferUnifiedMemory()) { + glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, + sizeof(vertices)); + } else { + glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + } + + glBindSampler(0, sampler.handle); + + // Update background color before drawing + glClearColor(Settings::values.bg_red.GetValue() / 255.0f, + Settings::values.bg_green.GetValue() / 255.0f, + Settings::values.bg_blue.GetValue() / 255.0f, 1.0f); + + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/window_adapt_pass.h b/src/video_core/renderer_opengl/present/window_adapt_pass.h new file mode 100644 index 0000000000..65dcd09ffa --- /dev/null +++ 
b/src/video_core/renderer_opengl/present/window_adapt_pass.h @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/math_util.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace OpenGL { + +class Device; +class ProgramManager; + +class WindowAdaptPass final { +public: + explicit WindowAdaptPass(const Device& device, OGLSampler&& sampler, + std::string_view frag_source); + ~WindowAdaptPass(); + + void DrawToFramebuffer(ProgramManager& program_manager, GLuint texture, + const Layout::FramebufferLayout& layout, + const Common::Rectangle& crop); + +private: + const Device& device; + OGLSampler sampler; + OGLProgram vert; + OGLProgram frag; + OGLBuffer vertex_buffer; + + // GPU address of the vertex buffer + GLuint64EXT vertex_buffer_address = 0; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 3d75fd17a3..0d138c1897 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -113,6 +113,12 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); } + + // Enable unified vertex attributes when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); + } blit_screen = std::make_unique(rasterizer, device_memory, state_tracker, program_manager, device); } diff --git a/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp b/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp index 7fd9ecd22d..1d1828a4c8 100644 --- 
a/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp +++ b/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp @@ -92,7 +92,9 @@ void WindowAdaptPass::Draw(Scheduler& scheduler, size_t image_index, VkImageView const VkFramebuffer host_framebuffer{*dst->framebuffer}; const VkRenderPass renderpass{*render_pass}; const VkPipeline graphics_pipeline{*pipeline}; + const VkPipelineLayout graphics_pipeline_layout{*pipeline_layout}; const VkDescriptorSet descriptor_set{descriptor_sets[image_index]}; + const VkBuffer vertex_buffer{*buffer}; const VkExtent2D render_area{ .width = dst->width, .height = dst->height, @@ -134,8 +136,8 @@ void WindowAdaptPass::Draw(Scheduler& scheduler, size_t image_index, VkImageView cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); - cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + cmdbuf.BindVertexBuffer(0, vertex_buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline_layout, 0, descriptor_set, {}); cmdbuf.Draw(4, 1, 0, 0); cmdbuf.EndRenderPass(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 2912aaff6a..a99ef08a5c 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -222,6 +222,9 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr .image = std::move(staging_image), .image_view = std::move(dst_view), .framebuffer = std::move(screenshot_fb), + .cmdbuf{}, + .render_ready{}, + .present_done{}, }; }(); From 9bdf09bd768f73073e9d1cbc65febfd7f7955db3 Mon Sep 17 00:00:00 2001 From: Liam Date: Thu, 18 Jan 2024 11:44:13 -0500 Subject: [PATCH 12/15] renderer_vulkan: implement layer stack composition --- 
src/video_core/CMakeLists.txt | 3 + src/video_core/engines/maxwell_dma.h | 1 + .../opengl_present_scaleforce.frag | 12 +- .../host_shaders/present_bicubic.frag | 12 +- .../host_shaders/present_gaussian.frag | 12 +- .../host_shaders/vulkan_present.frag | 2 +- .../host_shaders/vulkan_present.vert | 37 +- .../vulkan_present_scaleforce_fp16.frag | 1 + .../vulkan_present_scaleforce_fp32.frag | 2 + .../renderer_vulkan/present/filters.cpp | 34 +- .../renderer_vulkan/present/filters.h | 24 +- .../renderer_vulkan/present/layer.cpp | 336 ++++++++++++ .../renderer_vulkan/present/layer.h | 92 ++++ .../present/present_push_constants.h | 34 ++ .../renderer_vulkan/present/util.cpp | 31 +- src/video_core/renderer_vulkan/present/util.h | 6 +- .../present/window_adapt_pass.cpp | 507 +++--------------- .../present/window_adapt_pass.h | 29 +- .../renderer_vulkan/renderer_vulkan.cpp | 8 +- .../renderer_vulkan/renderer_vulkan.h | 2 +- .../renderer_vulkan/vk_blit_screen.cpp | 378 ++----------- .../renderer_vulkan/vk_blit_screen.h | 59 +- 22 files changed, 666 insertions(+), 956 deletions(-) create mode 100644 src/video_core/renderer_vulkan/present/layer.cpp create mode 100644 src/video_core/renderer_vulkan/present/layer.h create mode 100644 src/video_core/renderer_vulkan/present/present_push_constants.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c6b0d628d1..7526de699d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -174,6 +174,9 @@ add_library(video_core STATIC renderer_vulkan/present/fsr.h renderer_vulkan/present/fxaa.cpp renderer_vulkan/present/fxaa.h + renderer_vulkan/present/layer.cpp + renderer_vulkan/present/layer.h + renderer_vulkan/present/present_push_constants.h renderer_vulkan/present/smaa.cpp renderer_vulkan/present/smaa.h renderer_vulkan/present/util.cpp diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 1a43e24b6b..99341e431c 100644 --- 
a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -8,6 +8,7 @@ #include #include "common/bit_field.h" +#include "common/common_funcs.h" #include "common/common_types.h" #include "common/scratch_buffer.h" #include "video_core/engines/engine_interface.h" diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag index a780373e34..1598575a1c 100644 --- a/src/video_core/host_shaders/opengl_present_scaleforce.frag +++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag @@ -26,21 +26,11 @@ #endif -#ifdef VULKAN - -#define BINDING_COLOR_TEXTURE 1 - -#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv - -#define BINDING_COLOR_TEXTURE 0 - -#endif - layout (location = 0) in vec2 tex_coord; layout (location = 0) out vec4 frag_color; -layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; +layout (binding = 0) uniform sampler2D input_texture; const bool ignore_alpha = true; diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag index c57dd28518..c814629cf1 100644 --- a/src/video_core/host_shaders/present_bicubic.frag +++ b/src/video_core/host_shaders/present_bicubic.frag @@ -3,22 +3,12 @@ #version 460 core -#ifdef VULKAN - -#define BINDING_COLOR_TEXTURE 1 - -#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv - -#define BINDING_COLOR_TEXTURE 0 - -#endif - layout (location = 0) in vec2 frag_tex_coord; layout (location = 0) out vec4 color; -layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; +layout (binding = 0) uniform sampler2D color_texture; vec4 cubic(float v) { vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag index 5f54b71b60..ad9bb76a49 100644 --- a/src/video_core/host_shaders/present_gaussian.frag +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -7,21 +7,11 @@ 
#version 460 core -#ifdef VULKAN - -#define BINDING_COLOR_TEXTURE 1 - -#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv - -#define BINDING_COLOR_TEXTURE 0 - -#endif - layout(location = 0) in vec2 frag_tex_coord; layout(location = 0) out vec4 color; -layout(binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; +layout(binding = 0) uniform sampler2D color_texture; const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308); const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703); diff --git a/src/video_core/host_shaders/vulkan_present.frag b/src/video_core/host_shaders/vulkan_present.frag index 97e098cedc..adada94115 100644 --- a/src/video_core/host_shaders/vulkan_present.frag +++ b/src/video_core/host_shaders/vulkan_present.frag @@ -7,7 +7,7 @@ layout (location = 0) in vec2 frag_tex_coord; layout (location = 0) out vec4 color; -layout (binding = 1) uniform sampler2D color_texture; +layout (binding = 0) uniform sampler2D color_texture; void main() { color = texture(color_texture, frag_tex_coord); diff --git a/src/video_core/host_shaders/vulkan_present.vert b/src/video_core/host_shaders/vulkan_present.vert index 89dc80468d..249c9675a2 100644 --- a/src/video_core/host_shaders/vulkan_present.vert +++ b/src/video_core/host_shaders/vulkan_present.vert @@ -3,16 +3,37 @@ #version 460 core -layout (location = 0) in vec2 vert_position; -layout (location = 1) in vec2 vert_tex_coord; - layout (location = 0) out vec2 frag_tex_coord; -layout (set = 0, binding = 0) uniform MatrixBlock { - mat4 modelview_matrix; +struct ScreenRectVertex { + vec2 position; + vec2 tex_coord; }; -void main() { - gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0); - frag_tex_coord = vert_tex_coord; +layout (push_constant) uniform PushConstants { + mat4 modelview_matrix; + ScreenRectVertex vertices[4]; +}; + +// Vulkan spec 15.8.1: +// Any member of a push constant block that is declared as an +// array must only be accessed with dynamically uniform indices. 
+ScreenRectVertex GetVertex(int index) { + switch (index) { + case 0: + default: + return vertices[0]; + case 1: + return vertices[1]; + case 2: + return vertices[2]; + case 3: + return vertices[3]; + } +} + +void main() { + ScreenRectVertex vertex = GetVertex(gl_VertexIndex); + gl_Position = modelview_matrix * vec4(vertex.position, 0.0, 1.0); + frag_tex_coord = vertex.tex_coord; } diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag index 3dc9c0df5a..79ea817c2f 100644 --- a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag @@ -5,6 +5,7 @@ #extension GL_GOOGLE_include_directive : enable +#define VERSION 1 #define YUZU_USE_FP16 #include "opengl_present_scaleforce.frag" diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag index 77ed075528..9605bb58bd 100644 --- a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag @@ -5,4 +5,6 @@ #extension GL_GOOGLE_include_directive : enable +#define VERSION 1 + #include "opengl_present_scaleforce.frag" diff --git a/src/video_core/renderer_vulkan/present/filters.cpp b/src/video_core/renderer_vulkan/present/filters.cpp index ee6239cc41..b5e08938e7 100644 --- a/src/video_core/renderer_vulkan/present/filters.cpp +++ b/src/video_core/renderer_vulkan/present/filters.cpp @@ -27,43 +27,29 @@ vk::ShaderModule SelectScaleForceShader(const Device& device) { } // Anonymous namespace -std::unique_ptr MakeNearestNeighbor(const Device& device, - const MemoryAllocator& memory_allocator, - size_t image_count, VkFormat frame_format) { - return std::make_unique(device, memory_allocator, image_count, frame_format, +std::unique_ptr MakeNearestNeighbor(const Device& device, VkFormat frame_format) { + return 
std::make_unique(device, frame_format, CreateNearestNeighborSampler(device), BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); } -std::unique_ptr MakeBilinear(const Device& device, - const MemoryAllocator& memory_allocator, - size_t image_count, VkFormat frame_format) { - return std::make_unique(device, memory_allocator, image_count, frame_format, - CreateBilinearSampler(device), +std::unique_ptr MakeBilinear(const Device& device, VkFormat frame_format) { + return std::make_unique(device, frame_format, CreateBilinearSampler(device), BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); } -std::unique_ptr MakeBicubic(const Device& device, - const MemoryAllocator& memory_allocator, - size_t image_count, VkFormat frame_format) { - return std::make_unique(device, memory_allocator, image_count, frame_format, - CreateBilinearSampler(device), +std::unique_ptr MakeBicubic(const Device& device, VkFormat frame_format) { + return std::make_unique(device, frame_format, CreateBilinearSampler(device), BuildShader(device, PRESENT_BICUBIC_FRAG_SPV)); } -std::unique_ptr MakeGaussian(const Device& device, - const MemoryAllocator& memory_allocator, - size_t image_count, VkFormat frame_format) { - return std::make_unique(device, memory_allocator, image_count, frame_format, - CreateBilinearSampler(device), +std::unique_ptr MakeGaussian(const Device& device, VkFormat frame_format) { + return std::make_unique(device, frame_format, CreateBilinearSampler(device), BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV)); } -std::unique_ptr MakeScaleForce(const Device& device, - const MemoryAllocator& memory_allocator, - size_t image_count, VkFormat frame_format) { - return std::make_unique(device, memory_allocator, image_count, frame_format, - CreateBilinearSampler(device), +std::unique_ptr MakeScaleForce(const Device& device, VkFormat frame_format) { + return std::make_unique(device, frame_format, CreateBilinearSampler(device), SelectScaleForceShader(device)); } diff --git 
a/src/video_core/renderer_vulkan/present/filters.h b/src/video_core/renderer_vulkan/present/filters.h index 42d7052da7..6c83726dd4 100644 --- a/src/video_core/renderer_vulkan/present/filters.h +++ b/src/video_core/renderer_vulkan/present/filters.h @@ -7,24 +7,12 @@ namespace Vulkan { -std::unique_ptr MakeNearestNeighbor(const Device& device, - const MemoryAllocator& memory_allocator, - size_t image_count, VkFormat frame_format); +class MemoryAllocator; -std::unique_ptr MakeBilinear(const Device& device, - const MemoryAllocator& memory_allocator, - size_t image_count, VkFormat frame_format); - -std::unique_ptr MakeBicubic(const Device& device, - const MemoryAllocator& memory_allocator, - size_t image_count, VkFormat frame_format); - -std::unique_ptr MakeGaussian(const Device& device, - const MemoryAllocator& memory_allocator, - size_t image_count, VkFormat frame_format); - -std::unique_ptr MakeScaleForce(const Device& device, - const MemoryAllocator& memory_allocator, - size_t image_count, VkFormat frame_format); +std::unique_ptr MakeNearestNeighbor(const Device& device, VkFormat frame_format); +std::unique_ptr MakeBilinear(const Device& device, VkFormat frame_format); +std::unique_ptr MakeBicubic(const Device& device, VkFormat frame_format); +std::unique_ptr MakeGaussian(const Device& device, VkFormat frame_format); +std::unique_ptr MakeScaleForce(const Device& device, VkFormat frame_format); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/layer.cpp b/src/video_core/renderer_vulkan/present/layer.cpp new file mode 100644 index 0000000000..cfc04be44d --- /dev/null +++ b/src/video_core/renderer_vulkan/present/layer.cpp @@ -0,0 +1,336 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/renderer_vulkan/vk_rasterizer.h" + +#include "common/settings.h" +#include "video_core/framebuffer_config.h" +#include "video_core/renderer_vulkan/present/fsr.h" +#include 
"video_core/renderer_vulkan/present/fxaa.h" +#include "video_core/renderer_vulkan/present/layer.h" +#include "video_core/renderer_vulkan/present/present_push_constants.h" +#include "video_core/renderer_vulkan/present/smaa.h" +#include "video_core/renderer_vulkan/present/util.h" +#include "video_core/renderer_vulkan/vk_blit_screen.h" +#include "video_core/textures/decoders.h" + +namespace Vulkan { + +namespace { + +u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { + using namespace VideoCore::Surface; + return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); +} + +std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { + return static_cast(framebuffer.stride) * + static_cast(framebuffer.height) * GetBytesPerPixel(framebuffer); +} + +VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { + switch (framebuffer.pixel_format) { + case Service::android::PixelFormat::Rgba8888: + case Service::android::PixelFormat::Rgbx8888: + return VK_FORMAT_A8B8G8R8_UNORM_PACK32; + case Service::android::PixelFormat::Rgb565: + return VK_FORMAT_R5G6B5_UNORM_PACK16; + case Service::android::PixelFormat::Bgra8888: + return VK_FORMAT_B8G8R8A8_UNORM; + default: + UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", + static_cast(framebuffer.pixel_format)); + return VK_FORMAT_A8B8G8R8_UNORM_PACK32; + } +} + +} // Anonymous namespace + +Layer::Layer(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_, + Tegra::MaxwellDeviceMemoryManager& device_memory_, size_t image_count_, + VkExtent2D output_size, VkDescriptorSetLayout layout) + : device(device_), memory_allocator(memory_allocator_), scheduler(scheduler_), + device_memory(device_memory_), image_count(image_count_) { + CreateDescriptorPool(); + CreateDescriptorSets(layout); + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + CreateFSR(output_size); + } +} + +Layer::~Layer() { + ReleaseRawImages(); +} + 
/* Prepares this layer for drawing image_index. Asks the rasterizer for an accelerated display texture and, when none is returned, uploads the framebuffer into a raw image instead. Runs the anti-alias pass and the optional FSR pass on the source image, then fills out_push_constants and out_descriptor_set for the final draw. */ +void Layer::ConfigureDraw(PresentPushConstants* out_push_constants, + VkDescriptorSet* out_descriptor_set, RasterizerVulkan& rasterizer, + VkSampler sampler, size_t image_index, + const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout) { + const auto texture_info = rasterizer.AccelerateDisplay( + framebuffer, framebuffer.address + framebuffer.offset, framebuffer.stride); + const u32 texture_width = texture_info ? texture_info->width : framebuffer.width; + const u32 texture_height = texture_info ? texture_info->height : framebuffer.height; + const u32 scaled_width = texture_info ? texture_info->scaled_width : texture_width; + const u32 scaled_height = texture_info ? texture_info->scaled_height : texture_height; + const bool use_accelerated = texture_info.has_value(); + /* Rebuild the size or format dependent resources and the anti-alias pass if needed. */ + RefreshResources(framebuffer); + SetAntiAliasPass(); + + // Finish any pending renderpass + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Wait(resource_ticks[image_index]); + SCOPE_EXIT({ resource_ticks[image_index] = scheduler.CurrentTick(); }); + /* No accelerated texture: upload the framebuffer contents from device memory. */ + if (!use_accelerated) { + UpdateRawImage(framebuffer, image_index); + } + + VkImage source_image = texture_info ? texture_info->image : *raw_images[image_index]; + VkImageView source_image_view = + texture_info ? 
texture_info->image_view : *raw_image_views[image_index]; + + anti_alias->Draw(scheduler, image_index, &source_image, &source_image_view); + + auto crop_rect = Tegra::NormalizeCrop(framebuffer, texture_width, texture_height); + const VkExtent2D render_extent{ + .width = scaled_width, + .height = scaled_height, + }; + /* FSR takes the crop rectangle as an input, so afterwards the crop is reset to {0, 0, 1, 1}. */ + if (fsr) { + source_image_view = fsr->Draw(scheduler, image_index, source_image, source_image_view, + render_extent, crop_rect); + crop_rect = {0, 0, 1, 1}; + } + + SetMatrixData(*out_push_constants, layout); + SetVertexData(*out_push_constants, layout, crop_rect); + + UpdateDescriptorSet(source_image_view, sampler, image_index); + *out_descriptor_set = descriptor_sets[image_index]; +} + /* Creates the descriptor pool, passing image_count for both size arguments of CreateWrappedDescriptorPool. */ +void Layer::CreateDescriptorPool() { + descriptor_pool = CreateWrappedDescriptorPool(device, image_count, image_count); +} + /* Creates image_count descriptor sets from the pool, all with the same layout. */ +void Layer::CreateDescriptorSets(VkDescriptorSetLayout layout) { + const std::vector layouts(image_count, layout); + descriptor_sets = CreateWrappedDescriptorSets(descriptor_pool, layouts); +} + /* Creates the upload buffer used to stage raw framebuffer data; its size covers one linear framebuffer per image (see CalculateBufferSize). */ +void Layer::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { + const VkBufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = CalculateBufferSize(framebuffer), + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + + buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload); +} + /* Creates one framebuffer-sized raw image and image view per image index and sizes resource_ticks to match. */ +void Layer::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { + const auto format = GetFormat(framebuffer); + resource_ticks.resize(image_count); + raw_images.resize(image_count); + raw_image_views.resize(image_count); + + for (size_t i = 0; i < image_count; ++i) { + raw_images[i] = + CreateWrappedImage(memory_allocator, {framebuffer.width, framebuffer.height}, 
format); + raw_image_views[i] = CreateWrappedImageView(device, raw_images[i], format); + } +} + /* Creates the FSR pass for the given output size. */ +void Layer::CreateFSR(VkExtent2D output_size) { + fsr = std::make_unique(device, memory_allocator, image_count, output_size); +} + /* Recreates the staging buffer and raw images when the framebuffer width, height or pixel format differs from the cached values, or when no raw images exist yet. Also resets the anti-alias pass so that SetAntiAliasPass recreates it. */ +void Layer::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { + if (framebuffer.width == raw_width && framebuffer.height == raw_height && + framebuffer.pixel_format == pixel_format && !raw_images.empty()) { + return; + } + + raw_width = framebuffer.width; + raw_height = framebuffer.height; + pixel_format = framebuffer.pixel_format; + anti_alias.reset(); + + ReleaseRawImages(); + CreateStagingBuffer(framebuffer); + CreateRawImages(framebuffer); +} + /* Creates the anti-alias pass for the current setting unless one already exists for that setting. The render area is the raw size passed through resolution_info.ScaleUp. */ +void Layer::SetAntiAliasPass() { + if (anti_alias && anti_alias_setting == Settings::values.anti_aliasing.GetValue()) { + return; + } + + anti_alias_setting = Settings::values.anti_aliasing.GetValue(); + + const VkExtent2D render_area{ + .width = Settings::values.resolution_info.ScaleUp(raw_width), + .height = Settings::values.resolution_info.ScaleUp(raw_height), + }; + + switch (anti_alias_setting) { + case Settings::AntiAliasing::Fxaa: + anti_alias = std::make_unique(device, memory_allocator, image_count, render_area); + break; + case Settings::AntiAliasing::Smaa: + anti_alias = std::make_unique(device, memory_allocator, image_count, render_area); + break; + default: + anti_alias = std::make_unique(); + break; + } +} + /* Waits on every recorded resource tick, then drops the raw images and the staging buffer. */ +void Layer::ReleaseRawImages() { + for (const u64 tick : resource_ticks) { + scheduler.Wait(tick); + } + raw_images.clear(); + buffer.reset(); +} + /* Total staging buffer size: one linear framebuffer for each of the image_count images. */ +u64 Layer::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const { + return GetSizeInBytes(framebuffer) * image_count; +} + /* Byte offset of the slice for image_index inside the staging buffer. */ +u64 Layer::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, + size_t image_index) const { + return GetSizeInBytes(framebuffer) * image_index; +} + /* Stores an orthographic matrix built from the layout width and height in the push constants. */ +void Layer::SetMatrixData(PresentPushConstants& data, + const Layout::FramebufferLayout& layout) const { + 
data.modelview_matrix = + MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); +} + /* Fills the four quad vertices: positions span layout.screen and texture coordinates come from the crop rectangle. */ +void Layer::SetVertexData(PresentPushConstants& data, const Layout::FramebufferLayout& layout, + const Common::Rectangle& crop) const { + // Map the coordinates to the screen. + const auto& screen = layout.screen; + const auto x = static_cast(screen.left); + const auto y = static_cast(screen.top); + const auto w = static_cast(screen.GetWidth()); + const auto h = static_cast(screen.GetHeight()); + + data.vertices[0] = ScreenRectVertex(x, y, crop.left, crop.top); + data.vertices[1] = ScreenRectVertex(x + w, y, crop.right, crop.top); + data.vertices[2] = ScreenRectVertex(x, y + h, crop.left, crop.bottom); + data.vertices[3] = ScreenRectVertex(x + w, y + h, crop.right, crop.bottom); +} + /* Writes image_view and sampler as a combined image sampler to binding 0 of the descriptor set for image_index. */ +void Layer::UpdateDescriptorSet(VkImageView image_view, VkSampler sampler, size_t image_index) { + const VkDescriptorImageInfo image_info{ + .sampler = sampler, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + + const VkWriteDescriptorSet sampler_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_sets[image_index], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + + device.GetLogical().UpdateDescriptorSets(std::array{sampler_write}, {}); +} + /* Unswizzles the framebuffer from device memory into the staging slice for image_index, then records the copy from the staging buffer into the raw image. */ +void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t image_index) { + const std::span mapped_span = buffer.Mapped(); + + const u64 image_offset = GetRawImageOffset(framebuffer, image_index); + + const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset; + const u8* const host_ptr = device_memory.GetPointer(framebuffer_addr); + + // TODO(Rodrigo): Read this from HLE + constexpr u32 block_height_log2 = 4; + const u32 bytes_per_pixel = 
GetBytesPerPixel(framebuffer); + const u64 linear_size{GetSizeInBytes(framebuffer)}; + const u64 tiled_size{Tegra::Texture::CalculateSize( + true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; + Tegra::Texture::UnswizzleTexture( + mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size), + bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); + /* Copy region: the full width by height image, read from the staging slice at image_offset. NOTE(review): the sizes above are derived from stride while the unswizzle and this copy use width; confirm that both agree for raw framebuffers. */ + const VkBufferImageCopy copy{ + .bufferOffset = image_offset, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {.x = 0, .y = 0, .z = 0}, + .imageExtent = + { + .width = framebuffer.width, + .height = framebuffer.height, + .depth = 1, + }, + }; + scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) { + const VkImage image = *raw_images[index]; + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + VkImageMemoryBarrier read_barrier = base_barrier; + read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + read_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + /* Barrier used after the copy: transfer write to shader read; newLayout is left as GENERAL from base_barrier. */ + VkImageMemoryBarrier write_barrier = base_barrier; + write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + write_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, 
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, write_barrier); + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/layer.h b/src/video_core/renderer_vulkan/present/layer.h new file mode 100644 index 0000000000..88d43fc5f6 --- /dev/null +++ b/src/video_core/renderer_vulkan/present/layer.h @@ -0,0 +1,92 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/math_util.h" +#include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace Tegra { +struct FramebufferConfig; +} + +namespace Service::android { +enum class PixelFormat : u32; +} + +namespace Settings { +enum class AntiAliasing : u32; +} + +namespace Vulkan { + +class AntiAliasPass; +class Device; +class FSR; +class MemoryAllocator; +struct PresentPushConstants; +class RasterizerVulkan; +class Scheduler; + /* Per-layer presentation state: descriptor sets, raw images with their staging buffer, and the anti-alias and FSR passes used to prepare one framebuffer for the final draw. */ +class Layer final { +public: + explicit Layer(const Device& device, MemoryAllocator& memory_allocator, Scheduler& scheduler, + Tegra::MaxwellDeviceMemoryManager& device_memory, size_t image_count, + VkExtent2D output_size, VkDescriptorSetLayout layout); + ~Layer(); + /* Prepares the source image for image_index and writes the push constants and descriptor set that the caller needs to draw this layer. */ + void ConfigureDraw(PresentPushConstants* out_push_constants, + VkDescriptorSet* out_descriptor_set, RasterizerVulkan& rasterizer, + VkSampler sampler, size_t image_index, + const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout); + +private: + void CreateDescriptorPool(); + void CreateDescriptorSets(VkDescriptorSetLayout layout); + void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); + void CreateRawImages(const 
Tegra::FramebufferConfig& framebuffer); + void CreateFSR(VkExtent2D output_size); + + void RefreshResources(const Tegra::FramebufferConfig& framebuffer); + void SetAntiAliasPass(); + void ReleaseRawImages(); + + u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; + u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, size_t image_index) const; + + void SetMatrixData(PresentPushConstants& data, const Layout::FramebufferLayout& layout) const; + void SetVertexData(PresentPushConstants& data, const Layout::FramebufferLayout& layout, + const Common::Rectangle& crop) const; + void UpdateDescriptorSet(VkImageView image_view, VkSampler sampler, size_t image_index); + void UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t image_index); + +private: + const Device& device; + MemoryAllocator& memory_allocator; + Scheduler& scheduler; + Tegra::MaxwellDeviceMemoryManager& device_memory; + const size_t image_count{}; + vk::DescriptorPool descriptor_pool{}; + vk::DescriptorSets descriptor_sets{}; + /* Staging buffer and raw images for the non-accelerated path, plus the framebuffer parameters they were created for. */ + vk::Buffer buffer{}; + std::vector raw_images{}; + std::vector raw_image_views{}; + u32 raw_width{}; + u32 raw_height{}; + Service::android::PixelFormat pixel_format{}; + + Settings::AntiAliasing anti_alias_setting{}; + std::unique_ptr anti_alias{}; + /* Optional FSR pass and, per image index, the scheduler tick recorded when ConfigureDraw last finished. */ + std::unique_ptr fsr{}; + std::vector resource_ticks{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/present_push_constants.h b/src/video_core/renderer_vulkan/present/present_push_constants.h new file mode 100644 index 0000000000..f1949e7aaa --- /dev/null +++ b/src/video_core/renderer_vulkan/present/present_push_constants.h @@ -0,0 +1,34 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/common_types.h" + +namespace Vulkan { + /* Vertex of the presentation quad: a 2D position and a texture coordinate. */ +struct ScreenRectVertex { + ScreenRectVertex() = default; + explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : 
position{{x, y}}, tex_coord{{u, v}} {} + + std::array position; + std::array tex_coord; +}; + /* Returns a 4x4 matrix that scales x by 2 over width and y by 2 over height, with -1, -1 as the first two of its last four elements. Presumably column-major, mapping 0..width and 0..height onto -1..1; TODO confirm against the vertex shader. */ +static inline std::array MakeOrthographicMatrix(f32 width, f32 height) { + // clang-format off + return { 2.f / width, 0.f, 0.f, 0.f, + 0.f, 2.f / height, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + -1.f, -1.f, 0.f, 1.f}; + // clang-format on +} + /* Data pushed to the vertex stage for each layer: the matrix and the four quad vertices. */ +struct PresentPushConstants { + std::array modelview_matrix; + std::array vertices; +}; + /* 128 bytes is the minimum push constant size (maxPushConstantsSize) that Vulkan implementations must support. */ +static_assert(sizeof(PresentPushConstants) <= 128, "Push constants are too large"); + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp index 9c08ac6134..7bff1c436a 100644 --- a/src/video_core/renderer_vulkan/present/util.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -113,16 +113,18 @@ vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkF }); } -vk::RenderPass CreateWrappedRenderPass(const Device& device, VkFormat format) { +vk::RenderPass CreateWrappedRenderPass(const Device& device, VkFormat format, + VkImageLayout initial_layout) { const VkAttachmentDescription attachment{ .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, .format = format, .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .loadOp = initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ? 
VK_ATTACHMENT_LOAD_OP_DONT_CARE + : VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .initialLayout = initial_layout, .finalLayout = VK_IMAGE_LAYOUT_GENERAL, }; @@ -244,8 +246,7 @@ vk::DescriptorSetLayout CreateWrappedDescriptorSetLayout( .binding = static_cast(i), .descriptorType = std::data(types)[i], .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | - VK_SHADER_STAGE_COMPUTE_BIT, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, .pImmutableSamplers = nullptr, }; } @@ -285,7 +286,8 @@ vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device, vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout, - std::tuple shaders) { + std::tuple shaders, + bool enable_blending) { const std::array shader_stages{{ { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -363,7 +365,7 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp .alphaToOneEnable = VK_FALSE, }; - constexpr VkPipelineColorBlendAttachmentState color_blend_attachment{ + constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_disabled{ .blendEnable = VK_FALSE, .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, @@ -375,6 +377,18 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, }; + constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_enabled{ + .blendEnable = VK_TRUE, + .srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA, + .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp 
= VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .pNext = nullptr, @@ -382,7 +396,8 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp .logicOpEnable = VK_FALSE, .logicOp = VK_LOGIC_OP_COPY, .attachmentCount = 1, - .pAttachments = &color_blend_attachment, + .pAttachments = + enable_blending ? &color_blend_attachment_enabled : &color_blend_attachment_disabled, .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, }; diff --git a/src/video_core/renderer_vulkan/present/util.h b/src/video_core/renderer_vulkan/present/util.h index 2f3a538faf..fb4e4a8e46 100644 --- a/src/video_core/renderer_vulkan/present/util.h +++ b/src/video_core/renderer_vulkan/present/util.h @@ -20,7 +20,8 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image); vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format); -vk::RenderPass CreateWrappedRenderPass(const Device& device, VkFormat format); +vk::RenderPass CreateWrappedRenderPass(const Device& device, VkFormat format, + VkImageLayout initial_layout = VK_IMAGE_LAYOUT_GENERAL); vk::Framebuffer CreateWrappedFramebuffer(const Device& device, vk::RenderPass& render_pass, vk::ImageView& dest_image, VkExtent2D extent); vk::Sampler CreateWrappedSampler(const Device& device, VkFilter filter = VK_FILTER_LINEAR); @@ -37,7 +38,8 @@ vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device, vk::DescriptorSetLayout& layout); vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout, - std::tuple shaders); + std::tuple shaders, + bool enable_blending = false); VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector& 
images, VkSampler sampler, VkImageView view, VkDescriptorSet set, u32 binding); diff --git a/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp b/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp index 1d1828a4c8..c5db0230d9 100644 --- a/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp +++ b/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp @@ -1,10 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include - #include "core/frontend/framebuffer_layout.h" +#include "video_core/framebuffer_config.h" #include "video_core/host_shaders/vulkan_present_vert_spv.h" +#include "video_core/renderer_vulkan/present/layer.h" +#include "video_core/renderer_vulkan/present/present_push_constants.h" #include "video_core/renderer_vulkan/present/util.h" #include "video_core/renderer_vulkan/present/window_adapt_pass.h" #include "video_core/renderer_vulkan/vk_present_manager.h" @@ -14,501 +15,123 @@ namespace Vulkan { -namespace { - -struct ScreenRectVertex { - ScreenRectVertex() = default; - explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} - - std::array position; - std::array tex_coord; - - static VkVertexInputBindingDescription GetDescription() { - return { - .binding = 0, - .stride = sizeof(ScreenRectVertex), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, - }; - } - - static std::array GetAttributes() { - return {{ - { - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(ScreenRectVertex, position), - }, - { - .location = 1, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(ScreenRectVertex, tex_coord), - }, - }}; - } -}; - -std::array MakeOrthographicMatrix(f32 width, f32 height) { - // clang-format off - return { 2.f / width, 0.f, 0.f, 0.f, - 0.f, 2.f / height, 0.f, 0.f, - 0.f, 0.f, 1.f, 0.f, - -1.f, -1.f, 0.f, 1.f}; - // clang-format on -} - -} // Anonymous 
namespace - -struct WindowAdaptPass::BufferData { - struct { - std::array modelview_matrix; - } uniform; - - std::array vertices; -}; - -WindowAdaptPass::WindowAdaptPass(const Device& device_, const MemoryAllocator& memory_allocator, - size_t num_images, VkFormat frame_format, vk::Sampler&& sampler_, - vk::ShaderModule&& fragment_shader_) +WindowAdaptPass::WindowAdaptPass(const Device& device_, VkFormat frame_format, + vk::Sampler&& sampler_, vk::ShaderModule&& fragment_shader_) : device(device_), sampler(std::move(sampler_)), fragment_shader(std::move(fragment_shader_)) { - CreateDescriptorPool(num_images); CreateDescriptorSetLayout(); - CreateDescriptorSets(num_images); CreatePipelineLayout(); CreateVertexShader(); CreateRenderPass(frame_format); CreatePipeline(); - CreateBuffer(memory_allocator); } WindowAdaptPass::~WindowAdaptPass() = default; -void WindowAdaptPass::Draw(Scheduler& scheduler, size_t image_index, VkImageView src_image_view, - VkExtent2D src_image_extent, const Common::Rectangle& crop_rect, +void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, size_t image_index, + std::list& layers, + std::span configs, const Layout::FramebufferLayout& layout, Frame* dst) { - ConfigureLayout(image_index, src_image_view, layout, crop_rect); const VkFramebuffer host_framebuffer{*dst->framebuffer}; const VkRenderPass renderpass{*render_pass}; const VkPipeline graphics_pipeline{*pipeline}; const VkPipelineLayout graphics_pipeline_layout{*pipeline_layout}; - const VkDescriptorSet descriptor_set{descriptor_sets[image_index]}; - const VkBuffer vertex_buffer{*buffer}; const VkExtent2D render_area{ .width = dst->width, .height = dst->height, }; + const size_t layer_count = configs.size(); + std::vector push_constants(layer_count); + std::vector descriptor_sets(layer_count); + + auto layer_it = layers.begin(); + for (size_t i = 0; i < layer_count; i++) { + layer_it->ConfigureDraw(&push_constants[i], &descriptor_sets[i], rasterizer, *sampler, + 
image_index, configs[i], layout); + layer_it++; + } + scheduler.Record([=](vk::CommandBuffer cmdbuf) { const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; - const VkClearValue clear_color{ - .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, + const VkClearAttachment clear_attachment{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = 0, + .clearValue = + { + .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, + }, }; - const VkRenderPassBeginInfo renderpass_bi{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .pNext = nullptr, - .renderPass = renderpass, - .framebuffer = host_framebuffer, - .renderArea = + const VkClearRect clear_rect{ + .rect = { .offset = {0, 0}, .extent = render_area, }, - .clearValueCount = 1, - .pClearValues = &clear_color, + .baseArrayLayer = 0, + .layerCount = 1, }; - const VkViewport viewport{ - .x = 0.0f, - .y = 0.0f, - .width = static_cast(render_area.width), - .height = static_cast(render_area.height), - .minDepth = 0.0f, - .maxDepth = 1.0f, - }; - const VkRect2D scissor{ - .offset = {0, 0}, - .extent = render_area, - }; - cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); + + BeginRenderPass(cmdbuf, renderpass, host_framebuffer, render_area); + cmdbuf.ClearAttachments({clear_attachment}, {clear_rect}); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); - cmdbuf.SetViewport(0, viewport); - cmdbuf.SetScissor(0, scissor); - cmdbuf.BindVertexBuffer(0, vertex_buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline_layout, 0, - descriptor_set, {}); - cmdbuf.Draw(4, 1, 0, 0); + for (size_t i = 0; i < layer_count; i++) { + cmdbuf.PushConstants(graphics_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, + push_constants[i]); + 
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline_layout, 0, + descriptor_sets[i], {}); + cmdbuf.Draw(4, 1, 0, 0); + } + cmdbuf.EndRenderPass(); }); } +VkDescriptorSetLayout WindowAdaptPass::GetDescriptorSetLayout() { + return *descriptor_set_layout; +} + VkRenderPass WindowAdaptPass::GetRenderPass() { return *render_pass; } -void WindowAdaptPass::CreateDescriptorPool(size_t num_images) { - const std::array pool_sizes{{ - { - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = static_cast(num_images), - }, - { - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = static_cast(num_images), - }, - }}; - - const VkDescriptorPoolCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .maxSets = static_cast(num_images), - .poolSizeCount = static_cast(pool_sizes.size()), - .pPoolSizes = pool_sizes.data(), - }; - descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); -} - void WindowAdaptPass::CreateDescriptorSetLayout() { - const std::array layout_bindings{{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = nullptr, - }, - }}; - - const VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(layout_bindings.size()), - .pBindings = layout_bindings.data(), - }; - - descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); + descriptor_set_layout = + CreateWrappedDescriptorSetLayout(device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); } -void WindowAdaptPass::CreateDescriptorSets(size_t num_images) { - const std::vector 
layouts(num_images, *descriptor_set_layout); - descriptor_sets = CreateWrappedDescriptorSets(descriptor_pool, layouts); -} +void WindowAdaptPass::CreatePipelineLayout() { + const VkPushConstantRange range{ + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .offset = 0, + .size = sizeof(PresentPushConstants), + }; -void WindowAdaptPass::CreateBuffer(const MemoryAllocator& memory_allocator) { - const VkBufferCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + pipeline_layout = device.GetLogical().CreatePipelineLayout(VkPipelineLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, - .size = sizeof(BufferData), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }; - - buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload); -} - -void WindowAdaptPass::CreateRenderPass(VkFormat frame_format) { - const VkAttachmentDescription color_attachment{ - .flags = 0, - .format = frame_format, - .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - - const VkAttachmentReference color_attachment_ref{ - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }; - - const VkSubpassDescription subpass_description{ - .flags = 0, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .pInputAttachments = nullptr, - .colorAttachmentCount = 1, - .pColorAttachments = &color_attachment_ref, - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = nullptr, - .preserveAttachmentCount = 0, - .pPreserveAttachments = 
nullptr, - }; - - const VkSubpassDependency dependency{ - .srcSubpass = VK_SUBPASS_EXTERNAL, - .dstSubpass = 0, - .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - .dependencyFlags = 0, - }; - - const VkRenderPassCreateInfo renderpass_ci{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = 1, - .pAttachments = &color_attachment, - .subpassCount = 1, - .pSubpasses = &subpass_description, - .dependencyCount = 1, - .pDependencies = &dependency, - }; - - render_pass = device.GetLogical().CreateRenderPass(renderpass_ci); + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 1, + .pPushConstantRanges = &range, + }); } void WindowAdaptPass::CreateVertexShader() { vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); } -void WindowAdaptPass::CreatePipelineLayout() { - const VkPipelineLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); -} - -void WindowAdaptPass::SetUniformData(BufferData& data, - const Layout::FramebufferLayout& layout) const { - data.uniform.modelview_matrix = - MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); -} - -void WindowAdaptPass::SetVertexData(BufferData& data, const Layout::FramebufferLayout& layout, - const Common::Rectangle& crop) const { - // Map the coordinates to the screen. 
- const auto& screen = layout.screen; - const auto x = static_cast(screen.left); - const auto y = static_cast(screen.top); - const auto w = static_cast(screen.GetWidth()); - const auto h = static_cast(screen.GetHeight()); - - data.vertices[0] = ScreenRectVertex(x, y, crop.left, crop.top); - data.vertices[1] = ScreenRectVertex(x + w, y, crop.right, crop.top); - data.vertices[2] = ScreenRectVertex(x, y + h, crop.left, crop.bottom); - data.vertices[3] = ScreenRectVertex(x + w, y + h, crop.right, crop.bottom); -} - -void WindowAdaptPass::UpdateDescriptorSet(size_t image_index, VkImageView image_view) { - const VkDescriptorBufferInfo buffer_info{ - .buffer = *buffer, - .offset = offsetof(BufferData, uniform), - .range = sizeof(BufferData::uniform), - }; - - const VkWriteDescriptorSet ubo_write{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = descriptor_sets[image_index], - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .pImageInfo = nullptr, - .pBufferInfo = &buffer_info, - .pTexelBufferView = nullptr, - }; - - const VkDescriptorImageInfo image_info{ - .sampler = *sampler, - .imageView = image_view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - - const VkWriteDescriptorSet sampler_write{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = descriptor_sets[image_index], - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = &image_info, - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - - device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {}); -} - -void WindowAdaptPass::ConfigureLayout(size_t image_index, VkImageView image_view, - const Layout::FramebufferLayout& layout, - const Common::Rectangle& crop_rect) { - BufferData data; - SetUniformData(data, layout); - SetVertexData(data, layout, 
crop_rect); - - const std::span mapped_span = buffer.Mapped(); - std::memcpy(mapped_span.data(), &data, sizeof(data)); - - UpdateDescriptorSet(image_index, image_view); +void WindowAdaptPass::CreateRenderPass(VkFormat frame_format) { + render_pass = CreateWrappedRenderPass(device, frame_format, VK_IMAGE_LAYOUT_UNDEFINED); } void WindowAdaptPass::CreatePipeline() { - const std::array shader_stages{{ - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = *vertex_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = *fragment_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - }}; - - const auto vertex_binding_description = ScreenRectVertex::GetDescription(); - const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); - - const VkPipelineVertexInputStateCreateInfo vertex_input_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = &vertex_binding_description, - .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}, - .pVertexAttributeDescriptions = vertex_attrs_description.data(), - }; - - const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = VK_FALSE, - }; - - const VkPipelineViewportStateCreateInfo viewport_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = 1, - .pViewports = nullptr, - .scissorCount = 1, - .pScissors = nullptr, - }; - - const 
VkPipelineRasterizationStateCreateInfo rasterization_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthClampEnable = VK_FALSE, - .rasterizerDiscardEnable = VK_FALSE, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CLOCKWISE, - .depthBiasEnable = VK_FALSE, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - }; - - const VkPipelineMultisampleStateCreateInfo multisampling_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, - .sampleShadingEnable = VK_FALSE, - .minSampleShading = 0.0f, - .pSampleMask = nullptr, - .alphaToCoverageEnable = VK_FALSE, - .alphaToOneEnable = VK_FALSE, - }; - - const VkPipelineColorBlendAttachmentState color_blend_attachment{ - .blendEnable = VK_FALSE, - .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, - .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, - .colorBlendOp = VK_BLEND_OP_ADD, - .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .alphaBlendOp = VK_BLEND_OP_ADD, - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, - }; - - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .logicOpEnable = VK_FALSE, - .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = 1, - .pAttachments = &color_blend_attachment, - .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, - }; - - static constexpr std::array dynamic_states{ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }; - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .dynamicStateCount = 
static_cast(dynamic_states.size()), - .pDynamicStates = dynamic_states.data(), - }; - - const VkGraphicsPipelineCreateInfo pipeline_ci{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(shader_stages.size()), - .pStages = shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = nullptr, - .pViewportState = &viewport_state_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisampling_ci, - .pDepthStencilState = nullptr, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *pipeline_layout, - .renderPass = *render_pass, - .subpass = 0, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }; - - pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci); + pipeline = CreateWrappedPipeline(device, render_pass, pipeline_layout, + std::tie(vertex_shader, fragment_shader), false); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/window_adapt_pass.h b/src/video_core/renderer_vulkan/present/window_adapt_pass.h index 5309233a2c..0e2edfc312 100644 --- a/src/video_core/renderer_vulkan/present/window_adapt_pass.h +++ b/src/video_core/renderer_vulkan/present/window_adapt_pass.h @@ -3,6 +3,8 @@ #pragma once +#include + #include "common/math_util.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -18,54 +20,39 @@ namespace Vulkan { class Device; struct Frame; -class MemoryAllocator; +class Layer; class Scheduler; +class RasterizerVulkan; class WindowAdaptPass final { public: - explicit WindowAdaptPass(const Device& device, const MemoryAllocator& memory_allocator, - size_t num_images, VkFormat frame_format, vk::Sampler&& sampler, + explicit WindowAdaptPass(const Device& device, VkFormat frame_format, vk::Sampler&& sampler, vk::ShaderModule&& fragment_shader); ~WindowAdaptPass(); - void Draw(Scheduler& scheduler, size_t image_index, 
VkImageView src_image_view, - VkExtent2D src_image_extent, const Common::Rectangle& crop_rect, + void Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, size_t image_index, + std::list& layers, std::span configs, const Layout::FramebufferLayout& layout, Frame* dst); + VkDescriptorSetLayout GetDescriptorSetLayout(); VkRenderPass GetRenderPass(); private: - struct BufferData; - - void SetUniformData(BufferData& data, const Layout::FramebufferLayout& layout) const; - void SetVertexData(BufferData& data, const Layout::FramebufferLayout& layout, - const Common::Rectangle& crop_rect) const; - void UpdateDescriptorSet(size_t image_index, VkImageView image_view); - void ConfigureLayout(size_t image_index, VkImageView image_view, - const Layout::FramebufferLayout& layout, - const Common::Rectangle& crop_rect); - - void CreateDescriptorPool(size_t num_images); void CreateDescriptorSetLayout(); - void CreateDescriptorSets(size_t num_images); void CreatePipelineLayout(); void CreateVertexShader(); void CreateRenderPass(VkFormat frame_format); void CreatePipeline(); - void CreateBuffer(const MemoryAllocator& memory_allocator); private: const Device& device; - vk::DescriptorPool descriptor_pool; vk::DescriptorSetLayout descriptor_set_layout; - vk::DescriptorSets descriptor_sets; vk::PipelineLayout pipeline_layout; vk::Sampler sampler; vk::ShaderModule vertex_shader; vk::ShaderModule fragment_shader; vk::RenderPass render_pass; vk::Pipeline pipeline; - vk::Buffer buffer; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index a99ef08a5c..77837added 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -125,9 +125,9 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { return; } - RenderScreenshot(*framebuffer); + RenderScreenshot(framebuffer); Frame* frame = 
present_manager.GetRenderFrame(); - blit_swapchain.DrawToFrame(rasterizer, frame, *framebuffer, + blit_swapchain.DrawToFrame(rasterizer, frame, std::span(framebuffer, 1), render_window.GetFramebufferLayout(), swapchain.GetImageCount(), swapchain.GetImageViewFormat()); scheduler.Flush(*frame->render_ready); @@ -163,7 +163,7 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } -void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer) { +void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig* framebuffer) { if (!renderer_settings.screenshot_requested) { return; } @@ -228,7 +228,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr }; }(); - blit_screenshot.DrawToFrame(rasterizer, &frame, framebuffer, layout, 1, + blit_screenshot.DrawToFrame(rasterizer, &frame, std::span(framebuffer, 1), layout, 1, VK_FORMAT_B8G8R8A8_UNORM); const auto buffer_size = static_cast(layout.width * layout.height * 4); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 5b0560e684..bdeb43a54b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -59,7 +59,7 @@ public: private: void Report() const; - void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer); + void RenderScreenshot(const Tegra::FramebufferConfig* framebuffer); Core::TelemetrySession& telemetry_session; Tegra::MaxwellDeviceMemoryManager& device_memory; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 8d01ec9fce..b2dcbf80b0 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -1,65 +1,15 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later 
-#include -#include -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/settings.h" -#include "core/core.h" -#include "core/frontend/emu_window.h" -#include "video_core/gpu.h" -#include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/framebuffer_config.h" #include "video_core/renderer_vulkan/present/filters.h" -#include "video_core/renderer_vulkan/present/fsr.h" -#include "video_core/renderer_vulkan/present/fxaa.h" -#include "video_core/renderer_vulkan/present/smaa.h" -#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/present/layer.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" +#include "video_core/renderer_vulkan/vk_present_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" -#include "video_core/surface.h" -#include "video_core/textures/decoders.h" -#include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_memory_allocator.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -namespace { - -u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { - using namespace VideoCore::Surface; - return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); -} - -std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { - return static_cast(framebuffer.stride) * - static_cast(framebuffer.height) * GetBytesPerPixel(framebuffer); -} - -VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { - switch (framebuffer.pixel_format) { - case Service::android::PixelFormat::Rgba8888: - case Service::android::PixelFormat::Rgbx8888: - return VK_FORMAT_A8B8G8R8_UNORM_PACK32; - case Service::android::PixelFormat::Rgb565: - return VK_FORMAT_R5G6B5_UNORM_PACK16; - case Service::android::PixelFormat::Bgra8888: - return VK_FORMAT_B8G8R8A8_UNORM; - default: - 
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", - static_cast(framebuffer.pixel_format)); - return VK_FORMAT_A8B8G8R8_UNORM_PACK32; - } -} - -} // Anonymous namespace - BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_, MemoryAllocator& memory_allocator_, PresentManager& present_manager_, Scheduler& scheduler_) @@ -75,194 +25,35 @@ void BlitScreen::WaitIdle() { device.GetLogical().WaitIdle(); } -void BlitScreen::SetWindowAdaptPass(const Layout::FramebufferLayout& layout) { +void BlitScreen::SetWindowAdaptPass() { + layers.clear(); scaling_filter = Settings::values.scaling_filter.GetValue(); - const VkExtent2D adapt_size{ - .width = layout.screen.GetWidth(), - .height = layout.screen.GetHeight(), - }; - - fsr.reset(); - switch (scaling_filter) { case Settings::ScalingFilter::NearestNeighbor: - window_adapt = - MakeNearestNeighbor(device, memory_allocator, image_count, swapchain_view_format); + window_adapt = MakeNearestNeighbor(device, swapchain_view_format); break; case Settings::ScalingFilter::Bicubic: - window_adapt = MakeBicubic(device, memory_allocator, image_count, swapchain_view_format); + window_adapt = MakeBicubic(device, swapchain_view_format); break; case Settings::ScalingFilter::Gaussian: - window_adapt = MakeGaussian(device, memory_allocator, image_count, swapchain_view_format); + window_adapt = MakeGaussian(device, swapchain_view_format); break; case Settings::ScalingFilter::ScaleForce: - window_adapt = MakeScaleForce(device, memory_allocator, image_count, swapchain_view_format); + window_adapt = MakeScaleForce(device, swapchain_view_format); break; case Settings::ScalingFilter::Fsr: - fsr = std::make_unique(device, memory_allocator, image_count, adapt_size); - [[fallthrough]]; case Settings::ScalingFilter::Bilinear: default: - window_adapt = MakeBilinear(device, memory_allocator, image_count, swapchain_view_format); + window_adapt = MakeBilinear(device, swapchain_view_format); break; } } -void 
BlitScreen::SetAntiAliasPass() { - if (anti_alias && anti_aliasing == Settings::values.anti_aliasing.GetValue()) { - return; - } - - anti_aliasing = Settings::values.anti_aliasing.GetValue(); - - const VkExtent2D render_area{ - .width = Settings::values.resolution_info.ScaleUp(raw_width), - .height = Settings::values.resolution_info.ScaleUp(raw_height), - }; - - switch (anti_aliasing) { - case Settings::AntiAliasing::Fxaa: - anti_alias = std::make_unique(device, memory_allocator, image_count, render_area); - break; - case Settings::AntiAliasing::Smaa: - anti_alias = std::make_unique(device, memory_allocator, image_count, render_area); - break; - default: - anti_alias = std::make_unique(); - break; - } -} - -void BlitScreen::Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout& layout, Frame* dst) { - const auto texture_info = rasterizer.AccelerateDisplay( - framebuffer, framebuffer.address + framebuffer.offset, framebuffer.stride); - const u32 texture_width = texture_info ? texture_info->width : framebuffer.width; - const u32 texture_height = texture_info ? texture_info->height : framebuffer.height; - const u32 scaled_width = texture_info ? texture_info->scaled_width : texture_width; - const u32 scaled_height = texture_info ? texture_info->scaled_height : texture_height; - const bool use_accelerated = texture_info.has_value(); - - RefreshResources(framebuffer); - SetAntiAliasPass(); - - // Finish any pending renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - scheduler.Wait(resource_ticks[image_index]); - SCOPE_EXIT({ resource_ticks[image_index] = scheduler.CurrentTick(); }); - - VkImage source_image = texture_info ? texture_info->image : *raw_images[image_index]; - VkImageView source_image_view = - texture_info ? 
texture_info->image_view : *raw_image_views[image_index]; - - const std::span mapped_span = buffer.Mapped(); - - if (!use_accelerated) { - const u64 image_offset = GetRawImageOffset(framebuffer); - - const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset; - const u8* const host_ptr = device_memory.GetPointer(framebuffer_addr); - - // TODO(Rodrigo): Read this from HLE - constexpr u32 block_height_log2 = 4; - const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); - const u64 linear_size{GetSizeInBytes(framebuffer)}; - const u64 tiled_size{Tegra::Texture::CalculateSize(true, bytes_per_pixel, - framebuffer.stride, framebuffer.height, - 1, block_height_log2, 0)}; - Tegra::Texture::UnswizzleTexture( - mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size), - bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); - - const VkBufferImageCopy copy{ - .bufferOffset = image_offset, - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .imageOffset = {.x = 0, .y = 0, .z = 0}, - .imageExtent = - { - .width = framebuffer.width, - .height = framebuffer.height, - .depth = 1, - }, - }; - scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) { - const VkImage image = *raw_images[index]; - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - VkImageMemoryBarrier read_barrier = base_barrier; - 
read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - VkImageMemoryBarrier write_barrier = base_barrier; - write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, - read_barrier); - cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, write_barrier); - }); - } - - anti_alias->Draw(scheduler, image_index, &source_image, &source_image_view); - - const auto crop_rect = Tegra::NormalizeCrop(framebuffer, texture_width, texture_height); - const VkExtent2D render_extent{ - .width = scaled_width, - .height = scaled_height, - }; - - if (fsr) { - const VkExtent2D adapt_size{ - .width = layout.screen.GetWidth(), - .height = layout.screen.GetHeight(), - }; - - source_image_view = fsr->Draw(scheduler, image_index, source_image, source_image_view, - render_extent, crop_rect); - - const Common::Rectangle output_crop{0, 0, 1, 1}; - window_adapt->Draw(scheduler, image_index, source_image_view, adapt_size, output_crop, - layout, dst); - } else { - window_adapt->Draw(scheduler, image_index, source_image_view, render_extent, crop_rect, - layout, dst); - } -} - void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, - const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout& layout, size_t swapchain_images, + std::span framebuffers, + const Layout::FramebufferLayout& layout, + size_t current_swapchain_image_count, VkFormat current_swapchain_view_format) { bool resource_update_required = false; bool presentation_recreate_required = false; @@ -272,11 +63,10 @@ void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, 
Frame* frame, resource_update_required = true; } - // Recreate dynamic resources if the the image count or input format changed - const VkFormat old_framebuffer_format = - std::exchange(framebuffer_view_format, GetFormat(framebuffer)); - if (swapchain_images != image_count || old_framebuffer_format != framebuffer_view_format) { - image_count = swapchain_images; + // Recreate dynamic resources if the image count changed + const size_t old_swapchain_image_count = + std::exchange(image_count, current_swapchain_image_count); + if (old_swapchain_image_count != current_swapchain_image_count) { resource_update_required = true; } @@ -294,11 +84,8 @@ void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, // Wait for idle to ensure no resources are in use WaitIdle(); - // Set new number of resource ticks - resource_ticks.resize(swapchain_images); - // Update window adapt pass - SetWindowAdaptPass(layout); + SetWindowAdaptPass(); // Update frame format if needed if (presentation_recreate_required) { @@ -307,7 +94,21 @@ void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, } } - Draw(rasterizer, framebuffer, layout, frame); + // Add additional layers if needed + const VkExtent2D window_size{ + .width = layout.screen.GetWidth(), + .height = layout.screen.GetHeight(), + }; + + while (layers.size() < framebuffers.size()) { + layers.emplace_back(device, memory_allocator, scheduler, device_memory, image_count, + window_size, window_adapt->GetDescriptorSetLayout()); + } + + // Perform the draw + window_adapt->Draw(rasterizer, scheduler, image_index, layers, framebuffers, layout, frame); + + // Advance to next image if (++image_index >= image_count) { image_index = 0; } @@ -321,7 +122,7 @@ vk::Framebuffer BlitScreen::CreateFramebuffer(const Layout::FramebufferLayout& l if (!window_adapt || scaling_filter != Settings::values.scaling_filter.GetValue() || format_updated) { WaitIdle(); - SetWindowAdaptPass(layout); + SetWindowAdaptPass(); } const 
VkExtent2D extent{ .width = layout.width, @@ -345,115 +146,4 @@ vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkE }); } -void BlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { - if (framebuffer.width == raw_width && framebuffer.height == raw_height && - framebuffer.pixel_format == pixel_format && !raw_images.empty()) { - return; - } - - raw_width = framebuffer.width; - raw_height = framebuffer.height; - pixel_format = framebuffer.pixel_format; - anti_alias.reset(); - - ReleaseRawImages(); - CreateStagingBuffer(framebuffer); - CreateRawImages(framebuffer); -} - -void BlitScreen::ReleaseRawImages() { - for (const u64 tick : resource_ticks) { - scheduler.Wait(tick); - } - raw_images.clear(); - buffer.reset(); -} - -void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { - const VkBufferCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = CalculateBufferSize(framebuffer), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }; - - buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload); -} - -void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { - raw_images.resize(image_count); - raw_image_views.resize(image_count); - - const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1, - u32 down_shift = 0) { - u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT - : VK_IMAGE_USAGE_TRANSFER_DST_BIT; - return memory_allocator.CreateImage(VkImageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .imageType = VK_IMAGE_TYPE_2D, - .format = used_on_framebuffer ? 
VK_FORMAT_R16G16B16A16_SFLOAT : framebuffer_view_format, - .extent = - { - .width = (up_scale * framebuffer.width) >> down_shift, - .height = (up_scale * framebuffer.height) >> down_shift, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = used_on_framebuffer ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | extra_usages, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }); - }; - const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) { - return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = *image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = used_on_framebuffer ? VK_FORMAT_R16G16B16A16_SFLOAT : framebuffer_view_format, - .components = - { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }); - }; - - for (size_t i = 0; i < image_count; ++i) { - raw_images[i] = create_image(); - raw_image_views[i] = create_image_view(raw_images[i]); - } -} - -u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const { - return GetSizeInBytes(framebuffer) * image_count; -} - -u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const { - return GetSizeInBytes(framebuffer) * image_index; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 555b3d82ef..9a3476c779 100644 --- 
a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -3,10 +3,12 @@ #pragma once +#include #include #include "core/frontend/framebuffer_layout.h" #include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/renderer_vulkan/present/layer.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -14,32 +16,17 @@ namespace Core { class System; } -namespace Core::Frontend { -class EmuWindow; -} - namespace Tegra { struct FramebufferConfig; } -namespace VideoCore { -class RasterizerInterface; -} - -namespace Service::android { -enum class PixelFormat : u32; -} - namespace Settings { -enum class AntiAliasing : u32; enum class ScalingFilter : u32; } // namespace Settings namespace Vulkan { -class AntiAliasPass; class Device; -class FSR; class RasterizerVulkan; class Scheduler; class PresentManager; @@ -64,8 +51,8 @@ public: ~BlitScreen(); void DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, - const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout& layout, size_t swapchain_images, + std::span framebuffers, + const Layout::FramebufferLayout& layout, size_t current_swapchain_image_count, VkFormat current_swapchain_view_format); [[nodiscard]] vk::Framebuffer CreateFramebuffer(const Layout::FramebufferLayout& layout, @@ -74,50 +61,22 @@ public: private: void WaitIdle(); - void SetWindowAdaptPass(const Layout::FramebufferLayout& layout); - void SetAntiAliasPass(); - - void Draw(RasterizerVulkan& rasterizer, const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout& layout, Frame* dst); - + void SetWindowAdaptPass(); vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent, VkRenderPass render_pass); - void RefreshResources(const Tegra::FramebufferConfig& framebuffer); - void ReleaseRawImages(); - void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); - 
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); - - u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; - u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; - Tegra::MaxwellDeviceMemoryManager& device_memory; const Device& device; MemoryAllocator& memory_allocator; PresentManager& present_manager; Scheduler& scheduler; - std::size_t image_count; + std::size_t image_count{}; std::size_t image_index{}; + VkFormat swapchain_view_format{}; - vk::Buffer buffer; - - std::vector resource_ticks; - - std::vector raw_images; - std::vector raw_image_views; - u32 raw_width = 0; - u32 raw_height = 0; - - Service::android::PixelFormat pixel_format{}; - VkFormat framebuffer_view_format; - VkFormat swapchain_view_format; - - Settings::AntiAliasing anti_aliasing{}; Settings::ScalingFilter scaling_filter{}; - - std::unique_ptr fsr; - std::unique_ptr anti_alias; - std::unique_ptr window_adapt; + std::unique_ptr window_adapt{}; + std::list layers{}; }; } // namespace Vulkan From 10cf0585180bcf2eab38ebf65dc593fecc4ddf92 Mon Sep 17 00:00:00 2001 From: Liam Date: Thu, 18 Jan 2024 20:47:50 -0500 Subject: [PATCH 13/15] renderer_opengl: implement layer stack composition --- src/video_core/CMakeLists.txt | 3 + .../renderer_opengl/gl_blit_screen.cpp | 198 +--------------- .../renderer_opengl/gl_blit_screen.h | 38 +--- .../renderer_opengl/present/layer.cpp | 215 ++++++++++++++++++ .../renderer_opengl/present/layer.h | 80 +++++++ .../present/present_uniforms.h | 43 ++++ .../present/window_adapt_pass.cpp | 91 +++----- .../present/window_adapt_pass.h | 14 +- .../renderer_opengl/renderer_opengl.cpp | 8 +- .../renderer_opengl/renderer_opengl.h | 2 +- 10 files changed, 402 insertions(+), 290 deletions(-) create mode 100644 src/video_core/renderer_opengl/present/layer.cpp create mode 100644 src/video_core/renderer_opengl/present/layer.h create mode 100644 src/video_core/renderer_opengl/present/present_uniforms.h diff --git 
a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 7526de699d..16c905db9c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -122,6 +122,9 @@ add_library(video_core STATIC renderer_opengl/present/fsr.h renderer_opengl/present/fxaa.cpp renderer_opengl/present/fxaa.h + renderer_opengl/present/layer.cpp + renderer_opengl/present/layer.h + renderer_opengl/present/present_uniforms.h renderer_opengl/present/smaa.cpp renderer_opengl/present/smaa.h renderer_opengl/present/util.h diff --git a/src/video_core/renderer_opengl/gl_blit_screen.cpp b/src/video_core/renderer_opengl/gl_blit_screen.cpp index f9dbef0fcd..6ba8b214be 100644 --- a/src/video_core/renderer_opengl/gl_blit_screen.cpp +++ b/src/video_core/renderer_opengl/gl_blit_screen.cpp @@ -1,18 +1,12 @@ // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "video_core/framebuffer_config.h" +#include "common/settings.h" #include "video_core/renderer_opengl/gl_blit_screen.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" -#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/present/filters.h" -#include "video_core/renderer_opengl/present/fsr.h" -#include "video_core/renderer_opengl/present/fxaa.h" -#include "video_core/renderer_opengl/present/smaa.h" +#include "video_core/renderer_opengl/present/layer.h" #include "video_core/renderer_opengl/present/window_adapt_pass.h" -#include "video_core/textures/decoders.h" namespace OpenGL { @@ -21,130 +15,12 @@ BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, StateTracker& state_tracker_, ProgramManager& program_manager_, Device& device_) : rasterizer(rasterizer_), device_memory(device_memory_), state_tracker(state_tracker_), - program_manager(program_manager_), device(device_) { - // Allocate textures for 
the screen - framebuffer_texture.resource.Create(GL_TEXTURE_2D); - - const GLuint texture = framebuffer_texture.resource.handle; - glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1); - - // Clear screen to black - const u8 framebuffer_data[4] = {0, 0, 0, 0}; - glClearTexImage(framebuffer_texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, - framebuffer_data); -} + program_manager(program_manager_), device(device_) {} BlitScreen::~BlitScreen() = default; -FramebufferTextureInfo BlitScreen::PrepareRenderTarget( - const Tegra::FramebufferConfig& framebuffer) { - // If framebuffer is provided, reload it from memory to a texture - if (framebuffer_texture.width != static_cast(framebuffer.width) || - framebuffer_texture.height != static_cast(framebuffer.height) || - framebuffer_texture.pixel_format != framebuffer.pixel_format || - gl_framebuffer_data.empty()) { - // Reallocate texture if the framebuffer size has changed. - // This is expected to not happen very often and hence should not be a - // performance problem. 
- ConfigureFramebufferTexture(framebuffer); - } - - // Load the framebuffer from memory if needed - return LoadFBToScreenInfo(framebuffer); -} - -FramebufferTextureInfo BlitScreen::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { - const DAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; - const auto accelerated_info = - rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride); - if (accelerated_info) { - return *accelerated_info; - } - - // Reset the screen info's display texture to its own permanent texture - FramebufferTextureInfo info{}; - info.display_texture = framebuffer_texture.resource.handle; - info.width = framebuffer.width; - info.height = framebuffer.height; - info.scaled_width = framebuffer.width; - info.scaled_height = framebuffer.height; - - // TODO(Rodrigo): Read this from HLE - constexpr u32 block_height_log2 = 4; - const auto pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; - const u64 size_in_bytes{Tegra::Texture::CalculateSize( - true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; - const u8* const host_ptr{device_memory.GetPointer(framebuffer_addr)}; - const std::span input_data(host_ptr, size_in_bytes); - Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, - framebuffer.width, framebuffer.height, 1, block_height_log2, - 0); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(framebuffer.stride)); - - // Update existing texture - // TODO: Test what happens on hardware when you change the framebuffer dimensions so that - // they differ from the LCD resolution. - // TODO: Applications could theoretically crash yuzu here by specifying too large - // framebuffer sizes. We should make sure that this cannot happen. 
- glTextureSubImage2D(framebuffer_texture.resource.handle, 0, 0, 0, framebuffer.width, - framebuffer.height, framebuffer_texture.gl_format, - framebuffer_texture.gl_type, gl_framebuffer_data.data()); - - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - - return info; -} - -void BlitScreen::ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer) { - framebuffer_texture.width = framebuffer.width; - framebuffer_texture.height = framebuffer.height; - framebuffer_texture.pixel_format = framebuffer.pixel_format; - - const auto pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; - gl_framebuffer_data.resize(framebuffer_texture.width * framebuffer_texture.height * - bytes_per_pixel); - - GLint internal_format; - switch (framebuffer.pixel_format) { - case Service::android::PixelFormat::Rgba8888: - internal_format = GL_RGBA8; - framebuffer_texture.gl_format = GL_RGBA; - framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - break; - case Service::android::PixelFormat::Rgb565: - internal_format = GL_RGB565; - framebuffer_texture.gl_format = GL_RGB; - framebuffer_texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; - break; - default: - internal_format = GL_RGBA8; - framebuffer_texture.gl_format = GL_RGBA; - framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", - // static_cast(framebuffer.pixel_format)); - break; - } - - framebuffer_texture.resource.Release(); - framebuffer_texture.resource.Create(GL_TEXTURE_2D); - glTextureStorage2D(framebuffer_texture.resource.handle, 1, internal_format, - framebuffer_texture.width, framebuffer_texture.height); - - fxaa.reset(); - smaa.reset(); -} - -void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, +void BlitScreen::DrawScreen(std::span framebuffers, const Layout::FramebufferLayout& layout) { - FramebufferTextureInfo info = 
PrepareRenderTarget(framebuffer); - auto crop = Tegra::NormalizeCrop(framebuffer, info.width, info.height); - // TODO: Signal state tracker about these changes state_tracker.NotifyScreenDrawVertexArray(); state_tracker.NotifyPolygonModes(); @@ -163,7 +39,6 @@ void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, state_tracker.NotifyLogicOp(); state_tracker.NotifyClipControl(); state_tracker.NotifyAlphaTest(); - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); glEnable(GL_CULL_FACE); @@ -180,76 +55,17 @@ void BlitScreen::DrawScreen(const Tegra::FramebufferConfig& framebuffer, glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthRangeIndexed(0, 0.0, 0.0); - GLint old_read_fb; - GLint old_draw_fb; - glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); - - GLuint texture = info.display_texture; - - auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); - if (anti_aliasing != Settings::AntiAliasing::None) { - glEnablei(GL_SCISSOR_TEST, 0); - auto scissor_width = Settings::values.resolution_info.ScaleUp(framebuffer_texture.width); - auto viewport_width = static_cast(scissor_width); - auto scissor_height = Settings::values.resolution_info.ScaleUp(framebuffer_texture.height); - auto viewport_height = static_cast(scissor_height); - - glScissorIndexed(0, 0, 0, scissor_width, scissor_height); - glViewportIndexedf(0, 0.0f, 0.0f, viewport_width, viewport_height); - - switch (anti_aliasing) { - case Settings::AntiAliasing::Fxaa: - CreateFXAA(); - texture = fxaa->Draw(program_manager, info.display_texture); - break; - case Settings::AntiAliasing::Smaa: - default: - CreateSMAA(); - texture = smaa->Draw(program_manager, info.display_texture); - break; - } + while (layers.size() < framebuffers.size()) { + layers.emplace_back(rasterizer, device_memory); } - glDisablei(GL_SCISSOR_TEST, 0); - - if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { - 
if (!fsr || fsr->NeedsRecreation(layout.screen)) { - fsr = std::make_unique(layout.screen.GetWidth(), layout.screen.GetHeight()); - } - - texture = fsr->Draw(program_manager, texture, info.scaled_width, info.scaled_height, crop); - crop = {0, 0, 1, 1}; - } - - glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); - CreateWindowAdapt(); - window_adapt->DrawToFramebuffer(program_manager, texture, layout, crop); + window_adapt->DrawToFramebuffer(program_manager, layers, framebuffers, layout); // TODO // program_manager.RestoreGuestPipeline(); } -void BlitScreen::CreateFXAA() { - smaa.reset(); - if (!fxaa) { - fxaa = std::make_unique( - Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), - Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); - } -} - -void BlitScreen::CreateSMAA() { - fxaa.reset(); - if (!smaa) { - smaa = std::make_unique( - Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), - Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); - } -} - void BlitScreen::CreateWindowAdapt() { if (window_adapt && Settings::values.scaling_filter.GetValue() == current_window_adapt) { return; diff --git a/src/video_core/renderer_opengl/gl_blit_screen.h b/src/video_core/renderer_opengl/gl_blit_screen.h index f42f89dee4..0c3d838f14 100644 --- a/src/video_core/renderer_opengl/gl_blit_screen.h +++ b/src/video_core/renderer_opengl/gl_blit_screen.h @@ -3,8 +3,9 @@ #pragma once +#include #include -#include +#include #include "core/hle/service/nvnflinger/pixel_format.h" #include "video_core/host1x/gpu_device_memory_manager.h" @@ -25,24 +26,12 @@ enum class ScalingFilter : u32; namespace OpenGL { class Device; -class FSR; -class FXAA; +class Layer; class ProgramManager; class RasterizerOpenGL; -class SMAA; class StateTracker; class WindowAdaptPass; -/// Structure used for storing information about the textures for the Switch screen -struct TextureInfo { - 
OGLTexture resource; - GLsizei width; - GLsizei height; - GLenum gl_format; - GLenum gl_type; - Service::android::PixelFormat pixel_format; -}; - /// Structure used for storing information about the display target for the Switch screen struct FramebufferTextureInfo { GLuint display_texture{}; @@ -60,20 +49,11 @@ public: Device& device); ~BlitScreen(); - void ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer); - /// Draws the emulated screens to the emulator window. - void DrawScreen(const Tegra::FramebufferConfig& framebuffer, + void DrawScreen(std::span framebuffers, const Layout::FramebufferLayout& layout); - /// Loads framebuffer from emulated memory into the active OpenGL texture. - FramebufferTextureInfo LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); - - FramebufferTextureInfo PrepareRenderTarget(const Tegra::FramebufferConfig& framebuffer); - private: - void CreateFXAA(); - void CreateSMAA(); void CreateWindowAdapt(); RasterizerOpenGL& rasterizer; @@ -82,18 +62,10 @@ private: ProgramManager& program_manager; Device& device; - /// Display information for Switch screen - TextureInfo framebuffer_texture; - - std::unique_ptr fsr; - std::unique_ptr fxaa; - std::unique_ptr smaa; - Settings::ScalingFilter current_window_adapt{}; std::unique_ptr window_adapt; - /// OpenGL framebuffer data - std::vector gl_framebuffer_data; + std::list layers; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/layer.cpp b/src/video_core/renderer_opengl/present/layer.cpp new file mode 100644 index 0000000000..8643e07c68 --- /dev/null +++ b/src/video_core/renderer_opengl/present/layer.cpp @@ -0,0 +1,215 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/framebuffer_config.h" +#include "video_core/renderer_opengl/gl_blit_screen.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/present/fsr.h" 
+#include "video_core/renderer_opengl/present/fxaa.h" +#include "video_core/renderer_opengl/present/layer.h" +#include "video_core/renderer_opengl/present/present_uniforms.h" +#include "video_core/renderer_opengl/present/smaa.h" +#include "video_core/surface.h" +#include "video_core/textures/decoders.h" + +namespace OpenGL { + +Layer::Layer(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_) + : rasterizer(rasterizer_), device_memory(device_memory_) { + // Allocate textures for the screen + framebuffer_texture.resource.Create(GL_TEXTURE_2D); + + const GLuint texture = framebuffer_texture.resource.handle; + glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1); + + // Clear screen to black + const u8 framebuffer_data[4] = {0, 0, 0, 0}; + glClearTexImage(framebuffer_texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, + framebuffer_data); +} + +Layer::~Layer() = default; + +GLuint Layer::ConfigureDraw(std::array& out_matrix, + std::array& out_vertices, + ProgramManager& program_manager, + const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout) { + FramebufferTextureInfo info = PrepareRenderTarget(framebuffer); + auto crop = Tegra::NormalizeCrop(framebuffer, info.width, info.height); + GLuint texture = info.display_texture; + + auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); + if (anti_aliasing != Settings::AntiAliasing::None) { + glEnablei(GL_SCISSOR_TEST, 0); + auto viewport_width = Settings::values.resolution_info.ScaleUp(framebuffer_texture.width); + auto viewport_height = Settings::values.resolution_info.ScaleUp(framebuffer_texture.height); + + glScissorIndexed(0, 0, 0, viewport_width, viewport_height); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast(viewport_width), + static_cast(viewport_height)); + + switch (anti_aliasing) { + case Settings::AntiAliasing::Fxaa: + CreateFXAA(); + texture = fxaa->Draw(program_manager, info.display_texture); + break; + case Settings::AntiAliasing::Smaa: + 
default: + CreateSMAA(); + texture = smaa->Draw(program_manager, info.display_texture); + break; + } + } + + glDisablei(GL_SCISSOR_TEST, 0); + + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + if (!fsr || fsr->NeedsRecreation(layout.screen)) { + fsr = std::make_unique(layout.screen.GetWidth(), layout.screen.GetHeight()); + } + + texture = fsr->Draw(program_manager, texture, info.scaled_width, info.scaled_height, crop); + crop = {0, 0, 1, 1}; + } + + out_matrix = + MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); + + // Map the coordinates to the screen. + const auto& screen = layout.screen; + const auto x = screen.left; + const auto y = screen.top; + const auto w = screen.GetWidth(); + const auto h = screen.GetHeight(); + + out_vertices[0] = ScreenRectVertex(x, y, crop.left, crop.top); + out_vertices[1] = ScreenRectVertex(x + w, y, crop.right, crop.top); + out_vertices[2] = ScreenRectVertex(x, y + h, crop.left, crop.bottom); + out_vertices[3] = ScreenRectVertex(x + w, y + h, crop.right, crop.bottom); + + return texture; +} + +FramebufferTextureInfo Layer::PrepareRenderTarget(const Tegra::FramebufferConfig& framebuffer) { + // If framebuffer is provided, reload it from memory to a texture + if (framebuffer_texture.width != static_cast(framebuffer.width) || + framebuffer_texture.height != static_cast(framebuffer.height) || + framebuffer_texture.pixel_format != framebuffer.pixel_format || + gl_framebuffer_data.empty()) { + // Reallocate texture if the framebuffer size has changed. + // This is expected to not happen very often and hence should not be a + // performance problem. 
+ ConfigureFramebufferTexture(framebuffer); + } + + // Load the framebuffer from memory if needed + return LoadFBToScreenInfo(framebuffer); +} + +FramebufferTextureInfo Layer::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { + const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; + const auto accelerated_info = + rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride); + if (accelerated_info) { + return *accelerated_info; + } + + // Reset the screen info's display texture to its own permanent texture + FramebufferTextureInfo info{}; + info.display_texture = framebuffer_texture.resource.handle; + info.width = framebuffer.width; + info.height = framebuffer.height; + info.scaled_width = framebuffer.width; + info.scaled_height = framebuffer.height; + + // TODO(Rodrigo): Read this from HLE + constexpr u32 block_height_log2 = 4; + const auto pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; + const u64 size_in_bytes{Tegra::Texture::CalculateSize( + true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; + const u8* const host_ptr{device_memory.GetPointer(framebuffer_addr)}; + const std::span input_data(host_ptr, size_in_bytes); + Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, + framebuffer.width, framebuffer.height, 1, block_height_log2, + 0); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(framebuffer.stride)); + + // Update existing texture + // TODO: Test what happens on hardware when you change the framebuffer dimensions so that + // they differ from the LCD resolution. + // TODO: Applications could theoretically crash yuzu here by specifying too large + // framebuffer sizes. We should make sure that this cannot happen. 
+ glTextureSubImage2D(framebuffer_texture.resource.handle, 0, 0, 0, framebuffer.width, + framebuffer.height, framebuffer_texture.gl_format, + framebuffer_texture.gl_type, gl_framebuffer_data.data()); + + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + return info; +} + +void Layer::ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer) { + framebuffer_texture.width = framebuffer.width; + framebuffer_texture.height = framebuffer.height; + framebuffer_texture.pixel_format = framebuffer.pixel_format; + + const auto pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; + gl_framebuffer_data.resize(framebuffer_texture.width * framebuffer_texture.height * + bytes_per_pixel); + + GLint internal_format; + switch (framebuffer.pixel_format) { + case Service::android::PixelFormat::Rgba8888: + internal_format = GL_RGBA8; + framebuffer_texture.gl_format = GL_RGBA; + framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; + break; + case Service::android::PixelFormat::Rgb565: + internal_format = GL_RGB565; + framebuffer_texture.gl_format = GL_RGB; + framebuffer_texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; + break; + default: + internal_format = GL_RGBA8; + framebuffer_texture.gl_format = GL_RGBA; + framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; + // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", + // static_cast(framebuffer.pixel_format)); + break; + } + + framebuffer_texture.resource.Release(); + framebuffer_texture.resource.Create(GL_TEXTURE_2D); + glTextureStorage2D(framebuffer_texture.resource.handle, 1, internal_format, + framebuffer_texture.width, framebuffer_texture.height); + + fxaa.reset(); + smaa.reset(); +} + +void Layer::CreateFXAA() { + smaa.reset(); + if (!fxaa) { + fxaa = std::make_unique( + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + 
Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); + } +} + +void Layer::CreateSMAA() { + fxaa.reset(); + if (!smaa) { + smaa = std::make_unique( + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/layer.h b/src/video_core/renderer_opengl/present/layer.h new file mode 100644 index 0000000000..ef1055abf3 --- /dev/null +++ b/src/video_core/renderer_opengl/present/layer.h @@ -0,0 +1,80 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace Service::android { +enum class PixelFormat : u32; +}; + +namespace Tegra { +struct FramebufferConfig; +} + +namespace OpenGL { + +struct FramebufferTextureInfo; +class FSR; +class FXAA; +class ProgramManager; +class RasterizerOpenGL; +class SMAA; + +/// Structure used for storing information about the textures for the Switch screen +struct TextureInfo { + OGLTexture resource; + GLsizei width; + GLsizei height; + GLenum gl_format; + GLenum gl_type; + Service::android::PixelFormat pixel_format; +}; + +struct ScreenRectVertex; + +class Layer { +public: + explicit Layer(RasterizerOpenGL& rasterizer, Tegra::MaxwellDeviceMemoryManager& device_memory); + ~Layer(); + + GLuint ConfigureDraw(std::array& out_matrix, + std::array& out_vertices, + ProgramManager& program_manager, + const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout); + +private: + /// Loads framebuffer from emulated memory into the active OpenGL texture. 
+ FramebufferTextureInfo LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); + FramebufferTextureInfo PrepareRenderTarget(const Tegra::FramebufferConfig& framebuffer); + void ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer); + + void CreateFXAA(); + void CreateSMAA(); + +private: + RasterizerOpenGL& rasterizer; + Tegra::MaxwellDeviceMemoryManager& device_memory; + + /// OpenGL framebuffer data + std::vector gl_framebuffer_data; + + /// Display information for Switch screen + TextureInfo framebuffer_texture; + + std::unique_ptr fsr; + std::unique_ptr fxaa; + std::unique_ptr smaa; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/present_uniforms.h b/src/video_core/renderer_opengl/present/present_uniforms.h new file mode 100644 index 0000000000..3a19f05c72 --- /dev/null +++ b/src/video_core/renderer_opengl/present/present_uniforms.h @@ -0,0 +1,43 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +constexpr GLint PositionLocation = 0; +constexpr GLint TexCoordLocation = 1; +constexpr GLint ModelViewMatrixLocation = 0; + +struct ScreenRectVertex { + constexpr ScreenRectVertex() = default; + + constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v) + : position{{static_cast(x), static_cast(y)}}, tex_coord{{u, v}} {} + + std::array position{}; + std::array tex_coord{}; +}; + +/** + * Defines a 1:1 pixel orthographic projection matrix with (0,0) on the top-left + * corner and (width, height) on the lower-bottom. + * + * The projection part of the matrix is trivial, hence these operations are represented + * by a 3x2 matrix. 
+ */ +static inline std::array MakeOrthographicMatrix(float width, float height) { + std::array matrix; // Laid out in column-major order + + // clang-format off + matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; + matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; + // Last matrix row is implicitly assumed to be [0, 0, 1]. + // clang-format on + + return matrix; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/window_adapt_pass.cpp b/src/video_core/renderer_opengl/present/window_adapt_pass.cpp index 168fa1aea3..4d681606b3 100644 --- a/src/video_core/renderer_opengl/present/window_adapt_pass.cpp +++ b/src/video_core/renderer_opengl/present/window_adapt_pass.cpp @@ -2,47 +2,17 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/settings.h" +#include "video_core/framebuffer_config.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/present/layer.h" +#include "video_core/renderer_opengl/present/present_uniforms.h" #include "video_core/renderer_opengl/present/window_adapt_pass.h" namespace OpenGL { -namespace { -constexpr GLint PositionLocation = 0; -constexpr GLint TexCoordLocation = 1; -constexpr GLint ModelViewMatrixLocation = 0; - -struct ScreenRectVertex { - constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v) - : position{{static_cast(x), static_cast(y)}}, tex_coord{{u, v}} {} - - std::array position; - std::array tex_coord; -}; - -/** - * Defines a 1:1 pixel orthographic projection matrix with (0,0) on the top-left - * corner and (width, height) on the lower-bottom. - * - * The projection part of the matrix is trivial, hence these operations are represented - * by a 3x2 matrix. 
- */ -std::array MakeOrthographicMatrix(float width, float height) { - std::array matrix; // Laid out in column-major order - - // clang-format off - matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; - matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; - // Last matrix row is implicitly assumed to be [0, 0, 1]. - // clang-format on - - return matrix; -} -} // namespace - WindowAdaptPass::WindowAdaptPass(const Device& device_, OGLSampler&& sampler_, std::string_view frag_source) : device(device_), sampler(std::move(sampler_)) { @@ -65,32 +35,30 @@ WindowAdaptPass::WindowAdaptPass(const Device& device_, OGLSampler&& sampler_, WindowAdaptPass::~WindowAdaptPass() = default; -void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, GLuint texture, - const Layout::FramebufferLayout& layout, - const Common::Rectangle& crop) { - glBindTextureUnit(0, texture); +void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::list& layers, + std::span framebuffers, + const Layout::FramebufferLayout& layout) { + GLint old_read_fb; + GLint old_draw_fb; + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); - const std::array ortho_matrix = - MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); + const size_t layer_count = framebuffers.size(); + std::vector textures(layer_count); + std::vector> matrices(layer_count); + std::vector> vertices(layer_count); + + auto layer_it = layers.begin(); + for (size_t i = 0; i < layer_count; i++) { + textures[i] = layer_it->ConfigureDraw(matrices[i], vertices[i], program_manager, + framebuffers[i], layout); + layer_it++; + } + + glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); program_manager.BindPresentPrograms(vert.handle, frag.handle); - glProgramUniformMatrix3x2fv(vert.handle, ModelViewMatrixLocation, 1, GL_FALSE, - ortho_matrix.data()); - - 
// Map the coordinates to the screen. - const auto& screen = layout.screen; - const auto x = screen.left; - const auto y = screen.top; - const auto w = screen.GetWidth(); - const auto h = screen.GetHeight(); - - const std::array vertices = { - ScreenRectVertex(x, y, crop.left, crop.top), - ScreenRectVertex(x + w, y, crop.right, crop.top), - ScreenRectVertex(x, y + h, crop.left, crop.bottom), - ScreenRectVertex(x + w, y + h, crop.right, crop.bottom), - }; - glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); glDisable(GL_FRAMEBUFFER_SRGB); glViewportIndexedf(0, 0.0f, 0.0f, static_cast(layout.width), @@ -109,7 +77,7 @@ void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, GLuint if (device.HasVertexBufferUnifiedMemory()) { glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, - sizeof(vertices)); + sizeof(decltype(vertices)::value_type)); } else { glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); } @@ -122,7 +90,14 @@ void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, GLuint Settings::values.bg_blue.GetValue() / 255.0f, 1.0f); glClear(GL_COLOR_BUFFER_BIT); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + for (size_t i = 0; i < layer_count; i++) { + glBindTextureUnit(0, textures[i]); + glProgramUniformMatrix3x2fv(vert.handle, ModelViewMatrixLocation, 1, GL_FALSE, + matrices[i].data()); + glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices[i]), std::data(vertices[i])); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + } } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/window_adapt_pass.h b/src/video_core/renderer_opengl/present/window_adapt_pass.h index 65dcd09ffa..00975a9c60 100644 --- a/src/video_core/renderer_opengl/present/window_adapt_pass.h +++ b/src/video_core/renderer_opengl/present/window_adapt_pass.h @@ -3,6 +3,9 @@ #pragma once +#include +#include + 
#include "common/math_util.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -10,9 +13,14 @@ namespace Layout { struct FramebufferLayout; } +namespace Tegra { +struct FramebufferConfig; +} + namespace OpenGL { class Device; +class Layer; class ProgramManager; class WindowAdaptPass final { @@ -21,9 +29,9 @@ public: std::string_view frag_source); ~WindowAdaptPass(); - void DrawToFramebuffer(ProgramManager& program_manager, GLuint texture, - const Layout::FramebufferLayout& layout, - const Common::Rectangle& crop); + void DrawToFramebuffer(ProgramManager& program_manager, std::list& layers, + std::span framebuffers, + const Layout::FramebufferLayout& layout); private: const Device& device; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 0d138c1897..10a9f973cd 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -130,10 +130,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { return; } - RenderScreenshot(*framebuffer); + RenderScreenshot(framebuffer); state_tracker.BindFramebuffer(0); - blit_screen->DrawScreen(*framebuffer, emu_window.GetFramebufferLayout()); + blit_screen->DrawScreen(std::span(framebuffer, 1), emu_window.GetFramebufferLayout()); ++m_current_frame; @@ -159,7 +159,7 @@ void RendererOpenGL::AddTelemetryFields() { telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); } -void RendererOpenGL::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer) { +void RendererOpenGL::RenderScreenshot(const Tegra::FramebufferConfig* framebuffer) { if (!renderer_settings.screenshot_requested) { return; } @@ -181,7 +181,7 @@ void RendererOpenGL::RenderScreenshot(const Tegra::FramebufferConfig& framebuffe glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, layout.width, layout.height); glFramebufferRenderbuffer(GL_FRAMEBUFFER, 
GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); - blit_screen->DrawScreen(framebuffer, layout); + blit_screen->DrawScreen(std::span(framebuffer, 1), layout); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glPixelStorei(GL_PACK_ROW_LENGTH, 0); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 7ab1633722..df76d3d05c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -52,7 +52,7 @@ public: private: void AddTelemetryFields(); - void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer); + void RenderScreenshot(const Tegra::FramebufferConfig* framebuffer); Core::TelemetrySession& telemetry_session; Core::Frontend::EmuWindow& emu_window; From a595e9e8a7a6a742481b1cd05455d3c639095413 Mon Sep 17 00:00:00 2001 From: Liam Date: Mon, 22 Jan 2024 12:40:50 -0500 Subject: [PATCH 14/15] nvnflinger/gpu: implement layer stack composition --- src/core/CMakeLists.txt | 3 + .../service/nvdrv/devices/nvdisp_disp0.cpp | 35 +++- .../hle/service/nvdrv/devices/nvdisp_disp0.h | 10 +- src/core/hle/service/nvnflinger/buffer_item.h | 2 +- .../service/nvnflinger/hardware_composer.cpp | 190 ++++++++++++++++++ .../service/nvnflinger/hardware_composer.h | 59 ++++++ src/core/hle/service/nvnflinger/hwc_layer.h | 27 +++ .../hle/service/nvnflinger/nvnflinger.cpp | 32 +-- src/core/hle/service/nvnflinger/nvnflinger.h | 1 + .../hle/service/vi/display/vi_display.cpp | 4 +- src/core/hle/service/vi/display/vi_display.h | 15 +- src/core/hle/service/vi/vi.cpp | 5 +- src/video_core/framebuffer_config.h | 3 +- src/video_core/gpu.cpp | 29 +-- src/video_core/gpu.h | 4 +- src/video_core/gpu_thread.cpp | 6 - src/video_core/gpu_thread.h | 15 +- src/video_core/renderer_base.h | 2 +- .../renderer_null/renderer_null.cpp | 4 +- src/video_core/renderer_null/renderer_null.h | 2 +- .../renderer_opengl/renderer_opengl.cpp | 12 +- .../renderer_opengl/renderer_opengl.h | 4 +- 
.../renderer_vulkan/present/util.cpp | 78 +++++++ src/video_core/renderer_vulkan/present/util.h | 4 + .../renderer_vulkan/renderer_vulkan.cpp | 169 +++------------- .../renderer_vulkan/renderer_vulkan.h | 4 +- .../renderer_vulkan/vk_blit_screen.cpp | 2 +- .../renderer_vulkan/vk_blit_screen.h | 2 +- 28 files changed, 470 insertions(+), 253 deletions(-) create mode 100644 src/core/hle/service/nvnflinger/hardware_composer.cpp create mode 100644 src/core/hle/service/nvnflinger/hardware_composer.h create mode 100644 src/core/hle/service/nvnflinger/hwc_layer.h diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 570acb1939..eb8f643a2d 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -775,6 +775,9 @@ add_library(core STATIC hle/service/nvnflinger/graphic_buffer_producer.h hle/service/nvnflinger/hos_binder_driver_server.cpp hle/service/nvnflinger/hos_binder_driver_server.h + hle/service/nvnflinger/hardware_composer.cpp + hle/service/nvnflinger/hardware_composer.h + hle/service/nvnflinger/hwc_layer.h hle/service/nvnflinger/nvnflinger.cpp hle/service/nvnflinger/nvnflinger.h hle/service/nvnflinger/parcel.h diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index c1ebbd62d4..abe95303e2 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include + #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" @@ -38,19 +40,30 @@ NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span in void nvdisp_disp0::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} void nvdisp_disp0::OnClose(DeviceFD fd) {} -void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, - u32 height, u32 stride, android::BufferTransformFlags 
transform, - const Common::Rectangle& crop_rect, - std::array& fences, u32 num_fences) { - const DAddr addr = nvmap.GetHandleAddress(buffer_handle); - LOG_TRACE(Service, - "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", - addr, offset, width, height, stride, format); +void nvdisp_disp0::Composite(std::span sorted_layers) { + std::vector output_layers; + std::vector output_fences; + output_layers.reserve(sorted_layers.size()); + output_fences.reserve(sorted_layers.size()); - const Tegra::FramebufferConfig framebuffer{addr, offset, width, height, - stride, format, transform, crop_rect}; + for (auto& layer : sorted_layers) { + output_layers.emplace_back(Tegra::FramebufferConfig{ + .address = nvmap.GetHandleAddress(layer.buffer_handle), + .offset = layer.offset, + .width = layer.width, + .height = layer.height, + .stride = layer.stride, + .pixel_format = layer.format, + .transform_flags = layer.transform, + .crop_rect = layer.crop_rect, + }); - system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences); + for (size_t i = 0; i < layer.acquire_fence.num_fences; i++) { + output_fences.push_back(layer.acquire_fence.fences[i]); + } + } + + system.GPU().RequestComposite(std::move(output_layers), std::move(output_fences)); system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); system.GetPerfStats().EndSystemFrame(); system.GetPerfStats().BeginSystemFrame(); diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index 5f13a50a2b..1082b85c2b 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h @@ -8,8 +8,7 @@ #include "common/common_types.h" #include "common/math_util.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" -#include "core/hle/service/nvnflinger/buffer_transform_flags.h" -#include "core/hle/service/nvnflinger/pixel_format.h" +#include "core/hle/service/nvnflinger/hwc_layer.h" 
namespace Service::Nvidia::NvCore { class Container; @@ -35,11 +34,8 @@ public: void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnClose(DeviceFD fd) override; - /// Performs a screen flip, drawing the buffer pointed to by the handle. - void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height, - u32 stride, android::BufferTransformFlags transform, - const Common::Rectangle& crop_rect, - std::array& fences, u32 num_fences); + /// Performs a screen flip, compositing each buffer. + void Composite(std::span sorted_layers); Kernel::KEvent* QueryEvent(u32 event_id) override; diff --git a/src/core/hle/service/nvnflinger/buffer_item.h b/src/core/hle/service/nvnflinger/buffer_item.h index 7fd808f546..f9f262628d 100644 --- a/src/core/hle/service/nvnflinger/buffer_item.h +++ b/src/core/hle/service/nvnflinger/buffer_item.h @@ -40,7 +40,7 @@ public: bool is_droppable{}; bool acquire_called{}; bool transform_to_display_inverse{}; - s32 swap_interval{}; + u32 swap_interval{}; }; } // namespace Service::android diff --git a/src/core/hle/service/nvnflinger/hardware_composer.cpp b/src/core/hle/service/nvnflinger/hardware_composer.cpp new file mode 100644 index 0000000000..54889bb4f4 --- /dev/null +++ b/src/core/hle/service/nvnflinger/hardware_composer.cpp @@ -0,0 +1,190 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include + +#include "common/microprofile.h" +#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" +#include "core/hle/service/nvnflinger/buffer_item.h" +#include "core/hle/service/nvnflinger/buffer_item_consumer.h" +#include "core/hle/service/nvnflinger/buffer_queue_producer.h" +#include "core/hle/service/nvnflinger/hardware_composer.h" +#include "core/hle/service/nvnflinger/hwc_layer.h" +#include "core/hle/service/nvnflinger/ui/graphic_buffer.h" +#include "core/hle/service/vi/display/vi_display.h" +#include 
"core/hle/service/vi/layer/vi_layer.h" + +namespace Service::Nvnflinger { + +HardwareComposer::HardwareComposer() = default; +HardwareComposer::~HardwareComposer() = default; + +u32 HardwareComposer::ComposeLocked(VI::Display& display, Nvidia::Devices::nvdisp_disp0& nvdisp, + u32 frame_advance) { + boost::container::small_vector composition_stack; + + m_frame_number += frame_advance; + + // Release any necessary framebuffers. + for (auto& [layer_id, framebuffer] : m_framebuffers) { + if (framebuffer.release_frame_number > m_frame_number) { + // Not yet ready to release this framebuffer. + continue; + } + + if (!framebuffer.is_acquired) { + // Already released. + continue; + } + + if (auto* layer = display.FindLayer(layer_id); layer != nullptr) { + // TODO: support release fence + // This is needed to prevent screen tearing + layer->GetConsumer().ReleaseBuffer(framebuffer.item, android::Fence::NoFence()); + framebuffer.is_acquired = false; + } + } + + // Determine the number of vsync periods to wait before composing again. + std::optional swap_interval{}; + bool has_acquired_buffer{}; + + // Acquire all necessary framebuffers. + for (size_t i = 0; i < display.GetNumLayers(); i++) { + auto& layer = display.GetLayer(i); + auto layer_id = layer.GetLayerId(); + + // Try to fetch the framebuffer (either new or stale). + const auto result = this->CacheFramebufferLocked(layer, layer_id); + + // If we failed, skip this layer. + if (result == CacheStatus::NoBufferAvailable) { + continue; + } + + // If we acquired a new buffer, we need to present. 
+ if (result == CacheStatus::BufferAcquired) { + has_acquired_buffer = true; + } + + const auto& buffer = m_framebuffers[layer_id]; + const auto& item = buffer.item; + const auto& igbp_buffer = *item.graphic_buffer; + + // TODO: get proper Z-index from layer + composition_stack.emplace_back(HwcLayer{ + .buffer_handle = igbp_buffer.BufferId(), + .offset = igbp_buffer.Offset(), + .format = igbp_buffer.ExternalFormat(), + .width = igbp_buffer.Width(), + .height = igbp_buffer.Height(), + .stride = igbp_buffer.Stride(), + .z_index = 0, + .transform = static_cast(item.transform), + .crop_rect = item.crop, + .acquire_fence = item.fence, + }); + + // We need to compose again either before this frame is supposed to + // be released, or exactly on the vsync period it should be released. + // + // TODO: handle cases where swap intervals are relatively prime. So far, + // only swap intervals of 0, 1 and 2 have been observed, but if 3 were + // to be introduced, this would cause an issue. + if (swap_interval) { + swap_interval = std::min(*swap_interval, item.swap_interval); + } else { + swap_interval = item.swap_interval; + } + } + + // If any new buffers were acquired, we can present. + if (has_acquired_buffer) { + // Sort by Z-index. + std::stable_sort(composition_stack.begin(), composition_stack.end(), + [&](auto& l, auto& r) { return l.z_index < r.z_index; }); + + // Composite. + nvdisp.Composite(composition_stack); + } + + // Render MicroProfile. + MicroProfileFlip(); + + // If we advanced, then advance by at least 1 frame. + if (swap_interval) { + return std::max(*swap_interval, 1U); + } + + // Otherwise, advance by exactly one frame. + return 1U; +} + +void HardwareComposer::RemoveLayerLocked(VI::Display& display, LayerId layer_id) { + // Check if we are tracking a slot with this layer_id. + const auto it = m_framebuffers.find(layer_id); + if (it == m_framebuffers.end()) { + return; + } + + // Try to release the buffer item. 
+ auto* const layer = display.FindLayer(layer_id); + if (layer && it->second.is_acquired) { + layer->GetConsumer().ReleaseBuffer(it->second.item, android::Fence::NoFence()); + } + + // Erase the slot. + m_framebuffers.erase(it); +} + +bool HardwareComposer::TryAcquireFramebufferLocked(VI::Layer& layer, Framebuffer& framebuffer) { + // Attempt the update. + const auto status = layer.GetConsumer().AcquireBuffer(&framebuffer.item, {}, false); + if (status != android::Status::NoError) { + return false; + } + + // We succeeded, so set the new release frame info. + framebuffer.release_frame_number = + m_frame_number + std::max(1U, framebuffer.item.swap_interval); + framebuffer.is_acquired = true; + + return true; +} + +HardwareComposer::CacheStatus HardwareComposer::CacheFramebufferLocked(VI::Layer& layer, + LayerId layer_id) { + // Check if this framebuffer is already present. + const auto it = m_framebuffers.find(layer_id); + if (it != m_framebuffers.end()) { + // If it's currently still acquired, we are done. + if (it->second.is_acquired) { + return CacheStatus::CachedBufferReused; + } + + // Try to acquire a new item. + if (this->TryAcquireFramebufferLocked(layer, it->second)) { + // We got a new item. + return CacheStatus::BufferAcquired; + } else { + // We didn't acquire a new item, but we can reuse the slot. + return CacheStatus::CachedBufferReused; + } + } + + // Framebuffer is not present, so try to create it. + Framebuffer framebuffer{}; + + if (this->TryAcquireFramebufferLocked(layer, framebuffer)) { + // Move the buffer item into a new slot. + m_framebuffers.emplace(layer_id, std::move(framebuffer)); + + // We succeeded. + return CacheStatus::BufferAcquired; + } + + // We couldn't acquire the buffer item, so don't create a slot. 
+ return CacheStatus::NoBufferAvailable; +} + +} // namespace Service::Nvnflinger diff --git a/src/core/hle/service/nvnflinger/hardware_composer.h b/src/core/hle/service/nvnflinger/hardware_composer.h new file mode 100644 index 0000000000..611afc169f --- /dev/null +++ b/src/core/hle/service/nvnflinger/hardware_composer.h @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include + +#include "core/hle/service/nvnflinger/buffer_item.h" + +namespace Service::Nvidia::Devices { +class nvdisp_disp0; +} + +namespace Service::VI { +class Display; +class Layer; +} // namespace Service::VI + +namespace Service::Nvnflinger { + +using LayerId = u64; + +class HardwareComposer { +public: + explicit HardwareComposer(); + ~HardwareComposer(); + + u32 ComposeLocked(VI::Display& display, Nvidia::Devices::nvdisp_disp0& nvdisp, + u32 frame_advance); + void RemoveLayerLocked(VI::Display& display, LayerId layer_id); + +private: + // TODO: do we want to track frame number in vi instead? 
+ u64 m_frame_number{0}; + +private: + using ReleaseFrameNumber = u64; + + struct Framebuffer { + android::BufferItem item{}; + ReleaseFrameNumber release_frame_number{}; + bool is_acquired{false}; + }; + + enum class CacheStatus : u32 { + NoBufferAvailable, + BufferAcquired, + CachedBufferReused, + }; + + boost::container::flat_map m_framebuffers{}; + +private: + bool TryAcquireFramebufferLocked(VI::Layer& layer, Framebuffer& framebuffer); + CacheStatus CacheFramebufferLocked(VI::Layer& layer, LayerId layer_id); +}; + +} // namespace Service::Nvnflinger diff --git a/src/core/hle/service/nvnflinger/hwc_layer.h b/src/core/hle/service/nvnflinger/hwc_layer.h new file mode 100644 index 0000000000..3af668a256 --- /dev/null +++ b/src/core/hle/service/nvnflinger/hwc_layer.h @@ -0,0 +1,27 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common/math_util.h" +#include "core/hle/service/nvdrv/nvdata.h" +#include "core/hle/service/nvnflinger/buffer_transform_flags.h" +#include "core/hle/service/nvnflinger/pixel_format.h" +#include "core/hle/service/nvnflinger/ui/fence.h" + +namespace Service::Nvnflinger { + +struct HwcLayer { + u32 buffer_handle; + u32 offset; + android::PixelFormat format; + u32 width; + u32 height; + u32 stride; + s32 z_index; + android::BufferTransformFlags transform; + Common::Rectangle crop_rect; + android::Fence acquire_fence; +}; + +} // namespace Service::Nvnflinger diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index 51133853c5..e775a2ca8e 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp @@ -18,6 +18,7 @@ #include "core/hle/service/nvnflinger/buffer_item_consumer.h" #include "core/hle/service/nvnflinger/buffer_queue_core.h" #include "core/hle/service/nvnflinger/fb_share_buffer_manager.h" +#include 
"core/hle/service/nvnflinger/hardware_composer.h" #include "core/hle/service/nvnflinger/hos_binder_driver_server.h" #include "core/hle/service/nvnflinger/nvnflinger.h" #include "core/hle/service/nvnflinger/ui/graphic_buffer.h" @@ -279,45 +280,18 @@ void Nvnflinger::Compose() { SCOPE_EXIT({ display.SignalVSyncEvent(); }); // Don't do anything for displays without layers. - if (!display.HasLayers()) - continue; - - // TODO(Subv): Support more than 1 layer. - VI::Layer& layer = display.GetLayer(0); - - android::BufferItem buffer{}; - const auto status = layer.GetConsumer().AcquireBuffer(&buffer, {}, false); - - if (status != android::Status::NoError) { + if (!display.HasLayers()) { continue; } - const auto& igbp_buffer = *buffer.graphic_buffer; - if (!system.IsPoweredOn()) { return; // We are likely shutting down } - // Now send the buffer to the GPU for drawing. - // TODO(Subv): Support more than just disp0. The display device selection is probably based - // on which display we're drawing (Default, Internal, External, etc) auto nvdisp = nvdrv->GetDevice(disp_fd); ASSERT(nvdisp); - Common::Rectangle crop_rect{ - static_cast(buffer.crop.Left()), static_cast(buffer.crop.Top()), - static_cast(buffer.crop.Right()), static_cast(buffer.crop.Bottom())}; - - nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(), - igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(), - static_cast(buffer.transform), crop_rect, - buffer.fence.fences, buffer.fence.num_fences); - - MicroProfileFlip(); - - swap_interval = buffer.swap_interval; - - layer.GetConsumer().ReleaseBuffer(buffer, android::Fence::NoFence()); + swap_interval = display.GetComposer().ComposeLocked(display, *nvdisp, swap_interval); } } diff --git a/src/core/hle/service/nvnflinger/nvnflinger.h b/src/core/hle/service/nvnflinger/nvnflinger.h index 3694391423..73ff366205 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.h +++ b/src/core/hle/service/nvnflinger/nvnflinger.h @@ 
-46,6 +46,7 @@ class BufferQueueProducer; namespace Service::Nvnflinger { class FbShareBufferManager; +class HardwareComposer; class HosBinderDriverServer; class Nvnflinger final { diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp index dab1905cc4..7f2af9acc5 100644 --- a/src/core/hle/service/vi/display/vi_display.cpp +++ b/src/core/hle/service/vi/display/vi_display.cpp @@ -16,6 +16,7 @@ #include "core/hle/service/nvnflinger/buffer_queue_consumer.h" #include "core/hle/service/nvnflinger/buffer_queue_core.h" #include "core/hle/service/nvnflinger/buffer_queue_producer.h" +#include "core/hle/service/nvnflinger/hardware_composer.h" #include "core/hle/service/nvnflinger/hos_binder_driver_server.h" #include "core/hle/service/vi/display/vi_display.h" #include "core/hle/service/vi/layer/vi_layer.h" @@ -43,6 +44,7 @@ Display::Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_, Core::System& system_) : display_id{id}, name{std::move(name_)}, hos_binder_driver_server{hos_binder_driver_server_}, service_context{service_context_} { + hardware_composer = std::make_unique(); vsync_event = service_context.CreateEvent(fmt::format("Display VSync Event {}", id)); } @@ -81,8 +83,6 @@ void Display::SignalVSyncEvent() { void Display::CreateLayer(u64 layer_id, u32 binder_id, Service::Nvidia::NvCore::Container& nv_core) { - ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment"); - auto [core, producer, consumer] = CreateBufferQueue(service_context, nv_core.GetNvMapFile()); auto buffer_item_consumer = std::make_shared(std::move(consumer)); diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h index 8eb8a51559..220292cff9 100644 --- a/src/core/hle/service/vi/display/vi_display.h +++ b/src/core/hle/service/vi/display/vi_display.h @@ -11,9 +11,14 @@ #include "common/common_types.h" #include "core/hle/result.h" +namespace 
Core { +class System; +} + namespace Kernel { class KEvent; -} +class KReadableEvent; +} // namespace Kernel namespace Service::android { class BufferQueueProducer; @@ -24,8 +29,9 @@ class ServiceContext; } namespace Service::Nvnflinger { +class HardwareComposer; class HosBinderDriverServer; -} +} // namespace Service::Nvnflinger namespace Service::Nvidia::NvCore { class Container; @@ -118,6 +124,10 @@ public: /// const Layer* FindLayer(u64 layer_id) const; + Nvnflinger::HardwareComposer& GetComposer() const { + return *hardware_composer; + } + private: u64 display_id; std::string name; @@ -125,6 +135,7 @@ private: KernelHelpers::ServiceContext& service_context; std::vector> layers; + std::unique_ptr hardware_composer; Kernel::KEvent* vsync_event{}; bool is_abandoned{}; }; diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 73058db9a5..d508ed28cb 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -195,8 +195,9 @@ private: void GetSharedBufferMemoryHandleId(HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const u64 buffer_id = rp.PopRaw(); + const u64 aruid = ctx.GetPID(); - LOG_INFO(Service_VI, "called. buffer_id={:#x}", buffer_id); + LOG_INFO(Service_VI, "called. 
buffer_id={:#x}, aruid={:#x}", buffer_id, aruid); struct OutputParameters { s32 nvmap_handle; @@ -206,7 +207,7 @@ private: OutputParameters out{}; Nvnflinger::SharedMemoryPoolLayout layout{}; const auto result = nvnflinger.GetSystemBufferManager().GetSharedBufferMemoryHandleId( - &out.size, &out.nvmap_handle, &layout, buffer_id, 0); + &out.size, &out.nvmap_handle, &layout, buffer_id, aruid); ctx.WriteBuffer(&layout, sizeof(layout)); diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h index 10ddc75a71..6a18b76fb5 100644 --- a/src/video_core/framebuffer_config.h +++ b/src/video_core/framebuffer_config.h @@ -7,6 +7,7 @@ #include "common/math_util.h" #include "core/hle/service/nvnflinger/buffer_transform_flags.h" #include "core/hle/service/nvnflinger/pixel_format.h" +#include "core/hle/service/nvnflinger/ui/fence.h" namespace Tegra { @@ -21,7 +22,7 @@ struct FramebufferConfig { u32 stride{}; Service::android::PixelFormat pixel_format{}; Service::android::BufferTransformFlags transform_flags{}; - Common::Rectangle crop_rect; + Common::Rectangle crop_rect{}; }; Common::Rectangle NormalizeCrop(const FramebufferConfig& framebuffer, u32 texture_width, diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 609704b333..f4a5d831cd 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -274,11 +274,6 @@ struct GPU::Impl { } } - /// Swap buffers (render frame) - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - gpu_thread.SwapBuffers(framebuffer); - } - /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory void FlushRegion(DAddr addr, u64 size) { gpu_thread.FlushRegion(addr, size); @@ -313,8 +308,9 @@ struct GPU::Impl { gpu_thread.FlushAndInvalidateRegion(addr, size); } - void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - std::array& fences, size_t num_fences) { + void RequestComposite(std::vector&& layers, + std::vector&& fences) { + size_t 
num_fences{fences.size()}; size_t current_request_counter{}; { std::unique_lock lk(request_swap_mutex); @@ -328,13 +324,12 @@ struct GPU::Impl { } } const auto wait_fence = - RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] { + RequestSyncOperation([this, current_request_counter, &layers, &fences, num_fences] { auto& syncpoint_manager = host1x.GetSyncpointManager(); if (num_fences == 0) { - renderer->SwapBuffers(framebuffer); + renderer->Composite(layers); } - const auto executer = [this, current_request_counter, - framebuffer_copy = *framebuffer]() { + const auto executer = [this, current_request_counter, layers_copy = layers]() { { std::unique_lock lk(request_swap_mutex); if (--request_swap_counters[current_request_counter] != 0) { @@ -342,7 +337,7 @@ struct GPU::Impl { } free_swap_counters.push_back(current_request_counter); } - renderer->SwapBuffers(&framebuffer_copy); + renderer->Composite(layers_copy); }; for (size_t i = 0; i < num_fences; i++) { syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer); @@ -505,9 +500,9 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const { return impl->ShaderNotify(); } -void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - std::array& fences, size_t num_fences) { - impl->RequestSwapBuffers(framebuffer, fences, num_fences); +void GPU::RequestComposite(std::vector&& layers, + std::vector&& fences) { + impl->RequestComposite(std::move(layers), std::move(fences)); } u64 GPU::GetTicks() const { @@ -554,10 +549,6 @@ void GPU::ClearCdmaInstance(u32 id) { impl->ClearCdmaInstance(id); } -void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - impl->SwapBuffers(framebuffer); -} - VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) { return impl->OnCPURead(addr, size); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b3c1d15bde..c4602ca372 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h 
@@ -212,8 +212,8 @@ public: void RendererFrameEndNotify(); - void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - std::array& fences, size_t num_fences); + void RequestComposite(std::vector&& layers, + std::vector&& fences); /// Performs any additional setup necessary in order to begin GPU emulation. /// This can be used to launch any necessary threads and register any necessary diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 788d4f61ef..58d8110b86 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -40,8 +40,6 @@ static void RunThread(std::stop_token stop_token, Core::System& system, } if (auto* submit_list = std::get_if(&next.data)) { scheduler.Push(submit_list->channel, std::move(submit_list->entries)); - } else if (const auto* data = std::get_if(&next.data)) { - renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); } else if (std::holds_alternative(next.data)) { system.GPU().TickWork(); } else if (const auto* flush = std::get_if(&next.data)) { @@ -78,10 +76,6 @@ void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) { PushCommand(SubmitListCommand(channel, std::move(entries))); } -void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - PushCommand(SwapBuffersCommand(framebuffer ? 
std::make_optional(*framebuffer) : std::nullopt)); -} - void ThreadManager::FlushRegion(DAddr addr, u64 size) { if (!is_async) { // Always flush with synchronous GPU mode diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 2de25e9ef1..dc0fce9f82 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -44,14 +44,6 @@ struct SubmitListCommand final { Tegra::CommandList entries; }; -/// Command to signal to the GPU thread that a swap buffers is pending -struct SwapBuffersCommand final { - explicit SwapBuffersCommand(std::optional framebuffer_) - : framebuffer{std::move(framebuffer_)} {} - - std::optional framebuffer; -}; - /// Command to signal to the GPU thread to flush a region struct FlushRegionCommand final { explicit constexpr FlushRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {} @@ -81,8 +73,8 @@ struct FlushAndInvalidateRegionCommand final { struct GPUTickCommand final {}; using CommandData = - std::variant; + std::variant; struct CommandDataContainer { CommandDataContainer() = default; @@ -118,9 +110,6 @@ public: /// Push GPU command entries to be processed void SubmitList(s32 channel, Tegra::CommandList&& entries); - /// Swap buffers (render frame) - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); - /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory void FlushRegion(DAddr addr, u64 size); diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 78ea5208b9..3ad180f678 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -38,7 +38,7 @@ public: virtual ~RendererBase(); /// Finalize rendering the guest frame and draw into the presentation texture - virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; + virtual void Composite(std::span layers) = 0; [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; diff --git 
a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp index 078feb9256..c89daff535 100644 --- a/src/video_core/renderer_null/renderer_null.cpp +++ b/src/video_core/renderer_null/renderer_null.cpp @@ -13,8 +13,8 @@ RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gp RendererNull::~RendererNull() = default; -void RendererNull::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - if (!framebuffer) { +void RendererNull::Composite(std::span framebuffers) { + if (framebuffers.empty()) { return; } diff --git a/src/video_core/renderer_null/renderer_null.h b/src/video_core/renderer_null/renderer_null.h index 9531b43f66..063b476bb9 100644 --- a/src/video_core/renderer_null/renderer_null.h +++ b/src/video_core/renderer_null/renderer_null.h @@ -17,7 +17,7 @@ public: std::unique_ptr context); ~RendererNull() override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void Composite(std::span framebuffer) override; VideoCore::RasterizerInterface* ReadRasterizer() override { return &m_rasterizer; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 10a9f973cd..e33a325927 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -125,15 +125,15 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, RendererOpenGL::~RendererOpenGL() = default; -void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - if (!framebuffer) { +void RendererOpenGL::Composite(std::span framebuffers) { + if (framebuffers.empty()) { return; } - RenderScreenshot(framebuffer); + RenderScreenshot(framebuffers); state_tracker.BindFramebuffer(0); - blit_screen->DrawScreen(std::span(framebuffer, 1), emu_window.GetFramebufferLayout()); + blit_screen->DrawScreen(framebuffers, emu_window.GetFramebufferLayout()); ++m_current_frame; 
@@ -159,7 +159,7 @@ void RendererOpenGL::AddTelemetryFields() { telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); } -void RendererOpenGL::RenderScreenshot(const Tegra::FramebufferConfig* framebuffer) { +void RendererOpenGL::RenderScreenshot(std::span framebuffers) { if (!renderer_settings.screenshot_requested) { return; } @@ -181,7 +181,7 @@ void RendererOpenGL::RenderScreenshot(const Tegra::FramebufferConfig* framebuffe glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, layout.width, layout.height); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); - blit_screen->DrawScreen(std::span(framebuffer, 1), layout); + blit_screen->DrawScreen(framebuffers, layout); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glPixelStorei(GL_PACK_ROW_LENGTH, 0); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index df76d3d05c..c4625c96e2 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -40,7 +40,7 @@ public: std::unique_ptr context_); ~RendererOpenGL() override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void Composite(std::span framebuffers) override; VideoCore::RasterizerInterface* ReadRasterizer() override { return &rasterizer; @@ -52,7 +52,7 @@ public: private: void AddTelemetryFields(); - void RenderScreenshot(const Tegra::FramebufferConfig* framebuffer); + void RenderScreenshot(std::span framebuffers); Core::TelemetrySession& telemetry_session; Core::Frontend::EmuWindow& emu_window; diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp index 7bff1c436a..6ee16595df 100644 --- a/src/video_core/renderer_vulkan/present/util.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -7,6 +7,20 @@ namespace Vulkan { +vk::Buffer CreateWrappedBuffer(MemoryAllocator& allocator, VkDeviceSize 
size, MemoryUsage usage) { + const VkBufferCreateInfo dst_buffer_info{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = size, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + return allocator.CreateBuffer(dst_buffer_info, usage); +} + vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) { const VkImageCreateInfo image_ci{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, @@ -96,6 +110,70 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc scheduler.Finish(); } +void DownloadColorImage(vk::CommandBuffer& cmdbuf, VkImage image, VkBuffer buffer, + VkExtent3D extent) { + const VkImageMemoryBarrier read_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const VkImageMemoryBarrier image_write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + 
.baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + static constexpr VkMemoryBarrier memory_write_barrier{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; + const VkBufferImageCopy copy{ + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset{.x = 0, .y = 0, .z = 0}, + .imageExtent{extent}, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, + memory_write_barrier, nullptr, image_write_barrier); +} + vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) { return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, diff --git a/src/video_core/renderer_vulkan/present/util.h b/src/video_core/renderer_vulkan/present/util.h index fb4e4a8e46..1104aaa157 100644 --- a/src/video_core/renderer_vulkan/present/util.h +++ b/src/video_core/renderer_vulkan/present/util.h @@ -11,12 +11,16 @@ namespace Vulkan { #define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0]))) +vk::Buffer CreateWrappedBuffer(MemoryAllocator& allocator, VkDeviceSize size, MemoryUsage usage); + vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format); void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL); void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler, vk::Image& 
image, VkExtent2D dimensions, VkFormat format, std::span initial_contents = {}); +void DownloadColorImage(vk::CommandBuffer& cmdbuf, VkImage image, VkBuffer buffer, + VkExtent3D extent); void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image); vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 77837added..48a1053277 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -20,12 +20,14 @@ #include "core/frontend/graphics_context.h" #include "core/telemetry_session.h" #include "video_core/gpu.h" +#include "video_core/renderer_vulkan/present/util.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/textures/decoders.h" #include "video_core/vulkan_common/vulkan_debug_callback.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_instance.h" @@ -116,18 +118,20 @@ RendererVulkan::~RendererVulkan() { void(device.GetLogical().WaitIdle()); } -void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - if (!framebuffer) { +void RendererVulkan::Composite(std::span framebuffers) { + if (framebuffers.empty()) { return; } + SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); + if (!render_window.IsShown()) { return; } - RenderScreenshot(framebuffer); + RenderScreenshot(framebuffers); Frame* frame = present_manager.GetRenderFrame(); - blit_swapchain.DrawToFrame(rasterizer, frame, std::span(framebuffer, 1), + blit_swapchain.DrawToFrame(rasterizer, frame, framebuffers, 
render_window.GetFramebufferLayout(), swapchain.GetImageCount(), swapchain.GetImageViewFormat()); scheduler.Flush(*frame->render_ready); @@ -163,156 +167,37 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } -void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig* framebuffer) { +void Vulkan::RendererVulkan::RenderScreenshot( + std::span framebuffers) { if (!renderer_settings.screenshot_requested) { return; } - const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; - auto frame = [&]() { - vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = nullptr, - .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, - .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_B8G8R8A8_UNORM, - .extent = - { - .width = layout.width, - .height = layout.height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }); - vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = *staging_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = VK_FORMAT_B8G8R8A8_UNORM, - .components{ - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }); - vk::Framebuffer 
screenshot_fb = - blit_screenshot.CreateFramebuffer(layout, *dst_view, VK_FORMAT_B8G8R8A8_UNORM); - return Frame{ - .width = layout.width, - .height = layout.height, - .image = std::move(staging_image), - .image_view = std::move(dst_view), - .framebuffer = std::move(screenshot_fb), - .cmdbuf{}, - .render_ready{}, - .present_done{}, - }; + constexpr VkFormat ScreenshotFormat{VK_FORMAT_B8G8R8A8_UNORM}; + const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; + + auto frame = [&]() { + Frame f{}; + f.image = CreateWrappedImage(memory_allocator, VkExtent2D{layout.width, layout.height}, + ScreenshotFormat); + f.image_view = CreateWrappedImageView(device, f.image, ScreenshotFormat); + f.framebuffer = blit_screenshot.CreateFramebuffer(layout, *f.image_view, ScreenshotFormat); + return f; }(); - blit_screenshot.DrawToFrame(rasterizer, &frame, std::span(framebuffer, 1), layout, 1, - VK_FORMAT_B8G8R8A8_UNORM); + blit_screenshot.DrawToFrame(rasterizer, &frame, framebuffers, layout, 1, + ScreenshotFormat); - const auto buffer_size = static_cast(layout.width * layout.height * 4); - const VkBufferCreateInfo dst_buffer_info{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = buffer_size, - .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }; - const vk::Buffer dst_buffer = - memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download); + const auto dst_buffer = CreateWrappedBuffer( + memory_allocator, static_cast(layout.width * layout.height * 4), + MemoryUsage::Download); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([&](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier read_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
- .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = *frame.image, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; - const VkImageMemoryBarrier image_write_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = *frame.image, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; - static constexpr VkMemoryBarrier memory_write_barrier{ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, - }; - const VkBufferImageCopy copy{ - .bufferOffset = 0, - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .imageOffset{.x = 0, .y = 0, .z = 0}, - .imageExtent{ - .width = layout.width, - .height = layout.height, - .depth = 1, - }, - }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, read_barrier); - cmdbuf.CopyImageToBuffer(*frame.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer, - copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - 0, memory_write_barrier, nullptr, image_write_barrier); + DownloadColorImage(cmdbuf, 
*frame.image, *dst_buffer, + VkExtent3D{layout.width, layout.height, 1}); }); + // Ensure the copy is fully completed before saving the screenshot scheduler.Finish(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index bdeb43a54b..c6d8a0f216 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -46,7 +46,7 @@ public: std::unique_ptr context_); ~RendererVulkan() override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void Composite(std::span framebuffers) override; VideoCore::RasterizerInterface* ReadRasterizer() override { return &rasterizer; @@ -59,7 +59,7 @@ public: private: void Report() const; - void RenderScreenshot(const Tegra::FramebufferConfig* framebuffer); + void RenderScreenshot(std::span framebuffers); Core::TelemetrySession& telemetry_session; Tegra::MaxwellDeviceMemoryManager& device_memory; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index b2dcbf80b0..2275fcc46a 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -115,7 +115,7 @@ void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, } vk::Framebuffer BlitScreen::CreateFramebuffer(const Layout::FramebufferLayout& layout, - const VkImageView& image_view, + VkImageView image_view, VkFormat current_view_format) { const bool format_updated = std::exchange(swapchain_view_format, current_view_format) != current_view_format; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 9a3476c779..cbdf2d5d0b 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -56,7 +56,7 @@ public: VkFormat current_swapchain_view_format); [[nodiscard]] vk::Framebuffer 
CreateFramebuffer(const Layout::FramebufferLayout& layout, - const VkImageView& image_view, + VkImageView image_view, VkFormat current_view_format); private: From 2c421a7046c5ff1fdb8319f097a89a331907baf6 Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 23 Jan 2024 10:19:55 -0500 Subject: [PATCH 15/15] hardware_composer: implement speed limit extensions --- src/core/hle/service/nvnflinger/buffer_item.h | 2 +- .../service/nvnflinger/hardware_composer.cpp | 53 ++++++++++++++----- .../service/nvnflinger/hardware_composer.h | 4 +- .../hle/service/nvnflinger/nvnflinger.cpp | 12 +++-- src/core/hle/service/nvnflinger/nvnflinger.h | 1 + 5 files changed, 50 insertions(+), 22 deletions(-) diff --git a/src/core/hle/service/nvnflinger/buffer_item.h b/src/core/hle/service/nvnflinger/buffer_item.h index f9f262628d..7fd808f546 100644 --- a/src/core/hle/service/nvnflinger/buffer_item.h +++ b/src/core/hle/service/nvnflinger/buffer_item.h @@ -40,7 +40,7 @@ public: bool is_droppable{}; bool acquire_called{}; bool transform_to_display_inverse{}; - u32 swap_interval{}; + s32 swap_interval{}; }; } // namespace Service::android diff --git a/src/core/hle/service/nvnflinger/hardware_composer.cpp b/src/core/hle/service/nvnflinger/hardware_composer.cpp index 54889bb4f4..c720dd1f8e 100644 --- a/src/core/hle/service/nvnflinger/hardware_composer.cpp +++ b/src/core/hle/service/nvnflinger/hardware_composer.cpp @@ -16,11 +16,37 @@ namespace Service::Nvnflinger { +namespace { + +s32 NormalizeSwapInterval(f32* out_speed_scale, s32 swap_interval) { + if (swap_interval <= 0) { + // As an extension, treat nonpositive swap interval as speed multiplier. + if (out_speed_scale) { + *out_speed_scale = 2.f * static_cast(1 - swap_interval); + } + + swap_interval = 1; + } + + if (swap_interval >= 5) { + // As an extension, treat high swap interval as precise speed control. 
+ if (out_speed_scale) { + *out_speed_scale = static_cast(swap_interval) / 100.f; + } + + swap_interval = 1; + } + + return swap_interval; +} + +} // namespace + HardwareComposer::HardwareComposer() = default; HardwareComposer::~HardwareComposer() = default; -u32 HardwareComposer::ComposeLocked(VI::Display& display, Nvidia::Devices::nvdisp_disp0& nvdisp, - u32 frame_advance) { +u32 HardwareComposer::ComposeLocked(f32* out_speed_scale, VI::Display& display, + Nvidia::Devices::nvdisp_disp0& nvdisp, u32 frame_advance) { boost::container::small_vector composition_stack; m_frame_number += frame_advance; @@ -45,8 +71,11 @@ u32 HardwareComposer::ComposeLocked(VI::Display& display, Nvidia::Devices::nvdis } } + // Set default speed limit to 100%. + *out_speed_scale = 1.0f; + // Determine the number of vsync periods to wait before composing again. - std::optional swap_interval{}; + std::optional swap_interval{}; bool has_acquired_buffer{}; // Acquire all necessary framebuffers. @@ -87,14 +116,15 @@ u32 HardwareComposer::ComposeLocked(VI::Display& display, Nvidia::Devices::nvdis // We need to compose again either before this frame is supposed to // be released, or exactly on the vsync period it should be released. - // + const s32 item_swap_interval = NormalizeSwapInterval(out_speed_scale, item.swap_interval); + // TODO: handle cases where swap intervals are relatively prime. So far, // only swap intervals of 0, 1 and 2 have been observed, but if 3 were // to be introduced, this would cause an issue. if (swap_interval) { - swap_interval = std::min(*swap_interval, item.swap_interval); + swap_interval = std::min(*swap_interval, item_swap_interval); } else { - swap_interval = item.swap_interval; + swap_interval = item_swap_interval; } } @@ -111,13 +141,8 @@ u32 HardwareComposer::ComposeLocked(VI::Display& display, Nvidia::Devices::nvdis // Render MicroProfile. MicroProfileFlip(); - // If we advanced, then advance by at least 1 frame. 
- if (swap_interval) { - return std::max(*swap_interval, 1U); - } - - // Otherwise, advance by exactly one frame. - return 1U; + // Advance by at least one frame. + return swap_interval.value_or(1); } void HardwareComposer::RemoveLayerLocked(VI::Display& display, LayerId layer_id) { @@ -146,7 +171,7 @@ bool HardwareComposer::TryAcquireFramebufferLocked(VI::Layer& layer, Framebuffer // We succeeded, so set the new release frame info. framebuffer.release_frame_number = - m_frame_number + std::max(1U, framebuffer.item.swap_interval); + m_frame_number + NormalizeSwapInterval(nullptr, framebuffer.item.swap_interval); framebuffer.is_acquired = true; return true; diff --git a/src/core/hle/service/nvnflinger/hardware_composer.h b/src/core/hle/service/nvnflinger/hardware_composer.h index 611afc169f..ddab94ac9d 100644 --- a/src/core/hle/service/nvnflinger/hardware_composer.h +++ b/src/core/hle/service/nvnflinger/hardware_composer.h @@ -26,8 +26,8 @@ public: explicit HardwareComposer(); ~HardwareComposer(); - u32 ComposeLocked(VI::Display& display, Nvidia::Devices::nvdisp_disp0& nvdisp, - u32 frame_advance); + u32 ComposeLocked(f32* out_speed_scale, VI::Display& display, + Nvidia::Devices::nvdisp_disp0& nvdisp, u32 frame_advance); void RemoveLayerLocked(VI::Display& display, LayerId layer_id); private: diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index e775a2ca8e..a4e8488823 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp @@ -291,7 +291,8 @@ void Nvnflinger::Compose() { auto nvdisp = nvdrv->GetDevice(disp_fd); ASSERT(nvdisp); - swap_interval = display.GetComposer().ComposeLocked(display, *nvdisp, swap_interval); + swap_interval = display.GetComposer().ComposeLocked(&compose_speed_scale, display, *nvdisp, + swap_interval); } } @@ -308,15 +309,16 @@ s64 Nvnflinger::GetNextTicks() const { speed_scale = 0.01f; } } + + // Adjust by speed limit determined during
composition. + speed_scale /= compose_speed_scale; + if (system.GetNVDECActive() && settings.use_video_framerate.GetValue()) { // Run at intended presentation rate during video playback. speed_scale = 1.f; } - // As an extension, treat nonpositive swap interval as framerate multiplier. - const f32 effective_fps = swap_interval <= 0 ? 120.f * static_cast(1 - swap_interval) - : 60.f / static_cast(swap_interval); - + const f32 effective_fps = 60.f / static_cast(swap_interval); return static_cast(speed_scale * (1000000000.f / effective_fps)); } diff --git a/src/core/hle/service/nvnflinger/nvnflinger.h b/src/core/hle/service/nvnflinger/nvnflinger.h index 73ff366205..c984d55a00 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.h +++ b/src/core/hle/service/nvnflinger/nvnflinger.h @@ -144,6 +144,7 @@ private: u32 next_buffer_queue_id = 1; s32 swap_interval = 1; + f32 compose_speed_scale = 1.0f; bool is_abandoned = false;