From bec28d692d21a42f17ae26f0ab6271aca1c233cd Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 15 Apr 2019 21:06:04 -0400 Subject: [PATCH] Implement Block Linear copies in Kepler Memory. --- src/video_core/engines/kepler_memory.cpp | 19 ++++++++++++++----- src/video_core/textures/decoders.cpp | 21 +++++++++++++++++++++ src/video_core/textures/decoders.h | 3 +++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 3ed28f4a78..4df19c1f5c 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -10,7 +10,6 @@ #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" -#include "video_core/textures/convert.h" #include "video_core/textures/decoders.h" namespace Tegra::Engines { @@ -47,13 +46,12 @@ void KeplerMemory::ProcessExec() { void KeplerMemory::ProcessData(u32 data, bool is_last_call) { const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); - std::memcpy(&state.inner_buffer[state.write_offset], &data, sub_copy_size); + std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size); state.write_offset += sub_copy_size; if (is_last_call) { - UNIMPLEMENTED_IF_MSG(regs.exec.linear == 0, "Block Linear Copy is not implemented"); + const GPUVAddr address{regs.dest.Address()}; + const auto host_ptr = memory_manager.GetPointer(address); if (regs.exec.linear != 0) { - const GPUVAddr address{regs.dest.Address()}; - const auto host_ptr = memory_manager.GetPointer(address); // We have to invalidate the destination region to evict any outdated surfaces from the // cache. We do this before actually writing the new data because the destination // address might contain a dirty surface that will have to be written back to memory. 
@@ -61,6 +59,17 @@ void KeplerMemory::ProcessData(u32 data, bool is_last_call) { rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size); std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size); system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); + } else { + UNIMPLEMENTED_IF(regs.dest.z != 0); + UNIMPLEMENTED_IF(regs.dest.depth != 1); + UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); + UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); + const std::size_t dst_size = Tegra::Texture::CalculateSize( + true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); + rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), dst_size); + Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, + regs.dest.y, regs.dest.BlockHeight(), state.copy_size, + state.inner_buffer.data(), host_ptr); } } } diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 995d0e068a..6e02a64078 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -288,6 +288,27 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 } } +void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, + std::size_t copy_size, u8* source_data, u8* swizzle_data) { + const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; + std::size_t count = 0; + for (u32 y = dst_y; y < height && count < copy_size; ++y) { + const u32 gob_address_y = + (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + + ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; + const auto& table = legacy_swizzle_table[y % gob_size_y]; + for (u32 x = dst_x; x < width && count < copy_size; ++x) { + const u32 gob_address = gob_address_y + (x / gob_size_x) * gob_size * block_height; + const u32 swizzled_offset = gob_address + table[x % gob_size_x]; + const u8* source_line = source_data + count; + u8* dest_addr = swizzle_data + 
swizzled_offset; + count++; + + std::memcpy(dest_addr, source_line, 1); + } + } +} + std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, u32 height) { std::vector<u8> rgba_data; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e078fa2744..21d4b37fc5 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -51,4 +51,7 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, u32 offset_x, u32 offset_y); +void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, + std::size_t copy_size, u8* source_data, u8* swizzle_data); + } // namespace Tegra::Texture