From 367704aa829a515188d87860ffd230b1b2a136c8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 15 Apr 2019 23:01:35 -0400 Subject: [PATCH 1/7] GPU MemoryManager: Implement ReadBlockUnsafe and WriteBlockUnsafe --- src/video_core/memory_manager.cpp | 32 +++++++++++++++++++++++++++++++ src/video_core/memory_manager.h | 2 ++ 2 files changed, 34 insertions(+) diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 0f4e820aa9..3b9f6caf05 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -226,6 +226,22 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t } } +void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const { + std::size_t remaining_size{size}; + std::size_t page_index{src_addr >> page_bits}; + std::size_t page_offset{src_addr & page_mask}; + + while (remaining_size > 0) { + const std::size_t copy_amount{ + std::min(static_cast(page_size) - page_offset, remaining_size)}; + std::memcpy(dest_buffer, src_ptr, copy_amount); + page_index++; + page_offset = 0; + dest_buffer = static_cast(dest_buffer) + copy_amount; + remaining_size -= copy_amount; + } +} + void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { std::size_t remaining_size{size}; std::size_t page_index{dest_addr >> page_bits}; @@ -253,6 +269,22 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std:: } } +void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { + std::size_t remaining_size{size}; + std::size_t page_index{dest_addr >> page_bits}; + std::size_t page_offset{dest_addr & page_mask}; + + while (remaining_size > 0) { + const std::size_t copy_amount{ + std::min(static_cast(page_size) - page_offset, remaining_size)}; + std::memcpy(dest_ptr, src_buffer, copy_amount); + page_index++; + page_offset = 0; + src_buffer = static_cast(src_buffer) 
+ copy_amount; + remaining_size -= copy_amount; + } +} + void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { std::size_t remaining_size{size}; std::size_t page_index{src_addr >> page_bits}; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 647cbf93ad..29f3860c1b 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -67,6 +67,8 @@ public: void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); + void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; + void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); private: From ef381e6924ee28162d3ce1cff0523ea7a88981d2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 15 Apr 2019 23:05:05 -0400 Subject: [PATCH 2/7] Use ReadBlockUnsafe on TIC and TSC reading Use ReadBlockUnsafe on TIC and TSC reading as memory is never flushed from host GPU there. 
--- src/video_core/engines/maxwell_3d.cpp | 4 ++-- src/video_core/memory_manager.cpp | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 74403eed48..69401fcda5 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -418,7 +418,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; Texture::TICEntry tic_entry; - memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); + memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || tic_entry.header_version == Texture::TICHeaderVersion::Pitch, @@ -439,7 +439,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; Texture::TSCEntry tsc_entry; - memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); + memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); return tsc_entry; } diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 3b9f6caf05..20d744c61b 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -234,6 +234,7 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::s while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast(page_size) - page_offset, remaining_size)}; + const u8* src_ptr{page_table.pointers[page_index] + page_offset}; std::memcpy(dest_buffer, src_ptr, copy_amount); page_index++; page_offset = 0; @@ -277,6 +278,7 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, while (remaining_size > 0) { const 
std::size_t copy_amount{ std::min(static_cast(page_size) - page_offset, remaining_size)}; + u8* dest_ptr{page_table.pointers[page_index] + page_offset}; std::memcpy(dest_ptr, src_buffer, copy_amount); page_index++; page_offset = 0; From 6fc562a9aa1c89c793e71390790796e46b7c0cb0 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 15 Apr 2019 23:34:03 -0400 Subject: [PATCH 3/7] Use ReadBlockUnsafe for Shader Cache --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 99f67494c9..90598e2917 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -38,13 +38,14 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { } /// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(const u8* host_ptr) { +ProgramCode GetShaderCode(const GPUVAddr gpu_addr, const u8* host_ptr) { + auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); ASSERT_OR_EXECUTE(host_ptr != nullptr, { std::fill(program_code.begin(), program_code.end(), 0); return program_code; }); - std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64)); + memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), program_code.size() * sizeof(u64)); return program_code; } @@ -497,11 +498,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { if (!shader) { // No shader found - create a new one - ProgramCode program_code{GetShaderCode(host_ptr)}; + ProgramCode program_code{GetShaderCode(program_addr, host_ptr)}; ProgramCode program_code_b; if (program == Maxwell::ShaderProgram::VertexA) { - program_code_b = GetShaderCode( - 
memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); + const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)}; + program_code_b = + GetShaderCode(program_addr_b, memory_manager.GetPointer(program_addr_b)); } const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; From 06d1c5a9912dac4f20e6f0d31839ef44d8a260f2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 16 Apr 2019 10:11:35 -0400 Subject: [PATCH 4/7] Document unsafe versions and add CopyBlockUnsafe --- src/video_core/memory_manager.cpp | 18 +++++++---- src/video_core/memory_manager.h | 30 +++++++++++++++---- .../renderer_opengl/gl_shader_cache.cpp | 13 ++++---- 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 20d744c61b..18a8d26848 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -199,7 +199,7 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { return {}; } -void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const { +void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { std::size_t remaining_size{size}; std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; @@ -226,7 +226,8 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t } } -void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const { +void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, + const std::size_t size) const { std::size_t remaining_size{size}; std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; @@ -243,7 +244,7 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::s } } 
-void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { +void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) { std::size_t remaining_size{size}; std::size_t page_index{dest_addr >> page_bits}; std::size_t page_offset{dest_addr & page_mask}; @@ -270,7 +271,8 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std:: } } -void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { +void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, + const std::size_t size) { std::size_t remaining_size{size}; std::size_t page_index{dest_addr >> page_bits}; std::size_t page_offset{dest_addr & page_mask}; @@ -287,7 +289,7 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, } } -void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { +void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { std::size_t remaining_size{size}; std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; @@ -315,6 +317,12 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t } } +void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { + std::vector tmp_buffer(size); + ReadBlockUnsafe(src_addr, tmp_buffer.data(), size); + WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); +} + void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, VAddr backing_addr) { LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 29f3860c1b..084d834c85 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -65,11 +65,31 @@ public: u8* GetPointer(GPUVAddr addr); 
const u8* GetPointer(GPUVAddr addr) const; - void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; - void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); - void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; - void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); - void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); + + /* + * ReadBlock and WriteBlock are full read and write operations over virtual + * GPU Memory. It's important to use these when GPU memory may not be continous + * in the Host Memory counterpart. Note: This functions cause Host GPU Memory + * Flushes and Invalidations, respectively to each operation. + */ + void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const; + void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size); + void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size); + + /* + * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and + * WriteBlock respectively. In this versions, no flushing or invalidation is actually + * done and their performance is similar to a memcpy. This functions can be used + * on either of this 2 scenarios instead of their safe counterpart: + * - Memory which is sure to never be represented in the Host GPU. + * - Memory Managed by a Cache Manager. Example: Texture Flushing should use + * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture + * being flushed. 
+ */ + void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const; + void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size); + void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size); + private: using VMAMap = std::map; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 90598e2917..43f2906a88 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -38,14 +38,15 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { } /// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(const GPUVAddr gpu_addr, const u8* host_ptr) { - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; +ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, + const u8* host_ptr) { ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); ASSERT_OR_EXECUTE(host_ptr != nullptr, { std::fill(program_code.begin(), program_code.end(), 0); return program_code; }); - memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), program_code.size() * sizeof(u64)); + memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), + program_code.size() * sizeof(u64)); return program_code; } @@ -498,12 +499,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { if (!shader) { // No shader found - create a new one - ProgramCode program_code{GetShaderCode(program_addr, host_ptr)}; + ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; ProgramCode program_code_b; if (program == Maxwell::ShaderProgram::VertexA) { const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)}; - program_code_b = - GetShaderCode(program_addr_b, memory_manager.GetPointer(program_addr_b)); + program_code_b = 
GetShaderCode(memory_manager, program_addr_b, + memory_manager.GetPointer(program_addr_b)); } const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; From 13d626fc217c4286a80071c5885eee2ab8795d62 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 16 Apr 2019 10:19:52 -0400 Subject: [PATCH 5/7] Use ReadBlockUnsafe for fetching DMA CommandLists --- src/video_core/dma_pusher.cpp | 4 ++-- src/video_core/memory_manager.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 046d047cbe..6674d94059 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -57,8 +57,8 @@ bool DmaPusher::Step() { // Push buffer non-empty, read a word command_headers.resize(command_list_header.size); - gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(), - command_list_header.size * sizeof(u32)); + gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), + command_list_header.size * sizeof(u32)); for (const CommandHeader& command_header : command_headers) { diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 084d834c85..017b051cfb 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -65,7 +65,6 @@ public: u8* GetPointer(GPUVAddr addr); const u8* GetPointer(GPUVAddr addr) const; - /* * ReadBlock and WriteBlock are full read and write operations over virtual * GPU Memory. 
It's important to use these when GPU memory may not be continous @@ -90,7 +89,6 @@ public: void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size); void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size); - private: using VMAMap = std::map; using VMAHandle = VMAMap::const_iterator; From d0082de82a094c98a5ef8907583415daef91604a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 16 Apr 2019 15:45:24 -0400 Subject: [PATCH 6/7] Implement IsBlockContinous This detects when a GPU Memory Block is not contiguous within host CPU memory. --- src/video_core/memory_manager.cpp | 8 ++++++++ src/video_core/memory_manager.h | 7 +++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 18a8d26848..095a7e5a46 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -199,6 +199,14 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { return {}; } +bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) { + const GPUVAddr end = start + size; + const auto host_ptr_start = reinterpret_cast(GetPointer(start)); + const auto host_ptr_end = reinterpret_cast(GetPointer(end)); + const std::size_t range = static_cast(host_ptr_end - host_ptr_start); + return range == size; +} + void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { std::size_t remaining_size{size}; std::size_t page_index{src_addr >> page_bits}; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 017b051cfb..e4f0c4bd6e 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -65,7 +65,10 @@ public: u8* GetPointer(GPUVAddr addr); const u8* GetPointer(GPUVAddr addr) const; - /* + // Returns true if the block is continous in host memory, false otherwise + bool IsBlockContinous(const GPUVAddr start, const 
std::size_t size); + + /** * ReadBlock and WriteBlock are full read and write operations over virtual * GPU Memory. It's important to use these when GPU memory may not be continous * in the Host Memory counterpart. Note: This functions cause Host GPU Memory @@ -75,7 +78,7 @@ public: void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size); void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size); - /* + /** * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and * WriteBlock respectively. In this versions, no flushing or invalidation is actually * done and their performance is similar to a memcpy. This functions can be used From db4b2bc798388fd22d3ea60eb82c5f894b2b3506 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Apr 2019 20:35:54 -0400 Subject: [PATCH 7/7] Make ReadBlockUnsafe and WriteBlockUnsafe ignore invalid pages. --- src/video_core/memory_manager.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 095a7e5a46..6c98c67012 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -243,8 +243,13 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast(page_size) - page_offset, remaining_size)}; - const u8* src_ptr{page_table.pointers[page_index] + page_offset}; - std::memcpy(dest_buffer, src_ptr, copy_amount); + const u8* page_pointer = page_table.pointers[page_index]; + if (page_pointer) { + const u8* src_ptr{page_pointer + page_offset}; + std::memcpy(dest_buffer, src_ptr, copy_amount); + } else { + std::memset(dest_buffer, 0, copy_amount); + } page_index++; page_offset = 0; dest_buffer = static_cast(dest_buffer) + copy_amount; @@ -288,8 +293,11 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, while 
(remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast(page_size) - page_offset, remaining_size)}; - u8* dest_ptr{page_table.pointers[page_index] + page_offset}; - std::memcpy(dest_ptr, src_buffer, copy_amount); + u8* page_pointer = page_table.pointers[page_index]; + if (page_pointer) { + u8* dest_ptr{page_pointer + page_offset}; + std::memcpy(dest_ptr, src_buffer, copy_amount); + } page_index++; page_offset = 0; src_buffer = static_cast(src_buffer) + copy_amount;