Merge pull request #3610 from FernandoS27/gpu-caches

Refactor all the GPU Caches to use VAddr for cache addressing
This commit is contained in:
Rodrigo Locatti 2020-04-09 17:59:21 -03:00 committed by GitHub
commit 36f607217f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 573 additions and 430 deletions

View File

@ -242,7 +242,52 @@ struct Memory::Impl {
} }
case Common::PageType::RasterizerCachedMemory: { case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount); system.GPU().FlushRegion(current_vaddr, copy_amount);
std::memcpy(dest_buffer, host_ptr, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
remaining_size -= copy_amount;
}
}
void ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
const std::size_t size) {
const auto& page_table = process.VMManager().page_table;
std::size_t remaining_size = size;
std::size_t page_index = src_addr >> PAGE_BITS;
std::size_t page_offset = src_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
std::memset(dest_buffer, 0, copy_amount);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* const src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
std::memcpy(dest_buffer, host_ptr, copy_amount); std::memcpy(dest_buffer, host_ptr, copy_amount);
break; break;
} }
@ -261,6 +306,10 @@ struct Memory::Impl {
ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size); ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
} }
// Current-process convenience overload: forwards to the process-taking
// ReadBlockUnsafe using system.CurrentProcess() as the address space.
void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
}
void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer, void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
const std::size_t size) { const std::size_t size) {
const auto& page_table = process.VMManager().page_table; const auto& page_table = process.VMManager().page_table;
@ -290,7 +339,50 @@ struct Memory::Impl {
} }
case Common::PageType::RasterizerCachedMemory: { case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); system.GPU().InvalidateRegion(current_vaddr, copy_amount);
std::memcpy(host_ptr, src_buffer, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
remaining_size -= copy_amount;
}
}
void WriteBlockUnsafe(const Kernel::Process& process, const VAddr dest_addr,
const void* src_buffer, const std::size_t size) {
const auto& page_table = process.VMManager().page_table;
std::size_t remaining_size = size;
std::size_t page_index = dest_addr >> PAGE_BITS;
std::size_t page_offset = dest_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* const dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
std::memcpy(host_ptr, src_buffer, copy_amount); std::memcpy(host_ptr, src_buffer, copy_amount);
break; break;
} }
@ -309,6 +401,10 @@ struct Memory::Impl {
WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size); WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
} }
// Current-process convenience overload: forwards to the process-taking
// WriteBlockUnsafe using system.CurrentProcess() as the address space.
void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
}
void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) { void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
const auto& page_table = process.VMManager().page_table; const auto& page_table = process.VMManager().page_table;
std::size_t remaining_size = size; std::size_t remaining_size = size;
@ -337,7 +433,7 @@ struct Memory::Impl {
} }
case Common::PageType::RasterizerCachedMemory: { case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); system.GPU().InvalidateRegion(current_vaddr, copy_amount);
std::memset(host_ptr, 0, copy_amount); std::memset(host_ptr, 0, copy_amount);
break; break;
} }
@ -384,7 +480,7 @@ struct Memory::Impl {
} }
case Common::PageType::RasterizerCachedMemory: { case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount); system.GPU().FlushRegion(current_vaddr, copy_amount);
WriteBlock(process, dest_addr, host_ptr, copy_amount); WriteBlock(process, dest_addr, host_ptr, copy_amount);
break; break;
} }
@ -545,7 +641,7 @@ struct Memory::Impl {
break; break;
case Common::PageType::RasterizerCachedMemory: { case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr = GetPointerFromVMA(vaddr); const u8* const host_ptr = GetPointerFromVMA(vaddr);
system.GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T)); system.GPU().FlushRegion(vaddr, sizeof(T));
T value; T value;
std::memcpy(&value, host_ptr, sizeof(T)); std::memcpy(&value, host_ptr, sizeof(T));
return value; return value;
@ -587,7 +683,7 @@ struct Memory::Impl {
break; break;
case Common::PageType::RasterizerCachedMemory: { case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr{GetPointerFromVMA(vaddr)}; u8* const host_ptr{GetPointerFromVMA(vaddr)};
system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T)); system.GPU().InvalidateRegion(vaddr, sizeof(T));
std::memcpy(host_ptr, &data, sizeof(T)); std::memcpy(host_ptr, &data, sizeof(T));
break; break;
} }
@ -696,6 +792,15 @@ void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_
impl->ReadBlock(src_addr, dest_buffer, size); impl->ReadBlock(src_addr, dest_buffer, size);
} }
// Public entry point; delegates to Memory::Impl::ReadBlockUnsafe for the given process.
void Memory::ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr,
void* dest_buffer, const std::size_t size) {
impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
}
// Public entry point; delegates to the Memory::Impl::ReadBlockUnsafe overload that
// takes no process argument.
void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
}
void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer, void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
std::size_t size) { std::size_t size) {
impl->WriteBlock(process, dest_addr, src_buffer, size); impl->WriteBlock(process, dest_addr, src_buffer, size);
@ -705,6 +810,16 @@ void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std
impl->WriteBlock(dest_addr, src_buffer, size); impl->WriteBlock(dest_addr, src_buffer, size);
} }
// Public entry point; delegates to Memory::Impl::WriteBlockUnsafe for the given process.
void Memory::WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr,
const void* src_buffer, std::size_t size) {
impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
}
// Public entry point; delegates to the Memory::Impl::WriteBlockUnsafe overload that
// takes no process argument.
void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
const std::size_t size) {
impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
}
void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) { void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) {
impl->ZeroBlock(process, dest_addr, size); impl->ZeroBlock(process, dest_addr, size);
} }

View File

@ -294,6 +294,27 @@ public:
void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer, void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
std::size_t size); std::size_t size);
/**
* Reads a contiguous block of bytes from a specified process' address space.
* This unsafe version does not trigger GPU flushing: pages cached by the
* rasterizer are copied straight from host memory without a FlushRegion call.
*
* @param process The process to read the data from.
* @param src_addr The virtual address to begin reading from.
* @param dest_buffer The buffer to place the read bytes into.
* @param size The amount of data to read, in bytes.
*
* @note If a size of 0 is specified, then this function reads nothing and
* no attempts to access memory are made at all.
*
* @note Bytes that fall on unmapped pages are zero-filled in dest_buffer and
* an error is logged.
*
* @pre dest_buffer must be at least size bytes in length, otherwise a
* buffer overrun will occur.
*
* @post The range [dest_buffer, dest_buffer + size) contains the read bytes
* from the process' address space.
*/
void ReadBlockUnsafe(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
std::size_t size);
/** /**
* Reads a contiguous block of bytes from the current process' address space. * Reads a contiguous block of bytes from the current process' address space.
* *
@ -312,6 +333,25 @@ public:
*/ */
void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size); void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
/**
* Reads a contiguous block of bytes from the current process' address space.
* This unsafe version does not trigger GPU flushing: pages cached by the
* rasterizer are copied straight from host memory without a FlushRegion call.
*
* @param src_addr The virtual address to begin reading from.
* @param dest_buffer The buffer to place the read bytes into.
* @param size The amount of data to read, in bytes.
*
* @note If a size of 0 is specified, then this function reads nothing and
* no attempts to access memory are made at all.
*
* @note Bytes that fall on unmapped pages are zero-filled in dest_buffer and
* an error is logged.
*
* @pre dest_buffer must be at least size bytes in length, otherwise a
* buffer overrun will occur.
*
* @post The range [dest_buffer, dest_buffer + size) contains the read bytes
* from the current process' address space.
*/
void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size);
/** /**
* Writes a range of bytes into a given process' address space at the specified * Writes a range of bytes into a given process' address space at the specified
* virtual address. * virtual address.
@ -335,6 +375,26 @@ public:
void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer, void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
std::size_t size); std::size_t size);
/**
* Writes a range of bytes into a given process' address space at the specified
* virtual address.
* This unsafe version does not invalidate GPU Memory: pages cached by the
* rasterizer are written directly in host memory without an InvalidateRegion call.
*
* @param process The process to write data into the address space of.
* @param dest_addr The destination virtual address to begin writing the data at.
* @param src_buffer The data to write into the process' address space.
* @param size The size of the data to write, in bytes.
*
* @pre src_buffer must be at least size bytes in length, since size bytes are
* read from it.
*
* @post The address range [dest_addr, dest_addr + size) in the process' address
* space contains the data that was within src_buffer.
*
* @post If an attempt is made to write into an unmapped region of memory, the writes
* will be ignored and an error will be logged.
*/
void WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
std::size_t size);
/** /**
* Writes a range of bytes into the current process' address space at the specified * Writes a range of bytes into the current process' address space at the specified
* virtual address. * virtual address.
@ -356,6 +416,24 @@ public:
*/ */
void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size); void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
/**
* Writes a range of bytes into the current process' address space at the specified
* virtual address.
* This unsafe version does not invalidate GPU Memory: pages cached by the
* rasterizer are written directly in host memory without an InvalidateRegion call.
*
* @param dest_addr The destination virtual address to begin writing the data at.
* @param src_buffer The data to write into the current process' address space.
* @param size The size of the data to write, in bytes.
*
* @pre src_buffer must be at least size bytes in length, since size bytes are
* read from it.
*
* @post The address range [dest_addr, dest_addr + size) in the current process'
* address space contains the data that was within src_buffer.
*
* @post If an attempt is made to write into an unmapped region of memory, the writes
* will be ignored and an error will be logged.
*/
void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);
/** /**
* Fills the specified address range within a process' address space with zeroes. * Fills the specified address range within a process' address space with zeroes.
* *

View File

@ -15,37 +15,29 @@ namespace VideoCommon {
class BufferBlock { class BufferBlock {
public: public:
bool Overlaps(const CacheAddr start, const CacheAddr end) const { bool Overlaps(const VAddr start, const VAddr end) const {
return (cache_addr < end) && (cache_addr_end > start); return (cpu_addr < end) && (cpu_addr_end > start);
} }
bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { bool IsInside(const VAddr other_start, const VAddr other_end) const {
return cache_addr <= other_start && other_end <= cache_addr_end; return cpu_addr <= other_start && other_end <= cpu_addr_end;
} }
u8* GetWritableHostPtr() const { std::size_t GetOffset(const VAddr in_addr) {
return FromCacheAddr(cache_addr); return static_cast<std::size_t>(in_addr - cpu_addr);
} }
u8* GetWritableHostPtr(std::size_t offset) const { VAddr GetCpuAddr() const {
return FromCacheAddr(cache_addr + offset); return cpu_addr;
} }
std::size_t GetOffset(const CacheAddr in_addr) { VAddr GetCpuAddrEnd() const {
return static_cast<std::size_t>(in_addr - cache_addr); return cpu_addr_end;
} }
CacheAddr GetCacheAddr() const { void SetCpuAddr(const VAddr new_addr) {
return cache_addr; cpu_addr = new_addr;
} cpu_addr_end = new_addr + size;
CacheAddr GetCacheAddrEnd() const {
return cache_addr_end;
}
void SetCacheAddr(const CacheAddr new_addr) {
cache_addr = new_addr;
cache_addr_end = new_addr + size;
} }
std::size_t GetSize() const { std::size_t GetSize() const {
@ -61,14 +53,14 @@ public:
} }
protected: protected:
explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
SetCacheAddr(cache_addr); SetCpuAddr(cpu_addr);
} }
~BufferBlock() = default; ~BufferBlock() = default;
private: private:
CacheAddr cache_addr{}; VAddr cpu_addr{};
CacheAddr cache_addr_end{}; VAddr cpu_addr_end{};
std::size_t size{}; std::size_t size{};
u64 epoch{}; u64 epoch{};
}; };

View File

@ -19,6 +19,7 @@
#include "common/alignment.h" #include "common/alignment.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "core/core.h" #include "core/core.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_block.h" #include "video_core/buffer_cache/buffer_block.h"
#include "video_core/buffer_cache/map_interval.h" #include "video_core/buffer_cache/map_interval.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
@ -37,28 +38,45 @@ public:
bool is_written = false, bool use_fast_cbuf = false) { bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
auto& memory_manager = system.GPU().MemoryManager(); const std::optional<VAddr> cpu_addr_opt =
const auto host_ptr = memory_manager.GetPointer(gpu_addr); system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!host_ptr) {
if (!cpu_addr_opt) {
return {GetEmptyBuffer(size), 0}; return {GetEmptyBuffer(size), 0};
} }
const auto cache_addr = ToCacheAddr(host_ptr);
VAddr cpu_addr = *cpu_addr_opt;
// Cache management is a big overhead, so only cache entries with a given size. // Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games. // TODO: Figure out which size is the best for given games.
constexpr std::size_t max_stream_size = 0x800; constexpr std::size_t max_stream_size = 0x800;
if (use_fast_cbuf || size < max_stream_size) { if (use_fast_cbuf || size < max_stream_size) {
if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
auto& memory_manager = system.GPU().MemoryManager();
if (use_fast_cbuf) { if (use_fast_cbuf) {
if (memory_manager.IsGranularRange(gpu_addr, size)) {
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
return ConstBufferUpload(host_ptr, size); return ConstBufferUpload(host_ptr, size);
} else { } else {
staging_buffer.resize(size);
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
return ConstBufferUpload(staging_buffer.data(), size);
}
} else {
if (memory_manager.IsGranularRange(gpu_addr, size)) {
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
return StreamBufferUpload(host_ptr, size, alignment); return StreamBufferUpload(host_ptr, size, alignment);
} else {
staging_buffer.resize(size);
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
return StreamBufferUpload(staging_buffer.data(), size, alignment);
}
} }
} }
} }
auto block = GetBlock(cache_addr, size); auto block = GetBlock(cpu_addr, size);
auto map = MapAddress(block, gpu_addr, cache_addr, size); auto map = MapAddress(block, gpu_addr, cpu_addr, size);
if (is_written) { if (is_written) {
map->MarkAsModified(true, GetModifiedTicks()); map->MarkAsModified(true, GetModifiedTicks());
if (!map->IsWritten()) { if (!map->IsWritten()) {
@ -71,7 +89,7 @@ public:
} }
} }
const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
return {ToHandle(block), offset}; return {ToHandle(block), offset};
} }
@ -112,7 +130,7 @@ public:
} }
/// Write any cached resources overlapping the specified region back to memory /// Write any cached resources overlapping the specified region back to memory
void FlushRegion(CacheAddr addr, std::size_t size) { void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
std::vector<MapInterval> objects = GetMapsInRange(addr, size); std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@ -127,7 +145,7 @@ public:
} }
/// Mark the specified region as being invalidated /// Mark the specified region as being invalidated
void InvalidateRegion(CacheAddr addr, u64 size) { void InvalidateRegion(VAddr addr, u64 size) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
std::vector<MapInterval> objects = GetMapsInRange(addr, size); std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@ -152,7 +170,7 @@ protected:
virtual void WriteBarrier() = 0; virtual void WriteBarrier() = 0;
virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
const u8* data) = 0; const u8* data) = 0;
@ -169,20 +187,17 @@ protected:
/// Register an object into the cache /// Register an object into the cache
void Register(const MapInterval& new_map, bool inherit_written = false) { void Register(const MapInterval& new_map, bool inherit_written = false) {
const CacheAddr cache_ptr = new_map->GetStart(); const VAddr cpu_addr = new_map->GetStart();
const std::optional<VAddr> cpu_addr = if (!cpu_addr) {
system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
if (!cache_ptr || !cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
new_map->GetGpuAddress()); new_map->GetGpuAddress());
return; return;
} }
const std::size_t size = new_map->GetEnd() - new_map->GetStart(); const std::size_t size = new_map->GetEnd() - new_map->GetStart();
new_map->SetCpuAddress(*cpu_addr);
new_map->MarkAsRegistered(true); new_map->MarkAsRegistered(true);
const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
mapped_addresses.insert({interval, new_map}); mapped_addresses.insert({interval, new_map});
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
if (inherit_written) { if (inherit_written) {
MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
new_map->MarkAsWritten(true); new_map->MarkAsWritten(true);
@ -192,7 +207,7 @@ protected:
/// Unregisters an object from the cache /// Unregisters an object from the cache
void Unregister(MapInterval& map) { void Unregister(MapInterval& map) {
const std::size_t size = map->GetEnd() - map->GetStart(); const std::size_t size = map->GetEnd() - map->GetStart();
rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
map->MarkAsRegistered(false); map->MarkAsRegistered(false);
if (map->IsWritten()) { if (map->IsWritten()) {
UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@ -202,32 +217,39 @@ protected:
} }
private: private:
MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
return std::make_shared<MapIntervalBase>(start, end, gpu_addr); return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
} }
MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
const CacheAddr cache_addr, const std::size_t size) { const std::size_t size) {
std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
if (overlaps.empty()) { if (overlaps.empty()) {
const CacheAddr cache_addr_end = cache_addr + size; auto& memory_manager = system.GPU().MemoryManager();
MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); const VAddr cpu_addr_end = cpu_addr + size;
u8* host_ptr = FromCacheAddr(cache_addr); MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); if (memory_manager.IsGranularRange(gpu_addr, size)) {
u8* host_ptr = memory_manager.GetPointer(gpu_addr);
UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
} else {
staging_buffer.resize(size);
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
}
Register(new_map); Register(new_map);
return new_map; return new_map;
} }
const CacheAddr cache_addr_end = cache_addr + size; const VAddr cpu_addr_end = cpu_addr + size;
if (overlaps.size() == 1) { if (overlaps.size() == 1) {
MapInterval& current_map = overlaps[0]; MapInterval& current_map = overlaps[0];
if (current_map->IsInside(cache_addr, cache_addr_end)) { if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
return current_map; return current_map;
} }
} }
CacheAddr new_start = cache_addr; VAddr new_start = cpu_addr;
CacheAddr new_end = cache_addr_end; VAddr new_end = cpu_addr_end;
bool write_inheritance = false; bool write_inheritance = false;
bool modified_inheritance = false; bool modified_inheritance = false;
// Calculate new buffer parameters // Calculate new buffer parameters
@ -237,7 +259,7 @@ private:
write_inheritance |= overlap->IsWritten(); write_inheritance |= overlap->IsWritten();
modified_inheritance |= overlap->IsModified(); modified_inheritance |= overlap->IsModified();
} }
GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
for (auto& overlap : overlaps) { for (auto& overlap : overlaps) {
Unregister(overlap); Unregister(overlap);
} }
@ -250,7 +272,7 @@ private:
return new_map; return new_map;
} }
void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
std::vector<MapInterval>& overlaps) { std::vector<MapInterval>& overlaps) {
const IntervalType base_interval{start, end}; const IntervalType base_interval{start, end};
IntervalSet interval_set{}; IntervalSet interval_set{};
@ -262,13 +284,15 @@ private:
for (auto& interval : interval_set) { for (auto& interval : interval_set) {
std::size_t size = interval.upper() - interval.lower(); std::size_t size = interval.upper() - interval.lower();
if (size > 0) { if (size > 0) {
u8* host_ptr = FromCacheAddr(interval.lower()); staging_buffer.resize(size);
UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
UploadBlockData(block, block->GetOffset(interval.lower()), size,
staging_buffer.data());
} }
} }
} }
std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
if (size == 0) { if (size == 0) {
return {}; return {};
} }
@ -290,8 +314,9 @@ private:
void FlushMap(MapInterval map) { void FlushMap(MapInterval map) {
std::size_t size = map->GetEnd() - map->GetStart(); std::size_t size = map->GetEnd() - map->GetStart();
TBuffer block = blocks[map->GetStart() >> block_page_bits]; TBuffer block = blocks[map->GetStart() >> block_page_bits];
u8* host_ptr = FromCacheAddr(map->GetStart()); staging_buffer.resize(size);
DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
map->MarkAsModified(false, 0); map->MarkAsModified(false, 0);
} }
@ -316,14 +341,14 @@ private:
TBuffer EnlargeBlock(TBuffer buffer) { TBuffer EnlargeBlock(TBuffer buffer) {
const std::size_t old_size = buffer->GetSize(); const std::size_t old_size = buffer->GetSize();
const std::size_t new_size = old_size + block_page_size; const std::size_t new_size = old_size + block_page_size;
const CacheAddr cache_addr = buffer->GetCacheAddr(); const VAddr cpu_addr = buffer->GetCpuAddr();
TBuffer new_buffer = CreateBlock(cache_addr, new_size); TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
CopyBlock(buffer, new_buffer, 0, 0, old_size); CopyBlock(buffer, new_buffer, 0, 0, old_size);
buffer->SetEpoch(epoch); buffer->SetEpoch(epoch);
pending_destruction.push_back(buffer); pending_destruction.push_back(buffer);
const CacheAddr cache_addr_end = cache_addr + new_size - 1; const VAddr cpu_addr_end = cpu_addr + new_size - 1;
u64 page_start = cache_addr >> block_page_bits; u64 page_start = cpu_addr >> block_page_bits;
const u64 page_end = cache_addr_end >> block_page_bits; const u64 page_end = cpu_addr_end >> block_page_bits;
while (page_start <= page_end) { while (page_start <= page_end) {
blocks[page_start] = new_buffer; blocks[page_start] = new_buffer;
++page_start; ++page_start;
@ -334,9 +359,9 @@ private:
TBuffer MergeBlocks(TBuffer first, TBuffer second) { TBuffer MergeBlocks(TBuffer first, TBuffer second) {
const std::size_t size_1 = first->GetSize(); const std::size_t size_1 = first->GetSize();
const std::size_t size_2 = second->GetSize(); const std::size_t size_2 = second->GetSize();
const CacheAddr first_addr = first->GetCacheAddr(); const VAddr first_addr = first->GetCpuAddr();
const CacheAddr second_addr = second->GetCacheAddr(); const VAddr second_addr = second->GetCpuAddr();
const CacheAddr new_addr = std::min(first_addr, second_addr); const VAddr new_addr = std::min(first_addr, second_addr);
const std::size_t new_size = size_1 + size_2; const std::size_t new_size = size_1 + size_2;
TBuffer new_buffer = CreateBlock(new_addr, new_size); TBuffer new_buffer = CreateBlock(new_addr, new_size);
CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
@ -345,9 +370,9 @@ private:
second->SetEpoch(epoch); second->SetEpoch(epoch);
pending_destruction.push_back(first); pending_destruction.push_back(first);
pending_destruction.push_back(second); pending_destruction.push_back(second);
const CacheAddr cache_addr_end = new_addr + new_size - 1; const VAddr cpu_addr_end = new_addr + new_size - 1;
u64 page_start = new_addr >> block_page_bits; u64 page_start = new_addr >> block_page_bits;
const u64 page_end = cache_addr_end >> block_page_bits; const u64 page_end = cpu_addr_end >> block_page_bits;
while (page_start <= page_end) { while (page_start <= page_end) {
blocks[page_start] = new_buffer; blocks[page_start] = new_buffer;
++page_start; ++page_start;
@ -355,18 +380,18 @@ private:
return new_buffer; return new_buffer;
} }
TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
TBuffer found{}; TBuffer found{};
const CacheAddr cache_addr_end = cache_addr + size - 1; const VAddr cpu_addr_end = cpu_addr + size - 1;
u64 page_start = cache_addr >> block_page_bits; u64 page_start = cpu_addr >> block_page_bits;
const u64 page_end = cache_addr_end >> block_page_bits; const u64 page_end = cpu_addr_end >> block_page_bits;
while (page_start <= page_end) { while (page_start <= page_end) {
auto it = blocks.find(page_start); auto it = blocks.find(page_start);
if (it == blocks.end()) { if (it == blocks.end()) {
if (found) { if (found) {
found = EnlargeBlock(found); found = EnlargeBlock(found);
} else { } else {
const CacheAddr start_addr = (page_start << block_page_bits); const VAddr start_addr = (page_start << block_page_bits);
found = CreateBlock(start_addr, block_page_size); found = CreateBlock(start_addr, block_page_size);
blocks[page_start] = found; blocks[page_start] = found;
} }
@ -386,7 +411,7 @@ private:
return found; return found;
} }
void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { void MarkRegionAsWritten(const VAddr start, const VAddr end) {
u64 page_start = start >> write_page_bit; u64 page_start = start >> write_page_bit;
const u64 page_end = end >> write_page_bit; const u64 page_end = end >> write_page_bit;
while (page_start <= page_end) { while (page_start <= page_end) {
@ -400,7 +425,7 @@ private:
} }
} }
void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
u64 page_start = start >> write_page_bit; u64 page_start = start >> write_page_bit;
const u64 page_end = end >> write_page_bit; const u64 page_end = end >> write_page_bit;
while (page_start <= page_end) { while (page_start <= page_end) {
@ -416,7 +441,7 @@ private:
} }
} }
bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { bool IsRegionWritten(const VAddr start, const VAddr end) const {
u64 page_start = start >> write_page_bit; u64 page_start = start >> write_page_bit;
const u64 page_end = end >> write_page_bit; const u64 page_end = end >> write_page_bit;
while (page_start <= page_end) { while (page_start <= page_end) {
@ -440,8 +465,8 @@ private:
u64 buffer_offset = 0; u64 buffer_offset = 0;
u64 buffer_offset_base = 0; u64 buffer_offset_base = 0;
using IntervalSet = boost::icl::interval_set<CacheAddr>; using IntervalSet = boost::icl::interval_set<VAddr>;
using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
using IntervalType = typename IntervalCache::interval_type; using IntervalType = typename IntervalCache::interval_type;
IntervalCache mapped_addresses; IntervalCache mapped_addresses;
@ -456,6 +481,8 @@ private:
u64 epoch = 0; u64 epoch = 0;
u64 modified_ticks = 0; u64 modified_ticks = 0;
std::vector<u8> staging_buffer;
std::recursive_mutex mutex; std::recursive_mutex mutex;
}; };

View File

@ -11,7 +11,7 @@ namespace VideoCommon {
class MapIntervalBase { class MapIntervalBase {
public: public:
MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
: start{start}, end{end}, gpu_addr{gpu_addr} {} : start{start}, end{end}, gpu_addr{gpu_addr} {}
void SetCpuAddress(VAddr new_cpu_addr) { void SetCpuAddress(VAddr new_cpu_addr) {
@ -26,7 +26,7 @@ public:
return gpu_addr; return gpu_addr;
} }
bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { bool IsInside(const VAddr other_start, const VAddr other_end) const {
return (start <= other_start && other_end <= end); return (start <= other_start && other_end <= end);
} }
@ -46,11 +46,11 @@ public:
return is_registered; return is_registered;
} }
CacheAddr GetStart() const { VAddr GetStart() const {
return start; return start;
} }
CacheAddr GetEnd() const { VAddr GetEnd() const {
return end; return end;
} }
@ -76,8 +76,8 @@ public:
} }
private: private:
CacheAddr start; VAddr start;
CacheAddr end; VAddr end;
GPUVAddr gpu_addr; GPUVAddr gpu_addr;
VAddr cpu_addr{}; VAddr cpu_addr{};
bool is_written{}; bool is_written{};

View File

@ -270,13 +270,13 @@ public:
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(CacheAddr addr, u64 size) = 0; virtual void FlushRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated /// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
protected: protected:
virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;

View File

@ -30,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
gpu_thread.SwapBuffers(framebuffer); gpu_thread.SwapBuffers(framebuffer);
} }
void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size); gpu_thread.FlushRegion(addr, size);
} }
void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) { void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size); gpu_thread.InvalidateRegion(addr, size);
} }
void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size); gpu_thread.FlushAndInvalidateRegion(addr, size);
} }

View File

@ -27,9 +27,9 @@ public:
void Start() override; void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override; void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(CacheAddr addr, u64 size) override; void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override; void WaitIdle() const override;
protected: protected:

View File

@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
renderer->SwapBuffers(framebuffer); renderer->SwapBuffers(framebuffer);
} }
void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { void GPUSynch::FlushRegion(VAddr addr, u64 size) {
renderer->Rasterizer().FlushRegion(addr, size); renderer->Rasterizer().FlushRegion(addr, size);
} }
void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) { void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
renderer->Rasterizer().InvalidateRegion(addr, size); renderer->Rasterizer().InvalidateRegion(addr, size);
} }
void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
} }

View File

@ -26,9 +26,9 @@ public:
void Start() override; void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override; void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(CacheAddr addr, u64 size) override; void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override {} void WaitIdle() const override {}
protected: protected:

View File

@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
} }
void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { void ThreadManager::FlushRegion(VAddr addr, u64 size) {
PushCommand(FlushRegionCommand(addr, size)); PushCommand(FlushRegionCommand(addr, size));
} }
void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
system.Renderer().Rasterizer().InvalidateRegion(addr, size); system.Renderer().Rasterizer().InvalidateRegion(addr, size);
} }
void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
InvalidateRegion(addr, size); InvalidateRegion(addr, size);
} }

View File

@ -47,26 +47,26 @@ struct SwapBuffersCommand final {
/// Command to signal to the GPU thread to flush a region /// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final { struct FlushRegionCommand final {
explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
CacheAddr addr; VAddr addr;
u64 size; u64 size;
}; };
/// Command to signal to the GPU thread to invalidate a region /// Command to signal to the GPU thread to invalidate a region
struct InvalidateRegionCommand final { struct InvalidateRegionCommand final {
explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
CacheAddr addr; VAddr addr;
u64 size; u64 size;
}; };
/// Command to signal to the GPU thread to flush and invalidate a region /// Command to signal to the GPU thread to flush and invalidate a region
struct FlushAndInvalidateRegionCommand final { struct FlushAndInvalidateRegionCommand final {
explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size) explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
: addr{addr}, size{size} {} : addr{addr}, size{size} {}
CacheAddr addr; VAddr addr;
u64 size; u64 size;
}; };
@ -111,13 +111,13 @@ public:
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(CacheAddr addr, u64 size); void FlushRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be invalidated /// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(CacheAddr addr, u64 size); void InvalidateRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(CacheAddr addr, u64 size); void FlushAndInvalidateRegion(VAddr addr, u64 size);
// Wait until the gpu thread is idle. // Wait until the gpu thread is idle.
void WaitIdle() const; void WaitIdle() const;

View File

@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
ASSERT((gpu_addr & page_mask) == 0); ASSERT((gpu_addr & page_mask) == 0);
const u64 aligned_size{Common::AlignUp(size, page_size)}; const u64 aligned_size{Common::AlignUp(size, page_size)};
const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
const auto cpu_addr = GpuToCpuAddress(gpu_addr); const auto cpu_addr = GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr); ASSERT(cpu_addr);
// Flush and invalidate through the GPU interface, to be asynchronous if possible. // Flush and invalidate through the GPU interface, to be asynchronous if possible.
system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);
UnmapRange(gpu_addr, aligned_size); UnmapRange(gpu_addr, aligned_size);
ASSERT(system.CurrentProcess() ASSERT(system.CurrentProcess()
@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const {
return {}; return {};
} }
const u8* page_pointer{page_table.pointers[addr >> page_bits]}; const u8* page_pointer{GetPointer(addr)};
if (page_pointer) { if (page_pointer) {
// NOTE: Avoid adding any extra logic to this fast-path block // NOTE: Avoid adding any extra logic to this fast-path block
T value; T value;
std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T)); std::memcpy(&value, page_pointer, sizeof(T));
return value; return value;
} }
@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
return; return;
} }
u8* page_pointer{page_table.pointers[addr >> page_bits]}; u8* page_pointer{GetPointer(addr)};
if (page_pointer) { if (page_pointer) {
// NOTE: Avoid adding any extra logic to this fast-path block // NOTE: Avoid adding any extra logic to this fast-path block
std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T)); std::memcpy(page_pointer, &data, sizeof(T));
return; return;
} }
@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
return {}; return {};
} }
u8* const page_pointer{page_table.pointers[addr >> page_bits]}; auto& memory = system.Memory();
if (page_pointer != nullptr) {
return page_pointer + (addr & page_mask); const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
if (page_addr != 0) {
return memory.GetPointer(page_addr + (addr & page_mask));
} }
LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
return {}; return {};
} }
const u8* const page_pointer{page_table.pointers[addr >> page_bits]}; const auto& memory = system.Memory();
if (page_pointer != nullptr) {
return page_pointer + (addr & page_mask); const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
if (page_addr != 0) {
return memory.GetPointer(page_addr + (addr & page_mask));
} }
LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
std::size_t page_index{src_addr >> page_bits}; std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask}; std::size_t page_offset{src_addr & page_mask};
auto& memory = system.Memory();
while (remaining_size > 0) { while (remaining_size > 0) {
const std::size_t copy_amount{ const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
switch (page_table.attributes[page_index]) { switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: { case Common::PageType::Memory: {
const u8* src_ptr{page_table.pointers[page_index] + page_offset}; const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
// Flush must happen on the rasterizer interface, such that memory is always synchronous // Flush must happen on the rasterizer interface, such that memory is always synchronous
// when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); rasterizer.FlushRegion(src_addr, copy_amount);
std::memcpy(dest_buffer, src_ptr, copy_amount); memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
break; break;
} }
default: default:
@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
std::size_t page_index{src_addr >> page_bits}; std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask}; std::size_t page_offset{src_addr & page_mask};
auto& memory = system.Memory();
while (remaining_size > 0) { while (remaining_size > 0) {
const std::size_t copy_amount{ const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
const u8* page_pointer = page_table.pointers[page_index]; const u8* page_pointer = page_table.pointers[page_index];
if (page_pointer) { if (page_pointer) {
const u8* src_ptr{page_pointer + page_offset}; const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
std::memcpy(dest_buffer, src_ptr, copy_amount); memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
} else { } else {
std::memset(dest_buffer, 0, copy_amount); std::memset(dest_buffer, 0, copy_amount);
} }
@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
std::size_t page_index{dest_addr >> page_bits}; std::size_t page_index{dest_addr >> page_bits};
std::size_t page_offset{dest_addr & page_mask}; std::size_t page_offset{dest_addr & page_mask};
auto& memory = system.Memory();
while (remaining_size > 0) { while (remaining_size > 0) {
const std::size_t copy_amount{ const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
switch (page_table.attributes[page_index]) { switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: { case Common::PageType::Memory: {
u8* dest_ptr{page_table.pointers[page_index] + page_offset}; const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
// Invalidate must happen on the rasterizer interface, such that memory is always // Invalidate must happen on the rasterizer interface, such that memory is always
// synchronous when it is written (even when in asynchronous GPU mode). // synchronous when it is written (even when in asynchronous GPU mode).
rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); rasterizer.InvalidateRegion(dest_addr, copy_amount);
std::memcpy(dest_ptr, src_buffer, copy_amount); memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
break; break;
} }
default: default:
@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
std::size_t page_index{dest_addr >> page_bits}; std::size_t page_index{dest_addr >> page_bits};
std::size_t page_offset{dest_addr & page_mask}; std::size_t page_offset{dest_addr & page_mask};
auto& memory = system.Memory();
while (remaining_size > 0) { while (remaining_size > 0) {
const std::size_t copy_amount{ const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
u8* page_pointer = page_table.pointers[page_index]; u8* page_pointer = page_table.pointers[page_index];
if (page_pointer) { if (page_pointer) {
u8* dest_ptr{page_pointer + page_offset}; const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
std::memcpy(dest_ptr, src_buffer, copy_amount); memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
} }
page_index++; page_index++;
page_offset = 0; page_offset = 0;
@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
} }
void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
std::size_t remaining_size{size}; std::vector<u8> tmp_buffer(size);
std::size_t page_index{src_addr >> page_bits}; ReadBlock(src_addr, tmp_buffer.data(), size);
std::size_t page_offset{src_addr & page_mask}; WriteBlock(dest_addr, tmp_buffer.data(), size);
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
// Flush must happen on the rasterizer interface, such that memory is always synchronous
// when it is copied (even when in asynchronous GPU mode).
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
WriteBlock(dest_addr, src_ptr, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
dest_addr += static_cast<VAddr>(copy_amount);
src_addr += static_cast<VAddr>(copy_amount);
remaining_size -= copy_amount;
}
} }
void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const
WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
} }
bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits];
const std::size_t page = (addr & Memory::PAGE_MASK) + size;
return page <= Memory::PAGE_SIZE;
}
void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
VAddr backing_addr) { VAddr backing_addr) {
LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,

View File

@ -97,6 +97,11 @@ public:
void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
/**
* IsGranularRange checks if a gpu region can be simply read with a pointer
*/
bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
private: private:
using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
using VMAHandle = VMAMap::const_iterator; using VMAHandle = VMAMap::const_iterator;

View File

@ -98,12 +98,12 @@ public:
static_cast<QueryCache&>(*this), static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {} VideoCore::QueryType::SamplesPassed}}} {}
void InvalidateRegion(CacheAddr addr, std::size_t size) { void InvalidateRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
FlushAndRemoveRegion(addr, size); FlushAndRemoveRegion(addr, size);
} }
void FlushRegion(CacheAddr addr, std::size_t size) { void FlushRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
FlushAndRemoveRegion(addr, size); FlushAndRemoveRegion(addr, size);
} }
@ -117,14 +117,16 @@ public:
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
auto& memory_manager = system.GPU().MemoryManager(); auto& memory_manager = system.GPU().MemoryManager();
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr_opt);
VAddr cpu_addr = *cpu_addr_opt;
CachedQuery* query = TryGet(cpu_addr);
if (!query) {
ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
const auto host_ptr = memory_manager.GetPointer(gpu_addr); const auto host_ptr = memory_manager.GetPointer(gpu_addr);
CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
if (!query) {
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
ASSERT_OR_EXECUTE(cpu_addr, return;);
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
} }
query->BindCounter(Stream(type).Current(), timestamp); query->BindCounter(Stream(type).Current(), timestamp);
@ -173,11 +175,11 @@ protected:
private: private:
/// Flushes a memory range to guest memory and removes it from the cache. /// Flushes a memory range to guest memory and removes it from the cache.
void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
const u64 addr_begin = static_cast<u64>(addr); const u64 addr_begin = static_cast<u64>(addr);
const u64 addr_end = addr_begin + static_cast<u64>(size); const u64 addr_end = addr_begin + static_cast<u64>(size);
const auto in_range = [addr_begin, addr_end](CachedQuery& query) { const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
const u64 cache_begin = query.GetCacheAddr(); const u64 cache_begin = query.GetCpuAddr();
const u64 cache_end = cache_begin + query.SizeInBytes(); const u64 cache_end = cache_begin + query.SizeInBytes();
return cache_begin < addr_end && addr_begin < cache_end; return cache_begin < addr_end && addr_begin < cache_end;
}; };
@ -193,7 +195,7 @@ private:
if (!in_range(query)) { if (!in_range(query)) {
continue; continue;
} }
rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
query.Flush(); query.Flush();
} }
contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
@ -204,22 +206,21 @@ private:
/// Registers the passed parameters as cached and returns a pointer to the stored cached query. /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
host_ptr); host_ptr);
} }
/// Tries to a get a cached query. Returns nullptr on failure. /// Tries to a get a cached query. Returns nullptr on failure.
CachedQuery* TryGet(CacheAddr addr) { CachedQuery* TryGet(VAddr addr) {
const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
const auto it = cached_queries.find(page); const auto it = cached_queries.find(page);
if (it == std::end(cached_queries)) { if (it == std::end(cached_queries)) {
return nullptr; return nullptr;
} }
auto& contents = it->second; auto& contents = it->second;
const auto found = const auto found = std::find_if(std::begin(contents), std::end(contents),
std::find_if(std::begin(contents), std::end(contents), [addr](auto& query) { return query.GetCpuAddr() == addr; });
[addr](auto& query) { return query.GetCacheAddr() == addr; });
return found != std::end(contents) ? &*found : nullptr; return found != std::end(contents) ? &*found : nullptr;
} }
@ -323,14 +324,10 @@ public:
timestamp = timestamp_; timestamp = timestamp_;
} }
VAddr CpuAddr() const noexcept { VAddr GetCpuAddr() const noexcept {
return cpu_addr; return cpu_addr;
} }
CacheAddr GetCacheAddr() const noexcept {
return ToCacheAddr(host_ptr);
}
u64 SizeInBytes() const noexcept { u64 SizeInBytes() const noexcept {
return SizeInBytes(timestamp.has_value()); return SizeInBytes(timestamp.has_value());
} }

View File

@ -18,22 +18,14 @@
class RasterizerCacheObject { class RasterizerCacheObject {
public: public:
explicit RasterizerCacheObject(const u8* host_ptr) explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
: host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
virtual ~RasterizerCacheObject(); virtual ~RasterizerCacheObject();
CacheAddr GetCacheAddr() const { VAddr GetCpuAddr() const {
return cache_addr; return cpu_addr;
} }
const u8* GetHostPtr() const {
return host_ptr;
}
/// Gets the address of the shader in guest memory, required for cache management
virtual VAddr GetCpuAddr() const = 0;
/// Gets the size of the shader in guest memory, required for cache management /// Gets the size of the shader in guest memory, required for cache management
virtual std::size_t GetSizeInBytes() const = 0; virtual std::size_t GetSizeInBytes() const = 0;
@ -68,8 +60,7 @@ private:
bool is_registered{}; ///< Whether the object is currently registered with the cache bool is_registered{}; ///< Whether the object is currently registered with the cache
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
const u8* host_ptr{}; ///< Pointer to the memory backing this cached region VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
}; };
template <class T> template <class T>
@ -80,7 +71,7 @@ public:
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
/// Write any cached resources overlapping the specified region back to memory /// Write any cached resources overlapping the specified region back to memory
void FlushRegion(CacheAddr addr, std::size_t size) { void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
const auto& objects{GetSortedObjectsFromRegion(addr, size)}; const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@ -90,7 +81,7 @@ public:
} }
/// Mark the specified region as being invalidated /// Mark the specified region as being invalidated
void InvalidateRegion(CacheAddr addr, u64 size) { void InvalidateRegion(VAddr addr, u64 size) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
const auto& objects{GetSortedObjectsFromRegion(addr, size)}; const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@ -114,27 +105,20 @@ public:
protected: protected:
/// Tries to get an object from the cache with the specified cache address /// Tries to get an object from the cache with the specified cache address
T TryGet(CacheAddr addr) const { T TryGet(VAddr addr) const {
const auto iter = map_cache.find(addr); const auto iter = map_cache.find(addr);
if (iter != map_cache.end()) if (iter != map_cache.end())
return iter->second; return iter->second;
return nullptr; return nullptr;
} }
T TryGet(const void* addr) const {
const auto iter = map_cache.find(ToCacheAddr(addr));
if (iter != map_cache.end())
return iter->second;
return nullptr;
}
/// Register an object into the cache /// Register an object into the cache
virtual void Register(const T& object) { virtual void Register(const T& object) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
object->SetIsRegistered(true); object->SetIsRegistered(true);
interval_cache.add({GetInterval(object), ObjectSet{object}}); interval_cache.add({GetInterval(object), ObjectSet{object}});
map_cache.insert({object->GetCacheAddr(), object}); map_cache.insert({object->GetCpuAddr(), object});
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
} }
@ -144,7 +128,7 @@ protected:
object->SetIsRegistered(false); object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
const CacheAddr addr = object->GetCacheAddr(); const VAddr addr = object->GetCpuAddr();
interval_cache.subtract({GetInterval(object), ObjectSet{object}}); interval_cache.subtract({GetInterval(object), ObjectSet{object}});
map_cache.erase(addr); map_cache.erase(addr);
} }
@ -173,7 +157,7 @@ protected:
private: private:
/// Returns a list of cached objects from the specified memory region, ordered by access time /// Returns a list of cached objects from the specified memory region, ordered by access time
std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
if (size == 0) { if (size == 0) {
return {}; return {};
} }
@ -197,13 +181,13 @@ private:
} }
using ObjectSet = std::set<T>; using ObjectSet = std::set<T>;
using ObjectCache = std::unordered_map<CacheAddr, T>; using ObjectCache = std::unordered_map<VAddr, T>;
using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
using ObjectInterval = typename IntervalCache::interval_type; using ObjectInterval = typename IntervalCache::interval_type;
static auto GetInterval(const T& object) { static auto GetInterval(const T& object) {
return ObjectInterval::right_open(object->GetCacheAddr(), return ObjectInterval::right_open(object->GetCpuAddr(),
object->GetCacheAddr() + object->GetSizeInBytes()); object->GetCpuAddr() + object->GetSizeInBytes());
} }
ObjectCache map_cache; ObjectCache map_cache;

View File

@ -53,14 +53,14 @@ public:
virtual void FlushAll() = 0; virtual void FlushAll() = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(CacheAddr addr, u64 size) = 0; virtual void FlushRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated /// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated /// and invalidated
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
/// Notify the rasterizer to send all written commands to the host GPU. /// Notify the rasterizer to send all written commands to the host GPU.
virtual void FlushCommands() = 0; virtual void FlushCommands() = 0;

View File

@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
: VideoCommon::BufferBlock{cache_addr, size} { : VideoCommon::BufferBlock{cpu_addr, size} {
gl_buffer.Create(); gl_buffer.Create();
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
} }
@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() {
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
} }
Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(cache_addr, size); return std::make_shared<CachedBufferBlock>(cpu_addr, size);
} }
void OGLBufferCache::WriteBarrier() { void OGLBufferCache::WriteBarrier() {

View File

@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf
class CachedBufferBlock : public VideoCommon::BufferBlock { class CachedBufferBlock : public VideoCommon::BufferBlock {
public: public:
explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
~CachedBufferBlock(); ~CachedBufferBlock();
const GLuint* GetHandle() const { const GLuint* GetHandle() const {
@ -55,7 +55,7 @@ public:
} }
protected: protected:
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
void WriteBarrier() override; void WriteBarrier() override;

View File

@ -656,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
void RasterizerOpenGL::FlushAll() {} void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (!addr || !size) { if (addr == 0 || size == 0) {
return; return;
} }
texture_cache.FlushRegion(addr, size); texture_cache.FlushRegion(addr, size);
@ -666,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
query_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size);
} }
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (!addr || !size) { if (addr == 0 || size == 0) {
return; return;
} }
texture_cache.InvalidateRegion(addr, size); texture_cache.InvalidateRegion(addr, size);
@ -677,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
query_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size);
} }
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
if (Settings::values.use_accurate_gpu_emulation) { if (Settings::values.use_accurate_gpu_emulation) {
FlushRegion(addr, size); FlushRegion(addr, size);
} }
@ -716,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
const auto surface{ const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))};
if (!surface) { if (!surface) {
return {}; return {};
} }

View File

@ -65,9 +65,9 @@ public:
void ResetCounter(VideoCore::QueryType type) override; void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override; void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override; void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushCommands() override; void FlushCommands() override;
void TickFrame() override; void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,

View File

@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() {
} // Anonymous namespace } // Anonymous namespace
CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry, std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program) ShaderEntries entries, std::shared_ptr<OGLProgram> program)
: RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} size_in_bytes{size_in_bytes}, program{std::move(program)} {}
CachedShader::~CachedShader() = default; CachedShader::~CachedShader() = default;
@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
entry.bindless_samplers = registry->GetBindlessSamplers(); entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry)); params.disk_cache.SaveEntry(std::move(entry));
return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, return std::shared_ptr<CachedShader>(new CachedShader(
size_in_bytes, std::move(registry), params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
MakeEntries(ir), std::move(program)));
} }
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
@ -279,16 +278,15 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
entry.bindless_samplers = registry->GetBindlessSamplers(); entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry)); params.disk_cache.SaveEntry(std::move(entry));
return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, return std::shared_ptr<CachedShader>(new CachedShader(
size_in_bytes, std::move(registry), params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
MakeEntries(ir), std::move(program)));
} }
Shader CachedShader::CreateFromCache(const ShaderParameters& params, Shader CachedShader::CreateFromCache(const ShaderParameters& params,
const PrecompiledShader& precompiled_shader, const PrecompiledShader& precompiled_shader,
std::size_t size_in_bytes) { std::size_t size_in_bytes) {
return std::shared_ptr<CachedShader>(new CachedShader( return std::shared_ptr<CachedShader>(
params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
precompiled_shader.entries, precompiled_shader.program)); precompiled_shader.entries, precompiled_shader.program));
} }
@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const GPUVAddr address{GetShaderAddress(system, program)}; const GPUVAddr address{GetShaderAddress(system, program)};
// Look up shader in the cache based on address // Look up shader in the cache based on address
const auto host_ptr{memory_manager.GetPointer(address)}; const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
Shader shader{TryGet(host_ptr)}; Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
if (shader) { if (shader) {
return last_shaders[static_cast<std::size_t>(program)] = shader; return last_shaders[static_cast<std::size_t>(program)] = shader;
} }
const auto host_ptr{memory_manager.GetPointer(address)};
// No shader found - create a new one // No shader found - create a new one
ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
ProgramCode code_b; ProgramCode code_b;
@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const auto unique_identifier = GetUniqueIdentifier( const auto unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
const ShaderParameters params{system, disk_cache, device, const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier}; *cpu_addr, host_ptr, unique_identifier};
const auto found = runtime_cache.find(unique_identifier); const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) { if (found == runtime_cache.end()) {
@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto& memory_manager{system.GPU().MemoryManager()}; auto& memory_manager{system.GPU().MemoryManager()};
const auto host_ptr{memory_manager.GetPointer(code_addr)}; const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
auto kernel = TryGet(host_ptr);
auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (kernel) { if (kernel) {
return kernel; return kernel;
} }
const auto host_ptr{memory_manager.GetPointer(code_addr)};
// No kernel found, create a new one // No kernel found, create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
const ShaderParameters params{system, disk_cache, device, const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier}; *cpu_addr, host_ptr, unique_identifier};
const auto found = runtime_cache.find(unique_identifier); const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) { if (found == runtime_cache.end()) {

View File

@ -65,11 +65,6 @@ public:
/// Gets the GL program handle for the shader /// Gets the GL program handle for the shader
GLuint GetHandle() const; GLuint GetHandle() const;
/// Returns the guest CPU address of the shader
VAddr GetCpuAddr() const override {
return cpu_addr;
}
/// Returns the size in bytes of the shader /// Returns the size in bytes of the shader
std::size_t GetSizeInBytes() const override { std::size_t GetSizeInBytes() const override {
return size_in_bytes; return size_in_bytes;
@ -90,13 +85,12 @@ public:
std::size_t size_in_bytes); std::size_t size_in_bytes);
private: private:
explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry, std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program); ShaderEntries entries, std::shared_ptr<OGLProgram> program);
std::shared_ptr<VideoCommon::Shader::Registry> registry; std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries; ShaderEntries entries;
VAddr cpu_addr = 0;
std::size_t size_in_bytes = 0; std::size_t size_in_bytes = 0;
std::shared_ptr<OGLProgram> program; std::shared_ptr<OGLProgram> program;
}; };

View File

@ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
} // Anonymous namespace } // Anonymous namespace
CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
CacheAddr cache_addr, std::size_t size) VAddr cpu_addr, std::size_t size)
: VideoCommon::BufferBlock{cache_addr, size} { : VideoCommon::BufferBlock{cpu_addr, size} {
const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst, vk::BufferUsageFlagBits::eTransferDst,
@ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S
VKBufferCache::~VKBufferCache() = default; VKBufferCache::~VKBufferCache() = default;
Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
} }
const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {

View File

@ -30,7 +30,7 @@ class VKScheduler;
class CachedBufferBlock final : public VideoCommon::BufferBlock { class CachedBufferBlock final : public VideoCommon::BufferBlock {
public: public:
explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
CacheAddr cache_addr, std::size_t size); VAddr cpu_addr, std::size_t size);
~CachedBufferBlock(); ~CachedBufferBlock();
const vk::Buffer* GetHandle() const { const vk::Buffer* GetHandle() const {
@ -55,7 +55,7 @@ public:
protected: protected:
void WriteBarrier() override {} void WriteBarrier() override {}
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
const vk::Buffer* ToHandle(const Buffer& buffer) override; const vk::Buffer* ToHandle(const Buffer& buffer) override;

View File

@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
} // Anonymous namespace } // Anonymous namespace
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
ProgramCode program_code, u32 main_offset) u32 main_offset)
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
shader_ir{this->program_code, main_offset, compiler_settings, registry}, compiler_settings, registry},
entries{GenerateShaderEntries(shader_ir)} {} entries{GenerateShaderEntries(shader_ir)} {}
CachedShader::~CachedShader() = default; CachedShader::~CachedShader() = default;
@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
auto& memory_manager{system.GPU().MemoryManager()}; auto& memory_manager{system.GPU().MemoryManager()};
const GPUVAddr program_addr{GetShaderAddress(system, program)}; const GPUVAddr program_addr{GetShaderAddress(system, program)};
const auto host_ptr{memory_manager.GetPointer(program_addr)}; const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
auto shader = TryGet(host_ptr); ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (!shader) { if (!shader) {
const auto host_ptr{memory_manager.GetPointer(program_addr)};
// No shader found - create a new one // No shader found - create a new one
constexpr u32 stage_offset = 10; constexpr u32 stage_offset = 10;
const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
host_ptr, std::move(code), stage_offset); std::move(code), stage_offset);
Register(shader); Register(shader);
} }
shaders[index] = std::move(shader); shaders[index] = std::move(shader);
@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
auto& memory_manager = system.GPU().MemoryManager(); auto& memory_manager = system.GPU().MemoryManager();
const auto program_addr = key.shader; const auto program_addr = key.shader;
const auto host_ptr = memory_manager.GetPointer(program_addr);
auto shader = TryGet(host_ptr);
if (!shader) {
// No shader found - create a new one
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr); ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (!shader) {
// No shader found - create a new one
const auto host_ptr = memory_manager.GetPointer(program_addr);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
constexpr u32 kernel_main_offset = 0; constexpr u32 kernel_main_offset = 0;
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
program_addr, *cpu_addr, host_ptr, std::move(code), program_addr, *cpu_addr, std::move(code),
kernel_main_offset); kernel_main_offset);
Register(shader); Register(shader);
} }
@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
} }
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
const auto host_ptr = memory_manager.GetPointer(gpu_addr); const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
const auto shader = TryGet(host_ptr); ASSERT(cpu_addr);
const auto shader = TryGet(*cpu_addr);
ASSERT(shader); ASSERT(shader);
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5

View File

@ -113,17 +113,13 @@ namespace Vulkan {
class CachedShader final : public RasterizerCacheObject { class CachedShader final : public RasterizerCacheObject {
public: public:
explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
~CachedShader(); ~CachedShader();
GPUVAddr GetGpuAddr() const { GPUVAddr GetGpuAddr() const {
return gpu_addr; return gpu_addr;
} }
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override { std::size_t GetSizeInBytes() const override {
return program_code.size() * sizeof(u64); return program_code.size() * sizeof(u64);
} }
@ -149,7 +145,6 @@ private:
Tegra::Engines::ShaderType stage); Tegra::Engines::ShaderType stage);
GPUVAddr gpu_addr{}; GPUVAddr gpu_addr{};
VAddr cpu_addr{};
ProgramCode program_code; ProgramCode program_code;
VideoCommon::Shader::Registry registry; VideoCommon::Shader::Registry registry;
VideoCommon::Shader::ShaderIR shader_ir; VideoCommon::Shader::ShaderIR shader_ir;

View File

@ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
void RasterizerVulkan::FlushAll() {} void RasterizerVulkan::FlushAll() {}
void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
texture_cache.FlushRegion(addr, size); texture_cache.FlushRegion(addr, size);
buffer_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size);
query_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size);
} }
void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
texture_cache.InvalidateRegion(addr, size); texture_cache.InvalidateRegion(addr, size);
pipeline_cache.InvalidateRegion(addr, size); pipeline_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size);
} }
void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
FlushRegion(addr, size); FlushRegion(addr, size);
InvalidateRegion(addr, size); InvalidateRegion(addr, size);
} }
@ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return false; return false;
} }
const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
if (!surface) { if (!surface) {
return false; return false;
} }

View File

@ -118,9 +118,9 @@ public:
void ResetCounter(VideoCore::QueryType type) override; void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override; void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override; void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushCommands() override; void FlushCommands() override;
void TickFrame() override; void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,

View File

@ -190,22 +190,11 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
MICROPROFILE_SCOPE(GPU_Load_Texture); MICROPROFILE_SCOPE(GPU_Load_Texture);
auto& staging_buffer = staging_cache.GetBuffer(0); auto& staging_buffer = staging_cache.GetBuffer(0);
u8* host_ptr; u8* host_ptr;
is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);
// Handle continuouty
if (is_continuous) {
// Use physical memory directly
host_ptr = memory_manager.GetPointer(gpu_addr);
if (!host_ptr) {
return;
}
} else {
// Use an extra temporal buffer // Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1); auto& tmp_buffer = staging_cache.GetBuffer(1);
tmp_buffer.resize(guest_memory_size); tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data(); host_ptr = tmp_buffer.data();
memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
if (params.is_tiled) { if (params.is_tiled) {
ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
@ -257,19 +246,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
auto& staging_buffer = staging_cache.GetBuffer(0); auto& staging_buffer = staging_cache.GetBuffer(0);
u8* host_ptr; u8* host_ptr;
// Handle continuouty
if (is_continuous) {
// Use physical memory directly
host_ptr = memory_manager.GetPointer(gpu_addr);
if (!host_ptr) {
return;
}
} else {
// Use an extra temporal buffer // Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1); auto& tmp_buffer = staging_cache.GetBuffer(1);
tmp_buffer.resize(guest_memory_size); tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data(); host_ptr = tmp_buffer.data();
}
if (params.is_tiled) { if (params.is_tiled) {
ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
@ -300,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
} }
} }
} }
if (!is_continuous) {
memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
} }
} // namespace VideoCommon } // namespace VideoCommon

View File

@ -68,8 +68,8 @@ public:
return gpu_addr; return gpu_addr;
} }
bool Overlaps(const CacheAddr start, const CacheAddr end) const { bool Overlaps(const VAddr start, const VAddr end) const {
return (cache_addr < end) && (cache_addr_end > start); return (cpu_addr < end) && (cpu_addr_end > start);
} }
bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
@ -86,21 +86,13 @@ public:
return cpu_addr; return cpu_addr;
} }
VAddr GetCpuAddrEnd() const {
return cpu_addr_end;
}
void SetCpuAddr(const VAddr new_addr) { void SetCpuAddr(const VAddr new_addr) {
cpu_addr = new_addr; cpu_addr = new_addr;
} cpu_addr_end = new_addr + guest_memory_size;
CacheAddr GetCacheAddr() const {
return cache_addr;
}
CacheAddr GetCacheAddrEnd() const {
return cache_addr_end;
}
void SetCacheAddr(const CacheAddr new_addr) {
cache_addr = new_addr;
cache_addr_end = new_addr + guest_memory_size;
} }
const SurfaceParams& GetSurfaceParams() const { const SurfaceParams& GetSurfaceParams() const {
@ -119,14 +111,6 @@ public:
return mipmap_sizes[level]; return mipmap_sizes[level];
} }
void MarkAsContinuous(const bool is_continuous) {
this->is_continuous = is_continuous;
}
bool IsContinuous() const {
return is_continuous;
}
bool IsLinear() const { bool IsLinear() const {
return !params.is_tiled; return !params.is_tiled;
} }
@ -175,10 +159,8 @@ protected:
std::size_t guest_memory_size; std::size_t guest_memory_size;
std::size_t host_memory_size; std::size_t host_memory_size;
GPUVAddr gpu_addr{}; GPUVAddr gpu_addr{};
CacheAddr cache_addr{};
CacheAddr cache_addr_end{};
VAddr cpu_addr{}; VAddr cpu_addr{};
bool is_continuous{}; VAddr cpu_addr_end{};
bool is_converted{}; bool is_converted{};
std::vector<std::size_t> mipmap_sizes; std::vector<std::size_t> mipmap_sizes;

View File

@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
template <typename TSurface, typename TView> template <typename TSurface, typename TView>
class TextureCache { class TextureCache {
using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
using IntervalType = typename IntervalMap::interval_type;
public: public:
void InvalidateRegion(CacheAddr addr, std::size_t size) { void InvalidateRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
for (const auto& surface : GetSurfacesInRegion(addr, size)) { for (const auto& surface : GetSurfacesInRegion(addr, size)) {
@ -76,7 +74,7 @@ public:
guard_samplers = new_guard; guard_samplers = new_guard;
} }
void FlushRegion(CacheAddr addr, std::size_t size) { void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
auto surfaces = GetSurfacesInRegion(addr, size); auto surfaces = GetSurfacesInRegion(addr, size);
@ -99,9 +97,9 @@ public:
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const std::optional<VAddr> cpu_addr =
const auto cache_addr{ToCacheAddr(host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_addr) { if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
@ -110,7 +108,7 @@ public:
} }
const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
if (guard_samplers) { if (guard_samplers) {
sampled_textures.push_back(surface); sampled_textures.push_back(surface);
} }
@ -124,13 +122,13 @@ public:
if (!gpu_addr) { if (!gpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const std::optional<VAddr> cpu_addr =
const auto cache_addr{ToCacheAddr(host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_addr) { if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
if (guard_samplers) { if (guard_samplers) {
sampled_textures.push_back(surface); sampled_textures.push_back(surface);
} }
@ -159,14 +157,14 @@ public:
SetEmptyDepthBuffer(); SetEmptyDepthBuffer();
return {}; return {};
} }
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const std::optional<VAddr> cpu_addr =
const auto cache_addr{ToCacheAddr(host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_addr) { if (!cpu_addr) {
SetEmptyDepthBuffer(); SetEmptyDepthBuffer();
return {}; return {};
} }
const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
if (depth_buffer.target) if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT); depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
depth_buffer.target = surface_view.first; depth_buffer.target = surface_view.first;
@ -199,15 +197,15 @@ public:
return {}; return {};
} }
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const std::optional<VAddr> cpu_addr =
const auto cache_addr{ToCacheAddr(host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_addr) { if (!cpu_addr) {
SetEmptyColorBuffer(index); SetEmptyColorBuffer(index);
return {}; return {};
} }
auto surface_view = auto surface_view =
GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index), GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
preserve_contents, true); preserve_contents, true);
if (render_targets[index].target) if (render_targets[index].target)
render_targets[index].target->MarkAsRenderTarget(false, NO_RT); render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
@ -257,27 +255,26 @@ public:
const GPUVAddr src_gpu_addr = src_config.Address(); const GPUVAddr src_gpu_addr = src_config.Address();
const GPUVAddr dst_gpu_addr = dst_config.Address(); const GPUVAddr dst_gpu_addr = dst_config.Address();
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)}; const std::optional<VAddr> dst_cpu_addr =
const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)}; const std::optional<VAddr> src_cpu_addr =
const auto src_cache_addr{ToCacheAddr(src_host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
std::pair<TSurface, TView> dst_surface = std::pair<TSurface, TView> dst_surface =
GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false); GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
std::pair<TSurface, TView> src_surface = std::pair<TSurface, TView> src_surface =
GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false); GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
ImageBlit(src_surface.second, dst_surface.second, copy_config); ImageBlit(src_surface.second, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick()); dst_surface.first->MarkAsModified(true, Tick());
} }
TSurface TryFindFramebufferSurface(const u8* host_ptr) { TSurface TryFindFramebufferSurface(VAddr addr) {
const CacheAddr cache_addr = ToCacheAddr(host_ptr); if (!addr) {
if (!cache_addr) {
return nullptr; return nullptr;
} }
const CacheAddr page = cache_addr >> registry_page_bits; const VAddr page = addr >> registry_page_bits;
std::vector<TSurface>& list = registry[page]; std::vector<TSurface>& list = registry[page];
for (auto& surface : list) { for (auto& surface : list) {
if (surface->GetCacheAddr() == cache_addr) { if (surface->GetCpuAddr() == addr) {
return surface; return surface;
} }
} }
@ -338,18 +335,14 @@ protected:
void Register(TSurface surface) { void Register(TSurface surface) {
const GPUVAddr gpu_addr = surface->GetGpuAddr(); const GPUVAddr gpu_addr = surface->GetGpuAddr();
const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
const std::size_t size = surface->GetSizeInBytes(); const std::size_t size = surface->GetSizeInBytes();
const std::optional<VAddr> cpu_addr = const std::optional<VAddr> cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_ptr || !cpu_addr) { if (!cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
gpu_addr); gpu_addr);
return; return;
} }
const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
surface->MarkAsContinuous(continuous);
surface->SetCacheAddr(cache_ptr);
surface->SetCpuAddr(*cpu_addr); surface->SetCpuAddr(*cpu_addr);
RegisterInnerCache(surface); RegisterInnerCache(surface);
surface->MarkAsRegistered(true); surface->MarkAsRegistered(true);
@ -634,7 +627,7 @@ private:
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
const SurfaceParams& params, const SurfaceParams& params,
const GPUVAddr gpu_addr, const GPUVAddr gpu_addr,
const CacheAddr cache_addr, const VAddr cpu_addr,
bool preserve_contents) { bool preserve_contents) {
if (params.target == SurfaceTarget::Texture3D) { if (params.target == SurfaceTarget::Texture3D) {
bool failed = false; bool failed = false;
@ -659,7 +652,7 @@ private:
failed = true; failed = true;
break; break;
} }
const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
modified |= surface->IsModified(); modified |= surface->IsModified();
const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
@ -679,7 +672,7 @@ private:
} else { } else {
for (const auto& surface : overlaps) { for (const auto& surface : overlaps) {
if (!surface->MatchTarget(params.target)) { if (!surface->MatchTarget(params.target)) {
if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
if (Settings::values.use_accurate_gpu_emulation) { if (Settings::values.use_accurate_gpu_emulation) {
return std::nullopt; return std::nullopt;
} }
@ -688,7 +681,7 @@ private:
} }
return std::nullopt; return std::nullopt;
} }
if (surface->GetCacheAddr() != cache_addr) { if (surface->GetCpuAddr() != cpu_addr) {
continue; continue;
} }
if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
@ -722,13 +715,13 @@ private:
* left blank. * left blank.
* @param is_render Whether or not the surface is a render target. * @param is_render Whether or not the surface is a render target.
**/ **/
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
const SurfaceParams& params, bool preserve_contents, const SurfaceParams& params, bool preserve_contents,
bool is_render) { bool is_render) {
// Step 1 // Step 1
// Check Level 1 Cache for a fast structural match. If candidate surface // Check Level 1 Cache for a fast structural match. If candidate surface
// matches at certain level we are pretty much done. // matches at certain level we are pretty much done.
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
TSurface& current_surface = iter->second; TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params); const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) { if (topological_result != MatchTopologyResult::FullMatch) {
@ -755,7 +748,7 @@ private:
// Step 2 // Step 2
// Obtain all possible overlaps in the memory region // Obtain all possible overlaps in the memory region
const std::size_t candidate_size = params.GetGuestSizeInBytes(); const std::size_t candidate_size = params.GetGuestSizeInBytes();
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
// If none are found, we are done. we just load the surface and create it. // If none are found, we are done. we just load the surface and create it.
if (overlaps.empty()) { if (overlaps.empty()) {
@ -777,7 +770,7 @@ private:
// Check if it's a 3D texture // Check if it's a 3D texture
if (params.block_depth > 0) { if (params.block_depth > 0) {
auto surface = auto surface =
Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
if (surface) { if (surface) {
return *surface; return *surface;
} }
@ -852,16 +845,16 @@ private:
* @param params The parameters on the candidate surface. * @param params The parameters on the candidate surface.
**/ **/
Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const std::optional<VAddr> cpu_addr =
const auto cache_addr{ToCacheAddr(host_ptr)}; system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_addr) { if (!cpu_addr) {
Deduction result{}; Deduction result{};
result.type = DeductionType::DeductionFailed; result.type = DeductionType::DeductionFailed;
return result; return result;
} }
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
TSurface& current_surface = iter->second; TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params); const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) { if (topological_result != MatchTopologyResult::FullMatch) {
@ -880,7 +873,7 @@ private:
} }
const std::size_t candidate_size = params.GetGuestSizeInBytes(); const std::size_t candidate_size = params.GetGuestSizeInBytes();
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
if (overlaps.empty()) { if (overlaps.empty()) {
Deduction result{}; Deduction result{};
@ -1024,10 +1017,10 @@ private:
} }
void RegisterInnerCache(TSurface& surface) { void RegisterInnerCache(TSurface& surface) {
const CacheAddr cache_addr = surface->GetCacheAddr(); const VAddr cpu_addr = surface->GetCpuAddr();
CacheAddr start = cache_addr >> registry_page_bits; VAddr start = cpu_addr >> registry_page_bits;
const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
l1_cache[cache_addr] = surface; l1_cache[cpu_addr] = surface;
while (start <= end) { while (start <= end) {
registry[start].push_back(surface); registry[start].push_back(surface);
start++; start++;
@ -1035,10 +1028,10 @@ private:
} }
void UnregisterInnerCache(TSurface& surface) { void UnregisterInnerCache(TSurface& surface) {
const CacheAddr cache_addr = surface->GetCacheAddr(); const VAddr cpu_addr = surface->GetCpuAddr();
CacheAddr start = cache_addr >> registry_page_bits; VAddr start = cpu_addr >> registry_page_bits;
const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
l1_cache.erase(cache_addr); l1_cache.erase(cpu_addr);
while (start <= end) { while (start <= end) {
auto& reg{registry[start]}; auto& reg{registry[start]};
reg.erase(std::find(reg.begin(), reg.end(), surface)); reg.erase(std::find(reg.begin(), reg.end(), surface));
@ -1046,18 +1039,18 @@ private:
} }
} }
std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
if (size == 0) { if (size == 0) {
return {}; return {};
} }
const CacheAddr cache_addr_end = cache_addr + size; const VAddr cpu_addr_end = cpu_addr + size;
CacheAddr start = cache_addr >> registry_page_bits; VAddr start = cpu_addr >> registry_page_bits;
const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
std::vector<TSurface> surfaces; std::vector<TSurface> surfaces;
while (start <= end) { while (start <= end) {
std::vector<TSurface>& list = registry[start]; std::vector<TSurface>& list = registry[start];
for (auto& surface : list) { for (auto& surface : list) {
if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
surface->MarkAsPicked(true); surface->MarkAsPicked(true);
surfaces.push_back(surface); surfaces.push_back(surface);
} }
@ -1146,14 +1139,14 @@ private:
// large in size. // large in size.
static constexpr u64 registry_page_bits{20}; static constexpr u64 registry_page_bits{20};
static constexpr u64 registry_page_size{1 << registry_page_bits}; static constexpr u64 registry_page_size{1 << registry_page_bits};
std::unordered_map<CacheAddr, std::vector<TSurface>> registry; std::unordered_map<VAddr, std::vector<TSurface>> registry;
static constexpr u32 DEPTH_RT = 8; static constexpr u32 DEPTH_RT = 8;
static constexpr u32 NO_RT = 0xFFFFFFFF; static constexpr u32 NO_RT = 0xFFFFFFFF;
// The L1 Cache is used for fast texture lookup before checking the overlaps // The L1 Cache is used for fast texture lookup before checking the overlaps
// This avoids calculating size and other stuffs. // This avoids calculating size and other stuffs.
std::unordered_map<CacheAddr, TSurface> l1_cache; std::unordered_map<VAddr, TSurface> l1_cache;
/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and /// previously been used. This is to prevent surfaces from being constantly created and