SMMU: Add Android compatibility

This commit is contained in:
Fernando Sahmkow 2023-12-30 04:37:25 +01:00 committed by Liam
parent 0adc09e0af
commit 303cd31162
9 changed files with 42 additions and 50 deletions

View File

@ -217,9 +217,6 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo
cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) { cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
cached_pages = std::make_unique<CachedPages>(); cached_pages = std::make_unique<CachedPages>();
for (size_t i = 0; i < 1ULL << (33 - 12); i++) {
compressed_device_addr[i] = 0;
}
} }
template <typename Traits> template <typename Traits>
@ -517,7 +514,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
u64 cache_begin = 0; u64 cache_begin = 0;
u64 uncache_bytes = 0; u64 uncache_bytes = 0;
u64 cache_bytes = 0; u64 cache_bytes = 0;
const auto* MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching; const auto MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
std::atomic_thread_fence(std::memory_order_acquire); std::atomic_thread_fence(std::memory_order_acquire);
const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE); const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
@ -577,4 +574,4 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
} }
} }
} // namespace Core } // namespace Core

View File

@ -8,6 +8,7 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/core.h" #include "core/core.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/service/nvdrv/core/container.h" #include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/nvmap.h" #include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/core/syncpoint_manager.h" #include "core/hle/service/nvdrv/core/syncpoint_manager.h"
@ -109,7 +110,7 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
cmdlist.size() * sizeof(u32)); cmdlist.size() * sizeof(u32));
gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
} }
// Some games expect command_buffers to be written back // Some games expect command_buffers to be written back
@ -135,7 +136,8 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) {
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) { NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries,
DeviceFD fd) {
const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
for (size_t i = 0; i < num_entries; i++) { for (size_t i = 0; i < num_entries; i++) {
DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true); DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true);

View File

@ -44,8 +44,7 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA
// from outside classes. This also allows modification to the internals of the memory // from outside classes. This also allows modification to the internals of the memory
// subsystem without needing to rebuild all files that make use of the memory interface. // subsystem without needing to rebuild all files that make use of the memory interface.
struct Memory::Impl { struct Memory::Impl {
explicit Impl(Core::System& system_) explicit Impl(Core::System& system_) : system{system_} {}
: system{system_} {}
void SetCurrentPageTable(Kernel::KProcess& process) { void SetCurrentPageTable(Kernel::KProcess& process) {
current_page_table = &process.GetPageTable().GetImpl(); current_page_table = &process.GetPageTable().GetImpl();
@ -640,18 +639,6 @@ struct Memory::Impl {
LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target), LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target),
base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE);
// During boot, current_page_table might not be set yet, in which case we need not flush
/*if (system.IsPoweredOn()) {
auto& gpu = system.GPU();
for (u64 i = 0; i < size; i++) {
const auto page = base + i;
if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) {
gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE);
}
}
}*/
const auto end = base + size; const auto end = base + size;
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
base + page_table.pointers.size()); base + page_table.pointers.size());
@ -823,8 +810,7 @@ struct Memory::Impl {
} }
const size_t core = system.GetCurrentHostThreadID(); const size_t core = system.GetCurrentHostThreadID();
auto& current_area = rasterizer_read_areas[core]; auto& current_area = rasterizer_read_areas[core];
gpu_device_memory->ApplyOpOnPointer( gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
p, scratch_buffers[core], [&](DAddr address) {
const DAddr end_address = address + size; const DAddr end_address = address + size;
if (current_area.start_address <= address && end_address <= current_area.end_address) if (current_area.start_address <= address && end_address <= current_area.end_address)
[[likely]] { [[likely]] {
@ -852,8 +838,7 @@ struct Memory::Impl {
sys_core_guard.unlock(); sys_core_guard.unlock();
} }
}); });
gpu_device_memory->ApplyOpOnPointer( gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
p, scratch_buffers[core], [&](DAddr address) {
auto& current_area = rasterizer_write_areas[core]; auto& current_area = rasterizer_write_areas[core];
PAddr subaddress = address >> YUZU_PAGEBITS; PAddr subaddress = address >> YUZU_PAGEBITS;
bool do_collection = current_area.last_address == subaddress; bool do_collection = current_area.last_address == subaddress;
@ -872,12 +857,25 @@ struct Memory::Impl {
PAddr last_address; PAddr last_address;
}; };
void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { void InvalidateGPUMemory(u8* p, size_t size) {
system.GPU().InvalidateRegion(GetInteger(dest_addr), size); constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
} const size_t core = std::min(system.GetCurrentHostThreadID(),
sys_core); // calls from any other thread are routed to sys_core.
void FlushRegion(Common::ProcessAddress dest_addr, size_t size) { if (!gpu_device_memory) [[unlikely]] {
system.GPU().FlushRegion(GetInteger(dest_addr), size); gpu_device_memory = &system.Host1x().MemoryManager();
}
// Guard on sys_core;
if (core == sys_core) [[unlikely]] {
sys_core_guard.lock();
}
SCOPE_EXIT({
if (core == sys_core) [[unlikely]] {
sys_core_guard.unlock();
}
});
auto& gpu = system.GPU();
gpu_device_memory->ApplyOpOnPointer(
p, scratch_buffers[core], [&](DAddr address) { gpu.InvalidateRegion(address, size); });
} }
Core::System& system; Core::System& system;
@ -1081,14 +1079,6 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug)
impl->MarkRegionDebug(GetInteger(vaddr), size, debug); impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
} }
void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
impl->InvalidateRegion(dest_addr, size);
}
void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
impl->FlushRegion(dest_addr, size);
}
bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
[[maybe_unused]] bool mapped = true; [[maybe_unused]] bool mapped = true;
[[maybe_unused]] bool rasterizer = false; [[maybe_unused]] bool rasterizer = false;
@ -1100,10 +1090,10 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
GetInteger(vaddr)); GetInteger(vaddr));
mapped = false; mapped = false;
}, },
[&] { [&] { rasterizer = true; });
impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); if (rasterizer) {
rasterizer = true; impl->InvalidateGPUMemory(ptr, size);
}); }
#ifdef __linux__ #ifdef __linux__
if (!rasterizer && mapped) { if (!rasterizer && mapped) {

View File

@ -486,10 +486,10 @@ public:
void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size);
bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
bool InvalidateSeparateHeap(void* fault_address); bool InvalidateSeparateHeap(void* fault_address);
void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
private: private:
Core::System& system; Core::System& system;

View File

@ -17,7 +17,7 @@ struct MaxwellDeviceTraits {
static constexpr bool supports_pinning = false; static constexpr bool supports_pinning = false;
static constexpr size_t device_virtual_bits = 34; static constexpr size_t device_virtual_bits = 34;
using DeviceInterface = typename VideoCore::RasterizerInterface; using DeviceInterface = typename VideoCore::RasterizerInterface;
using DeviceMethods = typename MaxwellDeviceMethods; using DeviceMethods = MaxwellDeviceMethods;
}; };
using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>; using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>;

View File

@ -13,6 +13,8 @@ Host1x::Host1x(Core::System& system_)
memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12}, memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12},
allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {} allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}
Host1x::~Host1x() = default;
} // namespace Host1x } // namespace Host1x
} // namespace Tegra } // namespace Tegra

View File

@ -21,6 +21,7 @@ namespace Host1x {
class Host1x { class Host1x {
public: public:
explicit Host1x(Core::System& system); explicit Host1x(Core::System& system);
~Host1x();
SyncpointManager& GetSyncpointManager() { SyncpointManager& GetSyncpointManager() {
return syncpoint_manager; return syncpoint_manager;

View File

@ -68,7 +68,7 @@ public:
if (!address) { if (!address) {
return {}; return {};
} }
return memory.GetPointer(*address); return memory.GetPointer<T>(*address);
} }
template <typename T> template <typename T>

View File

@ -256,8 +256,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
}; };
u8* pointer = impl->device_memory.GetPointer<u8>(cpu_addr); u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr);
u8* pointer_timestamp = impl->device_memory.GetPointer<u8>(cpu_addr + 8); u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8);
bool is_synced = !Settings::IsGPULevelHigh() && is_fence; bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
pointer, pointer_timestamp] { pointer, pointer_timestamp] {
@ -561,7 +561,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo
} }
if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) &&
False(query_base->flags & QueryFlagBits::IsGuestSynced)) { False(query_base->flags & QueryFlagBits::IsGuestSynced)) {
auto* ptr = impl->device_memory.GetPointer<u8>(query_base->guest_address); auto* ptr = impl->device_memory.template GetPointer<u8>(query_base->guest_address);
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
return false; return false;