SMMU: Add Android compatibility

2023-12-30 04:37:25 +01:00 · 2023-12-30 04:37:25 +01:00 · 303cd31162
parent 0adc09e0af
commit 303cd31162
9 changed files with 42 additions and 50 deletions
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@ -217,9 +217,6 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo
      cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
    impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
    cached_pages = std::make_unique<CachedPages>();
-    for (size_t i = 0; i < 1ULL << (33 - 12); i++) {
-        compressed_device_addr[i] = 0;
-    }
 }

 template <typename Traits>
@ -517,7 +514,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
    u64 cache_begin = 0;
    u64 uncache_bytes = 0;
    u64 cache_bytes = 0;
-    const auto* MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
+    const auto MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;

    std::atomic_thread_fence(std::memory_order_acquire);
    const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
@ -577,4 +574,4 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
    }
 }

-} // namespace Core
+} // namespace Core
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@ -8,6 +8,7 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/hle/kernel/k_process.h"
 #include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/core/syncpoint_manager.h"
@ -109,7 +110,7 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
        ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
        Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
        session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
-                                             cmdlist.size() * sizeof(u32));
+                                                cmdlist.size() * sizeof(u32));
        gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
    }
    // Some games expect command_buffers to be written back
@ -135,7 +136,8 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) {
    return NvResult::Success;
 }

-NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) {
+NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries,
+                                        DeviceFD fd) {
    const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
    for (size_t i = 0; i < num_entries; i++) {
        DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true);
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@ -44,8 +44,7 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA
 // from outside classes. This also allows modification to the internals of the memory
 // subsystem without needing to rebuild all files that make use of the memory interface.
 struct Memory::Impl {
-    explicit Impl(Core::System& system_)
-        : system{system_} {}
+    explicit Impl(Core::System& system_) : system{system_} {}

    void SetCurrentPageTable(Kernel::KProcess& process) {
        current_page_table = &process.GetPageTable().GetImpl();
@ -640,18 +639,6 @@ struct Memory::Impl {
        LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target),
                  base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE);

-        // During boot, current_page_table might not be set yet, in which case we need not flush
-        /*if (system.IsPoweredOn()) {
-            auto& gpu = system.GPU();
-            for (u64 i = 0; i < size; i++) {
-                const auto page = base + i;
-                if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) {
-
-                    gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE);
-                }
-            }
-        }*/
-
        const auto end = base + size;
        ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
                   base + page_table.pointers.size());
@ -823,8 +810,7 @@ struct Memory::Impl {
        }
        const size_t core = system.GetCurrentHostThreadID();
        auto& current_area = rasterizer_read_areas[core];
-        gpu_device_memory->ApplyOpOnPointer(
-            p, scratch_buffers[core], [&](DAddr address) {
+        gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
            const DAddr end_address = address + size;
            if (current_area.start_address <= address && end_address <= current_area.end_address)
                [[likely]] {
@ -852,8 +838,7 @@ struct Memory::Impl {
                sys_core_guard.unlock();
            }
        });
-        gpu_device_memory->ApplyOpOnPointer(
-            p, scratch_buffers[core], [&](DAddr address) {
+        gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
            auto& current_area = rasterizer_write_areas[core];
            PAddr subaddress = address >> YUZU_PAGEBITS;
            bool do_collection = current_area.last_address == subaddress;
@ -872,12 +857,25 @@ struct Memory::Impl {
        PAddr last_address;
    };

-    void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
-        system.GPU().InvalidateRegion(GetInteger(dest_addr), size);
-    }
-
-    void FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
-        system.GPU().FlushRegion(GetInteger(dest_addr), size);
+    void InvalidateGPUMemory(u8* p, size_t size) {
+        constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
+        const size_t core = std::min(system.GetCurrentHostThreadID(),
+                                     sys_core); // thread IDs above sys_core are clamped to it.
+        if (!gpu_device_memory) [[unlikely]] {
+            gpu_device_memory = &system.Host1x().MemoryManager();
+        }
+        // Take sys_core_guard when running as sys_core; the SCOPE_EXIT below releases it.
+        if (core == sys_core) [[unlikely]] {
+            sys_core_guard.lock();
+        }
+        SCOPE_EXIT({
+            if (core == sys_core) [[unlikely]] {
+                sys_core_guard.unlock();
+            }
+        });
+        auto& gpu = system.GPU();
+        gpu_device_memory->ApplyOpOnPointer(
+            p, scratch_buffers[core], [&](DAddr address) { gpu.InvalidateRegion(address, size); });
    }

    Core::System& system;
@ -1081,14 +1079,6 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug)
    impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
 }

-void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
-    impl->InvalidateRegion(dest_addr, size);
-}
-
-void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
-    impl->FlushRegion(dest_addr, size);
-}
-
 bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
    [[maybe_unused]] bool mapped = true;
    [[maybe_unused]] bool rasterizer = false;
@ -1100,10 +1090,10 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
                      GetInteger(vaddr));
            mapped = false;
        },
-        [&] {
-            impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size);
-            rasterizer = true;
-        });
+        [&] { rasterizer = true; });
+    if (rasterizer) {
+        impl->InvalidateGPUMemory(ptr, size);
+    }

 #ifdef __linux__
    if (!rasterizer && mapped) {
--- a/src/core/memory.h
+++ b/src/core/memory.h
@ -486,10 +486,10 @@ public:
    void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);

    void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
-    void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size);
+
    bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
+    
    bool InvalidateSeparateHeap(void* fault_address);
-    void FlushRegion(Common::ProcessAddress dest_addr, size_t size);

 private:
    Core::System& system;
--- a/src/video_core/host1x/gpu_device_memory_manager.h
+++ b/src/video_core/host1x/gpu_device_memory_manager.h
@ -17,7 +17,7 @@ struct MaxwellDeviceTraits {
    static constexpr bool supports_pinning = false;
    static constexpr size_t device_virtual_bits = 34;
    using DeviceInterface = typename VideoCore::RasterizerInterface;
-    using DeviceMethods = typename MaxwellDeviceMethods;
+    using DeviceMethods = MaxwellDeviceMethods;
 };

 using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>;
--- a/src/video_core/host1x/host1x.cpp
+++ b/src/video_core/host1x/host1x.cpp
@ -13,6 +13,8 @@ Host1x::Host1x(Core::System& system_)
      memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12},
      allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}

+Host1x::~Host1x() = default;
+
 } // namespace Host1x

 } // namespace Tegra
--- a/src/video_core/host1x/host1x.h
+++ b/src/video_core/host1x/host1x.h
@ -21,6 +21,7 @@ namespace Host1x {
 class Host1x {
 public:
    explicit Host1x(Core::System& system);
+    ~Host1x();

    SyncpointManager& GetSyncpointManager() {
        return syncpoint_manager;
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@ -68,7 +68,7 @@ public:
        if (!address) {
            return {};
        }
-        return memory.GetPointer(*address);
+        return memory.GetPointer<T>(*address);
    }

    template <typename T>
--- a/src/video_core/query_cache/query_cache.h
+++ b/src/video_core/query_cache/query_cache.h
@ -256,8 +256,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
        return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
                                        static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
    };
-    u8* pointer = impl->device_memory.GetPointer<u8>(cpu_addr);
-    u8* pointer_timestamp = impl->device_memory.GetPointer<u8>(cpu_addr + 8);
+    u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr);
+    u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8);
    bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
    std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
                                     pointer, pointer_timestamp] {
@ -561,7 +561,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo
    }
    if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) &&
        False(query_base->flags & QueryFlagBits::IsGuestSynced)) {
-        auto* ptr = impl->device_memory.GetPointer<u8>(query_base->guest_address);
+        auto* ptr = impl->device_memory.template GetPointer<u8>(query_base->guest_address);
        if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
            std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
            return false;