Fixes and workarounds to make UBSan happier on macOS

There are still some other issues not addressed here, but it's a start.

Workarounds for false-positive reports:

- `RasterizerAccelerated`: Put a gigantic array behind a `unique_ptr`,
  because UBSan has a [hardcoded limit](https://stackoverflow.com/questions/64531383/c-runtime-error-using-fsanitize-undefined-object-has-a-possibly-invalid-vp)
  of how big it thinks objects can be, specifically when dealing with
  offset-to-top values used with multiple inheritance.  Hopefully this
  doesn't have a performance impact.

- `QueryCacheBase::QueryCacheBase`: Avoid an operation that UBSan thinks
  is UB even though it at least arguably isn't.  See the link in the
  comment for more information.

Fixes for correct reports:

- `PageTable`, `Memory`: Use `uintptr_t` values instead of pointers to
  avoid UB from pointer overflow (when pointer arithmetic wraps around
  the address space).

- `KScheduler::Reload`: `thread->GetOwnerProcess()` can be `nullptr`;
  avoid calling methods on it in this case.  (The existing code returns
  a garbage reference to a field, which is then passed into
  `LoadWatchpointArray`, and apparently it's never used, so it's
  harmless in practice but still triggers UBSan.)

- `KAutoObject::Close`: This function calls `this->Destroy()`, which
  overwrites the beginning of the object with junk (specifically a free
  list pointer).  Then it calls `this->UnregisterWithKernel()`.  UBSan
  complains about a type mismatch because the vtable has been
  overwritten, and I believe this is indeed UB.  `UnregisterWithKernel`
  also loads `m_kernel` from the 'freed' object, which seems to be
  technically safe (the overwriting doesn't extend as far as that
  field), but seems dubious.  Switch to a `static` method and load
  `m_kernel` in advance.
This commit is contained in:
comex 2023-07-01 15:00:39 -07:00
parent 04868ab9da
commit d7c532d889
11 changed files with 42 additions and 32 deletions

View File

@ -51,7 +51,7 @@ struct PageTable {
class PageInfo {
public:
/// Returns the page pointer
[[nodiscard]] u8* Pointer() const noexcept {
[[nodiscard]] uintptr_t Pointer() const noexcept {
return ExtractPointer(raw.load(std::memory_order_relaxed));
}
@ -61,7 +61,7 @@ struct PageTable {
}
/// Returns the page pointer and attribute pair, extracted from the same atomic read
[[nodiscard]] std::pair<u8*, PageType> PointerType() const noexcept {
[[nodiscard]] std::pair<uintptr_t, PageType> PointerType() const noexcept {
const uintptr_t non_atomic_raw = raw.load(std::memory_order_relaxed);
return {ExtractPointer(non_atomic_raw), ExtractType(non_atomic_raw)};
}
@ -73,13 +73,13 @@ struct PageTable {
}
/// Write a page pointer and type pair atomically
void Store(u8* pointer, PageType type) noexcept {
raw.store(reinterpret_cast<uintptr_t>(pointer) | static_cast<uintptr_t>(type));
void Store(uintptr_t pointer, PageType type) noexcept {
raw.store(pointer | static_cast<uintptr_t>(type));
}
/// Unpack a pointer from a page info raw representation
[[nodiscard]] static u8* ExtractPointer(uintptr_t raw) noexcept {
return reinterpret_cast<u8*>(raw & (~uintptr_t{0} << ATTRIBUTE_BITS));
[[nodiscard]] static uintptr_t ExtractPointer(uintptr_t raw) noexcept {
return raw & (~uintptr_t{0} << ATTRIBUTE_BITS);
}
/// Unpack a page type from a page info raw representation

View File

@ -217,8 +217,8 @@ void ARM_Interface::Run() {
}
}
void ARM_Interface::LoadWatchpointArray(const WatchpointArray& wp) {
watchpoints = &wp;
void ARM_Interface::LoadWatchpointArray(const WatchpointArray* wp) {
watchpoints = wp;
}
const Kernel::DebugWatchpoint* ARM_Interface::MatchingWatchpoint(

View File

@ -186,7 +186,7 @@ public:
virtual void SaveContext(ThreadContext64& ctx) const = 0;
virtual void LoadContext(const ThreadContext32& ctx) = 0;
virtual void LoadContext(const ThreadContext64& ctx) = 0;
void LoadWatchpointArray(const WatchpointArray& wp);
void LoadWatchpointArray(const WatchpointArray* wp);
/// Clears the exclusive monitor's state.
virtual void ClearExclusiveState() = 0;

View File

@ -15,8 +15,8 @@ void KAutoObject::RegisterWithKernel() {
m_kernel.RegisterKernelObject(this);
}
void KAutoObject::UnregisterWithKernel() {
m_kernel.UnregisterKernelObject(this);
void KAutoObject::UnregisterWithKernel(KernelCore& kernel, KAutoObject* self) {
kernel.UnregisterKernelObject(self);
}
} // namespace Kernel

View File

@ -159,14 +159,15 @@ public:
// If ref count hits zero, destroy the object.
if (cur_ref_count - 1 == 0) {
KernelCore& kernel = m_kernel;
this->Destroy();
this->UnregisterWithKernel();
KAutoObject::UnregisterWithKernel(kernel, this);
}
}
private:
void RegisterWithKernel();
void UnregisterWithKernel();
static void UnregisterWithKernel(KernelCore& kernel, KAutoObject* self);
protected:
KernelCore& m_kernel;

View File

@ -510,11 +510,12 @@ void KScheduler::Unload(KThread* thread) {
void KScheduler::Reload(KThread* thread) {
auto& cpu_core = m_kernel.System().ArmInterface(m_core_id);
auto* process = thread->GetOwnerProcess();
cpu_core.LoadContext(thread->GetContext32());
cpu_core.LoadContext(thread->GetContext64());
cpu_core.SetTlsAddress(GetInteger(thread->GetTlsAddress()));
cpu_core.SetTPIDR_EL0(thread->GetTpidrEl0());
cpu_core.LoadWatchpointArray(thread->GetOwnerProcess()->GetWatchpoints());
cpu_core.LoadWatchpointArray(process ? &process->GetWatchpoints() : nullptr);
cpu_core.ClearExclusiveState();
}

View File

@ -129,7 +129,7 @@ Result KThread::Initialize(KThreadFunction func, uintptr_t arg, KProcessAddress
case ThreadType::User:
ASSERT(((owner == nullptr) ||
(owner->GetCoreMask() | (1ULL << virt_core)) == owner->GetCoreMask()));
ASSERT(((owner == nullptr) ||
ASSERT(((owner == nullptr) || (prio > Svc::LowestThreadPriority) ||
(owner->GetPriorityMask() | (1ULL << prio)) == owner->GetPriorityMask()));
break;
case ThreadType::Kernel:

View File

@ -73,7 +73,7 @@ struct Memory::Impl {
return {};
}
return system.DeviceMemory().GetPointer<u8>(paddr) + vaddr;
return system.DeviceMemory().GetPointer<u8>(paddr + vaddr);
}
[[nodiscard]] u8* GetPointerFromDebugMemory(u64 vaddr) const {
@ -84,7 +84,7 @@ struct Memory::Impl {
return {};
}
return system.DeviceMemory().GetPointer<u8>(paddr) + vaddr;
return system.DeviceMemory().GetPointer<u8>(paddr + vaddr);
}
u8 Read8(const Common::ProcessAddress addr) {
@ -204,7 +204,8 @@ struct Memory::Impl {
break;
}
case Common::PageType::Memory: {
u8* mem_ptr = pointer + page_offset + (page_index << YUZU_PAGEBITS);
u8* mem_ptr =
reinterpret_cast<u8*>(pointer + page_offset + (page_index << YUZU_PAGEBITS));
on_memory(copy_amount, mem_ptr);
break;
}
@ -448,7 +449,7 @@ struct Memory::Impl {
break;
case Common::PageType::Memory:
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store(
nullptr, Common::PageType::DebugMemory);
0, Common::PageType::DebugMemory);
break;
default:
UNREACHABLE();
@ -466,7 +467,8 @@ struct Memory::Impl {
case Common::PageType::DebugMemory: {
u8* const pointer{GetPointerFromDebugMemory(vaddr & ~YUZU_PAGEMASK)};
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store(
pointer - (vaddr & ~YUZU_PAGEMASK), Common::PageType::Memory);
reinterpret_cast<uintptr_t>(pointer) - (vaddr & ~YUZU_PAGEMASK),
Common::PageType::Memory);
break;
}
default:
@ -506,7 +508,7 @@ struct Memory::Impl {
case Common::PageType::DebugMemory:
case Common::PageType::Memory:
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store(
nullptr, Common::PageType::RasterizerCachedMemory);
0, Common::PageType::RasterizerCachedMemory);
break;
case Common::PageType::RasterizerCachedMemory:
// There can be more than one GPU region mapped per CPU region, so it's common
@ -534,10 +536,11 @@ struct Memory::Impl {
// pagetable after unmapping a VMA. In that case the underlying VMA will no
// longer exist, and we should just leave the pagetable entry blank.
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store(
nullptr, Common::PageType::Unmapped);
0, Common::PageType::Unmapped);
} else {
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store(
pointer - (vaddr & ~YUZU_PAGEMASK), Common::PageType::Memory);
reinterpret_cast<uintptr_t>(pointer) - (vaddr & ~YUZU_PAGEMASK),
Common::PageType::Memory);
}
break;
}
@ -584,7 +587,7 @@ struct Memory::Impl {
"Mapping memory page without a pointer @ {:016x}", base * YUZU_PAGESIZE);
while (base != end) {
page_table.pointers[base].Store(nullptr, type);
page_table.pointers[base].Store(0, type);
page_table.backing_addr[base] = 0;
page_table.blocks[base] = 0;
base += 1;
@ -593,7 +596,8 @@ struct Memory::Impl {
auto orig_base = base;
while (base != end) {
auto host_ptr =
system.DeviceMemory().GetPointer<u8>(target) - (base << YUZU_PAGEBITS);
reinterpret_cast<uintptr_t>(system.DeviceMemory().GetPointer<u8>(target)) -
(base << YUZU_PAGEBITS);
auto backing = GetInteger(target) - (base << YUZU_PAGEBITS);
page_table.pointers[base].Store(host_ptr, type);
page_table.backing_addr[base] = backing;
@ -619,8 +623,8 @@ struct Memory::Impl {
// Avoid adding any extra logic to this fast-path block
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
return &pointer[vaddr];
if (const uintptr_t pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
return reinterpret_cast<u8*>(pointer + vaddr);
}
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
@ -814,7 +818,7 @@ bool Memory::IsValidVirtualAddress(const Common::ProcessAddress vaddr) const {
return false;
}
const auto [pointer, type] = page_table.pointers[page].PointerType();
return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory ||
return pointer != 0 || type == Common::PageType::RasterizerCachedMemory ||
type == Common::PageType::DebugMemory;
}

View File

@ -103,7 +103,9 @@ public:
explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_)
: rasterizer{rasterizer_},
cpu_memory{cpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
// Use reinterpret_cast instead of static_cast as workaround for
// UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
cpu_memory{cpu_memory_}, streams{{CounterStream{reinterpret_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {
(void)slot_async_jobs.insert(); // Null value
}

View File

@ -13,7 +13,8 @@ namespace VideoCore {
using namespace Core::Memory;
RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) : cpu_memory{cpu_memory_} {}
RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_)
: cached_pages(std::make_unique<CachedPages>()), cpu_memory{cpu_memory_} {}
RasterizerAccelerated::~RasterizerAccelerated() = default;
@ -26,7 +27,7 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del
std::atomic_thread_fence(std::memory_order_acquire);
const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE);
for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) {
std::atomic_uint16_t& count = cached_pages.at(page >> 2).Count(page);
std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page);
if (delta > 0) {
ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!");

View File

@ -41,7 +41,8 @@ private:
};
static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!");
std::array<CacheEntry, 0x2000000> cached_pages;
using CachedPages = std::array<CacheEntry, 0x2000000>;
std::unique_ptr<CachedPages> cached_pages;
Core::Memory::Memory& cpu_memory;
};