Texture Cache: Add HLE methods for building 3D textures within the GPU in certain scenarios.

This commit adds a series of HLE methods for handling 3D textures in
general. This helps games that generate 3D textures on every frame and
may reduce loading times for certain games.
This commit is contained in:
Fernando Sahmkow 2019-12-08 13:13:18 -04:00 committed by FernandoS27
parent aea978e037
commit 51c9e98677
4 changed files with 143 additions and 1 deletions

View File

@ -392,4 +392,42 @@ std::string SurfaceParams::TargetName() const {
}
}
u32 SurfaceParams::GetBlockSize() const {
const u32 x = 64U << block_width;
const u32 y = 8U << block_height;
const u32 z = 1U << block_depth;
return x * y * z;
}
std::pair<u32, u32> SurfaceParams::GetBlockXY() const {
const u32 x_pixels = 64U / GetBytesPerPixel();
const u32 x = x_pixels << block_width;
const u32 y = 8U << block_height;
return {x, y};
}
std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 block_size = GetBlockSize();
const u32 block_index = offset / block_size;
const u32 gob_offset = offset % block_size;
const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GetGOBSize());
const u32 x_gob_pixels = 64U / GetBytesPerPixel();
const u32 x_block_pixels = x_gob_pixels << block_width;
const u32 y_block_pixels = 8U << block_height;
const u32 z_block_pixels = 1U << block_depth;
const u32 x_blocks = div_ceil(width, x_block_pixels);
const u32 y_blocks = div_ceil(height, y_block_pixels);
const u32 z_blocks = div_ceil(depth, z_block_pixels);
const u32 base_x = block_index % x_blocks;
const u32 base_y = (block_index / x_blocks) % y_blocks;
const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks;
u32 x = base_x * x_block_pixels;
u32 y = base_y * y_block_pixels;
u32 z = base_z * z_block_pixels;
z += gob_index >> block_height;
y += (gob_index * 8U) % y_block_pixels;
return {x, y, z};
}
} // namespace VideoCommon

View File

@ -4,6 +4,8 @@
#pragma once
#include <utility>
#include "common/alignment.h"
#include "common/bit_util.h"
#include "common/cityhash.h"
@ -136,6 +138,15 @@ public:
std::size_t GetConvertedMipmapSize(u32 level) const;
// Get this texture Tegra Block size in guest memory layout
u32 GetBlockSize() const;
// Get X, Y sizes of a block
std::pair<u32, u32> GetBlockXY() const;
// Get the offset in x, y, z coordinates from a memory offset
std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const;
/// Returns the size of a layer in bytes in guest memory.
std::size_t GetGuestLayerSize() const {
return GetLayerSize(false, false);
@ -269,7 +280,8 @@ private:
/// Returns the size of all mipmap levels and aligns as needed.
std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth);
return GetLayerSize(as_host_size, uncompressed) *
(layer_only ? 1U : (is_layered ? depth : 1U));
}
/// Returns the size of a layer

View File

@ -615,6 +615,85 @@ private:
return {{new_surface, new_surface->GetMainView()}};
}
/**
* Takes care of managing 3D textures and its slices. Does some HLE methods when possible.
* Fallsback to LLE when it isn't possible.
*
* @param overlaps The overlapping surfaces registered in the cache.
* @param params The parameters on the new surface.
* @param gpu_addr The starting address of the new surface.
* @param cache_addr The starting address of the new surface on physical memory.
* @param preserve_contents Indicates that the new surface should be loaded from memory or
* left blank.
*/
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
const SurfaceParams& params,
const GPUVAddr gpu_addr,
const CacheAddr cache_addr,
bool preserve_contents) {
if (params.target == SurfaceTarget::Texture3D) {
bool failed = false;
if (params.num_levels > 1) {
// We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
return std::nullopt;
}
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
bool modified = false;
for (auto& surface : overlaps) {
const SurfaceParams& src_params = surface->GetSurfaceParams();
if (src_params.target != SurfaceTarget::Texture2D) {
failed = true;
break;
}
if (src_params.height != params.height) {
failed = true;
break;
}
if (src_params.block_depth != params.block_depth ||
src_params.block_height != params.block_height) {
failed = true;
break;
}
const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr);
const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
modified |= surface->IsModified();
const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
1);
ImageCopy(surface, new_surface, copy_params);
}
if (failed) {
return std::nullopt;
}
for (const auto& surface : overlaps) {
Unregister(surface);
}
new_surface->MarkAsModified(modified, Tick());
Register(new_surface);
return {{new_surface, new_surface->GetMainView()}};
} else {
for (const auto& surface : overlaps) {
if (!surface->MatchTarget(params.target)) {
if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) {
if (Settings::values.use_accurate_gpu_emulation) {
return std::nullopt;
}
Unregister(surface);
return InitializeSurface(gpu_addr, params, preserve_contents);
}
return std::nullopt;
}
if (surface->GetCacheAddr() != cache_addr) {
continue;
}
const auto struct_result = surface->MatchesStructure(params);
if (struct_result == MatchStructureResult::FullMatch) {
return {{surface, surface->GetMainView()}};
}
}
return InitializeSurface(gpu_addr, params, preserve_contents);
}
}
/**
* Gets the starting address and parameters of a candidate surface and tries
* to find a matching surface within the cache. This is done in 3 big steps:
@ -687,6 +766,15 @@ private:
}
}
// Look if it's a 3D texture
if (params.block_depth > 0) {
std::optional<std::pair<TSurface, TView>> surface =
Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents);
if (surface) {
return *surface;
}
}
// Split cases between 1 overlap or many.
if (overlaps.size() == 1) {
TSurface current_surface = overlaps[0];

View File

@ -12,6 +12,10 @@ namespace Tegra::Texture {
// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents
// an small rect of (64/bytes_per_pixel)X8.
inline std::size_t GetGOBSize() {
return 512;
}
inline std::size_t GetGOBSizeShift() {
return 9;
}