// Copyright 2022 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once #include #include #include #include "common/alignment.h" #include "common/color.h" #include "video_core/rasterizer_cache/pixel_format.h" #include "video_core/texture/etc1.h" #include "video_core/utils.h" namespace VideoCore { template inline T MakeInt(const u8* bytes) { T integer{}; std::memcpy(&integer, bytes, sizeof(T)); return integer; } template constexpr void DecodePixel(const u8* source, u8* dest) { using namespace Common::Color; constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; if constexpr (format == PixelFormat::RGBA8 && converted) { const auto abgr = DecodeRGBA8(source); std::memcpy(dest, abgr.AsArray(), 4); } else if constexpr (format == PixelFormat::RGB8 && converted) { const auto abgr = DecodeRGB8(source); std::memcpy(dest, abgr.AsArray(), 4); } else if constexpr (format == PixelFormat::RGB565 && converted) { const auto abgr = DecodeRGB565(source); std::memcpy(dest, abgr.AsArray(), 4); } else if constexpr (format == PixelFormat::RGB5A1 && converted) { const auto abgr = DecodeRGB5A1(source); std::memcpy(dest, abgr.AsArray(), 4); } else if constexpr (format == PixelFormat::RGBA4 && converted) { const auto abgr = DecodeRGBA4(source); std::memcpy(dest, abgr.AsArray(), 4); } else if constexpr (format == PixelFormat::IA8) { const auto abgr = DecodeIA8(source); std::memcpy(dest, abgr.AsArray(), 4); } else if constexpr (format == PixelFormat::RG8) { const auto abgr = DecodeRG8(source); std::memcpy(dest, abgr.AsArray(), 4); } else if constexpr (format == PixelFormat::I8) { const auto abgr = DecodeI8(source); std::memcpy(dest, abgr.AsArray(), 4); } else if constexpr (format == PixelFormat::A8) { const auto abgr = DecodeA8(source); std::memcpy(dest, abgr.AsArray(), 4); } else if constexpr (format == PixelFormat::IA4) { const auto abgr = DecodeIA4(source); std::memcpy(dest, abgr.AsArray(), 4); } else if constexpr (format == PixelFormat::D24 && converted) { const auto d32 = DecodeD24(source) / 16777215.f; std::memcpy(dest, &d32, sizeof(d32)); } else if constexpr (format == PixelFormat::D24S8) { const u32 d24s8 = std::rotl(MakeInt(source), 8); std::memcpy(dest, &d24s8, sizeof(u32)); } else { std::memcpy(dest, source, bytes_per_pixel); } } template constexpr void DecodePixel4(u32 x, u32 y, const u8* source_tile, u8* dest_pixel) { const u32 morton_offset = VideoCore::MortonInterleave(x, y); const u8 value = source_tile[morton_offset >> 1]; const u8 pixel = Common::Color::Convert4To8((morton_offset % 2) ? (value >> 4) : (value & 0xF)); if constexpr (format == PixelFormat::I4) { std::memset(dest_pixel, pixel, 3); dest_pixel[3] = 255; } else { std::memset(dest_pixel, 0, 3); dest_pixel[3] = pixel; } } template constexpr void DecodePixelETC1(u32 x, u32 y, const u8* source_tile, u8* dest_pixel) { constexpr u32 subtile_width = 4; constexpr u32 subtile_height = 4; constexpr bool has_alpha = format == PixelFormat::ETC1A4; constexpr std::size_t subtile_size = has_alpha ? 16 : 8; const u32 subtile_index = (x / subtile_width) + 2 * (y / subtile_height); x %= subtile_width; y %= subtile_height; const u8* subtile_ptr = source_tile + subtile_index * subtile_size; u8 alpha = 255; if constexpr (has_alpha) { u64_le packed_alpha; std::memcpy(&packed_alpha, subtile_ptr, sizeof(u64)); subtile_ptr += sizeof(u64); alpha = Common::Color::Convert4To8((packed_alpha >> (4 * (x * subtile_width + y))) & 0xF); } const u64_le subtile_data = MakeInt(subtile_ptr); const auto rgb = Pica::Texture::SampleETC1Subtile(subtile_data, x, y); // Copy the uncompressed pixel to the destination std::memcpy(dest_pixel, rgb.AsArray(), 3); dest_pixel[3] = alpha; } template constexpr void EncodePixel(const u8* source, u8* dest) { using namespace Common::Color; constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; if constexpr (format == PixelFormat::RGBA8 && converted) { Common::Vec4 rgba; std::memcpy(rgba.AsArray(), source, 4); EncodeRGBA8(rgba, dest); } else if constexpr (format == PixelFormat::RGB8 && converted) { Common::Vec4 rgba; std::memcpy(rgba.AsArray(), source, 4); EncodeRGB8(rgba, dest); } else if constexpr (format == PixelFormat::RGB565 && converted) { Common::Vec4 rgba; std::memcpy(rgba.AsArray(), source, 4); EncodeRGB565(rgba, dest); } else if constexpr (format == PixelFormat::RGB5A1 && converted) { Common::Vec4 rgba; std::memcpy(rgba.AsArray(), source, 4); EncodeRGB5A1(rgba, dest); } else if constexpr (format == PixelFormat::RGBA4 && converted) { Common::Vec4 rgba; std::memcpy(rgba.AsArray(), source, 4); EncodeRGBA4(rgba, dest); } else if constexpr (format == PixelFormat::IA8) { Common::Vec4 rgba; std::memcpy(rgba.AsArray(), source, 4); EncodeIA8(rgba, dest); } else if constexpr (format == PixelFormat::RG8) { Common::Vec4 rgba; std::memcpy(rgba.AsArray(), source, 4); EncodeRG8(rgba, dest); } else if constexpr (format == PixelFormat::I8) { Common::Vec4 rgba; std::memcpy(rgba.AsArray(), source, 4); EncodeI8(rgba, dest); } else if constexpr (format == PixelFormat::A8) { Common::Vec4 rgba; std::memcpy(rgba.AsArray(), source, 4); EncodeA8(rgba, dest); } else if constexpr (format == PixelFormat::IA4) { Common::Vec4 rgba; std::memcpy(rgba.AsArray(), source, 4); EncodeIA4(rgba, dest); } else if constexpr (format == PixelFormat::D24 && converted) { float d32; std::memcpy(&d32, source, sizeof(d32)); EncodeD24(d32 * 0xFFFFFF, dest); } else if constexpr (format == PixelFormat::D24S8) { const u32 s8d24 = std::rotr(MakeInt(source), 8); std::memcpy(dest, &s8d24, sizeof(u32)); } else { std::memcpy(dest, source, bytes_per_pixel); } } template constexpr void EncodePixel4(u32 x, u32 y, const u8* source_pixel, u8* dest_tile_buffer) { Common::Vec4 rgba; std::memcpy(rgba.AsArray(), source_pixel, 4); u8 pixel; if constexpr (format == PixelFormat::I4) { pixel = Common::Color::AverageRgbComponents(rgba); } else { pixel = rgba.a(); } const u32 morton_offset = VideoCore::MortonInterleave(x, y); const u32 byte_offset = morton_offset >> 1; const u8 current_values = dest_tile_buffer[byte_offset]; const u8 new_value = Common::Color::Convert8To4(pixel); if (morton_offset % 2) { dest_tile_buffer[byte_offset] = (new_value << 4) | (current_values & 0x0F); } else { dest_tile_buffer[byte_offset] = (current_values & 0xF0) | new_value; } } template constexpr void MortonCopyTile(u32 stride, std::span tile_buffer, std::span linear_buffer) { constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; constexpr u32 linear_bytes_per_pixel = converted ? 4 : GetFormatBytesPerPixel(format); constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4; constexpr bool is_4bit = format == PixelFormat::I4 || format == PixelFormat::A4; for (u32 y = 0; y < 8; y++) { for (u32 x = 0; x < 8; x++) { const auto tiled_pixel = tile_buffer.subspan( VideoCore::MortonInterleave(x, y) * bytes_per_pixel, bytes_per_pixel); const auto linear_pixel = linear_buffer.subspan( ((7 - y) * stride + x) * linear_bytes_per_pixel, linear_bytes_per_pixel); if constexpr (morton_to_linear) { if constexpr (is_compressed) { DecodePixelETC1(x, y, tile_buffer.data(), linear_pixel.data()); } else if constexpr (is_4bit) { DecodePixel4(x, y, tile_buffer.data(), linear_pixel.data()); } else { DecodePixel(tiled_pixel.data(), linear_pixel.data()); } } else { if constexpr (is_4bit) { EncodePixel4(x, y, linear_pixel.data(), tile_buffer.data()); } else { EncodePixel(linear_pixel.data(), tiled_pixel.data()); } } } } } /** * @brief Performs morton to/from linear convertions on the provided pixel data * @param converted If true performs RGBA8 to/from convertion to all color formats * @param width, height The dimentions of the rectangular region of pixels in linear_buffer * @param start_offset The number of bytes from the start of the first tile to the start of * tiled_buffer * @param end_offset The number of bytes from the start of the first tile to the end of tiled_buffer * @param linear_buffer The linear pixel data * @param tiled_buffer The tiled pixel data * * The MortonCopy is at the heart of the PICA texture implementation, as it's responsible for * converting between linear and morton tiled layouts. The function handles both convertions but * there are slightly different paths and inputs for each: * * Morton to Linear: * During uploads, tiled_buffer is always aligned to the tile or scanline boundary depending if the * linear rectangle spans multiple vertical tiles. linear_buffer does not reference the entire * texture area, but rather the specific rectangle affected by the upload. * * Linear to Morton: * This is similar to the other convertion but with some differences. In this case tiled_buffer is * not required to be aligned to any specific boundary which requires special care. * start_offset/end_offset are useful here as they tell us exactly where the data should be placed * in the linear_buffer. */ template static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset, std::span linear_buffer, std::span tiled_buffer) { constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; constexpr u32 aligned_bytes_per_pixel = converted ? 4 : GetFormatBytesPerPixel(format); constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8; static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, ""); const u32 linear_tile_stride = (7 * width + 8) * aligned_bytes_per_pixel; const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size); const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size); const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size); ASSERT(!morton_to_linear || (aligned_start_offset == start_offset && aligned_end_offset == end_offset)); // In OpenGL the texture origin is in the bottom left corner as opposed to other // APIs that have it at the top left. To avoid flipping texture coordinates in // the shader we read/write the linear buffer from the bottom up u32 linear_offset = ((height - 8) * width) * aligned_bytes_per_pixel; u32 tiled_offset = 0; u32 x = 0; u32 y = 0; const auto LinearNextTile = [&] { x = (x + 8) % width; linear_offset += 8 * aligned_bytes_per_pixel; if (!x) { y = (y + 8) % height; if (!y) { return; } linear_offset -= width * 9 * aligned_bytes_per_pixel; } }; // If during a texture download the start coordinate is not tile aligned, swizzle // the tile affected to a temporary buffer and copy the part we are interested in if (start_offset < aligned_start_offset && !morton_to_linear) { std::array tmp_buf; auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride); MortonCopyTile(width, tmp_buf, linear_data); std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset, std::min(aligned_start_offset, end_offset) - start_offset); tiled_offset += aligned_start_offset - start_offset; LinearNextTile(); } // If the copy spans multiple tiles, copy the fully aligned tiles in between. if (aligned_start_offset < aligned_end_offset) { const u32 buffer_end = tiled_offset + aligned_end_offset - aligned_start_offset; while (tiled_offset < buffer_end) { auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride); auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size); MortonCopyTile(width, tiled_data, linear_data); tiled_offset += tile_size; LinearNextTile(); } } // If during a texture download the end coordinate is not tile aligned, swizzle // the tile affected to a temporary buffer and copy the part we are interested in if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) { std::array tmp_buf; auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride); MortonCopyTile(width, tmp_buf, linear_data); std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), end_offset - aligned_end_offset); } } /** * Performs a linear copy, converting pixel formats if required. * @tparam decode If true, decodes the texture if needed. Otherwise, encodes if needed. * @tparam format Pixel format to copy. * @tparam converted If true, converts the texture to/from the appropriate format. * @param src_buffer The source pixel data * @param dst_buffer The destination pixel data * @return */ template static constexpr void LinearCopy(std::span src_buffer, std::span dst_buffer) { const std::size_t src_size = src_buffer.size(); const std::size_t dst_size = dst_buffer.size(); if constexpr (converted) { constexpr u32 encoded_bytes_per_pixel = GetFormatBpp(format) / 8; constexpr u32 decoded_bytes_per_pixel = 4; constexpr u32 src_bytes_per_pixel = decode ? encoded_bytes_per_pixel : decoded_bytes_per_pixel; constexpr u32 dst_bytes_per_pixel = decode ? decoded_bytes_per_pixel : encoded_bytes_per_pixel; for (std::size_t src_index = 0, dst_index = 0; src_index < src_size && dst_index < dst_size; src_index += src_bytes_per_pixel, dst_index += dst_bytes_per_pixel) { const auto src_pixel = src_buffer.subspan(src_index, src_bytes_per_pixel); const auto dst_pixel = dst_buffer.subspan(dst_index, dst_bytes_per_pixel); if constexpr (decode) { DecodePixel(src_pixel.data(), dst_pixel.data()); } else { EncodePixel(src_pixel.data(), dst_pixel.data()); } } } else { std::memcpy(dst_buffer.data(), src_buffer.data(), std::min(src_size, dst_size)); } } using MortonFunc = void (*)(u32, u32, u32, u32, std::span, std::span); static constexpr std::array UNSWIZZLE_TABLE = { MortonCopy, // 0 MortonCopy, // 1 MortonCopy, // 2 MortonCopy, // 3 MortonCopy, // 4 MortonCopy, // 5 MortonCopy, // 6 MortonCopy, // 7 MortonCopy, // 8 MortonCopy, // 9 MortonCopy, // 10 MortonCopy, // 11 MortonCopy, // 12 MortonCopy, // 13 MortonCopy, // 14 nullptr, // 15 MortonCopy, // 16 MortonCopy, // 17 }; static constexpr std::array UNSWIZZLE_TABLE_CONVERTED = { MortonCopy, // 0 MortonCopy, // 1 MortonCopy, // 2 MortonCopy, // 3 MortonCopy, // 4 // The following formats are implicitly converted to RGBA regardless, so ignore them. nullptr, // 5 nullptr, // 6 nullptr, // 7 nullptr, // 8 nullptr, // 9 nullptr, // 10 nullptr, // 11 nullptr, // 12 nullptr, // 13 MortonCopy, // 14 nullptr, // 15 MortonCopy, // 16 // No conversion here as we need to do a special deinterleaving conversion elsewhere. nullptr, // 17 }; static constexpr std::array SWIZZLE_TABLE = { MortonCopy, // 0 MortonCopy, // 1 MortonCopy, // 2 MortonCopy, // 3 MortonCopy, // 4 MortonCopy, // 5 MortonCopy, // 6 MortonCopy, // 7 MortonCopy, // 8 MortonCopy, // 9 MortonCopy, // 10 MortonCopy, // 11 nullptr, // 12 nullptr, // 13 MortonCopy, // 14 nullptr, // 15 MortonCopy, // 16 MortonCopy, // 17 }; static constexpr std::array SWIZZLE_TABLE_CONVERTED = { MortonCopy, // 0 MortonCopy, // 1 MortonCopy, // 2 MortonCopy, // 3 MortonCopy, // 4 // The following formats are implicitly converted from RGBA regardless, so ignore them. nullptr, // 5 nullptr, // 6 nullptr, // 7 nullptr, // 8 nullptr, // 9 nullptr, // 10 nullptr, // 11 nullptr, // 12 nullptr, // 13 MortonCopy, // 14 nullptr, // 15 MortonCopy, // 16 // No conversion here as we need to do a special interleaving conversion elsewhere. nullptr, // 17 }; using LinearFunc = void (*)(std::span, std::span); static constexpr std::array LINEAR_DECODE_TABLE = { LinearCopy, // 0 LinearCopy, // 1 LinearCopy, // 2 LinearCopy, // 3 LinearCopy, // 4 // These formats cannot be used linearly and can be ignored. nullptr, // 5 nullptr, // 6 nullptr, // 7 nullptr, // 8 nullptr, // 9 nullptr, // 10 nullptr, // 11 nullptr, // 12 nullptr, // 13 LinearCopy, // 14 nullptr, // 15 LinearCopy, // 16 LinearCopy, // 17 }; static constexpr std::array LINEAR_DECODE_TABLE_CONVERTED = { LinearCopy, // 0 LinearCopy, // 1 LinearCopy, // 2 LinearCopy, // 3 LinearCopy, // 4 // These formats cannot be used linearly and can be ignored. nullptr, // 5 nullptr, // 6 nullptr, // 7 nullptr, // 8 nullptr, // 9 nullptr, // 10 nullptr, // 11 nullptr, // 12 nullptr, // 13 LinearCopy, // 14 nullptr, // 15 LinearCopy, // 16 // No conversion here as we need to do a special deinterleaving conversion elsewhere. nullptr, // 17 }; static constexpr std::array LINEAR_ENCODE_TABLE = { LinearCopy, // 0 LinearCopy, // 1 LinearCopy, // 2 LinearCopy, // 3 LinearCopy, // 4 // These formats cannot be used linearly and can be ignored. nullptr, // 5 nullptr, // 6 nullptr, // 7 nullptr, // 8 nullptr, // 9 nullptr, // 10 nullptr, // 11 nullptr, // 12 nullptr, // 13 LinearCopy, // 14 nullptr, // 15 LinearCopy, // 16 LinearCopy, // 17 }; static constexpr std::array LINEAR_ENCODE_TABLE_CONVERTED = { LinearCopy, // 0 LinearCopy, // 1 LinearCopy, // 2 LinearCopy, // 3 LinearCopy, // 4 // These formats cannot be used linearly and can be ignored. nullptr, // 5 nullptr, // 6 nullptr, // 7 nullptr, // 8 nullptr, // 9 nullptr, // 10 nullptr, // 11 nullptr, // 12 nullptr, // 13 LinearCopy, // 14 nullptr, // 15 LinearCopy, // 16 // No conversion here as we need to do a special interleaving conversion elsewhere. nullptr, // 17 }; } // namespace VideoCore