From ade45b5b9930b52b6a1d399306539073e8e2196d Mon Sep 17 00:00:00 2001 From: wwylele Date: Mon, 17 Apr 2017 10:01:45 +0300 Subject: [PATCH 1/2] pica/swrasterizer: implement procedural texture --- src/common/vector_math.h | 10 + src/video_core/CMakeLists.txt | 2 + src/video_core/command_processor.cpp | 31 +++ src/video_core/pica_state.h | 54 +++++ src/video_core/regs.h | 7 + src/video_core/regs_texturing.h | 96 ++++++++- src/video_core/swrasterizer/proctex.cpp | 223 +++++++++++++++++++++ src/video_core/swrasterizer/proctex.h | 16 ++ src/video_core/swrasterizer/rasterizer.cpp | 13 +- 9 files changed, 448 insertions(+), 4 deletions(-) create mode 100644 src/video_core/swrasterizer/proctex.cpp create mode 100644 src/video_core/swrasterizer/proctex.h diff --git a/src/common/vector_math.h b/src/common/vector_math.h index 7ca8e15f56..c7a461a1ef 100644 --- a/src/common/vector_math.h +++ b/src/common/vector_math.h @@ -652,6 +652,16 @@ static inline decltype((X{} * int{} + X{} * int{}) / base) LerpInt(const X& begi return (begin * (base - t) + end * t) / base; } +// bilinear interpolation. s is for interpolating x00-x01 and x10-x11, and t is for the second +// interpolation. 
+template +inline auto BilinearInterp(const X& x00, const X& x01, const X& x10, const X& x11, const float s, + const float t) { + auto y0 = Lerp(x00, x01, s); + auto y1 = Lerp(x10, x11, s); + return Lerp(y0, y1, t); +} + // Utility vector factories template static inline Vec2 MakeVec(const T& x, const T& y) { diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 5317719e8e..e00b88f718 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -15,6 +15,7 @@ set(SRCS shader/shader_interpreter.cpp swrasterizer/clipper.cpp swrasterizer/framebuffer.cpp + swrasterizer/proctex.cpp swrasterizer/rasterizer.cpp swrasterizer/swrasterizer.cpp swrasterizer/texturing.cpp @@ -54,6 +55,7 @@ set(HEADERS shader/shader_interpreter.h swrasterizer/clipper.h swrasterizer/framebuffer.h + swrasterizer/proctex.h swrasterizer/rasterizer.h swrasterizer/swrasterizer.h swrasterizer/texturing.h diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 2e32ff9053..49a93e9806 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -458,6 +458,37 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { break; } + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[0], 0xb0): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[1], 0xb1): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[2], 0xb2): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[3], 0xb3): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[4], 0xb4): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[5], 0xb5): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[6], 0xb6): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[7], 0xb7): { + auto& index = regs.texturing.proctex_lut_config.index; + auto& pt = g_state.proctex; + + switch (regs.texturing.proctex_lut_config.ref_table.Value()) { + case TexturingRegs::ProcTexLutTable::Noise: + 
pt.noise_table[index % pt.noise_table.size()].raw = value; + break; + case TexturingRegs::ProcTexLutTable::ColorMap: + pt.color_map_table[index % pt.color_map_table.size()].raw = value; + break; + case TexturingRegs::ProcTexLutTable::AlphaMap: + pt.alpha_map_table[index % pt.alpha_map_table.size()].raw = value; + break; + case TexturingRegs::ProcTexLutTable::Color: + pt.color_table[index % pt.color_table.size()].raw = value; + break; + case TexturingRegs::ProcTexLutTable::ColorDiff: + pt.color_diff_table[index % pt.color_diff_table.size()].raw = value; + break; + } + index.Assign(index + 1); + break; + } default: break; } diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index af7536d11e..f46db09fba 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -7,6 +7,7 @@ #include #include "common/bit_field.h" #include "common/common_types.h" +#include "common/vector_math.h" #include "video_core/primitive_assembly.h" #include "video_core/regs.h" #include "video_core/shader/shader.h" @@ -25,6 +26,59 @@ struct State { Shader::AttributeBuffer input_default_attributes; + struct ProcTex { + union ValueEntry { + u32 raw; + + // LUT value, encoded as 12-bit fixed point, with 12 fraction bits + BitField<0, 12, u32> value; // 0.0.12 fixed point + + // Difference between two entry values. Used for efficient interpolation. + // 0.0.12 fixed point with two's complement. The range is [-0.5, 0.5). 
+ // Note: the type of this is different from the one of lighting LUT + BitField<12, 12, s32> difference; + + float ToFloat() const { + return static_cast(value) / 4095.f; + } + + float DiffToFloat() const { + return static_cast(difference) / 4095.f; + } + }; + + union ColorEntry { + u32 raw; + BitField<0, 8, u32> r; + BitField<8, 8, u32> g; + BitField<16, 8, u32> b; + BitField<24, 8, u32> a; + + Math::Vec4 ToVector() const { + return {static_cast(r), static_cast(g), static_cast(b), + static_cast(a)}; + } + }; + + union ColorDifferenceEntry { + u32 raw; + BitField<0, 8, s32> r; // half of the difference between two ColorEntry + BitField<8, 8, s32> g; + BitField<16, 8, s32> b; + BitField<24, 8, s32> a; + + Math::Vec4 ToVector() const { + return Math::Vec4{r, g, b, a} * 2; + } + }; + + std::array noise_table; + std::array color_map_table; + std::array alpha_map_table; + std::array color_table; + std::array color_diff_table; + } proctex; + struct { union LutEntry { // Used for raw access diff --git a/src/video_core/regs.h b/src/video_core/regs.h index 1776dad89b..6d5f98cac3 100644 --- a/src/video_core/regs.h +++ b/src/video_core/regs.h @@ -101,6 +101,13 @@ ASSERT_REG_POSITION(texturing.texture1, 0x91); ASSERT_REG_POSITION(texturing.texture1_format, 0x96); ASSERT_REG_POSITION(texturing.texture2, 0x99); ASSERT_REG_POSITION(texturing.texture2_format, 0x9e); +ASSERT_REG_POSITION(texturing.proctex, 0xa8); +ASSERT_REG_POSITION(texturing.proctex_noise_u, 0xa9); +ASSERT_REG_POSITION(texturing.proctex_noise_v, 0xaa); +ASSERT_REG_POSITION(texturing.proctex_noise_frequency, 0xab); +ASSERT_REG_POSITION(texturing.proctex_lut, 0xac); +ASSERT_REG_POSITION(texturing.proctex_lut_offset, 0xad); +ASSERT_REG_POSITION(texturing.proctex_lut_config, 0xaf); ASSERT_REG_POSITION(texturing.tev_stage0, 0xc0); ASSERT_REG_POSITION(texturing.tev_stage1, 0xc8); ASSERT_REG_POSITION(texturing.tev_stage2, 0xd0); diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h index 
8a7c6efe4e..20f9495ed3 100644 --- a/src/video_core/regs_texturing.h +++ b/src/video_core/regs_texturing.h @@ -122,8 +122,8 @@ struct TexturingRegs { BitField<0, 1, u32> texture0_enable; BitField<1, 1, u32> texture1_enable; BitField<2, 1, u32> texture2_enable; - BitField<8, 2, u32> texture3_coordinates; // TODO: unimplemented - BitField<10, 1, u32> texture3_enable; // TODO: unimplemented + BitField<8, 2, u32> texture3_coordinates; + BitField<10, 1, u32> texture3_enable; BitField<13, 1, u32> texture2_use_coord1; BitField<16, 1, u32> clear_texture_cache; // TODO: unimplemented } main_config; @@ -137,7 +137,7 @@ struct TexturingRegs { INSERT_PADDING_WORDS(0x2); TextureConfig texture2; BitField<0, 4, TextureFormat> texture2_format; - INSERT_PADDING_WORDS(0x21); + INSERT_PADDING_WORDS(0x9); struct FullTextureConfig { const bool enabled; @@ -152,6 +152,96 @@ struct TexturingRegs { }}; } + // 0xa8-0xad: ProcTex Config + enum class ProcTexClamp : u32 { + ToZero = 0, + ToEdge = 1, + SymmetricalRepeat = 2, + MirroredRepeat = 3, + Pulse = 4, + }; + + enum class ProcTexCombiner : u32 { + U = 0, // u + U2 = 1, // u * u + V = 2, // v + V2 = 3, // v * v + Add = 4, // (u + v) / 2 + Add2 = 5, // (u * u + v * v) / 2 + SqrtAdd2 = 6, // sqrt(u * u + v * v) + Min = 7, // min(u, v) + Max = 8, // max(u, v) + RMax = 9, // Average of Max and SqrtAdd2 + }; + + enum class ProcTexShift : u32 { + None = 0, + Odd = 1, + Even = 2, + }; + + union { + BitField<0, 3, ProcTexClamp> u_clamp; + BitField<3, 3, ProcTexClamp> v_clamp; + BitField<6, 4, ProcTexCombiner> color_combiner; + BitField<10, 4, ProcTexCombiner> alpha_combiner; + BitField<14, 1, u32> separate_alpha; + BitField<15, 1, u32> noise_enable; + BitField<16, 2, ProcTexShift> u_shift; + BitField<18, 2, ProcTexShift> v_shift; + BitField<20, 8, u32> bias_low; // float16 TODO: unimplemented + } proctex; + + union ProcTexNoiseConfig { + BitField<0, 16, s32> amplitude; // fixed1.3.12 + BitField<16, 16, u32> phase; // float16 + }; + + 
ProcTexNoiseConfig proctex_noise_u; + ProcTexNoiseConfig proctex_noise_v; + + union { + BitField<0, 16, u32> u; // float16 + BitField<16, 16, u32> v; // float16 + } proctex_noise_frequency; + + enum class ProcTexFilter : u32 { + Nearest = 0, + Linear = 1, + NearestMipmapNearest = 2, + LinearMipmapNearest = 3, + NearestMipmapLinear = 4, + LinearMipmapLinear = 5, + }; + + union { + BitField<0, 3, ProcTexFilter> filter; + BitField<11, 8, u32> width; + BitField<19, 8, u32> bias_high; // TODO: unimplemented + } proctex_lut; + + BitField<0, 8, u32> proctex_lut_offset; + + INSERT_PADDING_WORDS(0x1); + + // 0xaf-0xb7: ProcTex LUT + enum class ProcTexLutTable : u32 { + Noise = 0, + ColorMap = 2, + AlphaMap = 3, + Color = 4, + ColorDiff = 5, + }; + + union { + BitField<0, 8, u32> index; + BitField<8, 4, ProcTexLutTable> ref_table; + } proctex_lut_config; + + u32 proctex_lut_data[8]; + + INSERT_PADDING_WORDS(0x8); + // 0xc0-0xff: Texture Combiner (akin to glTexEnv) struct TevStageConfig { enum class Source : u32 { diff --git a/src/video_core/swrasterizer/proctex.cpp b/src/video_core/swrasterizer/proctex.cpp new file mode 100644 index 0000000000..b69892778b --- /dev/null +++ b/src/video_core/swrasterizer/proctex.cpp @@ -0,0 +1,223 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include "common/math_util.h" +#include "video_core/swrasterizer/proctex.h" + +namespace Pica { +namespace Rasterizer { + +using ProcTexClamp = TexturingRegs::ProcTexClamp; +using ProcTexShift = TexturingRegs::ProcTexShift; +using ProcTexCombiner = TexturingRegs::ProcTexCombiner; +using ProcTexFilter = TexturingRegs::ProcTexFilter; + +static float LookupLUT(const std::array& lut, float coord) { + // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and + // coord=1.0 is lut[127]+lut_diff[127]. 
For other indices, the result is interpolated using + // value entries and difference entries. + coord *= 128; + const int index_int = std::min(static_cast(coord), 127); + const float frac = coord - index_int; + return lut[index_int].ToFloat() + frac * lut[index_int].DiffToFloat(); +} + +// These functions are used to generate random noise for procedural texture. Their results are +// verified against real hardware, but it's not known if the algorithm is the same as hardware. +static unsigned int NoiseRand1D(unsigned int v) { + static constexpr std::array table{ + {0, 4, 10, 8, 4, 9, 7, 12, 5, 15, 13, 14, 11, 15, 2, 11}}; + return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]; +} + +static float NoiseRand2D(unsigned int x, unsigned int y) { + static constexpr std::array table{ + {10, 2, 15, 8, 0, 7, 4, 5, 5, 13, 2, 6, 13, 9, 3, 14}}; + unsigned int u2 = NoiseRand1D(x); + unsigned int v2 = NoiseRand1D(y); + v2 += ((u2 & 3) == 1) ? 4 : 0; + v2 ^= (u2 & 1) * 6; + v2 += 10 + u2; + v2 &= 0xF; + v2 ^= table[u2]; + return -1.0f + v2 * 2.0f / 15.0f; +} + +static float NoiseCoef(float u, float v, const TexturingRegs& regs, const State::ProcTex& state) { + const float freq_u = float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(); + const float freq_v = float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(); + const float phase_u = float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(); + const float phase_v = float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(); + const float x = 9 * freq_u * std::abs(u + phase_u); + const float y = 9 * freq_v * std::abs(v + phase_v); + const int x_int = static_cast(x); + const int y_int = static_cast(y); + const float x_frac = x - x_int; + const float y_frac = y - y_int; + + const float g0 = NoiseRand2D(x_int, y_int) * (x_frac + y_frac); + const float g1 = NoiseRand2D(x_int + 1, y_int) * (x_frac + y_frac - 1); + const float g2 = NoiseRand2D(x_int, y_int + 1) * (x_frac + y_frac - 1); + const float g3 = NoiseRand2D(x_int + 1, y_int + 1) 
* (x_frac + y_frac - 2); + const float x_noise = LookupLUT(state.noise_table, x_frac); + const float y_noise = LookupLUT(state.noise_table, y_frac); + return Math::BilinearInterp(g0, g1, g2, g3, x_noise, y_noise); +} + +static float GetShiftOffset(float v, ProcTexShift mode, ProcTexClamp clamp_mode) { + const float offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? 1 : 0.5f; + switch (mode) { + case ProcTexShift::None: + return 0; + case ProcTexShift::Odd: + return offset * (((int)v / 2) % 2); + case ProcTexShift::Even: + return offset * ((((int)v + 1) / 2) % 2); + default: + LOG_CRITICAL(HW_GPU, "Unknown shift mode %u", static_cast(mode)); + return 0; + } +} + +static void ClampCoord(float& coord, ProcTexClamp mode) { + switch (mode) { + case ProcTexClamp::ToZero: + if (coord > 1.0f) + coord = 0.0f; + break; + case ProcTexClamp::ToEdge: + coord = std::min(coord, 1.0f); + break; + case ProcTexClamp::SymmetricalRepeat: + coord = coord - std::floor(coord); + break; + case ProcTexClamp::MirroredRepeat: { + int integer = static_cast(coord); + float frac = coord - integer; + coord = (integer % 2) == 0 ? 
frac : (1.0f - frac); + break; + } + case ProcTexClamp::Pulse: + if (coord <= 0.5f) + coord = 0.0f; + else + coord = 1.0f; + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown clamp mode %u", static_cast(mode)); + coord = std::min(coord, 1.0f); + break; + } +} + +static float CombineAndMap(float u, float v, ProcTexCombiner combiner, + const std::array& map_table) { + float f; + switch (combiner) { + case ProcTexCombiner::U: + f = u; + break; + case ProcTexCombiner::U2: + f = u * u; + break; + case ProcTexCombiner::V: + f = v; + break; + case ProcTexCombiner::V2: + f = v * v; + break; + case ProcTexCombiner::Add: + f = (u + v) * 0.5f; + break; + case ProcTexCombiner::Add2: + f = (u * u + v * v) * 0.5f; + break; + case ProcTexCombiner::SqrtAdd2: + f = std::min(std::sqrt(u * u + v * v), 1.0f); + break; + case ProcTexCombiner::Min: + f = std::min(u, v); + break; + case ProcTexCombiner::Max: + f = std::max(u, v); + break; + case ProcTexCombiner::RMax: + f = std::min(((u + v) * 0.5f + std::sqrt(u * u + v * v)) * 0.5f, 1.0f); + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown combiner %u", static_cast(combiner)); + f = 0.0f; + break; + } + return LookupLUT(map_table, f); +} + +Math::Vec4 ProcTex(float u, float v, const TexturingRegs& regs, const State::ProcTex& state) { + u = std::abs(u); + v = std::abs(v); + + // Get shift offset before noise generation + const float u_shift = GetShiftOffset(v, regs.proctex.u_shift, regs.proctex.u_clamp); + const float v_shift = GetShiftOffset(u, regs.proctex.v_shift, regs.proctex.v_clamp); + + // Generate noise + if (regs.proctex.noise_enable) { + float noise = NoiseCoef(u, v, regs, state); + u += noise * regs.proctex_noise_u.amplitude / 4095.0f; + v += noise * regs.proctex_noise_v.amplitude / 4095.0f; + u = std::abs(u); + v = std::abs(v); + } + + // Shift + u += u_shift; + v += v_shift; + + // Clamp + ClampCoord(u, regs.proctex.u_clamp); 
+ ClampCoord(v, regs.proctex.v_clamp); + + // Combine and map + const float lut_coord = CombineAndMap(u, v, regs.proctex.color_combiner, state.color_map_table); + + // Look up the color + // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] + const u32 offset = regs.proctex_lut_offset; + const u32 width = regs.proctex_lut.width; + const float index = offset + (lut_coord * (width - 1)); + Math::Vec4 final_color; + // TODO(wwylele): implement mipmap + switch (regs.proctex_lut.filter) { + case ProcTexFilter::Linear: + case ProcTexFilter::LinearMipmapLinear: + case ProcTexFilter::LinearMipmapNearest: { + const int index_int = static_cast(index); + const float frac = index - index_int; + const auto color_value = state.color_table[index_int].ToVector().Cast(); + const auto color_diff = state.color_diff_table[index_int].ToVector().Cast(); + final_color = (color_value + frac * color_diff).Cast(); + break; + } + case ProcTexFilter::Nearest: + case ProcTexFilter::NearestMipmapLinear: + case ProcTexFilter::NearestMipmapNearest: + final_color = state.color_table[static_cast(std::round(index))].ToVector(); + break; + } + + if (regs.proctex.separate_alpha) { + // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It + // uses the output of CombineAndMap directly instead. + const float final_alpha = + CombineAndMap(u, v, regs.proctex.alpha_combiner, state.alpha_map_table); + return Math::MakeVec(final_color.rgb(), static_cast(final_alpha * 255)); + } else { + return final_color; + } +} + +} // namespace Rasterizer +} // namespace Pica diff --git a/src/video_core/swrasterizer/proctex.h b/src/video_core/swrasterizer/proctex.h new file mode 100644 index 0000000000..036e4620e5 --- /dev/null +++ b/src/video_core/swrasterizer/proctex.h @@ -0,0 +1,18 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_types.h" +#include "common/vector_math.h" +#include "video_core/pica_state.h" + +namespace Pica { +namespace Rasterizer { + +/// Generates procedural texture color for the given coordinates +Math::Vec4 ProcTex(float u, float v, const TexturingRegs& regs, const State::ProcTex& state); + +} // namespace Rasterizer +} // namespace Pica diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 20addf0bdb..e9edf03606 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -23,6 +23,7 @@ #include "video_core/regs_texturing.h" #include "video_core/shader/shader.h" #include "video_core/swrasterizer/framebuffer.h" +#include "video_core/swrasterizer/proctex.h" #include "video_core/swrasterizer/rasterizer.h" #include "video_core/swrasterizer/texturing.h" #include "video_core/texture/texture_decode.h" @@ -268,7 +269,7 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u()); uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v()); - Math::Vec4 texture_color[3]{}; + Math::Vec4 texture_color[4]{}; for (int i = 0; i < 3; ++i) { const auto& texture = textures[i]; if (!texture.enabled) @@ -334,6 +335,13 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve } } + // sample procedural texture + if (regs.texturing.main_config.texture3_enable) { + const auto& proctex_uv = uv[regs.texturing.main_config.texture3_coordinates]; + texture_color[3] = ProcTex(proctex_uv.u().ToFloat32(), proctex_uv.v().ToFloat32(), + g_state.regs.texturing, g_state.proctex); + } + // Texture environment - consists of 6 stages of color and alpha combining. // // Color combiners take three input color values from some source (e.g. 
interpolated @@ -376,6 +384,9 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve case Source::Texture2: return texture_color[2]; + case Source::Texture3: + return texture_color[3]; + case Source::PreviousBuffer: return combiner_buffer; From 4d62e75fb2438fea3e9199db1641a7fe2848222a Mon Sep 17 00:00:00 2001 From: wwylele Date: Fri, 5 May 2017 15:25:04 +0300 Subject: [PATCH 2/2] gl_rasterizer: implement procedural texture --- .../renderer_opengl/gl_rasterizer.cpp | 232 +++++++++++++++ .../renderer_opengl/gl_rasterizer.h | 35 ++- .../renderer_opengl/gl_shader_gen.cpp | 271 +++++++++++++++++- .../renderer_opengl/gl_shader_gen.h | 13 + src/video_core/renderer_opengl/gl_state.cpp | 36 +++ src/video_core/renderer_opengl/gl_state.h | 20 ++ 6 files changed, 600 insertions(+), 7 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 12ac9bbd97..aa9b831dd5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -55,6 +55,12 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { uniform_block_data.fog_lut_dirty = true; + uniform_block_data.proctex_noise_lut_dirty = true; + uniform_block_data.proctex_color_map_dirty = true; + uniform_block_data.proctex_alpha_map_dirty = true; + uniform_block_data.proctex_lut_dirty = true; + uniform_block_data.proctex_diff_lut_dirty = true; + // Set vertex attributes glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); @@ -115,6 +121,51 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + // Setup the noise LUT for proctex + proctex_noise_lut.Create(); + state.proctex_noise_lut.texture_1d = proctex_noise_lut.handle; + state.Apply(); + 
glActiveTexture(GL_TEXTURE10); + glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + // Setup the color map for proctex + proctex_color_map.Create(); + state.proctex_color_map.texture_1d = proctex_color_map.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE11); + glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + // Setup the alpha map for proctex + proctex_alpha_map.Create(); + state.proctex_alpha_map.texture_1d = proctex_alpha_map.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE12); + glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + // Setup the LUT for proctex + proctex_lut.Create(); + state.proctex_lut.texture_1d = proctex_lut.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE13); + glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + // Setup the difference LUT for proctex + proctex_diff_lut.Create(); + state.proctex_diff_lut.texture_1d = proctex_diff_lut.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE14); + glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + // Sync fixed function OpenGL state SyncCullMode(); SyncBlendEnabled(); @@ -272,6 +323,36 @@ void RasterizerOpenGL::DrawTriangles() { 
uniform_block_data.fog_lut_dirty = false; } + // Sync the proctex noise lut + if (uniform_block_data.proctex_noise_lut_dirty) { + SyncProcTexNoiseLUT(); + uniform_block_data.proctex_noise_lut_dirty = false; + } + + // Sync the proctex color map + if (uniform_block_data.proctex_color_map_dirty) { + SyncProcTexColorMap(); + uniform_block_data.proctex_color_map_dirty = false; + } + + // Sync the proctex alpha map + if (uniform_block_data.proctex_alpha_map_dirty) { + SyncProcTexAlphaMap(); + uniform_block_data.proctex_alpha_map_dirty = false; + } + + // Sync the proctex lut + if (uniform_block_data.proctex_lut_dirty) { + SyncProcTexLUT(); + uniform_block_data.proctex_lut_dirty = false; + } + + // Sync the proctex difference lut + if (uniform_block_data.proctex_diff_lut_dirty) { + SyncProcTexDiffLUT(); + uniform_block_data.proctex_diff_lut_dirty = false; + } + // Sync the uniform data if (uniform_block_data.dirty) { glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, @@ -354,6 +435,47 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { uniform_block_data.fog_lut_dirty = true; break; + // ProcTex state + case PICA_REG_INDEX(texturing.proctex): + case PICA_REG_INDEX(texturing.proctex_lut): + case PICA_REG_INDEX(texturing.proctex_lut_offset): + shader_dirty = true; + break; + + case PICA_REG_INDEX(texturing.proctex_noise_u): + case PICA_REG_INDEX(texturing.proctex_noise_v): + case PICA_REG_INDEX(texturing.proctex_noise_frequency): + SyncProcTexNoise(); + break; + + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[0], 0xb0): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[1], 0xb1): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[2], 0xb2): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[3], 0xb3): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[4], 0xb4): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[5], 0xb5): + case 
PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[6], 0xb6): + case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[7], 0xb7): + using Pica::TexturingRegs; + switch (regs.texturing.proctex_lut_config.ref_table.Value()) { + case TexturingRegs::ProcTexLutTable::Noise: + uniform_block_data.proctex_noise_lut_dirty = true; + break; + case TexturingRegs::ProcTexLutTable::ColorMap: + uniform_block_data.proctex_color_map_dirty = true; + break; + case TexturingRegs::ProcTexLutTable::AlphaMap: + uniform_block_data.proctex_alpha_map_dirty = true; + break; + case TexturingRegs::ProcTexLutTable::Color: + uniform_block_data.proctex_lut_dirty = true; + break; + case TexturingRegs::ProcTexLutTable::ColorDiff: + uniform_block_data.proctex_diff_lut_dirty = true; + break; + } + break; + // Alpha test case PICA_REG_INDEX(framebuffer.output_merger.alpha_test): SyncAlphaTest(); @@ -1072,6 +1194,35 @@ void RasterizerOpenGL::SetShader() { glUniform1i(uniform_fog_lut, 9); } + GLint uniform_proctex_noise_lut = + glGetUniformLocation(shader->shader.handle, "proctex_noise_lut"); + if (uniform_proctex_noise_lut != -1) { + glUniform1i(uniform_proctex_noise_lut, 10); + } + + GLint uniform_proctex_color_map = + glGetUniformLocation(shader->shader.handle, "proctex_color_map"); + if (uniform_proctex_color_map != -1) { + glUniform1i(uniform_proctex_color_map, 11); + } + + GLint uniform_proctex_alpha_map = + glGetUniformLocation(shader->shader.handle, "proctex_alpha_map"); + if (uniform_proctex_alpha_map != -1) { + glUniform1i(uniform_proctex_alpha_map, 12); + } + + GLint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, "proctex_lut"); + if (uniform_proctex_lut != -1) { + glUniform1i(uniform_proctex_lut, 13); + } + + GLint uniform_proctex_diff_lut = + glGetUniformLocation(shader->shader.handle, "proctex_diff_lut"); + if (uniform_proctex_diff_lut != -1) { + glUniform1i(uniform_proctex_diff_lut, 14); + } + current_shader = shader_cache.emplace(config, 
std::move(shader)).first->second.get(); GLuint block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); @@ -1105,6 +1256,7 @@ void RasterizerOpenGL::SetShader() { } SyncFogColor(); + SyncProcTexNoise(); } } } @@ -1204,6 +1356,86 @@ void RasterizerOpenGL::SyncFogLUT() { } } +void RasterizerOpenGL::SyncProcTexNoise() { + const auto& regs = Pica::g_state.regs.texturing; + uniform_block_data.data.proctex_noise_f = { + Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(), + Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(), + }; + uniform_block_data.data.proctex_noise_a = { + regs.proctex_noise_u.amplitude / 4095.0f, regs.proctex_noise_v.amplitude / 4095.0f, + }; + uniform_block_data.data.proctex_noise_p = { + Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(), + Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(), + }; + + uniform_block_data.dirty = true; +} + +// helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap +static void SyncProcTexValueLUT(const std::array& lut, + std::array& lut_data, GLenum texture) { + std::array new_data; + std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { + return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lut_data) { + lut_data = new_data; + glActiveTexture(texture); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RG, GL_FLOAT, lut_data.data()); + } +} + +void RasterizerOpenGL::SyncProcTexNoiseLUT() { + SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, GL_TEXTURE10); +} + +void RasterizerOpenGL::SyncProcTexColorMap() { + SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, + GL_TEXTURE11); +} + +void RasterizerOpenGL::SyncProcTexAlphaMap() { + SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, + GL_TEXTURE12); +} + +void RasterizerOpenGL::SyncProcTexLUT() { + std::array new_data; + + 
std::transform(Pica::g_state.proctex.color_table.begin(), + Pica::g_state.proctex.color_table.end(), new_data.begin(), + [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_lut_data) { + proctex_lut_data = new_data; + glActiveTexture(GL_TEXTURE13); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_lut_data.data()); + } +} + +void RasterizerOpenGL::SyncProcTexDiffLUT() { + std::array new_data; + + std::transform(Pica::g_state.proctex.color_diff_table.begin(), + Pica::g_state.proctex.color_diff_table.end(), new_data.begin(), + [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_diff_lut_data) { + proctex_diff_lut_data = new_data; + glActiveTexture(GL_TEXTURE14); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_diff_lut_data.data()); + } +} + void RasterizerOpenGL::SyncAlphaTest() { const auto& regs = Pica::g_state.regs; if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 3e1770d775..a9ad7d6607 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -143,6 +143,9 @@ private: GLint scissor_x2; GLint scissor_y2; alignas(16) GLvec3 fog_color; + alignas(8) GLvec2 proctex_noise_f; + alignas(8) GLvec2 proctex_noise_a; + alignas(8) GLvec2 proctex_noise_p; alignas(16) GLvec3 lighting_global_ambient; LightSrc light_src[8]; alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages @@ -150,7 +153,7 @@ private: }; static_assert( - sizeof(UniformData) == 0x3C0, + sizeof(UniformData) == 0x3E0, "The size of the UniformData structure has changed, update the structure in the shader"); 
static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); @@ -180,6 +183,16 @@ private: void SyncFogColor(); void SyncFogLUT(); + /// Sync the procedural texture noise configuration to match the PICA register + void SyncProcTexNoise(); + + /// Sync the procedural texture lookup tables + void SyncProcTexNoiseLUT(); + void SyncProcTexColorMap(); + void SyncProcTexAlphaMap(); + void SyncProcTexLUT(); + void SyncProcTexDiffLUT(); + /// Syncs the alpha test states to match the PICA register void SyncAlphaTest(); @@ -248,6 +261,11 @@ private: UniformData data; bool lut_dirty[6]; bool fog_lut_dirty; + bool proctex_noise_lut_dirty; + bool proctex_color_map_dirty; + bool proctex_alpha_map_dirty; + bool proctex_lut_dirty; + bool proctex_diff_lut_dirty; bool dirty; } uniform_block_data = {}; @@ -262,4 +280,19 @@ private: OGLTexture fog_lut; std::array fog_lut_data{}; + + OGLTexture proctex_noise_lut; + std::array proctex_noise_lut_data{}; + + OGLTexture proctex_color_map; + std::array proctex_color_map_data{}; + + OGLTexture proctex_alpha_map; + std::array proctex_alpha_map_data{}; + + OGLTexture proctex_lut; + std::array proctex_lut_data{}; + + OGLTexture proctex_diff_lut; + std::array proctex_diff_lut_data{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 7b44dade81..600119321d 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -114,6 +114,22 @@ PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) { state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0; state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0; + state.proctex.enable = regs.texturing.main_config.texture3_enable; + if (state.proctex.enable) { + state.proctex.coord = regs.texturing.main_config.texture3_coordinates; + state.proctex.u_clamp = 
regs.texturing.proctex.u_clamp; + state.proctex.v_clamp = regs.texturing.proctex.v_clamp; + state.proctex.color_combiner = regs.texturing.proctex.color_combiner; + state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner; + state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha; + state.proctex.noise_enable = regs.texturing.proctex.noise_enable; + state.proctex.u_shift = regs.texturing.proctex.u_shift; + state.proctex.v_shift = regs.texturing.proctex.v_shift; + state.proctex.lut_width = regs.texturing.proctex_lut.width; + state.proctex.lut_offset = regs.texturing.proctex_lut_offset; + state.proctex.lut_filter = regs.texturing.proctex_lut.filter; + } + return res; } @@ -132,8 +148,7 @@ static std::string TexCoord(const PicaShaderConfig& config, int texture_unit) { if (texture_unit == 2 && config.state.texture2_use_coord1) { return "texcoord[1]"; } - // TODO: if texture unit 3 (procedural texture) implementation also uses this function, - // config.state.texture3_coordinates should be repected here. 
+ return "texcoord[" + std::to_string(texture_unit) + "]"; } @@ -175,6 +190,14 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, case Source::Texture2: out += "texture(tex[2], " + TexCoord(config, 2) + ")"; break; + case Source::Texture3: + if (config.state.proctex.enable) { + out += "ProcTex()"; + } else { + LOG_ERROR(Render_OpenGL, "Using Texture3 without enabling it"); + out += "vec4(0.0)"; + } + break; case Source::PreviousBuffer: out += "combiner_buffer"; break; @@ -483,9 +506,18 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { // Bump mapping is enabled using a normal map, read perturbation vector from the selected // texture - std::string bump_selector = std::to_string(lighting.bump_selector); - out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], " + - TexCoord(config, lighting.bump_selector) + ").rgb - 1.0;\n"; + if (lighting.bump_selector == 3) { + if (config.state.proctex.enable) { + out += "vec3 surface_normal = 2.0 * ProcTex().rgb - 1.0;\n"; + } else { + LOG_ERROR(Render_OpenGL, "Using Texture3 without enabling it"); + out += "vec3 surface_normal = vec3(-1.0);\n"; + } + } else { + std::string bump_selector = std::to_string(lighting.bump_selector); + out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], " + + TexCoord(config, lighting.bump_selector) + ").rgb - 1.0;\n"; + } // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher // precision result @@ -693,6 +725,221 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { out += "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; } +using ProcTexClamp = TexturingRegs::ProcTexClamp; +using ProcTexShift = TexturingRegs::ProcTexShift; +using ProcTexCombiner = TexturingRegs::ProcTexCombiner; +using ProcTexFilter = TexturingRegs::ProcTexFilter; + +void 
AppendProcTexShiftOffset(std::string& out, const std::string& v, ProcTexShift mode, + ProcTexClamp clamp_mode) { + std::string offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? "1.0" : "0.5"; + switch (mode) { + case ProcTexShift::None: + out += "0"; + break; + case ProcTexShift::Odd: + out += offset + " * ((int(" + v + ") / 2) % 2)"; + break; + case ProcTexShift::Even: + out += offset + " * (((int(" + v + ") + 1) / 2) % 2)"; + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown shift mode %u", static_cast(mode)); + out += "0"; + break; + } +} + +void AppendProcTexClamp(std::string& out, const std::string& var, ProcTexClamp mode) { + switch (mode) { + case ProcTexClamp::ToZero: + out += var + " = " + var + " > 1.0 ? 0 : " + var + ";\n"; + break; + case ProcTexClamp::ToEdge: + out += var + " = " + "min(" + var + ", 1.0);\n"; + break; + case ProcTexClamp::SymmetricalRepeat: + out += var + " = " + "fract(" + var + ");\n"; + break; + case ProcTexClamp::MirroredRepeat: { + out += + var + " = int(" + var + ") % 2 == 0 ? fract(" + var + ") : 1.0 - fract(" + var + ");\n"; + break; + } + case ProcTexClamp::Pulse: + out += var + " = " + var + " > 0.5 ? 
1.0 : 0.0;\n"; + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown clamp mode %u", static_cast(mode)); + out += var + " = " + "min(" + var + ", 1.0);\n"; + break; + } +} + +void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, + const std::string& map_lut) { + std::string combined; + switch (combiner) { + case ProcTexCombiner::U: + combined = "u"; + break; + case ProcTexCombiner::U2: + combined = "(u * u)"; + break; + case TexturingRegs::ProcTexCombiner::V: + combined = "v"; + break; + case TexturingRegs::ProcTexCombiner::V2: + combined = "(v * v)"; + break; + case TexturingRegs::ProcTexCombiner::Add: + combined = "((u + v) * 0.5)"; + break; + case TexturingRegs::ProcTexCombiner::Add2: + combined = "((u * u + v * v) * 0.5)"; + break; + case TexturingRegs::ProcTexCombiner::SqrtAdd2: + combined = "min(sqrt(u * u + v * v), 1.0)"; + break; + case TexturingRegs::ProcTexCombiner::Min: + combined = "min(u, v)"; + break; + case TexturingRegs::ProcTexCombiner::Max: + combined = "max(u, v)"; + break; + case TexturingRegs::ProcTexCombiner::RMax: + combined = "min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)"; + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown combiner %u", static_cast(combiner)); + combined = "0.0"; + break; + } + out += "ProcTexLookupLUT(" + map_lut + ", " + combined + ")"; +} + +void AppendProcTexSampler(std::string& out, const PicaShaderConfig& config) { + // LUT sampling utility + // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and + // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using + // value entries and difference entries. 
+ out += R"( +float ProcTexLookupLUT(sampler1D lut, float coord) { + coord *= 128; + float index_i = clamp(floor(coord), 0.0, 127.0); + float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be + // extracted as index_i = 127.0 and index_f = 1.0 + vec2 entry = texelFetch(lut, int(index_i), 0).rg; + return clamp(entry.r + entry.g * index_f, 0.0, 1.0); +} + )"; + + // Noise utility + if (config.state.proctex.noise_enable) { + // See swrasterizer/proctex.cpp for more information about these functions + out += R"( +int ProcTexNoiseRand1D(int v) { + const int table[] = int[](0,4,10,8,4,9,7,12,5,15,13,14,11,15,2,11); + return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]; +} + +float ProcTexNoiseRand2D(vec2 point) { + const int table[] = int[](10,2,15,8,0,7,4,5,5,13,2,6,13,9,3,14); + int u2 = ProcTexNoiseRand1D(int(point.x)); + int v2 = ProcTexNoiseRand1D(int(point.y)); + v2 += ((u2 & 3) == 1) ? 4 : 0; + v2 ^= (u2 & 1) * 6; + v2 += 10 + u2; + v2 &= 0xF; + v2 ^= table[u2]; + return -1.0 + float(v2) * 2.0/ 15.0; +} + +float ProcTexNoiseCoef(vec2 x) { + vec2 grid = 9.0 * proctex_noise_f * abs(x + proctex_noise_p); + vec2 point = floor(grid); + vec2 frac = grid - point; + + float g0 = ProcTexNoiseRand2D(point) * (frac.x + frac.y); + float g1 = ProcTexNoiseRand2D(point + vec2(1.0, 0.0)) * (frac.x + frac.y - 1.0); + float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); + float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0); + + float x_noise = ProcTexLookupLUT(proctex_noise_lut, frac.x); + float y_noise = ProcTexLookupLUT(proctex_noise_lut, frac.y); + float x0 = mix(g0, g1, x_noise); + float x1 = mix(g2, g3, x_noise); + return mix(x0, x1, y_noise); +} + )"; + } + + out += "vec4 ProcTex() {\n"; + out += "vec2 uv = abs(texcoord[" + std::to_string(config.state.proctex.coord) + "]);\n"; + + // Get shift offset before noise generation + out += "float u_shift = "; + AppendProcTexShiftOffset(out, 
"uv.y", config.state.proctex.u_shift, + config.state.proctex.u_clamp); + out += ";\n"; + out += "float v_shift = "; + AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift, + config.state.proctex.v_clamp); + out += ";\n"; + + // Generate noise + if (config.state.proctex.noise_enable) { + out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n"; + out += "uv = abs(uv);\n"; + } + + // Shift + out += "float u = uv.x + u_shift;\n"; + out += "float v = uv.y + v_shift;\n"; + + // Clamp + AppendProcTexClamp(out, "u", config.state.proctex.u_clamp); + AppendProcTexClamp(out, "v", config.state.proctex.v_clamp); + + // Combine and map + out += "float lut_coord = "; + AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "proctex_color_map"); + out += ";\n"; + + // Look up color + // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] + out += "lut_coord *= " + std::to_string(config.state.proctex.lut_width - 1) + ";\n"; + // TODO(wwylele): implement mipmap + switch (config.state.proctex.lut_filter) { + case ProcTexFilter::Linear: + case ProcTexFilter::LinearMipmapLinear: + case ProcTexFilter::LinearMipmapNearest: + out += "int lut_index_i = int(lut_coord) + " + + std::to_string(config.state.proctex.lut_offset) + ";\n"; + out += "float lut_index_f = fract(lut_coord);\n"; + out += "vec4 final_color = texelFetch(proctex_lut, lut_index_i, 0) + lut_index_f * " + "texelFetch(proctex_diff_lut, lut_index_i, 0);\n"; + break; + case ProcTexFilter::Nearest: + case ProcTexFilter::NearestMipmapLinear: + case ProcTexFilter::NearestMipmapNearest: + out += "lut_coord += " + std::to_string(config.state.proctex.lut_offset) + ";\n"; + out += "vec4 final_color = texelFetch(proctex_lut, int(round(lut_coord)), 0);\n"; + break; + } + + if (config.state.proctex.separate_alpha) { + // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It + // uses the output of CombineAndMap directly instead. 
+ out += "float final_alpha = "; + AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, "proctex_alpha_map"); + out += ";\n"; + out += "return vec4(final_color.xyz, final_alpha);\n}\n"; + } else { + out += "return final_color;\n}\n"; + } +} + std::string GenerateFragmentShader(const PicaShaderConfig& config) { const auto& state = config.state; @@ -735,6 +982,9 @@ layout (std140) uniform shader_data { int scissor_x2; int scissor_y2; vec3 fog_color; + vec2 proctex_noise_f; + vec2 proctex_noise_a; + vec2 proctex_noise_p; vec3 lighting_global_ambient; LightSrc light_src[NUM_LIGHTS]; vec4 const_color[NUM_TEV_STAGES]; @@ -744,12 +994,21 @@ layout (std140) uniform shader_data { uniform sampler2D tex[3]; uniform sampler1D lut[6]; uniform usampler1D fog_lut; +uniform sampler1D proctex_noise_lut; +uniform sampler1D proctex_color_map; +uniform sampler1D proctex_alpha_map; +uniform sampler1D proctex_lut; +uniform sampler1D proctex_diff_lut; // Rotate the vector v by the quaternion q vec3 quaternion_rotate(vec4 q, vec3 v) { return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); -} +})"; + if (config.state.proctex.enable) + AppendProcTexSampler(out, config); + + out += R"( void main() { vec4 primary_fragment_color = vec4(0.0); vec4 secondary_fragment_color = vec4(0.0); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 3fb046b76e..ea6d216d1c 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -113,6 +113,19 @@ union PicaShaderConfig { } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; } lighting; + struct { + bool enable; + u32 coord; + Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp; + Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner; + bool separate_alpha; + bool noise_enable; + Pica::TexturingRegs::ProcTexShift u_shift, v_shift; + u32 lut_width; + u32 lut_offset; + Pica::TexturingRegs::ProcTexFilter lut_filter; 
+ } proctex; + } state; }; #if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 3c03b424ad..bf837a7fba 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -58,6 +58,12 @@ OpenGLState::OpenGLState() { fog_lut.texture_1d = 0; + proctex_lut.texture_1d = 0; + proctex_diff_lut.texture_1d = 0; + proctex_color_map.texture_1d = 0; + proctex_alpha_map.texture_1d = 0; + proctex_noise_lut.texture_1d = 0; + draw.read_framebuffer = 0; draw.draw_framebuffer = 0; draw.vertex_array = 0; @@ -201,6 +207,36 @@ void OpenGLState::Apply() const { glBindTexture(GL_TEXTURE_1D, fog_lut.texture_1d); } + // ProcTex Noise LUT + if (proctex_noise_lut.texture_1d != cur_state.proctex_noise_lut.texture_1d) { + glActiveTexture(GL_TEXTURE10); + glBindTexture(GL_TEXTURE_1D, proctex_noise_lut.texture_1d); + } + + // ProcTex Color Map + if (proctex_color_map.texture_1d != cur_state.proctex_color_map.texture_1d) { + glActiveTexture(GL_TEXTURE11); + glBindTexture(GL_TEXTURE_1D, proctex_color_map.texture_1d); + } + + // ProcTex Alpha Map + if (proctex_alpha_map.texture_1d != cur_state.proctex_alpha_map.texture_1d) { + glActiveTexture(GL_TEXTURE12); + glBindTexture(GL_TEXTURE_1D, proctex_alpha_map.texture_1d); + } + + // ProcTex LUT + if (proctex_lut.texture_1d != cur_state.proctex_lut.texture_1d) { + glActiveTexture(GL_TEXTURE13); + glBindTexture(GL_TEXTURE_1D, proctex_lut.texture_1d); + } + + // ProcTex Diff LUT + if (proctex_diff_lut.texture_1d != cur_state.proctex_diff_lut.texture_1d) { + glActiveTexture(GL_TEXTURE14); + glBindTexture(GL_TEXTURE_1D, proctex_diff_lut.texture_1d); + } + // Framebuffer if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 
aee3c29464..7dcc03bd5c 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -71,6 +71,26 @@ public: GLuint texture_1d; // GL_TEXTURE_BINDING_1D } fog_lut; + struct { + GLuint texture_1d; // GL_TEXTURE_BINDING_1D + } proctex_noise_lut; + + struct { + GLuint texture_1d; // GL_TEXTURE_BINDING_1D + } proctex_color_map; + + struct { + GLuint texture_1d; // GL_TEXTURE_BINDING_1D + } proctex_alpha_map; + + struct { + GLuint texture_1d; // GL_TEXTURE_BINDING_1D + } proctex_lut; + + struct { + GLuint texture_1d; // GL_TEXTURE_BINDING_1D + } proctex_diff_lut; + struct { GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING