video_core: Take factors into account with min/max blending functions (#6925)

* sw_framebuffer: Take factors into account for min/max blending

* renderer_gl: Take factors into account for min/max blending

* Address review comments

* gl_shader_gen: Fix framebuffer fetch on qcom and mali

* renderer_opengl: Add fallback path for mesa

* gl_shader_gen: Avoid emitting blend emulation if minmax_factor is present
This commit is contained in:
GPUCode 2023-08-30 21:26:28 +03:00 committed by GitHub
parent 93c7c6a995
commit 1159e4d928
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 306 additions and 63 deletions

View File

@ -1,28 +1,32 @@
/*
OpenGL, OpenGL ES loader generated by glad 0.1.36 on Sat Apr 1 20:34:42 2023.
OpenGL, OpenGL ES loader generated by glad 0.1.34 on Sat Aug 26 18:38:43 2023.
Language/Generator: C/C++
Specification: gl
APIs: gl=4.3, gles2=3.2
Profile: core
Extensions:
GL_AMD_blend_minmax_factor,
GL_ARB_buffer_storage,
GL_ARB_clear_texture,
GL_ARB_get_texture_sub_image,
GL_ARB_texture_compression_bptc,
GL_ARM_shader_framebuffer_fetch,
GL_EXT_buffer_storage,
GL_EXT_clip_cull_distance,
GL_EXT_texture_compression_s3tc
GL_EXT_shader_framebuffer_fetch,
GL_EXT_texture_compression_s3tc,
GL_NV_blend_minmax_factor
Loader: True
Local files: False
Omit khrplatform: False
Reproducible: False
Commandline:
--profile="core" --api="gl=4.3,gles2=3.2" --generator="c" --spec="gl" --extensions="GL_ARB_buffer_storage,GL_ARB_clear_texture,GL_ARB_get_texture_sub_image,GL_ARB_texture_compression_bptc,GL_EXT_buffer_storage,GL_EXT_clip_cull_distance,GL_EXT_texture_compression_s3tc"
--profile="core" --api="gl=4.3,gles2=3.2" --generator="c" --spec="gl" --extensions="GL_AMD_blend_minmax_factor,GL_ARB_buffer_storage,GL_ARB_clear_texture,GL_ARB_get_texture_sub_image,GL_ARB_texture_compression_bptc,GL_ARM_shader_framebuffer_fetch,GL_EXT_buffer_storage,GL_EXT_clip_cull_distance,GL_EXT_shader_framebuffer_fetch,GL_EXT_texture_compression_s3tc,GL_NV_blend_minmax_factor"
Online:
https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D4.3&api=gles2%3D3.2&extensions=GL_ARB_buffer_storage&extensions=GL_ARB_clear_texture&extensions=GL_ARB_get_texture_sub_image&extensions=GL_ARB_texture_compression_bptc&extensions=GL_EXT_buffer_storage&extensions=GL_EXT_clip_cull_distance&extensions=GL_EXT_texture_compression_s3tc
https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D4.3&api=gles2%3D3.2&extensions=GL_AMD_blend_minmax_factor&extensions=GL_ARB_buffer_storage&extensions=GL_ARB_clear_texture&extensions=GL_ARB_get_texture_sub_image&extensions=GL_ARB_texture_compression_bptc&extensions=GL_ARM_shader_framebuffer_fetch&extensions=GL_EXT_buffer_storage&extensions=GL_EXT_clip_cull_distance&extensions=GL_EXT_shader_framebuffer_fetch&extensions=GL_EXT_texture_compression_s3tc&extensions=GL_NV_blend_minmax_factor
*/
@ -3320,6 +3324,8 @@ typedef void (APIENTRYP PFNGLGETNUNIFORMUIVPROC)(GLuint program, GLint location,
GLAPI PFNGLGETNUNIFORMUIVPROC glad_glGetnUniformuiv;
#define glGetnUniformuiv glad_glGetnUniformuiv
#endif
#define GL_FACTOR_MIN_AMD 0x901C
#define GL_FACTOR_MAX_AMD 0x901D
#define GL_MAP_PERSISTENT_BIT 0x0040
#define GL_MAP_COHERENT_BIT 0x0080
#define GL_DYNAMIC_STORAGE_BIT 0x0100
@ -3332,10 +3338,13 @@ GLAPI PFNGLGETNUNIFORMUIVPROC glad_glGetnUniformuiv;
#define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB 0x8E8D
#define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB 0x8E8E
#define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB 0x8E8F
#define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52
#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0
#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1
#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2
#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3
#define GL_FETCH_PER_SAMPLE_ARM 0x8F65
#define GL_FRAGMENT_SHADER_FRAMEBUFFER_FETCH_MRT_ARM 0x8F66
#define GL_MAP_PERSISTENT_BIT_EXT 0x0040
#define GL_MAP_COHERENT_BIT_EXT 0x0080
#define GL_DYNAMIC_STORAGE_BIT_EXT 0x0100
@ -3354,6 +3363,10 @@ GLAPI PFNGLGETNUNIFORMUIVPROC glad_glGetnUniformuiv;
#define GL_CLIP_DISTANCE5_EXT 0x3005
#define GL_CLIP_DISTANCE6_EXT 0x3006
#define GL_CLIP_DISTANCE7_EXT 0x3007
#ifndef GL_AMD_blend_minmax_factor
#define GL_AMD_blend_minmax_factor 1
GLAPI int GLAD_GL_AMD_blend_minmax_factor;
#endif
#ifndef GL_ARB_buffer_storage
#define GL_ARB_buffer_storage 1
GLAPI int GLAD_GL_ARB_buffer_storage;
@ -3385,10 +3398,22 @@ GLAPI PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC glad_glGetCompressedTextureSubImage;
#define GL_ARB_texture_compression_bptc 1
GLAPI int GLAD_GL_ARB_texture_compression_bptc;
#endif
#ifndef GL_EXT_shader_framebuffer_fetch
#define GL_EXT_shader_framebuffer_fetch 1
GLAPI int GLAD_GL_EXT_shader_framebuffer_fetch;
#endif
#ifndef GL_EXT_texture_compression_s3tc
#define GL_EXT_texture_compression_s3tc 1
GLAPI int GLAD_GL_EXT_texture_compression_s3tc;
#endif
#ifndef GL_NV_blend_minmax_factor
#define GL_NV_blend_minmax_factor 1
GLAPI int GLAD_GL_NV_blend_minmax_factor;
#endif
#ifndef GL_ARM_shader_framebuffer_fetch
#define GL_ARM_shader_framebuffer_fetch 1
GLAPI int GLAD_GL_ARM_shader_framebuffer_fetch;
#endif
#ifndef GL_EXT_buffer_storage
#define GL_EXT_buffer_storage 1
GLAPI int GLAD_GL_EXT_buffer_storage;
@ -3400,10 +3425,18 @@ GLAPI PFNGLBUFFERSTORAGEEXTPROC glad_glBufferStorageEXT;
#define GL_EXT_clip_cull_distance 1
GLAPI int GLAD_GL_EXT_clip_cull_distance;
#endif
#ifndef GL_EXT_shader_framebuffer_fetch
#define GL_EXT_shader_framebuffer_fetch 1
GLAPI int GLAD_GL_EXT_shader_framebuffer_fetch;
#endif
#ifndef GL_EXT_texture_compression_s3tc
#define GL_EXT_texture_compression_s3tc 1
GLAPI int GLAD_GL_EXT_texture_compression_s3tc;
#endif
#ifndef GL_NV_blend_minmax_factor
#define GL_NV_blend_minmax_factor 1
GLAPI int GLAD_GL_NV_blend_minmax_factor;
#endif
#ifdef __cplusplus
}

View File

@ -1,28 +1,32 @@
/*
OpenGL, OpenGL ES loader generated by glad 0.1.36 on Sat Apr 1 20:34:42 2023.
OpenGL, OpenGL ES loader generated by glad 0.1.34 on Sat Aug 26 18:38:43 2023.
Language/Generator: C/C++
Specification: gl
APIs: gl=4.3, gles2=3.2
Profile: core
Extensions:
GL_AMD_blend_minmax_factor,
GL_ARB_buffer_storage,
GL_ARB_clear_texture,
GL_ARB_get_texture_sub_image,
GL_ARB_texture_compression_bptc,
GL_ARM_shader_framebuffer_fetch,
GL_EXT_buffer_storage,
GL_EXT_clip_cull_distance,
GL_EXT_texture_compression_s3tc
GL_EXT_shader_framebuffer_fetch,
GL_EXT_texture_compression_s3tc,
GL_NV_blend_minmax_factor
Loader: True
Local files: False
Omit khrplatform: False
Reproducible: False
Commandline:
--profile="core" --api="gl=4.3,gles2=3.2" --generator="c" --spec="gl" --extensions="GL_ARB_buffer_storage,GL_ARB_clear_texture,GL_ARB_get_texture_sub_image,GL_ARB_texture_compression_bptc,GL_EXT_buffer_storage,GL_EXT_clip_cull_distance,GL_EXT_texture_compression_s3tc"
--profile="core" --api="gl=4.3,gles2=3.2" --generator="c" --spec="gl" --extensions="GL_AMD_blend_minmax_factor,GL_ARB_buffer_storage,GL_ARB_clear_texture,GL_ARB_get_texture_sub_image,GL_ARB_texture_compression_bptc,GL_ARM_shader_framebuffer_fetch,GL_EXT_buffer_storage,GL_EXT_clip_cull_distance,GL_EXT_shader_framebuffer_fetch,GL_EXT_texture_compression_s3tc,GL_NV_blend_minmax_factor"
Online:
https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D4.3&api=gles2%3D3.2&extensions=GL_ARB_buffer_storage&extensions=GL_ARB_clear_texture&extensions=GL_ARB_get_texture_sub_image&extensions=GL_ARB_texture_compression_bptc&extensions=GL_EXT_buffer_storage&extensions=GL_EXT_clip_cull_distance&extensions=GL_EXT_texture_compression_s3tc
https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D4.3&api=gles2%3D3.2&extensions=GL_AMD_blend_minmax_factor&extensions=GL_ARB_buffer_storage&extensions=GL_ARB_clear_texture&extensions=GL_ARB_get_texture_sub_image&extensions=GL_ARB_texture_compression_bptc&extensions=GL_ARM_shader_framebuffer_fetch&extensions=GL_EXT_buffer_storage&extensions=GL_EXT_clip_cull_distance&extensions=GL_EXT_shader_framebuffer_fetch&extensions=GL_EXT_texture_compression_s3tc&extensions=GL_NV_blend_minmax_factor
*/
#include <stdio.h>
@ -853,13 +857,17 @@ PFNGLVIEWPORTARRAYVPROC glad_glViewportArrayv = NULL;
PFNGLVIEWPORTINDEXEDFPROC glad_glViewportIndexedf = NULL;
PFNGLVIEWPORTINDEXEDFVPROC glad_glViewportIndexedfv = NULL;
PFNGLWAITSYNCPROC glad_glWaitSync = NULL;
int GLAD_GL_AMD_blend_minmax_factor = 0;
int GLAD_GL_ARB_buffer_storage = 0;
int GLAD_GL_ARB_clear_texture = 0;
int GLAD_GL_ARB_get_texture_sub_image = 0;
int GLAD_GL_ARB_texture_compression_bptc = 0;
int GLAD_GL_ARM_shader_framebuffer_fetch = 0;
int GLAD_GL_EXT_buffer_storage = 0;
int GLAD_GL_EXT_clip_cull_distance = 0;
int GLAD_GL_EXT_shader_framebuffer_fetch = 0;
int GLAD_GL_EXT_texture_compression_s3tc = 0;
int GLAD_GL_NV_blend_minmax_factor = 0;
PFNGLBUFFERSTORAGEPROC glad_glBufferStorage = NULL;
PFNGLCLEARTEXIMAGEPROC glad_glClearTexImage = NULL;
PFNGLCLEARTEXSUBIMAGEPROC glad_glClearTexSubImage = NULL;
@ -1498,11 +1506,14 @@ static void load_GL_ARB_get_texture_sub_image(GLADloadproc load) {
}
/* Records, in the GLAD_GL_* flags, which of the requested extensions has_ext()
 * reports for the GL API. Returns 0 without touching the flags when get_exts()
 * returns 0, and 1 otherwise. */
static int find_extensionsGL(void) {
    const int have_list = get_exts();

    if (have_list) {
        GLAD_GL_AMD_blend_minmax_factor = has_ext("GL_AMD_blend_minmax_factor");
        GLAD_GL_ARB_buffer_storage = has_ext("GL_ARB_buffer_storage");
        GLAD_GL_ARB_clear_texture = has_ext("GL_ARB_clear_texture");
        GLAD_GL_ARB_get_texture_sub_image = has_ext("GL_ARB_get_texture_sub_image");
        GLAD_GL_ARB_texture_compression_bptc = has_ext("GL_ARB_texture_compression_bptc");
        GLAD_GL_EXT_shader_framebuffer_fetch = has_ext("GL_EXT_shader_framebuffer_fetch");
        GLAD_GL_EXT_texture_compression_s3tc = has_ext("GL_EXT_texture_compression_s3tc");
        GLAD_GL_NV_blend_minmax_factor = has_ext("GL_NV_blend_minmax_factor");

        /* The list obtained by get_exts() is released once every flag is set. */
        free_exts();
        return 1;
    }

    return 0;
}
@ -1971,9 +1982,12 @@ static void load_GL_EXT_buffer_storage(GLADloadproc load) {
}
/* Records, in the GLAD_GL_* flags, which of the requested extensions has_ext()
 * reports for the GLES2 API. Returns 0 without touching the flags when
 * get_exts() returns 0, and 1 otherwise. */
static int find_extensionsGLES2(void) {
    const int have_list = get_exts();

    if (have_list) {
        GLAD_GL_ARM_shader_framebuffer_fetch = has_ext("GL_ARM_shader_framebuffer_fetch");
        GLAD_GL_EXT_buffer_storage = has_ext("GL_EXT_buffer_storage");
        GLAD_GL_EXT_clip_cull_distance = has_ext("GL_EXT_clip_cull_distance");
        GLAD_GL_EXT_shader_framebuffer_fetch = has_ext("GL_EXT_shader_framebuffer_fetch");
        GLAD_GL_EXT_texture_compression_s3tc = has_ext("GL_EXT_texture_compression_s3tc");
        GLAD_GL_NV_blend_minmax_factor = has_ext("GL_NV_blend_minmax_factor");

        /* The list obtained by get_exts() is released once every flag is set. */
        free_exts();
        return 1;
    }

    return 0;
}

View File

@ -170,6 +170,9 @@ void Driver::CheckExtensionSupport() {
arb_texture_compression_bptc = GLAD_GL_ARB_texture_compression_bptc;
ext_clip_cull_distance = GLAD_GL_EXT_clip_cull_distance;
ext_texture_compression_s3tc = GLAD_GL_EXT_texture_compression_s3tc;
shader_framebuffer_fetch =
GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch;
blend_minmax_factor = GLAD_GL_AMD_blend_minmax_factor || GLAD_GL_NV_blend_minmax_factor;
is_suitable = GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1;
}

View File

@ -105,6 +105,16 @@ public:
return ext_clip_cull_distance;
}
/// Returns true if the implementation supports (EXT/ARM)_shader_framebuffer_fetch.
/// Simply reports the cached `shader_framebuffer_fetch` flag; no GL query is made here.
bool HasShaderFramebufferFetch() const {
return shader_framebuffer_fetch;
}
/// Returns true if the implementation supports (NV/AMD)_blend_minmax_factor.
/// Simply reports the cached `blend_minmax_factor` flag; no GL query is made here.
bool HasBlendMinMaxFactor() const {
return blend_minmax_factor;
}
private:
void ReportDriverInfo();
void DeduceVendor();
@ -125,6 +135,8 @@ private:
bool ext_clip_cull_distance{};
bool ext_texture_compression_s3tc{};
bool arb_texture_compression_bptc{};
bool shader_framebuffer_fetch{};
bool blend_minmax_factor{};
std::string_view gl_version{};
std::string_view gpu_vendor{};

View File

@ -468,14 +468,16 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT);
}
use_custom_normal = false;
return succeeded;
}
void RasterizerOpenGL::SyncTextureUnits(const Framebuffer* framebuffer) {
using TextureType = Pica::TexturingRegs::TextureConfig::TextureType;
// Reset transient draw state
state.color_buffer.texture_2d = 0;
use_custom_normal = false;
const auto pica_textures = regs.texturing.GetTextures();
for (u32 texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
const auto& texture = pica_textures[texture_index];
@ -519,6 +521,10 @@ void RasterizerOpenGL::SyncTextureUnits(const Framebuffer* framebuffer) {
state.texture_units[texture_index].texture_2d = surface.Handle();
}
}
if (emulate_minmax_blend && !driver.HasShaderFramebufferFetch()) {
state.color_buffer.texture_2d = framebuffer->Attachment(SurfaceType::Color);
}
}
void RasterizerOpenGL::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture) {
@ -760,17 +766,14 @@ void RasterizerOpenGL::SyncCullMode() {
case Pica::RasterizerRegs::CullMode::KeepAll:
state.cull.enabled = false;
break;
case Pica::RasterizerRegs::CullMode::KeepClockWise:
state.cull.enabled = true;
state.cull.front_face = GL_CW;
break;
case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
state.cull.enabled = true;
state.cull.front_face = GL_CCW;
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unknown cull mode {}",
static_cast<u32>(regs.rasterizer.cull_mode.Value()));
@ -784,10 +787,12 @@ void RasterizerOpenGL::SyncBlendEnabled() {
}
void RasterizerOpenGL::SyncBlendFuncs() {
state.blend.rgb_equation =
PicaToGL::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb);
state.blend.a_equation =
PicaToGL::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_a);
const bool has_minmax_factor = driver.HasBlendMinMaxFactor();
state.blend.rgb_equation = PicaToGL::BlendEquation(
regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb, has_minmax_factor);
state.blend.a_equation = PicaToGL::BlendEquation(
regs.framebuffer.output_merger.alpha_blending.blend_equation_a, has_minmax_factor);
state.blend.src_rgb_func =
PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_rgb);
state.blend.dst_rgb_func =
@ -796,14 +801,39 @@ void RasterizerOpenGL::SyncBlendFuncs() {
PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_a);
state.blend.dst_a_func =
PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_a);
if (has_minmax_factor) {
return;
}
// Blending with min/max equations is emulated in the fragment shader so
// configure blending to not modify the incoming fragment color.
emulate_minmax_blend = false;
if (state.EmulateColorBlend()) {
emulate_minmax_blend = true;
state.blend.rgb_equation = GL_FUNC_ADD;
state.blend.src_rgb_func = GL_ONE;
state.blend.dst_rgb_func = GL_ZERO;
}
if (state.EmulateAlphaBlend()) {
emulate_minmax_blend = true;
state.blend.a_equation = GL_FUNC_ADD;
state.blend.src_a_func = GL_ONE;
state.blend.dst_a_func = GL_ZERO;
}
}
void RasterizerOpenGL::SyncBlendColor() {
auto blend_color = PicaToGL::ColorRGBA8(regs.framebuffer.output_merger.blend_const.raw);
const auto blend_color = PicaToGL::ColorRGBA8(regs.framebuffer.output_merger.blend_const.raw);
state.blend.color.red = blend_color[0];
state.blend.color.green = blend_color[1];
state.blend.color.blue = blend_color[2];
state.blend.color.alpha = blend_color[3];
if (blend_color != uniform_block_data.data.blend_color) {
uniform_block_data.data.blend_color = blend_color;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncLogicOp() {

View File

@ -155,6 +155,7 @@ private:
OGLTexture texture_buffer_lut_rg;
OGLTexture texture_buffer_lut_rgba;
bool use_custom_normal{};
bool emulate_minmax_blend{};
};
} // namespace OpenGL

View File

@ -9,6 +9,7 @@
#include "core/core.h"
#include "core/telemetry_session.h"
#include "video_core/pica_state.h"
#include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
@ -60,7 +61,8 @@ out gl_PerVertex {
return out;
}
PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs, bool use_normal) {
PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs, bool has_blend_minmax_factor,
bool use_normal) {
PicaFSConfig res{};
auto& state = res.state;
@ -229,6 +231,29 @@ PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs, bool use_normal
state.proctex.lut_filter = regs.texturing.proctex_lut.filter;
}
const auto alpha_eq = regs.framebuffer.output_merger.alpha_blending.blend_equation_a.Value();
const auto rgb_eq = regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb.Value();
if (regs.framebuffer.output_merger.alphablend_enable && !has_blend_minmax_factor) {
if (rgb_eq == Pica::FramebufferRegs::BlendEquation::Max ||
rgb_eq == Pica::FramebufferRegs::BlendEquation::Min) {
state.rgb_blend.emulate_blending = true;
state.rgb_blend.eq = rgb_eq;
state.rgb_blend.src_factor =
regs.framebuffer.output_merger.alpha_blending.factor_source_rgb;
state.rgb_blend.dst_factor =
regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb;
}
if (alpha_eq == Pica::FramebufferRegs::BlendEquation::Max ||
alpha_eq == Pica::FramebufferRegs::BlendEquation::Min) {
state.alpha_blend.emulate_blending = true;
state.alpha_blend.eq = alpha_eq;
state.alpha_blend.src_factor =
regs.framebuffer.output_merger.alpha_blending.factor_source_a;
state.alpha_blend.dst_factor =
regs.framebuffer.output_merger.alpha_blending.factor_dest_a;
}
}
state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
FramebufferRegs::FragmentOperationMode::Shadow;
if (state.shadow_rendering) {
@ -1222,6 +1247,103 @@ float ProcTexNoiseCoef(vec2 x) {
}
}
/// Appends GLSL to `out` that applies the configured logic op to the `color` output.
/// Nothing is emitted unless GLES is set and alpha blending is disabled in `config`.
/// @param out Shader source being built; statements are appended to it.
/// @param config Fragment shader configuration supplying `alphablend_enable` and `logic_op`.
static void WriteLogicOp(std::string& out, const PicaFSConfig& config) {
    if (!GLES || config.state.alphablend_enable) {
        return;
    }

    switch (config.state.logic_op) {
    case FramebufferRegs::LogicOp::Clear:
        out += "color = vec4(0);\n";
        break;
    case FramebufferRegs::LogicOp::Set:
        out += "color = vec4(1);\n";
        break;
    case FramebufferRegs::LogicOp::Copy:
        // Take the color output as-is
        break;
    case FramebufferRegs::LogicOp::CopyInverted:
        // `color` is a floating-point vec4 and GLSL only defines `~` for integer operands,
        // so `color = ~color;` would fail to compile. Complementing the normalized value
        // gives the same per-channel inversion.
        out += "color = vec4(1.0) - color;\n";
        break;
    case FramebufferRegs::LogicOp::NoOp:
        // We need to discard the color, but not necessarily the depth. This is not possible
        // with fragment shader alone, so we emulate this behavior on GLES with glColorMask.
        break;
    default:
        LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast<int>(config.state.logic_op));
        UNIMPLEMENTED();
    }
}
static void WriteBlending(std::string& out, const PicaFSConfig& config) {
if (!config.state.rgb_blend.emulate_blending && !config.state.alpha_blend.emulate_blending)
[[likely]] {
return;
}
using BlendFactor = Pica::FramebufferRegs::BlendFactor;
out += R"(
vec4 source_color = last_tex_env_out;
#if defined(GL_EXT_shader_framebuffer_fetch)
vec4 dest_color = color;
#elif defined(GL_ARM_shader_framebuffer_fetch)
vec4 dest_color = gl_LastFragColorARM;
#else
vec4 dest_color = texelFetch(colorBuffer, ivec2(gl_FragCoord.xy), 0);
#endif
)";
const auto get_factor = [&](BlendFactor factor) -> std::string {
switch (factor) {
case BlendFactor::Zero:
return "vec4(0.f)";
case BlendFactor::One:
return "vec4(1.f)";
case BlendFactor::SourceColor:
return "source_color";
case BlendFactor::OneMinusSourceColor:
return "vec4(1.f) - source_color";
case BlendFactor::DestColor:
return "dest_color";
case BlendFactor::OneMinusDestColor:
return "vec4(1.f) - dest_color";
case BlendFactor::SourceAlpha:
return "source_color.aaaa";
case BlendFactor::OneMinusSourceAlpha:
return "vec4(1.f) - source_color.aaaa";
case BlendFactor::DestAlpha:
return "dest_color.aaaa";
case BlendFactor::OneMinusDestAlpha:
return "vec4(1.f) - dest_color.aaaa";
case BlendFactor::ConstantColor:
return "blend_color";
case BlendFactor::OneMinusConstantColor:
return "vec4(1.f) - blend_color";
case BlendFactor::ConstantAlpha:
return "blend_color.aaaa";
case BlendFactor::OneMinusConstantAlpha:
return "vec4(1.f) - blend_color.aaaa";
default:
LOG_CRITICAL(Render_OpenGL, "Unknown blend factor {}", factor);
return "vec4(1.f)";
}
};
const auto get_func = [](Pica::FramebufferRegs::BlendEquation eq) {
return eq == Pica::FramebufferRegs::BlendEquation::Min ? "min" : "max";
};
if (config.state.rgb_blend.emulate_blending) {
out += fmt::format(
"last_tex_env_out.rgb = {}(source_color.rgb * ({}).rgb, dest_color.rgb * ({}).rgb);\n",
get_func(config.state.rgb_blend.eq), get_factor(config.state.rgb_blend.src_factor),
get_factor(config.state.rgb_blend.dst_factor));
}
if (config.state.alpha_blend.emulate_blending) {
out += fmt::format(
"last_tex_env_out.a = {}(source_color.a * ({}).a, dest_color.a * ({}).a);\n",
get_func(config.state.alpha_blend.eq), get_factor(config.state.alpha_blend.src_factor),
get_factor(config.state.alpha_blend.dst_factor));
}
}
ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config,
bool separable_shader) {
const auto& state = config.state;
@ -1235,6 +1357,17 @@ ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& confi
out += fragment_shader_precision_OES;
}
out += R"(
#if defined(GL_EXT_shader_framebuffer_fetch)
#extension GL_EXT_shader_framebuffer_fetch : enable
#elif defined(GL_ARM_shader_framebuffer_fetch)
#extension GL_ARM_shader_framebuffer_fetch : enable
#else
layout(location = 10) uniform sampler2D colorBuffer;
#endif
)";
out += GetVertexInterfaceDeclaration(false, separable_shader);
out += R"(
@ -1242,7 +1375,7 @@ ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& confi
in vec4 gl_FragCoord;
#endif // CITRA_GLES
out vec4 color;
layout(location = 0) out vec4 color;
uniform sampler2D tex0;
uniform sampler2D tex1;
@ -1552,34 +1685,12 @@ do {
} else {
out += "gl_FragDepth = depth;\n";
// Round the final fragment color to maintain the PICA's 8 bits of precision
out += "color = byteround(last_tex_env_out);\n";
out += "last_tex_env_out = byteround(last_tex_env_out);\n";
WriteBlending(out, config);
out += "color = last_tex_env_out;\n";
}
if (GLES) {
if (!state.alphablend_enable) {
switch (state.logic_op) {
case FramebufferRegs::LogicOp::Clear:
out += "color = vec4(0);\n";
break;
case FramebufferRegs::LogicOp::Set:
out += "color = vec4(1);\n";
break;
case FramebufferRegs::LogicOp::Copy:
// Take the color output as-is
break;
case FramebufferRegs::LogicOp::CopyInverted:
out += "color = ~color;\n";
break;
case FramebufferRegs::LogicOp::NoOp:
// We need to discard the color, but not necessarily the depth. This is not possible
// with fragment shader alone, so we emulate this behavior on GLES with glColorMask.
break;
default:
LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast<int>(state.logic_op));
UNIMPLEMENTED();
}
}
}
WriteLogicOp(out, config);
out += '}';

View File

@ -11,6 +11,8 @@
namespace OpenGL {
class Driver;
namespace ShaderDecompiler {
struct ProgramResult;
}
@ -115,6 +117,13 @@ struct PicaFSConfigState {
Pica::TexturingRegs::ProcTexFilter lut_filter;
} proctex;
// Min/max blend emulation parameters; one instance for the RGB equation and one for alpha.
// NOTE(review): this state is hashed through Common::HashableStruct — presumably the padding
// after `emulate_blending` must be zero-initialized for stable hashes; confirm.
struct {
bool emulate_blending; // Set when the equation has to be evaluated in the fragment shader
Pica::FramebufferRegs::BlendEquation eq; // Equation to emulate
Pica::FramebufferRegs::BlendFactor src_factor; // Factor applied to the source color
Pica::FramebufferRegs::BlendFactor dst_factor; // Factor applied to the destination color
} rgb_blend, alpha_blend;
bool shadow_rendering;
bool shadow_texture_orthographic;
bool use_custom_normal_map;
@ -131,7 +140,8 @@ struct PicaFSConfigState {
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
/// Construct a PicaFSConfig with the given Pica register configuration.
static PicaFSConfig BuildFromRegs(const Pica::Regs& regs, bool use_normal = false);
static PicaFSConfig BuildFromRegs(const Pica::Regs& regs, bool has_blend_minmax_factor,
bool use_normal = false);
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));

View File

@ -418,7 +418,8 @@ void ShaderProgramManager::UseTrivialGeometryShader() {
}
void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs, bool use_normal) {
PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs, use_normal);
PicaFSConfig config =
PicaFSConfig::BuildFromRegs(regs, driver.HasBlendMinMaxFactor(), use_normal);
auto [handle, result] = impl->fragment_shaders.Get(config);
impl->current.fs = handle;
impl->current.fs_hash = config.Hash();
@ -543,7 +544,8 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
impl->programmable_vertex_shaders.Inject(conf, decomp->second.result.code,
std::move(shader));
} else if (raw.GetProgramType() == ProgramType::FS) {
PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig());
PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig(),
driver.HasBlendMinMaxFactor());
std::scoped_lock lock(mutex);
impl->fragment_shaders.Inject(conf, std::move(shader));
} else {
@ -655,7 +657,8 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
std::scoped_lock lock(mutex);
impl->programmable_vertex_shaders.Inject(conf, result->code, std::move(stage));
} else if (raw.GetProgramType() == ProgramType::FS) {
PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig());
PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig(),
driver.HasBlendMinMaxFactor());
result = GenerateFragmentShader(conf, impl->separable);
OGLShaderStage stage{impl->separable};
stage.Create(result->code.c_str(), GL_FRAGMENT_SHADER);

View File

@ -247,6 +247,12 @@ void OpenGLState::Apply() const {
glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_rgba.texture_buffer);
}
// Color buffer
if (color_buffer.texture_2d != cur_state.color_buffer.texture_2d) {
glActiveTexture(TextureUnits::TextureColorBuffer.Enum());
glBindTexture(GL_TEXTURE_2D, color_buffer.texture_2d);
}
// Shadow Images
if (image_shadow_buffer != cur_state.image_shadow_buffer) {
glBindImageTexture(ImageUnits::ShadowBuffer, image_shadow_buffer, 0, GL_FALSE, 0,

View File

@ -27,6 +27,7 @@ constexpr TextureUnit TextureBufferLUT_LF{3};
constexpr TextureUnit TextureBufferLUT_RG{4};
constexpr TextureUnit TextureBufferLUT_RGBA{5};
constexpr TextureUnit TextureNormalMap{7};
constexpr TextureUnit TextureColorBuffer{10};
} // namespace TextureUnits
@ -115,6 +116,10 @@ public:
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} texture_buffer_lut_rgba;
struct {
GLuint texture_2d; // GL_TEXTURE_BINDING_2D
} color_buffer;
// GL_IMAGE_BINDING_NAME
GLuint image_shadow_buffer;
union {
@ -165,6 +170,14 @@ public:
return cur_state;
}
/// Reports whether the RGB blend equation is currently plain GL_MIN or GL_MAX.
bool EmulateColorBlend() const {
    const auto equation = blend.rgb_equation;
    const bool is_min = equation == GL_MIN;
    return is_min || equation == GL_MAX;
}
/// Reports whether the alpha blend equation is currently plain GL_MIN or GL_MAX.
bool EmulateAlphaBlend() const {
    const auto equation = blend.a_equation;
    const bool is_min = equation == GL_MIN;
    return is_min || equation == GL_MAX;
}
/// Apply this state as the current OpenGL state
void Apply() const;

View File

@ -95,7 +95,7 @@ inline GLenum WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
return gl_mode;
}
inline GLenum BlendEquation(Pica::FramebufferRegs::BlendEquation equation) {
inline GLenum BlendEquation(Pica::FramebufferRegs::BlendEquation equation, bool factor_minmax) {
static constexpr std::array<GLenum, 5> blend_equation_table{{
GL_FUNC_ADD, // BlendEquation::Add
GL_FUNC_SUBTRACT, // BlendEquation::Subtract
@ -103,6 +103,13 @@ inline GLenum BlendEquation(Pica::FramebufferRegs::BlendEquation equation) {
GL_MIN, // BlendEquation::Min
GL_MAX, // BlendEquation::Max
}};
static constexpr std::array<GLenum, 5> blend_equation_table_minmax{{
GL_FUNC_ADD, // BlendEquation::Add
GL_FUNC_SUBTRACT, // BlendEquation::Subtract
GL_FUNC_REVERSE_SUBTRACT, // BlendEquation::ReverseSubtract
GL_FACTOR_MIN_AMD, // BlendEquation::Min
GL_FACTOR_MAX_AMD, // BlendEquation::Max
}};
const auto index = static_cast<std::size_t>(equation);
@ -114,7 +121,7 @@ inline GLenum BlendEquation(Pica::FramebufferRegs::BlendEquation equation) {
return GL_FUNC_ADD;
}
return blend_equation_table[index];
return (factor_minmax ? blend_equation_table_minmax : blend_equation_table)[index];
}
inline GLenum BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {

View File

@ -311,19 +311,17 @@ Common::Vec4<u8> EvaluateBlendEquation(const Common::Vec4<u8>& src,
case FramebufferRegs::BlendEquation::ReverseSubtract:
result = (dst_result - src_result) / 255;
break;
// TODO: How do these two actually work? OpenGL doesn't include the blend factors in the
// min/max computations, but is this what the 3DS actually does?
case FramebufferRegs::BlendEquation::Min:
result.r() = std::min(src.r(), dest.r());
result.g() = std::min(src.g(), dest.g());
result.b() = std::min(src.b(), dest.b());
result.a() = std::min(src.a(), dest.a());
result.r() = std::min(src_result.r(), dst_result.r()) / 255;
result.g() = std::min(src_result.g(), dst_result.g()) / 255;
result.b() = std::min(src_result.b(), dst_result.b()) / 255;
result.a() = std::min(src_result.a(), dst_result.a()) / 255;
break;
case FramebufferRegs::BlendEquation::Max:
result.r() = std::max(src.r(), dest.r());
result.g() = std::max(src.g(), dest.g());
result.b() = std::max(src.b(), dest.b());
result.a() = std::max(src.a(), dest.a());
result.r() = std::max(src_result.r(), dst_result.r()) / 255;
result.g() = std::max(src_result.g(), dst_result.g()) / 255;
result.b() = std::max(src_result.b(), dst_result.b()) / 255;
result.a() = std::max(src_result.a(), dst_result.a()) / 255;
break;
default:
LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation 0x{:x}", equation);

View File

@ -69,6 +69,7 @@ layout ({}std140) uniform shader_data {{
vec3 tex_lod_bias;
vec4 tex_border_color[3];
vec4 clip_coef;
vec4 blend_color;
}};
)";

View File

@ -66,9 +66,10 @@ struct UniformData {
alignas(16) Common::Vec3f tex_lod_bias;
alignas(16) Common::Vec4f tex_border_color[3];
alignas(16) Common::Vec4f clip_coef;
alignas(16) Common::Vec4f blend_color;
};
static_assert(sizeof(UniformData) == 0x530,
static_assert(sizeof(UniformData) == 0x540,
"The size of the UniformData does not match the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");