diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 11bc61e144..5317719e8e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,10 +1,8 @@ set(SRCS - clipper.cpp command_processor.cpp debug_utils/debug_utils.cpp pica.cpp primitive_assembly.cpp - rasterizer.cpp regs.cpp renderer_base.cpp renderer_opengl/gl_rasterizer.cpp @@ -15,7 +13,11 @@ set(SRCS renderer_opengl/renderer_opengl.cpp shader/shader.cpp shader/shader_interpreter.cpp - swrasterizer.cpp + swrasterizer/clipper.cpp + swrasterizer/framebuffer.cpp + swrasterizer/rasterizer.cpp + swrasterizer/swrasterizer.cpp + swrasterizer/texturing.cpp texture/etc1.cpp texture/texture_decode.cpp vertex_loader.cpp @@ -23,7 +25,6 @@ set(SRCS ) set(HEADERS - clipper.h command_processor.h debug_utils/debug_utils.h gpu_debugger.h @@ -31,7 +32,6 @@ set(HEADERS pica_state.h pica_types.h primitive_assembly.h - rasterizer.h rasterizer_interface.h regs.h regs_framebuffer.h @@ -52,7 +52,11 @@ set(HEADERS shader/debug_data.h shader/shader.h shader/shader_interpreter.h - swrasterizer.h + swrasterizer/clipper.h + swrasterizer/framebuffer.h + swrasterizer/rasterizer.h + swrasterizer/swrasterizer.h + swrasterizer/texturing.h texture/etc1.h texture/texture_decode.h utils.h diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index fd38175b36..f6ece5c4b9 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -6,7 +6,7 @@ #include #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/swrasterizer.h" +#include "video_core/swrasterizer/swrasterizer.h" #include "video_core/video_core.h" void RendererBase::RefreshRasterizerSetting() { diff --git a/src/video_core/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp similarity index 98% rename from src/video_core/clipper.cpp rename to src/video_core/swrasterizer/clipper.cpp index 1e8e751ba7..2d80822d9f 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp @@ -11,11 +11,11 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "common/vector_math.h" -#include "video_core/clipper.h" #include "video_core/pica_state.h" #include "video_core/pica_types.h" -#include "video_core/rasterizer.h" #include "video_core/shader/shader.h" +#include "video_core/swrasterizer/clipper.h" +#include "video_core/swrasterizer/rasterizer.h" using Pica::Rasterizer::Vertex; diff --git a/src/video_core/clipper.h b/src/video_core/swrasterizer/clipper.h similarity index 100% rename from src/video_core/clipper.h rename to src/video_core/swrasterizer/clipper.h diff --git a/src/video_core/swrasterizer/framebuffer.cpp b/src/video_core/swrasterizer/framebuffer.cpp new file mode 100644 index 0000000000..7de3aac751 --- /dev/null +++ b/src/video_core/swrasterizer/framebuffer.cpp @@ -0,0 +1,358 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/assert.h" +#include "common/color.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "common/math_util.h" +#include "common/vector_math.h" +#include "core/hw/gpu.h" +#include "core/memory.h" +#include "video_core/pica_state.h" +#include "video_core/regs_framebuffer.h" +#include "video_core/swrasterizer/framebuffer.h" +#include "video_core/utils.h" + +namespace Pica { +namespace Rasterizer { + +void DrawPixel(int x, int y, const Math::Vec4& color) { + const auto& framebuffer = g_state.regs.framebuffer.framebuffer; + const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); + + // Similarly to textures, the render framebuffer is laid out from bottom to top, too. + // NOTE: The framebuffer height register contains the actual FB height minus one. + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = + GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + + coarse_y * framebuffer.width * bytes_per_pixel; + u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; + + switch (framebuffer.color_format) { + case FramebufferRegs::ColorFormat::RGBA8: + Color::EncodeRGBA8(color, dst_pixel); + break; + + case FramebufferRegs::ColorFormat::RGB8: + Color::EncodeRGB8(color, dst_pixel); + break; + + case FramebufferRegs::ColorFormat::RGB5A1: + Color::EncodeRGB5A1(color, dst_pixel); + break; + + case FramebufferRegs::ColorFormat::RGB565: + Color::EncodeRGB565(color, dst_pixel); + break; + + case FramebufferRegs::ColorFormat::RGBA4: + Color::EncodeRGBA4(color, dst_pixel); + break; + + default: + LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", + framebuffer.color_format.Value()); + UNIMPLEMENTED(); + } +} + +const Math::Vec4 GetPixel(int x, int y) { + const auto& framebuffer = g_state.regs.framebuffer.framebuffer; + const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = + GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); + u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + + coarse_y * framebuffer.width * bytes_per_pixel; + u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; + + switch (framebuffer.color_format) { + case FramebufferRegs::ColorFormat::RGBA8: + return Color::DecodeRGBA8(src_pixel); + + case FramebufferRegs::ColorFormat::RGB8: + return Color::DecodeRGB8(src_pixel); + + case FramebufferRegs::ColorFormat::RGB5A1: + return Color::DecodeRGB5A1(src_pixel); + + case FramebufferRegs::ColorFormat::RGB565: + return Color::DecodeRGB565(src_pixel); + + case FramebufferRegs::ColorFormat::RGBA4: + return Color::DecodeRGBA4(src_pixel); + + default: + LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", + framebuffer.color_format.Value()); + UNIMPLEMENTED(); + } + + return {0, 0, 0, 0}; +} + +u32 GetDepth(int x, int y) { + const auto& framebuffer = g_state.regs.framebuffer.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; + + u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; + u8* src_pixel = depth_buffer + src_offset; + + switch (framebuffer.depth_format) { + case FramebufferRegs::DepthFormat::D16: + return Color::DecodeD16(src_pixel); + case FramebufferRegs::DepthFormat::D24: + return Color::DecodeD24(src_pixel); + case FramebufferRegs::DepthFormat::D24S8: + return Color::DecodeD24S8(src_pixel).x; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); + UNIMPLEMENTED(); + return 0; + } +} + +u8 GetStencil(int x, int y) { + const auto& framebuffer = g_state.regs.framebuffer.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; + + u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; + u8* src_pixel = depth_buffer + src_offset; + + switch (framebuffer.depth_format) { + case FramebufferRegs::DepthFormat::D24S8: + return Color::DecodeD24S8(src_pixel).y; + + default: + LOG_WARNING( + HW_GPU, + "GetStencil called for function which doesn't have a stencil component (format %u)", + framebuffer.depth_format); + return 0; + } +} + +void SetDepth(int x, int y, u32 value) { + const auto& framebuffer = g_state.regs.framebuffer.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; + + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; + u8* dst_pixel = depth_buffer + dst_offset; + + switch (framebuffer.depth_format) { + case FramebufferRegs::DepthFormat::D16: + Color::EncodeD16(value, dst_pixel); + break; + + case FramebufferRegs::DepthFormat::D24: + Color::EncodeD24(value, dst_pixel); + break; + + case FramebufferRegs::DepthFormat::D24S8: + Color::EncodeD24X8(value, dst_pixel); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); + UNIMPLEMENTED(); + break; + } +} + +void SetStencil(int x, int y, u8 value) { + const auto& framebuffer = g_state.regs.framebuffer.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; + + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; + u8* dst_pixel = depth_buffer + dst_offset; + + switch (framebuffer.depth_format) { + case Pica::FramebufferRegs::DepthFormat::D16: + case Pica::FramebufferRegs::DepthFormat::D24: + // Nothing to do + break; + + case Pica::FramebufferRegs::DepthFormat::D24S8: + Color::EncodeX24S8(value, dst_pixel); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); + UNIMPLEMENTED(); + break; + } +} + +u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref) { + switch (action) { + case FramebufferRegs::StencilAction::Keep: + return old_stencil; + + case FramebufferRegs::StencilAction::Zero: + return 0; + + case FramebufferRegs::StencilAction::Replace: + return ref; + + case FramebufferRegs::StencilAction::Increment: + // Saturated increment + return std::min(old_stencil, 254) + 1; + + case FramebufferRegs::StencilAction::Decrement: + // Saturated decrement + return std::max(old_stencil, 1) - 1; + + case FramebufferRegs::StencilAction::Invert: + return ~old_stencil; + + case FramebufferRegs::StencilAction::IncrementWrap: + return old_stencil + 1; + + case FramebufferRegs::StencilAction::DecrementWrap: + return old_stencil - 1; + + default: + LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); + UNIMPLEMENTED(); + return 0; + } +} + +Math::Vec4 EvaluateBlendEquation(const Math::Vec4& src, const Math::Vec4& srcfactor, + const Math::Vec4& dest, const Math::Vec4& destfactor, + FramebufferRegs::BlendEquation equation) { + Math::Vec4 result; + + auto src_result = (src * srcfactor).Cast(); + auto dst_result = (dest * destfactor).Cast(); + + switch (equation) { + case FramebufferRegs::BlendEquation::Add: + result = (src_result + dst_result) / 255; + break; + + case FramebufferRegs::BlendEquation::Subtract: + result = (src_result - dst_result) / 255; + break; + + case FramebufferRegs::BlendEquation::ReverseSubtract: + result = (dst_result - src_result) / 255; + break; + + // TODO: How do these two actually work? OpenGL doesn't include the blend factors in the + // min/max computations, but is this what the 3DS actually does? + case FramebufferRegs::BlendEquation::Min: + result.r() = std::min(src.r(), dest.r()); + result.g() = std::min(src.g(), dest.g()); + result.b() = std::min(src.b(), dest.b()); + result.a() = std::min(src.a(), dest.a()); + break; + + case FramebufferRegs::BlendEquation::Max: + result.r() = std::max(src.r(), dest.r()); + result.g() = std::max(src.g(), dest.g()); + result.b() = std::max(src.b(), dest.b()); + result.a() = std::max(src.a(), dest.a()); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); + UNIMPLEMENTED(); + } + + return Math::Vec4(MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), + MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); +}; + +u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) { + switch (op) { + case FramebufferRegs::LogicOp::Clear: + return 0; + + case FramebufferRegs::LogicOp::And: + return src & dest; + + case FramebufferRegs::LogicOp::AndReverse: + return src & ~dest; + + case FramebufferRegs::LogicOp::Copy: + return src; + + case FramebufferRegs::LogicOp::Set: + return 255; + + case FramebufferRegs::LogicOp::CopyInverted: + return ~src; + + case FramebufferRegs::LogicOp::NoOp: + return dest; + + case FramebufferRegs::LogicOp::Invert: + return ~dest; + + case FramebufferRegs::LogicOp::Nand: + return ~(src & dest); + + case FramebufferRegs::LogicOp::Or: + return src | dest; + + case FramebufferRegs::LogicOp::Nor: + return ~(src | dest); + + case FramebufferRegs::LogicOp::Xor: + return src ^ dest; + + case FramebufferRegs::LogicOp::Equiv: + return ~(src ^ dest); + + case FramebufferRegs::LogicOp::AndInverted: + return ~src & dest; + + case FramebufferRegs::LogicOp::OrReverse: + return src | ~dest; + + case FramebufferRegs::LogicOp::OrInverted: + return ~src | dest; + } +}; + +} // namespace Rasterizer +} // namespace Pica diff --git a/src/video_core/swrasterizer/framebuffer.h b/src/video_core/swrasterizer/framebuffer.h new file mode 100644 index 0000000000..4a32a49797 --- /dev/null +++ b/src/video_core/swrasterizer/framebuffer.h @@ -0,0 +1,29 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "common/vector_math.h" +#include "video_core/regs_framebuffer.h" + +namespace Pica { +namespace Rasterizer { + +void DrawPixel(int x, int y, const Math::Vec4& color); +const Math::Vec4 GetPixel(int x, int y); +u32 GetDepth(int x, int y); +u8 GetStencil(int x, int y); +void SetDepth(int x, int y, u32 value); +void SetStencil(int x, int y, u8 value); +u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref); + +Math::Vec4 EvaluateBlendEquation(const Math::Vec4& src, const Math::Vec4& srcfactor, + const Math::Vec4& dest, const Math::Vec4& destfactor, + FramebufferRegs::BlendEquation equation); + +u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op); + +} // namespace Rasterizer +} // namespace Pica diff --git a/src/video_core/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp similarity index 60% rename from src/video_core/rasterizer.cpp rename to src/video_core/swrasterizer/rasterizer.cpp index 83a08ebd7f..7557fcb892 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -18,254 +18,19 @@ #include "video_core/debug_utils/debug_utils.h" #include "video_core/pica_state.h" #include "video_core/pica_types.h" -#include "video_core/rasterizer.h" #include "video_core/regs_framebuffer.h" #include "video_core/regs_rasterizer.h" #include "video_core/regs_texturing.h" #include "video_core/shader/shader.h" +#include "video_core/swrasterizer/framebuffer.h" +#include "video_core/swrasterizer/rasterizer.h" +#include "video_core/swrasterizer/texturing.h" #include "video_core/texture/texture_decode.h" #include "video_core/utils.h" namespace Pica { - namespace Rasterizer { -static void DrawPixel(int x, int y, const Math::Vec4& color) { - const auto& framebuffer = g_state.regs.framebuffer.framebuffer; - const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); - - // Similarly to textures, the render framebuffer is laid out from bottom to top, too. - // NOTE: The framebuffer height register contains the actual FB height minus one. - y = framebuffer.height - y; - - const u32 coarse_y = y & ~7; - u32 bytes_per_pixel = - GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + - coarse_y * framebuffer.width * bytes_per_pixel; - u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; - - switch (framebuffer.color_format) { - case FramebufferRegs::ColorFormat::RGBA8: - Color::EncodeRGBA8(color, dst_pixel); - break; - - case FramebufferRegs::ColorFormat::RGB8: - Color::EncodeRGB8(color, dst_pixel); - break; - - case FramebufferRegs::ColorFormat::RGB5A1: - Color::EncodeRGB5A1(color, dst_pixel); - break; - - case FramebufferRegs::ColorFormat::RGB565: - Color::EncodeRGB565(color, dst_pixel); - break; - - case FramebufferRegs::ColorFormat::RGBA4: - Color::EncodeRGBA4(color, dst_pixel); - break; - - default: - LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", - framebuffer.color_format.Value()); - UNIMPLEMENTED(); - } -} - -static const Math::Vec4 GetPixel(int x, int y) { - const auto& framebuffer = g_state.regs.framebuffer.framebuffer; - const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); - - y = framebuffer.height - y; - - const u32 coarse_y = y & ~7; - u32 bytes_per_pixel = - GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); - u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + - coarse_y * framebuffer.width * bytes_per_pixel; - u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; - - switch (framebuffer.color_format) { - case FramebufferRegs::ColorFormat::RGBA8: - return Color::DecodeRGBA8(src_pixel); - - case FramebufferRegs::ColorFormat::RGB8: - return Color::DecodeRGB8(src_pixel); - - case FramebufferRegs::ColorFormat::RGB5A1: - return Color::DecodeRGB5A1(src_pixel); - - case FramebufferRegs::ColorFormat::RGB565: - return Color::DecodeRGB565(src_pixel); - - case FramebufferRegs::ColorFormat::RGBA4: - return Color::DecodeRGBA4(src_pixel); - - default: - LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", - framebuffer.color_format.Value()); - UNIMPLEMENTED(); - } - - return {0, 0, 0, 0}; -} - -static u32 GetDepth(int x, int y) { - const auto& framebuffer = g_state.regs.framebuffer.framebuffer; - const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); - u8* depth_buffer = Memory::GetPhysicalPointer(addr); - - y = framebuffer.height - y; - - const u32 coarse_y = y & ~7; - u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); - u32 stride = framebuffer.width * bytes_per_pixel; - - u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; - u8* src_pixel = depth_buffer + src_offset; - - switch (framebuffer.depth_format) { - case FramebufferRegs::DepthFormat::D16: - return Color::DecodeD16(src_pixel); - case FramebufferRegs::DepthFormat::D24: - return Color::DecodeD24(src_pixel); - case FramebufferRegs::DepthFormat::D24S8: - return Color::DecodeD24S8(src_pixel).x; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); - UNIMPLEMENTED(); - return 0; - } -} - -static u8 GetStencil(int x, int y) { - const auto& framebuffer = g_state.regs.framebuffer.framebuffer; - const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); - u8* depth_buffer = Memory::GetPhysicalPointer(addr); - - y = framebuffer.height - y; - - const u32 coarse_y = y & ~7; - u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); - u32 stride = framebuffer.width * bytes_per_pixel; - - u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; - u8* src_pixel = depth_buffer + src_offset; - - switch (framebuffer.depth_format) { - case FramebufferRegs::DepthFormat::D24S8: - return Color::DecodeD24S8(src_pixel).y; - - default: - LOG_WARNING( - HW_GPU, - "GetStencil called for function which doesn't have a stencil component (format %u)", - framebuffer.depth_format); - return 0; - } -} - -static void SetDepth(int x, int y, u32 value) { - const auto& framebuffer = g_state.regs.framebuffer.framebuffer; - const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); - u8* depth_buffer = Memory::GetPhysicalPointer(addr); - - y = framebuffer.height - y; - - const u32 coarse_y = y & ~7; - u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); - u32 stride = framebuffer.width * bytes_per_pixel; - - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; - u8* dst_pixel = depth_buffer + dst_offset; - - switch (framebuffer.depth_format) { - case FramebufferRegs::DepthFormat::D16: - Color::EncodeD16(value, dst_pixel); - break; - - case FramebufferRegs::DepthFormat::D24: - Color::EncodeD24(value, dst_pixel); - break; - - case FramebufferRegs::DepthFormat::D24S8: - Color::EncodeD24X8(value, dst_pixel); - break; - - default: - LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); - UNIMPLEMENTED(); - break; - } -} - -static void SetStencil(int x, int y, u8 value) { - const auto& framebuffer = g_state.regs.framebuffer.framebuffer; - const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); - u8* depth_buffer = Memory::GetPhysicalPointer(addr); - - y = framebuffer.height - y; - - const u32 coarse_y = y & ~7; - u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); - u32 stride = framebuffer.width * bytes_per_pixel; - - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; - u8* dst_pixel = depth_buffer + dst_offset; - - switch (framebuffer.depth_format) { - case Pica::FramebufferRegs::DepthFormat::D16: - case Pica::FramebufferRegs::DepthFormat::D24: - // Nothing to do - break; - - case Pica::FramebufferRegs::DepthFormat::D24S8: - Color::EncodeX24S8(value, dst_pixel); - break; - - default: - LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); - UNIMPLEMENTED(); - break; - } -} - -static u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref) { - switch (action) { - case FramebufferRegs::StencilAction::Keep: - return old_stencil; - - case FramebufferRegs::StencilAction::Zero: - return 0; - - case FramebufferRegs::StencilAction::Replace: - return ref; - - case FramebufferRegs::StencilAction::Increment: - // Saturated increment - return std::min(old_stencil, 254) + 1; - - case FramebufferRegs::StencilAction::Decrement: - // Saturated decrement - return std::max(old_stencil, 1) - 1; - - case FramebufferRegs::StencilAction::Invert: - return ~old_stencil; - - case FramebufferRegs::StencilAction::IncrementWrap: - return old_stencil + 1; - - case FramebufferRegs::StencilAction::DecrementWrap: - return old_stencil - 1; - - default: - LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); - UNIMPLEMENTED(); - return 0; - } -} - // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values struct Fix12P4 { Fix12P4() {} @@ -539,34 +304,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve int t = (int)(v * float24::FromFloat32(static_cast(texture.config.height))) .ToFloat32(); - static auto GetWrappedTexCoord = [](TexturingRegs::TextureConfig::WrapMode mode, - int val, unsigned size) { - switch (mode) { - case TexturingRegs::TextureConfig::ClampToEdge: - val = std::max(val, 0); - val = std::min(val, (int)size - 1); - return val; - - case TexturingRegs::TextureConfig::ClampToBorder: - return val; - - case TexturingRegs::TextureConfig::Repeat: - return (int)((unsigned)val % size); - - case TexturingRegs::TextureConfig::MirroredRepeat: { - unsigned int coord = ((unsigned)val % (2 * size)); - if (coord >= size) - coord = 2 * size - 1 - coord; - return (int)coord; - } - - default: - LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); - UNIMPLEMENTED(); - return 0; - } - }; - if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder && (s < 0 || static_cast(s) >= texture.config.width)) || (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder && @@ -615,9 +352,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve ++tev_stage_index) { const auto& tev_stage = tev_stages[tev_stage_index]; using Source = TexturingRegs::TevStageConfig::Source; - using ColorModifier = TexturingRegs::TevStageConfig::ColorModifier; - using AlphaModifier = TexturingRegs::TevStageConfig::AlphaModifier; - using Operation = TexturingRegs::TevStageConfig::Operation; auto GetSource = [&](Source source) -> Math::Vec4 { switch (source) { @@ -657,187 +391,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve } }; - static auto GetColorModifier = [](ColorModifier factor, - const Math::Vec4& values) -> Math::Vec3 { - switch (factor) { - case ColorModifier::SourceColor: - return values.rgb(); - - case ColorModifier::OneMinusSourceColor: - return (Math::Vec3(255, 255, 255) - values.rgb()).Cast(); - - case ColorModifier::SourceAlpha: - return values.aaa(); - - case ColorModifier::OneMinusSourceAlpha: - return (Math::Vec3(255, 255, 255) - values.aaa()).Cast(); - - case ColorModifier::SourceRed: - return values.rrr(); - - case ColorModifier::OneMinusSourceRed: - return (Math::Vec3(255, 255, 255) - values.rrr()).Cast(); - - case ColorModifier::SourceGreen: - return values.ggg(); - - case ColorModifier::OneMinusSourceGreen: - return (Math::Vec3(255, 255, 255) - values.ggg()).Cast(); - - case ColorModifier::SourceBlue: - return values.bbb(); - - case ColorModifier::OneMinusSourceBlue: - return (Math::Vec3(255, 255, 255) - values.bbb()).Cast(); - } - }; - - static auto GetAlphaModifier = [](AlphaModifier factor, - const Math::Vec4& values) -> u8 { - switch (factor) { - case AlphaModifier::SourceAlpha: - return values.a(); - - case AlphaModifier::OneMinusSourceAlpha: - return 255 - values.a(); - - case AlphaModifier::SourceRed: - return values.r(); - - case AlphaModifier::OneMinusSourceRed: - return 255 - values.r(); - - case AlphaModifier::SourceGreen: - return values.g(); - - case AlphaModifier::OneMinusSourceGreen: - return 255 - values.g(); - - case AlphaModifier::SourceBlue: - return values.b(); - - case AlphaModifier::OneMinusSourceBlue: - return 255 - values.b(); - } - }; - - static auto ColorCombine = [](Operation op, - const Math::Vec3 input[3]) -> Math::Vec3 { - switch (op) { - case Operation::Replace: - return input[0]; - - case Operation::Modulate: - return ((input[0] * input[1]) / 255).Cast(); - - case Operation::Add: { - auto result = input[0] + input[1]; - result.r() = std::min(255, result.r()); - result.g() = std::min(255, result.g()); - result.b() = std::min(255, result.b()); - return result.Cast(); - } - - case Operation::AddSigned: { - // TODO(bunnei): Verify that the color conversion from (float) 0.5f to - // (byte) 128 is correct - auto result = input[0].Cast() + input[1].Cast() - - Math::MakeVec(128, 128, 128); - result.r() = MathUtil::Clamp(result.r(), 0, 255); - result.g() = MathUtil::Clamp(result.g(), 0, 255); - result.b() = MathUtil::Clamp(result.b(), 0, 255); - return result.Cast(); - } - - case Operation::Lerp: - return ((input[0] * input[2] + - input[1] * - (Math::MakeVec(255, 255, 255) - input[2]).Cast()) / - 255) - .Cast(); - - case Operation::Subtract: { - auto result = input[0].Cast() - input[1].Cast(); - result.r() = std::max(0, result.r()); - result.g() = std::max(0, result.g()); - result.b() = std::max(0, result.b()); - return result.Cast(); - } - - case Operation::MultiplyThenAdd: { - auto result = (input[0] * input[1] + 255 * input[2].Cast()) / 255; - result.r() = std::min(255, result.r()); - result.g() = std::min(255, result.g()); - result.b() = std::min(255, result.b()); - return result.Cast(); - } - - case Operation::AddThenMultiply: { - auto result = input[0] + input[1]; - result.r() = std::min(255, result.r()); - result.g() = std::min(255, result.g()); - result.b() = std::min(255, result.b()); - result = (result * input[2].Cast()) / 255; - return result.Cast(); - } - case Operation::Dot3_RGB: { - // Not fully accurate. - // Worst case scenario seems to yield a +/-3 error - // Some HW results indicate that the per-component computation can't have a - // higher precision than 1/256, - // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( - // (0x80,g0,b0),(0x80,g1,b1) ) give different results - int result = - ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + - ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + - ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; - result = std::max(0, std::min(255, result)); - return {(u8)result, (u8)result, (u8)result}; - } - default: - LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); - UNIMPLEMENTED(); - return {0, 0, 0}; - } - }; - - static auto AlphaCombine = [](Operation op, const std::array& input) -> u8 { - switch (op) { - case Operation::Replace: - return input[0]; - - case Operation::Modulate: - return input[0] * input[1] / 255; - - case Operation::Add: - return std::min(255, input[0] + input[1]); - - case Operation::AddSigned: { - // TODO(bunnei): Verify that the color conversion from (float) 0.5f to - // (byte) 128 is correct - auto result = static_cast(input[0]) + static_cast(input[1]) - 128; - return static_cast(MathUtil::Clamp(result, 0, 255)); - } - - case Operation::Lerp: - return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; - - case Operation::Subtract: - return std::max(0, (int)input[0] - (int)input[1]); - - case Operation::MultiplyThenAdd: - return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); - - case Operation::AddThenMultiply: - return (std::min(255, (input[0] + input[1])) * input[2]) / 255; - - default: - LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op); - UNIMPLEMENTED(); - return 0; - } - }; - // color combiner // NOTE: Not sure if the alpha combiner might use the color output of the previous // stage as input. Hence, we currently don't directly write the result to @@ -1152,56 +705,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve return combiner_output[channel]; }; - static auto EvaluateBlendEquation = []( - const Math::Vec4& src, const Math::Vec4& srcfactor, - const Math::Vec4& dest, const Math::Vec4& destfactor, - FramebufferRegs::BlendEquation equation) { - - Math::Vec4 result; - - auto src_result = (src * srcfactor).Cast(); - auto dst_result = (dest * destfactor).Cast(); - - switch (equation) { - case FramebufferRegs::BlendEquation::Add: - result = (src_result + dst_result) / 255; - break; - - case FramebufferRegs::BlendEquation::Subtract: - result = (src_result - dst_result) / 255; - break; - - case FramebufferRegs::BlendEquation::ReverseSubtract: - result = (dst_result - src_result) / 255; - break; - - // TODO: How do these two actually work? - // OpenGL doesn't include the blend factors in the min/max computations, - // but is this what the 3DS actually does? - case FramebufferRegs::BlendEquation::Min: - result.r() = std::min(src.r(), dest.r()); - result.g() = std::min(src.g(), dest.g()); - result.b() = std::min(src.b(), dest.b()); - result.a() = std::min(src.a(), dest.a()); - break; - - case FramebufferRegs::BlendEquation::Max: - result.r() = std::max(src.r(), dest.r()); - result.g() = std::max(src.g(), dest.g()); - result.b() = std::max(src.b(), dest.b()); - result.a() = std::max(src.a(), dest.a()); - break; - - default: - LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); - UNIMPLEMENTED(); - } - - return Math::Vec4( - MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), - MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); - }; - auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), LookupFactor(1, params.factor_source_rgb), LookupFactor(2, params.factor_source_rgb), @@ -1218,58 +721,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve dstfactor, params.blend_equation_a) .a(); } else { - static auto LogicOp = [](u8 src, u8 dest, FramebufferRegs::LogicOp op) -> u8 { - switch (op) { - case FramebufferRegs::LogicOp::Clear: - return 0; - - case FramebufferRegs::LogicOp::And: - return src & dest; - - case FramebufferRegs::LogicOp::AndReverse: - return src & ~dest; - - case FramebufferRegs::LogicOp::Copy: - return src; - - case FramebufferRegs::LogicOp::Set: - return 255; - - case FramebufferRegs::LogicOp::CopyInverted: - return ~src; - - case FramebufferRegs::LogicOp::NoOp: - return dest; - - case FramebufferRegs::LogicOp::Invert: - return ~dest; - - case FramebufferRegs::LogicOp::Nand: - return ~(src & dest); - - case FramebufferRegs::LogicOp::Or: - return src | dest; - - case FramebufferRegs::LogicOp::Nor: - return ~(src | dest); - - case FramebufferRegs::LogicOp::Xor: - return src ^ dest; - - case FramebufferRegs::LogicOp::Equiv: - return ~(src ^ dest); - - case FramebufferRegs::LogicOp::AndInverted: - return ~src & dest; - - case FramebufferRegs::LogicOp::OrReverse: - return src | ~dest; - - case FramebufferRegs::LogicOp::OrInverted: - return ~src | dest; - } - }; - blend_output = Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), diff --git a/src/video_core/rasterizer.h b/src/video_core/swrasterizer/rasterizer.h similarity index 100% rename from src/video_core/rasterizer.h rename to src/video_core/swrasterizer/rasterizer.h diff --git a/src/video_core/swrasterizer.cpp b/src/video_core/swrasterizer/swrasterizer.cpp similarity index 81% rename from src/video_core/swrasterizer.cpp rename to src/video_core/swrasterizer/swrasterizer.cpp index 9cd21f72b5..402b705ddc 100644 --- a/src/video_core/swrasterizer.cpp +++ b/src/video_core/swrasterizer/swrasterizer.cpp @@ -2,8 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "video_core/clipper.h" -#include "video_core/swrasterizer.h" +#include "video_core/swrasterizer/clipper.h" +#include "video_core/swrasterizer/swrasterizer.h" namespace VideoCore { diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer/swrasterizer.h similarity index 100% rename from src/video_core/swrasterizer.h rename to src/video_core/swrasterizer/swrasterizer.h diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp new file mode 100644 index 0000000000..eb18e4ba48 --- /dev/null +++ b/src/video_core/swrasterizer/texturing.cpp @@ -0,0 +1,228 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/math_util.h" +#include "common/vector_math.h" +#include "video_core/regs_texturing.h" +#include "video_core/swrasterizer/texturing.h" + +namespace Pica { +namespace Rasterizer { + +using TevStageConfig = TexturingRegs::TevStageConfig; + +int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) { + switch (mode) { + case TexturingRegs::TextureConfig::ClampToEdge: + val = std::max(val, 0); + val = std::min(val, (int)size - 1); + return val; + + case TexturingRegs::TextureConfig::ClampToBorder: + return val; + + case TexturingRegs::TextureConfig::Repeat: + return (int)((unsigned)val % size); + + case TexturingRegs::TextureConfig::MirroredRepeat: { + unsigned int coord = ((unsigned)val % (2 * size)); + if (coord >= size) + coord = 2 * size - 1 - coord; + return (int)coord; + } + + default: + LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); + UNIMPLEMENTED(); + return 0; + } +}; + +Math::Vec3 GetColorModifier(TevStageConfig::ColorModifier factor, + const Math::Vec4& values) { + using ColorModifier = TevStageConfig::ColorModifier; + + switch (factor) { + case ColorModifier::SourceColor: + return values.rgb(); + + case ColorModifier::OneMinusSourceColor: + return (Math::Vec3(255, 255, 255) - values.rgb()).Cast(); + + case ColorModifier::SourceAlpha: + return values.aaa(); + + case ColorModifier::OneMinusSourceAlpha: + return (Math::Vec3(255, 255, 255) - values.aaa()).Cast(); + + case ColorModifier::SourceRed: + return values.rrr(); + + case ColorModifier::OneMinusSourceRed: + return (Math::Vec3(255, 255, 255) - values.rrr()).Cast(); + + case ColorModifier::SourceGreen: + return values.ggg(); + + case ColorModifier::OneMinusSourceGreen: + return (Math::Vec3(255, 255, 255) - values.ggg()).Cast(); + + case ColorModifier::SourceBlue: + return values.bbb(); + + case ColorModifier::OneMinusSourceBlue: + return (Math::Vec3(255, 255, 255) - values.bbb()).Cast(); + } +}; + +u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4& values) { + using AlphaModifier = TevStageConfig::AlphaModifier; + + switch (factor) { + case AlphaModifier::SourceAlpha: + return values.a(); + + case AlphaModifier::OneMinusSourceAlpha: + return 255 - values.a(); + + case AlphaModifier::SourceRed: + return values.r(); + + case AlphaModifier::OneMinusSourceRed: + return 255 - values.r(); + + case AlphaModifier::SourceGreen: + return values.g(); + + case AlphaModifier::OneMinusSourceGreen: + return 255 - values.g(); + + case AlphaModifier::SourceBlue: + return values.b(); + + case AlphaModifier::OneMinusSourceBlue: + return 255 - values.b(); + } +}; + +Math::Vec3 ColorCombine(TevStageConfig::Operation op, const Math::Vec3 input[3]) { + using Operation = TevStageConfig::Operation; + + switch (op) { + case Operation::Replace: + return input[0]; + + case Operation::Modulate: + return ((input[0] * input[1]) / 255).Cast(); + + case Operation::Add: { + auto result = input[0] + input[1]; + result.r() = std::min(255, result.r()); + result.g() = std::min(255, result.g()); + result.b() = std::min(255, result.b()); + return result.Cast(); + } + + case Operation::AddSigned: { + // TODO(bunnei): Verify that the color conversion from (float) 0.5f to + // (byte) 128 is correct + auto result = + input[0].Cast() + input[1].Cast() - Math::MakeVec(128, 128, 128); + result.r() = MathUtil::Clamp(result.r(), 0, 255); + result.g() = MathUtil::Clamp(result.g(), 0, 255); + result.b() = MathUtil::Clamp(result.b(), 0, 255); + return result.Cast(); + } + + case Operation::Lerp: + return ((input[0] * input[2] + + input[1] * (Math::MakeVec(255, 255, 255) - input[2]).Cast()) / + 255) + .Cast(); + + case Operation::Subtract: { + auto result = input[0].Cast() - input[1].Cast(); + result.r() = std::max(0, result.r()); + result.g() = std::max(0, result.g()); + result.b() = std::max(0, result.b()); + return result.Cast(); + } + + case Operation::MultiplyThenAdd: { + auto result = (input[0] * input[1] + 255 * input[2].Cast()) / 255; + result.r() = std::min(255, result.r()); + result.g() = std::min(255, result.g()); + result.b() = std::min(255, result.b()); + return result.Cast(); + } + + case Operation::AddThenMultiply: { + auto result = input[0] + input[1]; + result.r() = std::min(255, result.r()); + result.g() = std::min(255, result.g()); + result.b() = std::min(255, result.b()); + result = (result * input[2].Cast()) / 255; + return result.Cast(); + } + case Operation::Dot3_RGB: { + // Not fully accurate. Worst case scenario seems to yield a +/-3 error. Some HW results + // indicate that the per-component computation can't have a higher precision than 1/256, + // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give + // different results. + int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + + ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + + ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; + result = std::max(0, std::min(255, result)); + return {(u8)result, (u8)result, (u8)result}; + } + default: + LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); + UNIMPLEMENTED(); + return {0, 0, 0}; + } +}; + +u8 AlphaCombine(TevStageConfig::Operation op, const std::array& input) { + switch (op) { + using Operation = TevStageConfig::Operation; + case Operation::Replace: + return input[0]; + + case Operation::Modulate: + return input[0] * input[1] / 255; + + case Operation::Add: + return std::min(255, input[0] + input[1]); + + case Operation::AddSigned: { + // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct + auto result = static_cast(input[0]) + static_cast(input[1]) - 128; + return static_cast(MathUtil::Clamp(result, 0, 255)); + } + + case Operation::Lerp: + return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; + + case Operation::Subtract: + return std::max(0, (int)input[0] - (int)input[1]); + + case Operation::MultiplyThenAdd: + return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); + + case Operation::AddThenMultiply: + return (std::min(255, (input[0] + input[1])) * input[2]) / 255; + + default: + LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op); + UNIMPLEMENTED(); + return 0; + } +}; + +} // namespace Rasterizer +} // namespace Pica diff --git a/src/video_core/swrasterizer/texturing.h b/src/video_core/swrasterizer/texturing.h new file mode 100644 index 0000000000..24f74a5a33 --- /dev/null +++ b/src/video_core/swrasterizer/texturing.h @@ -0,0 +1,28 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "common/vector_math.h" +#include "video_core/regs_texturing.h" + +namespace Pica { +namespace Rasterizer { + +int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size); + +Math::Vec3 GetColorModifier(TexturingRegs::TevStageConfig::ColorModifier factor, + const Math::Vec4& values); + +u8 GetAlphaModifier(TexturingRegs::TevStageConfig::AlphaModifier factor, + const Math::Vec4& values); + +Math::Vec3 ColorCombine(TexturingRegs::TevStageConfig::Operation op, + const Math::Vec3 input[3]); + +u8 AlphaCombine(TexturingRegs::TevStageConfig::Operation op, const std::array& input); + +} // namespace Rasterizer +} // namespace Pica