From 0f35755572fe63534813528de9a0710193f2e335 Mon Sep 17 00:00:00 2001 From: wwylele Date: Fri, 4 Aug 2017 17:03:17 +0300 Subject: [PATCH] pica/command_processor: build geometry pipeline and run geometry shader The geometry pipeline manages data transfer between VS, GS and primitive assembler. It has four known modes: - no GS mode: sends VS output directly to the primitive assembler (what citra currently does) - GS mode 0: sends VS output to GS input registers, and sends GS output to primitive assembler - GS mode 1: sends VS output to GS uniform registers, and sends GS output to primitive assembler. It also takes an index from the index buffer at the beginning of each primitive to determine the primitive size. - GS mode 2: similar to mode 1, but doesn't take the index and uses a fixed primitive size. hwtest shows that immediate mode also supports GS (at least for mode 0), so the geometry pipeline gets refactored into its own class to support both drawing modes. In the immediate mode, some games don't set the pipeline registers to a valid value until the first attribute input, so a geometry pipeline reset flag is set in the `pipeline.vs_default_attributes_setup.index` trigger, and the actual pipeline reconfiguration is triggered on the first attribute input. In the normal drawing mode with index buffer, the vertex cache is slightly modified to support the geometry pipeline. Instead of OutputVertex, it now holds AttributeBuffer, which is the input to the geometry pipeline. The AttributeBuffer->OutputVertex conversion is done inside the pipeline vertex handler. The actual hardware vertex cache is believed to be implemented in a similar way (because this is the only way that makes sense). Both the geometry pipeline and the GS unit rely on state preservation across draw calls, so they are put into the global state. In the future, the other three vertex shader units should also be placed in the global state, and a scheduler should be implemented on top of the four units. 
Note that the current gs_unit already allows running VS on it in the future. --- src/video_core/CMakeLists.txt | 2 + src/video_core/command_processor.cpp | 54 +++--- src/video_core/geometry_pipeline.cpp | 274 +++++++++++++++++++++++++++ src/video_core/geometry_pipeline.h | 49 +++++ src/video_core/pica.cpp | 21 +- src/video_core/pica_state.h | 11 ++ 6 files changed, 383 insertions(+), 28 deletions(-) create mode 100644 src/video_core/geometry_pipeline.cpp create mode 100644 src/video_core/geometry_pipeline.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index cffa4c952..82f47d8a9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,6 +1,7 @@ set(SRCS command_processor.cpp debug_utils/debug_utils.cpp + geometry_pipeline.cpp pica.cpp primitive_assembly.cpp regs.cpp @@ -29,6 +30,7 @@ set(SRCS set(HEADERS command_processor.h debug_utils/debug_utils.h + geometry_pipeline.h gpu_debugger.h pica.h pica_state.h diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index f98ca3302..fb65a3a0a 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -161,6 +161,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): g_state.immediate.current_attribute = 0; + g_state.immediate.reset_geometry_pipeline = true; default_attr_counter = 0; break; @@ -234,16 +235,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { shader_engine->Run(g_state.vs, shader_unit); shader_unit.WriteOutput(regs.vs, output); - // Send to renderer - using Pica::Shader::OutputVertex; - auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, - const OutputVertex& v2) { - VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); - }; - - g_state.primitive_assembler.SubmitVertex( - Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output), - AddTriangle); + // Send to geometry 
pipeline + if (g_state.immediate.reset_geometry_pipeline) { + g_state.geometry_pipeline.Reconfigure(); + g_state.immediate.reset_geometry_pipeline = false; + } + ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); + g_state.geometry_pipeline.Setup(shader_engine); + g_state.geometry_pipeline.SubmitVertex(output); } } } @@ -321,8 +320,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // The size has been tuned for optimal balance between hit-rate and the cost of lookup const size_t VERTEX_CACHE_SIZE = 32; std::array vertex_cache_ids; - std::array vertex_cache; - Shader::OutputVertex output_vertex; + std::array vertex_cache; + Shader::AttributeBuffer vs_output; unsigned int vertex_cache_pos = 0; vertex_cache_ids.fill(-1); @@ -332,6 +331,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); + g_state.geometry_pipeline.Reconfigure(); + g_state.geometry_pipeline.Setup(shader_engine); + if (g_state.geometry_pipeline.NeedIndexInput()) + ASSERT(is_indexed); + for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { // Indexed rendering doesn't use the start offset unsigned int vertex = @@ -345,6 +349,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { bool vertex_cache_hit = false; if (is_indexed) { + if (g_state.geometry_pipeline.NeedIndexInput()) { + g_state.geometry_pipeline.SubmitIndex(vertex); + continue; + } + if (g_debug_context && Pica::g_debug_context->recorder) { int size = index_u16 ? 
2 : 1; memory_accesses.AddAccess(base_address + index_info.offset + size * index, @@ -353,7 +362,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { if (vertex == vertex_cache_ids[i]) { - output_vertex = vertex_cache[i]; + vs_output = vertex_cache[i]; vertex_cache_hit = true; break; } @@ -362,7 +371,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (!vertex_cache_hit) { // Initialize data for the current vertex - Shader::AttributeBuffer input, output{}; + Shader::AttributeBuffer input; loader.LoadVertex(base_address, index, vertex, input, memory_accesses); // Send to vertex shader @@ -371,26 +380,17 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { (void*)&input); shader_unit.LoadInput(regs.vs, input); shader_engine->Run(g_state.vs, shader_unit); - shader_unit.WriteOutput(regs.vs, output); - - // Retrieve vertex from register data - output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output); + shader_unit.WriteOutput(regs.vs, vs_output); if (is_indexed) { - vertex_cache[vertex_cache_pos] = output_vertex; + vertex_cache[vertex_cache_pos] = vs_output; vertex_cache_ids[vertex_cache_pos] = vertex; vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; } } - // Send to renderer - using Pica::Shader::OutputVertex; - auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, - const OutputVertex& v2) { - VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); - }; - - primitive_assembler.SubmitVertex(output_vertex, AddTriangle); + // Send to geometry pipeline + g_state.geometry_pipeline.SubmitVertex(vs_output); } for (auto& range : memory_accesses.ranges) { diff --git a/src/video_core/geometry_pipeline.cpp b/src/video_core/geometry_pipeline.cpp new file mode 100644 index 000000000..b146e2ecb --- /dev/null +++ b/src/video_core/geometry_pipeline.cpp @@ -0,0 +1,274 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any 
later version +// Refer to the license.txt file included. + +#include "video_core/geometry_pipeline.h" +#include "video_core/pica_state.h" +#include "video_core/regs.h" +#include "video_core/renderer_base.h" +#include "video_core/video_core.h" + +namespace Pica { + +/// An attribute buffering interface implemented once per geometry pipeline mode +class GeometryPipelineBackend { +public: + virtual ~GeometryPipelineBackend() = default; + + /// Checks if there is no incomplete data transfer + virtual bool IsEmpty() const = 0; + + /// Checks if the pipeline needs a direct input from the index buffer + virtual bool NeedIndexInput() const = 0; + + /// Submits an index from the index buffer + virtual void SubmitIndex(unsigned int val) = 0; + + /** + * Submits vertex attributes + * @param input attributes of a vertex output from the vertex shader + * @return true if the buffer is full and the geometry shader should be invoked + */ + virtual bool SubmitVertex(const Shader::AttributeBuffer& input) = 0; +}; + +// In the Point mode, vertex attributes are sent to the input registers in the geometry shader unit. +// The sizes of the vertex shader outputs and geometry shader inputs are constant. The geometry +// shader is invoked once its input buffer is filled by vertex shader outputs. For example, if we +// have a geometry shader that takes 6 inputs, and the vertex shader outputs 2 attributes, it takes +// 3 vertices for one geometry shader invocation. +// TODO: what happens when the input size is not divisible by the output size? 
+class GeometryPipeline_Point : public GeometryPipelineBackend { +public: + GeometryPipeline_Point(const Regs& regs, Shader::GSUnitState& unit) : regs(regs), unit(unit) { + ASSERT(regs.pipeline.variable_primitive == 0); + ASSERT(regs.gs.input_to_uniform == 0); + vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; + size_t gs_input_num = regs.gs.max_input_attribute_index + 1; + ASSERT(gs_input_num % vs_output_num == 0); + buffer_cur = attribute_buffer.attr; + buffer_end = attribute_buffer.attr + gs_input_num; + } + + bool IsEmpty() const override { + return buffer_cur == attribute_buffer.attr; + } + + bool NeedIndexInput() const override { + return false; + } + + void SubmitIndex(unsigned int val) override { + UNREACHABLE(); + } + + bool SubmitVertex(const Shader::AttributeBuffer& input) override { + buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); + if (buffer_cur == buffer_end) { + buffer_cur = attribute_buffer.attr; + unit.LoadInput(regs.gs, attribute_buffer); + return true; + } + return false; + } + +private: + const Regs& regs; + Shader::GSUnitState& unit; + Shader::AttributeBuffer attribute_buffer; + Math::Vec4<float24>* buffer_cur; + Math::Vec4<float24>* buffer_end; + unsigned int vs_output_num; +}; + +// In VariablePrimitive mode, vertex attributes are buffered into the uniform registers in the +// geometry shader unit. The number of vertices is variable and is specified by the first index +// value in the batch. This mode is usually used for subdivision. 
+class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend { +public: + GeometryPipeline_VariablePrimitive(const Regs& regs, Shader::ShaderSetup& setup) + : regs(regs), setup(setup) { + ASSERT(regs.pipeline.variable_primitive == 1); + ASSERT(regs.gs.input_to_uniform == 1); + vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; + } + + bool IsEmpty() const override { + return need_index; + } + + bool NeedIndexInput() const override { + return need_index; + } + + void SubmitIndex(unsigned int val) override { + DEBUG_ASSERT(need_index); + + // The number of input vertices is written to the first uniform register + float24 vertex_num = float24::FromFloat32(static_cast<float>(val)); + setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num); + + // The second uniform register and so on are used for receiving input vertices + buffer_cur = setup.uniforms.f + 1; + + main_vertex_num = regs.pipeline.variable_vertex_main_num_minus_1 + 1; + total_vertex_num = val; + need_index = false; + } + + bool SubmitVertex(const Shader::AttributeBuffer& input) override { + DEBUG_ASSERT(!need_index); + if (main_vertex_num != 0) { + // For main vertices, receive all attributes + buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); + --main_vertex_num; + } else { + // For other vertices, only receive the first attribute (usually the position) + *(buffer_cur++) = input.attr[0]; + } + --total_vertex_num; + + if (total_vertex_num == 0) { + need_index = true; + return true; + } + + return false; + } + +private: + bool need_index = true; + const Regs& regs; + Shader::ShaderSetup& setup; + unsigned int main_vertex_num = 0; + unsigned int total_vertex_num = 0; + Math::Vec4<float24>* buffer_cur = nullptr; + unsigned int vs_output_num; +}; + +// In FixedPrimitive mode, vertex attributes are buffered into the uniform registers in the geometry +// shader unit. The number of vertices per shader invocation is constant. This is usually used +// for particle systems. 
+class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend { +public: + GeometryPipeline_FixedPrimitive(const Regs& regs, Shader::ShaderSetup& setup) + : regs(regs), setup(setup) { + ASSERT(regs.pipeline.variable_primitive == 0); + ASSERT(regs.gs.input_to_uniform == 1); + vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; + ASSERT(vs_output_num == regs.pipeline.gs_config.stride_minus_1 + 1); + size_t vertex_num = regs.pipeline.gs_config.fixed_vertex_num_minus_1 + 1; + buffer_cur = buffer_begin = setup.uniforms.f + regs.pipeline.gs_config.start_index; + buffer_end = buffer_begin + vs_output_num * vertex_num; + } + + bool IsEmpty() const override { + return buffer_cur == buffer_begin; + } + + bool NeedIndexInput() const override { + return false; + } + + void SubmitIndex(unsigned int val) override { + UNREACHABLE(); + } + + bool SubmitVertex(const Shader::AttributeBuffer& input) override { + buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); + if (buffer_cur == buffer_end) { + buffer_cur = buffer_begin; + return true; + } + return false; + } + +private: + const Regs& regs; + Shader::ShaderSetup& setup; + Math::Vec4<float24>* buffer_begin; + Math::Vec4<float24>* buffer_cur; + Math::Vec4<float24>* buffer_end; + unsigned int vs_output_num; +}; + +GeometryPipeline::GeometryPipeline(State& state) : state(state) {} + +GeometryPipeline::~GeometryPipeline() = default; + +void GeometryPipeline::SetVertexHandler(Shader::VertexHandler vertex_handler) { + this->vertex_handler = vertex_handler; +} + +void GeometryPipeline::Setup(Shader::ShaderEngine* shader_engine) { + if (!backend) + return; + + this->shader_engine = shader_engine; + shader_engine->SetupBatch(state.gs, state.regs.gs.main_offset); +} + +void GeometryPipeline::Reconfigure() { + ASSERT(!backend || backend->IsEmpty()); + + if (state.regs.pipeline.use_gs == PipelineRegs::UseGS::No) { + backend = nullptr; + return; + } + + ASSERT(state.regs.pipeline.use_gs == PipelineRegs::UseGS::Yes); + + // The 
following assumes that when geometry shader is in use, the shader unit 3 is configured as + // a geometry shader unit. + // TODO: what happens if this is not true? + ASSERT(state.regs.pipeline.gs_unit_exclusive_configuration == 1); + ASSERT(state.regs.gs.shader_mode == ShaderRegs::ShaderMode::GS); + + state.gs_unit.ConfigOutput(state.regs.gs); + + ASSERT(state.regs.pipeline.vs_outmap_total_minus_1_a == + state.regs.pipeline.vs_outmap_total_minus_1_b); + + switch (state.regs.pipeline.gs_config.mode) { + case PipelineRegs::GSMode::Point: + backend = std::make_unique<GeometryPipeline_Point>(state.regs, state.gs_unit); + break; + case PipelineRegs::GSMode::VariablePrimitive: + backend = std::make_unique<GeometryPipeline_VariablePrimitive>(state.regs, state.gs); + break; + case PipelineRegs::GSMode::FixedPrimitive: + backend = std::make_unique<GeometryPipeline_FixedPrimitive>(state.regs, state.gs); + break; + default: + UNREACHABLE(); + } +} + +bool GeometryPipeline::NeedIndexInput() const { + if (!backend) + return false; + return backend->NeedIndexInput(); +} + +void GeometryPipeline::SubmitIndex(unsigned int val) { + backend->SubmitIndex(val); +} + +void GeometryPipeline::SubmitVertex(const Shader::AttributeBuffer& input) { + if (!backend) { + // No backend means the geometry shader is disabled, so we send the vertex shader output + // directly to the primitive assembler. + vertex_handler(input); + } else { + if (backend->SubmitVertex(input)) { + shader_engine->Run(state.gs, state.gs_unit); + + // The uniform b15 is set to true after every geometry shader invocation. This is useful + // for the shader to know if this is the first invocation in a batch, if the program set + // b15 to false first. 
+ state.gs.uniforms.b[15] = true; + } + } +} + +} // namespace Pica diff --git a/src/video_core/geometry_pipeline.h b/src/video_core/geometry_pipeline.h new file mode 100644 index 000000000..91fdd3192 --- /dev/null +++ b/src/video_core/geometry_pipeline.h @@ -0,0 +1,49 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include "video_core/shader/shader.h" + +namespace Pica { + +struct State; + +class GeometryPipelineBackend; + +/// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler +class GeometryPipeline { +public: + explicit GeometryPipeline(State& state); + ~GeometryPipeline(); + + /// Sets the handler for receiving vertex outputs from vertex shader + void SetVertexHandler(Shader::VertexHandler vertex_handler); + + /** + * Setup the geometry shader unit if it is in use + * @param shader_engine the shader engine for the geometry shader to run + */ + void Setup(Shader::ShaderEngine* shader_engine); + + /// Reconfigures the pipeline according to current register settings + void Reconfigure(); + + /// Checks if the pipeline needs a direct input from index buffer + bool NeedIndexInput() const; + + /// Submits an index from index buffer. Call this only when NeedIndexInput returns true + void SubmitIndex(unsigned int val); + + /// Submits vertex attributes output from vertex shader + void SubmitVertex(const Shader::AttributeBuffer& input); + +private: + Shader::VertexHandler vertex_handler; + Shader::ShaderEngine* shader_engine = nullptr; + std::unique_ptr<GeometryPipelineBackend> backend; + State& state; +}; +} // namespace Pica diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index b95148a6a..218e06883 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -3,9 +3,11 @@ // Refer to the license.txt file included. 
#include <cstring> +#include "video_core/geometry_pipeline.h" #include "video_core/pica.h" #include "video_core/pica_state.h" -#include "video_core/regs_pipeline.h" +#include "video_core/renderer_base.h" +#include "video_core/video_core.h" namespace Pica { @@ -24,6 +26,23 @@ void Zero(T& o) { memset(&o, 0, sizeof(o)); } +State::State() : geometry_pipeline(*this) { + auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) { + using Pica::Shader::OutputVertex; + auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, + const OutputVertex& v2) { + VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); + }; + primitive_assembler.SubmitVertex( + Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, vertex), AddTriangle); + }; + + auto SetWinding = [this]() { primitive_assembler.SetWinding(); }; + + gs_unit.SetVertexHandler(SubmitVertex, SetWinding); + geometry_pipeline.SetVertexHandler(SubmitVertex); +} + void State::Reset() { Zero(regs); Zero(vs); diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 864a2c9e6..c6634a0bc 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -8,6 +8,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "common/vector_math.h" +#include "video_core/geometry_pipeline.h" #include "video_core/primitive_assembly.h" #include "video_core/regs.h" #include "video_core/shader/shader.h" @@ -16,6 +17,7 @@ namespace Pica { /// Struct used to describe current Pica state struct State { + State(); void Reset(); /// Pica registers @@ -137,8 +139,17 @@ struct State { Shader::AttributeBuffer input_vertex; // Index of the next attribute to be loaded into `input_vertex`. 
u32 current_attribute = 0; + // Indicates the immediate mode just started and the geometry pipeline needs to reconfigure + bool reset_geometry_pipeline = true; } immediate; + // The geometry shader needs to be kept in the global state because some shaders rely on + // preserved register values across shader invocations. + // TODO: also bring the three vertex shader units here and implement the shader scheduler. + Shader::GSUnitState gs_unit; + + GeometryPipeline geometry_pipeline; + // This is constructed with a dummy triangle topology PrimitiveAssembler primitive_assembler; };