pica/command_processor: build geometry pipeline and run geometry shader

The geometry pipeline manages data transfer between the VS, the GS and the primitive assembler. It has four known modes:
 - no GS mode: sends VS output directly to the primitive assembler (what citra currently does)
 - GS mode 0: sends VS output to GS input registers, and sends GS output to primitive assembler
 - GS mode 1: sends VS output to GS uniform registers, and sends GS output to primitive assembler. It also takes an index from the index buffer at the beginning of each primitive to determine the primitive size.
 - GS mode 2: similar to mode 1, but doesn't take the index and uses a fixed primitive size.
hwtest shows that immediate mode also supports GS (at least for mode 0), so the geometry pipeline is refactored into its own class to support both drawing modes.
In immediate mode, some games don't set the pipeline registers to a valid value until the first attribute input, so a geometry pipeline reset flag is set in the `pipeline.vs_default_attributes_setup.index` trigger, and the actual pipeline reconfiguration is performed on the first attribute input.
In the normal drawing mode with an index buffer, the vertex cache is slightly modified to support the geometry pipeline. Instead of OutputVertex, it now holds AttributeBuffer, which is the input to the geometry pipeline. The AttributeBuffer->OutputVertex conversion is done inside the pipeline vertex handler. The actual hardware vertex cache is believed to be implemented in a similar way (because this is the only way that makes sense).
Both the geometry pipeline and the GS unit rely on state being preserved across draw calls, so they are put into the global state. In the future, the other three vertex shader units should also be placed in the global state, and a scheduler should be implemented on top of the four units. Note that the current gs_unit already allows running VS on it in the future.
This commit is contained in:
wwylele 2017-08-04 17:03:17 +03:00
parent 8285ca4ad8
commit 0f35755572
6 changed files with 383 additions and 28 deletions

View File

@ -1,6 +1,7 @@
set(SRCS set(SRCS
command_processor.cpp command_processor.cpp
debug_utils/debug_utils.cpp debug_utils/debug_utils.cpp
geometry_pipeline.cpp
pica.cpp pica.cpp
primitive_assembly.cpp primitive_assembly.cpp
regs.cpp regs.cpp
@ -29,6 +30,7 @@ set(SRCS
set(HEADERS set(HEADERS
command_processor.h command_processor.h
debug_utils/debug_utils.h debug_utils/debug_utils.h
geometry_pipeline.h
gpu_debugger.h gpu_debugger.h
pica.h pica.h
pica_state.h pica_state.h

View File

@ -161,6 +161,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index):
g_state.immediate.current_attribute = 0; g_state.immediate.current_attribute = 0;
g_state.immediate.reset_geometry_pipeline = true;
default_attr_counter = 0; default_attr_counter = 0;
break; break;
@ -234,16 +235,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
shader_engine->Run(g_state.vs, shader_unit); shader_engine->Run(g_state.vs, shader_unit);
shader_unit.WriteOutput(regs.vs, output); shader_unit.WriteOutput(regs.vs, output);
// Send to renderer // Send to geometry pipeline
using Pica::Shader::OutputVertex; if (g_state.immediate.reset_geometry_pipeline) {
auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, g_state.geometry_pipeline.Reconfigure();
const OutputVertex& v2) { g_state.immediate.reset_geometry_pipeline = false;
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); }
}; ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
g_state.geometry_pipeline.Setup(shader_engine);
g_state.primitive_assembler.SubmitVertex( g_state.geometry_pipeline.SubmitVertex(output);
Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output),
AddTriangle);
} }
} }
} }
@ -321,8 +320,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
// The size has been tuned for optimal balance between hit-rate and the cost of lookup // The size has been tuned for optimal balance between hit-rate and the cost of lookup
const size_t VERTEX_CACHE_SIZE = 32; const size_t VERTEX_CACHE_SIZE = 32;
std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache; std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
Shader::OutputVertex output_vertex; Shader::AttributeBuffer vs_output;
unsigned int vertex_cache_pos = 0; unsigned int vertex_cache_pos = 0;
vertex_cache_ids.fill(-1); vertex_cache_ids.fill(-1);
@ -332,6 +331,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
g_state.geometry_pipeline.Reconfigure();
g_state.geometry_pipeline.Setup(shader_engine);
if (g_state.geometry_pipeline.NeedIndexInput())
ASSERT(is_indexed);
for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
// Indexed rendering doesn't use the start offset // Indexed rendering doesn't use the start offset
unsigned int vertex = unsigned int vertex =
@ -345,6 +349,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
bool vertex_cache_hit = false; bool vertex_cache_hit = false;
if (is_indexed) { if (is_indexed) {
if (g_state.geometry_pipeline.NeedIndexInput()) {
g_state.geometry_pipeline.SubmitIndex(vertex);
continue;
}
if (g_debug_context && Pica::g_debug_context->recorder) { if (g_debug_context && Pica::g_debug_context->recorder) {
int size = index_u16 ? 2 : 1; int size = index_u16 ? 2 : 1;
memory_accesses.AddAccess(base_address + index_info.offset + size * index, memory_accesses.AddAccess(base_address + index_info.offset + size * index,
@ -353,7 +362,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
if (vertex == vertex_cache_ids[i]) { if (vertex == vertex_cache_ids[i]) {
output_vertex = vertex_cache[i]; vs_output = vertex_cache[i];
vertex_cache_hit = true; vertex_cache_hit = true;
break; break;
} }
@ -362,7 +371,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
if (!vertex_cache_hit) { if (!vertex_cache_hit) {
// Initialize data for the current vertex // Initialize data for the current vertex
Shader::AttributeBuffer input, output{}; Shader::AttributeBuffer input;
loader.LoadVertex(base_address, index, vertex, input, memory_accesses); loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
// Send to vertex shader // Send to vertex shader
@ -371,26 +380,17 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
(void*)&input); (void*)&input);
shader_unit.LoadInput(regs.vs, input); shader_unit.LoadInput(regs.vs, input);
shader_engine->Run(g_state.vs, shader_unit); shader_engine->Run(g_state.vs, shader_unit);
shader_unit.WriteOutput(regs.vs, output); shader_unit.WriteOutput(regs.vs, vs_output);
// Retrieve vertex from register data
output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output);
if (is_indexed) { if (is_indexed) {
vertex_cache[vertex_cache_pos] = output_vertex; vertex_cache[vertex_cache_pos] = vs_output;
vertex_cache_ids[vertex_cache_pos] = vertex; vertex_cache_ids[vertex_cache_pos] = vertex;
vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
} }
} }
// Send to renderer // Send to geometry pipeline
using Pica::Shader::OutputVertex; g_state.geometry_pipeline.SubmitVertex(vs_output);
auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
const OutputVertex& v2) {
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
};
primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
} }
for (auto& range : memory_accesses.ranges) { for (auto& range : memory_accesses.ranges) {

View File

@ -0,0 +1,274 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <utility>
#include "video_core/geometry_pipeline.h"
#include "video_core/pica_state.h"
#include "video_core/regs.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
namespace Pica {
/// An attribute buffering interface for different pipeline modes.
/// Each pipeline mode in this file (Point, VariablePrimitive, FixedPrimitive) implements it, and
/// GeometryPipeline owns at most one instance at a time.
class GeometryPipelineBackend {
public:
// Virtual so that an implementation can be destroyed through a pointer to this interface
virtual ~GeometryPipelineBackend() = default;
/// Checks if there is no incomplete data transfer
virtual bool IsEmpty() const = 0;
/// Checks if the pipeline needs a direct input from index buffer
virtual bool NeedIndexInput() const = 0;
/// Submits an index from index buffer
/// @param val the value read from the index buffer
virtual void SubmitIndex(unsigned int val) = 0;
/**
* Submits vertex attributes
* @param input attributes of a vertex output from vertex shader
* @return if the buffer is full and the geometry shader should be invoked
*/
virtual bool SubmitVertex(const Shader::AttributeBuffer& input) = 0;
};
// Point mode: vertex shader outputs are streamed into the input registers of the geometry shader
// unit. The vertex shader output count and the geometry shader input count are both constant, and
// the geometry shader is invoked each time the input buffer has been filled up. For example, with
// a geometry shader that takes 6 inputs and a vertex shader that outputs 2 attributes, one
// geometry shader invocation happens every 3 vertices.
// TODO: what happens when the input size is not divisible by the output size?
class GeometryPipeline_Point : public GeometryPipelineBackend {
public:
    /**
     * @param regs register set the pipeline configuration is read from
     * @param unit geometry shader unit whose input registers receive the buffered attributes
     */
    GeometryPipeline_Point(const Regs& regs, Shader::GSUnitState& unit) : regs(regs), unit(unit) {
        ASSERT(regs.pipeline.variable_primitive == 0);
        ASSERT(regs.gs.input_to_uniform == 0);

        vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
        const size_t gs_input_num = regs.gs.max_input_attribute_index + 1;
        ASSERT(gs_input_num % vs_output_num == 0);

        // The staging window covers exactly one geometry shader invocation worth of inputs
        buffer_cur = attribute_buffer.attr;
        buffer_end = buffer_cur + gs_input_num;
    }

    /// True while the write cursor sits at the start of the staging buffer
    bool IsEmpty() const override {
        return buffer_cur == attribute_buffer.attr;
    }

    /// This mode never consumes raw index values
    bool NeedIndexInput() const override {
        return false;
    }

    /// Must not be called in this mode, see NeedIndexInput
    void SubmitIndex(unsigned int val) override {
        UNREACHABLE();
    }

    /**
     * Appends the vertex shader outputs of one vertex to the staging buffer
     * @param input attributes of a vertex output from vertex shader
     * @return true when the staging buffer got filled and was loaded into the shader unit
     */
    bool SubmitVertex(const Shader::AttributeBuffer& input) override {
        buffer_cur = std::copy_n(input.attr, vs_output_num, buffer_cur);
        if (buffer_cur != buffer_end) {
            return false;
        }

        // Staging buffer complete: rewind the cursor and hand the inputs to the shader unit
        buffer_cur = attribute_buffer.attr;
        unit.LoadInput(regs.gs, attribute_buffer);
        return true;
    }

private:
    const Regs& regs;
    Shader::GSUnitState& unit;
    Shader::AttributeBuffer attribute_buffer; ///< Staging area for geometry shader inputs
    Math::Vec4<float24>* buffer_cur;          ///< Next free slot in the staging area
    Math::Vec4<float24>* buffer_end;          ///< One past the last slot used per invocation
    unsigned int vs_output_num;               ///< Attributes copied per submitted vertex
};
// In VariablePrimitive mode, vertex attributes are buffered into the uniform registers in the
// geometry shader unit. The number of vertex is variable, which is specified by the first index
// value in the batch. This mode is usually used for subdivision.
class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend {
public:
    /**
     * @param regs register set the pipeline configuration is read from
     * @param setup geometry shader setup whose float uniforms receive the buffered vertices
     */
    GeometryPipeline_VariablePrimitive(const Regs& regs, Shader::ShaderSetup& setup)
        : regs(regs), setup(setup) {
        ASSERT(regs.pipeline.variable_primitive == 1);
        ASSERT(regs.gs.input_to_uniform == 1);
        vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
    }

    /// The pipeline is idle exactly when it is waiting for the next primitive size
    bool IsEmpty() const override {
        return need_index;
    }

    /// True when the next value to consume is the primitive size taken from the index buffer
    bool NeedIndexInput() const override {
        return need_index;
    }

    /**
     * Starts a new primitive.
     * @param val number of vertices in the primitive, as read from the index buffer
     */
    void SubmitIndex(unsigned int val) override {
        DEBUG_ASSERT(need_index);

        // The number of vertex input is put to the uniform register
        float24 vertex_num = float24::FromFloat32(static_cast<float>(val));
        setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num);

        // The second uniform register and so on are used for receiving input vertices
        buffer_cur = setup.uniforms.f + 1;

        main_vertex_num = regs.pipeline.variable_vertex_main_num_minus_1 + 1;
        // NOTE(review): a size of 0 would make the countdown in SubmitVertex wrap around;
        // presumably the index buffer never supplies it - confirm against hardware behaviour.
        total_vertex_num = val;
        need_index = false;
    }

    /**
     * Buffers one vertex of the current primitive into the uniform registers
     * @param input attributes of a vertex output from vertex shader
     * @return true when the last vertex of the primitive has been received
     */
    bool SubmitVertex(const Shader::AttributeBuffer& input) override {
        DEBUG_ASSERT(!need_index);
        if (main_vertex_num != 0) {
            // For main vertices, receive all attributes
            buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur);
            --main_vertex_num;
        } else {
            // For other vertices, only receive the first attribute (usually the position)
            *(buffer_cur++) = input.attr[0];
        }
        --total_vertex_num;

        if (total_vertex_num == 0) {
            // Primitive complete: the next input has to be a new primitive size
            need_index = true;
            return true;
        }

        return false;
    }

private:
    bool need_index = true;
    const Regs& regs;
    Shader::ShaderSetup& setup;
    // The members below only carry meaningful values between SubmitIndex and the end of the
    // primitive. They are still given defined defaults so that a call-order violation, which is
    // only caught by DEBUG_ASSERT, never reads indeterminate values.
    unsigned int main_vertex_num = 0;          ///< Remaining vertices that get all attributes
    unsigned int total_vertex_num = 0;         ///< Remaining vertices of the current primitive
    Math::Vec4<float24>* buffer_cur = nullptr; ///< Next uniform register to write
    unsigned int vs_output_num = 0;            ///< Attributes copied per main vertex
};
// FixedPrimitive mode: vertex attributes are buffered into the uniform registers of the geometry
// shader unit, with a constant number of vertices per shader invocation. This is usually used for
// particle system.
class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend {
public:
    /**
     * @param regs register set the pipeline configuration is read from
     * @param setup geometry shader setup whose float uniforms receive the buffered vertices
     */
    GeometryPipeline_FixedPrimitive(const Regs& regs, Shader::ShaderSetup& setup)
        : regs(regs), setup(setup) {
        ASSERT(regs.pipeline.variable_primitive == 0);
        ASSERT(regs.gs.input_to_uniform == 1);

        vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
        ASSERT(vs_output_num == regs.pipeline.gs_config.stride_minus_1 + 1);

        // The destination window starts at the configured uniform register and holds
        // vertex_num vertices of vs_output_num attributes each
        const size_t vertex_num = regs.pipeline.gs_config.fixed_vertex_num_minus_1 + 1;
        buffer_begin = setup.uniforms.f + regs.pipeline.gs_config.start_index;
        buffer_cur = buffer_begin;
        buffer_end = buffer_begin + vs_output_num * vertex_num;
    }

    /// True while the write cursor sits at the start of the destination window
    bool IsEmpty() const override {
        return buffer_cur == buffer_begin;
    }

    /// This mode never consumes raw index values
    bool NeedIndexInput() const override {
        return false;
    }

    /// Must not be called in this mode, see NeedIndexInput
    void SubmitIndex(unsigned int val) override {
        UNREACHABLE();
    }

    /**
     * Appends the vertex shader outputs of one vertex to the destination window
     * @param input attributes of a vertex output from vertex shader
     * @return true when the window got filled, after which the cursor is rewound
     */
    bool SubmitVertex(const Shader::AttributeBuffer& input) override {
        buffer_cur = std::copy_n(input.attr, vs_output_num, buffer_cur);
        if (buffer_cur != buffer_end) {
            return false;
        }

        buffer_cur = buffer_begin;
        return true;
    }

private:
    const Regs& regs;
    Shader::ShaderSetup& setup;
    Math::Vec4<float24>* buffer_begin; ///< First uniform register of the destination window
    Math::Vec4<float24>* buffer_cur;   ///< Next uniform register to write
    Math::Vec4<float24>* buffer_end;   ///< One past the last register of the window
    unsigned int vs_output_num;        ///< Attributes copied per submitted vertex
};
// Only stores a reference to the owning state. No backend exists until Reconfigure() creates one.
GeometryPipeline::GeometryPipeline(State& state) : state(state) {}
// Defaulted here rather than in the header: the header only forward-declares
// GeometryPipelineBackend, and destroying the std::unique_ptr member needs the complete type,
// which is defined earlier in this file.
GeometryPipeline::~GeometryPipeline() = default;
/// Stores the handler that receives vertex shader output directly when no backend is active.
/// The handler is taken by value and moved into place, so it is not copied a second time.
void GeometryPipeline::SetVertexHandler(Shader::VertexHandler vertex_handler) {
    this->vertex_handler = std::move(vertex_handler);
}
/// Remembers the shader engine and prepares it for the geometry shader program.
/// Does nothing when no backend is configured, i.e. when the geometry shader is not in use.
void GeometryPipeline::Setup(Shader::ShaderEngine* shader_engine) {
    if (backend) {
        this->shader_engine = shader_engine;
        shader_engine->SetupBatch(state.gs, state.regs.gs.main_offset);
    }
}
void GeometryPipeline::Reconfigure() {
ASSERT(!backend || backend->IsEmpty());
if (state.regs.pipeline.use_gs == PipelineRegs::UseGS::No) {
backend = nullptr;
return;
}
ASSERT(state.regs.pipeline.use_gs == PipelineRegs::UseGS::Yes);
// The following assumes that when geometry shader is in use, the shader unit 3 is configured as
// a geometry shader unit.
// TODO: what happens if this is not true?
ASSERT(state.regs.pipeline.gs_unit_exclusive_configuration == 1);
ASSERT(state.regs.gs.shader_mode == ShaderRegs::ShaderMode::GS);
state.gs_unit.ConfigOutput(state.regs.gs);
ASSERT(state.regs.pipeline.vs_outmap_total_minus_1_a ==
state.regs.pipeline.vs_outmap_total_minus_1_b);
switch (state.regs.pipeline.gs_config.mode) {
case PipelineRegs::GSMode::Point:
backend = std::make_unique<GeometryPipeline_Point>(state.regs, state.gs_unit);
break;
case PipelineRegs::GSMode::VariablePrimitive:
backend = std::make_unique<GeometryPipeline_VariablePrimitive>(state.regs, state.gs);
break;
case PipelineRegs::GSMode::FixedPrimitive:
backend = std::make_unique<GeometryPipeline_FixedPrimitive>(state.regs, state.gs);
break;
default:
UNREACHABLE();
}
}
/// Reports whether the active backend wants a raw index value next.
/// Always false when no backend is configured.
bool GeometryPipeline::NeedIndexInput() const {
    return backend && backend->NeedIndexInput();
}
void GeometryPipeline::SubmitIndex(unsigned int val) {
backend->SubmitIndex(val);
}
/// Routes one vertex shader output either straight to the vertex handler or through the backend,
/// running the geometry shader whenever the backend reports a full buffer.
void GeometryPipeline::SubmitVertex(const Shader::AttributeBuffer& input) {
    if (!backend) {
        // No backend means the geometry shader is disabled, so we send the vertex shader output
        // directly to the primitive assembler.
        vertex_handler(input);
        return;
    }

    const bool buffer_full = backend->SubmitVertex(input);
    if (!buffer_full) {
        return;
    }

    shader_engine->Run(state.gs, state.gs_unit);

    // The uniform b15 is set to true after every geometry shader invocation. This is useful
    // for the shader to know if this is the first invocation in a batch, if the program set
    // b15 to false first.
    state.gs.uniforms.b[15] = true;
}
} // namespace Pica

View File

@ -0,0 +1,49 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "video_core/shader/shader.h"
namespace Pica {
struct State;
class GeometryPipelineBackend;
/// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler
class GeometryPipeline {
public:
    /// @param state the Pica state this pipeline reads its registers and shader units from.
    ///              Only a reference is kept, so the state has to outlive the pipeline.
    explicit GeometryPipeline(State& state);

    /// Declared here and defined in the source file, where GeometryPipelineBackend is complete
    ~GeometryPipeline();

    /// Sets the handler for receiving vertex outputs from vertex shader
    void SetVertexHandler(Shader::VertexHandler vertex_handler);

    /**
     * Setup the geometry shader unit if it is in use
     * @param shader_engine the shader engine for the geometry shader to run
     */
    void Setup(Shader::ShaderEngine* shader_engine);

    /// Reconfigures the pipeline according to current register settings
    void Reconfigure();

    /// Checks if the pipeline needs a direct input from index buffer
    bool NeedIndexInput() const;

    /// Submits an index from index buffer. Call this only when NeedIndexInput returns true
    void SubmitIndex(unsigned int val);

    /// Submits vertex attributes output from vertex shader
    void SubmitVertex(const Shader::AttributeBuffer& input);

private:
    Shader::VertexHandler vertex_handler;
    // Non-owning; assigned by Setup() when a backend is active. Starts out null so that it never
    // holds an indeterminate value before the first Setup() call.
    Shader::ShaderEngine* shader_engine = nullptr;
    // Null while the geometry shader is disabled
    std::unique_ptr<GeometryPipelineBackend> backend;
    State& state;
};
} // namespace Pica

View File

@ -3,9 +3,11 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <cstring> #include <cstring>
#include "video_core/geometry_pipeline.h"
#include "video_core/pica.h" #include "video_core/pica.h"
#include "video_core/pica_state.h" #include "video_core/pica_state.h"
#include "video_core/regs_pipeline.h" #include "video_core/renderer_base.h"
#include "video_core/video_core.h"
namespace Pica { namespace Pica {
@ -24,6 +26,23 @@ void Zero(T& o) {
memset(&o, 0, sizeof(o)); memset(&o, 0, sizeof(o));
} }
/// Wires this state's geometry shader unit and geometry pipeline to its primitive assembler.
/// The registered handlers capture `this`, so a State has to stay at a fixed address for as long
/// as they may be invoked.
State::State() : geometry_pipeline(*this) {
    // Converts a shader output into an OutputVertex and feeds it to the primitive assembler,
    // which passes completed triangles on to the rasterizer.
    auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) {
        using Pica::Shader::OutputVertex;
        auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
                              const OutputVertex& v2) {
            VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
        };
        primitive_assembler.SubmitVertex(
            Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, vertex), AddTriangle);
    };

    auto SetWinding = [this]() { primitive_assembler.SetWinding(); };

    // Register on this instance's own members rather than through the global g_state: the
    // lambdas capture `this`, so going through the global would attach handlers bound to this
    // object to a different one whenever this is not the global instance.
    gs_unit.SetVertexHandler(SubmitVertex, SetWinding);
    geometry_pipeline.SetVertexHandler(SubmitVertex);
}
void State::Reset() { void State::Reset() {
Zero(regs); Zero(regs);
Zero(vs); Zero(vs);

View File

@ -8,6 +8,7 @@
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/vector_math.h" #include "common/vector_math.h"
#include "video_core/geometry_pipeline.h"
#include "video_core/primitive_assembly.h" #include "video_core/primitive_assembly.h"
#include "video_core/regs.h" #include "video_core/regs.h"
#include "video_core/shader/shader.h" #include "video_core/shader/shader.h"
@ -16,6 +17,7 @@ namespace Pica {
/// Struct used to describe current Pica state /// Struct used to describe current Pica state
struct State { struct State {
State();
void Reset(); void Reset();
/// Pica registers /// Pica registers
@ -137,8 +139,17 @@ struct State {
Shader::AttributeBuffer input_vertex; Shader::AttributeBuffer input_vertex;
// Index of the next attribute to be loaded into `input_vertex`. // Index of the next attribute to be loaded into `input_vertex`.
u32 current_attribute = 0; u32 current_attribute = 0;
// Indicates the immediate mode just started and the geometry pipeline needs to reconfigure
bool reset_geometry_pipeline = true;
} immediate; } immediate;
// The geometry shader unit needs to be kept in the global state because some shaders rely on
// register values being preserved across shader invocations.
// TODO: also bring the three vertex shader units here and implement the shader scheduler.
Shader::GSUnitState gs_unit;
GeometryPipeline geometry_pipeline;
// This is constructed with a dummy triangle topology // This is constructed with a dummy triangle topology
PrimitiveAssembler<Shader::OutputVertex> primitive_assembler; PrimitiveAssembler<Shader::OutputVertex> primitive_assembler;
}; };