shader: Add integer attribute get optimization pass

Works around an nvidia driver bug, where casting the integer attributes to float and back to an integer always returned 0.
This commit is contained in:
ameerj 2021-12-24 20:00:28 -05:00
parent 640fc1418b
commit 14ac0c2923
9 changed files with 86 additions and 0 deletions

View File

@ -126,6 +126,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
}
}
void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32) {
switch (attr) {
case IR::Attribute::PrimitiveId:
ctx.Add("MOV.S {}.x,primitive.id;", inst);
break;
case IR::Attribute::InstanceId:
ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
break;
case IR::Attribute::VertexId:
ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
break;
default:
throw NotImplementedException("Get U32 attribute {}", attr);
}
}
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
[[maybe_unused]] ScalarU32 vertex) {
const u32 element{static_cast<u32>(attr) % 4};

View File

@ -50,6 +50,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);

View File

@ -221,6 +221,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
}
}
void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, std::string_view) {
switch (attr) {
case IR::Attribute::PrimitiveId:
ctx.AddU32("{}=uint(gl_PrimitiveID);", inst);
break;
case IR::Attribute::InstanceId:
ctx.AddU32("{}=uint(gl_InstanceID);", inst);
break;
case IR::Attribute::VertexId:
ctx.AddU32("{}=uint(gl_VertexID);", inst);
break;
default:
throw NotImplementedException("Get U32 attribute {}", attr);
}
}
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
[[maybe_unused]] std::string_view vertex) {
if (IR::IsGeneric(attr)) {

View File

@ -60,6 +60,8 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
const IR::Value& offset);
void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
std::string_view vertex);
void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
std::string_view vertex);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
std::string_view vertex);
void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,

View File

@ -355,6 +355,31 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
}
}
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
switch (attr) {
case IR::Attribute::PrimitiveId:
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
case IR::Attribute::InstanceId:
if (ctx.profile.support_vertex_instance_id) {
return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
} else {
const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)};
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)};
return ctx.OpISub(ctx.U32[1], index, base);
}
case IR::Attribute::VertexId:
if (ctx.profile.support_vertex_instance_id) {
return ctx.OpLoad(ctx.U32[1], ctx.vertex_id);
} else {
const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)};
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
return ctx.OpISub(ctx.U32[1], index, base);
}
default:
throw NotImplementedException("Read U32 attribute {}", attr);
}
}
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {
const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)};
if (!output) {

View File

@ -53,6 +53,7 @@ Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);

View File

@ -40,6 +40,7 @@ OPCODE(GetCbufU32, U32, U32,
OPCODE(GetCbufF32, F32, U32, U32, )
OPCODE(GetCbufU32x2, U32x2, U32, U32, )
OPCODE(GetAttribute, F32, Attribute, U32, )
OPCODE(GetAttributeU32, U32, Attribute, U32, )
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
OPCODE(GetAttributeIndexed, F32, U32, U32, )
OPCODE(SetAttributeIndexed, Void, U32, F32, U32, )

View File

@ -389,6 +389,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
info.uses_demote_to_helper_invocation = true;
break;
case IR::Opcode::GetAttribute:
case IR::Opcode::GetAttributeU32:
info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
break;
case IR::Opcode::SetAttribute:

View File

@ -505,6 +505,29 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
return;
}
}
if constexpr (op == IR::Opcode::BitCastU32F32) {
// Workaround for new NVIDIA driver bug, where:
// uint attr = ftou(itof(gl_InstanceID));
// always returned 0.
// We can instead manually optimize this and work around the driver bug:
// uint attr = uint(gl_InstanceID);
if (arg_inst->GetOpcode() == IR::Opcode::GetAttribute) {
const IR::Attribute attr{arg_inst->Arg(0).Attribute()};
switch (attr) {
case IR::Attribute::PrimitiveId:
case IR::Attribute::InstanceId:
case IR::Attribute::VertexId:
break;
default:
return;
}
// Replace the bitcasts with an integer attribute get
inst.ReplaceOpcode(IR::Opcode::GetAttributeU32);
inst.SetArg(0, arg_inst->Arg(0));
inst.SetArg(1, arg_inst->Arg(1));
return;
}
}
}
void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {