mirror of
https://git.suyu.dev/suyu/suyu
synced 2025-01-09 16:03:21 +00:00
shader: Implement FSWZADD
This commit is contained in:
parent
34aba9627a
commit
6c51f49632
14 changed files with 87 additions and 4 deletions
|
@ -89,6 +89,7 @@ add_library(shader_recompiler STATIC
|
|||
frontend/maxwell/translate/impl/floating_point_multiply.cpp
|
||||
frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
|
||||
frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
|
||||
frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
|
||||
frontend/maxwell/translate/impl/half_floating_point_add.cpp
|
||||
frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
|
||||
frontend/maxwell/translate/impl/half_floating_point_helper.cpp
|
||||
|
|
|
@ -393,6 +393,14 @@ void EmitContext::DefineInputs(const Info& info) {
|
|||
subgroup_local_invocation_id =
|
||||
DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId);
|
||||
}
|
||||
if (info.uses_fswzadd) {
|
||||
const Id f32_one{Constant(F32[1], 1.0f)};
|
||||
const Id f32_minus_one{Constant(F32[1], -1.0f)};
|
||||
const Id f32_zero{Constant(F32[1], 0.0f)};
|
||||
fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero);
|
||||
fswzadd_lut_b =
|
||||
ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one);
|
||||
}
|
||||
if (info.loads_position) {
|
||||
const bool is_fragment{stage != Stage::Fragment};
|
||||
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};
|
||||
|
|
|
@ -103,6 +103,8 @@ public:
|
|||
Id vertex_index{};
|
||||
Id base_vertex{};
|
||||
Id front_face{};
|
||||
Id fswzadd_lut_a{};
|
||||
Id fswzadd_lut_b{};
|
||||
|
||||
Id local_memory{};
|
||||
|
||||
|
|
|
@ -397,5 +397,6 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
|
|||
Id segmentation_mask);
|
||||
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask);
|
||||
Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle);
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -132,4 +132,20 @@ Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id
|
|||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
||||
// Emits the SPIR-V sequence for FSwizzleAdd.
//
// A 2-bit selector is derived per invocation: the low two bits of the subgroup
// local invocation id are doubled to form a bit offset, `swizzle` is shifted
// right by that offset, and the low two bits of the result are kept. The
// selector indexes both lookup vectors held by the context (fswzadd_lut_a and
// fswzadd_lut_b) to obtain one scale factor per operand.
//
// Returns the id of (op_a * lut_a[selector]) + (op_b * lut_b[selector]).
Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) {
    const Id two_bit_mask{ctx.Constant(ctx.U32[1], 3)};

    // Position of this invocation inside its group of four.
    const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
    const Id quad_lane{ctx.OpBitwiseAnd(ctx.U32[1], invocation_id, two_bit_mask)};

    // Each lane owns two bits of the swizzle operand: offset = lane * 2.
    const Id bit_offset{
        ctx.OpShiftLeftLogical(ctx.U32[1], quad_lane, ctx.Constant(ctx.U32[1], 1))};
    const Id shifted_swizzle{ctx.OpShiftRightLogical(ctx.U32[1], swizzle, bit_offset)};
    const Id selector{ctx.OpBitwiseAnd(ctx.U32[1], shifted_swizzle, two_bit_mask)};

    // Pick the per-operand scale factors from the lookup vectors.
    const Id scale_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, selector)};
    const Id scale_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, selector)};

    const Id scaled_a{ctx.OpFMul(ctx.F32[1], op_a, scale_a)};
    const Id scaled_b{ctx.OpFMul(ctx.F32[1], op_b, scale_b)};
    return ctx.OpFAdd(ctx.F32[1], scaled_a, scaled_b);
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -1602,4 +1602,7 @@ U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, cons
|
|||
const IR::U32& seg_mask) {
|
||||
return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask);
|
||||
}
|
||||
// Emits an FSwizzleAdd IR instruction taking `a`, `b` and `swizzle` as
// operands, with `control` attached as the instruction's flags.
// Returns the F32 value produced by the new instruction.
F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) {
    const Flags fp_flags{control};
    return Inst<F32>(Opcode::FSwizzleAdd, fp_flags, a, b, swizzle);
}
|
||||
} // namespace Shader::IR
|
||||
|
|
|
@ -277,6 +277,8 @@ public:
|
|||
const IR::U32& seg_mask);
|
||||
[[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index,
|
||||
const IR::U32& clamp, const IR::U32& seg_mask);
|
||||
[[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle,
|
||||
FpControl control = {});
|
||||
|
||||
private:
|
||||
IR::Block::iterator insertion_point;
|
||||
|
|
|
@ -408,3 +408,4 @@ OPCODE(ShuffleIndex, U32, U32,
|
|||
OPCODE(ShuffleUp, U32, U32, U32, U32, U32, )
|
||||
OPCODE(ShuffleDown, U32, U32, U32, U32, U32, )
|
||||
OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, )
|
||||
OPCODE(FSwizzleAdd, F32, F32, F32, U32, )
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
// Translates an FSWZADD instruction word into an IR FSwizzleAdd.
//
// The two float sources come from GetFloatReg8 / GetFloatReg20, the swizzle
// field is passed on as a 32-bit immediate, and the rounding / FTZ fields are
// forwarded through an IR::FpControl. The result is written to dest_reg.
//
// Throws NotImplementedException when the NDV field is set (before anything is
// emitted) or when the CC field is set (after the result has been written).
void TranslatorVisitor::FSWZADD(u64 insn) {
    // Field layout of the instruction word, as decoded by this translator.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<28, 8, u64> swizzle;
        BitField<38, 1, u64> ndv;
        BitField<39, 2, FpRounding> round;
        BitField<44, 1, u64> ftz;
        BitField<47, 1, u64> cc;
    } const encoding{insn};

    if (encoding.ndv != 0) {
        throw NotImplementedException("FSWZADD NDV");
    }

    const IR::F32 lhs{GetFloatReg8(insn)};
    const IR::F32 rhs{GetFloatReg20(insn)};
    const IR::U32 swizzle_imm{ir.Imm32(static_cast<u32>(encoding.swizzle))};

    const bool flush_to_zero{encoding.ftz != 0};
    const IR::FpControl control{
        .no_contraction = false,
        .rounding = CastFpRounding(encoding.round),
        .fmz_mode = flush_to_zero ? IR::FmzMode::FTZ : IR::FmzMode::None,
    };

    F(encoding.dest_reg, ir.FSwizzleAdd(lhs, rhs, swizzle_imm, control));

    // Condition-code output is not supported; reported after the register write.
    if (encoding.cc != 0) {
        throw NotImplementedException("FSWZADD CC");
    }
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
|
@ -91,6 +91,10 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
|
|||
return X(reg.index);
|
||||
}
|
||||
|
||||
// Fetches the operand selected by GetReg8 and reinterprets its 32 bits as F32.
IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
    const IR::U32 raw_bits{GetReg8(insn)};
    return ir.BitCast<IR::F32>(raw_bits);
}
|
||||
|
||||
// Fetches the operand selected by GetReg20 and reinterprets its 32 bits as F32.
IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
    const IR::U32 raw_bits{GetReg20(insn)};
    return ir.BitCast<IR::F32>(raw_bits);
}
|
||||
|
|
|
@ -353,6 +353,7 @@ public:
|
|||
[[nodiscard]] IR::U32 GetReg8(u64 insn);
|
||||
[[nodiscard]] IR::U32 GetReg20(u64 insn);
|
||||
[[nodiscard]] IR::U32 GetReg39(u64 insn);
|
||||
[[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
|
||||
[[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
|
||||
[[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
|
||||
[[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
|
||||
|
|
|
@ -89,10 +89,6 @@ void TranslatorVisitor::FCHK_imm(u64) {
|
|||
ThrowNotImplemented(Opcode::FCHK_imm);
|
||||
}
|
||||
|
||||
// Placeholder translator for FSWZADD: ignores the instruction word and
// unconditionally reports the opcode through ThrowNotImplemented.
// NOTE(review): the surrounding hunk header (-89,10 +89,6) suggests this stub
// is the part being removed by the commit -- confirm against the real diff.
void TranslatorVisitor::FSWZADD(u64) {
|
||||
ThrowNotImplemented(Opcode::FSWZADD);
|
||||
}
|
||||
|
||||
// Placeholder translator for GETCRSPTR: ignores the instruction word and
// unconditionally reports the opcode through ThrowNotImplemented.
void TranslatorVisitor::GETCRSPTR(u64) {
|
||||
ThrowNotImplemented(Opcode::GETCRSPTR);
|
||||
}
|
||||
|
|
|
@ -389,6 +389,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
|||
case IR::Opcode::SubgroupBallot:
|
||||
info.uses_subgroup_vote = true;
|
||||
break;
|
||||
case IR::Opcode::FSwizzleAdd:
|
||||
info.uses_fswzadd = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -94,6 +94,7 @@ struct Info {
|
|||
bool uses_sparse_residency{};
|
||||
bool uses_demote_to_helper_invocation{};
|
||||
bool uses_subgroup_vote{};
|
||||
bool uses_fswzadd{};
|
||||
|
||||
IR::Type used_constant_buffer_types{};
|
||||
|
||||
|
|
Loading…
Reference in a new issue