mirror of
https://git.suyu.dev/suyu/suyu
synced 2025-01-09 16:03:21 +00:00
shader/video: Partially implement VMNMX
Implements the common usages of VMNMX. Inputs with a size other than 32 bits are not supported, and sign mismatches between the inputs are not supported either. VMNMX works as follows: it takes Ra and Rb and applies a maximum or minimum to them (selected by .MX), taking the input sign into account. This result can then be saturated. Once the intermediate result has been calculated, another operation is applied to it using Rc. This operation is a merge, an accumulation or another min/max pass. This makes the instruction a more flexible way to implement, for instance, GCN's min3 and max3 instructions.
This commit is contained in:
parent
08470d261d
commit
76f178ba6e
3 changed files with 116 additions and 0 deletions
|
@ -302,6 +302,23 @@ enum class VmadShr : u64 {
|
||||||
Shr15 = 2,
|
Shr15 = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class VmnmxType : u64 {
|
||||||
|
Bits8,
|
||||||
|
Bits16,
|
||||||
|
Bits32,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class VmnmxOperation : u64 {
|
||||||
|
Mrg_16H = 0,
|
||||||
|
Mrg_16L = 1,
|
||||||
|
Mrg_8B0 = 2,
|
||||||
|
Mrg_8B2 = 3,
|
||||||
|
Acc = 4,
|
||||||
|
Min = 5,
|
||||||
|
Max = 6,
|
||||||
|
Nop = 7,
|
||||||
|
};
|
||||||
|
|
||||||
enum class XmadMode : u64 {
|
enum class XmadMode : u64 {
|
||||||
None = 0,
|
None = 0,
|
||||||
CLo = 1,
|
CLo = 1,
|
||||||
|
@ -1662,6 +1679,42 @@ union Instruction {
|
||||||
BitField<47, 1, u64> cc;
|
BitField<47, 1, u64> cc;
|
||||||
} vmad;
|
} vmad;
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField<54, 1, u64> is_dest_signed;
|
||||||
|
BitField<48, 1, u64> is_src_a_signed;
|
||||||
|
BitField<49, 1, u64> is_src_b_signed;
|
||||||
|
BitField<37, 2, u64> src_format_a;
|
||||||
|
BitField<29, 2, u64> src_format_b;
|
||||||
|
BitField<56, 1, u64> mx;
|
||||||
|
BitField<55, 1, u64> sat;
|
||||||
|
BitField<36, 2, u64> selector_a;
|
||||||
|
BitField<28, 2, u64> selector_b;
|
||||||
|
BitField<50, 1, u64> is_op_b_register;
|
||||||
|
BitField<51, 3, VmnmxOperation> operation;
|
||||||
|
|
||||||
|
VmnmxType SourceFormatA() const {
|
||||||
|
switch (src_format_a) {
|
||||||
|
case 0b11:
|
||||||
|
return VmnmxType::Bits32;
|
||||||
|
case 0b10:
|
||||||
|
return VmnmxType::Bits16;
|
||||||
|
default:
|
||||||
|
return VmnmxType::Bits8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
VmnmxType SourceFormatB() const {
|
||||||
|
switch (src_format_b) {
|
||||||
|
case 0b11:
|
||||||
|
return VmnmxType::Bits32;
|
||||||
|
case 0b10:
|
||||||
|
return VmnmxType::Bits16;
|
||||||
|
default:
|
||||||
|
return VmnmxType::Bits8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} vmnmx;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
BitField<20, 16, u64> imm20_16;
|
BitField<20, 16, u64> imm20_16;
|
||||||
BitField<35, 1, u64> high_b_rr; // used on RR
|
BitField<35, 1, u64> high_b_rr; // used on RR
|
||||||
|
@ -1773,6 +1826,7 @@ public:
|
||||||
MEMBAR,
|
MEMBAR,
|
||||||
VMAD,
|
VMAD,
|
||||||
VSETP,
|
VSETP,
|
||||||
|
VMNMX,
|
||||||
FFMA_IMM, // Fused Multiply and Add
|
FFMA_IMM, // Fused Multiply and Add
|
||||||
FFMA_CR,
|
FFMA_CR,
|
||||||
FFMA_RC,
|
FFMA_RC,
|
||||||
|
@ -2078,6 +2132,7 @@ private:
|
||||||
INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
|
INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
|
||||||
INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
|
INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
|
||||||
INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
|
INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
|
||||||
|
INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
|
||||||
INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
|
INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
|
||||||
INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
|
INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
|
||||||
INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
|
INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
|
||||||
|
|
|
@ -10,16 +10,24 @@
|
||||||
|
|
||||||
namespace VideoCommon::Shader {
|
namespace VideoCommon::Shader {
|
||||||
|
|
||||||
|
using std::move;
|
||||||
using Tegra::Shader::Instruction;
|
using Tegra::Shader::Instruction;
|
||||||
using Tegra::Shader::OpCode;
|
using Tegra::Shader::OpCode;
|
||||||
using Tegra::Shader::Pred;
|
using Tegra::Shader::Pred;
|
||||||
using Tegra::Shader::VideoType;
|
using Tegra::Shader::VideoType;
|
||||||
using Tegra::Shader::VmadShr;
|
using Tegra::Shader::VmadShr;
|
||||||
|
using Tegra::Shader::VmnmxOperation;
|
||||||
|
using Tegra::Shader::VmnmxType;
|
||||||
|
|
||||||
u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
|
u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
|
||||||
const Instruction instr = {program_code[pc]};
|
const Instruction instr = {program_code[pc]};
|
||||||
const auto opcode = OpCode::Decode(instr);
|
const auto opcode = OpCode::Decode(instr);
|
||||||
|
|
||||||
|
if (opcode->get().GetId() == OpCode::Id::VMNMX) {
|
||||||
|
DecodeVMNMX(bb, instr);
|
||||||
|
return pc;
|
||||||
|
}
|
||||||
|
|
||||||
const Node op_a =
|
const Node op_a =
|
||||||
GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
|
GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
|
||||||
instr.video.type_a, instr.video.byte_height_a);
|
instr.video.type_a, instr.video.byte_height_a);
|
||||||
|
@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
|
||||||
|
UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
|
||||||
|
UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
|
||||||
|
UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
|
||||||
|
UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
|
||||||
|
UNIMPLEMENTED_IF(instr.vmnmx.sat);
|
||||||
|
UNIMPLEMENTED_IF(instr.generates_cc);
|
||||||
|
|
||||||
|
Node op_a = GetRegister(instr.gpr8);
|
||||||
|
Node op_b = GetRegister(instr.gpr20);
|
||||||
|
Node op_c = GetRegister(instr.gpr39);
|
||||||
|
|
||||||
|
const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
|
||||||
|
const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
|
||||||
|
|
||||||
|
const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
|
||||||
|
Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
|
||||||
|
|
||||||
|
switch (instr.vmnmx.operation) {
|
||||||
|
case VmnmxOperation::Mrg_16H:
|
||||||
|
value = BitfieldInsert(move(op_c), move(value), 16, 16);
|
||||||
|
break;
|
||||||
|
case VmnmxOperation::Mrg_16L:
|
||||||
|
value = BitfieldInsert(move(op_c), move(value), 0, 16);
|
||||||
|
break;
|
||||||
|
case VmnmxOperation::Mrg_8B0:
|
||||||
|
value = BitfieldInsert(move(op_c), move(value), 0, 8);
|
||||||
|
break;
|
||||||
|
case VmnmxOperation::Mrg_8B2:
|
||||||
|
value = BitfieldInsert(move(op_c), move(value), 16, 8);
|
||||||
|
break;
|
||||||
|
case VmnmxOperation::Acc:
|
||||||
|
value = Operation(OperationCode::IAdd, move(value), move(op_c));
|
||||||
|
break;
|
||||||
|
case VmnmxOperation::Min:
|
||||||
|
value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
|
||||||
|
break;
|
||||||
|
case VmnmxOperation::Max:
|
||||||
|
value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
|
||||||
|
break;
|
||||||
|
case VmnmxOperation::Nop:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
SetRegister(bb, instr.gpr0, move(value));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace VideoCommon::Shader
|
} // namespace VideoCommon::Shader
|
||||||
|
|
|
@ -350,6 +350,9 @@ private:
|
||||||
/// Marks the usage of a input or output attribute.
|
/// Marks the usage of a input or output attribute.
|
||||||
void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
|
void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
|
||||||
|
|
||||||
|
/// Decodes VMNMX instruction and inserts its code into the passed basic block.
|
||||||
|
void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
|
||||||
|
|
||||||
void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
|
void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
|
||||||
const Node4& components);
|
const Node4& components);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue