mirror of
https://git.suyu.dev/suyu/suyu
synced 2024-12-23 18:02:46 -06:00
Merge pull request #2083 from ReinUsesLisp/shader-ir-cbuf-tracking
shader/track: Add a more permissive global memory tracking
This commit is contained in:
commit
f09d1dffd1
30 changed files with 140 additions and 126 deletions
|
@ -171,7 +171,7 @@ public:
|
|||
code.AddLine(fmt::format("case 0x{:x}u: {{", address));
|
||||
++code.scope;
|
||||
|
||||
VisitBasicBlock(bb);
|
||||
VisitBlock(bb);
|
||||
|
||||
--code.scope;
|
||||
code.AddLine('}');
|
||||
|
@ -423,7 +423,7 @@ private:
|
|||
code.AddNewLine();
|
||||
}
|
||||
|
||||
void VisitBasicBlock(const BasicBlock& bb) {
|
||||
void VisitBlock(const NodeBlock& bb) {
|
||||
for (const Node node : bb) {
|
||||
if (const std::string expr = Visit(node); !expr.empty()) {
|
||||
code.AddLine(expr);
|
||||
|
@ -575,7 +575,7 @@ private:
|
|||
code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {");
|
||||
++code.scope;
|
||||
|
||||
VisitBasicBlock(conditional->GetCode());
|
||||
VisitBlock(conditional->GetCode());
|
||||
|
||||
--code.scope;
|
||||
code.AddLine('}');
|
||||
|
|
|
@ -121,15 +121,15 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
|
|||
return exit_method = ExitMethod::AlwaysReturn;
|
||||
}
|
||||
|
||||
BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
|
||||
BasicBlock basic_block;
|
||||
NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
|
||||
NodeBlock basic_block;
|
||||
for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
|
||||
pc = DecodeInstr(basic_block, pc);
|
||||
}
|
||||
return basic_block;
|
||||
}
|
||||
|
||||
u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
|
||||
u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
|
||||
// Ignore sched instructions when generating code.
|
||||
if (IsSchedInstruction(pc, main_offset)) {
|
||||
return pc + 1;
|
||||
|
@ -151,39 +151,38 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
|
|||
UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
|
||||
"NeverExecute predicate not implemented");
|
||||
|
||||
static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)>
|
||||
decoders = {
|
||||
{OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
|
||||
{OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
|
||||
{OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
|
||||
{OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
|
||||
{OpCode::Type::Shift, &ShaderIR::DecodeShift},
|
||||
{OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
|
||||
{OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
|
||||
{OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
|
||||
{OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
|
||||
{OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
|
||||
{OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
|
||||
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
|
||||
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
|
||||
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
|
||||
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
|
||||
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
|
||||
{OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
|
||||
{OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
|
||||
{OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
|
||||
{OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
|
||||
{OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
|
||||
{OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
|
||||
{OpCode::Type::Video, &ShaderIR::DecodeVideo},
|
||||
{OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
|
||||
};
|
||||
static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
|
||||
{OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
|
||||
{OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
|
||||
{OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
|
||||
{OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
|
||||
{OpCode::Type::Shift, &ShaderIR::DecodeShift},
|
||||
{OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
|
||||
{OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
|
||||
{OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
|
||||
{OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
|
||||
{OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
|
||||
{OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
|
||||
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
|
||||
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
|
||||
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
|
||||
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
|
||||
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
|
||||
{OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
|
||||
{OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
|
||||
{OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
|
||||
{OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
|
||||
{OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
|
||||
{OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
|
||||
{OpCode::Type::Video, &ShaderIR::DecodeVideo},
|
||||
{OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
|
||||
};
|
||||
|
||||
std::vector<Node> tmp_block;
|
||||
if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
|
||||
pc = (this->*decoder->second)(tmp_block, bb, pc);
|
||||
pc = (this->*decoder->second)(tmp_block, pc);
|
||||
} else {
|
||||
pc = DecodeOther(tmp_block, bb, pc);
|
||||
pc = DecodeOther(tmp_block, pc);
|
||||
}
|
||||
|
||||
// Some instructions (like SSY) don't have a predicate field, they are always unconditionally
|
||||
|
@ -192,11 +191,14 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
|
|||
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
|
||||
|
||||
if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
|
||||
bb.push_back(
|
||||
Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)));
|
||||
const Node conditional =
|
||||
Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
|
||||
global_code.push_back(conditional);
|
||||
bb.push_back(conditional);
|
||||
} else {
|
||||
for (auto& node : tmp_block) {
|
||||
bb.push_back(std::move(node));
|
||||
global_code.push_back(node);
|
||||
bb.push_back(node);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
|
|||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::SubOp;
|
||||
|
||||
u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ using Tegra::Shader::OpCode;
|
|||
using Tegra::Shader::Pred;
|
||||
using Tegra::Shader::Register;
|
||||
|
||||
u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
@ -242,7 +242,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3
|
|||
return pc;
|
||||
}
|
||||
|
||||
void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
|
||||
void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
|
||||
Node imm_lut, bool sets_cc) {
|
||||
constexpr u32 lop_iterations = 32;
|
||||
const Node one = Immediate(1);
|
||||
|
|
|
@ -16,7 +16,7 @@ using Tegra::Shader::Pred;
|
|||
using Tegra::Shader::PredicateResultMode;
|
||||
using Tegra::Shader::Register;
|
||||
|
||||
u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
@ -54,9 +54,9 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock&
|
|||
return pc;
|
||||
}
|
||||
|
||||
void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op,
|
||||
Node op_a, Node op_b, PredicateResultMode predicate_mode,
|
||||
Pred predicate, bool sets_cc) {
|
||||
void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
|
||||
Node op_b, PredicateResultMode predicate_mode, Pred predicate,
|
||||
bool sets_cc) {
|
||||
const Node result = [&]() {
|
||||
switch (logic_op) {
|
||||
case LogicOperation::And:
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
|
|||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Register;
|
||||
|
||||
u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
|
|||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
|
||||
u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
|
|||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
|
||||
u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ using Tegra::Shader::HalfType;
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
|
|||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
|
||||
u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ static std::size_t GetCoordCount(TextureType texture_type) {
|
|||
}
|
||||
}
|
||||
|
||||
u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
@ -160,7 +160,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
|||
}();
|
||||
|
||||
const Node addr_register = GetRegister(instr.gpr8);
|
||||
const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size()));
|
||||
const Node base_address =
|
||||
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
|
||||
const auto cbuf = std::get_if<CbufNode>(base_address);
|
||||
ASSERT(cbuf != nullptr);
|
||||
const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
|
||||
|
@ -464,8 +465,7 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
|
|||
return *used_samplers.emplace(entry).first;
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
|
||||
const Node4& components) {
|
||||
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
|
||||
u32 dest_elem = 0;
|
||||
for (u32 elem = 0; elem < 4; ++elem) {
|
||||
if (!instr.tex.IsComponentEnabled(elem)) {
|
||||
|
@ -480,7 +480,7 @@ void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
|
|||
}
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
|
||||
void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
|
||||
const Node4& components) {
|
||||
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
|
||||
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
|
||||
|
@ -504,7 +504,7 @@ void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
|
|||
}
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
|
||||
void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
|
||||
const Node4& components) {
|
||||
// TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
|
||||
// float instruction).
|
||||
|
|
|
@ -14,7 +14,7 @@ using Tegra::Shader::Instruction;
|
|||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Register;
|
||||
|
||||
u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
|
|||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
|
||||
u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ using Tegra::Shader::Pred;
|
|||
using Tegra::Shader::VideoType;
|
||||
using Tegra::Shader::VmadShr;
|
||||
|
||||
u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||
u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
|
|
|
@ -337,27 +337,27 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
|
|||
}
|
||||
}
|
||||
|
||||
void ShaderIR::SetRegister(BasicBlock& bb, Register dest, Node src) {
|
||||
void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
|
||||
bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src));
|
||||
}
|
||||
|
||||
void ShaderIR::SetPredicate(BasicBlock& bb, u64 dest, Node src) {
|
||||
void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
|
||||
bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src));
|
||||
}
|
||||
|
||||
void ShaderIR::SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value) {
|
||||
void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
|
||||
bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value));
|
||||
}
|
||||
|
||||
void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) {
|
||||
void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
|
||||
bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value));
|
||||
}
|
||||
|
||||
void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) {
|
||||
void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) {
|
||||
SetRegister(bb, Register::ZeroIndex + 1 + id, value);
|
||||
}
|
||||
|
||||
void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) {
|
||||
void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
|
||||
if (!sets_cc) {
|
||||
return;
|
||||
}
|
||||
|
@ -366,7 +366,7 @@ void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_c
|
|||
LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
|
||||
}
|
||||
|
||||
void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) {
|
||||
void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {
|
||||
if (!sets_cc) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ using NodeData =
|
|||
PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
|
||||
using Node = const NodeData*;
|
||||
using Node4 = std::array<Node, 4>;
|
||||
using BasicBlock = std::vector<Node>;
|
||||
using NodeBlock = std::vector<Node>;
|
||||
|
||||
constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
|
||||
|
||||
|
@ -539,7 +539,7 @@ public:
|
|||
Decode();
|
||||
}
|
||||
|
||||
const std::map<u32, BasicBlock>& GetBasicBlocks() const {
|
||||
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
|
||||
return basic_blocks;
|
||||
}
|
||||
|
||||
|
@ -590,7 +590,7 @@ private:
|
|||
|
||||
ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);
|
||||
|
||||
BasicBlock DecodeRange(u32 begin, u32 end);
|
||||
NodeBlock DecodeRange(u32 begin, u32 end);
|
||||
|
||||
/**
|
||||
* Decodes a single instruction from Tegra to IR.
|
||||
|
@ -598,33 +598,33 @@ private:
|
|||
* @param pc Program counter. Offset to decode.
|
||||
* @return Next address to decode.
|
||||
*/
|
||||
u32 DecodeInstr(BasicBlock& bb, u32 pc);
|
||||
u32 DecodeInstr(NodeBlock& bb, u32 pc);
|
||||
|
||||
u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc);
|
||||
u32 DecodeArithmetic(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeBfe(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeBfi(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeShift(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeFfma(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeHfma2(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeConversion(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeMemory(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc);
|
||||
u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeFloatSet(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeIntegerSet(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeHalfSet(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeVideo(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeXmad(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeOther(NodeBlock& bb, u32 pc);
|
||||
|
||||
/// Internalizes node's data and returns a managed pointer to a clone of that node
|
||||
Node StoreNode(NodeData&& node_data);
|
||||
|
@ -673,20 +673,20 @@ private:
|
|||
Node GetTemporal(u32 id);
|
||||
|
||||
/// Sets a register. src value must be a number-evaluated node.
|
||||
void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src);
|
||||
void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
|
||||
/// Sets a predicate. src value must be a bool-evaluated node
|
||||
void SetPredicate(BasicBlock& bb, u64 dest, Node src);
|
||||
void SetPredicate(NodeBlock& bb, u64 dest, Node src);
|
||||
/// Sets an internal flag. src value must be a bool-evaluated node
|
||||
void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value);
|
||||
void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
|
||||
/// Sets a local memory address. address and value must be a number-evaluated node
|
||||
void SetLocalMemory(BasicBlock& bb, Node address, Node value);
|
||||
void SetLocalMemory(NodeBlock& bb, Node address, Node value);
|
||||
/// Sets a temporal. Internally it uses a post-RZ register
|
||||
void SetTemporal(BasicBlock& bb, u32 id, Node value);
|
||||
void SetTemporal(NodeBlock& bb, u32 id, Node value);
|
||||
|
||||
/// Sets internal flags from a float
|
||||
void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true);
|
||||
void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
|
||||
/// Sets internal flags from an integer
|
||||
void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true);
|
||||
void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);
|
||||
|
||||
/// Conditionally absolute/negated float. Absolute is applied first
|
||||
Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
|
||||
|
@ -727,12 +727,12 @@ private:
|
|||
/// Extracts a sequence of bits from a node
|
||||
Node BitfieldExtract(Node value, u32 offset, u32 bits);
|
||||
|
||||
void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
|
||||
void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
|
||||
const Node4& components);
|
||||
|
||||
void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
|
||||
void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
|
||||
const Node4& components);
|
||||
void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
|
||||
void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
|
||||
const Node4& components);
|
||||
|
||||
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
|
@ -761,16 +761,16 @@ private:
|
|||
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
|
||||
u64 byte_height);
|
||||
|
||||
void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest,
|
||||
void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,
|
||||
Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
|
||||
Tegra::Shader::PredicateResultMode predicate_mode,
|
||||
Tegra::Shader::Pred predicate, bool sets_cc);
|
||||
void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
|
||||
void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
|
||||
Node op_c, Node imm_lut, bool sets_cc);
|
||||
|
||||
Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor);
|
||||
Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
|
||||
|
||||
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor);
|
||||
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
|
||||
|
||||
template <typename... T>
|
||||
Node Operation(OperationCode code, const T*... operands) {
|
||||
|
@ -812,7 +812,8 @@ private:
|
|||
u32 coverage_end{};
|
||||
std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
|
||||
|
||||
std::map<u32, BasicBlock> basic_blocks;
|
||||
std::map<u32, NodeBlock> basic_blocks;
|
||||
NodeBlock global_code;
|
||||
|
||||
std::vector<std::unique_ptr<NodeData>> stored_nodes;
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
namespace VideoCommon::Shader {
|
||||
|
||||
namespace {
|
||||
std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor,
|
||||
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
|
||||
OperationCode operation_code) {
|
||||
for (; cursor >= 0; --cursor) {
|
||||
const Node node = code[cursor];
|
||||
|
@ -19,12 +19,19 @@ std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor,
|
|||
if (operation->GetCode() == operation_code)
|
||||
return {node, cursor};
|
||||
}
|
||||
if (const auto conditional = std::get_if<ConditionalNode>(node)) {
|
||||
const auto& code = conditional->GetCode();
|
||||
const auto [found, internal_cursor] =
|
||||
FindOperation(code, static_cast<s64>(code.size() - 1), operation_code);
|
||||
if (found)
|
||||
return {found, cursor};
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
} // namespace
|
||||
|
||||
Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) {
|
||||
Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
|
||||
if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
|
||||
// Cbuf found, but it has to be immediate
|
||||
return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
|
||||
|
@ -50,10 +57,14 @@ Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) {
|
|||
}
|
||||
return nullptr;
|
||||
}
|
||||
if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
|
||||
const auto& code = conditional->GetCode();
|
||||
return TrackCbuf(tracked, code, static_cast<s64>(code.size()));
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code,
|
||||
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
|
||||
s64 cursor) {
|
||||
for (; cursor >= 0; --cursor) {
|
||||
const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
|
||||
|
|
Loading…
Reference in a new issue