mirror of
https://github.com/Lime3DS/Lime3DS
synced 2024-12-27 01:22:37 -06:00
Merge pull request #1546 from bunnei/refactor-shader-jit
Shader JIT Part 2
This commit is contained in:
commit
d89e48679e
7 changed files with 271 additions and 162 deletions
|
@ -455,6 +455,18 @@ void XEmitter::CALL(const void* fnptr)
|
||||||
Write32(u32(distance));
|
Write32(u32(distance));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a near CALL (opcode 0xE8) whose 32-bit displacement is left as zero,
// and returns a fixup record so the displacement can be filled in later with
// SetJumpTarget().
FixupBranch XEmitter::CALL()
{
    // The instruction is 5 bytes long (1 opcode byte + 4 displacement bytes).
    // Capture the address just past it before anything is written.
    auto* const patch_point = code + 5;

    Write8(0xE8); // CALL rel32 opcode
    Write32(0);   // placeholder displacement, patched later

    FixupBranch fixup;
    fixup.type = 1; // the type value SetJumpTarget() patches as a 32-bit displacement
    fixup.ptr = patch_point;
    return fixup;
}
|
||||||
|
|
||||||
FixupBranch XEmitter::J(bool force5bytes)
|
FixupBranch XEmitter::J(bool force5bytes)
|
||||||
{
|
{
|
||||||
FixupBranch branch;
|
FixupBranch branch;
|
||||||
|
@ -531,6 +543,22 @@ void XEmitter::SetJumpTarget(const FixupBranch& branch)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target)
|
||||||
|
{
|
||||||
|
if (branch.type == 0)
|
||||||
|
{
|
||||||
|
s64 distance = (s64)(target - branch.ptr);
|
||||||
|
ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true");
|
||||||
|
branch.ptr[-1] = (u8)(s8)distance;
|
||||||
|
}
|
||||||
|
else if (branch.type == 1)
|
||||||
|
{
|
||||||
|
s64 distance = (s64)(target - branch.ptr);
|
||||||
|
ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register");
|
||||||
|
((s32*)branch.ptr)[-1] = (s32)distance;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//Single byte opcodes
|
//Single byte opcodes
|
||||||
//There is no PUSHAD/POPAD in 64-bit mode.
|
//There is no PUSHAD/POPAD in 64-bit mode.
|
||||||
void XEmitter::INT3() {Write8(0xCC);}
|
void XEmitter::INT3() {Write8(0xCC);}
|
||||||
|
|
|
@ -425,12 +425,14 @@ public:
|
||||||
#undef CALL
|
#undef CALL
|
||||||
#endif
|
#endif
|
||||||
void CALL(const void* fnptr);
|
void CALL(const void* fnptr);
|
||||||
|
FixupBranch CALL();
|
||||||
void CALLptr(OpArg arg);
|
void CALLptr(OpArg arg);
|
||||||
|
|
||||||
FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
|
FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
|
||||||
void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
|
void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
|
||||||
|
|
||||||
void SetJumpTarget(const FixupBranch& branch);
|
void SetJumpTarget(const FixupBranch& branch);
|
||||||
|
void SetJumpTarget(const FixupBranch& branch, const u8* target);
|
||||||
|
|
||||||
void SETcc(CCFlags flag, OpArg dest);
|
void SETcc(CCFlags flag, OpArg dest);
|
||||||
// Note: CMOV brings small if any benefit on current cpus.
|
// Note: CMOV brings small if any benefit on current cpus.
|
||||||
|
|
|
@ -140,7 +140,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||||
immediate_attribute_id = 0;
|
immediate_attribute_id = 0;
|
||||||
|
|
||||||
Shader::UnitState<false> shader_unit;
|
Shader::UnitState<false> shader_unit;
|
||||||
Shader::Setup(shader_unit);
|
Shader::Setup();
|
||||||
|
|
||||||
if (g_debug_context)
|
if (g_debug_context)
|
||||||
g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input));
|
g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input));
|
||||||
|
@ -300,7 +300,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||||
vertex_cache_ids.fill(-1);
|
vertex_cache_ids.fill(-1);
|
||||||
|
|
||||||
Shader::UnitState<false> shader_unit;
|
Shader::UnitState<false> shader_unit;
|
||||||
Shader::Setup(shader_unit);
|
Shader::Setup();
|
||||||
|
|
||||||
for (unsigned int index = 0; index < regs.num_vertices; ++index)
|
for (unsigned int index = 0; index < regs.num_vertices; ++index)
|
||||||
{
|
{
|
||||||
|
|
|
@ -28,36 +28,24 @@ namespace Pica {
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
static std::unordered_map<u64, CompiledShader*> shader_map;
|
static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
|
||||||
static JitCompiler jit;
|
static const JitShader* jit_shader;
|
||||||
static CompiledShader* jit_shader;
|
|
||||||
|
|
||||||
static void ClearCache() {
|
|
||||||
shader_map.clear();
|
|
||||||
jit.Clear();
|
|
||||||
LOG_INFO(HW_GPU, "Shader JIT cache cleared");
|
|
||||||
}
|
|
||||||
#endif // ARCHITECTURE_x86_64
|
#endif // ARCHITECTURE_x86_64
|
||||||
|
|
||||||
void Setup(UnitState<false>& state) {
|
void Setup() {
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
if (VideoCore::g_shader_jit_enabled) {
|
if (VideoCore::g_shader_jit_enabled) {
|
||||||
u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
|
u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
|
||||||
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^
|
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
|
||||||
g_state.regs.vs.main_offset);
|
|
||||||
|
|
||||||
auto iter = shader_map.find(cache_key);
|
auto iter = shader_map.find(cache_key);
|
||||||
if (iter != shader_map.end()) {
|
if (iter != shader_map.end()) {
|
||||||
jit_shader = iter->second;
|
jit_shader = iter->second.get();
|
||||||
} else {
|
} else {
|
||||||
// Check if remaining JIT code space is enough for at least one more (massive) shader
|
auto shader = std::make_unique<JitShader>();
|
||||||
if (jit.GetSpaceLeft() < jit_shader_size) {
|
shader->Compile();
|
||||||
// If not, clear the cache of all previously compiled shaders
|
jit_shader = shader.get();
|
||||||
ClearCache();
|
shader_map[cache_key] = std::move(shader);
|
||||||
}
|
|
||||||
|
|
||||||
jit_shader = jit.Compile();
|
|
||||||
shader_map.emplace(cache_key, jit_shader);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // ARCHITECTURE_x86_64
|
#endif // ARCHITECTURE_x86_64
|
||||||
|
@ -65,7 +53,7 @@ void Setup(UnitState<false>& state) {
|
||||||
|
|
||||||
void Shutdown() {
|
void Shutdown() {
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
ClearCache();
|
shader_map.clear();
|
||||||
#endif // ARCHITECTURE_x86_64
|
#endif // ARCHITECTURE_x86_64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,7 +97,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
|
||||||
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
if (VideoCore::g_shader_jit_enabled)
|
if (VideoCore::g_shader_jit_enabled)
|
||||||
jit_shader(&state.registers);
|
jit_shader->Run(&state.registers, g_state.regs.vs.main_offset);
|
||||||
else
|
else
|
||||||
RunInterpreter(state);
|
RunInterpreter(state);
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -339,9 +339,8 @@ struct UnitState {
|
||||||
/**
|
/**
|
||||||
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
|
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
|
||||||
* vertex, which would happen within the `Run` function).
|
* vertex, which would happen within the `Run` function).
|
||||||
* @param state Shader unit state, must be setup per shader and per shader unit
|
|
||||||
*/
|
*/
|
||||||
void Setup(UnitState<false>& state);
|
void Setup();
|
||||||
|
|
||||||
/// Performs any cleanup when the emulator is shutdown
|
/// Performs any cleanup when the emulator is shutdown
|
||||||
void Shutdown();
|
void Shutdown();
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <smmintrin.h>
|
#include <smmintrin.h>
|
||||||
|
|
||||||
#include "common/x64/abi.h"
|
#include "common/x64/abi.h"
|
||||||
|
@ -19,73 +20,73 @@ namespace Shader {
|
||||||
|
|
||||||
using namespace Gen;
|
using namespace Gen;
|
||||||
|
|
||||||
typedef void (JitCompiler::*JitFunction)(Instruction instr);
|
typedef void (JitShader::*JitFunction)(Instruction instr);
|
||||||
|
|
||||||
const JitFunction instr_table[64] = {
|
const JitFunction instr_table[64] = {
|
||||||
&JitCompiler::Compile_ADD, // add
|
&JitShader::Compile_ADD, // add
|
||||||
&JitCompiler::Compile_DP3, // dp3
|
&JitShader::Compile_DP3, // dp3
|
||||||
&JitCompiler::Compile_DP4, // dp4
|
&JitShader::Compile_DP4, // dp4
|
||||||
&JitCompiler::Compile_DPH, // dph
|
&JitShader::Compile_DPH, // dph
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
&JitCompiler::Compile_EX2, // ex2
|
&JitShader::Compile_EX2, // ex2
|
||||||
&JitCompiler::Compile_LG2, // lg2
|
&JitShader::Compile_LG2, // lg2
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
&JitCompiler::Compile_MUL, // mul
|
&JitShader::Compile_MUL, // mul
|
||||||
&JitCompiler::Compile_SGE, // sge
|
&JitShader::Compile_SGE, // sge
|
||||||
&JitCompiler::Compile_SLT, // slt
|
&JitShader::Compile_SLT, // slt
|
||||||
&JitCompiler::Compile_FLR, // flr
|
&JitShader::Compile_FLR, // flr
|
||||||
&JitCompiler::Compile_MAX, // max
|
&JitShader::Compile_MAX, // max
|
||||||
&JitCompiler::Compile_MIN, // min
|
&JitShader::Compile_MIN, // min
|
||||||
&JitCompiler::Compile_RCP, // rcp
|
&JitShader::Compile_RCP, // rcp
|
||||||
&JitCompiler::Compile_RSQ, // rsq
|
&JitShader::Compile_RSQ, // rsq
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
&JitCompiler::Compile_MOVA, // mova
|
&JitShader::Compile_MOVA, // mova
|
||||||
&JitCompiler::Compile_MOV, // mov
|
&JitShader::Compile_MOV, // mov
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
&JitCompiler::Compile_DPH, // dphi
|
&JitShader::Compile_DPH, // dphi
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
&JitCompiler::Compile_SGE, // sgei
|
&JitShader::Compile_SGE, // sgei
|
||||||
&JitCompiler::Compile_SLT, // slti
|
&JitShader::Compile_SLT, // slti
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
nullptr, // unknown
|
nullptr, // unknown
|
||||||
&JitCompiler::Compile_NOP, // nop
|
&JitShader::Compile_NOP, // nop
|
||||||
&JitCompiler::Compile_END, // end
|
&JitShader::Compile_END, // end
|
||||||
nullptr, // break
|
nullptr, // break
|
||||||
&JitCompiler::Compile_CALL, // call
|
&JitShader::Compile_CALL, // call
|
||||||
&JitCompiler::Compile_CALLC, // callc
|
&JitShader::Compile_CALLC, // callc
|
||||||
&JitCompiler::Compile_CALLU, // callu
|
&JitShader::Compile_CALLU, // callu
|
||||||
&JitCompiler::Compile_IF, // ifu
|
&JitShader::Compile_IF, // ifu
|
||||||
&JitCompiler::Compile_IF, // ifc
|
&JitShader::Compile_IF, // ifc
|
||||||
&JitCompiler::Compile_LOOP, // loop
|
&JitShader::Compile_LOOP, // loop
|
||||||
nullptr, // emit
|
nullptr, // emit
|
||||||
nullptr, // sete
|
nullptr, // sete
|
||||||
&JitCompiler::Compile_JMP, // jmpc
|
&JitShader::Compile_JMP, // jmpc
|
||||||
&JitCompiler::Compile_JMP, // jmpu
|
&JitShader::Compile_JMP, // jmpu
|
||||||
&JitCompiler::Compile_CMP, // cmp
|
&JitShader::Compile_CMP, // cmp
|
||||||
&JitCompiler::Compile_CMP, // cmp
|
&JitShader::Compile_CMP, // cmp
|
||||||
&JitCompiler::Compile_MAD, // madi
|
&JitShader::Compile_MAD, // madi
|
||||||
&JitCompiler::Compile_MAD, // madi
|
&JitShader::Compile_MAD, // madi
|
||||||
&JitCompiler::Compile_MAD, // madi
|
&JitShader::Compile_MAD, // madi
|
||||||
&JitCompiler::Compile_MAD, // madi
|
&JitShader::Compile_MAD, // madi
|
||||||
&JitCompiler::Compile_MAD, // madi
|
&JitShader::Compile_MAD, // madi
|
||||||
&JitCompiler::Compile_MAD, // madi
|
&JitShader::Compile_MAD, // madi
|
||||||
&JitCompiler::Compile_MAD, // madi
|
&JitShader::Compile_MAD, // madi
|
||||||
&JitCompiler::Compile_MAD, // madi
|
&JitShader::Compile_MAD, // madi
|
||||||
&JitCompiler::Compile_MAD, // mad
|
&JitShader::Compile_MAD, // mad
|
||||||
&JitCompiler::Compile_MAD, // mad
|
&JitShader::Compile_MAD, // mad
|
||||||
&JitCompiler::Compile_MAD, // mad
|
&JitShader::Compile_MAD, // mad
|
||||||
&JitCompiler::Compile_MAD, // mad
|
&JitShader::Compile_MAD, // mad
|
||||||
&JitCompiler::Compile_MAD, // mad
|
&JitShader::Compile_MAD, // mad
|
||||||
&JitCompiler::Compile_MAD, // mad
|
&JitShader::Compile_MAD, // mad
|
||||||
&JitCompiler::Compile_MAD, // mad
|
&JitShader::Compile_MAD, // mad
|
||||||
&JitCompiler::Compile_MAD, // mad
|
&JitShader::Compile_MAD, // mad
|
||||||
};
|
};
|
||||||
|
|
||||||
// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
|
// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
|
||||||
|
@ -137,6 +138,25 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
|
||||||
/// Raw constant for the destination register enable mask that indicates all components are enabled
|
/// Raw constant for the destination register enable mask that indicates all components are enabled
|
||||||
static const u8 NO_DEST_REG_MASK = 0xf;
|
static const u8 NO_DEST_REG_MASK = 0xf;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the vertex shader instruction for a given offset in the current shader program
|
||||||
|
* @param offset Offset in the current shader program of the instruction
|
||||||
|
* @return Instruction at the specified offset
|
||||||
|
*/
|
||||||
|
static Instruction GetVertexShaderInstruction(size_t offset) {
|
||||||
|
return { g_state.vs.program_code[offset] };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Writes a message to the log at critical level under the HW_GPU log class.
 * Compile_Assert passes this function's address to ABI_CallFunctionP.
 * @param msg Null-terminated message text to log verbatim
 */
static void LogCritical(const char* msg) {
    // Pass the text as an argument instead of as the format string itself, so a '%' in the
    // message is printed literally rather than being interpreted as a conversion specifier.
    LOG_CRITICAL(HW_GPU, "%s", msg);
}
|
||||||
|
|
||||||
|
void JitShader::Compile_Assert(bool condition, const char* msg) {
|
||||||
|
if (!condition) {
|
||||||
|
ABI_CallFunctionP(reinterpret_cast<const void*>(LogCritical), const_cast<char*>(msg));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Loads and swizzles a source register into the specified XMM register.
|
* Loads and swizzles a source register into the specified XMM register.
|
||||||
* @param instr VS instruction, used for determining how to load the source register
|
* @param instr VS instruction, used for determining how to load the source register
|
||||||
|
@ -144,7 +164,7 @@ static const u8 NO_DEST_REG_MASK = 0xf;
|
||||||
* @param src_reg SourceRegister object corresponding to the source register to load
|
* @param src_reg SourceRegister object corresponding to the source register to load
|
||||||
* @param dest Destination XMM register to store the loaded, swizzled source register
|
* @param dest Destination XMM register to store the loaded, swizzled source register
|
||||||
*/
|
*/
|
||||||
void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) {
|
void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) {
|
||||||
X64Reg src_ptr;
|
X64Reg src_ptr;
|
||||||
size_t src_offset;
|
size_t src_offset;
|
||||||
|
|
||||||
|
@ -216,7 +236,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
|
void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
|
||||||
DestRegister dest;
|
DestRegister dest;
|
||||||
unsigned operand_desc_id;
|
unsigned operand_desc_id;
|
||||||
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
|
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
|
||||||
|
@ -263,7 +283,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) {
|
void JitShader::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) {
|
||||||
MOVAPS(scratch, R(src1));
|
MOVAPS(scratch, R(src1));
|
||||||
CMPPS(scratch, R(src2), CMP_ORD);
|
CMPPS(scratch, R(src2), CMP_ORD);
|
||||||
|
|
||||||
|
@ -276,7 +296,7 @@ void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::
|
||||||
ANDPS(src1, R(scratch));
|
ANDPS(src1, R(scratch));
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
|
void JitShader::Compile_EvaluateCondition(Instruction instr) {
|
||||||
// Note: NXOR is used below to check for equality
|
// Note: NXOR is used below to check for equality
|
||||||
switch (instr.flow_control.op) {
|
switch (instr.flow_control.op) {
|
||||||
case Instruction::FlowControlType::Or:
|
case Instruction::FlowControlType::Or:
|
||||||
|
@ -307,23 +327,23 @@ void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_UniformCondition(Instruction instr) {
|
void JitShader::Compile_UniformCondition(Instruction instr) {
|
||||||
int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool));
|
int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool));
|
||||||
CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
|
CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
BitSet32 JitCompiler::PersistentCallerSavedRegs() {
|
BitSet32 JitShader::PersistentCallerSavedRegs() {
|
||||||
return persistent_regs & ABI_ALL_CALLER_SAVED;
|
return persistent_regs & ABI_ALL_CALLER_SAVED;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_ADD(Instruction instr) {
|
void JitShader::Compile_ADD(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
||||||
ADDPS(SRC1, R(SRC2));
|
ADDPS(SRC1, R(SRC2));
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_DP3(Instruction instr) {
|
void JitShader::Compile_DP3(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
||||||
|
|
||||||
|
@ -342,7 +362,7 @@ void JitCompiler::Compile_DP3(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_DP4(Instruction instr) {
|
void JitShader::Compile_DP4(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
||||||
|
|
||||||
|
@ -359,7 +379,7 @@ void JitCompiler::Compile_DP4(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_DPH(Instruction instr) {
|
void JitShader::Compile_DPH(Instruction instr) {
|
||||||
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
|
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
|
||||||
|
@ -391,7 +411,7 @@ void JitCompiler::Compile_DPH(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_EX2(Instruction instr) {
|
void JitShader::Compile_EX2(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
MOVSS(XMM0, R(SRC1));
|
MOVSS(XMM0, R(SRC1));
|
||||||
|
|
||||||
|
@ -404,7 +424,7 @@ void JitCompiler::Compile_EX2(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_LG2(Instruction instr) {
|
void JitShader::Compile_LG2(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
MOVSS(XMM0, R(SRC1));
|
MOVSS(XMM0, R(SRC1));
|
||||||
|
|
||||||
|
@ -417,14 +437,14 @@ void JitCompiler::Compile_LG2(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_MUL(Instruction instr) {
|
void JitShader::Compile_MUL(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
||||||
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
|
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_SGE(Instruction instr) {
|
void JitShader::Compile_SGE(Instruction instr) {
|
||||||
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
|
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
|
||||||
|
@ -439,7 +459,7 @@ void JitCompiler::Compile_SGE(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC2);
|
Compile_DestEnable(instr, SRC2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_SLT(Instruction instr) {
|
void JitShader::Compile_SLT(Instruction instr) {
|
||||||
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
|
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
|
||||||
|
@ -454,7 +474,7 @@ void JitCompiler::Compile_SLT(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_FLR(Instruction instr) {
|
void JitShader::Compile_FLR(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
|
|
||||||
if (Common::GetCPUCaps().sse4_1) {
|
if (Common::GetCPUCaps().sse4_1) {
|
||||||
|
@ -467,7 +487,7 @@ void JitCompiler::Compile_FLR(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_MAX(Instruction instr) {
|
void JitShader::Compile_MAX(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
||||||
// SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
|
// SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
|
||||||
|
@ -475,7 +495,7 @@ void JitCompiler::Compile_MAX(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_MIN(Instruction instr) {
|
void JitShader::Compile_MIN(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
||||||
// SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
|
// SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
|
||||||
|
@ -483,7 +503,7 @@ void JitCompiler::Compile_MIN(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_MOVA(Instruction instr) {
|
void JitShader::Compile_MOVA(Instruction instr) {
|
||||||
SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] };
|
SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] };
|
||||||
|
|
||||||
if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
|
if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
|
||||||
|
@ -528,12 +548,12 @@ void JitCompiler::Compile_MOVA(Instruction instr) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_MOV(Instruction instr) {
|
void JitShader::Compile_MOV(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_RCP(Instruction instr) {
|
void JitShader::Compile_RCP(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
|
|
||||||
// TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
|
// TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
|
||||||
|
@ -544,7 +564,7 @@ void JitCompiler::Compile_RCP(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_RSQ(Instruction instr) {
|
void JitShader::Compile_RSQ(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
|
|
||||||
// TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
|
// TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
|
||||||
|
@ -555,36 +575,41 @@ void JitCompiler::Compile_RSQ(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_NOP(Instruction instr) {
|
void JitShader::Compile_NOP(Instruction instr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_END(Instruction instr) {
|
void JitShader::Compile_END(Instruction instr) {
|
||||||
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
|
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
|
||||||
RET();
|
RET();
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_CALL(Instruction instr) {
|
void JitShader::Compile_CALL(Instruction instr) {
|
||||||
unsigned offset = instr.flow_control.dest_offset;
|
// Push offset of the return
|
||||||
while (offset < (instr.flow_control.dest_offset + instr.flow_control.num_instructions)) {
|
PUSH(64, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions));
|
||||||
Compile_NextInstr(&offset);
|
|
||||||
}
|
// Call the subroutine
|
||||||
|
FixupBranch b = CALL();
|
||||||
|
fixup_branches.push_back({ b, instr.flow_control.dest_offset });
|
||||||
|
|
||||||
|
// Skip over the return offset that's on the stack
|
||||||
|
ADD(64, R(RSP), Imm32(8));
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_CALLC(Instruction instr) {
|
void JitShader::Compile_CALLC(Instruction instr) {
|
||||||
Compile_EvaluateCondition(instr);
|
Compile_EvaluateCondition(instr);
|
||||||
FixupBranch b = J_CC(CC_Z, true);
|
FixupBranch b = J_CC(CC_Z, true);
|
||||||
Compile_CALL(instr);
|
Compile_CALL(instr);
|
||||||
SetJumpTarget(b);
|
SetJumpTarget(b);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_CALLU(Instruction instr) {
|
void JitShader::Compile_CALLU(Instruction instr) {
|
||||||
Compile_UniformCondition(instr);
|
Compile_UniformCondition(instr);
|
||||||
FixupBranch b = J_CC(CC_Z, true);
|
FixupBranch b = J_CC(CC_Z, true);
|
||||||
Compile_CALL(instr);
|
Compile_CALL(instr);
|
||||||
SetJumpTarget(b);
|
SetJumpTarget(b);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_CMP(Instruction instr) {
|
void JitShader::Compile_CMP(Instruction instr) {
|
||||||
using Op = Instruction::Common::CompareOpType::Op;
|
using Op = Instruction::Common::CompareOpType::Op;
|
||||||
Op op_x = instr.common.compare_op.x;
|
Op op_x = instr.common.compare_op.x;
|
||||||
Op op_y = instr.common.compare_op.y;
|
Op op_y = instr.common.compare_op.y;
|
||||||
|
@ -627,7 +652,7 @@ void JitCompiler::Compile_CMP(Instruction instr) {
|
||||||
SHR(64, R(COND1), Imm8(63));
|
SHR(64, R(COND1), Imm8(63));
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_MAD(Instruction instr) {
|
void JitShader::Compile_MAD(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1);
|
||||||
|
|
||||||
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
|
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
|
||||||
|
@ -644,9 +669,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_IF(Instruction instr) {
|
void JitShader::Compile_IF(Instruction instr) {
|
||||||
ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported",
|
Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported");
|
||||||
*offset_ptr, instr.flow_control.dest_offset.Value());
|
|
||||||
|
|
||||||
// Evaluate the "IF" condition
|
// Evaluate the "IF" condition
|
||||||
if (instr.opcode.Value() == OpCode::Id::IFU) {
|
if (instr.opcode.Value() == OpCode::Id::IFU) {
|
||||||
|
@ -676,10 +700,9 @@ void JitCompiler::Compile_IF(Instruction instr) {
|
||||||
SetJumpTarget(b2);
|
SetJumpTarget(b2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_LOOP(Instruction instr) {
|
void JitShader::Compile_LOOP(Instruction instr) {
|
||||||
ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported",
|
Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported");
|
||||||
*offset_ptr, instr.flow_control.dest_offset.Value());
|
Compile_Assert(!looping, "Nested loops not supported");
|
||||||
ASSERT_MSG(!looping, "Nested loops not supported");
|
|
||||||
|
|
||||||
looping = true;
|
looping = true;
|
||||||
|
|
||||||
|
@ -705,10 +728,7 @@ void JitCompiler::Compile_LOOP(Instruction instr) {
|
||||||
looping = false;
|
looping = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_JMP(Instruction instr) {
|
void JitShader::Compile_JMP(Instruction instr) {
|
||||||
ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported",
|
|
||||||
*offset_ptr, instr.flow_control.dest_offset.Value());
|
|
||||||
|
|
||||||
if (instr.opcode.Value() == OpCode::Id::JMPC)
|
if (instr.opcode.Value() == OpCode::Id::JMPC)
|
||||||
Compile_EvaluateCondition(instr);
|
Compile_EvaluateCondition(instr);
|
||||||
else if (instr.opcode.Value() == OpCode::Id::JMPU)
|
else if (instr.opcode.Value() == OpCode::Id::JMPU)
|
||||||
|
@ -718,30 +738,38 @@ void JitCompiler::Compile_JMP(Instruction instr) {
|
||||||
|
|
||||||
bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) &&
|
bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) &&
|
||||||
(instr.flow_control.num_instructions & 1);
|
(instr.flow_control.num_instructions & 1);
|
||||||
|
|
||||||
FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
|
FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
|
||||||
|
fixup_branches.push_back({ b, instr.flow_control.dest_offset });
|
||||||
|
}
|
||||||
|
|
||||||
Compile_Block(instr.flow_control.dest_offset);
|
void JitShader::Compile_Block(unsigned end) {
|
||||||
|
while (program_counter < end) {
|
||||||
|
Compile_NextInstr();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void JitShader::Compile_Return() {
|
||||||
|
// Peek return offset on the stack and check if we're at that offset
|
||||||
|
MOV(64, R(RAX), MDisp(RSP, 8));
|
||||||
|
CMP(32, R(RAX), Imm32(program_counter));
|
||||||
|
|
||||||
|
// If so, jump back to before CALL
|
||||||
|
FixupBranch b = J_CC(CC_NZ, true);
|
||||||
|
RET();
|
||||||
SetJumpTarget(b);
|
SetJumpTarget(b);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_Block(unsigned end) {
|
void JitShader::Compile_NextInstr() {
|
||||||
// Save current offset pointer
|
if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) {
|
||||||
unsigned* prev_offset_ptr = offset_ptr;
|
Compile_Return();
|
||||||
unsigned offset = *prev_offset_ptr;
|
}
|
||||||
|
|
||||||
while (offset < end)
|
ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!");
|
||||||
Compile_NextInstr(&offset);
|
code_ptr[program_counter] = GetCodePtr();
|
||||||
|
|
||||||
// Restore current offset pointer
|
Instruction instr = GetVertexShaderInstruction(program_counter++);
|
||||||
offset_ptr = prev_offset_ptr;
|
|
||||||
*offset_ptr = offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitCompiler::Compile_NextInstr(unsigned* offset) {
|
|
||||||
offset_ptr = offset;
|
|
||||||
|
|
||||||
Instruction instr = *(Instruction*)&g_state.vs.program_code[(*offset_ptr)++];
|
|
||||||
OpCode::Id opcode = instr.opcode.Value();
|
OpCode::Id opcode = instr.opcode.Value();
|
||||||
auto instr_func = instr_table[static_cast<unsigned>(opcode)];
|
auto instr_func = instr_table[static_cast<unsigned>(opcode)];
|
||||||
|
|
||||||
|
@ -755,9 +783,35 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CompiledShader* JitCompiler::Compile() {
|
void JitShader::FindReturnOffsets() {
|
||||||
const u8* start = GetCodePtr();
|
return_offsets.clear();
|
||||||
unsigned offset = g_state.regs.vs.main_offset;
|
|
||||||
|
for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) {
|
||||||
|
Instruction instr = GetVertexShaderInstruction(offset);
|
||||||
|
|
||||||
|
switch (instr.opcode.Value()) {
|
||||||
|
case OpCode::Id::CALL:
|
||||||
|
case OpCode::Id::CALLC:
|
||||||
|
case OpCode::Id::CALLU:
|
||||||
|
return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort for efficient binary search later
|
||||||
|
std::sort(return_offsets.begin(), return_offsets.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
void JitShader::Compile() {
|
||||||
|
// Reset flow control state
|
||||||
|
program = (CompiledShader*)GetCodePtr();
|
||||||
|
program_counter = 0;
|
||||||
|
looping = false;
|
||||||
|
code_ptr.fill(nullptr);
|
||||||
|
fixup_branches.clear();
|
||||||
|
|
||||||
|
// Find all `CALL` instructions and identify return locations
|
||||||
|
FindReturnOffsets();
|
||||||
|
|
||||||
// The stack pointer is 8 modulo 16 at the entry of a procedure
|
// The stack pointer is 8 modulo 16 at the entry of a procedure
|
||||||
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
|
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
|
||||||
|
@ -780,21 +834,31 @@ CompiledShader* JitCompiler::Compile() {
|
||||||
MOV(PTRBITS, R(RAX), ImmPtr(&neg));
|
MOV(PTRBITS, R(RAX), ImmPtr(&neg));
|
||||||
MOVAPS(NEGBIT, MatR(RAX));
|
MOVAPS(NEGBIT, MatR(RAX));
|
||||||
|
|
||||||
looping = false;
|
// Jump to start of the shader program
|
||||||
|
JMPptr(R(ABI_PARAM2));
|
||||||
|
|
||||||
while (offset < g_state.vs.program_code.size()) {
|
// Compile entire program
|
||||||
Compile_NextInstr(&offset);
|
Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
|
||||||
|
|
||||||
|
// Set the target for any incomplete branches now that the entire shader program has been emitted
|
||||||
|
for (const auto& branch : fixup_branches) {
|
||||||
|
SetJumpTarget(branch.first, code_ptr[branch.second]);
|
||||||
}
|
}
|
||||||
|
|
||||||
return (CompiledShader*)start;
|
// Free memory that's no longer needed
|
||||||
|
return_offsets.clear();
|
||||||
|
return_offsets.shrink_to_fit();
|
||||||
|
fixup_branches.clear();
|
||||||
|
fixup_branches.shrink_to_fit();
|
||||||
|
|
||||||
|
uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
|
||||||
|
ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
|
||||||
|
|
||||||
|
LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size);
|
||||||
}
|
}
|
||||||
|
|
||||||
JitCompiler::JitCompiler() {
|
JitShader::JitShader() {
|
||||||
AllocCodeSpace(jit_cache_size);
|
AllocCodeSpace(MAX_SHADER_SIZE);
|
||||||
}
|
|
||||||
|
|
||||||
void JitCompiler::Clear() {
|
|
||||||
ClearCodeSpace();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
|
|
@ -4,6 +4,9 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include <nihstro/shader_bytecode.h>
|
#include <nihstro/shader_bytecode.h>
|
||||||
|
|
||||||
#include "common/x64/emitter.h"
|
#include "common/x64/emitter.h"
|
||||||
|
@ -19,24 +22,22 @@ namespace Pica {
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
/// Memory needed to be available to compile the next shader (otherwise, clear the cache)
|
/// Memory allocated for each compiled shader (64 KiB)
|
||||||
constexpr size_t jit_shader_size = 1024 * 512;
|
constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
|
||||||
/// Memory allocated for the JIT code space cache
|
|
||||||
constexpr size_t jit_cache_size = 1024 * 1024 * 8;
|
|
||||||
|
|
||||||
using CompiledShader = void(void* registers);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
|
* This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
|
||||||
* code that can be executed on the host machine directly.
|
* code that can be executed on the host machine directly.
|
||||||
*/
|
*/
|
||||||
class JitCompiler : public Gen::XCodeBlock {
|
class JitShader : public Gen::XCodeBlock {
|
||||||
public:
|
public:
|
||||||
JitCompiler();
|
JitShader();
|
||||||
|
|
||||||
CompiledShader* Compile();
|
void Run(void* registers, unsigned offset) const {
|
||||||
|
program(registers, code_ptr[offset]);
|
||||||
|
}
|
||||||
|
|
||||||
void Clear();
|
void Compile();
|
||||||
|
|
||||||
void Compile_ADD(Instruction instr);
|
void Compile_ADD(Instruction instr);
|
||||||
void Compile_DP3(Instruction instr);
|
void Compile_DP3(Instruction instr);
|
||||||
|
@ -66,8 +67,9 @@ public:
|
||||||
void Compile_MAD(Instruction instr);
|
void Compile_MAD(Instruction instr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
void Compile_Block(unsigned end);
|
void Compile_Block(unsigned end);
|
||||||
void Compile_NextInstr(unsigned* offset);
|
void Compile_NextInstr();
|
||||||
|
|
||||||
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
|
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
|
||||||
void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
|
void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
|
||||||
|
@ -81,13 +83,39 @@ private:
|
||||||
void Compile_EvaluateCondition(Instruction instr);
|
void Compile_EvaluateCondition(Instruction instr);
|
||||||
void Compile_UniformCondition(Instruction instr);
|
void Compile_UniformCondition(Instruction instr);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Emits the code to conditionally return from a subroutine invoked by the `CALL` instruction.
|
||||||
|
*/
|
||||||
|
void Compile_Return();
|
||||||
|
|
||||||
BitSet32 PersistentCallerSavedRegs();
|
BitSet32 PersistentCallerSavedRegs();
|
||||||
|
|
||||||
/// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks.
|
/**
|
||||||
unsigned* offset_ptr = nullptr;
|
* Assertion evaluated at compile-time, but only triggered if executed at runtime.
|
||||||
|
* @param msg Message to be logged if the assertion fails.
|
||||||
|
*/
|
||||||
|
void Compile_Assert(bool condition, const char* msg);
|
||||||
|
|
||||||
/// Set to true if currently in a loop, used to check for the existence of nested loops
|
/**
|
||||||
bool looping = false;
|
* Analyzes the entire shader program for `CALL` instructions before emitting any code,
|
||||||
|
* identifying the locations where a return needs to be inserted.
|
||||||
|
*/
|
||||||
|
void FindReturnOffsets();
|
||||||
|
|
||||||
|
/// Mapping of Pica VS instructions to pointers in the emitted code
|
||||||
|
std::array<const u8*, 1024> code_ptr;
|
||||||
|
|
||||||
|
/// Offsets in code where a return needs to be inserted
|
||||||
|
std::vector<unsigned> return_offsets;
|
||||||
|
|
||||||
|
unsigned program_counter = 0; ///< Offset of the next instruction to decode
|
||||||
|
bool looping = false; ///< True if compiling a loop, used to check for nested loops
|
||||||
|
|
||||||
|
/// Branches that need to be fixed up once the entire shader program is compiled
|
||||||
|
std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
|
||||||
|
|
||||||
|
using CompiledShader = void(void* registers, const u8* start_addr);
|
||||||
|
CompiledShader* program = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // Shader
|
} // Shader
|
||||||
|
|
Loading…
Reference in a new issue