mirror of
https://github.com/Lime3DS/Lime3DS
synced 2025-01-09 13:43:27 +00:00
Merge pull request #3662 from wwylele/shader-hash-cache
shader: avoid recomputing hash for the same program
This commit is contained in:
commit
048b0fc0d3
5 changed files with 62 additions and 24 deletions
|
@ -451,6 +451,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||
LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset);
|
||||
} else {
|
||||
g_state.gs.program_code[offset] = value;
|
||||
g_state.gs.MarkProgramCodeDirty();
|
||||
offset++;
|
||||
}
|
||||
break;
|
||||
|
@ -469,6 +470,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||
LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset);
|
||||
} else {
|
||||
g_state.gs.swizzle_data[offset] = value;
|
||||
g_state.gs.MarkSwizzleDataDirty();
|
||||
offset++;
|
||||
}
|
||||
break;
|
||||
|
@ -518,8 +520,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||
LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset);
|
||||
} else {
|
||||
g_state.vs.program_code[offset] = value;
|
||||
g_state.vs.MarkProgramCodeDirty();
|
||||
if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) {
|
||||
g_state.gs.program_code[offset] = value;
|
||||
g_state.gs.MarkProgramCodeDirty();
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
|
@ -539,8 +543,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||
LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset);
|
||||
} else {
|
||||
g_state.vs.swizzle_data[offset] = value;
|
||||
g_state.vs.MarkSwizzleDataDirty();
|
||||
if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) {
|
||||
g_state.gs.swizzle_data[offset] = value;
|
||||
g_state.gs.MarkSwizzleDataDirty();
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "common/assert.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/hash.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "video_core/pica_types.h"
|
||||
#include "video_core/regs_rasterizer.h"
|
||||
|
@ -173,27 +174,29 @@ struct GSUnitState : public UnitState {
|
|||
GSEmitter emitter;
|
||||
};
|
||||
|
||||
struct ShaderSetup {
|
||||
struct {
|
||||
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
|
||||
// therefore required to be 16-byte aligned.
|
||||
alignas(16) Math::Vec4<float24> f[96];
|
||||
struct Uniforms {
|
||||
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
|
||||
// therefore required to be 16-byte aligned.
|
||||
alignas(16) Math::Vec4<float24> f[96];
|
||||
|
||||
std::array<bool, 16> b;
|
||||
std::array<Math::Vec4<u8>, 4> i;
|
||||
} uniforms;
|
||||
std::array<bool, 16> b;
|
||||
std::array<Math::Vec4<u8>, 4> i;
|
||||
|
||||
static size_t GetFloatUniformOffset(unsigned index) {
|
||||
return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>);
|
||||
return offsetof(Uniforms, f) + index * sizeof(Math::Vec4<float24>);
|
||||
}
|
||||
|
||||
static size_t GetBoolUniformOffset(unsigned index) {
|
||||
return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool);
|
||||
return offsetof(Uniforms, b) + index * sizeof(bool);
|
||||
}
|
||||
|
||||
static size_t GetIntUniformOffset(unsigned index) {
|
||||
return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>);
|
||||
return offsetof(Uniforms, i) + index * sizeof(Math::Vec4<u8>);
|
||||
}
|
||||
};
|
||||
|
||||
struct ShaderSetup {
|
||||
Uniforms uniforms;
|
||||
|
||||
std::array<u32, MAX_PROGRAM_CODE_LENGTH> program_code;
|
||||
std::array<u32, MAX_SWIZZLE_DATA_LENGTH> swizzle_data;
|
||||
|
@ -204,6 +207,36 @@ struct ShaderSetup {
|
|||
/// Used by the JIT, points to a compiled shader object.
|
||||
const void* cached_shader = nullptr;
|
||||
} engine_data;
|
||||
|
||||
void MarkProgramCodeDirty() {
|
||||
program_code_hash_dirty = true;
|
||||
}
|
||||
|
||||
void MarkSwizzleDataDirty() {
|
||||
swizzle_data_hash_dirty = true;
|
||||
}
|
||||
|
||||
u64 GetProgramCodeHash() {
|
||||
if (program_code_hash_dirty) {
|
||||
program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
|
||||
program_code_hash_dirty = false;
|
||||
}
|
||||
return program_code_hash;
|
||||
}
|
||||
|
||||
u64 GetSwizzleDataHash() {
|
||||
if (swizzle_data_hash_dirty) {
|
||||
swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data));
|
||||
swizzle_data_hash_dirty = false;
|
||||
}
|
||||
return swizzle_data_hash;
|
||||
}
|
||||
|
||||
private:
|
||||
bool program_code_hash_dirty = true;
|
||||
bool swizzle_data_hash_dirty = true;
|
||||
u64 program_code_hash = 0xDEADC0DE;
|
||||
u64 swizzle_data_hash = 0xDEADC0DE;
|
||||
};
|
||||
|
||||
class ShaderEngine {
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/hash.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
#include "video_core/shader/shader_jit_x64.h"
|
||||
|
@ -18,8 +17,8 @@ void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
|
|||
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
|
||||
setup.engine_data.entry_point = entry_point;
|
||||
|
||||
u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code));
|
||||
u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data));
|
||||
u64 code_hash = setup.GetProgramCodeHash();
|
||||
u64 swizzle_hash = setup.GetSwizzleDataHash();
|
||||
|
||||
u64 cache_key = code_hash ^ swizzle_hash;
|
||||
auto iter = cache.find(cache_key);
|
||||
|
|
|
@ -104,7 +104,7 @@ const JitFunction instr_table[64] = {
|
|||
// purposes, as documented below:
|
||||
|
||||
/// Pointer to the uniform memory
|
||||
static const Reg64 SETUP = r9;
|
||||
static const Reg64 UNIFORMS = r9;
|
||||
/// The two 32-bit VS address offset registers set by the MOVA instruction
|
||||
static const Reg64 ADDROFFS_REG_0 = r10;
|
||||
static const Reg64 ADDROFFS_REG_1 = r11;
|
||||
|
@ -139,7 +139,7 @@ static const Xmm NEGBIT = xmm15;
|
|||
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
|
||||
static const BitSet32 persistent_regs = BuildRegSet({
|
||||
// Pointers to register blocks
|
||||
SETUP,
|
||||
UNIFORMS,
|
||||
STATE,
|
||||
// Cached registers
|
||||
ADDROFFS_REG_0,
|
||||
|
@ -184,8 +184,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
|
|||
size_t src_offset;
|
||||
|
||||
if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
|
||||
src_ptr = SETUP;
|
||||
src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex());
|
||||
src_ptr = UNIFORMS;
|
||||
src_offset = Uniforms::GetFloatUniformOffset(src_reg.GetIndex());
|
||||
} else {
|
||||
src_ptr = STATE;
|
||||
src_offset = UnitState::InputOffset(src_reg);
|
||||
|
@ -354,8 +354,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
|
|||
}
|
||||
|
||||
void JitShader::Compile_UniformCondition(Instruction instr) {
|
||||
size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id);
|
||||
cmp(byte[SETUP + offset], 0);
|
||||
size_t offset = Uniforms::GetBoolUniformOffset(instr.flow_control.bool_uniform_id);
|
||||
cmp(byte[UNIFORMS + offset], 0);
|
||||
}
|
||||
|
||||
BitSet32 JitShader::PersistentCallerSavedRegs() {
|
||||
|
@ -713,8 +713,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
|
|||
// This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
|
||||
// The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
|
||||
// 4 bits) to be used as an offset into the 16-byte vector registers later
|
||||
size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id);
|
||||
mov(LOOPCOUNT, dword[SETUP + offset]);
|
||||
size_t offset = Uniforms::GetIntUniformOffset(instr.flow_control.int_uniform_id);
|
||||
mov(LOOPCOUNT, dword[UNIFORMS + offset]);
|
||||
mov(LOOPCOUNT_REG, LOOPCOUNT);
|
||||
shr(LOOPCOUNT_REG, 4);
|
||||
and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
|
||||
|
@ -882,7 +882,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
|||
ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
|
||||
mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL);
|
||||
|
||||
mov(SETUP, ABI_PARAM1);
|
||||
mov(UNIFORMS, ABI_PARAM1);
|
||||
mov(STATE, ABI_PARAM2);
|
||||
|
||||
// Zero address/loop registers
|
||||
|
|
|
@ -34,7 +34,7 @@ public:
|
|||
JitShader();
|
||||
|
||||
void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const {
|
||||
program(&setup, &state, instruction_labels[offset].getAddress());
|
||||
program(&setup.uniforms, &state, instruction_labels[offset].getAddress());
|
||||
}
|
||||
|
||||
void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code,
|
||||
|
|
Loading…
Reference in a new issue