mirror of
https://github.com/Lime3DS/Lime3DS
synced 2024-12-27 17:42:40 -06:00
Shader: Use a POD struct for registers.
This commit is contained in:
parent
b39c053785
commit
db97090cad
5 changed files with 43 additions and 40 deletions
|
@ -67,29 +67,29 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes)
|
||||||
// Setup input register table
|
// Setup input register table
|
||||||
const auto& attribute_register_map = config.input_register_map;
|
const auto& attribute_register_map = config.input_register_map;
|
||||||
|
|
||||||
if (num_attributes > 0) state.input_registers[attribute_register_map.attribute0_register] = input.attr[0];
|
if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
|
||||||
if (num_attributes > 1) state.input_registers[attribute_register_map.attribute1_register] = input.attr[1];
|
if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
|
||||||
if (num_attributes > 2) state.input_registers[attribute_register_map.attribute2_register] = input.attr[2];
|
if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
|
||||||
if (num_attributes > 3) state.input_registers[attribute_register_map.attribute3_register] = input.attr[3];
|
if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3];
|
||||||
if (num_attributes > 4) state.input_registers[attribute_register_map.attribute4_register] = input.attr[4];
|
if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4];
|
||||||
if (num_attributes > 5) state.input_registers[attribute_register_map.attribute5_register] = input.attr[5];
|
if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5];
|
||||||
if (num_attributes > 6) state.input_registers[attribute_register_map.attribute6_register] = input.attr[6];
|
if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6];
|
||||||
if (num_attributes > 7) state.input_registers[attribute_register_map.attribute7_register] = input.attr[7];
|
if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7];
|
||||||
if (num_attributes > 8) state.input_registers[attribute_register_map.attribute8_register] = input.attr[8];
|
if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8];
|
||||||
if (num_attributes > 9) state.input_registers[attribute_register_map.attribute9_register] = input.attr[9];
|
if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9];
|
||||||
if (num_attributes > 10) state.input_registers[attribute_register_map.attribute10_register] = input.attr[10];
|
if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10];
|
||||||
if (num_attributes > 11) state.input_registers[attribute_register_map.attribute11_register] = input.attr[11];
|
if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11];
|
||||||
if (num_attributes > 12) state.input_registers[attribute_register_map.attribute12_register] = input.attr[12];
|
if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12];
|
||||||
if (num_attributes > 13) state.input_registers[attribute_register_map.attribute13_register] = input.attr[13];
|
if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13];
|
||||||
if (num_attributes > 14) state.input_registers[attribute_register_map.attribute14_register] = input.attr[14];
|
if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14];
|
||||||
if (num_attributes > 15) state.input_registers[attribute_register_map.attribute15_register] = input.attr[15];
|
if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15];
|
||||||
|
|
||||||
state.conditional_code[0] = false;
|
state.conditional_code[0] = false;
|
||||||
state.conditional_code[1] = false;
|
state.conditional_code[1] = false;
|
||||||
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
if (VideoCore::g_shader_jit_enabled)
|
if (VideoCore::g_shader_jit_enabled)
|
||||||
jit_shader(&state);
|
jit_shader(&state.registers);
|
||||||
else
|
else
|
||||||
RunInterpreter(state);
|
RunInterpreter(state);
|
||||||
#else
|
#else
|
||||||
|
@ -117,7 +117,7 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes)
|
||||||
for (int comp = 0; comp < 4; ++comp) {
|
for (int comp = 0; comp < 4; ++comp) {
|
||||||
float24* out = ((float24*)&ret) + semantics[comp];
|
float24* out = ((float24*)&ret) + semantics[comp];
|
||||||
if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
|
if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
|
||||||
*out = state.output_registers[i][comp];
|
*out = state.registers.output[i][comp];
|
||||||
} else {
|
} else {
|
||||||
// Zero output so that attributes which aren't output won't have denormals in them,
|
// Zero output so that attributes which aren't output won't have denormals in them,
|
||||||
// which would slow us down later.
|
// which would slow us down later.
|
||||||
|
|
|
@ -79,11 +79,14 @@ static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has inva
|
||||||
* here will make it easier for us to parallelize the shader processing later.
|
* here will make it easier for us to parallelize the shader processing later.
|
||||||
*/
|
*/
|
||||||
struct UnitState {
|
struct UnitState {
|
||||||
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
|
struct Registers {
|
||||||
// required to be 16-byte aligned.
|
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
|
||||||
Math::Vec4<float24> MEMORY_ALIGNED16(input_registers[16]);
|
// required to be 16-byte aligned.
|
||||||
Math::Vec4<float24> MEMORY_ALIGNED16(output_registers[16]);
|
Math::Vec4<float24> MEMORY_ALIGNED16(input[16]);
|
||||||
Math::Vec4<float24> MEMORY_ALIGNED16(temporary_registers[16]);
|
Math::Vec4<float24> MEMORY_ALIGNED16(output[16]);
|
||||||
|
Math::Vec4<float24> MEMORY_ALIGNED16(temporary[16]);
|
||||||
|
} registers;
|
||||||
|
static_assert(std::is_pod<Registers>::value, "Structure is not POD");
|
||||||
|
|
||||||
u32 program_counter;
|
u32 program_counter;
|
||||||
bool conditional_code[2];
|
bool conditional_code[2];
|
||||||
|
@ -116,10 +119,10 @@ struct UnitState {
|
||||||
static int InputOffset(const SourceRegister& reg) {
|
static int InputOffset(const SourceRegister& reg) {
|
||||||
switch (reg.GetRegisterType()) {
|
switch (reg.GetRegisterType()) {
|
||||||
case RegisterType::Input:
|
case RegisterType::Input:
|
||||||
return (int)offsetof(UnitState, input_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
return (int)offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
||||||
|
|
||||||
case RegisterType::Temporary:
|
case RegisterType::Temporary:
|
||||||
return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
return (int)offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
|
@ -130,10 +133,10 @@ struct UnitState {
|
||||||
static int OutputOffset(const DestRegister& reg) {
|
static int OutputOffset(const DestRegister& reg) {
|
||||||
switch (reg.GetRegisterType()) {
|
switch (reg.GetRegisterType()) {
|
||||||
case RegisterType::Output:
|
case RegisterType::Output:
|
||||||
return (int)offsetof(UnitState, output_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
return (int)offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
||||||
|
|
||||||
case RegisterType::Temporary:
|
case RegisterType::Temporary:
|
||||||
return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
return (int)offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
|
|
|
@ -62,10 +62,10 @@ void RunInterpreter(UnitState& state) {
|
||||||
auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
|
auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
|
||||||
switch (source_reg.GetRegisterType()) {
|
switch (source_reg.GetRegisterType()) {
|
||||||
case RegisterType::Input:
|
case RegisterType::Input:
|
||||||
return &state.input_registers[source_reg.GetIndex()].x;
|
return &state.registers.input[source_reg.GetIndex()].x;
|
||||||
|
|
||||||
case RegisterType::Temporary:
|
case RegisterType::Temporary:
|
||||||
return &state.temporary_registers[source_reg.GetIndex()].x;
|
return &state.registers.temporary[source_reg.GetIndex()].x;
|
||||||
|
|
||||||
case RegisterType::FloatUniform:
|
case RegisterType::FloatUniform:
|
||||||
return &uniforms.f[source_reg.GetIndex()].x;
|
return &uniforms.f[source_reg.GetIndex()].x;
|
||||||
|
@ -114,8 +114,8 @@ void RunInterpreter(UnitState& state) {
|
||||||
src2[3] = src2[3] * float24::FromFloat32(-1);
|
src2[3] = src2[3] * float24::FromFloat32(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0]
|
float24* dest = (instr.common.dest.Value() < 0x10) ? &state.registers.output[instr.common.dest.Value().GetIndex()][0]
|
||||||
: (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0]
|
: (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
|
||||||
: dummy_vec4_float24;
|
: dummy_vec4_float24;
|
||||||
|
|
||||||
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
|
@ -355,8 +355,8 @@ void RunInterpreter(UnitState& state) {
|
||||||
src3[3] = src3[3] * float24::FromFloat32(-1);
|
src3[3] = src3[3] * float24::FromFloat32(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0]
|
float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0]
|
||||||
: (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0]
|
: (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
|
||||||
: dummy_vec4_float24;
|
: dummy_vec4_float24;
|
||||||
|
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
|
|
@ -106,7 +106,7 @@ static const X64Reg COND0 = R13;
|
||||||
/// Result of the previous CMP instruction for the Y-component comparison
|
/// Result of the previous CMP instruction for the Y-component comparison
|
||||||
static const X64Reg COND1 = R14;
|
static const X64Reg COND1 = R14;
|
||||||
/// Pointer to the UnitState instance for the current VS unit
|
/// Pointer to the UnitState::Registers block of the current VS unit
|
||||||
static const X64Reg STATE = R15;
|
static const X64Reg REGISTERS = R15;
|
||||||
/// SIMD scratch register
|
/// SIMD scratch register
|
||||||
static const X64Reg SCRATCH = XMM0;
|
static const X64Reg SCRATCH = XMM0;
|
||||||
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
|
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
|
||||||
|
@ -140,7 +140,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source
|
||||||
src_ptr = UNIFORMS;
|
src_ptr = UNIFORMS;
|
||||||
src_offset = src_reg.GetIndex() * sizeof(float24) * 4;
|
src_offset = src_reg.GetIndex() * sizeof(float24) * 4;
|
||||||
} else {
|
} else {
|
||||||
src_ptr = STATE;
|
src_ptr = REGISTERS;
|
||||||
src_offset = UnitState::InputOffset(src_reg);
|
src_offset = UnitState::InputOffset(src_reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -217,11 +217,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
|
||||||
// If all components are enabled, write the result to the destination register
|
// If all components are enabled, write the result to the destination register
|
||||||
if (swiz.dest_mask == NO_DEST_REG_MASK) {
|
if (swiz.dest_mask == NO_DEST_REG_MASK) {
|
||||||
// Store dest back to memory
|
// Store dest back to memory
|
||||||
MOVAPS(MDisp(STATE, UnitState::OutputOffset(dest)), src);
|
MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), src);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Not all components are enabled, so mask the result when storing to the destination register...
|
// Not all components are enabled, so mask the result when storing to the destination register...
|
||||||
MOVAPS(SCRATCH, MDisp(STATE, UnitState::OutputOffset(dest)));
|
MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState::OutputOffset(dest)));
|
||||||
|
|
||||||
if (Common::GetCPUCaps().sse4_1) {
|
if (Common::GetCPUCaps().sse4_1) {
|
||||||
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
|
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
|
||||||
|
@ -240,7 +240,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store dest back to memory
|
// Store dest back to memory
|
||||||
MOVAPS(MDisp(STATE, UnitState::OutputOffset(dest)), SCRATCH);
|
MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), SCRATCH);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -635,7 +635,7 @@ CompiledShader* JitCompiler::Compile() {
|
||||||
|
|
||||||
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
||||||
|
|
||||||
MOV(PTRBITS, R(STATE), R(ABI_PARAM1));
|
MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
|
||||||
MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
|
MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
|
||||||
|
|
||||||
// Zero address/loop registers
|
// Zero address/loop registers
|
||||||
|
|
|
@ -20,7 +20,7 @@ namespace Pica {
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
using CompiledShader = void(void* state);
|
using CompiledShader = void(void* registers);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
|
* This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
|
||||||
|
|
Loading…
Reference in a new issue