mirror of
https://github.com/Lime3DS/Lime3DS
synced 2025-01-09 13:43:27 +00:00
shader/jit: Use Xbyak::util::Cpu for host capabilities (#6643)
Xbyak has a complete utility class for determining the host processor's ISA features, such as SSE4.1, AVX, AVX2, and AVX512{F,VL,DQ,VBMI,...}. Using it for host-capability checks opens the door to further potential optimizations.
This commit is contained in:
parent
3d0a3c2c45
commit
71aea7e571
1 changed file with 6 additions and 3 deletions
|
@ -10,6 +10,7 @@
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <nihstro/shader_bytecode.h>
|
#include <nihstro/shader_bytecode.h>
|
||||||
#include <smmintrin.h>
|
#include <smmintrin.h>
|
||||||
|
#include <xbyak/xbyak_util.h>
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
|
@ -32,6 +33,8 @@ using Xbyak::Xmm;
|
||||||
using nihstro::DestRegister;
|
using nihstro::DestRegister;
|
||||||
using nihstro::RegisterType;
|
using nihstro::RegisterType;
|
||||||
|
|
||||||
|
static const Xbyak::util::Cpu host_caps;
|
||||||
|
|
||||||
namespace Pica::Shader {
|
namespace Pica::Shader {
|
||||||
|
|
||||||
typedef void (JitShader::*JitFunction)(Instruction instr);
|
typedef void (JitShader::*JitFunction)(Instruction instr);
|
||||||
|
@ -306,7 +309,7 @@ void JitShader::Compile_DestEnable(Instruction instr, Xmm src) {
|
||||||
// register...
|
// register...
|
||||||
movaps(SCRATCH, xword[STATE + dest_offset_disp]);
|
movaps(SCRATCH, xword[STATE + dest_offset_disp]);
|
||||||
|
|
||||||
if (Common::GetCPUCaps().sse4_1) {
|
if (host_caps.has(Cpu::tSSE41)) {
|
||||||
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
|
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
|
||||||
((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
|
((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
|
||||||
blendps(SCRATCH, src, mask);
|
blendps(SCRATCH, src, mask);
|
||||||
|
@ -437,7 +440,7 @@ void JitShader::Compile_DPH(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Common::GetCPUCaps().sse4_1) {
|
if (host_caps.has(Cpu::tSSE41)) {
|
||||||
// Set 4th component to 1.0
|
// Set 4th component to 1.0
|
||||||
blendps(SRC1, ONE, 0b1000);
|
blendps(SRC1, ONE, 0b1000);
|
||||||
} else {
|
} else {
|
||||||
|
@ -507,7 +510,7 @@ void JitShader::Compile_SLT(Instruction instr) {
|
||||||
void JitShader::Compile_FLR(Instruction instr) {
|
void JitShader::Compile_FLR(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
|
|
||||||
if (Common::GetCPUCaps().sse4_1) {
|
if (host_caps.has(Cpu::tSSE41)) {
|
||||||
roundps(SRC1, SRC1, _MM_FROUND_FLOOR);
|
roundps(SRC1, SRC1, _MM_FROUND_FLOOR);
|
||||||
} else {
|
} else {
|
||||||
cvttps2dq(SRC1, SRC1);
|
cvttps2dq(SRC1, SRC1);
|
||||||
|
|
Loading…
Reference in a new issue