mirror of
https://git.suyu.dev/suyu/suyu
synced 2025-01-09 16:03:21 +00:00
spirv: Add lower fp16 to fp32 pass
This commit is contained in:
parent
85cce78583
commit
6db69990da
32 changed files with 479 additions and 285 deletions
|
@ -7,6 +7,7 @@ add_library(shader_recompiler STATIC
|
||||||
backend/spirv/emit_spirv_composite.cpp
|
backend/spirv/emit_spirv_composite.cpp
|
||||||
backend/spirv/emit_spirv_context_get_set.cpp
|
backend/spirv/emit_spirv_context_get_set.cpp
|
||||||
backend/spirv/emit_spirv_control_flow.cpp
|
backend/spirv/emit_spirv_control_flow.cpp
|
||||||
|
backend/spirv/emit_spirv_convert.cpp
|
||||||
backend/spirv/emit_spirv_floating_point.cpp
|
backend/spirv/emit_spirv_floating_point.cpp
|
||||||
backend/spirv/emit_spirv_integer.cpp
|
backend/spirv/emit_spirv_integer.cpp
|
||||||
backend/spirv/emit_spirv_logical.cpp
|
backend/spirv/emit_spirv_logical.cpp
|
||||||
|
@ -82,6 +83,7 @@ add_library(shader_recompiler STATIC
|
||||||
ir_opt/dead_code_elimination_pass.cpp
|
ir_opt/dead_code_elimination_pass.cpp
|
||||||
ir_opt/global_memory_to_storage_buffer_pass.cpp
|
ir_opt/global_memory_to_storage_buffer_pass.cpp
|
||||||
ir_opt/identity_removal_pass.cpp
|
ir_opt/identity_removal_pass.cpp
|
||||||
|
ir_opt/lower_fp16_to_fp32.cpp
|
||||||
ir_opt/passes.h
|
ir_opt/passes.h
|
||||||
ir_opt/ssa_rewrite_pass.cpp
|
ir_opt/ssa_rewrite_pass.cpp
|
||||||
ir_opt/verification_pass.cpp
|
ir_opt/verification_pass.cpp
|
||||||
|
|
|
@ -30,8 +30,11 @@ EmitContext::EmitContext(IR::Program& program) : Sirit::Module(0x00010000) {
|
||||||
DefineCommonTypes(program.info);
|
DefineCommonTypes(program.info);
|
||||||
DefineCommonConstants();
|
DefineCommonConstants();
|
||||||
DefineSpecialVariables(program.info);
|
DefineSpecialVariables(program.info);
|
||||||
DefineConstantBuffers(program.info);
|
|
||||||
DefineStorageBuffers(program.info);
|
u32 binding{};
|
||||||
|
DefineConstantBuffers(program.info, binding);
|
||||||
|
DefineStorageBuffers(program.info, binding);
|
||||||
|
|
||||||
DefineLabels(program);
|
DefineLabels(program);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,6 +61,12 @@ void EmitContext::DefineCommonTypes(const Info& info) {
|
||||||
|
|
||||||
U1 = Name(TypeBool(), "u1");
|
U1 = Name(TypeBool(), "u1");
|
||||||
|
|
||||||
|
// TODO: Conditionally define these
|
||||||
|
AddCapability(spv::Capability::Int16);
|
||||||
|
AddCapability(spv::Capability::Int64);
|
||||||
|
U16 = Name(TypeInt(16, false), "u16");
|
||||||
|
U64 = Name(TypeInt(64, false), "u64");
|
||||||
|
|
||||||
F32.Define(*this, TypeFloat(32), "f32");
|
F32.Define(*this, TypeFloat(32), "f32");
|
||||||
U32.Define(*this, TypeInt(32, false), "u32");
|
U32.Define(*this, TypeInt(32, false), "u32");
|
||||||
|
|
||||||
|
@ -95,12 +104,12 @@ void EmitContext::DefineSpecialVariables(const Info& info) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitContext::DefineConstantBuffers(const Info& info) {
|
void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
|
||||||
if (info.constant_buffer_descriptors.empty()) {
|
if (info.constant_buffer_descriptors.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const Id array_type{TypeArray(U32[1], Constant(U32[1], 4096))};
|
const Id array_type{TypeArray(U32[1], Constant(U32[1], 4096))};
|
||||||
Decorate(array_type, spv::Decoration::ArrayStride, 16U);
|
Decorate(array_type, spv::Decoration::ArrayStride, 4U);
|
||||||
|
|
||||||
const Id struct_type{TypeStruct(array_type)};
|
const Id struct_type{TypeStruct(array_type)};
|
||||||
Name(struct_type, "cbuf_block");
|
Name(struct_type, "cbuf_block");
|
||||||
|
@ -111,18 +120,19 @@ void EmitContext::DefineConstantBuffers(const Info& info) {
|
||||||
const Id uniform_type{TypePointer(spv::StorageClass::Uniform, struct_type)};
|
const Id uniform_type{TypePointer(spv::StorageClass::Uniform, struct_type)};
|
||||||
uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]);
|
uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]);
|
||||||
|
|
||||||
u32 binding{};
|
u32 index{};
|
||||||
for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
|
for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
|
||||||
const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)};
|
const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)};
|
||||||
Decorate(id, spv::Decoration::Binding, binding);
|
Decorate(id, spv::Decoration::Binding, binding);
|
||||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||||
Name(id, fmt::format("c{}", desc.index));
|
Name(id, fmt::format("c{}", desc.index));
|
||||||
std::fill_n(cbufs.data() + desc.index, desc.count, id);
|
std::fill_n(cbufs.data() + desc.index, desc.count, id);
|
||||||
|
index += desc.count;
|
||||||
binding += desc.count;
|
binding += desc.count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitContext::DefineStorageBuffers(const Info& info) {
|
void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
|
||||||
if (info.storage_buffers_descriptors.empty()) {
|
if (info.storage_buffers_descriptors.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -140,13 +150,14 @@ void EmitContext::DefineStorageBuffers(const Info& info) {
|
||||||
const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
|
const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
|
||||||
storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
|
storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
|
||||||
|
|
||||||
u32 binding{};
|
u32 index{};
|
||||||
for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
|
for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
|
||||||
const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)};
|
const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)};
|
||||||
Decorate(id, spv::Decoration::Binding, binding);
|
Decorate(id, spv::Decoration::Binding, binding);
|
||||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||||
Name(id, fmt::format("ssbo{}", binding));
|
Name(id, fmt::format("ssbo{}", index));
|
||||||
std::fill_n(ssbos.data() + binding, desc.count, id);
|
std::fill_n(ssbos.data() + index, desc.count, id);
|
||||||
|
index += desc.count;
|
||||||
binding += desc.count;
|
binding += desc.count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,6 +37,8 @@ public:
|
||||||
|
|
||||||
Id void_id{};
|
Id void_id{};
|
||||||
Id U1{};
|
Id U1{};
|
||||||
|
Id U16{};
|
||||||
|
Id U64{};
|
||||||
VectorTypes F32;
|
VectorTypes F32;
|
||||||
VectorTypes U32;
|
VectorTypes U32;
|
||||||
VectorTypes F16;
|
VectorTypes F16;
|
||||||
|
@ -59,8 +61,8 @@ private:
|
||||||
void DefineCommonTypes(const Info& info);
|
void DefineCommonTypes(const Info& info);
|
||||||
void DefineCommonConstants();
|
void DefineCommonConstants();
|
||||||
void DefineSpecialVariables(const Info& info);
|
void DefineSpecialVariables(const Info& info);
|
||||||
void DefineConstantBuffers(const Info& info);
|
void DefineConstantBuffers(const Info& info, u32& binding);
|
||||||
void DefineStorageBuffers(const Info& info);
|
void DefineStorageBuffers(const Info& info, u32& binding);
|
||||||
void DefineLabels(IR::Program& program);
|
void DefineLabels(IR::Program& program);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,8 @@
|
||||||
#include "shader_recompiler/frontend/ir/microinstruction.h"
|
#include "shader_recompiler/frontend/ir/microinstruction.h"
|
||||||
#include "shader_recompiler/frontend/ir/program.h"
|
#include "shader_recompiler/frontend/ir/program.h"
|
||||||
|
|
||||||
|
#pragma optimize("", off)
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
namespace {
|
namespace {
|
||||||
template <class Func>
|
template <class Func>
|
||||||
|
|
|
@ -79,26 +79,27 @@ void EmitWriteStorageU16(EmitContext& ctx);
|
||||||
void EmitWriteStorageS16(EmitContext& ctx);
|
void EmitWriteStorageS16(EmitContext& ctx);
|
||||||
void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||||
Id value);
|
Id value);
|
||||||
void EmitWriteStorage64(EmitContext& ctx);
|
void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||||
|
Id value);
|
||||||
void EmitWriteStorage128(EmitContext& ctx);
|
void EmitWriteStorage128(EmitContext& ctx);
|
||||||
void EmitCompositeConstructU32x2(EmitContext& ctx);
|
Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
|
||||||
void EmitCompositeConstructU32x3(EmitContext& ctx);
|
Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
|
||||||
void EmitCompositeConstructU32x4(EmitContext& ctx);
|
Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
|
||||||
void EmitCompositeExtractU32x2(EmitContext& ctx);
|
Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
|
||||||
Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index);
|
Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
|
||||||
void EmitCompositeExtractU32x4(EmitContext& ctx);
|
Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
|
||||||
void EmitCompositeConstructF16x2(EmitContext& ctx);
|
void EmitCompositeConstructF16x2(EmitContext& ctx);
|
||||||
void EmitCompositeConstructF16x3(EmitContext& ctx);
|
void EmitCompositeConstructF16x3(EmitContext& ctx);
|
||||||
void EmitCompositeConstructF16x4(EmitContext& ctx);
|
void EmitCompositeConstructF16x4(EmitContext& ctx);
|
||||||
void EmitCompositeExtractF16x2(EmitContext& ctx);
|
Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
|
||||||
void EmitCompositeExtractF16x3(EmitContext& ctx);
|
Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
|
||||||
void EmitCompositeExtractF16x4(EmitContext& ctx);
|
Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
|
||||||
void EmitCompositeConstructF32x2(EmitContext& ctx);
|
void EmitCompositeConstructF32x2(EmitContext& ctx);
|
||||||
void EmitCompositeConstructF32x3(EmitContext& ctx);
|
void EmitCompositeConstructF32x3(EmitContext& ctx);
|
||||||
void EmitCompositeConstructF32x4(EmitContext& ctx);
|
void EmitCompositeConstructF32x4(EmitContext& ctx);
|
||||||
void EmitCompositeExtractF32x2(EmitContext& ctx);
|
Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
|
||||||
void EmitCompositeExtractF32x3(EmitContext& ctx);
|
Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
|
||||||
void EmitCompositeExtractF32x4(EmitContext& ctx);
|
Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
|
||||||
void EmitCompositeConstructF64x2(EmitContext& ctx);
|
void EmitCompositeConstructF64x2(EmitContext& ctx);
|
||||||
void EmitCompositeConstructF64x3(EmitContext& ctx);
|
void EmitCompositeConstructF64x3(EmitContext& ctx);
|
||||||
void EmitCompositeConstructF64x4(EmitContext& ctx);
|
void EmitCompositeConstructF64x4(EmitContext& ctx);
|
||||||
|
@ -116,11 +117,13 @@ void EmitBitCastF16U16(EmitContext& ctx);
|
||||||
Id EmitBitCastF32U32(EmitContext& ctx, Id value);
|
Id EmitBitCastF32U32(EmitContext& ctx, Id value);
|
||||||
void EmitBitCastF64U64(EmitContext& ctx);
|
void EmitBitCastF64U64(EmitContext& ctx);
|
||||||
void EmitPackUint2x32(EmitContext& ctx);
|
void EmitPackUint2x32(EmitContext& ctx);
|
||||||
void EmitUnpackUint2x32(EmitContext& ctx);
|
Id EmitUnpackUint2x32(EmitContext& ctx, Id value);
|
||||||
void EmitPackFloat2x16(EmitContext& ctx);
|
Id EmitPackFloat2x16(EmitContext& ctx, Id value);
|
||||||
void EmitUnpackFloat2x16(EmitContext& ctx);
|
Id EmitUnpackFloat2x16(EmitContext& ctx, Id value);
|
||||||
void EmitPackDouble2x32(EmitContext& ctx);
|
Id EmitPackHalf2x16(EmitContext& ctx, Id value);
|
||||||
void EmitUnpackDouble2x32(EmitContext& ctx);
|
Id EmitUnpackHalf2x16(EmitContext& ctx, Id value);
|
||||||
|
Id EmitPackDouble2x32(EmitContext& ctx, Id value);
|
||||||
|
Id EmitUnpackDouble2x32(EmitContext& ctx, Id value);
|
||||||
void EmitGetZeroFromOp(EmitContext& ctx);
|
void EmitGetZeroFromOp(EmitContext& ctx);
|
||||||
void EmitGetSignFromOp(EmitContext& ctx);
|
void EmitGetSignFromOp(EmitContext& ctx);
|
||||||
void EmitGetCarryFromOp(EmitContext& ctx);
|
void EmitGetCarryFromOp(EmitContext& ctx);
|
||||||
|
@ -159,18 +162,18 @@ void EmitFPLog2(EmitContext& ctx);
|
||||||
void EmitFPSaturate16(EmitContext& ctx);
|
void EmitFPSaturate16(EmitContext& ctx);
|
||||||
void EmitFPSaturate32(EmitContext& ctx);
|
void EmitFPSaturate32(EmitContext& ctx);
|
||||||
void EmitFPSaturate64(EmitContext& ctx);
|
void EmitFPSaturate64(EmitContext& ctx);
|
||||||
void EmitFPRoundEven16(EmitContext& ctx);
|
Id EmitFPRoundEven16(EmitContext& ctx, Id value);
|
||||||
void EmitFPRoundEven32(EmitContext& ctx);
|
Id EmitFPRoundEven32(EmitContext& ctx, Id value);
|
||||||
void EmitFPRoundEven64(EmitContext& ctx);
|
Id EmitFPRoundEven64(EmitContext& ctx, Id value);
|
||||||
void EmitFPFloor16(EmitContext& ctx);
|
Id EmitFPFloor16(EmitContext& ctx, Id value);
|
||||||
void EmitFPFloor32(EmitContext& ctx);
|
Id EmitFPFloor32(EmitContext& ctx, Id value);
|
||||||
void EmitFPFloor64(EmitContext& ctx);
|
Id EmitFPFloor64(EmitContext& ctx, Id value);
|
||||||
void EmitFPCeil16(EmitContext& ctx);
|
Id EmitFPCeil16(EmitContext& ctx, Id value);
|
||||||
void EmitFPCeil32(EmitContext& ctx);
|
Id EmitFPCeil32(EmitContext& ctx, Id value);
|
||||||
void EmitFPCeil64(EmitContext& ctx);
|
Id EmitFPCeil64(EmitContext& ctx, Id value);
|
||||||
void EmitFPTrunc16(EmitContext& ctx);
|
Id EmitFPTrunc16(EmitContext& ctx, Id value);
|
||||||
void EmitFPTrunc32(EmitContext& ctx);
|
Id EmitFPTrunc32(EmitContext& ctx, Id value);
|
||||||
void EmitFPTrunc64(EmitContext& ctx);
|
Id EmitFPTrunc64(EmitContext& ctx, Id value);
|
||||||
Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||||
void EmitIAdd64(EmitContext& ctx);
|
void EmitIAdd64(EmitContext& ctx);
|
||||||
Id EmitISub32(EmitContext& ctx, Id a, Id b);
|
Id EmitISub32(EmitContext& ctx, Id a, Id b);
|
||||||
|
@ -201,25 +204,25 @@ void EmitLogicalOr(EmitContext& ctx);
|
||||||
void EmitLogicalAnd(EmitContext& ctx);
|
void EmitLogicalAnd(EmitContext& ctx);
|
||||||
void EmitLogicalXor(EmitContext& ctx);
|
void EmitLogicalXor(EmitContext& ctx);
|
||||||
void EmitLogicalNot(EmitContext& ctx);
|
void EmitLogicalNot(EmitContext& ctx);
|
||||||
void EmitConvertS16F16(EmitContext& ctx);
|
Id EmitConvertS16F16(EmitContext& ctx, Id value);
|
||||||
void EmitConvertS16F32(EmitContext& ctx);
|
Id EmitConvertS16F32(EmitContext& ctx, Id value);
|
||||||
void EmitConvertS16F64(EmitContext& ctx);
|
Id EmitConvertS16F64(EmitContext& ctx, Id value);
|
||||||
void EmitConvertS32F16(EmitContext& ctx);
|
Id EmitConvertS32F16(EmitContext& ctx, Id value);
|
||||||
void EmitConvertS32F32(EmitContext& ctx);
|
Id EmitConvertS32F32(EmitContext& ctx, Id value);
|
||||||
void EmitConvertS32F64(EmitContext& ctx);
|
Id EmitConvertS32F64(EmitContext& ctx, Id value);
|
||||||
void EmitConvertS64F16(EmitContext& ctx);
|
Id EmitConvertS64F16(EmitContext& ctx, Id value);
|
||||||
void EmitConvertS64F32(EmitContext& ctx);
|
Id EmitConvertS64F32(EmitContext& ctx, Id value);
|
||||||
void EmitConvertS64F64(EmitContext& ctx);
|
Id EmitConvertS64F64(EmitContext& ctx, Id value);
|
||||||
void EmitConvertU16F16(EmitContext& ctx);
|
Id EmitConvertU16F16(EmitContext& ctx, Id value);
|
||||||
void EmitConvertU16F32(EmitContext& ctx);
|
Id EmitConvertU16F32(EmitContext& ctx, Id value);
|
||||||
void EmitConvertU16F64(EmitContext& ctx);
|
Id EmitConvertU16F64(EmitContext& ctx, Id value);
|
||||||
void EmitConvertU32F16(EmitContext& ctx);
|
Id EmitConvertU32F16(EmitContext& ctx, Id value);
|
||||||
void EmitConvertU32F32(EmitContext& ctx);
|
Id EmitConvertU32F32(EmitContext& ctx, Id value);
|
||||||
void EmitConvertU32F64(EmitContext& ctx);
|
Id EmitConvertU32F64(EmitContext& ctx, Id value);
|
||||||
void EmitConvertU64F16(EmitContext& ctx);
|
Id EmitConvertU64F16(EmitContext& ctx, Id value);
|
||||||
void EmitConvertU64F32(EmitContext& ctx);
|
Id EmitConvertU64F32(EmitContext& ctx, Id value);
|
||||||
void EmitConvertU64F64(EmitContext& ctx);
|
Id EmitConvertU64F64(EmitContext& ctx, Id value);
|
||||||
void EmitConvertU64U32(EmitContext& ctx);
|
Id EmitConvertU64U32(EmitContext& ctx, Id value);
|
||||||
void EmitConvertU32U64(EmitContext& ctx);
|
Id EmitConvertU32U64(EmitContext& ctx, Id value);
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
|
|
@ -34,24 +34,32 @@ void EmitPackUint2x32(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
throw NotImplementedException("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitUnpackUint2x32(EmitContext&) {
|
Id EmitUnpackUint2x32(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpBitcast(ctx.U32[2], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitPackFloat2x16(EmitContext&) {
|
Id EmitPackFloat2x16(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpBitcast(ctx.U32[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitUnpackFloat2x16(EmitContext&) {
|
Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpBitcast(ctx.F16[2], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitPackDouble2x32(EmitContext&) {
|
Id EmitPackHalf2x16(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpPackHalf2x16(ctx.U32[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitUnpackDouble2x32(EmitContext&) {
|
Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpUnpackHalf2x16(ctx.F32[2], value);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reinterprets `value` as a scalar of type ctx.F64[1] via OpBitcast.
// No numeric conversion is performed; only the type of the bits changes.
Id EmitPackDouble2x32(EmitContext& ctx, Id value) {
    const Id packed_type{ctx.F64[1]};
    return ctx.OpBitcast(packed_type, value);
}
|
||||||
|
|
||||||
|
Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) {
|
||||||
|
return ctx.OpBitcast(ctx.U32[2], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
|
|
@ -6,28 +6,28 @@
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
void EmitCompositeConstructU32x2(EmitContext&) {
|
Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeConstructU32x3(EmitContext&) {
|
Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeConstructU32x4(EmitContext&) {
|
Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeExtractU32x2(EmitContext&) {
|
Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) {
|
Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) {
|
||||||
return ctx.OpCompositeExtract(ctx.U32[1], vector, index);
|
return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeExtractU32x4(EmitContext&) {
|
Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeConstructF16x2(EmitContext&) {
|
void EmitCompositeConstructF16x2(EmitContext&) {
|
||||||
|
@ -42,16 +42,16 @@ void EmitCompositeConstructF16x4(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
throw NotImplementedException("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeExtractF16x2(EmitContext&) {
|
Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeExtractF16x3(EmitContext&) {
|
Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeExtractF16x4(EmitContext&) {
|
Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeConstructF32x2(EmitContext&) {
|
void EmitCompositeConstructF32x2(EmitContext&) {
|
||||||
|
@ -66,16 +66,16 @@ void EmitCompositeConstructF32x4(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
throw NotImplementedException("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeExtractF32x2(EmitContext&) {
|
Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeExtractF32x3(EmitContext&) {
|
Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeExtractF32x4(EmitContext&) {
|
Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeConstructF64x2(EmitContext&) {
|
void EmitCompositeConstructF64x2(EmitContext&) {
|
||||||
|
|
89
src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
Normal file
89
src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||||
|
|
||||||
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
|
// Converts a floating-point `value` to a signed 16-bit integer and widens the
// result to the 32-bit type ctx.U32[1].
//
// The widening step uses OpSConvert (sign extension) rather than OpUConvert:
// zero-extending the 16-bit result would turn negative values into large
// positive 32-bit values.
// NOTE(review): per the function name `value` is presumably an f16 operand;
// confirm against the IR opcode table.
Id EmitConvertS16F16(EmitContext& ctx, Id value) {
    const Id narrow{ctx.OpConvertFToS(ctx.U16, value)};
    return ctx.OpSConvert(ctx.U32[1], narrow);
}
|
||||||
|
|
||||||
|
// Converts a floating-point `value` to a signed 16-bit integer and widens the
// result to the 32-bit type ctx.U32[1].
//
// The widening step uses OpSConvert (sign extension) rather than OpUConvert:
// zero-extending the 16-bit result would turn negative values into large
// positive 32-bit values.
// NOTE(review): per the function name `value` is presumably an f32 operand;
// confirm against the IR opcode table.
Id EmitConvertS16F32(EmitContext& ctx, Id value) {
    const Id narrow{ctx.OpConvertFToS(ctx.U16, value)};
    return ctx.OpSConvert(ctx.U32[1], narrow);
}
|
||||||
|
|
||||||
|
// Converts a floating-point `value` to a signed 16-bit integer and widens the
// result to the 32-bit type ctx.U32[1].
//
// The widening step uses OpSConvert (sign extension) rather than OpUConvert:
// zero-extending the 16-bit result would turn negative values into large
// positive 32-bit values.
// NOTE(review): per the function name `value` is presumably an f64 operand;
// confirm against the IR opcode table.
Id EmitConvertS16F64(EmitContext& ctx, Id value) {
    const Id narrow{ctx.OpConvertFToS(ctx.U16, value)};
    return ctx.OpSConvert(ctx.U32[1], narrow);
}
|
||||||
|
|
||||||
|
// Emits OpConvertFToS on `value`, producing a result typed as ctx.U32[1].
Id EmitConvertS32F16(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U32[1]};
    return ctx.OpConvertFToS(result_type, value);
}
|
||||||
|
|
||||||
|
// Emits OpConvertFToS on `value`, producing a result typed as ctx.U32[1].
Id EmitConvertS32F32(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U32[1]};
    return ctx.OpConvertFToS(result_type, value);
}
|
||||||
|
|
||||||
|
// Emits OpConvertFToS on `value`, producing a result typed as ctx.U32[1].
Id EmitConvertS32F64(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U32[1]};
    return ctx.OpConvertFToS(result_type, value);
}
|
||||||
|
|
||||||
|
// Emits OpConvertFToS on `value`, producing a result typed as ctx.U64.
Id EmitConvertS64F16(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U64};
    return ctx.OpConvertFToS(result_type, value);
}
|
||||||
|
|
||||||
|
// Emits OpConvertFToS on `value`, producing a result typed as ctx.U64.
Id EmitConvertS64F32(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U64};
    return ctx.OpConvertFToS(result_type, value);
}
|
||||||
|
|
||||||
|
// Emits OpConvertFToS on `value`, producing a result typed as ctx.U64.
Id EmitConvertS64F64(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U64};
    return ctx.OpConvertFToS(result_type, value);
}
|
||||||
|
|
||||||
|
// Two-step conversion: OpConvertFToU produces a value typed as ctx.U16, which
// OpUConvert then widens to ctx.U32[1].
Id EmitConvertU16F16(EmitContext& ctx, Id value) {
    const Id narrow{ctx.OpConvertFToU(ctx.U16, value)};
    return ctx.OpUConvert(ctx.U32[1], narrow);
}
|
||||||
|
|
||||||
|
// Two-step conversion: OpConvertFToU produces a value typed as ctx.U16, which
// OpUConvert then widens to ctx.U32[1].
Id EmitConvertU16F32(EmitContext& ctx, Id value) {
    const Id narrow{ctx.OpConvertFToU(ctx.U16, value)};
    return ctx.OpUConvert(ctx.U32[1], narrow);
}
|
||||||
|
|
||||||
|
// Two-step conversion: OpConvertFToU produces a value typed as ctx.U16, which
// OpUConvert then widens to ctx.U32[1].
Id EmitConvertU16F64(EmitContext& ctx, Id value) {
    const Id narrow{ctx.OpConvertFToU(ctx.U16, value)};
    return ctx.OpUConvert(ctx.U32[1], narrow);
}
|
||||||
|
|
||||||
|
// Emits OpConvertFToU on `value`, producing a result typed as ctx.U32[1].
Id EmitConvertU32F16(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U32[1]};
    return ctx.OpConvertFToU(result_type, value);
}
|
||||||
|
|
||||||
|
// Emits OpConvertFToU on `value`, producing a result typed as ctx.U32[1].
Id EmitConvertU32F32(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U32[1]};
    return ctx.OpConvertFToU(result_type, value);
}
|
||||||
|
|
||||||
|
// Emits OpConvertFToU on `value`, producing a result typed as ctx.U32[1].
Id EmitConvertU32F64(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U32[1]};
    return ctx.OpConvertFToU(result_type, value);
}
|
||||||
|
|
||||||
|
// Emits OpConvertFToU on `value`, producing a result typed as ctx.U64.
Id EmitConvertU64F16(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U64};
    return ctx.OpConvertFToU(result_type, value);
}
|
||||||
|
|
||||||
|
// Emits OpConvertFToU on `value`, producing a result typed as ctx.U64.
Id EmitConvertU64F32(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U64};
    return ctx.OpConvertFToU(result_type, value);
}
|
||||||
|
|
||||||
|
// Emits OpConvertFToU on `value`, producing a result typed as ctx.U64.
Id EmitConvertU64F64(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U64};
    return ctx.OpConvertFToU(result_type, value);
}
|
||||||
|
|
||||||
|
// Emits OpUConvert on `value`, producing a result typed as ctx.U64.
Id EmitConvertU64U32(EmitContext& ctx, Id value) {
    const Id wide_type{ctx.U64};
    return ctx.OpUConvert(wide_type, value);
}
|
||||||
|
|
||||||
|
// Emits OpUConvert on `value`, producing a result typed as ctx.U32[1].
Id EmitConvertU32U64(EmitContext& ctx, Id value) {
    const Id narrow_type{ctx.U32[1]};
    return ctx.OpUConvert(narrow_type, value);
}
|
||||||
|
|
||||||
|
} // namespace Shader::Backend::SPIRV
|
|
@ -169,52 +169,52 @@ void EmitFPSaturate64(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
throw NotImplementedException("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPRoundEven16(EmitContext&) {
|
Id EmitFPRoundEven16(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpRoundEven(ctx.F16[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPRoundEven32(EmitContext&) {
|
Id EmitFPRoundEven32(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpRoundEven(ctx.F32[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPRoundEven64(EmitContext&) {
|
Id EmitFPRoundEven64(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpRoundEven(ctx.F64[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPFloor16(EmitContext&) {
|
Id EmitFPFloor16(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpFloor(ctx.F16[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPFloor32(EmitContext&) {
|
Id EmitFPFloor32(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpFloor(ctx.F32[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPFloor64(EmitContext&) {
|
Id EmitFPFloor64(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpFloor(ctx.F64[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPCeil16(EmitContext&) {
|
Id EmitFPCeil16(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCeil(ctx.F16[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPCeil32(EmitContext&) {
|
Id EmitFPCeil32(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCeil(ctx.F32[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPCeil64(EmitContext&) {
|
Id EmitFPCeil64(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpCeil(ctx.F64[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPTrunc16(EmitContext&) {
|
Id EmitFPTrunc16(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpTrunc(ctx.F16[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPTrunc32(EmitContext&) {
|
Id EmitFPTrunc32(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpTrunc(ctx.F32[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPTrunc64(EmitContext&) {
|
Id EmitFPTrunc64(EmitContext& ctx, Id value) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
return ctx.OpTrunc(ctx.F64[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
|
|
@ -113,20 +113,4 @@ Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
|
||||||
return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs);
|
return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitLogicalOr(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitLogicalAnd(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitLogicalXor(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitLogicalNot(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
|
|
@ -6,83 +6,19 @@
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
void EmitConvertS16F16(EmitContext&) {
|
void EmitLogicalOr(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
throw NotImplementedException("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitConvertS16F32(EmitContext&) {
|
void EmitLogicalAnd(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
throw NotImplementedException("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitConvertS16F64(EmitContext&) {
|
void EmitLogicalXor(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
throw NotImplementedException("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitConvertS32F16(EmitContext&) {
|
void EmitLogicalNot(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertS32F32(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertS32F64(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertS64F16(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertS64F32(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertS64F64(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertU16F16(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertU16F32(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertU16F64(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertU32F16(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertU32F32(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertU32F64(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertU64F16(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertU64F32(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertU64F64(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertU64U32(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitConvertU32U64(EmitContext&) {
|
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
throw NotImplementedException("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -94,8 +94,7 @@ void EmitLoadStorageS16(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
throw NotImplementedException("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding,
|
Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
|
||||||
const IR::Value& offset) {
|
|
||||||
if (!binding.IsImmediate()) {
|
if (!binding.IsImmediate()) {
|
||||||
throw NotImplementedException("Dynamic storage buffer indexing");
|
throw NotImplementedException("Dynamic storage buffer indexing");
|
||||||
}
|
}
|
||||||
|
@ -129,8 +128,8 @@ void EmitWriteStorageS16(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
throw NotImplementedException("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding,
|
void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||||
const IR::Value& offset, Id value) {
|
Id value) {
|
||||||
if (!binding.IsImmediate()) {
|
if (!binding.IsImmediate()) {
|
||||||
throw NotImplementedException("Dynamic storage buffer indexing");
|
throw NotImplementedException("Dynamic storage buffer indexing");
|
||||||
}
|
}
|
||||||
|
@ -140,8 +139,19 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding,
|
||||||
ctx.OpStore(pointer, value);
|
ctx.OpStore(pointer, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitWriteStorage64(EmitContext&) {
|
void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
Id value) {
|
||||||
|
if (!binding.IsImmediate()) {
|
||||||
|
throw NotImplementedException("Dynamic storage buffer indexing");
|
||||||
|
}
|
||||||
|
// TODO: Support reinterpreting bindings, guaranteed to be aligned
|
||||||
|
const Id ssbo{ctx.ssbos[binding.U32()]};
|
||||||
|
const Id low_index{StorageIndex(ctx, offset, sizeof(u32))};
|
||||||
|
const Id high_index{ctx.OpIAdd(ctx.U32[1], low_index, ctx.Constant(ctx.U32[1], 1U))};
|
||||||
|
const Id low_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, low_index)};
|
||||||
|
const Id high_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, high_index)};
|
||||||
|
ctx.OpStore(low_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
|
||||||
|
ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitWriteStorage128(EmitContext&) {
|
void EmitWriteStorage128(EmitContext&) {
|
||||||
|
|
|
@ -4,8 +4,8 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <compare>
|
#include <compare>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
|
|
|
@ -547,11 +547,11 @@ F32 IREmitter::FPSqrt(const F32& value) {
|
||||||
|
|
||||||
F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
|
F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U16:
|
case Type::F16:
|
||||||
return Inst<F16>(Opcode::FPSaturate16, value);
|
return Inst<F16>(Opcode::FPSaturate16, value);
|
||||||
case Type::U32:
|
case Type::F32:
|
||||||
return Inst<F32>(Opcode::FPSaturate32, value);
|
return Inst<F32>(Opcode::FPSaturate32, value);
|
||||||
case Type::U64:
|
case Type::F64:
|
||||||
return Inst<F64>(Opcode::FPSaturate64, value);
|
return Inst<F64>(Opcode::FPSaturate64, value);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(value.Type());
|
ThrowInvalidType(value.Type());
|
||||||
|
@ -560,11 +560,11 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
|
||||||
|
|
||||||
F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) {
|
F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) {
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U16:
|
case Type::F16:
|
||||||
return Inst<F16>(Opcode::FPRoundEven16, value);
|
return Inst<F16>(Opcode::FPRoundEven16, value);
|
||||||
case Type::U32:
|
case Type::F32:
|
||||||
return Inst<F32>(Opcode::FPRoundEven32, value);
|
return Inst<F32>(Opcode::FPRoundEven32, value);
|
||||||
case Type::U64:
|
case Type::F64:
|
||||||
return Inst<F64>(Opcode::FPRoundEven64, value);
|
return Inst<F64>(Opcode::FPRoundEven64, value);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(value.Type());
|
ThrowInvalidType(value.Type());
|
||||||
|
@ -573,11 +573,11 @@ F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) {
|
||||||
|
|
||||||
F16F32F64 IREmitter::FPFloor(const F16F32F64& value) {
|
F16F32F64 IREmitter::FPFloor(const F16F32F64& value) {
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U16:
|
case Type::F16:
|
||||||
return Inst<F16>(Opcode::FPFloor16, value);
|
return Inst<F16>(Opcode::FPFloor16, value);
|
||||||
case Type::U32:
|
case Type::F32:
|
||||||
return Inst<F32>(Opcode::FPFloor32, value);
|
return Inst<F32>(Opcode::FPFloor32, value);
|
||||||
case Type::U64:
|
case Type::F64:
|
||||||
return Inst<F64>(Opcode::FPFloor64, value);
|
return Inst<F64>(Opcode::FPFloor64, value);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(value.Type());
|
ThrowInvalidType(value.Type());
|
||||||
|
@ -586,11 +586,11 @@ F16F32F64 IREmitter::FPFloor(const F16F32F64& value) {
|
||||||
|
|
||||||
F16F32F64 IREmitter::FPCeil(const F16F32F64& value) {
|
F16F32F64 IREmitter::FPCeil(const F16F32F64& value) {
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U16:
|
case Type::F16:
|
||||||
return Inst<F16>(Opcode::FPCeil16, value);
|
return Inst<F16>(Opcode::FPCeil16, value);
|
||||||
case Type::U32:
|
case Type::F32:
|
||||||
return Inst<F32>(Opcode::FPCeil32, value);
|
return Inst<F32>(Opcode::FPCeil32, value);
|
||||||
case Type::U64:
|
case Type::F64:
|
||||||
return Inst<F64>(Opcode::FPCeil64, value);
|
return Inst<F64>(Opcode::FPCeil64, value);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(value.Type());
|
ThrowInvalidType(value.Type());
|
||||||
|
@ -599,11 +599,11 @@ F16F32F64 IREmitter::FPCeil(const F16F32F64& value) {
|
||||||
|
|
||||||
F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) {
|
F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) {
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U16:
|
case Type::F16:
|
||||||
return Inst<F16>(Opcode::FPTrunc16, value);
|
return Inst<F16>(Opcode::FPTrunc16, value);
|
||||||
case Type::U32:
|
case Type::F32:
|
||||||
return Inst<F32>(Opcode::FPTrunc32, value);
|
return Inst<F32>(Opcode::FPTrunc32, value);
|
||||||
case Type::U64:
|
case Type::F64:
|
||||||
return Inst<F64>(Opcode::FPTrunc64, value);
|
return Inst<F64>(Opcode::FPTrunc64, value);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(value.Type());
|
ThrowInvalidType(value.Type());
|
||||||
|
@ -729,33 +729,33 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) {
|
||||||
switch (bitsize) {
|
switch (bitsize) {
|
||||||
case 16:
|
case 16:
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U16:
|
case Type::F16:
|
||||||
return Inst<U32>(Opcode::ConvertS16F16, value);
|
return Inst<U32>(Opcode::ConvertS16F16, value);
|
||||||
case Type::U32:
|
case Type::F32:
|
||||||
return Inst<U32>(Opcode::ConvertS16F32, value);
|
return Inst<U32>(Opcode::ConvertS16F32, value);
|
||||||
case Type::U64:
|
case Type::F64:
|
||||||
return Inst<U32>(Opcode::ConvertS16F64, value);
|
return Inst<U32>(Opcode::ConvertS16F64, value);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(value.Type());
|
ThrowInvalidType(value.Type());
|
||||||
}
|
}
|
||||||
case 32:
|
case 32:
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U16:
|
case Type::F16:
|
||||||
return Inst<U32>(Opcode::ConvertS32F16, value);
|
return Inst<U32>(Opcode::ConvertS32F16, value);
|
||||||
case Type::U32:
|
case Type::F32:
|
||||||
return Inst<U32>(Opcode::ConvertS32F32, value);
|
return Inst<U32>(Opcode::ConvertS32F32, value);
|
||||||
case Type::U64:
|
case Type::F64:
|
||||||
return Inst<U32>(Opcode::ConvertS32F64, value);
|
return Inst<U32>(Opcode::ConvertS32F64, value);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(value.Type());
|
ThrowInvalidType(value.Type());
|
||||||
}
|
}
|
||||||
case 64:
|
case 64:
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U16:
|
case Type::F16:
|
||||||
return Inst<U64>(Opcode::ConvertS64F16, value);
|
return Inst<U64>(Opcode::ConvertS64F16, value);
|
||||||
case Type::U32:
|
case Type::F32:
|
||||||
return Inst<U64>(Opcode::ConvertS64F32, value);
|
return Inst<U64>(Opcode::ConvertS64F32, value);
|
||||||
case Type::U64:
|
case Type::F64:
|
||||||
return Inst<U64>(Opcode::ConvertS64F64, value);
|
return Inst<U64>(Opcode::ConvertS64F64, value);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(value.Type());
|
ThrowInvalidType(value.Type());
|
||||||
|
@ -769,33 +769,33 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) {
|
||||||
switch (bitsize) {
|
switch (bitsize) {
|
||||||
case 16:
|
case 16:
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U16:
|
case Type::F16:
|
||||||
return Inst<U32>(Opcode::ConvertU16F16, value);
|
return Inst<U32>(Opcode::ConvertU16F16, value);
|
||||||
case Type::U32:
|
case Type::F32:
|
||||||
return Inst<U32>(Opcode::ConvertU16F32, value);
|
return Inst<U32>(Opcode::ConvertU16F32, value);
|
||||||
case Type::U64:
|
case Type::F64:
|
||||||
return Inst<U32>(Opcode::ConvertU16F64, value);
|
return Inst<U32>(Opcode::ConvertU16F64, value);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(value.Type());
|
ThrowInvalidType(value.Type());
|
||||||
}
|
}
|
||||||
case 32:
|
case 32:
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U16:
|
case Type::F16:
|
||||||
return Inst<U32>(Opcode::ConvertU32F16, value);
|
return Inst<U32>(Opcode::ConvertU32F16, value);
|
||||||
case Type::U32:
|
case Type::F32:
|
||||||
return Inst<U32>(Opcode::ConvertU32F32, value);
|
return Inst<U32>(Opcode::ConvertU32F32, value);
|
||||||
case Type::U64:
|
case Type::F64:
|
||||||
return Inst<U32>(Opcode::ConvertU32F64, value);
|
return Inst<U32>(Opcode::ConvertU32F64, value);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(value.Type());
|
ThrowInvalidType(value.Type());
|
||||||
}
|
}
|
||||||
case 64:
|
case 64:
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U16:
|
case Type::F16:
|
||||||
return Inst<U64>(Opcode::ConvertU64F16, value);
|
return Inst<U64>(Opcode::ConvertU64F16, value);
|
||||||
case Type::U32:
|
case Type::F32:
|
||||||
return Inst<U64>(Opcode::ConvertU64F32, value);
|
return Inst<U64>(Opcode::ConvertU64F32, value);
|
||||||
case Type::U64:
|
case Type::F64:
|
||||||
return Inst<U64>(Opcode::ConvertU64F64, value);
|
return Inst<U64>(Opcode::ConvertU64F64, value);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(value.Type());
|
ThrowInvalidType(value.Type());
|
||||||
|
@ -829,10 +829,10 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
|
||||||
case 64:
|
case 64:
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U32:
|
case Type::U32:
|
||||||
|
return Inst<U64>(Opcode::ConvertU64U32, value);
|
||||||
|
case Type::U64:
|
||||||
// Nothing to do
|
// Nothing to do
|
||||||
return value;
|
return value;
|
||||||
case Type::U64:
|
|
||||||
return Inst<U64>(Opcode::ConvertU64U32, value);
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -216,6 +216,10 @@ void Inst::ReplaceUsesWith(Value replacement) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Inst::ReplaceOpcode(IR::Opcode opcode) {
|
||||||
|
op = opcode;
|
||||||
|
}
|
||||||
|
|
||||||
void Inst::Use(const Value& value) {
|
void Inst::Use(const Value& value) {
|
||||||
Inst* const inst{value.Inst()};
|
Inst* const inst{value.Inst()};
|
||||||
++inst->use_count;
|
++inst->use_count;
|
||||||
|
|
|
@ -86,6 +86,8 @@ public:
|
||||||
|
|
||||||
void ReplaceUsesWith(Value replacement);
|
void ReplaceUsesWith(Value replacement);
|
||||||
|
|
||||||
|
void ReplaceOpcode(IR::Opcode opcode);
|
||||||
|
|
||||||
template <typename FlagsType>
|
template <typename FlagsType>
|
||||||
requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
|
requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
|
||||||
[[nodiscard]] FlagsType Flags() const noexcept {
|
[[nodiscard]] FlagsType Flags() const noexcept {
|
||||||
|
|
|
@ -119,8 +119,10 @@ OPCODE(PackUint2x32, U64, U32x
|
||||||
OPCODE(UnpackUint2x32, U32x2, U64, )
|
OPCODE(UnpackUint2x32, U32x2, U64, )
|
||||||
OPCODE(PackFloat2x16, U32, F16x2, )
|
OPCODE(PackFloat2x16, U32, F16x2, )
|
||||||
OPCODE(UnpackFloat2x16, F16x2, U32, )
|
OPCODE(UnpackFloat2x16, F16x2, U32, )
|
||||||
OPCODE(PackDouble2x32, U64, U32x2, )
|
OPCODE(PackHalf2x16, U32, F32x2, )
|
||||||
OPCODE(UnpackDouble2x32, U32x2, U64, )
|
OPCODE(UnpackHalf2x16, F32x2, U32, )
|
||||||
|
OPCODE(PackDouble2x32, F64, U32x2, )
|
||||||
|
OPCODE(UnpackDouble2x32, U32x2, F64, )
|
||||||
|
|
||||||
// Pseudo-operation, handled specially at final emit
|
// Pseudo-operation, handled specially at final emit
|
||||||
OPCODE(GetZeroFromOp, U1, Opaque, )
|
OPCODE(GetZeroFromOp, U1, Opaque, )
|
||||||
|
|
|
@ -56,6 +56,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
.post_order_blocks{},
|
.post_order_blocks{},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
Optimization::LowerFp16ToFp32(program);
|
||||||
for (IR::Function& function : functions) {
|
for (IR::Function& function : functions) {
|
||||||
function.post_order_blocks = PostOrder(function.blocks);
|
function.post_order_blocks = PostOrder(function.blocks);
|
||||||
Optimization::SsaRewritePass(function.post_order_blocks);
|
Optimization::SsaRewritePass(function.post_order_blocks);
|
||||||
|
@ -69,6 +70,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
Optimization::VerificationPass(function);
|
Optimization::VerificationPass(function);
|
||||||
}
|
}
|
||||||
Optimization::CollectShaderInfoPass(program);
|
Optimization::CollectShaderInfoPass(program);
|
||||||
|
|
||||||
fmt::print(stdout, "{}\n", IR::DumpProgram(program));
|
fmt::print(stdout, "{}\n", IR::DumpProgram(program));
|
||||||
return program;
|
return program;
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,7 @@ union F2I {
|
||||||
BitField<8, 2, DestFormat> dest_format;
|
BitField<8, 2, DestFormat> dest_format;
|
||||||
BitField<10, 2, SrcFormat> src_format;
|
BitField<10, 2, SrcFormat> src_format;
|
||||||
BitField<12, 1, u64> is_signed;
|
BitField<12, 1, u64> is_signed;
|
||||||
BitField<39, 1, Rounding> rounding;
|
BitField<39, 2, Rounding> rounding;
|
||||||
BitField<49, 1, u64> half;
|
BitField<49, 1, u64> half;
|
||||||
BitField<44, 1, u64> ftz;
|
BitField<44, 1, u64> ftz;
|
||||||
BitField<45, 1, u64> abs;
|
BitField<45, 1, u64> abs;
|
||||||
|
@ -55,6 +55,28 @@ size_t BitSize(DestFormat dest_format) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
|
||||||
|
union {
|
||||||
|
u64 raw;
|
||||||
|
BitField<20, 14, s64> offset;
|
||||||
|
BitField<34, 5, u64> binding;
|
||||||
|
} const cbuf{insn};
|
||||||
|
if (cbuf.binding >= 18) {
|
||||||
|
throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
|
||||||
|
}
|
||||||
|
if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
|
||||||
|
throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
|
||||||
|
}
|
||||||
|
if (cbuf.offset % 2 != 0) {
|
||||||
|
throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
|
||||||
|
}
|
||||||
|
const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
|
||||||
|
const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
|
||||||
|
const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)};
|
||||||
|
const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)};
|
||||||
|
return v.ir.PackDouble2x32(vector);
|
||||||
|
}
|
||||||
|
|
||||||
void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
|
void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
|
||||||
// F2I is used to convert from a floating point value to an integer
|
// F2I is used to convert from a floating point value to an integer
|
||||||
const F2I f2i{insn};
|
const F2I f2i{insn};
|
||||||
|
@ -82,19 +104,16 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
|
||||||
const size_t bitsize{BitSize(f2i.dest_format)};
|
const size_t bitsize{BitSize(f2i.dest_format)};
|
||||||
const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)};
|
const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)};
|
||||||
|
|
||||||
|
if (bitsize == 64) {
|
||||||
|
const IR::Value vector{v.ir.UnpackUint2x32(result)};
|
||||||
|
v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)});
|
||||||
|
v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)});
|
||||||
|
} else {
|
||||||
v.X(f2i.dest_reg, result);
|
v.X(f2i.dest_reg, result);
|
||||||
|
}
|
||||||
|
|
||||||
if (f2i.cc != 0) {
|
if (f2i.cc != 0) {
|
||||||
v.SetZFlag(v.ir.GetZeroFromOp(result));
|
throw NotImplementedException("F2I CC");
|
||||||
if (is_signed) {
|
|
||||||
v.SetSFlag(v.ir.GetSignFromOp(result));
|
|
||||||
} else {
|
|
||||||
v.ResetSFlag();
|
|
||||||
}
|
|
||||||
v.ResetCFlag();
|
|
||||||
|
|
||||||
// TODO: Investigate if out of bound conversions sets the overflow flag
|
|
||||||
v.ResetOFlag();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
@ -118,12 +137,25 @@ void TranslatorVisitor::F2I_reg(u64 insn) {
|
||||||
f2i.base.src_format.Value());
|
f2i.base.src_format.Value());
|
||||||
}
|
}
|
||||||
}()};
|
}()};
|
||||||
|
|
||||||
TranslateF2I(*this, insn, op_a);
|
TranslateF2I(*this, insn, op_a);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TranslatorVisitor::F2I_cbuf(u64) {
|
void TranslatorVisitor::F2I_cbuf(u64 insn) {
|
||||||
throw NotImplementedException("{}", Opcode::F2I_cbuf);
|
const F2I f2i{insn};
|
||||||
|
const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
|
||||||
|
switch (f2i.src_format) {
|
||||||
|
case SrcFormat::F16:
|
||||||
|
return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
|
||||||
|
case SrcFormat::F32:
|
||||||
|
return GetCbufF(insn);
|
||||||
|
case SrcFormat::F64: {
|
||||||
|
return UnpackCbuf(*this, insn);
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
|
||||||
|
}
|
||||||
|
}()};
|
||||||
|
TranslateF2I(*this, insn, op_a);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TranslatorVisitor::F2I_imm(u64) {
|
void TranslatorVisitor::F2I_imm(u64) {
|
||||||
|
|
|
@ -11,7 +11,7 @@ namespace Shader::Maxwell {
|
||||||
|
|
||||||
class TranslatorVisitor {
|
class TranslatorVisitor {
|
||||||
public:
|
public:
|
||||||
explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_} ,ir(block) {}
|
explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
|
||||||
|
|
||||||
Environment& env;
|
Environment& env;
|
||||||
IR::IREmitter ir;
|
IR::IREmitter ir;
|
||||||
|
|
|
@ -26,6 +26,22 @@ void Visit(Info& info, IR::Inst& inst) {
|
||||||
case IR::Opcode::LocalInvocationId:
|
case IR::Opcode::LocalInvocationId:
|
||||||
info.uses_local_invocation_id = true;
|
info.uses_local_invocation_id = true;
|
||||||
break;
|
break;
|
||||||
|
case IR::Opcode::CompositeConstructF16x2:
|
||||||
|
case IR::Opcode::CompositeConstructF16x3:
|
||||||
|
case IR::Opcode::CompositeConstructF16x4:
|
||||||
|
case IR::Opcode::CompositeExtractF16x2:
|
||||||
|
case IR::Opcode::CompositeExtractF16x3:
|
||||||
|
case IR::Opcode::CompositeExtractF16x4:
|
||||||
|
case IR::Opcode::BitCastU16F16:
|
||||||
|
case IR::Opcode::BitCastF16U16:
|
||||||
|
case IR::Opcode::PackFloat2x16:
|
||||||
|
case IR::Opcode::UnpackFloat2x16:
|
||||||
|
case IR::Opcode::ConvertS16F16:
|
||||||
|
case IR::Opcode::ConvertS32F16:
|
||||||
|
case IR::Opcode::ConvertS64F16:
|
||||||
|
case IR::Opcode::ConvertU16F16:
|
||||||
|
case IR::Opcode::ConvertU32F16:
|
||||||
|
case IR::Opcode::ConvertU64F16:
|
||||||
case IR::Opcode::FPAbs16:
|
case IR::Opcode::FPAbs16:
|
||||||
case IR::Opcode::FPAdd16:
|
case IR::Opcode::FPAdd16:
|
||||||
case IR::Opcode::FPCeil16:
|
case IR::Opcode::FPCeil16:
|
||||||
|
@ -36,7 +52,7 @@ void Visit(Info& info, IR::Inst& inst) {
|
||||||
case IR::Opcode::FPRoundEven16:
|
case IR::Opcode::FPRoundEven16:
|
||||||
case IR::Opcode::FPSaturate16:
|
case IR::Opcode::FPSaturate16:
|
||||||
case IR::Opcode::FPTrunc16:
|
case IR::Opcode::FPTrunc16:
|
||||||
info.uses_fp16;
|
info.uses_fp16 = true;
|
||||||
break;
|
break;
|
||||||
case IR::Opcode::FPAbs64:
|
case IR::Opcode::FPAbs64:
|
||||||
case IR::Opcode::FPAdd64:
|
case IR::Opcode::FPAdd64:
|
||||||
|
|
|
@ -104,12 +104,12 @@ void FoldGetPred(IR::Inst& inst) {
|
||||||
bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
|
bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
|
||||||
/*
|
/*
|
||||||
* We are looking for this pattern:
|
* We are looking for this pattern:
|
||||||
* %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 (uses: 1)
|
* %rhs_bfe = BitFieldUExtract %factor_a, #0, #16
|
||||||
* %rhs_mul = IMul32 %rhs_bfe, %factor_b (uses: 1)
|
* %rhs_mul = IMul32 %rhs_bfe, %factor_b
|
||||||
* %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 (uses: 1)
|
* %lhs_bfe = BitFieldUExtract %factor_a, #16, #16
|
||||||
* %rhs_mul = IMul32 %lhs_bfe, %factor_b (uses: 1)
|
* %rhs_mul = IMul32 %lhs_bfe, %factor_b
|
||||||
* %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 (uses: 1)
|
* %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16
|
||||||
* %result = IAdd32 %lhs_shl, %rhs_mul (uses: 10)
|
* %result = IAdd32 %lhs_shl, %rhs_mul
|
||||||
*
|
*
|
||||||
* And replacing it with
|
* And replacing it with
|
||||||
* %result = IMul32 %factor_a, %factor_b
|
* %result = IMul32 %factor_a, %factor_b
|
||||||
|
|
79
src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
Normal file
79
src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||||
|
#include "shader_recompiler/frontend/ir/microinstruction.h"
|
||||||
|
#include "shader_recompiler/ir_opt/passes.h"
|
||||||
|
|
||||||
|
namespace Shader::Optimization {
|
||||||
|
namespace {
|
||||||
|
IR::Opcode Replace(IR::Opcode op) {
|
||||||
|
switch (op) {
|
||||||
|
case IR::Opcode::FPAbs16:
|
||||||
|
return IR::Opcode::FPAbs32;
|
||||||
|
case IR::Opcode::FPAdd16:
|
||||||
|
return IR::Opcode::FPAdd32;
|
||||||
|
case IR::Opcode::FPCeil16:
|
||||||
|
return IR::Opcode::FPCeil32;
|
||||||
|
case IR::Opcode::FPFloor16:
|
||||||
|
return IR::Opcode::FPFloor32;
|
||||||
|
case IR::Opcode::FPFma16:
|
||||||
|
return IR::Opcode::FPFma32;
|
||||||
|
case IR::Opcode::FPMul16:
|
||||||
|
return IR::Opcode::FPMul32;
|
||||||
|
case IR::Opcode::FPNeg16:
|
||||||
|
return IR::Opcode::FPNeg32;
|
||||||
|
case IR::Opcode::FPRoundEven16:
|
||||||
|
return IR::Opcode::FPRoundEven32;
|
||||||
|
case IR::Opcode::FPSaturate16:
|
||||||
|
return IR::Opcode::FPSaturate32;
|
||||||
|
case IR::Opcode::FPTrunc16:
|
||||||
|
return IR::Opcode::FPTrunc32;
|
||||||
|
case IR::Opcode::CompositeConstructF16x2:
|
||||||
|
return IR::Opcode::CompositeConstructF32x2;
|
||||||
|
case IR::Opcode::CompositeConstructF16x3:
|
||||||
|
return IR::Opcode::CompositeConstructF32x3;
|
||||||
|
case IR::Opcode::CompositeConstructF16x4:
|
||||||
|
return IR::Opcode::CompositeConstructF32x4;
|
||||||
|
case IR::Opcode::CompositeExtractF16x2:
|
||||||
|
return IR::Opcode::CompositeExtractF32x2;
|
||||||
|
case IR::Opcode::CompositeExtractF16x3:
|
||||||
|
return IR::Opcode::CompositeExtractF32x3;
|
||||||
|
case IR::Opcode::CompositeExtractF16x4:
|
||||||
|
return IR::Opcode::CompositeExtractF32x4;
|
||||||
|
case IR::Opcode::ConvertS16F16:
|
||||||
|
return IR::Opcode::ConvertS16F32;
|
||||||
|
case IR::Opcode::ConvertS32F16:
|
||||||
|
return IR::Opcode::ConvertS32F32;
|
||||||
|
case IR::Opcode::ConvertS64F16:
|
||||||
|
return IR::Opcode::ConvertS64F32;
|
||||||
|
case IR::Opcode::ConvertU16F16:
|
||||||
|
return IR::Opcode::ConvertU16F32;
|
||||||
|
case IR::Opcode::ConvertU32F16:
|
||||||
|
return IR::Opcode::ConvertU32F32;
|
||||||
|
case IR::Opcode::ConvertU64F16:
|
||||||
|
return IR::Opcode::ConvertU64F32;
|
||||||
|
case IR::Opcode::PackFloat2x16:
|
||||||
|
return IR::Opcode::PackHalf2x16;
|
||||||
|
case IR::Opcode::UnpackFloat2x16:
|
||||||
|
return IR::Opcode::UnpackHalf2x16;
|
||||||
|
default:
|
||||||
|
return op;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
void LowerFp16ToFp32(IR::Program& program) {
|
||||||
|
for (IR::Function& function : program.functions) {
|
||||||
|
for (IR::Block* const block : function.blocks) {
|
||||||
|
for (IR::Inst& inst : block->Instructions()) {
|
||||||
|
inst.ReplaceOpcode(Replace(inst.Opcode()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Shader::Optimization
|
|
@ -24,6 +24,7 @@ void ConstantPropagationPass(IR::Block& block);
|
||||||
void DeadCodeEliminationPass(IR::Block& block);
|
void DeadCodeEliminationPass(IR::Block& block);
|
||||||
void GlobalMemoryToStorageBufferPass(IR::Program& program);
|
void GlobalMemoryToStorageBufferPass(IR::Program& program);
|
||||||
void IdentityRemovalPass(IR::Function& function);
|
void IdentityRemovalPass(IR::Function& function);
|
||||||
|
void LowerFp16ToFp32(IR::Program& program);
|
||||||
void SsaRewritePass(std::span<IR::Block* const> post_order_blocks);
|
void SsaRewritePass(std::span<IR::Block* const> post_order_blocks);
|
||||||
void VerificationPass(const IR::Function& function);
|
void VerificationPass(const IR::Function& function);
|
||||||
|
|
||||||
|
|
|
@ -67,8 +67,8 @@ int main() {
|
||||||
ObjectPool<IR::Inst> inst_pool;
|
ObjectPool<IR::Inst> inst_pool;
|
||||||
ObjectPool<IR::Block> block_pool;
|
ObjectPool<IR::Block> block_pool;
|
||||||
|
|
||||||
FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"};
|
// FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"};
|
||||||
// FileEnvironment env{"D:\\Shaders\\shader.bin"};
|
FileEnvironment env{"D:\\Shaders\\shader.bin"};
|
||||||
block_pool.ReleaseContents();
|
block_pool.ReleaseContents();
|
||||||
inst_pool.ReleaseContents();
|
inst_pool.ReleaseContents();
|
||||||
flow_block_pool.ReleaseContents();
|
flow_block_pool.ReleaseContents();
|
||||||
|
@ -76,5 +76,9 @@ int main() {
|
||||||
fmt::print(stdout, "{}\n", cfg.Dot());
|
fmt::print(stdout, "{}\n", cfg.Dot());
|
||||||
IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)};
|
IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)};
|
||||||
fmt::print(stdout, "{}\n", IR::DumpProgram(program));
|
fmt::print(stdout, "{}\n", IR::DumpProgram(program));
|
||||||
void(Backend::SPIRV::EmitSPIRV(env, program));
|
const std::vector<u32> spirv{Backend::SPIRV::EmitSPIRV(env, program)};
|
||||||
|
std::FILE* const file{std::fopen("D:\\shader.spv", "wb")};
|
||||||
|
std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file);
|
||||||
|
std::fclose(file);
|
||||||
|
std::system("spirv-dis D:\\shader.spv");
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename... Args>
|
template <typename... Args>
|
||||||
requires std::is_constructible_v<T, Args...> [[nodiscard]] T* Create(Args&&... args) {
|
requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) {
|
||||||
return std::construct_at(Memory(), std::forward<Args>(args)...);
|
return std::construct_at(Memory(), std::forward<Args>(args)...);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -206,6 +206,8 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
|
||||||
.codeSize = static_cast<u32>(code.size_bytes()),
|
.codeSize = static_cast<u32>(code.size_bytes()),
|
||||||
.pCode = code.data(),
|
.pCode = code.data(),
|
||||||
});
|
});
|
||||||
|
/*
|
||||||
|
FIXME
|
||||||
pipeline = device.GetLogical().CreateComputePipeline({
|
pipeline = device.GetLogical().CreateComputePipeline({
|
||||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
|
@ -224,6 +226,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
|
||||||
.basePipelineHandle = nullptr,
|
.basePipelineHandle = nullptr,
|
||||||
.basePipelineIndex = 0,
|
.basePipelineIndex = 0,
|
||||||
});
|
});
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
VKComputePass::~VKComputePass() = default;
|
VKComputePass::~VKComputePass() = default;
|
||||||
|
|
|
@ -31,8 +31,6 @@
|
||||||
#include "video_core/vulkan_common/vulkan_device.h"
|
#include "video_core/vulkan_common/vulkan_device.h"
|
||||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||||
|
|
||||||
#pragma optimize("", off)
|
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
|
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
|
||||||
|
|
||||||
|
@ -180,6 +178,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
|
||||||
// TODO: Load from cache
|
// TODO: Load from cache
|
||||||
}
|
}
|
||||||
const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)};
|
const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)};
|
||||||
|
|
||||||
|
FILE* file = fopen("D:\\shader.spv", "wb");
|
||||||
|
fwrite(code.data(), 4, code.size(), file);
|
||||||
|
fclose(file);
|
||||||
|
std::system("spirv-dis D:\\shader.spv");
|
||||||
|
|
||||||
shader_info->unique_hash = env.ComputeHash();
|
shader_info->unique_hash = env.ComputeHash();
|
||||||
shader_info->size_bytes = env.ShaderSize();
|
shader_info->size_bytes = env.ShaderSize();
|
||||||
return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info,
|
return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info,
|
||||||
|
|
|
@ -36,8 +36,6 @@
|
||||||
#include "video_core/vulkan_common/vulkan_device.h"
|
#include "video_core/vulkan_common/vulkan_device.h"
|
||||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||||
|
|
||||||
#pragma optimize("", off)
|
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
|
|
@ -247,9 +247,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||||
.shaderStorageImageArrayDynamicIndexing = false,
|
.shaderStorageImageArrayDynamicIndexing = false,
|
||||||
.shaderClipDistance = false,
|
.shaderClipDistance = false,
|
||||||
.shaderCullDistance = false,
|
.shaderCullDistance = false,
|
||||||
.shaderFloat64 = false,
|
.shaderFloat64 = true,
|
||||||
.shaderInt64 = false,
|
.shaderInt64 = true,
|
||||||
.shaderInt16 = false,
|
.shaderInt16 = true,
|
||||||
.shaderResourceResidency = false,
|
.shaderResourceResidency = false,
|
||||||
.shaderResourceMinLod = false,
|
.shaderResourceMinLod = false,
|
||||||
.sparseBinding = false,
|
.sparseBinding = false,
|
||||||
|
@ -420,8 +420,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||||
}
|
}
|
||||||
if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
|
if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
|
||||||
// Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
|
// Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
|
||||||
LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math");
|
// LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math");
|
||||||
is_float16_supported = false;
|
// is_float16_supported = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
graphics_queue = logical.GetQueue(graphics_family);
|
graphics_queue = logical.GetQueue(graphics_family);
|
||||||
|
|
Loading…
Reference in a new issue