mirror of
https://git.suyu.dev/suyu/suyu
synced 2024-12-25 10:52:48 -06:00
fix: some errors in msl backend
This commit is contained in:
parent
7920249ed1
commit
055112b739
13 changed files with 122 additions and 176 deletions
|
@ -174,30 +174,18 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) {
|
|||
}
|
||||
}
|
||||
|
||||
bool IsPreciseType(MslVarType type) {
|
||||
switch (type) {
|
||||
case MslVarType::PrecF32:
|
||||
case MslVarType::PrecF64:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void DefineVariables(const EmitContext& ctx, std::string& header) {
|
||||
for (u32 i = 0; i < static_cast<u32>(MslVarType::Void); ++i) {
|
||||
const auto type{static_cast<MslVarType>(i)};
|
||||
const auto& tracker{ctx.var_alloc.GetUseTracker(type)};
|
||||
const auto type_name{ctx.var_alloc.GetMslType(type)};
|
||||
const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug};
|
||||
const auto precise{!has_precise_bug && IsPreciseType(type) ? "precise " : ""};
|
||||
// Temps/return types that are never used are stored at index 0
|
||||
if (tracker.uses_temp) {
|
||||
header += fmt::format("{}{} t{}={}(0);", precise, type_name,
|
||||
ctx.var_alloc.Representation(0, type), type_name);
|
||||
header += fmt::format("{} t{}={}(0);", type_name, ctx.var_alloc.Representation(0, type),
|
||||
type_name);
|
||||
}
|
||||
for (u32 index = 0; index < tracker.num_used; ++index) {
|
||||
header += fmt::format("{}{} {}={}(0);", precise, type_name,
|
||||
header += fmt::format("{} {}={}(0);", type_name,
|
||||
ctx.var_alloc.Representation(index, type), type_name);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi
|
|||
ctx.var_alloc.Consume(offset))};
|
||||
const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32)};
|
||||
ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret);
|
||||
ctx.AddF32("{}=utof({});", inst, ret);
|
||||
ctx.AddF32("{}=as_type<float>({});", inst, ret);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
|
@ -98,7 +98,7 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi
|
|||
void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
|
||||
std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
|
||||
ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset,
|
||||
ctx.AddU64("{}=packUint2x32(uint2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset,
|
||||
pointer_offset);
|
||||
ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;",
|
||||
pointer_offset, value, pointer_offset, value);
|
||||
|
@ -107,7 +107,7 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_vi
|
|||
void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
|
||||
std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
|
||||
ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, pointer_offset, pointer_offset);
|
||||
ctx.AddU32x2("{}=uint2(smem[{}>>2],smem[({}+4)>>2]);", inst, pointer_offset, pointer_offset);
|
||||
ctx.Add("smem[{}>>2]={}.x;smem[({}+4)>>2]={}.y;", pointer_offset, value, pointer_offset, value);
|
||||
}
|
||||
|
||||
|
@ -178,7 +178,7 @@ void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Val
|
|||
void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
|
||||
ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
|
||||
ctx.AddU64("{}=packUint2x32(uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
|
||||
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset));
|
||||
ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;",
|
||||
|
@ -189,9 +189,9 @@ void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
|||
void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
|
||||
ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
|
||||
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset));
|
||||
ctx.AddU64("{}=packInt2x32(int2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
|
||||
ctx.var_alloc.Consume(offset));
|
||||
ctx.Add("for(int i=0;i<2;++i){{ "
|
||||
"{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
|
||||
");}}",
|
||||
|
@ -202,7 +202,7 @@ void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
|||
void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
|
||||
ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
|
||||
ctx.AddU64("{}=packUint2x32(uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
|
||||
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset));
|
||||
ctx.Add("for(int i=0;i<2;++i){{ "
|
||||
|
@ -214,9 +214,9 @@ void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
|||
void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
|
||||
ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
|
||||
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset));
|
||||
ctx.AddU64("{}=packInt2x32(int2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
|
||||
ctx.var_alloc.Consume(offset));
|
||||
ctx.Add("for(int i=0;i<2;++i){{ "
|
||||
"{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
|
||||
");}}",
|
||||
|
@ -227,7 +227,7 @@ void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
|||
void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
|
||||
ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
|
||||
ctx.AddU64("{}=packUint2x32(uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
|
||||
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset));
|
||||
ctx.Add("for(int "
|
||||
|
@ -240,7 +240,7 @@ void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
|||
void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
ctx.AddU64(
|
||||
"{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_"
|
||||
"{}=packUint2x32(uint2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_"
|
||||
"ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
|
||||
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset), value);
|
||||
|
@ -248,7 +248,7 @@ void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
|
|||
|
||||
void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_"
|
||||
ctx.AddU64("{}=packUint2x32(uint2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_"
|
||||
"ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
|
||||
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
|
||||
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
|
||||
|
@ -257,7 +257,7 @@ void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& bi
|
|||
void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
ctx.AddU64(
|
||||
"{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_"
|
||||
"{}=packUint2x32(uint2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_"
|
||||
"ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
|
||||
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset), value);
|
||||
|
@ -265,7 +265,7 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
|
|||
|
||||
void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x),"
|
||||
ctx.AddU64("{}=packUint2x32(uint2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x),"
|
||||
"atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
|
||||
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
|
||||
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
|
||||
|
@ -274,7 +274,7 @@ void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Val
|
|||
void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
|
||||
ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
|
||||
ctx.AddU32x2("{}=uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
|
||||
ctx.var_alloc.Consume(offset));
|
||||
ctx.Add("{}_ssbo{}[{}>>2]+={}.x;{}_ssbo{}[({}>>2)+1]+={}.y;", ctx.stage_name, binding.U32(),
|
||||
|
@ -285,7 +285,7 @@ void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value
|
|||
void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
|
||||
ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
|
||||
ctx.AddU32x2("{}=int2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
|
||||
ctx.var_alloc.Consume(offset));
|
||||
ctx.Add("for(int "
|
||||
|
@ -297,7 +297,7 @@ void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value
|
|||
void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
|
||||
ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
|
||||
ctx.AddU32x2("{}=uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
|
||||
ctx.var_alloc.Consume(offset));
|
||||
ctx.Add("for(int i=0;i<2;++i){{ "
|
||||
|
@ -309,7 +309,7 @@ void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value
|
|||
void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
|
||||
ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
|
||||
ctx.AddU32x2("{}=int2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
|
||||
ctx.var_alloc.Consume(offset));
|
||||
ctx.Add("for(int "
|
||||
|
@ -321,7 +321,7 @@ void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value
|
|||
void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
|
||||
ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
|
||||
ctx.AddU32x2("{}=uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
|
||||
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
|
||||
ctx.var_alloc.Consume(offset));
|
||||
ctx.Add("for(int i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],{}[i]);}}",
|
||||
|
@ -332,7 +332,7 @@ void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value
|
|||
void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2");
|
||||
ctx.AddU32x2("{}=uvec2(atomicAnd({}_ssbo{}[{}>>2],{}.x),atomicAnd({}_ssbo{}[({}>>2)+1],{}.y));",
|
||||
ctx.AddU32x2("{}=uint2(atomicAnd({}_ssbo{}[{}>>2],{}.x),atomicAnd({}_ssbo{}[({}>>2)+1],{}.y));",
|
||||
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
|
||||
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
|
||||
}
|
||||
|
@ -340,7 +340,7 @@ void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
|||
void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2");
|
||||
ctx.AddU32x2("{}=uvec2(atomicOr({}_ssbo{}[{}>>2],{}.x),atomicOr({}_ssbo{}[({}>>2)+1],{}.y));",
|
||||
ctx.AddU32x2("{}=uint2(atomicOr({}_ssbo{}[{}>>2],{}.x),atomicOr({}_ssbo{}[({}>>2)+1],{}.y));",
|
||||
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
|
||||
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
|
||||
}
|
||||
|
@ -348,7 +348,7 @@ void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
|||
void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2");
|
||||
ctx.AddU32x2("{}=uvec2(atomicXor({}_ssbo{}[{}>>2],{}.x),atomicXor({}_ssbo{}[({}>>2)+1],{}.y));",
|
||||
ctx.AddU32x2("{}=uint2(atomicXor({}_ssbo{}[{}>>2],{}.x),atomicXor({}_ssbo{}[({}>>2)+1],{}.y));",
|
||||
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
|
||||
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
|
||||
}
|
||||
|
@ -356,7 +356,7 @@ void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
|||
void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset, std::string_view value) {
|
||||
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2");
|
||||
ctx.AddU32x2("{}=uvec2(atomicExchange({}_ssbo{}[{}>>2],{}.x),atomicExchange({}_ssbo{}[({}>>2)+"
|
||||
ctx.AddU32x2("{}=uint2(atomicExchange({}_ssbo{}[{}>>2],{}.x),atomicExchange({}_ssbo{}[({}>>2)+"
|
||||
"1],{}.y));",
|
||||
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
|
||||
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
|
||||
|
|
|
@ -41,7 +41,7 @@ void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I
|
|||
}
|
||||
|
||||
void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||
ctx.AddU32("{}=ftou({});", inst, value);
|
||||
ctx.AddU32("{}=as_type<uint>({});", inst, value);
|
||||
}
|
||||
|
||||
void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||
|
@ -53,7 +53,7 @@ void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I
|
|||
}
|
||||
|
||||
void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||
ctx.AddF32("{}=utof({});", inst, value);
|
||||
ctx.AddF32("{}=as_type<float>({});", inst, value);
|
||||
}
|
||||
|
||||
void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||
|
|
|
@ -23,17 +23,17 @@ void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view
|
|||
|
||||
void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
|
||||
std::string_view e2) {
|
||||
ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2);
|
||||
ctx.AddU32x2("{}=uint2({},{});", inst, e1, e2);
|
||||
}
|
||||
|
||||
void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
|
||||
std::string_view e2, std::string_view e3) {
|
||||
ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3);
|
||||
ctx.AddU32x3("{}=uint3({},{},{});", inst, e1, e2, e3);
|
||||
}
|
||||
|
||||
void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
|
||||
std::string_view e2, std::string_view e3, std::string_view e4) {
|
||||
ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4);
|
||||
ctx.AddU32x4("{}=uint4({},{},{},{});", inst, e1, e2, e3, e4);
|
||||
}
|
||||
|
||||
void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
|
||||
|
@ -131,17 +131,17 @@ void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
|
|||
|
||||
void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
|
||||
std::string_view e2) {
|
||||
ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2);
|
||||
ctx.AddF32x2("{}=float2({},{});", inst, e1, e2);
|
||||
}
|
||||
|
||||
void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
|
||||
std::string_view e2, std::string_view e3) {
|
||||
ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3);
|
||||
ctx.AddF32x3("{}=float3({},{},{});", inst, e1, e2, e3);
|
||||
}
|
||||
|
||||
void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
|
||||
std::string_view e2, std::string_view e3, std::string_view e4) {
|
||||
ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4);
|
||||
ctx.AddF32x4("{}=float4({},{},{},{});", inst, e1, e2, e3, e4);
|
||||
}
|
||||
|
||||
void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
|
||||
|
|
|
@ -111,45 +111,45 @@ void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const
|
|||
|
||||
void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset) {
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "as_type<uint>"};
|
||||
GetCbuf8(ctx, inst, binding, offset, cast);
|
||||
}
|
||||
|
||||
void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset) {
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"};
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "as_type<int>"};
|
||||
GetCbuf8(ctx, inst, binding, offset, cast);
|
||||
}
|
||||
|
||||
void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset) {
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "as_type<uint>"};
|
||||
GetCbuf16(ctx, inst, binding, offset, cast);
|
||||
}
|
||||
|
||||
void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset) {
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"};
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "as_type<int>"};
|
||||
GetCbuf16(ctx, inst, binding, offset, cast);
|
||||
}
|
||||
|
||||
void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset) {
|
||||
const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32)};
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "as_type<uint>"};
|
||||
GetCbuf(ctx, ret, binding, offset, 32, cast);
|
||||
}
|
||||
|
||||
void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset) {
|
||||
const auto ret{ctx.var_alloc.Define(inst, MslVarType::F32)};
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "utof" : ""};
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "as_type<float>" : ""};
|
||||
GetCbuf(ctx, ret, binding, offset, 32, cast);
|
||||
}
|
||||
|
||||
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset) {
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
|
||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "as_type<uint>"};
|
||||
if (offset.IsImmediate()) {
|
||||
const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
|
||||
static constexpr u32 cbuf_size{0x10000};
|
||||
|
@ -157,14 +157,14 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
|
|||
const s32 signed_offset{static_cast<s32>(offset.U32())};
|
||||
if (signed_offset < 0 || u32_offset > cbuf_size) {
|
||||
LOG_WARNING(Shader_MSL, "Immediate constant buffer offset is out of bounds");
|
||||
ctx.AddU32x2("{}=uvec2(0u);", inst);
|
||||
ctx.AddU32x2("{}=uint2(0u);", inst);
|
||||
return;
|
||||
}
|
||||
if (u32_offset % 2 == 0) {
|
||||
ctx.AddU32x2("{}={}({}[{}].{}{});", inst, cast, cbuf, u32_offset / 16,
|
||||
OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
|
||||
} else {
|
||||
ctx.AddU32x2("{}=uvec2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf,
|
||||
ctx.AddU32x2("{}=uint2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf,
|
||||
u32_offset / 16, OffsetSwizzle(u32_offset), cast, cbuf,
|
||||
(u32_offset + 4) / 16, OffsetSwizzle(u32_offset + 4));
|
||||
}
|
||||
|
@ -173,14 +173,14 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
|
|||
const auto offset_var{ctx.var_alloc.Consume(offset)};
|
||||
const auto cbuf{ChooseCbuf(ctx, binding, fmt::format("{}>>4", offset_var))};
|
||||
if (!ctx.profile.has_gl_component_indexing_bug) {
|
||||
ctx.AddU32x2("{}=uvec2({}({}[({}>>2)%4]),{}({}[(({}+4)>>2)%4]));", inst, cast, cbuf,
|
||||
ctx.AddU32x2("{}=uint2({}({}[({}>>2)%4]),{}({}[(({}+4)>>2)%4]));", inst, cast, cbuf,
|
||||
offset_var, cast, cbuf, offset_var);
|
||||
return;
|
||||
}
|
||||
const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32x2)};
|
||||
const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
|
||||
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
||||
ctx.Add("if(({}&3)=={}){}=uvec2({}({}.{}),{}({}.{}));", cbuf_offset, swizzle, ret, cast,
|
||||
ctx.Add("if(({}&3)=={}){}=uint2({}({}.{}),{}({}.{}));", cbuf_offset, swizzle, ret, cast,
|
||||
cbuf, "xyzw"[swizzle], cast, cbuf, "xyzw"[(swizzle + 1) % 4]);
|
||||
}
|
||||
}
|
||||
|
@ -199,23 +199,21 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
|
|||
}
|
||||
return;
|
||||
}
|
||||
ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle);
|
||||
ctx.AddF32("{}=__in.attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle);
|
||||
return;
|
||||
}
|
||||
switch (attr) {
|
||||
case IR::Attribute::PrimitiveId:
|
||||
ctx.AddF32("{}=itof(gl_PrimitiveID);", inst);
|
||||
ctx.AddF32("{}=as_type<float>(gl_PrimitiveID);", inst);
|
||||
break;
|
||||
case IR::Attribute::Layer:
|
||||
ctx.AddF32("{}=itof(gl_Layer);", inst);
|
||||
ctx.AddF32("{}=as_type<float>(gl_Layer);", inst);
|
||||
break;
|
||||
case IR::Attribute::PositionX:
|
||||
case IR::Attribute::PositionY:
|
||||
case IR::Attribute::PositionZ:
|
||||
case IR::Attribute::PositionW: {
|
||||
const bool is_array{IsInputArray(ctx.stage)};
|
||||
const auto input_decorator{is_array ? fmt::format("gl_in[{}].", vertex) : ""};
|
||||
ctx.AddF32("{}={}{}.{};", inst, input_decorator, "__out.position", swizzle);
|
||||
ctx.AddF32("{}={}.{};", inst, "__out.position", swizzle);
|
||||
break;
|
||||
}
|
||||
case IR::Attribute::PointSpriteS:
|
||||
|
@ -227,22 +225,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
|
|||
ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle);
|
||||
break;
|
||||
case IR::Attribute::InstanceId:
|
||||
ctx.AddF32("{}=itof(gl_InstanceID);", inst);
|
||||
ctx.AddF32("{}=as_type<float>(gl_InstanceID);", inst);
|
||||
break;
|
||||
case IR::Attribute::VertexId:
|
||||
ctx.AddF32("{}=itof(gl_VertexID);", inst);
|
||||
ctx.AddF32("{}=as_type<float>(gl_VertexID);", inst);
|
||||
break;
|
||||
case IR::Attribute::FrontFace:
|
||||
ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst);
|
||||
ctx.AddF32("{}=as_type<float>(gl_FrontFacing?-1:0);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.AddF32("{}=itof(gl_BaseInstance);", inst);
|
||||
ctx.AddF32("{}=as_type<float>(gl_BaseInstance);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.AddF32("{}=itof(gl_BaseVertex);", inst);
|
||||
ctx.AddF32("{}=as_type<float>(gl_BaseVertex);", inst);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.AddF32("{}=itof(gl_DrawID);", inst);
|
||||
ctx.AddF32("{}=as_type<float>(gl_DrawID);", inst);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Get attribute {}", attr);
|
||||
|
@ -299,7 +297,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
|
|||
"viewport layer extension");
|
||||
break;
|
||||
}
|
||||
ctx.Add("gl_Layer=ftoi({});", value);
|
||||
ctx.Add("gl_Layer=as_type<int>({});", value);
|
||||
break;
|
||||
case IR::Attribute::ViewportIndex:
|
||||
if (ctx.stage != Stage::Geometry &&
|
||||
|
@ -308,7 +306,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
|
|||
"viewport layer extension");
|
||||
break;
|
||||
}
|
||||
ctx.Add("gl_ViewportIndex=ftoi({});", value);
|
||||
ctx.Add("gl_ViewportIndex=as_type<int>({});", value);
|
||||
break;
|
||||
case IR::Attribute::ViewportMask:
|
||||
if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) {
|
||||
|
@ -317,7 +315,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
|
|||
"Shader stores viewport mask but device does not support viewport mask extension");
|
||||
break;
|
||||
}
|
||||
ctx.Add("gl_ViewportMask[0]=ftoi({});", value);
|
||||
ctx.Add("gl_ViewportMask[0]=as_type<int>({});", value);
|
||||
break;
|
||||
case IR::Attribute::PointSize:
|
||||
ctx.Add("gl_PointSize={};", value);
|
||||
|
|
|
@ -256,7 +256,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
|
|||
const auto texture{Texture(ctx, info, index)};
|
||||
const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""};
|
||||
const bool needs_shadow_ext{NeedsShadowLodExt(info.type)};
|
||||
const auto cast{needs_shadow_ext ? "vec4" : "vec3"};
|
||||
const auto cast{needs_shadow_ext ? "float4" : "float3"};
|
||||
const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod &&
|
||||
ctx.stage != Stage::Fragment && needs_shadow_ext};
|
||||
if (use_grad) {
|
||||
|
@ -267,7 +267,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
|
|||
ctx.AddF32("{}=0.0f;", inst);
|
||||
return;
|
||||
}
|
||||
const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"};
|
||||
const auto d_cast{info.type == TextureType::ColorArray2D ? "float2" : "float3"};
|
||||
ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref,
|
||||
d_cast, d_cast);
|
||||
return;
|
||||
|
@ -284,7 +284,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
|
|||
} else {
|
||||
if (ctx.stage == Stage::Fragment) {
|
||||
if (info.type == TextureType::ColorArrayCube) {
|
||||
ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref);
|
||||
ctx.AddF32("{}=texture({},float4({}),{});", inst, texture, coords, dref);
|
||||
} else {
|
||||
ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias);
|
||||
}
|
||||
|
@ -311,7 +311,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
|
|||
const auto texture{Texture(ctx, info, index)};
|
||||
const bool needs_shadow_ext{NeedsShadowLodExt(info.type)};
|
||||
const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext};
|
||||
const auto cast{needs_shadow_ext ? "vec4" : "vec3"};
|
||||
const auto cast{needs_shadow_ext ? "float3" : "float3"};
|
||||
if (use_grad) {
|
||||
LOG_WARNING(Shader_MSL,
|
||||
"Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback");
|
||||
|
@ -320,7 +320,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
|
|||
ctx.AddF32("{}=0.0f;", inst);
|
||||
return;
|
||||
}
|
||||
const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"};
|
||||
const auto d_cast{info.type == TextureType::ColorArray2D ? "float2" : "float3"};
|
||||
ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref,
|
||||
d_cast, d_cast);
|
||||
return;
|
||||
|
@ -671,7 +671,7 @@ void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
|
|||
throw NotImplementedException("Non-constant texture rescaling");
|
||||
}
|
||||
const u32 image_index{index.U32()};
|
||||
ctx.AddU1("{}=(ftou(scaling.x)&{})!=0;", inst, 1u << image_index);
|
||||
ctx.AddU1("{}=(as_type<uint>(scaling.x)&{})!=0;", inst, 1u << image_index);
|
||||
}
|
||||
|
||||
void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
|
||||
|
@ -679,7 +679,7 @@ void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index)
|
|||
throw NotImplementedException("Non-constant texture rescaling");
|
||||
}
|
||||
const u32 image_index{index.U32()};
|
||||
ctx.AddU1("{}=(ftou(scaling.y)&{})!=0;", inst, 1u << image_index);
|
||||
ctx.AddU1("{}=(as_type<uint>(scaling.y)&{})!=0;", inst, 1u << image_index);
|
||||
}
|
||||
|
||||
void EmitBindlessImageSampleImplicitLod(EmitContext&) {
|
||||
|
|
|
@ -135,14 +135,14 @@ void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindin
|
|||
void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset) {
|
||||
const auto offset_var{ctx.var_alloc.Consume(offset)};
|
||||
ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name,
|
||||
ctx.AddU32x2("{}=uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name,
|
||||
binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var);
|
||||
}
|
||||
|
||||
void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
const IR::Value& offset) {
|
||||
const auto offset_var{ctx.var_alloc.Consume(offset)};
|
||||
ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}"
|
||||
ctx.AddU32x4("{}=uint4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}"
|
||||
"+12)>>2]);",
|
||||
inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(),
|
||||
offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name,
|
||||
|
|
|
@ -39,11 +39,11 @@ void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset
|
|||
}
|
||||
|
||||
void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
|
||||
ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset);
|
||||
ctx.AddU32x2("{}=uint2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset);
|
||||
}
|
||||
|
||||
void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
|
||||
ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst,
|
||||
ctx.AddU32x4("{}=uint4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst,
|
||||
offset, offset, offset, offset);
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ void InitializeOutputVaryings(EmitContext& ctx) {
|
|||
}
|
||||
ctx.Add("__Output __out;");
|
||||
if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) {
|
||||
ctx.Add("__out.position=vec4(0,0,0,1);");
|
||||
ctx.Add("__out.position=float4(0,0,0,1);");
|
||||
}
|
||||
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
|
||||
if (!ctx.info.stores.Generic(index)) {
|
||||
|
@ -31,7 +31,7 @@ void InitializeOutputVaryings(EmitContext& ctx) {
|
|||
size_t element{};
|
||||
while (element < info_array.size()) {
|
||||
const auto& info{info_array.at(element)};
|
||||
const auto varying_name{fmt::format("__out.{}{}", info.name, output_decorator)};
|
||||
const auto varying_name{fmt::format("{}{}", info.name, output_decorator)};
|
||||
switch (info.num_components) {
|
||||
case 1: {
|
||||
const char value{element == 3 ? '1' : '0'};
|
||||
|
|
|
@ -54,7 +54,7 @@ std::string_view BallotIndex(EmitContext& ctx) {
|
|||
|
||||
std::string GetMask(EmitContext& ctx, std::string_view mask) {
|
||||
const auto ballot_index{BallotIndex(ctx)};
|
||||
return fmt::format("uint(uvec2({}){})", mask, ballot_index);
|
||||
return fmt::format("uint(uint2({}){})", mask, ballot_index);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
|
@ -68,8 +68,8 @@ void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
|
|||
return;
|
||||
}
|
||||
const auto ballot_index{BallotIndex(ctx)};
|
||||
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
|
||||
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
|
||||
const auto active_mask{fmt::format("uint2(ballotARB(true)){}", ballot_index)};
|
||||
const auto ballot{fmt::format("uint2(ballotARB({})){}", pred, ballot_index)};
|
||||
ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
|
||||
}
|
||||
|
||||
|
@ -79,8 +79,8 @@ void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
|
|||
return;
|
||||
}
|
||||
const auto ballot_index{BallotIndex(ctx)};
|
||||
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
|
||||
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
|
||||
const auto active_mask{fmt::format("uint2(ballotARB(true)){}", ballot_index)};
|
||||
const auto ballot{fmt::format("uint2(ballotARB({})){}", pred, ballot_index)};
|
||||
ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
|
||||
}
|
||||
|
||||
|
@ -90,15 +90,15 @@ void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
|
|||
return;
|
||||
}
|
||||
const auto ballot_index{BallotIndex(ctx)};
|
||||
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
|
||||
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
|
||||
const auto active_mask{fmt::format("uint2(ballotARB(true)){}", ballot_index)};
|
||||
const auto ballot{fmt::format("uint2(ballotARB({})){}", pred, ballot_index)};
|
||||
const auto value{fmt::format("({}^{})", ballot, active_mask)};
|
||||
ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
|
||||
}
|
||||
|
||||
void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
|
||||
const auto ballot_index{BallotIndex(ctx)};
|
||||
ctx.AddU32("{}=uvec2(ballotARB({})){};", inst, pred, ballot_index);
|
||||
ctx.AddU32("{}=uint2(ballotARB({})){};", inst, pred, ballot_index);
|
||||
}
|
||||
|
||||
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
|
||||
|
|
|
@ -52,6 +52,7 @@ std::string OutputDecorator(Stage stage, u32 size) {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
// TODO
|
||||
std::string_view GetTessMode(TessPrimitive primitive) {
|
||||
switch (primitive) {
|
||||
|
@ -107,6 +108,7 @@ std::string_view OutputPrimitive(OutputTopology topology) {
|
|||
}
|
||||
throw InvalidArgument("Invalid output topology {}", topology);
|
||||
}
|
||||
*/
|
||||
|
||||
// TODO
|
||||
std::string_view DepthSamplerType(TextureType type) {
|
||||
|
@ -233,33 +235,15 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
|
|||
break;
|
||||
case Stage::TessellationControl:
|
||||
stage_name = "kernel";
|
||||
header += fmt::format("layout(vertices={})out;", program.invocations);
|
||||
break;
|
||||
case Stage::TessellationEval:
|
||||
stage_name = "vertex";
|
||||
header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive),
|
||||
GetTessSpacing(runtime_info.tess_spacing),
|
||||
runtime_info.tess_clockwise ? "cw" : "ccw");
|
||||
break;
|
||||
case Stage::Geometry:
|
||||
stage_name = "vertex";
|
||||
header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology));
|
||||
if (uses_geometry_passthrough) {
|
||||
header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};";
|
||||
break;
|
||||
} else if (program.is_geometry_passthrough &&
|
||||
!profile.support_geometry_shader_passthrough) {
|
||||
LOG_WARNING(Shader_MSL, "Passthrough geometry program used but not supported");
|
||||
}
|
||||
header += fmt::format(
|
||||
"layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];",
|
||||
OutputPrimitive(program.output_topology), program.output_vertices);
|
||||
break;
|
||||
case Stage::Fragment:
|
||||
stage_name = "fragment";
|
||||
if (runtime_info.force_early_z) {
|
||||
header += "layout(early_fragment_tests)in;";
|
||||
}
|
||||
break;
|
||||
case Stage::Compute:
|
||||
stage_name = "kernel";
|
||||
|
@ -300,6 +284,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
|
|||
}
|
||||
}
|
||||
header += "struct __Output {\n";
|
||||
if (stage == Stage::VertexB || stage == Stage::Geometry) {
|
||||
header += "float4 position [[position]];\n";
|
||||
}
|
||||
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
|
||||
if (info.stores.Generic(index)) {
|
||||
DefineGenericOutput(index, program.invocations);
|
||||
|
@ -332,8 +319,8 @@ bool EmitContext::DefineInputs(Bindings& bindings) {
|
|||
const u32 cbuf_binding_size{info.uses_global_memory ? 0x1000U : cbuf_used_size};
|
||||
if (added)
|
||||
input_str += ",";
|
||||
input_str += fmt::format("constant float4& cbuf{}[{}] [[buffer({})]]", desc.index,
|
||||
cbuf_binding_size, bindings.uniform_buffer);
|
||||
input_str += fmt::format("constant float4& {}_cbuf{}[{}] [[buffer({})]]", stage_name,
|
||||
desc.index, cbuf_binding_size, bindings.uniform_buffer);
|
||||
bindings.uniform_buffer += desc.count;
|
||||
added = true;
|
||||
}
|
||||
|
@ -346,8 +333,8 @@ bool EmitContext::DefineInputs(Bindings& bindings) {
|
|||
for (const auto& desc : info.storage_buffers_descriptors) {
|
||||
if (added)
|
||||
input_str += ",";
|
||||
input_str +=
|
||||
fmt::format("device uint& ssbo{}[] [[buffer({})]]", index, bindings.storage_buffer);
|
||||
input_str += fmt::format("device uint& {}_ssbo{}[] [[buffer({})]]", stage_name, index,
|
||||
bindings.storage_buffer);
|
||||
bindings.storage_buffer += desc.count;
|
||||
index += desc.count;
|
||||
added = true;
|
||||
|
@ -377,8 +364,8 @@ bool EmitContext::DefineInputs(Bindings& bindings) {
|
|||
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
|
||||
if (added)
|
||||
input_str += ",";
|
||||
input_str += fmt::format("{}<{}> img{}{} [[texture({})]]", qualifier, image_type,
|
||||
bindings.image, array_decorator, bindings.image);
|
||||
input_str += fmt::format("{}<{}> {}_img{}{} [[texture({})]]", qualifier, image_type,
|
||||
stage_name, bindings.image, array_decorator, bindings.image);
|
||||
bindings.image += desc.count;
|
||||
added = true;
|
||||
}
|
||||
|
@ -404,10 +391,10 @@ bool EmitContext::DefineInputs(Bindings& bindings) {
|
|||
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
|
||||
if (added)
|
||||
input_str += ",";
|
||||
input_str += fmt::format("{} tex{}{} [[texture({})]]", texture_type, bindings.texture,
|
||||
array_decorator, bindings.texture);
|
||||
input_str += fmt::format(",sampler samp{}{} [[sampler({})]]", bindings.texture,
|
||||
array_decorator, bindings.texture);
|
||||
input_str += fmt::format("{} {}_tex{}{} [[texture({})]]", texture_type, stage_name,
|
||||
bindings.texture, array_decorator, bindings.texture);
|
||||
input_str += fmt::format(",sampler {}_samp{}{} [[sampler({})]]", stage_name,
|
||||
bindings.texture, array_decorator, bindings.texture);
|
||||
bindings.texture += desc.count;
|
||||
added = true;
|
||||
}
|
||||
|
@ -417,48 +404,20 @@ bool EmitContext::DefineInputs(Bindings& bindings) {
|
|||
|
||||
// TODO
|
||||
void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
|
||||
static constexpr std::string_view swizzle{"xyzw"};
|
||||
const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
|
||||
u32 element{0};
|
||||
while (element < 4) {
|
||||
std::string definition{fmt::format("layout(location={}", index)};
|
||||
const u32 remainder{4 - element};
|
||||
const TransformFeedbackVarying* xfb_varying{};
|
||||
const size_t xfb_varying_index{base_index + element};
|
||||
if (xfb_varying_index < runtime_info.xfb_count) {
|
||||
xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index];
|
||||
xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
|
||||
}
|
||||
const u32 num_components{xfb_varying ? xfb_varying->components : remainder};
|
||||
if (element > 0) {
|
||||
definition += fmt::format(",component={}", element);
|
||||
}
|
||||
if (xfb_varying) {
|
||||
definition +=
|
||||
fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer,
|
||||
xfb_varying->stride, xfb_varying->offset);
|
||||
}
|
||||
std::string name{fmt::format("out_attr{}", index)};
|
||||
if (num_components < 4 || element > 0) {
|
||||
name += fmt::format("_{}", swizzle.substr(element, num_components));
|
||||
}
|
||||
const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)};
|
||||
definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations));
|
||||
header += definition;
|
||||
const auto type{fmt::format("float{}", 4)};
|
||||
std::string name{fmt::format("attr{}", index)};
|
||||
header += fmt::format("{} {}{} [[user(locn{})]];\n", type, name,
|
||||
OutputDecorator(stage, invocations), index);
|
||||
|
||||
const GenericElementInfo element_info{
|
||||
.name = name,
|
||||
.first_element = element,
|
||||
.num_components = num_components,
|
||||
};
|
||||
std::fill_n(output_generics[index].begin() + element, num_components, element_info);
|
||||
element += num_components;
|
||||
}
|
||||
const GenericElementInfo element_info{
|
||||
.name = "__out." + name,
|
||||
.first_element = 0,
|
||||
.num_components = 4,
|
||||
};
|
||||
std::fill_n(output_generics[index].begin(), 4, element_info);
|
||||
}
|
||||
|
||||
void EmitContext::DefineHelperFunctions() {
|
||||
header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n"
|
||||
"#define itof intBitsToFloat\n#define utof uintBitsToFloat\n";
|
||||
if (info.uses_global_increment || info.uses_shared_increment) {
|
||||
header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}";
|
||||
}
|
||||
|
@ -468,7 +427,7 @@ void EmitContext::DefineHelperFunctions() {
|
|||
}
|
||||
if (info.uses_atomic_f32_add) {
|
||||
header += "uint CasFloatAdd(uint op_a,float op_b){"
|
||||
"return ftou(utof(op_a)+op_b);}";
|
||||
"return as_type<uint>(as_type<float>(op_a)+op_b);}";
|
||||
}
|
||||
if (info.uses_atomic_f32x2_add) {
|
||||
header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){"
|
||||
|
@ -544,8 +503,10 @@ std::string EmitContext::DefineGlobalMemoryFunctions() {
|
|||
for (size_t i = 0; i < addr_xy.size(); ++i) {
|
||||
const auto addr_loc{ssbo.cbuf_offset + 4 * i};
|
||||
const auto size_loc{size_cbuf_offset + 4 * i};
|
||||
addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc));
|
||||
size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc));
|
||||
addr_xy[i] =
|
||||
fmt::format("as_type<uint>({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc));
|
||||
size_xy[i] =
|
||||
fmt::format("as_type<uint>({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc));
|
||||
}
|
||||
const u32 ssbo_align_mask{~(static_cast<u32>(profile.min_ssbo_alignment) - 1U)};
|
||||
const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)};
|
||||
|
|
|
@ -53,13 +53,13 @@ std::string FormatFloat(std::string_view value, IR::Type type) {
|
|||
// TODO: Confirm FP64 nan/inf
|
||||
if (type == IR::Type::F32) {
|
||||
if (value == "nan") {
|
||||
return "utof(0x7fc00000)";
|
||||
return "as_type<float>(0x7fc00000)";
|
||||
}
|
||||
if (value == "inf") {
|
||||
return "utof(0x7f800000)";
|
||||
return "as_type<float>(0x7f800000)";
|
||||
}
|
||||
if (value == "-inf") {
|
||||
return "utof(0xff800000)";
|
||||
return "as_type<float>(0xff800000)";
|
||||
}
|
||||
}
|
||||
if (value.find_first_of('e') != std::string_view::npos) {
|
||||
|
@ -203,7 +203,7 @@ std::string VarAlloc::GetMslType(MslVarType type) const {
|
|||
case MslVarType::U1:
|
||||
return "bool";
|
||||
case MslVarType::F16x2:
|
||||
return "f16vec2";
|
||||
return "half2";
|
||||
case MslVarType::U32:
|
||||
return "uint";
|
||||
case MslVarType::F32:
|
||||
|
@ -215,17 +215,17 @@ std::string VarAlloc::GetMslType(MslVarType type) const {
|
|||
case MslVarType::PrecF64:
|
||||
return "double";
|
||||
case MslVarType::U32x2:
|
||||
return "uvec2";
|
||||
return "uint2";
|
||||
case MslVarType::F32x2:
|
||||
return "vec2";
|
||||
return "float2";
|
||||
case MslVarType::U32x3:
|
||||
return "uvec3";
|
||||
return "float3";
|
||||
case MslVarType::F32x3:
|
||||
return "vec3";
|
||||
return "float3";
|
||||
case MslVarType::U32x4:
|
||||
return "uvec4";
|
||||
return "uint4";
|
||||
case MslVarType::F32x4:
|
||||
return "vec4";
|
||||
return "float4";
|
||||
case MslVarType::Void:
|
||||
return "";
|
||||
default:
|
||||
|
|
|
@ -276,8 +276,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
|||
if (error) {
|
||||
LOG_ERROR(Render_Metal, "failed to create library: {}",
|
||||
error->description()->cString(NS::ASCIIStringEncoding));
|
||||
// HACK
|
||||
std::cout << error->description()->cString(NS::ASCIIStringEncoding) << std::endl;
|
||||
// std::cout << error->description()->cString(NS::ASCIIStringEncoding) << std::endl;
|
||||
// HACK
|
||||
throw;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue