mirror of
https://github.com/Lime3DS/Lime3DS
synced 2024-12-31 19:42:34 -06:00
Merge pull request #822 from bunnei/pica-improvements
Pica improvements
This commit is contained in:
commit
8852fc6a87
10 changed files with 261 additions and 71 deletions
|
@ -56,7 +56,17 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||||
// Trigger IRQ
|
// Trigger IRQ
|
||||||
case PICA_REG_INDEX(trigger_irq):
|
case PICA_REG_INDEX(trigger_irq):
|
||||||
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D);
|
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D);
|
||||||
return;
|
break;
|
||||||
|
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d):
|
||||||
|
{
|
||||||
|
unsigned index = id - PICA_REG_INDEX(command_buffer.trigger[0]);
|
||||||
|
u32* head_ptr = (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index));
|
||||||
|
g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr;
|
||||||
|
g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// It seems like these trigger vertex rendering
|
// It seems like these trigger vertex rendering
|
||||||
case PICA_REG_INDEX(trigger_draw):
|
case PICA_REG_INDEX(trigger_draw):
|
||||||
|
@ -363,38 +373,34 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||||
g_debug_context->OnEvent(DebugContext::Event::CommandProcessed, reinterpret_cast<void*>(&id));
|
g_debug_context->OnEvent(DebugContext::Event::CommandProcessed, reinterpret_cast<void*>(&id));
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) {
|
|
||||||
const CommandHeader& header = *(const CommandHeader*)(&first_command_word[1]);
|
|
||||||
|
|
||||||
u32* read_pointer = (u32*)first_command_word;
|
|
||||||
|
|
||||||
const u32 write_mask = ((header.parameter_mask & 0x1) ? (0xFFu << 0) : 0u) |
|
|
||||||
((header.parameter_mask & 0x2) ? (0xFFu << 8) : 0u) |
|
|
||||||
((header.parameter_mask & 0x4) ? (0xFFu << 16) : 0u) |
|
|
||||||
((header.parameter_mask & 0x8) ? (0xFFu << 24) : 0u);
|
|
||||||
|
|
||||||
WritePicaReg(header.cmd_id, *read_pointer, write_mask);
|
|
||||||
read_pointer += 2;
|
|
||||||
|
|
||||||
for (unsigned int i = 1; i < 1+header.extra_data_length; ++i) {
|
|
||||||
u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0);
|
|
||||||
WritePicaReg(cmd, *read_pointer, write_mask);
|
|
||||||
++read_pointer;
|
|
||||||
}
|
|
||||||
|
|
||||||
// align read pointer to 8 bytes
|
|
||||||
if ((first_command_word - read_pointer) % 2)
|
|
||||||
++read_pointer;
|
|
||||||
|
|
||||||
return read_pointer - first_command_word;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ProcessCommandList(const u32* list, u32 size) {
|
void ProcessCommandList(const u32* list, u32 size) {
|
||||||
u32* read_pointer = (u32*)list;
|
g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = list;
|
||||||
u32 list_length = size / sizeof(u32);
|
g_state.cmd_list.length = size / sizeof(u32);
|
||||||
|
|
||||||
while (read_pointer < list + list_length) {
|
while (g_state.cmd_list.current_ptr < g_state.cmd_list.head_ptr + g_state.cmd_list.length) {
|
||||||
read_pointer += ExecuteCommandBlock(read_pointer);
|
// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
|
||||||
|
static const u32 expand_bits_to_bytes[] = {
|
||||||
|
0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
|
||||||
|
0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
|
||||||
|
0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff,
|
||||||
|
0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff
|
||||||
|
};
|
||||||
|
|
||||||
|
// Align read pointer to 8 bytes
|
||||||
|
if ((g_state.cmd_list.head_ptr - g_state.cmd_list.current_ptr) % 2 != 0)
|
||||||
|
++g_state.cmd_list.current_ptr;
|
||||||
|
|
||||||
|
u32 value = *g_state.cmd_list.current_ptr++;
|
||||||
|
const CommandHeader header = { *g_state.cmd_list.current_ptr++ };
|
||||||
|
const u32 write_mask = expand_bits_to_bytes[header.parameter_mask];
|
||||||
|
u32 cmd = header.cmd_id;
|
||||||
|
|
||||||
|
WritePicaReg(cmd, value, write_mask);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < header.extra_data_length; ++i) {
|
||||||
|
u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0);
|
||||||
|
WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, write_mask);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -162,6 +162,25 @@ struct Regs {
|
||||||
ETC1A4 = 13, // compressed
|
ETC1A4 = 13, // compressed
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Bitwise operation selectable through the 4-bit output_merger.logic_op register field.
/// The per-value notes give the result computed per colour channel by the software
/// rasterizer, where "src" is the combiner output and "dest" is the value it is merged
/// with (presumably the current framebuffer colour).
enum class LogicOp : u32 {
    Clear        = 0x0, ///< 0
    And          = 0x1, ///< src & dest
    AndReverse   = 0x2, ///< src & ~dest
    Copy         = 0x3, ///< src
    Set          = 0x4, ///< all bits set (255 per 8-bit channel)
    CopyInverted = 0x5, ///< ~src
    NoOp         = 0x6, ///< dest
    Invert       = 0x7, ///< ~dest
    Nand         = 0x8, ///< ~(src & dest)
    Or           = 0x9, ///< src | dest
    Nor          = 0xA, ///< ~(src | dest)
    Xor          = 0xB, ///< src ^ dest
    Equiv        = 0xC, ///< ~(src ^ dest)
    AndInverted  = 0xD, ///< ~src & dest
    OrReverse    = 0xE, ///< src | ~dest
    OrInverted   = 0xF, ///< ~src | dest
};
|
||||||
|
|
||||||
static unsigned NibblesPerPixel(TextureFormat format) {
|
static unsigned NibblesPerPixel(TextureFormat format) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case TextureFormat::RGBA8:
|
case TextureFormat::RGBA8:
|
||||||
|
@ -221,6 +240,7 @@ struct Regs {
|
||||||
enum class Source : u32 {
|
enum class Source : u32 {
|
||||||
PrimaryColor = 0x0,
|
PrimaryColor = 0x0,
|
||||||
PrimaryFragmentColor = 0x1,
|
PrimaryFragmentColor = 0x1,
|
||||||
|
SecondaryFragmentColor = 0x2,
|
||||||
|
|
||||||
Texture0 = 0x3,
|
Texture0 = 0x3,
|
||||||
Texture1 = 0x4,
|
Texture1 = 0x4,
|
||||||
|
@ -413,13 +433,9 @@ struct Regs {
|
||||||
} alpha_blending;
|
} alpha_blending;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
enum Op {
|
BitField<0, 4, LogicOp> logic_op;
|
||||||
Set = 4,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
BitField<0, 4, Op> op;
|
|
||||||
} logic_op;
|
|
||||||
|
|
||||||
union {
|
union {
|
||||||
BitField< 0, 8, u32> r;
|
BitField< 0, 8, u32> r;
|
||||||
BitField< 8, 8, u32> g;
|
BitField< 8, 8, u32> g;
|
||||||
|
@ -708,7 +724,33 @@ struct Regs {
|
||||||
u32 set_value[3];
|
u32 set_value[3];
|
||||||
} vs_default_attributes_setup;
|
} vs_default_attributes_setup;
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x28);
|
INSERT_PADDING_WORDS(0x2);
|
||||||
|
|
||||||
|
struct {
|
||||||
|
// There are two channels that can be used to configure the next command buffer, which
|
||||||
|
// can be then executed by writing to the "trigger" registers. There are two reasons why a
|
||||||
|
// game might use this feature:
|
||||||
|
// 1) With this, an arbitrary number of additional command buffers may be executed in
|
||||||
|
// sequence without requiring any intervention of the CPU after the initial one is
|
||||||
|
// kicked off.
|
||||||
|
// 2) Games can configure these registers to provide a command list subroutine mechanism.
|
||||||
|
|
||||||
|
BitField< 0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer
|
||||||
|
BitField< 0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer
|
||||||
|
u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to
|
||||||
|
|
||||||
|
unsigned GetSize(unsigned index) const {
|
||||||
|
ASSERT(index < 2);
|
||||||
|
return 8 * size[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
PAddr GetPhysicalAddress(unsigned index) const {
|
||||||
|
ASSERT(index < 2);
|
||||||
|
return (PAddr)(8 * addr[index]);
|
||||||
|
}
|
||||||
|
} command_buffer;
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0x20);
|
||||||
|
|
||||||
enum class TriangleTopology : u32 {
|
enum class TriangleTopology : u32 {
|
||||||
List = 0,
|
List = 0,
|
||||||
|
@ -861,6 +903,7 @@ struct Regs {
|
||||||
ADD_FIELD(trigger_draw);
|
ADD_FIELD(trigger_draw);
|
||||||
ADD_FIELD(trigger_draw_indexed);
|
ADD_FIELD(trigger_draw_indexed);
|
||||||
ADD_FIELD(vs_default_attributes_setup);
|
ADD_FIELD(vs_default_attributes_setup);
|
||||||
|
ADD_FIELD(command_buffer);
|
||||||
ADD_FIELD(triangle_topology);
|
ADD_FIELD(triangle_topology);
|
||||||
ADD_FIELD(vs_bool_uniforms);
|
ADD_FIELD(vs_bool_uniforms);
|
||||||
ADD_FIELD(vs_int_uniforms);
|
ADD_FIELD(vs_int_uniforms);
|
||||||
|
@ -938,6 +981,7 @@ ASSERT_REG_POSITION(num_vertices, 0x228);
|
||||||
ASSERT_REG_POSITION(trigger_draw, 0x22e);
|
ASSERT_REG_POSITION(trigger_draw, 0x22e);
|
||||||
ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
|
ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
|
||||||
ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);
|
ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);
|
||||||
|
ASSERT_REG_POSITION(command_buffer, 0x238);
|
||||||
ASSERT_REG_POSITION(triangle_topology, 0x25e);
|
ASSERT_REG_POSITION(triangle_topology, 0x25e);
|
||||||
ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0);
|
ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0);
|
||||||
ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1);
|
ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1);
|
||||||
|
@ -1053,21 +1097,12 @@ private:
|
||||||
float value;
|
float value;
|
||||||
};
|
};
|
||||||
|
|
||||||
union CommandHeader {
|
|
||||||
CommandHeader(u32 h) : hex(h) {}
|
|
||||||
|
|
||||||
u32 hex;
|
|
||||||
|
|
||||||
BitField< 0, 16, u32> cmd_id;
|
|
||||||
BitField<16, 4, u32> parameter_mask;
|
|
||||||
BitField<20, 11, u32> extra_data_length;
|
|
||||||
BitField<31, 1, u32> group_commands;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Struct used to describe current Pica state
|
/// Struct used to describe current Pica state
|
||||||
struct State {
|
struct State {
|
||||||
|
/// Pica registers
|
||||||
Regs regs;
|
Regs regs;
|
||||||
|
|
||||||
|
/// Vertex shader memory
|
||||||
struct {
|
struct {
|
||||||
struct {
|
struct {
|
||||||
Math::Vec4<float24> f[96];
|
Math::Vec4<float24> f[96];
|
||||||
|
@ -1080,6 +1115,13 @@ struct State {
|
||||||
std::array<u32, 1024> program_code;
|
std::array<u32, 1024> program_code;
|
||||||
std::array<u32, 1024> swizzle_data;
|
std::array<u32, 1024> swizzle_data;
|
||||||
} vs;
|
} vs;
|
||||||
|
|
||||||
|
/// Current Pica command list
|
||||||
|
struct {
|
||||||
|
const u32* head_ptr;
|
||||||
|
const u32* current_ptr;
|
||||||
|
u32 length;
|
||||||
|
} cmd_list;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Initialize Pica state
|
/// Initialize Pica state
|
||||||
|
|
|
@ -402,11 +402,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
|
|
||||||
auto GetSource = [&](Source source) -> Math::Vec4<u8> {
|
auto GetSource = [&](Source source) -> Math::Vec4<u8> {
|
||||||
switch (source) {
|
switch (source) {
|
||||||
// TODO: What's the difference between these two?
|
|
||||||
case Source::PrimaryColor:
|
case Source::PrimaryColor:
|
||||||
|
|
||||||
|
// HACK: Until we implement fragment lighting, use primary_color
|
||||||
case Source::PrimaryFragmentColor:
|
case Source::PrimaryFragmentColor:
|
||||||
return primary_color;
|
return primary_color;
|
||||||
|
|
||||||
|
// HACK: Until we implement fragment lighting, use zero
|
||||||
|
case Source::SecondaryFragmentColor:
|
||||||
|
return {0, 0, 0, 0};
|
||||||
|
|
||||||
case Source::Texture0:
|
case Source::Texture0:
|
||||||
return texture_color[0];
|
return texture_color[0];
|
||||||
|
|
||||||
|
@ -570,6 +575,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
case Operation::Add:
|
case Operation::Add:
|
||||||
return std::min(255, input[0] + input[1]);
|
return std::min(255, input[0] + input[1]);
|
||||||
|
|
||||||
|
case Operation::AddSigned:
|
||||||
|
{
|
||||||
|
// TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
|
||||||
|
auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128;
|
||||||
|
return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255));
|
||||||
|
}
|
||||||
|
|
||||||
case Operation::Lerp:
|
case Operation::Lerp:
|
||||||
return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
|
return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
|
||||||
|
|
||||||
|
@ -808,10 +820,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
using BlendEquation = Regs::BlendEquation;
|
|
||||||
static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
|
static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
|
||||||
const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
|
const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
|
||||||
BlendEquation equation) {
|
Regs::BlendEquation equation) {
|
||||||
Math::Vec4<int> result;
|
Math::Vec4<int> result;
|
||||||
|
|
||||||
auto src_result = (src * srcfactor).Cast<int>();
|
auto src_result = (src * srcfactor).Cast<int>();
|
||||||
|
@ -866,8 +877,63 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb);
|
blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb);
|
||||||
blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a();
|
blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a();
|
||||||
} else {
|
} else {
|
||||||
LOG_CRITICAL(HW_GPU, "logic op: %x", output_merger.logic_op);
|
static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 {
|
||||||
UNIMPLEMENTED();
|
switch (op) {
|
||||||
|
case Regs::LogicOp::Clear:
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
case Regs::LogicOp::And:
|
||||||
|
return src & dest;
|
||||||
|
|
||||||
|
case Regs::LogicOp::AndReverse:
|
||||||
|
return src & ~dest;
|
||||||
|
|
||||||
|
case Regs::LogicOp::Copy:
|
||||||
|
return src;
|
||||||
|
|
||||||
|
case Regs::LogicOp::Set:
|
||||||
|
return 255;
|
||||||
|
|
||||||
|
case Regs::LogicOp::CopyInverted:
|
||||||
|
return ~src;
|
||||||
|
|
||||||
|
case Regs::LogicOp::NoOp:
|
||||||
|
return dest;
|
||||||
|
|
||||||
|
case Regs::LogicOp::Invert:
|
||||||
|
return ~dest;
|
||||||
|
|
||||||
|
case Regs::LogicOp::Nand:
|
||||||
|
return ~(src & dest);
|
||||||
|
|
||||||
|
case Regs::LogicOp::Or:
|
||||||
|
return src | dest;
|
||||||
|
|
||||||
|
case Regs::LogicOp::Nor:
|
||||||
|
return ~(src | dest);
|
||||||
|
|
||||||
|
case Regs::LogicOp::Xor:
|
||||||
|
return src ^ dest;
|
||||||
|
|
||||||
|
case Regs::LogicOp::Equiv:
|
||||||
|
return ~(src ^ dest);
|
||||||
|
|
||||||
|
case Regs::LogicOp::AndInverted:
|
||||||
|
return ~src & dest;
|
||||||
|
|
||||||
|
case Regs::LogicOp::OrReverse:
|
||||||
|
return src | ~dest;
|
||||||
|
|
||||||
|
case Regs::LogicOp::OrInverted:
|
||||||
|
return ~src | dest;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
blend_output = Math::MakeVec(
|
||||||
|
LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op),
|
||||||
|
LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op),
|
||||||
|
LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op),
|
||||||
|
LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op));
|
||||||
}
|
}
|
||||||
|
|
||||||
const Math::Vec4<u8> result = {
|
const Math::Vec4<u8> result = {
|
||||||
|
|
|
@ -135,6 +135,7 @@ void RasterizerOpenGL::Reset() {
|
||||||
SyncBlendFuncs();
|
SyncBlendFuncs();
|
||||||
SyncBlendColor();
|
SyncBlendColor();
|
||||||
SyncAlphaTest();
|
SyncAlphaTest();
|
||||||
|
SyncLogicOp();
|
||||||
SyncStencilTest();
|
SyncStencilTest();
|
||||||
SyncDepthTest();
|
SyncDepthTest();
|
||||||
|
|
||||||
|
@ -249,6 +250,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
|
||||||
SyncDepthTest();
|
SyncDepthTest();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
// Logic op
|
||||||
|
case PICA_REG_INDEX(output_merger.logic_op):
|
||||||
|
SyncLogicOp();
|
||||||
|
break;
|
||||||
|
|
||||||
// TEV stage 0
|
// TEV stage 0
|
||||||
case PICA_REG_INDEX(tev_stage0.color_source1):
|
case PICA_REG_INDEX(tev_stage0.color_source1):
|
||||||
SyncTevSources(0, regs.tev_stage0);
|
SyncTevSources(0, regs.tev_stage0);
|
||||||
|
@ -633,6 +639,10 @@ void RasterizerOpenGL::SyncAlphaTest() {
|
||||||
glUniform1f(uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f);
|
glUniform1f(uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::SyncLogicOp() {
    // Translate the PICA logic op register into its OpenGL counterpart and stage it in the
    // tracked GL state
    const auto& output_merger = Pica::g_state.regs.output_merger;
    state.logic_op = PicaToGL::LogicOp(output_merger.logic_op);
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SyncStencilTest() {
|
void RasterizerOpenGL::SyncStencilTest() {
|
||||||
// TODO: Implement stencil test, mask, and op
|
// TODO: Implement stencil test, mask, and op
|
||||||
}
|
}
|
||||||
|
|
|
@ -125,6 +125,9 @@ private:
|
||||||
/// Syncs the alpha test states to match the PICA register
|
/// Syncs the alpha test states to match the PICA register
|
||||||
void SyncAlphaTest();
|
void SyncAlphaTest();
|
||||||
|
|
||||||
|
/// Syncs the logic op states to match the PICA register
|
||||||
|
void SyncLogicOp();
|
||||||
|
|
||||||
/// Syncs the stencil test states to match the PICA register
|
/// Syncs the stencil test states to match the PICA register
|
||||||
void SyncStencilTest();
|
void SyncStencilTest();
|
||||||
|
|
||||||
|
|
|
@ -71,6 +71,7 @@ const char g_fragment_shader_hw[] = R"(
|
||||||
|
|
||||||
#define SOURCE_PRIMARYCOLOR 0x0
|
#define SOURCE_PRIMARYCOLOR 0x0
|
||||||
#define SOURCE_PRIMARYFRAGMENTCOLOR 0x1
|
#define SOURCE_PRIMARYFRAGMENTCOLOR 0x1
|
||||||
|
#define SOURCE_SECONDARYFRAGMENTCOLOR 0x2
|
||||||
#define SOURCE_TEXTURE0 0x3
|
#define SOURCE_TEXTURE0 0x3
|
||||||
#define SOURCE_TEXTURE1 0x4
|
#define SOURCE_TEXTURE1 0x4
|
||||||
#define SOURCE_TEXTURE2 0x5
|
#define SOURCE_TEXTURE2 0x5
|
||||||
|
@ -151,8 +152,11 @@ vec4 GetSource(int source) {
|
||||||
if (source == SOURCE_PRIMARYCOLOR) {
|
if (source == SOURCE_PRIMARYCOLOR) {
|
||||||
return o[2];
|
return o[2];
|
||||||
} else if (source == SOURCE_PRIMARYFRAGMENTCOLOR) {
|
} else if (source == SOURCE_PRIMARYFRAGMENTCOLOR) {
|
||||||
// HACK: Uses color value, but should really use fragment lighting output
|
// HACK: Until we implement fragment lighting, use primary_color
|
||||||
return o[2];
|
return o[2];
|
||||||
|
} else if (source == SOURCE_SECONDARYFRAGMENTCOLOR) {
|
||||||
|
// HACK: Until we implement fragment lighting, use zero
|
||||||
|
return vec4(0.0, 0.0, 0.0, 0.0);
|
||||||
} else if (source == SOURCE_TEXTURE0) {
|
} else if (source == SOURCE_TEXTURE0) {
|
||||||
return texture(tex[0], o[3].xy);
|
return texture(tex[0], o[3].xy);
|
||||||
} else if (source == SOURCE_TEXTURE1) {
|
} else if (source == SOURCE_TEXTURE1) {
|
||||||
|
|
|
@ -32,6 +32,8 @@ OpenGLState::OpenGLState() {
|
||||||
blend.color.blue = 0.0f;
|
blend.color.blue = 0.0f;
|
||||||
blend.color.alpha = 0.0f;
|
blend.color.alpha = 0.0f;
|
||||||
|
|
||||||
|
logic_op = GL_COPY;
|
||||||
|
|
||||||
for (auto& texture_unit : texture_units) {
|
for (auto& texture_unit : texture_units) {
|
||||||
texture_unit.enabled_2d = false;
|
texture_unit.enabled_2d = false;
|
||||||
texture_unit.texture_2d = 0;
|
texture_unit.texture_2d = 0;
|
||||||
|
@ -99,8 +101,13 @@ void OpenGLState::Apply() {
|
||||||
if (blend.enabled != cur_state.blend.enabled) {
|
if (blend.enabled != cur_state.blend.enabled) {
|
||||||
if (blend.enabled) {
|
if (blend.enabled) {
|
||||||
glEnable(GL_BLEND);
|
glEnable(GL_BLEND);
|
||||||
|
|
||||||
|
cur_state.logic_op = GL_COPY;
|
||||||
|
glLogicOp(cur_state.logic_op);
|
||||||
|
glDisable(GL_COLOR_LOGIC_OP);
|
||||||
} else {
|
} else {
|
||||||
glDisable(GL_BLEND);
|
glDisable(GL_BLEND);
|
||||||
|
glEnable(GL_COLOR_LOGIC_OP);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -118,6 +125,10 @@ void OpenGLState::Apply() {
|
||||||
glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, blend.dst_a_func);
|
glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, blend.dst_a_func);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (logic_op != cur_state.logic_op) {
|
||||||
|
glLogicOp(logic_op);
|
||||||
|
}
|
||||||
|
|
||||||
// Textures
|
// Textures
|
||||||
for (unsigned texture_index = 0; texture_index < ARRAY_SIZE(texture_units); ++texture_index) {
|
for (unsigned texture_index = 0; texture_index < ARRAY_SIZE(texture_units); ++texture_index) {
|
||||||
if (texture_units[texture_index].enabled_2d != cur_state.texture_units[texture_index].enabled_2d) {
|
if (texture_units[texture_index].enabled_2d != cur_state.texture_units[texture_index].enabled_2d) {
|
||||||
|
|
|
@ -42,6 +42,8 @@ public:
|
||||||
} color; // GL_BLEND_COLOR
|
} color; // GL_BLEND_COLOR
|
||||||
} blend;
|
} blend;
|
||||||
|
|
||||||
|
GLenum logic_op; // GL_LOGIC_OP_MODE
|
||||||
|
|
||||||
// 3 texture units - one for each that is used in PICA fragment shader emulation
|
// 3 texture units - one for each that is used in PICA fragment shader emulation
|
||||||
struct {
|
struct {
|
||||||
bool enabled_2d; // GL_TEXTURE_2D
|
bool enabled_2d; // GL_TEXTURE_2D
|
||||||
|
|
|
@ -71,6 +71,37 @@ inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) {
|
||||||
return blend_func_table[(unsigned)factor];
|
return blend_func_table[(unsigned)factor];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts a PICA logic op into the matching OpenGL logic op enumerant.
///
/// @param op Logic op as read from the PICA output merger register
/// @return The corresponding GL_* logic op, or GL_COPY if `op` is outside the table
inline GLenum LogicOp(Pica::Regs::LogicOp op) {
    // Indexed by the raw register value, so the order must match Pica::Regs::LogicOp
    static const GLenum logic_op_table[] = {
        GL_CLEAR,           // Clear
        GL_AND,             // And
        GL_AND_REVERSE,     // AndReverse
        GL_COPY,            // Copy
        GL_SET,             // Set
        GL_COPY_INVERTED,   // CopyInverted
        GL_NOOP,            // NoOp
        GL_INVERT,          // Invert
        GL_NAND,            // Nand
        GL_OR,              // Or
        GL_NOR,             // Nor
        GL_XOR,             // Xor
        GL_EQUIV,           // Equiv
        GL_AND_INVERTED,    // AndInverted
        GL_OR_REVERSE,      // OrReverse
        GL_OR_INVERTED,     // OrInverted
    };
    static_assert(sizeof(logic_op_table) / sizeof(logic_op_table[0]) == 16,
                  "logic_op_table must have one entry per value of the 4-bit logic op field");

    // Convert once: a scoped enum is not promoted when passed through a printf-style
    // variadic, so it must not be handed to the logger directly.
    const unsigned index = static_cast<unsigned>(op);

    // Range check table for input
    if (index >= ARRAY_SIZE(logic_op_table)) {
        LOG_CRITICAL(Render_OpenGL, "Unknown logic op %u", index);
        UNREACHABLE();

        // Fall back to a plain copy of the source colour
        return GL_COPY;
    }

    return logic_op_table[index];
}
|
||||||
|
|
||||||
inline GLenum CompareFunc(Pica::Regs::CompareFunc func) {
|
inline GLenum CompareFunc(Pica::Regs::CompareFunc func) {
|
||||||
static const GLenum compare_func_table[] = {
|
static const GLenum compare_func_table[] = {
|
||||||
GL_NEVER, // CompareFunc::Never
|
GL_NEVER, // CompareFunc::Never
|
||||||
|
|
|
@ -119,17 +119,13 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
switch (instr.opcode.Value().GetInfo().type) {
|
switch (instr.opcode.Value().GetInfo().type) {
|
||||||
case OpCode::Type::Arithmetic:
|
case OpCode::Type::Arithmetic:
|
||||||
{
|
{
|
||||||
bool is_inverted = 0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed);
|
const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
|
||||||
// TODO: We don't really support this properly: For instance, the address register
|
|
||||||
// offset needs to be applied to SRC2 instead, etc.
|
|
||||||
// For now, we just abort in this situation.
|
|
||||||
ASSERT_MSG(!is_inverted, "Bad condition...");
|
|
||||||
|
|
||||||
const int address_offset = (instr.common.address_register_index == 0)
|
const int address_offset = (instr.common.address_register_index == 0)
|
||||||
? 0 : state.address_registers[instr.common.address_register_index - 1];
|
? 0 : state.address_registers[instr.common.address_register_index - 1];
|
||||||
|
|
||||||
const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset);
|
const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset));
|
||||||
const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted));
|
const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset));
|
||||||
|
|
||||||
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
|
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
|
||||||
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
|
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
|
||||||
|
@ -208,6 +204,15 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OpCode::Id::MIN:
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
dest[i] = std::min(src1[i], src2[i]);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case OpCode::Id::DP3:
|
case OpCode::Id::DP3:
|
||||||
case OpCode::Id::DP4:
|
case OpCode::Id::DP4:
|
||||||
{
|
{
|
||||||
|
@ -279,6 +284,16 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case OpCode::Id::SLT:
|
||||||
|
case OpCode::Id::SLTI:
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case OpCode::Id::CMP:
|
case OpCode::Id::CMP:
|
||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
// TODO: Can you restrict to one compare via dest masking?
|
// TODO: Can you restrict to one compare via dest masking?
|
||||||
|
|
Loading…
Reference in a new issue