mirror of
https://github.com/Lime3DS/Lime3DS
synced 2024-12-28 01:52:29 -06:00
GPU: Implement TextureCopy-mode display transfers
Fixes glitchy garbage in Fire Emblem 3D scenes.
This commit is contained in:
parent
2e5696dba4
commit
9ae5a09655
4 changed files with 101 additions and 36 deletions
|
@ -418,7 +418,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
|
||||||
|
|
||||||
case CommandId::SET_DISPLAY_TRANSFER:
|
case CommandId::SET_DISPLAY_TRANSFER:
|
||||||
{
|
{
|
||||||
auto& params = command.image_copy;
|
auto& params = command.display_transfer;
|
||||||
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
|
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
|
||||||
Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
|
Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
|
||||||
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
|
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
|
||||||
|
@ -433,17 +433,22 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
|
||||||
// TODO: Check if texture copies are implemented correctly..
|
// TODO: Check if texture copies are implemented correctly..
|
||||||
case CommandId::SET_TEXTURE_COPY:
|
case CommandId::SET_TEXTURE_COPY:
|
||||||
{
|
{
|
||||||
auto& params = command.image_copy;
|
auto& params = command.texture_copy;
|
||||||
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
|
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address),
|
||||||
Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
|
Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
|
||||||
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
|
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address),
|
||||||
Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
|
Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
|
||||||
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size);
|
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size),
|
||||||
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size);
|
params.size);
|
||||||
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)), params.flags);
|
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size),
|
||||||
|
params.in_width_gap);
|
||||||
|
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size),
|
||||||
|
params.out_width_gap);
|
||||||
|
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags),
|
||||||
|
params.flags);
|
||||||
|
|
||||||
// TODO: Should this register be set to 1 or should instead its value be OR-ed with 1?
|
// NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to matter.
|
||||||
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1);
|
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -127,7 +127,16 @@ struct Command {
|
||||||
u32 in_buffer_size;
|
u32 in_buffer_size;
|
||||||
u32 out_buffer_size;
|
u32 out_buffer_size;
|
||||||
u32 flags;
|
u32 flags;
|
||||||
} image_copy;
|
} display_transfer;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
u32 in_buffer_address;
|
||||||
|
u32 out_buffer_address;
|
||||||
|
u32 size;
|
||||||
|
u32 in_width_gap;
|
||||||
|
u32 out_width_gap;
|
||||||
|
u32 flags;
|
||||||
|
} texture_copy;
|
||||||
|
|
||||||
u8 raw_data[0x1C];
|
u8 raw_data[0x1C];
|
||||||
};
|
};
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <numeric>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
#include "common/color.h"
|
#include "common/color.h"
|
||||||
|
@ -158,14 +159,59 @@ inline void Write(u32 addr, const T data) {
|
||||||
u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
|
u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
|
||||||
u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
|
u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
|
||||||
|
|
||||||
|
if (config.is_texture_copy) {
|
||||||
|
u32 input_width = config.texture_copy.input_width * 16;
|
||||||
|
u32 input_gap = config.texture_copy.input_gap * 16;
|
||||||
|
u32 output_width = config.texture_copy.output_width * 16;
|
||||||
|
u32 output_gap = config.texture_copy.output_gap * 16;
|
||||||
|
|
||||||
|
size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
|
||||||
|
VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), contiguous_input_size);
|
||||||
|
|
||||||
|
u32 remaining_size = config.texture_copy.size;
|
||||||
|
u32 remaining_input = input_width;
|
||||||
|
u32 remaining_output = output_width;
|
||||||
|
while (remaining_size > 0) {
|
||||||
|
u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
|
||||||
|
|
||||||
|
std::memcpy(dst_pointer, src_pointer, copy_size);
|
||||||
|
src_pointer += copy_size;
|
||||||
|
dst_pointer += copy_size;
|
||||||
|
|
||||||
|
remaining_input -= copy_size;
|
||||||
|
remaining_output -= copy_size;
|
||||||
|
remaining_size -= copy_size;
|
||||||
|
|
||||||
|
if (remaining_input == 0) {
|
||||||
|
remaining_input = input_width;
|
||||||
|
src_pointer += input_gap;
|
||||||
|
}
|
||||||
|
if (remaining_output == 0) {
|
||||||
|
remaining_output = output_width;
|
||||||
|
dst_pointer += output_gap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
|
||||||
|
config.texture_copy.size,
|
||||||
|
config.GetPhysicalInputAddress(), input_width, input_gap,
|
||||||
|
config.GetPhysicalOutputAddress(), output_width, output_gap,
|
||||||
|
config.flags);
|
||||||
|
|
||||||
|
size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
|
||||||
|
VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), contiguous_output_size);
|
||||||
|
|
||||||
|
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (config.scaling > config.ScaleXY) {
|
if (config.scaling > config.ScaleXY) {
|
||||||
LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
|
LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (config.output_tiled &&
|
if (config.input_linear && config.scaling != config.NoScale) {
|
||||||
(config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) {
|
|
||||||
LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
|
LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
break;
|
break;
|
||||||
|
@ -182,23 +228,6 @@ inline void Write(u32 addr, const T data) {
|
||||||
|
|
||||||
VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size);
|
VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size);
|
||||||
|
|
||||||
if (config.raw_copy) {
|
|
||||||
// Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions
|
|
||||||
// TODO(Subv): Verify if raw copies perform scaling
|
|
||||||
memcpy(dst_pointer, src_pointer, output_size);
|
|
||||||
|
|
||||||
LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy",
|
|
||||||
output_size,
|
|
||||||
config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
|
|
||||||
config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(),
|
|
||||||
config.output_format.Value(), config.flags);
|
|
||||||
|
|
||||||
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
|
|
||||||
|
|
||||||
VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (u32 y = 0; y < output_height; ++y) {
|
for (u32 y = 0; y < output_height; ++y) {
|
||||||
for (u32 x = 0; x < output_width; ++x) {
|
for (u32 x = 0; x < output_width; ++x) {
|
||||||
Math::Vec4<u8> src_color;
|
Math::Vec4<u8> src_color;
|
||||||
|
@ -220,7 +249,7 @@ inline void Write(u32 addr, const T data) {
|
||||||
u32 src_offset;
|
u32 src_offset;
|
||||||
u32 dst_offset;
|
u32 dst_offset;
|
||||||
|
|
||||||
if (config.output_tiled) {
|
if (config.input_linear) {
|
||||||
if (!config.dont_swizzle) {
|
if (!config.dont_swizzle) {
|
||||||
// Interpret the input as linear and the output as tiled
|
// Interpret the input as linear and the output as tiled
|
||||||
u32 coarse_y = y & ~7;
|
u32 coarse_y = y & ~7;
|
||||||
|
|
|
@ -201,12 +201,14 @@ struct Regs {
|
||||||
u32 flags;
|
u32 flags;
|
||||||
|
|
||||||
BitField< 0, 1, u32> flip_vertically; // flips input data vertically
|
BitField< 0, 1, u32> flip_vertically; // flips input data vertically
|
||||||
BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format
|
BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format
|
||||||
BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing
|
BitField< 2, 1, u32> crop_input_lines;
|
||||||
|
BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields
|
||||||
BitField< 5, 1, u32> dont_swizzle;
|
BitField< 5, 1, u32> dont_swizzle;
|
||||||
BitField< 8, 3, PixelFormat> input_format;
|
BitField< 8, 3, PixelFormat> input_format;
|
||||||
BitField<12, 3, PixelFormat> output_format;
|
BitField<12, 3, PixelFormat> output_format;
|
||||||
|
/// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
|
||||||
|
BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented
|
||||||
BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
|
BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -214,10 +216,30 @@ struct Regs {
|
||||||
|
|
||||||
// it seems that writing to this field triggers the display transfer
|
// it seems that writing to this field triggers the display transfer
|
||||||
u32 trigger;
|
u32 trigger;
|
||||||
} display_transfer_config;
|
|
||||||
ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c);
|
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x331);
|
INSERT_PADDING_WORDS(0x1);
|
||||||
|
|
||||||
|
struct {
|
||||||
|
u32 size;
|
||||||
|
|
||||||
|
union {
|
||||||
|
u32 input_size;
|
||||||
|
|
||||||
|
BitField< 0, 16, u32> input_width;
|
||||||
|
BitField<16, 16, u32> input_gap;
|
||||||
|
};
|
||||||
|
|
||||||
|
union {
|
||||||
|
u32 output_size;
|
||||||
|
|
||||||
|
BitField< 0, 16, u32> output_width;
|
||||||
|
BitField<16, 16, u32> output_gap;
|
||||||
|
};
|
||||||
|
} texture_copy;
|
||||||
|
} display_transfer_config;
|
||||||
|
ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c);
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0x32D);
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
// command list size (in bytes)
|
// command list size (in bytes)
|
||||||
|
|
Loading…
Reference in a new issue