mirror of
https://git.suyu.dev/suyu/suyu
synced 2024-12-24 10:23:01 -06:00
Merge pull request #37 from neobrain/pica
Initial work on Pica rendering.
This commit is contained in:
commit
36cabe35cc
24 changed files with 2367 additions and 260 deletions
|
@ -78,7 +78,7 @@ QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const
|
|||
// index refers to a specific command
|
||||
const GraphicsDebugger::PicaCommandList& cmdlist = command_lists[item->parent->index].second;
|
||||
const GraphicsDebugger::PicaCommand& cmd = cmdlist[item->index];
|
||||
const Pica::CommandHeader& header = cmd.GetHeader();
|
||||
const Pica::CommandProcessor::CommandHeader& header = cmd.GetHeader();
|
||||
|
||||
if (role == Qt::DisplayRole) {
|
||||
QString content;
|
||||
|
|
|
@ -173,7 +173,7 @@ void ExecuteCommand(const Command& command) {
|
|||
case CommandId::SET_COMMAND_LIST_LAST:
|
||||
{
|
||||
auto& params = command.set_command_list_last;
|
||||
WriteGPURegister(GPU_REG_INDEX(command_processor_config.address), params.address >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(command_processor_config.address), Memory::VirtualToPhysicalAddress(params.address) >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(command_processor_config.size), params.size >> 3);
|
||||
|
||||
// TODO: Not sure if we are supposed to always write this .. seems to trigger processing though
|
||||
|
@ -193,20 +193,28 @@ void ExecuteCommand(const Command& command) {
|
|||
case CommandId::SET_MEMORY_FILL:
|
||||
{
|
||||
auto& params = command.memory_fill;
|
||||
WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_start), params.start1 >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_end), params.end1 >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_start), Memory::VirtualToPhysicalAddress(params.start1) >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_end), Memory::VirtualToPhysicalAddress(params.end1) >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].size), params.end1 - params.start1);
|
||||
WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].value), params.value1);
|
||||
|
||||
WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_start), params.start2 >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_end), params.end2 >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_start), Memory::VirtualToPhysicalAddress(params.start2) >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_end), Memory::VirtualToPhysicalAddress(params.end2) >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].size), params.end2 - params.start2);
|
||||
WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].value), params.value2);
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO: Check if texture copies are implemented correctly..
|
||||
case CommandId::SET_DISPLAY_TRANSFER:
|
||||
{
|
||||
auto& params = command.image_copy;
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size);
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size);
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags);
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1);
|
||||
|
||||
// TODO(bunnei): Signalling all of these interrupts here is totally wrong, but it seems to
|
||||
// work well enough for running demos. Need to figure out how these all work and trigger
|
||||
// them correctly.
|
||||
|
@ -216,18 +224,19 @@ void ExecuteCommand(const Command& command) {
|
|||
SignalInterrupt(InterruptId::P3D);
|
||||
SignalInterrupt(InterruptId::DMA);
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO: Check if texture copies are implemented correctly..
|
||||
case CommandId::SET_TEXTURE_COPY:
|
||||
{
|
||||
auto& params = command.image_copy;
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), params.in_buffer_address >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), params.out_buffer_address >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size);
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size);
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags);
|
||||
|
||||
// TODO: Should this only be ORed with 1 for texture copies?
|
||||
// trigger transfer
|
||||
// TODO: Should this register be set to 1 or should instead its value be OR-ed with 1?
|
||||
WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
#include "core/hw/gpu.h"
|
||||
|
||||
#include "video_core/command_processor.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
|
||||
|
@ -24,83 +25,6 @@ Regs g_regs;
|
|||
u32 g_cur_line = 0; ///< Current vertical screen line
|
||||
u64 g_last_line_ticks = 0; ///< CPU tick count from last vertical screen line
|
||||
|
||||
/**
|
||||
* Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM
|
||||
* @param
|
||||
*/
|
||||
void SetFramebufferLocation(const FramebufferLocation mode) {
|
||||
switch (mode) {
|
||||
case FRAMEBUFFER_LOCATION_FCRAM:
|
||||
{
|
||||
auto& framebuffer_top = g_regs.framebuffer_config[0];
|
||||
auto& framebuffer_sub = g_regs.framebuffer_config[1];
|
||||
|
||||
framebuffer_top.address_left1 = PADDR_TOP_LEFT_FRAME1;
|
||||
framebuffer_top.address_left2 = PADDR_TOP_LEFT_FRAME2;
|
||||
framebuffer_top.address_right1 = PADDR_TOP_RIGHT_FRAME1;
|
||||
framebuffer_top.address_right2 = PADDR_TOP_RIGHT_FRAME2;
|
||||
framebuffer_sub.address_left1 = PADDR_SUB_FRAME1;
|
||||
//framebuffer_sub.address_left2 = unknown;
|
||||
framebuffer_sub.address_right1 = PADDR_SUB_FRAME2;
|
||||
//framebuffer_sub.address_right2 = unknown;
|
||||
break;
|
||||
}
|
||||
|
||||
case FRAMEBUFFER_LOCATION_VRAM:
|
||||
{
|
||||
auto& framebuffer_top = g_regs.framebuffer_config[0];
|
||||
auto& framebuffer_sub = g_regs.framebuffer_config[1];
|
||||
|
||||
framebuffer_top.address_left1 = PADDR_VRAM_TOP_LEFT_FRAME1;
|
||||
framebuffer_top.address_left2 = PADDR_VRAM_TOP_LEFT_FRAME2;
|
||||
framebuffer_top.address_right1 = PADDR_VRAM_TOP_RIGHT_FRAME1;
|
||||
framebuffer_top.address_right2 = PADDR_VRAM_TOP_RIGHT_FRAME2;
|
||||
framebuffer_sub.address_left1 = PADDR_VRAM_SUB_FRAME1;
|
||||
//framebuffer_sub.address_left2 = unknown;
|
||||
framebuffer_sub.address_right1 = PADDR_VRAM_SUB_FRAME2;
|
||||
//framebuffer_sub.address_right2 = unknown;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the location of the framebuffers
|
||||
* @return Location of framebuffers as FramebufferLocation enum
|
||||
*/
|
||||
FramebufferLocation GetFramebufferLocation(u32 address) {
|
||||
if ((address & ~Memory::VRAM_MASK) == Memory::VRAM_PADDR) {
|
||||
return FRAMEBUFFER_LOCATION_VRAM;
|
||||
} else if ((address & ~Memory::FCRAM_MASK) == Memory::FCRAM_PADDR) {
|
||||
return FRAMEBUFFER_LOCATION_FCRAM;
|
||||
} else {
|
||||
ERROR_LOG(GPU, "unknown framebuffer location!");
|
||||
}
|
||||
return FRAMEBUFFER_LOCATION_UNKNOWN;
|
||||
}
|
||||
|
||||
/**
 * Translates a physical framebuffer address into the corresponding virtual address.
 * @param address Physical address of the framebuffer
 * @return Virtual address, or 0 (after logging an error) when the address is in neither
 *         FCRAM nor VRAM
 */
u32 GetFramebufferAddr(const u32 address) {
    const FramebufferLocation location = GetFramebufferLocation(address);

    if (location == FRAMEBUFFER_LOCATION_FCRAM)
        return Memory::VirtualAddressFromPhysical_FCRAM(address);

    if (location == FRAMEBUFFER_LOCATION_VRAM)
        return Memory::VirtualAddressFromPhysical_VRAM(address);

    ERROR_LOG(GPU, "unknown framebuffer location");
    return 0;
}
|
||||
|
||||
/**
|
||||
* Gets a read-only pointer to a framebuffer in memory
|
||||
* @param address Physical address of framebuffer
|
||||
* @return Returns const pointer to raw framebuffer
|
||||
*/
|
||||
const u8* GetFramebufferPointer(const u32 address) {
|
||||
u32 addr = GetFramebufferAddr(address);
|
||||
return (addr != 0) ? Memory::GetPointer(addr) : nullptr;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void Read(T &var, const u32 raw_addr) {
|
||||
u32 addr = raw_addr - 0x1EF00000;
|
||||
|
@ -141,8 +65,8 @@ inline void Write(u32 addr, const T data) {
|
|||
// TODO: Not sure if this check should be done at GSP level instead
|
||||
if (config.address_start) {
|
||||
// TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all
|
||||
u32* start = (u32*)Memory::GetPointer(config.GetStartAddress());
|
||||
u32* end = (u32*)Memory::GetPointer(config.GetEndAddress());
|
||||
u32* start = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetStartAddress()));
|
||||
u32* end = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetEndAddress()));
|
||||
for (u32* ptr = start; ptr < end; ++ptr)
|
||||
*ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation
|
||||
|
||||
|
@ -155,8 +79,8 @@ inline void Write(u32 addr, const T data) {
|
|||
{
|
||||
const auto& config = g_regs.display_transfer_config;
|
||||
if (config.trigger & 1) {
|
||||
u8* source_pointer = Memory::GetPointer(config.GetPhysicalInputAddress());
|
||||
u8* dest_pointer = Memory::GetPointer(config.GetPhysicalOutputAddress());
|
||||
u8* source_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalInputAddress()));
|
||||
u8* dest_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalOutputAddress()));
|
||||
|
||||
for (int y = 0; y < config.output_height; ++y) {
|
||||
// TODO: Why does the register seem to hold twice the framebuffer width?
|
||||
|
@ -220,14 +144,15 @@ inline void Write(u32 addr, const T data) {
|
|||
break;
|
||||
}
|
||||
|
||||
// Seems like writing to this register triggers processing
|
||||
case GPU_REG_INDEX(command_processor_config.trigger):
|
||||
{
|
||||
const auto& config = g_regs.command_processor_config;
|
||||
if (config.trigger & 1)
|
||||
{
|
||||
// u32* buffer = (u32*)Memory::GetPointer(config.GetPhysicalAddress());
|
||||
ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.GetPhysicalAddress());
|
||||
// TODO: Process command list!
|
||||
u32* buffer = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalAddress()));
|
||||
u32 size = config.size << 3;
|
||||
Pica::CommandProcessor::ProcessCommandList(buffer, size);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -276,11 +201,22 @@ void Init() {
|
|||
g_cur_line = 0;
|
||||
g_last_line_ticks = Core::g_app_core->GetTicks();
|
||||
|
||||
// SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM);
|
||||
SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM);
|
||||
|
||||
auto& framebuffer_top = g_regs.framebuffer_config[0];
|
||||
auto& framebuffer_sub = g_regs.framebuffer_config[1];
|
||||
|
||||
// Setup default framebuffer addresses (located in VRAM)
|
||||
// .. or at least these are the ones used by system applets.
|
||||
// There's probably a smarter way to come up with addresses
|
||||
// like this which does not require hardcoding.
|
||||
framebuffer_top.address_left1 = 0x181E6000;
|
||||
framebuffer_top.address_left2 = 0x1822C800;
|
||||
framebuffer_top.address_right1 = 0x18273000;
|
||||
framebuffer_top.address_right2 = 0x182B9800;
|
||||
framebuffer_sub.address_left1 = 0x1848F000;
|
||||
//framebuffer_sub.address_left2 = unknown;
|
||||
framebuffer_sub.address_right1 = 0x184C7800;
|
||||
//framebuffer_sub.address_right2 = unknown;
|
||||
|
||||
// TODO: Width should be 240 instead?
|
||||
framebuffer_top.width = 480;
|
||||
framebuffer_top.height = 400;
|
||||
|
|
|
@ -249,72 +249,6 @@ static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of regis
|
|||
|
||||
extern Regs g_regs;
|
||||
|
||||
/// Screen geometry and hard-coded framebuffer addresses
enum {
    TOP_ASPECT_X = 0x5,
    TOP_ASPECT_Y = 0x3,

    TOP_HEIGHT   = 240,
    TOP_WIDTH    = 400,
    BOTTOM_WIDTH = 320,

    // Physical addresses in FCRAM (chosen arbitrarily)
    PADDR_TOP_LEFT_FRAME1  = 0x201D4C00,
    PADDR_TOP_LEFT_FRAME2  = 0x202D4C00,
    PADDR_TOP_RIGHT_FRAME1 = 0x203D4C00,
    PADDR_TOP_RIGHT_FRAME2 = 0x204D4C00,
    PADDR_SUB_FRAME1       = 0x205D4C00,
    PADDR_SUB_FRAME2       = 0x206D4C00,
    // Physical addresses in FCRAM used by ARM9 applications
/*  PADDR_TOP_LEFT_FRAME1  = 0x20184E60,
    PADDR_TOP_LEFT_FRAME2  = 0x201CB370,
    PADDR_TOP_RIGHT_FRAME1 = 0x20282160,
    PADDR_TOP_RIGHT_FRAME2 = 0x202C8670,
    PADDR_SUB_FRAME1       = 0x202118E0,
    PADDR_SUB_FRAME2       = 0x20249CF0,*/

    // Physical addresses in VRAM.
    // Each one is the matching FCRAM address above shifted down by 0x08000000
    // (e.g. 0x201D4C00 -> 0x181D4C00), so it is derived rather than spelled out a second time.
    PADDR_VRAM_TOP_LEFT_FRAME1  = PADDR_TOP_LEFT_FRAME1  - 0x08000000,
    PADDR_VRAM_TOP_LEFT_FRAME2  = PADDR_TOP_LEFT_FRAME2  - 0x08000000,
    PADDR_VRAM_TOP_RIGHT_FRAME1 = PADDR_TOP_RIGHT_FRAME1 - 0x08000000,
    PADDR_VRAM_TOP_RIGHT_FRAME2 = PADDR_TOP_RIGHT_FRAME2 - 0x08000000,
    PADDR_VRAM_SUB_FRAME1       = PADDR_SUB_FRAME1       - 0x08000000,
    PADDR_VRAM_SUB_FRAME2       = PADDR_SUB_FRAME2       - 0x08000000,
    // Physical addresses in VRAM used by ARM9 applications
/*  PADDR_VRAM_TOP_LEFT_FRAME2  = 0x181CB370,
    PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x18282160,
    PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x182C8670,
    PADDR_VRAM_SUB_FRAME1       = 0x182118E0,
    PADDR_VRAM_SUB_FRAME2       = 0x18249CF0,*/
};
|
||||
|
||||
/// Memory region in which the framebuffers reside
|
||||
enum FramebufferLocation {
|
||||
FRAMEBUFFER_LOCATION_UNKNOWN, ///< Location could not be determined
|
||||
FRAMEBUFFER_LOCATION_FCRAM, ///< Framebuffer is in the GSP heap (FCRAM)
|
||||
FRAMEBUFFER_LOCATION_VRAM, ///< Framebuffer is in VRAM
|
||||
};
|
||||
|
||||
/**
|
||||
* Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM
|
||||
* @param
|
||||
*/
|
||||
void SetFramebufferLocation(const FramebufferLocation mode);
|
||||
|
||||
/**
|
||||
* Gets a read-only pointer to a framebuffer in memory
|
||||
* @param address Physical address of framebuffer
|
||||
* @return Returns const pointer to raw framebuffer
|
||||
*/
|
||||
const u8* GetFramebufferPointer(const u32 address);
|
||||
|
||||
u32 GetFramebufferAddr(const u32 address);
|
||||
|
||||
/**
|
||||
* Gets the location of the framebuffers
|
||||
*/
|
||||
FramebufferLocation GetFramebufferLocation(u32 address);
|
||||
|
||||
template <typename T>
|
||||
void Read(T &var, const u32 addr);
|
||||
|
||||
|
|
|
@ -14,7 +14,6 @@ namespace Memory {
|
|||
enum {
|
||||
BOOTROM_SIZE = 0x00010000, ///< Bootrom (super secret code/data @ 0x8000) size
|
||||
MPCORE_PRIV_SIZE = 0x00002000, ///< MPCore private memory region size
|
||||
VRAM_SIZE = 0x00600000, ///< VRAM size
|
||||
DSP_SIZE = 0x00080000, ///< DSP memory size
|
||||
AXI_WRAM_SIZE = 0x00080000, ///< AXI WRAM size
|
||||
|
||||
|
@ -23,8 +22,6 @@ enum {
|
|||
FCRAM_PADDR_END = (FCRAM_PADDR + FCRAM_SIZE), ///< FCRAM end of physical space
|
||||
FCRAM_VADDR = 0x08000000, ///< FCRAM virtual address
|
||||
FCRAM_VADDR_END = (FCRAM_VADDR + FCRAM_SIZE), ///< FCRAM end of virtual space
|
||||
FCRAM_VADDR_FW0B = 0xF0000000, ///< FCRAM adress for firmare FW0B
|
||||
FCRAM_VADDR_FW0B_END = (FCRAM_VADDR_FW0B + FCRAM_SIZE), ///< FCRAM adress end for FW0B
|
||||
FCRAM_MASK = (FCRAM_SIZE - 1), ///< FCRAM mask
|
||||
|
||||
SHARED_MEMORY_SIZE = 0x04000000, ///< Shared memory size
|
||||
|
@ -73,6 +70,7 @@ enum {
|
|||
HARDWARE_IO_PADDR_END = (HARDWARE_IO_PADDR + HARDWARE_IO_SIZE),
|
||||
HARDWARE_IO_VADDR_END = (HARDWARE_IO_VADDR + HARDWARE_IO_SIZE),
|
||||
|
||||
VRAM_SIZE = 0x00600000,
|
||||
VRAM_PADDR = 0x18000000,
|
||||
VRAM_VADDR = 0x1F000000,
|
||||
VRAM_PADDR_END = (VRAM_PADDR + VRAM_SIZE),
|
||||
|
@ -147,7 +145,7 @@ void Write32(const u32 addr, const u32 data);
|
|||
|
||||
void WriteBlock(const u32 addr, const u8* data, const int size);
|
||||
|
||||
u8* GetPointer(const u32 Address);
|
||||
u8* GetPointer(const u32 virtual_address);
|
||||
|
||||
/**
|
||||
* Maps a block of memory on the heap
|
||||
|
@ -169,16 +167,10 @@ inline const char* GetCharPointer(const u32 address) {
|
|||
return (const char *)GetPointer(address);
|
||||
}
|
||||
|
||||
/// Maps an FCRAM address to its virtual address: keeps the offset selected by FCRAM_MASK and
/// ORs it with FCRAM_VADDR
inline const u32 VirtualAddressFromPhysical_FCRAM(const u32 address) {
    const u32 fcram_offset = address & FCRAM_MASK;
    return fcram_offset | FCRAM_VADDR;
}
|
||||
/// Converts a physical address to virtual address
|
||||
u32 PhysicalToVirtualAddress(const u32 addr);
|
||||
|
||||
/// Maps a physical hardware IO address to its virtual address by adding a fixed displacement
inline const u32 VirtualAddressFromPhysical_IO(const u32 address) {
    const u32 io_displacement = 0x0EB00000;
    return address + io_displacement;
}
|
||||
|
||||
/// Maps a physical VRAM address to its virtual address by adding a fixed displacement
inline const u32 VirtualAddressFromPhysical_VRAM(const u32 address) {
    const u32 vram_displacement = 0x07000000;
    return address + vram_displacement;
}
|
||||
/// Converts a virtual address to physical address
|
||||
u32 VirtualToPhysicalAddress(const u32 addr);
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -17,37 +17,44 @@ std::map<u32, MemoryBlock> g_heap_map;
|
|||
std::map<u32, MemoryBlock> g_heap_gsp_map;
|
||||
std::map<u32, MemoryBlock> g_shared_map;
|
||||
|
||||
/// Convert a physical address (or firmware-specific virtual address) to primary virtual address
|
||||
u32 _VirtualAddress(const u32 addr) {
|
||||
/// Convert a physical address to virtual address
|
||||
u32 PhysicalToVirtualAddress(const u32 addr) {
|
||||
// Our memory interface read/write functions assume virtual addresses. Put any physical address
|
||||
// to virtual address translations here. This is obviously quite hacky... But we're not doing
|
||||
// any MMU emulation yet or anything
|
||||
if ((addr >= FCRAM_PADDR) && (addr < FCRAM_PADDR_END)) {
|
||||
return VirtualAddressFromPhysical_FCRAM(addr);
|
||||
|
||||
// Virtual address mapping FW0B
|
||||
} else if ((addr >= FCRAM_VADDR_FW0B) && (addr < FCRAM_VADDR_FW0B_END)) {
|
||||
return VirtualAddressFromPhysical_FCRAM(addr);
|
||||
|
||||
// Hardware IO
|
||||
// TODO(bunnei): FixMe
|
||||
// This isn't going to work... The physical address of HARDWARE_IO conflicts with the virtual
|
||||
// address of shared memory.
|
||||
//} else if ((addr >= HARDWARE_IO_PADDR) && (addr < HARDWARE_IO_PADDR_END)) {
|
||||
// return (addr + 0x0EB00000);
|
||||
|
||||
// to virtual address translations here. This is quite hacky, but necessary until we implement
|
||||
// proper MMU emulation.
|
||||
// TODO: Screw it, I'll let bunnei figure out how to do this properly.
|
||||
if ((addr >= VRAM_PADDR) && (addr < VRAM_PADDR_END)) {
|
||||
return addr - VRAM_PADDR + VRAM_VADDR;
|
||||
}else if ((addr >= FCRAM_PADDR) && (addr < FCRAM_PADDR_END)) {
|
||||
return addr - FCRAM_PADDR + FCRAM_VADDR;
|
||||
}
|
||||
|
||||
ERROR_LOG(MEMMAP, "Unknown physical address @ 0x%08x", addr);
|
||||
return addr;
|
||||
}
|
||||
|
||||
/// Convert a physical address to virtual address
|
||||
u32 VirtualToPhysicalAddress(const u32 addr) {
|
||||
// Our memory interface read/write functions assume virtual addresses. Put any physical address
|
||||
// to virtual address translations here. This is quite hacky, but necessary until we implement
|
||||
// proper MMU emulation.
|
||||
// TODO: Screw it, I'll let bunnei figure out how to do this properly.
|
||||
if ((addr >= VRAM_VADDR) && (addr < VRAM_VADDR_END)) {
|
||||
return addr - 0x07000000;
|
||||
} else if ((addr >= FCRAM_VADDR) && (addr < FCRAM_VADDR_END)) {
|
||||
return addr - FCRAM_VADDR + FCRAM_PADDR;
|
||||
}
|
||||
|
||||
ERROR_LOG(MEMMAP, "Unknown virtual address @ 0x%08x", addr);
|
||||
return addr;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void Read(T &var, const u32 addr) {
|
||||
inline void Read(T &var, const u32 vaddr) {
|
||||
// TODO: Figure out the fastest order of tests for both read and write (they are probably different).
|
||||
// TODO: Make sure this represents the mirrors in a correct way.
|
||||
// Could just do a base-relative read, too.... TODO
|
||||
|
||||
const u32 vaddr = _VirtualAddress(addr);
|
||||
|
||||
// Kernel memory command buffer
|
||||
if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) {
|
||||
var = *((const T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK]);
|
||||
|
@ -91,8 +98,7 @@ inline void Read(T &var, const u32 addr) {
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
inline void Write(u32 addr, const T data) {
|
||||
u32 vaddr = _VirtualAddress(addr);
|
||||
inline void Write(u32 vaddr, const T data) {
|
||||
|
||||
// Kernel memory command buffer
|
||||
if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) {
|
||||
|
@ -140,9 +146,7 @@ inline void Write(u32 addr, const T data) {
|
|||
}
|
||||
}
|
||||
|
||||
u8 *GetPointer(const u32 addr) {
|
||||
const u32 vaddr = _VirtualAddress(addr);
|
||||
|
||||
u8 *GetPointer(const u32 vaddr) {
|
||||
// Kernel memory command buffer
|
||||
if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) {
|
||||
return g_kernel_mem + (vaddr & KERNEL_MEMORY_MASK);
|
||||
|
|
|
@ -1,10 +1,22 @@
|
|||
set(SRCS video_core.cpp
|
||||
set(SRCS clipper.cpp
|
||||
command_processor.cpp
|
||||
primitive_assembly.cpp
|
||||
rasterizer.cpp
|
||||
utils.cpp
|
||||
vertex_shader.cpp
|
||||
video_core.cpp
|
||||
renderer_opengl/renderer_opengl.cpp)
|
||||
|
||||
set(HEADERS video_core.h
|
||||
set(HEADERS clipper.h
|
||||
command_processor.h
|
||||
math.h
|
||||
primitive_assembly.h
|
||||
rasterizer.h
|
||||
utils.h
|
||||
video_core.h
|
||||
renderer_base.h
|
||||
vertex_shader.h
|
||||
video_core.h
|
||||
renderer_opengl/renderer_opengl.h)
|
||||
|
||||
add_library(video_core STATIC ${SRCS} ${HEADERS})
|
||||
|
|
179
src/video_core/clipper.cpp
Normal file
179
src/video_core/clipper.cpp
Normal file
|
@ -0,0 +1,179 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "clipper.h"
|
||||
#include "pica.h"
|
||||
#include "rasterizer.h"
|
||||
#include "vertex_shader.h"
|
||||
|
||||
namespace Pica {
|
||||
|
||||
namespace Clipper {
|
||||
|
||||
struct ClippingEdge {
|
||||
public:
|
||||
enum Type {
|
||||
POS_X = 0,
|
||||
NEG_X = 1,
|
||||
POS_Y = 2,
|
||||
NEG_Y = 3,
|
||||
POS_Z = 4,
|
||||
NEG_Z = 5,
|
||||
};
|
||||
|
||||
ClippingEdge(Type type, float24 position) : type(type), pos(position) {}
|
||||
|
||||
bool IsInside(const OutputVertex& vertex) const {
|
||||
switch (type) {
|
||||
case POS_X: return vertex.pos.x <= pos * vertex.pos.w;
|
||||
case NEG_X: return vertex.pos.x >= pos * vertex.pos.w;
|
||||
case POS_Y: return vertex.pos.y <= pos * vertex.pos.w;
|
||||
case NEG_Y: return vertex.pos.y >= pos * vertex.pos.w;
|
||||
|
||||
// TODO: Check z compares ... should be 0..1 instead?
|
||||
case POS_Z: return vertex.pos.z <= pos * vertex.pos.w;
|
||||
|
||||
default:
|
||||
case NEG_Z: return vertex.pos.z >= pos * vertex.pos.w;
|
||||
}
|
||||
}
|
||||
|
||||
bool IsOutSide(const OutputVertex& vertex) const {
|
||||
return !IsInside(vertex);
|
||||
}
|
||||
|
||||
OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {
|
||||
auto dotpr = [this](const OutputVertex& vtx) {
|
||||
switch (type) {
|
||||
case POS_X: return vtx.pos.x - vtx.pos.w;
|
||||
case NEG_X: return -vtx.pos.x - vtx.pos.w;
|
||||
case POS_Y: return vtx.pos.y - vtx.pos.w;
|
||||
case NEG_Y: return -vtx.pos.y - vtx.pos.w;
|
||||
|
||||
// TODO: Verify z clipping
|
||||
case POS_Z: return vtx.pos.z - vtx.pos.w;
|
||||
|
||||
default:
|
||||
case NEG_Z: return -vtx.pos.w;
|
||||
}
|
||||
};
|
||||
|
||||
float24 dp = dotpr(v0);
|
||||
float24 dp_prev = dotpr(v1);
|
||||
float24 factor = dp_prev / (dp_prev - dp);
|
||||
|
||||
return OutputVertex::Lerp(factor, v0, v1);
|
||||
}
|
||||
|
||||
private:
|
||||
Type type;
|
||||
float24 pos;
|
||||
};
|
||||
|
||||
/// Fills vtx.screenpos from the vertex position (divided by w) and the viewport registers
static void InitScreenCoordinates(OutputVertex& vtx)
{
    // Viewport parameters as currently held in the Pica registers
    float24 half_width   = float24::FromRawFloat24(registers.viewport_size_x);
    float24 half_height  = float24::FromRawFloat24(registers.viewport_size_y);
    float24 corner_x     = float24::FromFloat32(registers.viewport_corner.x);
    float24 corner_y     = float24::FromFloat32(registers.viewport_corner.y);
    float24 depth_scale  = float24::FromRawFloat24(registers.viewport_depth_range);
    float24 depth_offset = float24::FromRawFloat24(registers.viewport_depth_far_plane);

    float24 one = float24::FromFloat32(1.0);
    float24 two = float24::FromFloat32(2.0);

    // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
    vtx.screenpos[0] = (vtx.pos.x / vtx.pos.w + one) * half_width / two + corner_x;
    vtx.screenpos[1] = (vtx.pos.y / vtx.pos.w + one) * half_height + corner_y;
    vtx.screenpos[2] = depth_offset - vtx.pos.z / vtx.pos.w * depth_scale;
}
|
||||
|
||||
/**
 * Clips the triangle (v0, v1, v2) against the six clipping edges and hands the surviving
 * polygon to the rasterizer as a fan of triangles anchored at its first vertex.
 * Nothing is rasterized if fewer than three vertices remain after any clipping step.
 * Screen coordinates of the emitted vertices (including v0/v1/v2 where they survive)
 * are written in place.
 */
void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {

    // TODO (neobrain):
    // The list of output vertices has some fixed maximum size,
    // however I haven't taken the time to figure out what it is exactly.
    // For now, we hence just assume a maximal size of 1000 vertices.
    const size_t max_vertices = 1000;
    std::vector<OutputVertex> buffer_vertices;
    std::vector<OutputVertex*> output_list{ &v0, &v1, &v2 };

    // Make sure to reserve space for all vertices.
    // output_list stores pointers into buffer_vertices, so a reallocation
    // would leave those pointers dangling.
    buffer_vertices.reserve(max_vertices);

    // Simple implementation of the Sutherland-Hodgman clipping algorithm.
    // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
    for (auto edge : { ClippingEdge(ClippingEdge::POS_X, float24::FromFloat32(+1.0)),
                       ClippingEdge(ClippingEdge::NEG_X, float24::FromFloat32(-1.0)),
                       ClippingEdge(ClippingEdge::POS_Y, float24::FromFloat32(+1.0)),
                       ClippingEdge(ClippingEdge::NEG_Y, float24::FromFloat32(-1.0)),
                       ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)),
                       ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) {

        const std::vector<OutputVertex*> input_list = output_list;
        output_list.clear();

        // Start with the last vertex so the closing polygon edge is processed as well
        const OutputVertex* reference_vertex = input_list.back();

        for (const auto& vertex : input_list) {
            // NOTE: This algorithm changes vertex order in some cases!
            if (edge.IsInside(*vertex)) {
                if (edge.IsOutSide(*reference_vertex)) {
                    // Entering the clip volume: emit the crossing point first
                    buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex));
                    output_list.push_back(&(buffer_vertices.back()));
                }

                output_list.push_back(vertex);
            } else if (edge.IsInside(*reference_vertex)) {
                // Leaving the clip volume: only the crossing point is kept
                buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex));
                output_list.push_back(&(buffer_vertices.back()));
            }

            reference_vertex = vertex;
        }

        // Need to have at least a full triangle to continue...
        if (output_list.size() < 3)
            return;
    }

    InitScreenCoordinates(*(output_list[0]));
    InitScreenCoordinates(*(output_list[1]));

    // output_list holds at least three vertices here, so the subtraction cannot wrap around
    const size_t num_triangles = output_list.size() - 2;
    for (size_t i = 0; i < num_triangles; ++i) {
        OutputVertex& vtx0 = *(output_list[0]);
        OutputVertex& vtx1 = *(output_list[i+1]);
        OutputVertex& vtx2 = *(output_list[i+2]);

        // vtx0 and vtx1 were already initialized (up front, or as vtx2 of the previous triangle)
        InitScreenCoordinates(vtx2);

        // The sizes are cast to int so that they match the %d conversions; every other
        // argument lines up one-to-one with a %f conversion in the format string.
        DEBUG_LOG(GPU,
                  "Triangle %d/%d (%d buffer vertices) at position (%.3f, %.3f, %.3f, %.3f), "
                  "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and "
                  "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)",
                  static_cast<int>(i), static_cast<int>(output_list.size()), static_cast<int>(buffer_vertices.size()),
                  vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(),
                  vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(),
                  vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(),
                  vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(),
                  vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(),
                  vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32());

        Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2);
    }
}
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace
|
21
src/video_core/clipper.h
Normal file
21
src/video_core/clipper.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace Pica {
|
||||
|
||||
namespace VertexShader {
|
||||
struct OutputVertex;
|
||||
}
|
||||
|
||||
namespace Clipper {
|
||||
|
||||
using VertexShader::OutputVertex;
|
||||
|
||||
/// Clips the triangle formed by v0, v1 and v2 and forwards the resulting triangles to the
/// rasterizer. The vertices are taken by non-const reference because their screen
/// coordinates are written in place.
void ProcessTriangle(OutputVertex& v0, OutputVertex& v1, OutputVertex& v2);
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace
|
238
src/video_core/command_processor.cpp
Normal file
238
src/video_core/command_processor.cpp
Normal file
|
@ -0,0 +1,238 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "command_processor.h"
|
||||
#include "math.h"
|
||||
#include "pica.h"
|
||||
#include "primitive_assembly.h"
|
||||
#include "vertex_shader.h"
|
||||
|
||||
|
||||
namespace Pica {
|
||||
|
||||
Regs registers;
|
||||
|
||||
namespace CommandProcessor {
|
||||
|
||||
static int float_regs_counter = 0;
|
||||
|
||||
static u32 uniform_write_buffer[4];
|
||||
|
||||
// Used for VSLoadProgramData and VSLoadSwizzleData
|
||||
static u32 vs_binary_write_offset = 0;
|
||||
static u32 vs_swizzle_write_offset = 0;
|
||||
|
||||
/**
 * Stores a value in a PICA register and performs the side effect associated
 * with that register, if any:
 *  - trigger_draw / trigger_draw_indexed: load all vertices, run the vertex
 *    shader on each of them and hand the results to primitive assembly.
 *  - vs_uniform_setup.set_value[*]: buffer the word and, once a full vector
 *    has been received, write it to the selected float uniform.
 *  - vs_program / vs_swizzle_patterns: reset or advance the upload cursors and
 *    forward the word to the vertex shader unit.
 *
 * @param id    Index of the register within Pica::registers.
 * @param value Raw 32-bit value to write.
 */
static inline void WritePicaReg(u32 id, u32 value) {
    registers[id] = value;

    switch(id) {
        // It seems like these trigger vertex rendering
        case PICA_REG_INDEX(trigger_draw):
        case PICA_REG_INDEX(trigger_draw_indexed):
        {
            const auto& attribute_config = registers.vertex_attributes;
            const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress());

            // Information about internal vertex attributes.
            // Zero-initialized so that attributes which no loader configures have an
            // element count of 0 and are skipped below instead of being read through
            // an uninitialized pointer.
            const u8* vertex_attribute_sources[16] = {};
            u32 vertex_attribute_strides[16] = {};
            u32 vertex_attribute_formats[16] = {};
            u32 vertex_attribute_elements[16] = {};
            u32 vertex_attribute_element_size[16] = {};

            // Setup attribute data from loaders
            for (int loader = 0; loader < 12; ++loader) {
                const auto& loader_config = attribute_config.attribute_loaders[loader];

                const u8* load_address = base_address + loader_config.data_offset;

                // TODO: What happens if a loader overwrites a previous one's data?
                for (int component = 0; component < loader_config.component_count; ++component) {
                    u32 attribute_index = loader_config.GetComponent(component);
                    vertex_attribute_sources[attribute_index] = load_address;
                    vertex_attribute_strides[attribute_index] = loader_config.byte_count;
                    vertex_attribute_formats[attribute_index] = (u32)attribute_config.GetFormat(attribute_index);
                    vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
                    vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index);
                    // Attributes of one loader are laid out back to back
                    load_address += attribute_config.GetStride(attribute_index);
                }
            }

            // Load vertices
            bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));

            const auto& index_info = registers.index_array;
            const u8* index_address_8 = (u8*)base_address + index_info.offset;
            const u16* index_address_16 = (u16*)index_address_8;
            bool index_u16 = (bool)index_info.format;

            for (int index = 0; index < registers.num_vertices; ++index)
            {
                // For indexed draws the vertex number comes from the index buffer
                int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index;

                if (is_indexed) {
                    // TODO: Implement some sort of vertex cache!
                }

                // Initialize data for the current vertex
                VertexShader::InputVertex input;

                for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
                    for (int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
                        const u8* srcdata = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
                        // Formats 0, 1, 2 are read as s8, u8, s16; everything else as float
                        const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata :
                                             (vertex_attribute_formats[i] == 1) ? *(u8*)srcdata :
                                             (vertex_attribute_formats[i] == 2) ? *(s16*)srcdata :
                                                                                  *(float*)srcdata;
                        input.attr[i][comp] = float24::FromFloat32(srcval);
                        // Pointer differences are narrowed explicitly: passing a ptrdiff_t
                        // for a %x conversion is a format/argument mismatch on 64-bit hosts.
                        DEBUG_LOG(GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
                                  comp, i, vertex, index,
                                  attribute_config.GetBaseAddress(),
                                  (u32)(vertex_attribute_sources[i] - base_address),
                                  (u32)(srcdata - vertex_attribute_sources[i]),
                                  input.attr[i][comp].ToFloat32());
                    }
                }
                VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes());

                if (is_indexed) {
                    // TODO: Add processed vertex to vertex cache!
                }

                PrimitiveAssembly::SubmitVertex(output);
            }
            break;
        }

        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[3], 0x2c4):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[4], 0x2c5):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[5], 0x2c6):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8):
        {
            auto& uniform_setup = registers.vs_uniform_setup;

            // TODO: Does actual hardware indeed keep an intermediate buffer or does
            //       it directly write the values?
            uniform_write_buffer[float_regs_counter++] = value;

            // Uniforms are written in a packed format such that 4 float24 values are encoded in
            // three 32-bit numbers. We write to internal memory once a full such vector is
            // written.
            if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
                (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
                float_regs_counter = 0;

                // Validate the index before asking for the uniform so that an
                // out-of-range index is never used to look one up.
                if (uniform_setup.index > 95) {
                    ERROR_LOG(GPU, "Invalid VS uniform index %d", (int)uniform_setup.index);
                    break;
                }

                auto& uniform = VertexShader::GetFloatUniform(uniform_setup.index);

                // NOTE: The destination component order indeed is "backwards"
                if (uniform_setup.IsFloat32()) {
                    for (int i = 0; i < 4; ++i)
                        uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
                } else {
                    // TODO: Untested
                    uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8);
                    uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
                    uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
                    uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF);
                }

                DEBUG_LOG(GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
                          uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(),
                          uniform.w.ToFloat32());

                // TODO: Verify that this actually modifies the register!
                uniform_setup.index = uniform_setup.index + 1;
            }
            break;
        }

        // Seems to be used to reset the write pointer for VSLoadProgramData
        case PICA_REG_INDEX(vs_program.begin_load):
            vs_binary_write_offset = 0;
            break;

        // Load shader program code
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[2], 0x2ce):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[3], 0x2cf):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[4], 0x2d0):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[5], 0x2d1):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3):
        {
            VertexShader::SubmitShaderMemoryChange(vs_binary_write_offset, value);
            vs_binary_write_offset++;
            break;
        }

        // Seems to be used to reset the write pointer for VSLoadSwizzleData
        case PICA_REG_INDEX(vs_swizzle_patterns.begin_load):
            vs_swizzle_write_offset = 0;
            break;

        // Load swizzle pattern data
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[2], 0x2d8):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[3], 0x2d9):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[4], 0x2da):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[5], 0x2db):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd):
        {
            VertexShader::SubmitSwizzleDataChange(vs_swizzle_write_offset, value);
            vs_swizzle_write_offset++;
            break;
        }

        default:
            break;
    }
}
|
||||
|
||||
static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) {
|
||||
const CommandHeader& header = *(const CommandHeader*)(&first_command_word[1]);
|
||||
|
||||
u32* read_pointer = (u32*)first_command_word;
|
||||
|
||||
// TODO: Take parameter mask into consideration!
|
||||
|
||||
WritePicaReg(header.cmd_id, *read_pointer);
|
||||
read_pointer += 2;
|
||||
|
||||
for (int i = 1; i < 1+header.extra_data_length; ++i) {
|
||||
u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0);
|
||||
WritePicaReg(cmd, *read_pointer);
|
||||
++read_pointer;
|
||||
}
|
||||
|
||||
// align read pointer to 8 bytes
|
||||
if ((first_command_word - read_pointer) % 2)
|
||||
++read_pointer;
|
||||
|
||||
return read_pointer - first_command_word;
|
||||
}
|
||||
|
||||
void ProcessCommandList(const u32* list, u32 size) {
|
||||
u32* read_pointer = (u32*)list;
|
||||
|
||||
while (read_pointer < list + size) {
|
||||
read_pointer += ExecuteCommandBlock(read_pointer);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace
|
31
src/video_core/command_processor.h
Normal file
31
src/video_core/command_processor.h
Normal file
|
@ -0,0 +1,31 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
#include "pica.h"
|
||||
|
||||
namespace Pica {
|
||||
|
||||
namespace CommandProcessor {
|
||||
|
||||
union CommandHeader {
|
||||
u32 hex;
|
||||
|
||||
BitField< 0, 16, u32> cmd_id;
|
||||
BitField<16, 4, u32> parameter_mask;
|
||||
BitField<20, 11, u32> extra_data_length;
|
||||
BitField<31, 1, u32> group_commands;
|
||||
};
|
||||
static_assert(std::is_standard_layout<CommandHeader>::value == true, "CommandHeader does not use standard layout");
|
||||
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
|
||||
|
||||
void ProcessCommandList(const u32* list, u32 size);
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace
|
|
@ -11,6 +11,8 @@
|
|||
#include "common/log.h"
|
||||
|
||||
#include "core/hle/service/gsp.h"
|
||||
|
||||
#include "command_processor.h"
|
||||
#include "pica.h"
|
||||
|
||||
class GraphicsDebugger
|
||||
|
@ -20,10 +22,10 @@ public:
|
|||
// A vector of commands represented by their raw byte sequence
|
||||
struct PicaCommand : public std::vector<u32>
|
||||
{
|
||||
const Pica::CommandHeader& GetHeader() const
|
||||
const Pica::CommandProcessor::CommandHeader& GetHeader() const
|
||||
{
|
||||
const u32& val = at(1);
|
||||
return *(Pica::CommandHeader*)&val;
|
||||
return *(Pica::CommandProcessor::CommandHeader*)&val;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -99,7 +101,7 @@ public:
|
|||
PicaCommandList cmdlist;
|
||||
for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;)
|
||||
{
|
||||
const Pica::CommandHeader header = static_cast<Pica::CommandHeader>(parse_pointer[1]);
|
||||
const Pica::CommandProcessor::CommandHeader& header = *(Pica::CommandProcessor::CommandHeader*)(&parse_pointer[1]);
|
||||
|
||||
cmdlist.push_back(PicaCommand());
|
||||
auto& cmd = cmdlist.back();
|
||||
|
|
578
src/video_core/math.h
Normal file
578
src/video_core/math.h
Normal file
|
@ -0,0 +1,578 @@
|
|||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
|
||||
// Copyright 2014 Tony Wasserka
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of the owner nor the names of its contributors may
|
||||
// be used to endorse or promote products derived from this software
|
||||
// without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cmath>
|
||||
|
||||
namespace Math {
|
||||
|
||||
template<typename T> class Vec2;
|
||||
template<typename T> class Vec3;
|
||||
template<typename T> class Vec4;
|
||||
|
||||
|
||||
template<typename T>
|
||||
class Vec2 {
|
||||
public:
|
||||
struct {
|
||||
T x,y;
|
||||
};
|
||||
|
||||
T* AsArray() { return &x; }
|
||||
|
||||
Vec2() = default;
|
||||
Vec2(const T a[2]) : x(a[0]), y(a[1]) {}
|
||||
Vec2(const T& _x, const T& _y) : x(_x), y(_y) {}
|
||||
|
||||
template<typename T2>
|
||||
Vec2<T2> Cast() const {
|
||||
return Vec2<T2>((T2)x, (T2)y);
|
||||
}
|
||||
|
||||
static Vec2 AssignToAll(const T& f)
|
||||
{
|
||||
return Vec2<T>(f, f);
|
||||
}
|
||||
|
||||
void Write(T a[2])
|
||||
{
|
||||
a[0] = x; a[1] = y;
|
||||
}
|
||||
|
||||
Vec2 operator +(const Vec2& other) const
|
||||
{
|
||||
return Vec2(x+other.x, y+other.y);
|
||||
}
|
||||
void operator += (const Vec2 &other)
|
||||
{
|
||||
x+=other.x; y+=other.y;
|
||||
}
|
||||
Vec2 operator -(const Vec2& other) const
|
||||
{
|
||||
return Vec2(x-other.x, y-other.y);
|
||||
}
|
||||
void operator -= (const Vec2& other)
|
||||
{
|
||||
x-=other.x; y-=other.y;
|
||||
}
|
||||
Vec2 operator -() const
|
||||
{
|
||||
return Vec2(-x,-y);
|
||||
}
|
||||
Vec2 operator * (const Vec2& other) const
|
||||
{
|
||||
return Vec2(x*other.x, y*other.y);
|
||||
}
|
||||
template<typename V>
|
||||
Vec2 operator * (const V& f) const
|
||||
{
|
||||
return Vec2(x*f,y*f);
|
||||
}
|
||||
template<typename V>
|
||||
void operator *= (const V& f)
|
||||
{
|
||||
x*=f; y*=f;
|
||||
}
|
||||
template<typename V>
|
||||
Vec2 operator / (const V& f) const
|
||||
{
|
||||
return Vec2(x/f,y/f);
|
||||
}
|
||||
template<typename V>
|
||||
void operator /= (const V& f)
|
||||
{
|
||||
*this = *this / f;
|
||||
}
|
||||
|
||||
T Length2() const
|
||||
{
|
||||
return x*x + y*y;
|
||||
}
|
||||
|
||||
// Only implemented for T=float
|
||||
float Length() const;
|
||||
void SetLength(const float l);
|
||||
Vec2 WithLength(const float l) const;
|
||||
float Distance2To(Vec2 &other);
|
||||
Vec2 Normalized() const;
|
||||
float Normalize(); // returns the previous length, which is often useful
|
||||
|
||||
T& operator [] (int i) //allow vector[1] = 3 (vector.y=3)
|
||||
{
|
||||
return *((&x) + i);
|
||||
}
|
||||
T operator [] (const int i) const
|
||||
{
|
||||
return *((&x) + i);
|
||||
}
|
||||
|
||||
void SetZero()
|
||||
{
|
||||
x=0; y=0;
|
||||
}
|
||||
|
||||
// Common aliases: UV (texel coordinates), ST (texture coordinates)
|
||||
T& u() { return x; }
|
||||
T& v() { return y; }
|
||||
T& s() { return x; }
|
||||
T& t() { return y; }
|
||||
|
||||
const T& u() const { return x; }
|
||||
const T& v() const { return y; }
|
||||
const T& s() const { return x; }
|
||||
const T& t() const { return y; }
|
||||
|
||||
// swizzlers - create a subvector of specific components
|
||||
Vec2 yx() const { return Vec2(y, x); }
|
||||
Vec2 vu() const { return Vec2(y, x); }
|
||||
Vec2 ts() const { return Vec2(y, x); }
|
||||
|
||||
// Inserters to add new elements to effectively create larger vectors containing this Vec2
|
||||
Vec3<T> InsertBeforeX(const T& value) {
|
||||
return Vec3<T>(value, x, y);
|
||||
}
|
||||
Vec3<T> InsertBeforeY(const T& value) {
|
||||
return Vec3<T>(x, value, y);
|
||||
}
|
||||
Vec3<T> Append(const T& value) {
|
||||
return Vec3<T>(x, y, value);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, typename V>
|
||||
Vec2<T> operator * (const V& f, const Vec2<T>& vec)
|
||||
{
|
||||
return Vec2<T>(f*vec.x,f*vec.y);
|
||||
}
|
||||
|
||||
typedef Vec2<float> Vec2f;
|
||||
|
||||
template<typename T>
|
||||
class Vec3
|
||||
{
|
||||
public:
|
||||
struct
|
||||
{
|
||||
T x,y,z;
|
||||
};
|
||||
|
||||
T* AsArray() { return &x; }
|
||||
|
||||
Vec3() = default;
|
||||
Vec3(const T a[3]) : x(a[0]), y(a[1]), z(a[2]) {}
|
||||
Vec3(const T& _x, const T& _y, const T& _z) : x(_x), y(_y), z(_z) {}
|
||||
|
||||
template<typename T2>
|
||||
Vec3<T2> Cast() const {
|
||||
return Vec3<T2>((T2)x, (T2)y, (T2)z);
|
||||
}
|
||||
|
||||
// Only implemented for T=int and T=float
|
||||
static Vec3 FromRGB(unsigned int rgb);
|
||||
unsigned int ToRGB() const; // alpha bits set to zero
|
||||
|
||||
static Vec3 AssignToAll(const T& f)
|
||||
{
|
||||
return Vec3<T>(f, f, f);
|
||||
}
|
||||
|
||||
void Write(T a[3])
|
||||
{
|
||||
a[0] = x; a[1] = y; a[2] = z;
|
||||
}
|
||||
|
||||
Vec3 operator +(const Vec3 &other) const
|
||||
{
|
||||
return Vec3(x+other.x, y+other.y, z+other.z);
|
||||
}
|
||||
void operator += (const Vec3 &other)
|
||||
{
|
||||
x+=other.x; y+=other.y; z+=other.z;
|
||||
}
|
||||
Vec3 operator -(const Vec3 &other) const
|
||||
{
|
||||
return Vec3(x-other.x, y-other.y, z-other.z);
|
||||
}
|
||||
void operator -= (const Vec3 &other)
|
||||
{
|
||||
x-=other.x; y-=other.y; z-=other.z;
|
||||
}
|
||||
Vec3 operator -() const
|
||||
{
|
||||
return Vec3(-x,-y,-z);
|
||||
}
|
||||
Vec3 operator * (const Vec3 &other) const
|
||||
{
|
||||
return Vec3(x*other.x, y*other.y, z*other.z);
|
||||
}
|
||||
template<typename V>
|
||||
Vec3 operator * (const V& f) const
|
||||
{
|
||||
return Vec3(x*f,y*f,z*f);
|
||||
}
|
||||
template<typename V>
|
||||
void operator *= (const V& f)
|
||||
{
|
||||
x*=f; y*=f; z*=f;
|
||||
}
|
||||
template<typename V>
|
||||
Vec3 operator / (const V& f) const
|
||||
{
|
||||
return Vec3(x/f,y/f,z/f);
|
||||
}
|
||||
template<typename V>
|
||||
void operator /= (const V& f)
|
||||
{
|
||||
*this = *this / f;
|
||||
}
|
||||
|
||||
T Length2() const
|
||||
{
|
||||
return x*x + y*y + z*z;
|
||||
}
|
||||
|
||||
// Only implemented for T=float
|
||||
float Length() const;
|
||||
void SetLength(const float l);
|
||||
Vec3 WithLength(const float l) const;
|
||||
float Distance2To(Vec3 &other);
|
||||
Vec3 Normalized() const;
|
||||
float Normalize(); // returns the previous length, which is often useful
|
||||
|
||||
T& operator [] (int i) //allow vector[2] = 3 (vector.z=3)
|
||||
{
|
||||
return *((&x) + i);
|
||||
}
|
||||
T operator [] (const int i) const
|
||||
{
|
||||
return *((&x) + i);
|
||||
}
|
||||
|
||||
void SetZero()
|
||||
{
|
||||
x=0; y=0; z=0;
|
||||
}
|
||||
|
||||
// Common aliases: UVW (texel coordinates), RGB (colors), STQ (texture coordinates)
|
||||
T& u() { return x; }
|
||||
T& v() { return y; }
|
||||
T& w() { return z; }
|
||||
|
||||
T& r() { return x; }
|
||||
T& g() { return y; }
|
||||
T& b() { return z; }
|
||||
|
||||
T& s() { return x; }
|
||||
T& t() { return y; }
|
||||
T& q() { return z; }
|
||||
|
||||
const T& u() const { return x; }
|
||||
const T& v() const { return y; }
|
||||
const T& w() const { return z; }
|
||||
|
||||
const T& r() const { return x; }
|
||||
const T& g() const { return y; }
|
||||
const T& b() const { return z; }
|
||||
|
||||
const T& s() const { return x; }
|
||||
const T& t() const { return y; }
|
||||
const T& q() const { return z; }
|
||||
|
||||
// swizzlers - create a subvector of specific components
|
||||
// e.g. Vec2 uv() { return Vec2(x,y); }
|
||||
// _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
|
||||
#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); }
|
||||
#define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \
|
||||
_DEFINE_SWIZZLER2(a, b, a##b); \
|
||||
_DEFINE_SWIZZLER2(a, b, a2##b2); \
|
||||
_DEFINE_SWIZZLER2(a, b, a3##b3); \
|
||||
_DEFINE_SWIZZLER2(a, b, a4##b4); \
|
||||
_DEFINE_SWIZZLER2(b, a, b##a); \
|
||||
_DEFINE_SWIZZLER2(b, a, b2##a2); \
|
||||
_DEFINE_SWIZZLER2(b, a, b3##a3); \
|
||||
_DEFINE_SWIZZLER2(b, a, b4##a4);
|
||||
|
||||
DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t);
|
||||
DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q);
|
||||
DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q);
|
||||
#undef DEFINE_SWIZZLER2
|
||||
#undef _DEFINE_SWIZZLER2
|
||||
|
||||
// Inserters to add new elements to effectively create larger vectors containing this Vec2
|
||||
Vec4<T> InsertBeforeX(const T& value) {
|
||||
return Vec4<T>(value, x, y, z);
|
||||
}
|
||||
Vec4<T> InsertBeforeY(const T& value) {
|
||||
return Vec4<T>(x, value, y, z);
|
||||
}
|
||||
Vec4<T> InsertBeforeZ(const T& value) {
|
||||
return Vec4<T>(x, y, value, z);
|
||||
}
|
||||
Vec4<T> Append(const T& value) {
|
||||
return Vec4<T>(x, y, z, value);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, typename V>
|
||||
Vec3<T> operator * (const V& f, const Vec3<T>& vec)
|
||||
{
|
||||
return Vec3<T>(f*vec.x,f*vec.y,f*vec.z);
|
||||
}
|
||||
|
||||
typedef Vec3<float> Vec3f;
|
||||
|
||||
template<typename T>
|
||||
class Vec4
|
||||
{
|
||||
public:
|
||||
struct
|
||||
{
|
||||
T x,y,z,w;
|
||||
};
|
||||
|
||||
T* AsArray() { return &x; }
|
||||
|
||||
Vec4() = default;
|
||||
Vec4(const T a[4]) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}
|
||||
Vec4(const T& _x, const T& _y, const T& _z, const T& _w) : x(_x), y(_y), z(_z), w(_w) {}
|
||||
|
||||
template<typename T2>
|
||||
Vec4<T2> Cast() const {
|
||||
return Vec4<T2>((T2)x, (T2)y, (T2)z, (T2)w);
|
||||
}
|
||||
|
||||
// Only implemented for T=int and T=float
|
||||
static Vec4 FromRGBA(unsigned int rgba);
|
||||
unsigned int ToRGBA() const;
|
||||
|
||||
static Vec4 AssignToAll(const T& f) {
|
||||
return Vec4<T>(f, f, f, f);
|
||||
}
|
||||
|
||||
void Write(T a[4])
|
||||
{
|
||||
a[0] = x; a[1] = y; a[2] = z; a[3] = w;
|
||||
}
|
||||
|
||||
Vec4 operator +(const Vec4& other) const
|
||||
{
|
||||
return Vec4(x+other.x, y+other.y, z+other.z, w+other.w);
|
||||
}
|
||||
void operator += (const Vec4& other)
|
||||
{
|
||||
x+=other.x; y+=other.y; z+=other.z; w+=other.w;
|
||||
}
|
||||
Vec4 operator -(const Vec4 &other) const
|
||||
{
|
||||
return Vec4(x-other.x, y-other.y, z-other.z, w-other.w);
|
||||
}
|
||||
void operator -= (const Vec4 &other)
|
||||
{
|
||||
x-=other.x; y-=other.y; z-=other.z; w-=other.w;
|
||||
}
|
||||
Vec4 operator -() const
|
||||
{
|
||||
return Vec4(-x,-y,-z,-w);
|
||||
}
|
||||
Vec4 operator * (const Vec4 &other) const
|
||||
{
|
||||
return Vec4(x*other.x, y*other.y, z*other.z, w*other.w);
|
||||
}
|
||||
template<typename V>
|
||||
Vec4 operator * (const V& f) const
|
||||
{
|
||||
return Vec4(x*f,y*f,z*f,w*f);
|
||||
}
|
||||
template<typename V>
|
||||
void operator *= (const V& f)
|
||||
{
|
||||
x*=f; y*=f; z*=f; w*=f;
|
||||
}
|
||||
template<typename V>
|
||||
Vec4 operator / (const V& f) const
|
||||
{
|
||||
return Vec4(x/f,y/f,z/f,w/f);
|
||||
}
|
||||
template<typename V>
|
||||
void operator /= (const V& f)
|
||||
{
|
||||
*this = *this / f;
|
||||
}
|
||||
|
||||
T Length2() const
|
||||
{
|
||||
return x*x + y*y + z*z + w*w;
|
||||
}
|
||||
|
||||
// Only implemented for T=float
|
||||
float Length() const;
|
||||
void SetLength(const float l);
|
||||
Vec4 WithLength(const float l) const;
|
||||
float Distance2To(Vec4 &other);
|
||||
Vec4 Normalized() const;
|
||||
float Normalize(); // returns the previous length, which is often useful
|
||||
|
||||
T& operator [] (int i) //allow vector[2] = 3 (vector.z=3)
|
||||
{
|
||||
return *((&x) + i);
|
||||
}
|
||||
T operator [] (const int i) const
|
||||
{
|
||||
return *((&x) + i);
|
||||
}
|
||||
|
||||
void SetZero()
|
||||
{
|
||||
x=0; y=0; z=0;
|
||||
}
|
||||
|
||||
// Common alias: RGBA (colors)
|
||||
T& r() { return x; }
|
||||
T& g() { return y; }
|
||||
T& b() { return z; }
|
||||
T& a() { return w; }
|
||||
|
||||
const T& r() const { return x; }
|
||||
const T& g() const { return y; }
|
||||
const T& b() const { return z; }
|
||||
const T& a() const { return w; }
|
||||
|
||||
// swizzlers - create a subvector of specific components
|
||||
// e.g. Vec2 uv() { return Vec2(x,y); }
|
||||
// _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
|
||||
#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); }
|
||||
#define DEFINE_SWIZZLER2(a, b, a2, b2) \
|
||||
_DEFINE_SWIZZLER2(a, b, a##b); \
|
||||
_DEFINE_SWIZZLER2(a, b, a2##b2); \
|
||||
_DEFINE_SWIZZLER2(b, a, b##a); \
|
||||
_DEFINE_SWIZZLER2(b, a, b2##a2);
|
||||
|
||||
DEFINE_SWIZZLER2(x, y, r, g);
|
||||
DEFINE_SWIZZLER2(x, z, r, b);
|
||||
DEFINE_SWIZZLER2(x, w, r, a);
|
||||
DEFINE_SWIZZLER2(y, z, g, b);
|
||||
DEFINE_SWIZZLER2(y, w, g, a);
|
||||
DEFINE_SWIZZLER2(z, w, b, a);
|
||||
#undef DEFINE_SWIZZLER2
|
||||
#undef _DEFINE_SWIZZLER2
|
||||
|
||||
#define _DEFINE_SWIZZLER3(a, b, c, name) Vec3<T> name() const { return Vec3<T>(a, b, c); }
|
||||
#define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \
|
||||
_DEFINE_SWIZZLER3(a, b, c, a##b##c); \
|
||||
_DEFINE_SWIZZLER3(a, c, b, a##c##b); \
|
||||
_DEFINE_SWIZZLER3(b, a, c, b##a##c); \
|
||||
_DEFINE_SWIZZLER3(b, c, a, b##c##a); \
|
||||
_DEFINE_SWIZZLER3(c, a, b, c##a##b); \
|
||||
_DEFINE_SWIZZLER3(c, b, a, c##b##a); \
|
||||
_DEFINE_SWIZZLER3(a, b, c, a2##b2##c2); \
|
||||
_DEFINE_SWIZZLER3(a, c, b, a2##c2##b2); \
|
||||
_DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \
|
||||
_DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \
|
||||
_DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \
|
||||
_DEFINE_SWIZZLER3(c, b, a, c2##b2##a2);
|
||||
|
||||
DEFINE_SWIZZLER3(x, y, z, r, g, b);
|
||||
DEFINE_SWIZZLER3(x, y, w, r, g, a);
|
||||
DEFINE_SWIZZLER3(x, z, w, r, b, a);
|
||||
DEFINE_SWIZZLER3(y, z, w, g, b, a);
|
||||
#undef DEFINE_SWIZZLER3
|
||||
#undef _DEFINE_SWIZZLER3
|
||||
};
|
||||
|
||||
|
||||
template<typename T, typename V>
|
||||
Vec4<T> operator * (const V& f, const Vec4<T>& vec)
|
||||
{
|
||||
return Vec4<T>(f*vec.x,f*vec.y,f*vec.z,f*vec.w);
|
||||
}
|
||||
|
||||
typedef Vec4<float> Vec4f;
|
||||
|
||||
|
||||
template<typename T>
|
||||
static inline T Dot(const Vec2<T>& a, const Vec2<T>& b)
|
||||
{
|
||||
return a.x*b.x + a.y*b.y;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static inline T Dot(const Vec3<T>& a, const Vec3<T>& b)
|
||||
{
|
||||
return a.x*b.x + a.y*b.y + a.z*b.z;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static inline T Dot(const Vec4<T>& a, const Vec4<T>& b)
|
||||
{
|
||||
return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static inline Vec3<T> Cross(const Vec3<T>& a, const Vec3<T>& b)
|
||||
{
|
||||
return Vec3<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
|
||||
}
|
||||
|
||||
// Linear interpolation controlled by a float weight:
// t = 0.0 yields begin, t = 1.0 yields end.
template<typename X>
static inline X Lerp(const X& begin, const X& end, const float t)
{
    const float begin_weight = 1.f-t;
    return begin*begin_weight + end*t;
}
|
||||
|
||||
// Linear interpolation controlled by an integer weight:
// t = 0 yields begin, t = base yields end.
template<typename X, int base>
static inline X LerpInt(const X& begin, const X& end, const int t)
{
    const int begin_weight = base-t;
    return (begin*begin_weight + end*t) / base;
}
|
||||
|
||||
// Utility vector factories
|
||||
template<typename T>
|
||||
static inline Vec2<T> MakeVec2(const T& x, const T& y)
|
||||
{
|
||||
return Vec2<T>{x, y};
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static inline Vec3<T> MakeVec3(const T& x, const T& y, const T& z)
|
||||
{
|
||||
return Vec3<T>{x, y, z};
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static inline Vec4<T> MakeVec4(const T& x, const T& y, const T& z, const T& w)
|
||||
{
|
||||
return Vec4<T>{x, y, z, w};
|
||||
}
|
||||
|
||||
} // namespace
|
|
@ -11,6 +11,8 @@
|
|||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
#include "core/mem_map.h"
|
||||
|
||||
namespace Pica {
|
||||
|
||||
// Returns index corresponding to the Regs member labeled by field_name
|
||||
|
@ -45,12 +47,104 @@ struct Regs {
|
|||
INSERT_PADDING_WORDS(0x41);
|
||||
|
||||
BitField<0, 24, u32> viewport_size_x;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
INSERT_PADDING_WORDS(0x1);
|
||||
BitField<0, 24, u32> viewport_size_y;
|
||||
|
||||
INSERT_PADDING_WORDS(0x1bc);
|
||||
INSERT_PADDING_WORDS(0x9);
|
||||
|
||||
BitField<0, 24, u32> viewport_depth_range; // float24
|
||||
BitField<0, 24, u32> viewport_depth_far_plane; // float24
|
||||
|
||||
INSERT_PADDING_WORDS(0x1);
|
||||
|
||||
union {
|
||||
// Maps components of output vertex attributes to semantics
|
||||
enum Semantic : u32
|
||||
{
|
||||
POSITION_X = 0,
|
||||
POSITION_Y = 1,
|
||||
POSITION_Z = 2,
|
||||
POSITION_W = 3,
|
||||
|
||||
COLOR_R = 8,
|
||||
COLOR_G = 9,
|
||||
COLOR_B = 10,
|
||||
COLOR_A = 11,
|
||||
|
||||
TEXCOORD0_U = 12,
|
||||
TEXCOORD0_V = 13,
|
||||
TEXCOORD1_U = 14,
|
||||
TEXCOORD1_V = 15,
|
||||
TEXCOORD2_U = 22,
|
||||
TEXCOORD2_V = 23,
|
||||
|
||||
INVALID = 31,
|
||||
};
|
||||
|
||||
BitField< 0, 5, Semantic> map_x;
|
||||
BitField< 8, 5, Semantic> map_y;
|
||||
BitField<16, 5, Semantic> map_z;
|
||||
BitField<24, 5, Semantic> map_w;
|
||||
} vs_output_attributes[7];
|
||||
|
||||
INSERT_PADDING_WORDS(0x11);
|
||||
|
||||
union {
|
||||
BitField< 0, 16, u32> x;
|
||||
BitField<16, 16, u32> y;
|
||||
} viewport_corner;
|
||||
|
||||
INSERT_PADDING_WORDS(0xa7);
|
||||
|
||||
struct {
|
||||
enum ColorFormat : u32 {
|
||||
RGBA8 = 0,
|
||||
RGB8 = 1,
|
||||
RGBA5551 = 2,
|
||||
RGB565 = 3,
|
||||
RGBA4 = 4,
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x6);
|
||||
|
||||
u32 depth_format;
|
||||
u32 color_format;
|
||||
|
||||
INSERT_PADDING_WORDS(0x4);
|
||||
|
||||
u32 depth_buffer_address;
|
||||
u32 color_buffer_address;
|
||||
|
||||
union {
|
||||
// Apparently, the framebuffer width is stored as expected,
|
||||
// while the height is stored as the actual height minus one.
|
||||
// Hence, don't access these fields directly but use the accessors
|
||||
// GetWidth() and GetHeight() instead.
|
||||
BitField< 0, 11, u32> width;
|
||||
BitField<12, 10, u32> height;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x1);
|
||||
|
||||
inline u32 GetColorBufferAddress() const {
|
||||
return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(color_buffer_address));
|
||||
}
|
||||
inline u32 GetDepthBufferAddress() const {
|
||||
return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(depth_buffer_address));
|
||||
}
|
||||
|
||||
inline u32 GetWidth() const {
|
||||
return width;
|
||||
}
|
||||
|
||||
inline u32 GetHeight() const {
|
||||
return height + 1;
|
||||
}
|
||||
} framebuffer;
|
||||
|
||||
INSERT_PADDING_WORDS(0xe0);
|
||||
|
||||
struct {
|
||||
enum class Format : u64 {
|
||||
BYTE = 0,
|
||||
UBYTE = 1,
|
||||
|
@ -58,36 +152,230 @@ struct Regs {
|
|||
FLOAT = 3,
|
||||
};
|
||||
|
||||
BitField< 0, 2, Format> format0;
|
||||
BitField< 2, 2, u64> size0; // number of elements minus 1
|
||||
BitField< 4, 2, Format> format1;
|
||||
BitField< 6, 2, u64> size1;
|
||||
BitField< 8, 2, Format> format2;
|
||||
BitField<10, 2, u64> size2;
|
||||
BitField<12, 2, Format> format3;
|
||||
BitField<14, 2, u64> size3;
|
||||
BitField<16, 2, Format> format4;
|
||||
BitField<18, 2, u64> size4;
|
||||
BitField<20, 2, Format> format5;
|
||||
BitField<22, 2, u64> size5;
|
||||
BitField<24, 2, Format> format6;
|
||||
BitField<26, 2, u64> size6;
|
||||
BitField<28, 2, Format> format7;
|
||||
BitField<30, 2, u64> size7;
|
||||
BitField<32, 2, Format> format8;
|
||||
BitField<34, 2, u64> size8;
|
||||
BitField<36, 2, Format> format9;
|
||||
BitField<38, 2, u64> size9;
|
||||
BitField<40, 2, Format> format10;
|
||||
BitField<42, 2, u64> size10;
|
||||
BitField<44, 2, Format> format11;
|
||||
BitField<46, 2, u64> size11;
|
||||
BitField<0, 29, u32> base_address;
|
||||
|
||||
BitField<48, 12, u64> attribute_mask;
|
||||
BitField<60, 4, u64> num_attributes; // number of total attributes minus 1
|
||||
} vertex_descriptor;
|
||||
// Decodes the base_address register and rebases the result from Memory::FCRAM_PADDR onto
// Memory::HEAP_GSP_VADDR.
inline u32 GetBaseAddress() const {
    // TODO: Ugly, should fix PhysicalToVirtualAddress instead
    const auto decoded_address = DecodeAddressRegister(base_address);
    return decoded_address - Memory::FCRAM_PADDR + Memory::HEAP_GSP_VADDR;
}
|
||||
|
||||
INSERT_PADDING_WORDS(0xfe);
|
||||
// Descriptor for internal vertex attributes
|
||||
union {
|
||||
BitField< 0, 2, Format> format0; // size of one element
|
||||
BitField< 2, 2, u64> size0; // number of elements minus 1
|
||||
BitField< 4, 2, Format> format1;
|
||||
BitField< 6, 2, u64> size1;
|
||||
BitField< 8, 2, Format> format2;
|
||||
BitField<10, 2, u64> size2;
|
||||
BitField<12, 2, Format> format3;
|
||||
BitField<14, 2, u64> size3;
|
||||
BitField<16, 2, Format> format4;
|
||||
BitField<18, 2, u64> size4;
|
||||
BitField<20, 2, Format> format5;
|
||||
BitField<22, 2, u64> size5;
|
||||
BitField<24, 2, Format> format6;
|
||||
BitField<26, 2, u64> size6;
|
||||
BitField<28, 2, Format> format7;
|
||||
BitField<30, 2, u64> size7;
|
||||
BitField<32, 2, Format> format8;
|
||||
BitField<34, 2, u64> size8;
|
||||
BitField<36, 2, Format> format9;
|
||||
BitField<38, 2, u64> size9;
|
||||
BitField<40, 2, Format> format10;
|
||||
BitField<42, 2, u64> size10;
|
||||
BitField<44, 2, Format> format11;
|
||||
BitField<46, 2, u64> size11;
|
||||
|
||||
BitField<48, 12, u64> attribute_mask;
|
||||
|
||||
// number of total attributes minus 1
|
||||
BitField<60, 4, u64> num_extra_attributes;
|
||||
};
|
||||
|
||||
// Returns the element format of attribute n.
// n indexes a 12-entry table (format0..format11) and is not range-checked.
inline Format GetFormat(int n) const {
    const Format attribute_formats[] = {
        format0, format1, format2,  format3,
        format4, format5, format6,  format7,
        format8, format9, format10, format11
    };
    return attribute_formats[n];
}
|
||||
|
||||
inline int GetNumElements(int n) const {
|
||||
u64 sizes[] = {
|
||||
size0, size1, size2, size3,
|
||||
size4, size5, size6, size7,
|
||||
size8, size9, size10, size11
|
||||
};
|
||||
return (int)sizes[n]+1;
|
||||
}
|
||||
|
||||
inline int GetElementSizeInBytes(int n) const {
|
||||
return (GetFormat(n) == Format::FLOAT) ? 4 :
|
||||
(GetFormat(n) == Format::SHORT) ? 2 : 1;
|
||||
}
|
||||
|
||||
inline int GetStride(int n) const {
|
||||
return GetNumElements(n) * GetElementSizeInBytes(n);
|
||||
}
|
||||
|
||||
inline int GetNumTotalAttributes() const {
|
||||
return (int)num_extra_attributes+1;
|
||||
}
|
||||
|
||||
// Attribute loaders map the source vertex data to input attributes
|
||||
// This e.g. allows to load different attributes from different memory locations
|
||||
struct {
|
||||
// Source attribute data offset from the base address
|
||||
u32 data_offset;
|
||||
|
||||
union {
|
||||
BitField< 0, 4, u64> comp0;
|
||||
BitField< 4, 4, u64> comp1;
|
||||
BitField< 8, 4, u64> comp2;
|
||||
BitField<12, 4, u64> comp3;
|
||||
BitField<16, 4, u64> comp4;
|
||||
BitField<20, 4, u64> comp5;
|
||||
BitField<24, 4, u64> comp6;
|
||||
BitField<28, 4, u64> comp7;
|
||||
BitField<32, 4, u64> comp8;
|
||||
BitField<36, 4, u64> comp9;
|
||||
BitField<40, 4, u64> comp10;
|
||||
BitField<44, 4, u64> comp11;
|
||||
|
||||
// bytes for a single vertex in this loader
|
||||
BitField<48, 8, u64> byte_count;
|
||||
|
||||
BitField<60, 4, u64> component_count;
|
||||
};
|
||||
|
||||
inline int GetComponent(int n) const {
|
||||
u64 components[] = {
|
||||
comp0, comp1, comp2, comp3,
|
||||
comp4, comp5, comp6, comp7,
|
||||
comp8, comp9, comp10, comp11
|
||||
};
|
||||
return (int)components[n];
|
||||
}
|
||||
} attribute_loaders[12];
|
||||
} vertex_attributes;
|
||||
|
||||
struct {
|
||||
enum IndexFormat : u32 {
|
||||
BYTE = 0,
|
||||
SHORT = 1,
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 31, u32> offset; // relative to base attribute address
|
||||
BitField<31, 1, IndexFormat> format;
|
||||
};
|
||||
} index_array;
|
||||
|
||||
// Number of vertices to render
|
||||
u32 num_vertices;
|
||||
|
||||
INSERT_PADDING_WORDS(0x5);
|
||||
|
||||
// These two trigger rendering of triangles
|
||||
u32 trigger_draw;
|
||||
u32 trigger_draw_indexed;
|
||||
|
||||
INSERT_PADDING_WORDS(0x2e);
|
||||
|
||||
// Ways in which submitted vertices can be assembled into triangles.
enum class TriangleTopology : u32 {
    List        = 0,
    Strip       = 1,
    Fan         = 2,
    ListIndexed = 3, // TODO: No idea if this is correct
};
|
||||
|
||||
BitField<8, 2, TriangleTopology> triangle_topology;
|
||||
|
||||
INSERT_PADDING_WORDS(0x5b);
|
||||
|
||||
// Offset to shader program entry point (in words)
|
||||
BitField<0, 16, u32> vs_main_offset;
|
||||
|
||||
union {
|
||||
BitField< 0, 4, u64> attribute0_register;
|
||||
BitField< 4, 4, u64> attribute1_register;
|
||||
BitField< 8, 4, u64> attribute2_register;
|
||||
BitField<12, 4, u64> attribute3_register;
|
||||
BitField<16, 4, u64> attribute4_register;
|
||||
BitField<20, 4, u64> attribute5_register;
|
||||
BitField<24, 4, u64> attribute6_register;
|
||||
BitField<28, 4, u64> attribute7_register;
|
||||
BitField<32, 4, u64> attribute8_register;
|
||||
BitField<36, 4, u64> attribute9_register;
|
||||
BitField<40, 4, u64> attribute10_register;
|
||||
BitField<44, 4, u64> attribute11_register;
|
||||
BitField<48, 4, u64> attribute12_register;
|
||||
BitField<52, 4, u64> attribute13_register;
|
||||
BitField<56, 4, u64> attribute14_register;
|
||||
BitField<60, 4, u64> attribute15_register;
|
||||
|
||||
int GetRegisterForAttribute(int attribute_index) {
|
||||
u64 fields[] = {
|
||||
attribute0_register, attribute1_register, attribute2_register, attribute3_register,
|
||||
attribute4_register, attribute5_register, attribute6_register, attribute7_register,
|
||||
attribute8_register, attribute9_register, attribute10_register, attribute11_register,
|
||||
attribute12_register, attribute13_register, attribute14_register, attribute15_register,
|
||||
};
|
||||
return (int)fields[attribute_index];
|
||||
}
|
||||
} vs_input_register_map;
|
||||
|
||||
INSERT_PADDING_WORDS(0x3);
|
||||
|
||||
struct {
|
||||
enum Format : u32
|
||||
{
|
||||
FLOAT24 = 0,
|
||||
FLOAT32 = 1
|
||||
};
|
||||
|
||||
bool IsFloat32() const {
|
||||
return format == FLOAT32;
|
||||
}
|
||||
|
||||
union {
|
||||
// Index of the next uniform to write to
|
||||
// TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices
|
||||
BitField<0, 7, u32> index;
|
||||
|
||||
BitField<31, 1, Format> format;
|
||||
};
|
||||
|
||||
// Writing to these registers sets the "current" uniform.
|
||||
// TODO: It's not clear how the hardware stores what the "current" uniform is.
|
||||
u32 set_value[8];
|
||||
|
||||
} vs_uniform_setup;
|
||||
|
||||
INSERT_PADDING_WORDS(0x2);
|
||||
|
||||
struct {
|
||||
u32 begin_load;
|
||||
|
||||
// Writing to these registers sets the "current" word in the shader program.
|
||||
// TODO: It's not clear how the hardware stores what the "current" word is.
|
||||
u32 set_word[8];
|
||||
} vs_program;
|
||||
|
||||
INSERT_PADDING_WORDS(0x1);
|
||||
|
||||
// This register group is used to load an internal table of swizzling patterns,
|
||||
// which are indexed by each shader instruction to specify vector component swizzling.
|
||||
struct {
|
||||
u32 begin_load;
|
||||
|
||||
// Writing to these registers sets the "current" swizzle pattern in the table.
|
||||
// TODO: It's not clear how the hardware stores what the "current" swizzle pattern is.
|
||||
u32 set_word[8];
|
||||
} vs_swizzle_patterns;
|
||||
|
||||
INSERT_PADDING_WORDS(0x22);
|
||||
|
||||
#undef INSERT_PADDING_WORDS_HELPER1
|
||||
#undef INSERT_PADDING_WORDS_HELPER2
|
||||
|
@ -112,7 +400,21 @@ struct Regs {
|
|||
|
||||
ADD_FIELD(viewport_size_x);
|
||||
ADD_FIELD(viewport_size_y);
|
||||
ADD_FIELD(vertex_descriptor);
|
||||
ADD_FIELD(viewport_depth_range);
|
||||
ADD_FIELD(viewport_depth_far_plane);
|
||||
ADD_FIELD(viewport_corner);
|
||||
ADD_FIELD(framebuffer);
|
||||
ADD_FIELD(vertex_attributes);
|
||||
ADD_FIELD(index_array);
|
||||
ADD_FIELD(num_vertices);
|
||||
ADD_FIELD(trigger_draw);
|
||||
ADD_FIELD(trigger_draw_indexed);
|
||||
ADD_FIELD(triangle_topology);
|
||||
ADD_FIELD(vs_main_offset);
|
||||
ADD_FIELD(vs_input_register_map);
|
||||
ADD_FIELD(vs_uniform_setup);
|
||||
ADD_FIELD(vs_program);
|
||||
ADD_FIELD(vs_swizzle_patterns);
|
||||
|
||||
#undef ADD_FIELD
|
||||
#endif // _MSC_VER
|
||||
|
@ -153,13 +455,106 @@ private:
|
|||
|
||||
ASSERT_REG_POSITION(viewport_size_x, 0x41);
|
||||
ASSERT_REG_POSITION(viewport_size_y, 0x43);
|
||||
ASSERT_REG_POSITION(vertex_descriptor, 0x200);
|
||||
ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
|
||||
ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e);
|
||||
ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
|
||||
ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
|
||||
ASSERT_REG_POSITION(viewport_corner, 0x68);
|
||||
ASSERT_REG_POSITION(framebuffer, 0x110);
|
||||
ASSERT_REG_POSITION(vertex_attributes, 0x200);
|
||||
ASSERT_REG_POSITION(index_array, 0x227);
|
||||
ASSERT_REG_POSITION(num_vertices, 0x228);
|
||||
ASSERT_REG_POSITION(trigger_draw, 0x22e);
|
||||
ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
|
||||
ASSERT_REG_POSITION(triangle_topology, 0x25e);
|
||||
ASSERT_REG_POSITION(vs_main_offset, 0x2ba);
|
||||
ASSERT_REG_POSITION(vs_input_register_map, 0x2bb);
|
||||
ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0);
|
||||
ASSERT_REG_POSITION(vs_program, 0x2cb);
|
||||
ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
#endif // !defined(_MSC_VER)
|
||||
|
||||
// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
|
||||
static_assert(sizeof(Regs) == 0x300 * sizeof(u32), "Invalid total size of register set");
|
||||
static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be");
|
||||
static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be");
|
||||
|
||||
extern Regs registers; // TODO: Not sure if we want to have one global instance for this
|
||||
|
||||
|
||||
struct float24 {
|
||||
static float24 FromFloat32(float val) {
|
||||
float24 ret;
|
||||
ret.value = val;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// 16 bit mantissa, 7 bit exponent, 1 bit sign
|
||||
// TODO: No idea if this works as intended
|
||||
static float24 FromRawFloat24(u32 hex) {
|
||||
float24 ret;
|
||||
if ((hex & 0xFFFFFF) == 0) {
|
||||
ret.value = 0;
|
||||
} else {
|
||||
u32 mantissa = hex & 0xFFFF;
|
||||
u32 exponent = (hex >> 16) & 0x7F;
|
||||
u32 sign = hex >> 23;
|
||||
ret.value = powf(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * powf(2.0f, -16.f));
|
||||
if (sign)
|
||||
ret.value = -ret.value;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Not recommended for anything but logging
|
||||
float ToFloat32() const {
|
||||
return value;
|
||||
}
|
||||
|
||||
float24 operator * (const float24& flt) const {
|
||||
return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
|
||||
}
|
||||
|
||||
float24 operator / (const float24& flt) const {
|
||||
return float24::FromFloat32(ToFloat32() / flt.ToFloat32());
|
||||
}
|
||||
|
||||
float24 operator + (const float24& flt) const {
|
||||
return float24::FromFloat32(ToFloat32() + flt.ToFloat32());
|
||||
}
|
||||
|
||||
float24 operator - (const float24& flt) const {
|
||||
return float24::FromFloat32(ToFloat32() - flt.ToFloat32());
|
||||
}
|
||||
|
||||
float24 operator - () const {
|
||||
return float24::FromFloat32(-ToFloat32());
|
||||
}
|
||||
|
||||
bool operator < (const float24& flt) const {
|
||||
return ToFloat32() < flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator > (const float24& flt) const {
|
||||
return ToFloat32() > flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator >= (const float24& flt) const {
|
||||
return ToFloat32() >= flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator <= (const float24& flt) const {
|
||||
return ToFloat32() <= flt.ToFloat32();
|
||||
}
|
||||
|
||||
private:
|
||||
float24() = default;
|
||||
|
||||
// Stored as a regular float, merely for convenience
|
||||
// TODO: Perform proper arithmetic on this!
|
||||
float value;
|
||||
};
|
||||
|
||||
union CommandHeader {
|
||||
CommandHeader(u32 h) : hex(h) {}
|
||||
|
|
51
src/video_core/primitive_assembly.cpp
Normal file
51
src/video_core/primitive_assembly.cpp
Normal file
|
@ -0,0 +1,51 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "clipper.h"
|
||||
#include "pica.h"
|
||||
#include "primitive_assembly.h"
|
||||
#include "vertex_shader.h"
|
||||
|
||||
namespace Pica {
|
||||
|
||||
namespace PrimitiveAssembly {
|
||||
|
||||
static OutputVertex buffer[2];
|
||||
static int buffer_index = 0; // TODO: reset this on emulation restart
|
||||
|
||||
// Collects vertices according to the configured triangle topology and hands every completed
// triangle to the clipper. Up to two vertices are kept in `buffer` between calls.
void SubmitVertex(OutputVertex& vtx)
{
    switch (registers.triangle_topology) {
    case Regs::TriangleTopology::List:
    case Regs::TriangleTopology::ListIndexed:
        if (buffer_index >= 2) {
            // Third vertex of the triangle: emit it and start over with an empty buffer
            buffer_index = 0;

            Clipper::ProcessTriangle(buffer[0], buffer[1], vtx);
        } else {
            buffer[buffer_index++] = vtx;
        }
        break;

    case Regs::TriangleTopology::Fan:
        if (buffer_index != 2) {
            buffer[buffer_index++] = vtx;
            break;
        }

        // NOTE(review): buffer_index is reset here, so the next two vertices overwrite both
        // buffer slots (including the one just stored below). As written this looks like it
        // assembles triangles the same way as the list case - confirm whether that is intended.
        buffer_index = 0;

        Clipper::ProcessTriangle(buffer[0], buffer[1], vtx);

        buffer[1] = vtx;
        break;

    default:
        ERROR_LOG(GPU, "Unknown triangle mode %x:", (int)registers.triangle_topology.Value());
        break;
    }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace
|
21
src/video_core/primitive_assembly.h
Normal file
21
src/video_core/primitive_assembly.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace Pica {
|
||||
|
||||
namespace VertexShader {
|
||||
struct OutputVertex;
|
||||
}
|
||||
|
||||
namespace PrimitiveAssembly {
|
||||
|
||||
using VertexShader::OutputVertex;
|
||||
|
||||
void SubmitVertex(OutputVertex& vtx);
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace
|
180
src/video_core/rasterizer.cpp
Normal file
180
src/video_core/rasterizer.cpp
Normal file
|
@ -0,0 +1,180 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
#include "math.h"
|
||||
#include "pica.h"
|
||||
#include "rasterizer.h"
|
||||
#include "vertex_shader.h"
|
||||
|
||||
namespace Pica {
|
||||
|
||||
namespace Rasterizer {
|
||||
|
||||
// Writes one pixel to the color buffer configured in the framebuffer registers.
// The color is packed as alpha << 24 | red << 16 | green << 8 | blue.
static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
    u32* const pixels = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress());
    const u32 packed_color = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();

    // Assuming RGBA8 format until actual framebuffer format handling is implemented
    u32* const target = pixels + x + y * registers.framebuffer.GetWidth() / 2;
    *target = packed_color;
}
|
||||
|
||||
// Reads the depth value stored for the given pixel from the depth buffer configured in the
// framebuffer registers.
static u32 GetDepth(int x, int y) {
    u16* const depth_values = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());

    // Assuming 16-bit depth buffer format until actual format handling is implemented
    const u16* const source = depth_values + x + y * registers.framebuffer.GetWidth() / 2;
    return *source;
}
|
||||
|
||||
static void SetDepth(int x, int y, u16 value) {
|
||||
u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());
|
||||
|
||||
// Assuming 16-bit depth buffer format until actual format handling is implemented
|
||||
*(depth_buffer + x + y * registers.framebuffer.GetWidth() / 2) = value;
|
||||
}
|
||||
|
||||
// Rasterizes one triangle given in screen coordinates.
// Every pixel center inside the triangle's bounding box is tested for coverage; for covered
// pixels the vertex colors are interpolated perspective-correctly and written to the color
// buffer, and the interpolated depth is written to the depth buffer.
// No depth test and no scissor test are performed.
void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     const VertexShader::OutputVertex& v1,
                     const VertexShader::OutputVertex& v2)
{
    // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
    struct Fix12P4 {
        Fix12P4() {}
        Fix12P4(u16 val) : val(val) {}

        static u16 FracMask() { return 0xF; }
        static u16 IntMask() { return (u16)~0xF; }

        operator u16() const {
            return val;
        }

        bool operator < (const Fix12P4& oth) const {
            return (u16)*this < (u16)oth;
        }

    private:
        u16 val;
    };

    // vertex positions in rasterizer coordinates
    auto FloatToFix = [](float24 flt) {
        return Fix12P4(flt.ToFloat32() * 16.0f);
    };
    auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) {
        return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
    };
    Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos),
                                   ScreenToRasterizerCoordinates(v1.screenpos),
                                   ScreenToRasterizerCoordinates(v2.screenpos) };

    // Bounding box of the triangle
    // TODO: Proper scissor rect test!
    u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
    u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
    u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
    u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});

    // Snap the box to whole pixels: round the minimum down and the maximum up
    min_x = min_x & Fix12P4::IntMask();
    min_y = min_y & Fix12P4::IntMask();
    max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask();
    max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask();

    // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not
    // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias
    // values which are added to the barycentric coordinates w0, w1 and w2, respectively.
    // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones...
    auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx,
                                          const Math::Vec2<Fix12P4>& line1,
                                          const Math::Vec2<Fix12P4>& line2)
    {
        if (line1.y == line2.y) {
            // just check if vertex is above us => bottom line parallel to x-axis
            return vtx.y < line1.y;
        } else {
            // check if vertex is on our left => right side
            // TODO: Not sure how likely this is to overflow
            return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / ((int)line2.y - (int)line1.y);
        }
    };
    int bias0 = IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0;
    int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
    int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;

    // z component of the cross product (vtx2 - vtx1) x (vtx3 - vtx1); used to calculate the
    // barycentric coordinates. It captures nothing, so it is defined once outside the loops.
    auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
                       const Math::Vec2<Fix12P4>& vtx2,
                       const Math::Vec2<Fix12P4>& vtx3) {
        const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0);
        const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0);
        // TODO: There is a very small chance this will overflow for sizeof(int) == 4
        return Cross(vec1, vec2).z;
    };

    // TODO: Not sure if looping through x first might be faster
    for (u16 y = min_y; y < max_y; y += 0x10) {
        for (u16 x = min_x; x < max_x; x += 0x10) {

            // Calculate the barycentric coordinates w0, w1 and w2
            int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
            int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
            int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
            int wsum = w0 + w1 + w2;

            // If current pixel is not covered by the current primitive
            if (w0 < 0 || w1 < 0 || w2 < 0)
                continue;

            // All three weights being zero (e.g. for a zero-area triangle) passes the coverage
            // test above, but both the attribute interpolation and the depth calculation below
            // would then divide by zero. Such pixels have no meaningful value, so skip them.
            if (wsum == 0)
                continue;

            // Perspective correct attribute interpolation:
            // Attribute values cannot be calculated by simple linear interpolation since
            // they are not linear in screen space. For example, when interpolating a
            // texture coordinate across two vertices, something simple like
            //     u = (u0*w0 + u1*w1)/(w0+w1)
            // will not work. However, the attribute value divided by the
            // clipspace w-coordinate (u/w) and the inverse w-coordinate (1/w) are linear
            // in screenspace. Hence, we can linearly interpolate these two independently and
            // calculate the interpolated attribute by dividing the results.
            // I.e.
            //     u_over_w   = ((u0/v0.pos.w)*w0 + (u1/v1.pos.w)*w1)/(w0+w1)
            //     one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1)
            //     u = u_over_w / one_over_w
            //
            // The generalization to three vertices is straightforward in barycentric coordinates.
            auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
                auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w,
                                                  attr1 / v1.pos.w,
                                                  attr2 / v2.pos.w);
                auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w,
                                                float24::FromFloat32(1.f) / v1.pos.w,
                                                float24::FromFloat32(1.f) / v2.pos.w);
                auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0),
                                                              float24::FromFloat32(w1),
                                                              float24::FromFloat32(w2));

                float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
                float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates);
                return interpolated_attr_over_w / interpolated_w_inverse;
            };

            Math::Vec4<u8> primary_color{
                (u8)(GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * 255),
                (u8)(GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * 255),
                (u8)(GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * 255),
                (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
            };

            // Depth is interpolated linearly in screen space and scaled to the 16-bit range
            u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
                           (float)v1.screenpos[2].ToFloat32() * w1 +
                           (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536?
            SetDepth(x >> 4, y >> 4, z);

            DrawPixel(x >> 4, y >> 4, primary_color);
        }
    }
}
|
||||
|
||||
} // namespace Rasterizer
|
||||
|
||||
} // namespace Pica
|
21
src/video_core/rasterizer.h
Normal file
21
src/video_core/rasterizer.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace Pica {
|
||||
|
||||
namespace VertexShader {
|
||||
struct OutputVertex;
|
||||
}
|
||||
|
||||
namespace Rasterizer {
|
||||
|
||||
void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
||||
const VertexShader::OutputVertex& v1,
|
||||
const VertexShader::OutputVertex& v2);
|
||||
|
||||
} // namespace Rasterizer
|
||||
|
||||
} // namespace Pica
|
|
@ -81,20 +81,20 @@ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect&
|
|||
const auto& framebuffer_top = GPU::g_regs.framebuffer_config[0];
|
||||
const auto& framebuffer_sub = GPU::g_regs.framebuffer_config[1];
|
||||
const u32 active_fb_top = (framebuffer_top.active_fb == 1)
|
||||
? framebuffer_top.address_left2
|
||||
: framebuffer_top.address_left1;
|
||||
? Memory::PhysicalToVirtualAddress(framebuffer_top.address_left2)
|
||||
: Memory::PhysicalToVirtualAddress(framebuffer_top.address_left1);
|
||||
const u32 active_fb_sub = (framebuffer_sub.active_fb == 1)
|
||||
? framebuffer_sub.address_left2
|
||||
: framebuffer_sub.address_left1;
|
||||
? Memory::PhysicalToVirtualAddress(framebuffer_sub.address_left2)
|
||||
: Memory::PhysicalToVirtualAddress(framebuffer_sub.address_left1);
|
||||
|
||||
DEBUG_LOG(GPU, "RenderXFB: 0x%08x bytes from 0x%08x(%dx%d), fmt %x",
|
||||
framebuffer_top.stride * framebuffer_top.height,
|
||||
GPU::GetFramebufferAddr(active_fb_top), (int)framebuffer_top.width,
|
||||
active_fb_top, (int)framebuffer_top.width,
|
||||
(int)framebuffer_top.height, (int)framebuffer_top.format);
|
||||
|
||||
// TODO: This should consider the GPU registers for framebuffer width, height and stride.
|
||||
FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_top), m_xfb_top_flipped);
|
||||
FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_sub), m_xfb_bottom_flipped);
|
||||
FlipFramebuffer(Memory::GetPointer(active_fb_top), m_xfb_top_flipped);
|
||||
FlipFramebuffer(Memory::GetPointer(active_fb_sub), m_xfb_bottom_flipped);
|
||||
|
||||
// Blit the top framebuffer
|
||||
// ------------------------
|
||||
|
|
270
src/video_core/vertex_shader.cpp
Normal file
270
src/video_core/vertex_shader.cpp
Normal file
|
@ -0,0 +1,270 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "pica.h"
|
||||
#include "vertex_shader.h"
|
||||
#include <core/mem_map.h>
|
||||
#include <common/file_util.h>
|
||||
|
||||
namespace Pica {
|
||||
|
||||
namespace VertexShader {
|
||||
|
||||
static struct {
|
||||
Math::Vec4<float24> f[96];
|
||||
} shader_uniforms;
|
||||
|
||||
|
||||
// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
|
||||
// For now, we just keep these local arrays around.
|
||||
static u32 shader_memory[1024];
|
||||
static u32 swizzle_data[1024];
|
||||
|
||||
// Stores one word of the vertex shader program.
// addr is a word index into shader_memory. Indices beyond the end of the array are logged
// and ignored instead of writing out of bounds.
void SubmitShaderMemoryChange(u32 addr, u32 value)
{
    const u32 num_words = sizeof(shader_memory) / sizeof(shader_memory[0]);
    if (addr >= num_words) {
        ERROR_LOG(GPU, "Shader memory write out of range: index 0x%x, value 0x%08x", addr, value);
        return;
    }

    shader_memory[addr] = value;
}
|
||||
|
||||
// Stores one entry of the swizzle pattern table.
// addr is an index into swizzle_data. Indices beyond the end of the array are logged and
// ignored instead of writing out of bounds.
void SubmitSwizzleDataChange(u32 addr, u32 value)
{
    const u32 num_patterns = sizeof(swizzle_data) / sizeof(swizzle_data[0]);
    if (addr >= num_patterns) {
        ERROR_LOG(GPU, "Swizzle data write out of range: index 0x%x, value 0x%08x", addr, value);
        return;
    }

    swizzle_data[addr] = value;
}
|
||||
|
||||
// Returns a mutable reference to the float uniform with the given index.
// The uniform table has 96 entries; index is not range-checked.
Math::Vec4<float24>& GetFloatUniform(u32 index)
{
    Math::Vec4<float24>& uniform = shader_uniforms.f[index];
    return uniform;
}
|
||||
|
||||
struct VertexShaderState {
|
||||
u32* program_counter;
|
||||
|
||||
const float24* input_register_table[16];
|
||||
float24* output_register_table[7*4];
|
||||
|
||||
Math::Vec4<float24> temporary_registers[16];
|
||||
bool status_registers[2];
|
||||
|
||||
enum {
|
||||
INVALID_ADDRESS = 0xFFFFFFFF
|
||||
};
|
||||
u32 call_stack[8]; // TODO: What is the maximal call stack depth?
|
||||
u32* call_stack_pointer;
|
||||
};
|
||||
|
||||
// Interprets vertex shader instructions, starting at state.program_counter.
// Execution stops at a RET instruction that finds INVALID_ADDRESS on top of the call stack.
// Results are written through state.output_register_table and state.temporary_registers.
static void ProcessShaderCode(VertexShaderState& state) {
    // Number of entries (not bytes) in the call stack
    const auto call_stack_capacity = sizeof(state.call_stack) / sizeof(state.call_stack[0]);

    while (true) {
        bool increment_pc = true;
        bool exit_loop = false;
        const Instruction& instr = *(const Instruction*)state.program_counter;

        // Source 1 may be an input register, a temporary register or a float uniform;
        // source 2 only an input register or a temporary register.
        // NOTE: src1_ and dest are nullptr for indices outside the handled ranges and are
        // used unchecked by the instruction handlers below.
        const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1]
                             : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x
                             : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x
                             : nullptr;
        const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2]
                             : &state.temporary_registers[instr.common.src2-0x10].x;
        // TODO: Unsure about the limit values
        float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest]
                      : (instr.common.dest <= 0x3C) ? nullptr
                      : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4]
                      : nullptr;

        const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];

        // Source operands with the component swizzle already applied
        const float24 src1[4] = {
            src1_[(int)swizzle.GetSelectorSrc1(0)],
            src1_[(int)swizzle.GetSelectorSrc1(1)],
            src1_[(int)swizzle.GetSelectorSrc1(2)],
            src1_[(int)swizzle.GetSelectorSrc1(3)],
        };
        const float24 src2[4] = {
            src2_[(int)swizzle.GetSelectorSrc2(0)],
            src2_[(int)swizzle.GetSelectorSrc2(1)],
            src2_[(int)swizzle.GetSelectorSrc2(2)],
            src2_[(int)swizzle.GetSelectorSrc2(3)],
        };

        switch (instr.opcode) {
            case Instruction::OpCode::ADD:
            {
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    dest[i] = src1[i] + src2[i];
                }

                break;
            }

            case Instruction::OpCode::MUL:
            {
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    dest[i] = src1[i] * src2[i];
                }

                break;
            }

            case Instruction::OpCode::DP3:
            case Instruction::OpCode::DP4:
            {
                float24 dot = float24::FromFloat32(0.f);
                int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
                for (int i = 0; i < num_components; ++i)
                    dot = dot + src1[i] * src2[i];

                // The scalar result goes to every enabled destination component. This loop
                // must cover all four components even for DP3, where only three source
                // components take part in the sum.
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    dest[i] = dot;
                }
                break;
            }

            // Reciprocal
            case Instruction::OpCode::RCP:
            {
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    // TODO: Be stable against division by zero!
                    // TODO: I think this might be wrong... we should only use one component here
                    dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32());
                }

                break;
            }

            // Reciprocal Square Root
            case Instruction::OpCode::RSQ:
            {
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    // TODO: Be stable against division by zero!
                    // TODO: I think this might be wrong... we should only use one component here
                    dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32()));
                }

                break;
            }

            case Instruction::OpCode::MOV:
            {
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    dest[i] = src1[i];
                }
                break;
            }

            case Instruction::OpCode::RET:
                if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) {
                    // Nothing to return to: the shader is finished
                    exit_loop = true;
                } else {
                    // Jump back to the CALL instruction; the program counter increment at the
                    // end of this iteration then moves on to the instruction following it.
                    state.program_counter = &shader_memory[*state.call_stack_pointer];

                    // Invalidate the slot that was just consumed, then pop it. The caller's
                    // own return address in the slot below must stay intact, otherwise
                    // returning from a nested call would end the whole shader.
                    // NOTE(review): assumes the bottom slot is initialized to INVALID_ADDRESS
                    // by whoever sets up the state - confirm.
                    *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS;
                }

                break;

            case Instruction::OpCode::CALL:
                increment_pc = false;

                // The push below writes to the slot after the current one, so that slot has
                // to exist. The bound is the number of entries, not the size in bytes.
                _dbg_assert_(GPU, (int)(state.call_stack_pointer - state.call_stack) + 1 < (int)call_stack_capacity);

                // Remember the offset of this CALL instruction as the return address
                *++state.call_stack_pointer = state.program_counter - shader_memory;
                // TODO: Does this offset refer to the beginning of shader memory?
                state.program_counter = &shader_memory[instr.flow_control.offset_words];
                break;

            case Instruction::OpCode::FLS:
                // TODO: Do whatever needs to be done here?
                break;

            default:
                ERROR_LOG(GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
                          (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex);
                break;
        }

        if (increment_pc)
            ++state.program_counter;

        if (exit_loop)
            break;
    }
}
|
||||
|
||||
/// Runs the vertex shader program on a single vertex.
///
/// @param input          Attributes of the vertex to process.
/// @param num_attributes Number of leading entries of input.attr that are bound to shader
///                       input registers; all other input registers refer to a dummy value.
/// @return The vertex assembled from whatever the shader wrote to its output registers.
OutputVertex RunShader(const InputVertex& input, int num_attributes)
{
    VertexShaderState state;

    // Execution begins at the configured entry point inside shader memory
    const u32* entry_point = &shader_memory[registers.vs_main_offset];
    state.program_counter = const_cast<u32*>(entry_point);

    // Setup input register table: every register starts out referring to a dummy value,
    // afterwards each supplied attribute is bound to the register chosen by the register map.
    // NOTE(review): dummy_register is never assigned, so unbound input registers appear to
    // yield an indeterminate value -- confirm whether a defined value is wanted here.
    const auto& attribute_register_map = registers.vs_input_register_map;
    float24 dummy_register;
    std::fill(&state.input_register_table[0], &state.input_register_table[16], &dummy_register);

    const auto bind_attribute = [&](int index, u32 target_register) {
        if (num_attributes > index)
            state.input_register_table[target_register] = &input.attr[index].x;
    };
    bind_attribute(0,  attribute_register_map.attribute0_register);
    bind_attribute(1,  attribute_register_map.attribute1_register);
    bind_attribute(2,  attribute_register_map.attribute2_register);
    bind_attribute(3,  attribute_register_map.attribute3_register);
    bind_attribute(4,  attribute_register_map.attribute4_register);
    bind_attribute(5,  attribute_register_map.attribute5_register);
    bind_attribute(6,  attribute_register_map.attribute6_register);
    bind_attribute(7,  attribute_register_map.attribute7_register);
    bind_attribute(8,  attribute_register_map.attribute8_register);
    bind_attribute(9,  attribute_register_map.attribute9_register);
    bind_attribute(10, attribute_register_map.attribute10_register);
    bind_attribute(11, attribute_register_map.attribute11_register);
    bind_attribute(12, attribute_register_map.attribute12_register);
    bind_attribute(13, attribute_register_map.attribute13_register);
    bind_attribute(14, attribute_register_map.attribute14_register);
    bind_attribute(15, attribute_register_map.attribute15_register);

    // Setup output register table: the result vertex is viewed as a flat array of float24
    // values, and each output register component is redirected to the slot selected by the
    // corresponding entry of the output attribute map.
    OutputVertex ret;
    float24* const output_slots = reinterpret_cast<float24*>(&ret);
    for (int reg = 0; reg < 7; ++reg) {
        const auto& output_register_map = registers.vs_output_attributes[reg];

        u32 semantics[4] = {
            output_register_map.map_x, output_register_map.map_y,
            output_register_map.map_z, output_register_map.map_w
        };

        for (int comp = 0; comp < 4; ++comp) {
            state.output_register_table[4 * reg + comp] = output_slots + semantics[comp];
        }
    }

    // Reset status flags and mark every call stack entry as unused
    state.status_registers[0] = false;
    state.status_registers[1] = false;
    const auto call_stack_entries = sizeof(state.call_stack) / sizeof(state.call_stack[0]);
    std::fill(state.call_stack, state.call_stack + call_stack_entries,
              VertexShaderState::INVALID_ADDRESS);
    state.call_stack_pointer = &state.call_stack[0];

    ProcessShaderCode(state);

    DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
              ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
              ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
              ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());

    return ret;
}
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace
|
211
src/video_core/vertex_shader.h
Normal file
211
src/video_core/vertex_shader.h
Normal file
|
@ -0,0 +1,211 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <initializer_list>
#include <map>
#include <string>
#include <type_traits>

#include <common/common_types.h>

#include "math.h"
#include "pica.h"
|
||||
|
||||
namespace Pica {
|
||||
|
||||
namespace VertexShader {
|
||||
|
||||
struct InputVertex {
|
||||
Math::Vec4<float24> attr[16];
|
||||
};
|
||||
|
||||
struct OutputVertex {
|
||||
OutputVertex() = default;
|
||||
|
||||
// VS output attributes
|
||||
Math::Vec4<float24> pos;
|
||||
Math::Vec4<float24> dummy; // quaternions (not implemented, yet)
|
||||
Math::Vec4<float24> color;
|
||||
Math::Vec2<float24> tc0;
|
||||
float24 tc0_v;
|
||||
|
||||
// Padding for optimal alignment
|
||||
float24 pad[14];
|
||||
|
||||
// Attributes used to store intermediate results
|
||||
|
||||
// position after perspective divide
|
||||
Math::Vec3<float24> screenpos;
|
||||
|
||||
// Linear interpolation
|
||||
// factor: 0=this, 1=vtx
|
||||
void Lerp(float24 factor, const OutputVertex& vtx) {
|
||||
pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
|
||||
|
||||
// TODO: Should perform perspective correct interpolation here...
|
||||
tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
|
||||
|
||||
screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
|
||||
|
||||
color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
|
||||
}
|
||||
|
||||
// Linear interpolation
|
||||
// factor: 0=v0, 1=v1
|
||||
static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) {
|
||||
OutputVertex ret = v0;
|
||||
ret.Lerp(factor, v1);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
|
||||
|
||||
union Instruction {
|
||||
enum class OpCode : u32 {
|
||||
ADD = 0x0,
|
||||
DP3 = 0x1,
|
||||
DP4 = 0x2,
|
||||
|
||||
MUL = 0x8,
|
||||
|
||||
MAX = 0xC,
|
||||
MIN = 0xD,
|
||||
RCP = 0xE,
|
||||
RSQ = 0xF,
|
||||
|
||||
MOV = 0x13,
|
||||
|
||||
RET = 0x21,
|
||||
FLS = 0x22, // Flush
|
||||
CALL = 0x24,
|
||||
};
|
||||
|
||||
std::string GetOpCodeName() const {
|
||||
std::map<OpCode, std::string> map = {
|
||||
{ OpCode::ADD, "ADD" },
|
||||
{ OpCode::DP3, "DP3" },
|
||||
{ OpCode::DP4, "DP4" },
|
||||
{ OpCode::MUL, "MUL" },
|
||||
{ OpCode::MAX, "MAX" },
|
||||
{ OpCode::MIN, "MIN" },
|
||||
{ OpCode::RCP, "RCP" },
|
||||
{ OpCode::RSQ, "RSQ" },
|
||||
{ OpCode::MOV, "MOV" },
|
||||
{ OpCode::RET, "RET" },
|
||||
{ OpCode::FLS, "FLS" },
|
||||
};
|
||||
auto it = map.find(opcode);
|
||||
if (it == map.end())
|
||||
return "UNK";
|
||||
else
|
||||
return it->second;
|
||||
}
|
||||
|
||||
u32 hex;
|
||||
|
||||
BitField<0x1a, 0x6, OpCode> opcode;
|
||||
|
||||
// General notes:
|
||||
//
|
||||
// When two input registers are used, one of them uses a 5-bit index while the other
|
||||
// one uses a 7-bit index. This is because at most one floating point uniform may be used
|
||||
// as an input.
|
||||
|
||||
|
||||
// Format used e.g. by arithmetic instructions and comparisons
|
||||
// "src1" and "src2" specify register indices (i.e. indices referring to groups of 4 floats),
|
||||
// while "dest" addresses individual floats.
|
||||
union {
|
||||
BitField<0x00, 0x5, u32> operand_desc_id;
|
||||
BitField<0x07, 0x5, u32> src2;
|
||||
BitField<0x0c, 0x7, u32> src1;
|
||||
BitField<0x13, 0x7, u32> dest;
|
||||
} common;
|
||||
|
||||
// Format used for flow control instructions ("if")
|
||||
union {
|
||||
BitField<0x00, 0x8, u32> num_instructions;
|
||||
BitField<0x0a, 0xc, u32> offset_words;
|
||||
} flow_control;
|
||||
};
|
||||
|
||||
union SwizzlePattern {
|
||||
u32 hex;
|
||||
|
||||
enum class Selector : u32 {
|
||||
x = 0,
|
||||
y = 1,
|
||||
z = 2,
|
||||
w = 3
|
||||
};
|
||||
|
||||
Selector GetSelectorSrc1(int comp) const {
|
||||
Selector selectors[] = {
|
||||
src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3
|
||||
};
|
||||
return selectors[comp];
|
||||
}
|
||||
|
||||
Selector GetSelectorSrc2(int comp) const {
|
||||
Selector selectors[] = {
|
||||
src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3
|
||||
};
|
||||
return selectors[comp];
|
||||
}
|
||||
|
||||
bool DestComponentEnabled(int i) const {
|
||||
return (dest_mask & (0x8 >> i));
|
||||
}
|
||||
|
||||
std::string SelectorToString(bool src2) const {
|
||||
std::map<Selector, std::string> map = {
|
||||
{ Selector::x, "x" },
|
||||
{ Selector::y, "y" },
|
||||
{ Selector::z, "z" },
|
||||
{ Selector::w, "w" }
|
||||
};
|
||||
std::string ret;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::string DestMaskToString() const {
|
||||
std::string ret;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!DestComponentEnabled(i))
|
||||
ret += "_";
|
||||
else
|
||||
ret += "xyzw"[i];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x
|
||||
BitField< 0, 4, u32> dest_mask;
|
||||
|
||||
BitField< 5, 2, Selector> src1_selector_3;
|
||||
BitField< 7, 2, Selector> src1_selector_2;
|
||||
BitField< 9, 2, Selector> src1_selector_1;
|
||||
BitField<11, 2, Selector> src1_selector_0;
|
||||
|
||||
BitField<14, 2, Selector> src2_selector_3;
|
||||
BitField<16, 2, Selector> src2_selector_2;
|
||||
BitField<18, 2, Selector> src2_selector_1;
|
||||
BitField<20, 2, Selector> src2_selector_0;
|
||||
|
||||
BitField<31, 1, u32> flag; // not sure what this means, maybe it's the sign?
|
||||
};
|
||||
|
||||
void SubmitShaderMemoryChange(u32 addr, u32 value);
|
||||
void SubmitSwizzleDataChange(u32 addr, u32 value);
|
||||
|
||||
OutputVertex RunShader(const InputVertex& input, int num_attributes);
|
||||
|
||||
Math::Vec4<float24>& GetFloatUniform(u32 index);
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace
|
||||
|
|
@ -20,14 +20,25 @@
|
|||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="renderer_opengl\renderer_opengl.cpp" />
|
||||
<ClCompile Include="clipper.cpp" />
|
||||
<ClCompile Include="command_processor.cpp" />
|
||||
<ClCompile Include="primitive_assembly.cpp" />
|
||||
<ClCompile Include="rasterizer.cpp" />
|
||||
<ClCompile Include="utils.cpp" />
|
||||
<ClCompile Include="vertex_shader.cpp" />
|
||||
<ClCompile Include="video_core.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="clipper.h" />
|
||||
<ClInclude Include="command_processor.h" />
|
||||
<ClInclude Include="gpu_debugger.h" />
|
||||
<ClInclude Include="math.h" />
|
||||
<ClInclude Include="pica.h" />
|
||||
<ClInclude Include="primitive_assembly.h" />
|
||||
<ClInclude Include="rasterizer.h" />
|
||||
<ClInclude Include="renderer_base.h" />
|
||||
<ClInclude Include="utils.h" />
|
||||
<ClInclude Include="vertex_shader.h" />
|
||||
<ClInclude Include="video_core.h" />
|
||||
<ClInclude Include="renderer_opengl\renderer_opengl.h" />
|
||||
</ItemGroup>
|
||||
|
|
|
@ -9,17 +9,28 @@
|
|||
<ClCompile Include="renderer_opengl\renderer_opengl.cpp">
|
||||
<Filter>renderer_opengl</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="clipper.cpp" />
|
||||
<ClCompile Include="command_processor.cpp" />
|
||||
<ClCompile Include="primitive_assembly.cpp" />
|
||||
<ClCompile Include="rasterizer.cpp" />
|
||||
<ClCompile Include="utils.cpp" />
|
||||
<ClCompile Include="vertex_shader.cpp" />
|
||||
<ClCompile Include="video_core.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="renderer_opengl\renderer_opengl.h">
|
||||
<Filter>renderer_opengl</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="clipper.h" />
|
||||
<ClInclude Include="command_processor.h" />
|
||||
<ClInclude Include="gpu_debugger.h" />
|
||||
<ClInclude Include="math.h" />
|
||||
<ClInclude Include="pica.h" />
|
||||
<ClInclude Include="primitive_assembly.h" />
|
||||
<ClInclude Include="rasterizer.h" />
|
||||
<ClInclude Include="renderer_base.h" />
|
||||
<ClInclude Include="utils.h" />
|
||||
<ClInclude Include="vertex_shader.h" />
|
||||
<ClInclude Include="video_core.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
|
Loading…
Reference in a new issue