From e832bbe554e174694cb43d4fe86f31af283a10da Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 25 Jul 2014 11:22:40 +0200 Subject: [PATCH 01/18] GSP: Add a helper function for convenience. --- src/core/hle/service/gsp.cpp | 43 ++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index 635f50a53..f793b592c 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp @@ -42,33 +42,38 @@ static inline InterruptRelayQueue* GetInterruptRelayQueue(u32 thread_id) { sizeof(InterruptRelayQueue) * thread_id); } +void WriteHWRegs(u32 base_address, u32 size_in_bytes, const u32* data) { + // TODO: Return proper error codes + if (base_address + size_in_bytes >= 0x420000) { + ERROR_LOG(GPU, "Write address out of range! (address=0x%08x, size=0x%08x)", + base_address, size_in_bytes); + return; + } + + // size should be word-aligned + if ((size_in_bytes % 4) != 0) { + ERROR_LOG(GPU, "Invalid size 0x%08x", size_in_bytes); + return; + } + + while (size_in_bytes > 0) { + GPU::Write(base_address + 0x1EB00000, *data); + + size_in_bytes -= 4; + ++data; + base_address += 4; + } +} + /// Write a GSP GPU hardware register void WriteHWRegs(Service::Interface* self) { u32* cmd_buff = Service::GetCommandBuffer(); u32 reg_addr = cmd_buff[1]; u32 size = cmd_buff[2]; - // TODO: Return proper error codes - if (reg_addr + size >= 0x420000) { - ERROR_LOG(GPU, "Write address out of range! 
(address=0x%08x, size=0x%08x)", reg_addr, size); - return; - } - - // size should be word-aligned - if ((size % 4) != 0) { - ERROR_LOG(GPU, "Invalid size 0x%08x", size); - return; - } - u32* src = (u32*)Memory::GetPointer(cmd_buff[0x4]); - while (size > 0) { - GPU::Write(reg_addr + 0x1EB00000, *src); - - size -= 4; - ++src; - reg_addr += 4; - } + WriteHWRegs(reg_addr, size, src); } /// Read a GSP GPU hardware register From 590c206ac8836f0e4544d2cb84191d77d07b9f36 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 25 Jul 2014 11:23:28 +0200 Subject: [PATCH 02/18] GSP: Implement SetBufferSwap. --- src/core/hle/service/gsp.cpp | 36 +++++++++++++++++++++++++++++++++++- src/core/hle/service/gsp.h | 12 ++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index f793b592c..417c01b83 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp @@ -105,6 +105,40 @@ void ReadHWRegs(Service::Interface* self) { } } +void SetBufferSwap(u32 screen_id, const FrameBufferInfo& info) { + u32 base_address = 0x400000; + if (info.active_fb == 0) { + WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].address_left1), 4, &info.address_left); + WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].address_right1), 4, &info.address_right); + } else { + WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].address_left2), 4, &info.address_left); + WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].address_right2), 4, &info.address_right); + } + WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].stride), 4, &info.stride); + WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].color_format), 4, &info.format); + WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].active_fb), 4, &info.shown_fb); +} + +/** + * GSP_GPU::SetBufferSwap service 
function + * + * Updates GPU display framebuffer configuration using the specified parameters. + * + * Inputs: + * 1 : Screen ID (0 = top screen, 1 = bottom screen) + * 2-7 : FrameBufferInfo structure + * Outputs: + * 1: Result code + */ +void SetBufferSwap(Service::Interface* self) { + u32* cmd_buff = Service::GetCommandBuffer(); + u32 screen_id = cmd_buff[1]; + FrameBufferInfo* fb_info = (FrameBufferInfo*)&cmd_buff[2]; + SetBufferSwap(screen_id, *fb_info); + + cmd_buff[1] = 0; // No error +} + /** * GSP_GPU::RegisterInterruptRelayQueue service function * Inputs: @@ -283,7 +317,7 @@ const Interface::FunctionInfo FunctionTable[] = { {0x00020084, nullptr, "WriteHWRegsWithMask"}, {0x00030082, nullptr, "WriteHWRegRepeat"}, {0x00040080, ReadHWRegs, "ReadHWRegs"}, - {0x00050200, nullptr, "SetBufferSwap"}, + {0x00050200, SetBufferSwap, "SetBufferSwap"}, {0x00060082, nullptr, "SetCommandList"}, {0x000700C2, nullptr, "RequestDma"}, {0x00080082, nullptr, "FlushDataCache"}, diff --git a/src/core/hle/service/gsp.h b/src/core/hle/service/gsp.h index b25dbb7bc..884cfd65a 100644 --- a/src/core/hle/service/gsp.h +++ b/src/core/hle/service/gsp.h @@ -64,6 +64,18 @@ struct InterruptRelayQueue { static_assert(sizeof(InterruptRelayQueue) == 0x40, "InterruptRelayQueue struct has incorrect size"); +struct FrameBufferInfo { + BitField<0, 1, u32> active_fb; // 0 = first, 1 = second + + u32 address_left; + u32 address_right; + u32 stride; // maps to 0x1EF00X90 ? + u32 format; // maps to 0x1EF00X70 ? + u32 shown_fb; // maps to 0x1EF00X78 ? + u32 unknown; +}; +static_assert(sizeof(FrameBufferInfo) == 0x1c, "Struct has incorrect size"); + /// GSP command struct Command { BitField<0, 8, CommandId> id; From 14b24a75b37545faf49584864cb85555f22a0154 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 19 Aug 2014 20:57:43 +0200 Subject: [PATCH 03/18] GSP: Update framebuffer information when necessary. 
--- src/core/hle/service/gsp.cpp | 25 +++++++++++++++++++++++-- src/core/hle/service/gsp.h | 18 ++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index 417c01b83..027ba5a37 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp @@ -36,6 +36,17 @@ static inline u8* GetCommandBuffer(u32 thread_id) { 0x800 + (thread_id * sizeof(CommandBuffer))); } +static inline FrameBufferUpdate* GetFrameBufferInfo(u32 thread_id, u32 screen_index) { + if (0 == g_shared_memory) + return nullptr; + + _dbg_assert_msg_(GSP, screen_index < 2, "Invalid screen index"); + + // For each thread there are two FrameBufferUpdate fields + u32 offset = 0x200 + (2 * thread_id + screen_index) * sizeof(FrameBufferUpdate); + return (FrameBufferUpdate*)Kernel::GetSharedMemoryPointer(g_shared_memory, offset); +} + /// Gets a pointer to the interrupt relay queue for a given thread index static inline InterruptRelayQueue* GetInterruptRelayQueue(u32 thread_id) { return (InterruptRelayQueue*)Kernel::GetSharedMemoryPointer(g_shared_memory, @@ -166,6 +177,7 @@ void RegisterInterruptRelayQueue(Service::Interface* self) { /** * Signals that the specified interrupt type has occurred to userland code * @param interrupt_id ID of interrupt that is being signalled + * @todo This should probably take a thread_id parameter and only signal this thread? 
*/ void SignalInterrupt(InterruptId interrupt_id) { if (0 == g_interrupt_event) { @@ -191,7 +203,7 @@ void SignalInterrupt(InterruptId interrupt_id) { } /// Executes the next GSP command -void ExecuteCommand(const Command& command) { +void ExecuteCommand(const Command& command, u32 thread_id) { // Utility function to convert register ID to address auto WriteGPURegister = [](u32 id, u32 data) { GPU::Write(0x1EF00000 + 4 * id, data); @@ -262,6 +274,15 @@ void ExecuteCommand(const Command& command) { SignalInterrupt(InterruptId::PPF); SignalInterrupt(InterruptId::P3D); SignalInterrupt(InterruptId::DMA); + + // Update framebuffer information if requested + for (int screen_id = 0; screen_id < 2; ++screen_id) { + FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id); + if (info->is_dirty) + SetBufferSwap(screen_id, info->framebuffer_info[info->index]); + + info->is_dirty = false; + } break; } @@ -304,7 +325,7 @@ void TriggerCmdReqQueue(Service::Interface* self) { g_debugger.GXCommandProcessed((u8*)&command_buffer->commands[i]); // Decode and execute command - ExecuteCommand(command_buffer->commands[i]); + ExecuteCommand(command_buffer->commands[i], thread_id); // Indicates that command has completed command_buffer->number_commands = command_buffer->number_commands - 1; diff --git a/src/core/hle/service/gsp.h b/src/core/hle/service/gsp.h index 884cfd65a..a09d59dbb 100644 --- a/src/core/hle/service/gsp.h +++ b/src/core/hle/service/gsp.h @@ -4,6 +4,8 @@ #pragma once +#include + #include "common/bit_field.h" #include "core/hle/service/service.h" @@ -76,6 +78,22 @@ struct FrameBufferInfo { }; static_assert(sizeof(FrameBufferInfo) == 0x1c, "Struct has incorrect size"); +struct FrameBufferUpdate { + BitField<0, 1, u8> index; // Index used for GSP::SetBufferSwap + BitField<0, 1, u8> is_dirty; // true if GSP should update GPU framebuffer registers + u16 pad1; + + FrameBufferInfo framebuffer_info[2]; + + u32 pad2; +}; +static_assert(sizeof(FrameBufferUpdate) == 0x40, 
"Struct has incorrect size"); +// TODO: Not sure if this padding is correct. +// Chances are the second block is stored at offset 0x24 rather than 0x20. +#ifndef _MSC_VER +static_assert(offsetof(FrameBufferUpdate, framebuffer_info[1]) == 0x20, "FrameBufferInfo element has incorrect alignment"); +#endif + /// GSP command struct Command { BitField<0, 8, CommandId> id; From 6ea003c7b5ec97d0a754197654cdf6e7fccdba24 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 17 Aug 2014 14:06:58 +0200 Subject: [PATCH 04/18] Pica: Add debug utility functions for dumping geometry data. --- src/video_core/CMakeLists.txt | 2 + src/video_core/command_processor.cpp | 8 +++ src/video_core/debug_utils/debug_utils.cpp | 60 ++++++++++++++++++++++ src/video_core/debug_utils/debug_utils.h | 40 +++++++++++++++ src/video_core/video_core.vcxproj | 2 + src/video_core/video_core.vcxproj.filters | 15 ++++-- 6 files changed, 123 insertions(+), 4 deletions(-) create mode 100644 src/video_core/debug_utils/debug_utils.cpp create mode 100644 src/video_core/debug_utils/debug_utils.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 8e7b93acb..71a1b5ecc 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -5,6 +5,7 @@ set(SRCS clipper.cpp utils.cpp vertex_shader.cpp video_core.cpp + debug_utils/debug_utils.cpp renderer_opengl/renderer_opengl.cpp) set(HEADERS clipper.h @@ -17,6 +18,7 @@ set(HEADERS clipper.h renderer_base.h vertex_shader.h video_core.h + debug_utils/debug_utils.h renderer_opengl/renderer_opengl.h) add_library(video_core STATIC ${SRCS} ${HEADERS}) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 020a4da3f..2027e58d9 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -8,6 +8,7 @@ #include "primitive_assembly.h" #include "vertex_shader.h" +#include "debug_utils/debug_utils.h" namespace Pica { @@ -68,6 +69,8 @@ static inline void 
WritePicaReg(u32 id, u32 value) { const u16* index_address_16 = (u16*)index_address_8; bool index_u16 = (bool)index_info.format; + DebugUtils::GeometryDumper geometry_dumper; + for (int index = 0; index < registers.num_vertices; ++index) { int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; @@ -95,6 +98,10 @@ static inline void WritePicaReg(u32 id, u32 value) { input.attr[i][comp].ToFloat32()); } } + + // NOTE: For now, we simply assume that the first input attribute corresponds to the position. + geometry_dumper.AddVertex({input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32()}, registers.triangle_topology); + VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); if (is_indexed) { @@ -103,6 +110,7 @@ static inline void WritePicaReg(u32 id, u32 value) { PrimitiveAssembly::SubmitVertex(output); } + geometry_dumper.Dump(); break; } diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp new file mode 100644 index 000000000..ac895ec3a --- /dev/null +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -0,0 +1,60 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
+ +#include +#include + +#include "video_core/pica.h" + +#include "debug_utils.h" + +namespace Pica { + +namespace DebugUtils { + +void GeometryDumper::AddVertex(std::array pos, TriangleTopology topology) { + vertices.push_back({pos[0], pos[1], pos[2]}); + + int num_vertices = vertices.size(); + + switch (topology) { + case TriangleTopology::List: + case TriangleTopology::ListIndexed: + if (0 == (num_vertices % 3)) + faces.push_back({ num_vertices-3, num_vertices-2, num_vertices-1 }); + break; + + default: + ERROR_LOG(GPU, "Unknown triangle topology %x", (int)topology); + exit(0); + break; + } +} + +void GeometryDumper::Dump() { + // NOTE: Permanently enabling this just trashes hard disks for no reason. + // Hence, this is currently disabled. + return; + + static int index = 0; + std::string filename = std::string("geometry_dump") + std::to_string(++index) + ".obj"; + + std::ofstream file(filename); + + for (const auto& vertex : vertices) { + file << "v " << vertex.pos[0] + << " " << vertex.pos[1] + << " " << vertex.pos[2] << std::endl; + } + + for (const Face& face : faces) { + file << "f " << 1+face.index[0] + << " " << 1+face.index[1] + << " " << 1+face.index[2] << std::endl; + } +} + +} // namespace + +} // namespace diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h new file mode 100644 index 000000000..9b4dce539 --- /dev/null +++ b/src/video_core/debug_utils/debug_utils.h @@ -0,0 +1,40 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +#include "video_core/pica.h" + +namespace Pica { + +namespace DebugUtils { + +using TriangleTopology = Regs::TriangleTopology; + +// Simple utility class for dumping geometry data to an OBJ file +class GeometryDumper { +public: + void AddVertex(std::array pos, TriangleTopology topology); + + void Dump(); + +private: + struct Vertex { + std::array pos; + }; + + struct Face { + int index[3]; + }; + + std::vector vertices; + std::vector faces; +}; + +} // namespace + +} // namespace diff --git a/src/video_core/video_core.vcxproj b/src/video_core/video_core.vcxproj index 48d77cdc4..4e129fbe7 100644 --- a/src/video_core/video_core.vcxproj +++ b/src/video_core/video_core.vcxproj @@ -19,6 +19,7 @@ + @@ -40,6 +41,7 @@ + diff --git a/src/video_core/video_core.vcxproj.filters b/src/video_core/video_core.vcxproj.filters index 31af4f1df..90541aca0 100644 --- a/src/video_core/video_core.vcxproj.filters +++ b/src/video_core/video_core.vcxproj.filters @@ -4,6 +4,9 @@ {e0245557-dbd4-423e-9399-513d5e99f1e4} + + {0ac498e6-bbd8-46e3-9d5f-e816546ab90e} + @@ -16,11 +19,11 @@ + + debug_utils + - - renderer_opengl - @@ -32,8 +35,12 @@ + + + debug_utils + - + \ No newline at end of file From f37e39deb9abe88b4874ebc2889ed52e02ed9c13 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 14 Aug 2014 14:30:38 +0200 Subject: [PATCH 05/18] Pica: Add debug utilities for dumping shaders. --- src/video_core/debug_utils/debug_utils.cpp | 205 +++++++++++++++++++++ src/video_core/debug_utils/debug_utils.h | 3 + src/video_core/pica.h | 2 +- src/video_core/vertex_shader.cpp | 18 ++ 4 files changed, 227 insertions(+), 1 deletion(-) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index ac895ec3a..f41249eac 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 // Refer to the license.txt file included. 
+#include #include #include @@ -55,6 +56,210 @@ void GeometryDumper::Dump() { } } +#pragma pack(1) +struct DVLBHeader { + enum : u32 { + MAGIC_WORD = 0x424C5644, // "DVLB" + }; + + u32 magic_word; + u32 num_programs; +// u32 dvle_offset_table[]; +}; +static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size"); + +struct DVLPHeader { + enum : u32 { + MAGIC_WORD = 0x504C5644, // "DVLP" + }; + + u32 magic_word; + u32 version; + u32 binary_offset; // relative to DVLP start + u32 binary_size_words; + u32 swizzle_patterns_offset; + u32 swizzle_patterns_num_entries; + u32 unk2; +}; +static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size"); + +struct DVLEHeader { + enum : u32 { + MAGIC_WORD = 0x454c5644, // "DVLE" + }; + + enum class ShaderType : u8 { + VERTEX = 0, + GEOMETRY = 1, + }; + + u32 magic_word; + u16 pad1; + ShaderType type; + u8 pad2; + u32 main_offset_words; // offset within binary blob + u32 endmain_offset_words; + u32 pad3; + u32 pad4; + u32 constant_table_offset; + u32 constant_table_size; // number of entries + u32 label_table_offset; + u32 label_table_size; + u32 output_register_table_offset; + u32 output_register_table_size; + u32 uniform_table_offset; + u32 uniform_table_size; + u32 symbol_table_offset; + u32 symbol_table_size; + +}; +static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size"); +#pragma pack() + +void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, + u32 main_offset, const Regs::VSOutputAttributes* output_attributes) +{ + // NOTE: Permanently enabling this just trashes hard disks for no reason. + // Hence, this is currently disabled. 
+ return; + + struct StuffToWrite { + u8* pointer; + u32 size; + }; + std::vector writing_queue; + u32 write_offset = 0; + + auto QueueForWriting = [&writing_queue,&write_offset](u8* pointer, u32 size) { + writing_queue.push_back({pointer, size}); + u32 old_write_offset = write_offset; + write_offset += size; + return old_write_offset; + }; + + // First off, try to translate Pica state (one enum for output attribute type and component) + // into shbin format (separate type and component mask). + union OutputRegisterInfo { + enum Type : u64 { + POSITION = 0, + COLOR = 2, + TEXCOORD0 = 3, + TEXCOORD1 = 5, + TEXCOORD2 = 6, + }; + + BitField< 0, 64, u64> hex; + + BitField< 0, 16, Type> type; + BitField<16, 16, u64> id; + BitField<32, 4, u64> component_mask; + }; + + // This is put into a try-catch block to make sure we notice unknown configurations. + std::vector output_info_table; + for (int i = 0; i < 7; ++i) { + using OutputAttributes = Pica::Regs::VSOutputAttributes; + + // TODO: It's still unclear how the attribute components map to the register! + // Once we know that, this code probably will not make much sense anymore. 
+ std::map > map = { + { OutputAttributes::POSITION_X, { OutputRegisterInfo::POSITION, 1} }, + { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, + { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, + { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, + { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, + { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, + { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, + { OutputAttributes::COLOR_A, { OutputRegisterInfo::COLOR, 8} }, + { OutputAttributes::TEXCOORD0_U, { OutputRegisterInfo::TEXCOORD0, 1} }, + { OutputAttributes::TEXCOORD0_V, { OutputRegisterInfo::TEXCOORD0, 2} }, + { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, + { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, + { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, + { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} } + }; + + for (const auto& semantic : std::vector{ + output_attributes[i].map_x, + output_attributes[i].map_y, + output_attributes[i].map_z, + output_attributes[i].map_w }) { + if (semantic == OutputAttributes::INVALID) + continue; + + try { + OutputRegisterInfo::Type type = map.at(semantic).first; + u32 component_mask = map.at(semantic).second; + + auto it = std::find_if(output_info_table.begin(), output_info_table.end(), + [&i, &type](const OutputRegisterInfo& info) { + return info.id == i && info.type == type; + } + ); + + if (it == output_info_table.end()) { + output_info_table.push_back({}); + output_info_table.back().type = type; + output_info_table.back().component_mask = component_mask; + output_info_table.back().id = i; + } else { + it->component_mask = it->component_mask | component_mask; + } + } catch (const std::out_of_range& oor) { + _dbg_assert_msg_(GPU, 0, "Unknown output attribute mapping"); + ERROR_LOG(GPU, "Unknown output attribute mapping: %03x, %03x, 
%03x, %03x", + (int)output_attributes[i].map_x.Value(), + (int)output_attributes[i].map_y.Value(), + (int)output_attributes[i].map_z.Value(), + (int)output_attributes[i].map_w.Value()); + } + } + } + + + struct { + DVLBHeader header; + u32 dvle_offset; + } dvlb{ {DVLBHeader::MAGIC_WORD, 1 } }; // 1 DVLE + + DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD }; + DVLEHeader dvle{ DVLEHeader::MAGIC_WORD }; + + QueueForWriting((u8*)&dvlb, sizeof(dvlb)); + u32 dvlp_offset = QueueForWriting((u8*)&dvlp, sizeof(dvlp)); + dvlb.dvle_offset = QueueForWriting((u8*)&dvle, sizeof(dvle)); + + // TODO: Reduce the amount of binary code written to relevant portions + dvlp.binary_offset = write_offset - dvlp_offset; + dvlp.binary_size_words = binary_size; + QueueForWriting((u8*)binary_data, binary_size * sizeof(u32)); + + dvlp.swizzle_patterns_offset = write_offset - dvlp_offset; + dvlp.swizzle_patterns_num_entries = swizzle_size; + u32 dummy = 0; + for (int i = 0; i < swizzle_size; ++i) { + QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i])); + QueueForWriting((u8*)&dummy, sizeof(dummy)); + } + + dvle.main_offset_words = main_offset; + dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; + dvle.output_register_table_size = output_info_table.size(); + QueueForWriting((u8*)output_info_table.data(), output_info_table.size() * sizeof(OutputRegisterInfo)); + + // TODO: Create a label table for "main" + + + // Write data to file + static int dump_index = 0; + std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin"); + std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); + + for (auto& chunk : writing_queue) { + file.write((char*)chunk.pointer, chunk.size); + } +} + } // namespace } // namespace diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 9b4dce539..bd7a0a89b 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ 
b/src/video_core/debug_utils/debug_utils.h @@ -35,6 +35,9 @@ private: std::vector faces; }; +void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, + u32 main_offset, const Regs::VSOutputAttributes* output_attributes); + } // namespace } // namespace diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 640830144..fe886c16f 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -57,7 +57,7 @@ struct Regs { INSERT_PADDING_WORDS(0x1); - union { + union VSOutputAttributes { // Maps components of output vertex attributes to semantics enum Semantic : u32 { diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 93830a96a..8df14b51f 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -4,6 +4,7 @@ #include "pica.h" #include "vertex_shader.h" +#include "debug_utils/debug_utils.h" #include #include @@ -50,6 +51,11 @@ struct VertexShaderState { }; u32 call_stack[8]; // TODO: What is the maximal call stack depth? u32* call_stack_pointer; + + struct { + u32 max_offset; // maximum program counter ever reached + u32 max_opdesc_id; // maximum swizzle pattern index ever used + } debug; }; static void ProcessShaderCode(VertexShaderState& state) { @@ -57,6 +63,7 @@ static void ProcessShaderCode(VertexShaderState& state) { bool increment_pc = true; bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; + state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] : (instr.common.src1 < 0x20) ? 
&state.temporary_registers[instr.common.src1-0x10].x @@ -88,6 +95,7 @@ static void ProcessShaderCode(VertexShaderState& state) { switch (instr.opcode) { case Instruction::OpCode::ADD: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -100,6 +108,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MUL: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -113,6 +122,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::DP3: case Instruction::OpCode::DP4: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); float24 dot = float24::FromFloat32(0.f); int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; for (int i = 0; i < num_components; ++i) @@ -130,6 +140,7 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal case Instruction::OpCode::RCP: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -145,6 +156,7 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal Square Root case Instruction::OpCode::RSQ: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -159,6 +171,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MOV: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -212,6 +225,8 @@ OutputVertex RunShader(const 
InputVertex& input, int num_attributes) const u32* main = &shader_memory[registers.vs_main_offset]; state.program_counter = (u32*)main; + state.debug.max_offset = 0; + state.debug.max_opdesc_id = 0; // Setup input register table const auto& attribute_register_map = registers.vs_input_register_map; @@ -255,6 +270,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.call_stack_pointer = &state.call_stack[0]; ProcessShaderCode(state); + DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, + state.debug.max_opdesc_id, registers.vs_main_offset, + registers.vs_output_attributes); DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), From 0465adf206255fc114130cc7fcca1e295bcffca2 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 14 Aug 2014 23:23:55 +0200 Subject: [PATCH 06/18] Pica/CommandProcessor: Implement parameter masking. --- src/video_core/command_processor.cpp | 18 +++++++++++++----- src/video_core/command_processor.h | 13 ++++++++++++- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 2027e58d9..f7a412bc1 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -24,9 +24,14 @@ static u32 uniform_write_buffer[4]; static u32 vs_binary_write_offset = 0; static u32 vs_swizzle_write_offset = 0; -static inline void WritePicaReg(u32 id, u32 value) { +static inline void WritePicaReg(u32 id, u32 value, u32 mask) { + + if (id >= registers.NumIds()) + return; + + // TODO: Figure out how register masking acts on e.g. 
vs_uniform_setup.set_value u32 old_value = registers[id]; - registers[id] = value; + registers[id] = (old_value & ~mask) | (value & mask); switch(id) { // It seems like these trigger vertex rendering @@ -215,14 +220,17 @@ static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) { u32* read_pointer = (u32*)first_command_word; - // TODO: Take parameter mask into consideration! + const u32 write_mask = ((header.parameter_mask & 0x1) ? (0xFFu << 0) : 0u) | + ((header.parameter_mask & 0x2) ? (0xFFu << 8) : 0u) | + ((header.parameter_mask & 0x4) ? (0xFFu << 16) : 0u) | + ((header.parameter_mask & 0x8) ? (0xFFu << 24) : 0u); - WritePicaReg(header.cmd_id, *read_pointer); + WritePicaReg(header.cmd_id, *read_pointer, write_mask); read_pointer += 2; for (int i = 1; i < 1+header.extra_data_length; ++i) { u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0); - WritePicaReg(cmd, *read_pointer); + WritePicaReg(cmd, *read_pointer, write_mask); ++read_pointer; } diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h index 6b6241a25..955f9daec 100644 --- a/src/video_core/command_processor.h +++ b/src/video_core/command_processor.h @@ -17,11 +17,22 @@ union CommandHeader { u32 hex; BitField< 0, 16, u32> cmd_id; + + // parameter_mask: + // Mask applied to the input value to make it possible to update + // parts of a register without overwriting its other fields. 
+ // first bit: 0x000000FF + // second bit: 0x0000FF00 + // third bit: 0x00FF0000 + // fourth bit: 0xFF000000 BitField<16, 4, u32> parameter_mask; + BitField<20, 11, u32> extra_data_length; + BitField<31, 1, u32> group_commands; }; -static_assert(std::is_standard_layout::value == true, "CommandHeader does not use standard layout"); +static_assert(std::is_standard_layout::value == true, + "CommandHeader does not use standard layout"); static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); void ProcessCommandList(const u32* list, u32 size); From 26ade98411c1d76540695f15378ff7f6b5388b1a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 14 Aug 2014 19:21:55 +0200 Subject: [PATCH 07/18] Pica/citra-qt: Replace command list view and command list debugging code with something more sophisticated. --- src/citra_qt/debugger/graphics_cmdlists.cpp | 144 +++++++------------- src/citra_qt/debugger/graphics_cmdlists.hxx | 42 ++---- src/citra_qt/main.cpp | 4 +- src/core/hle/service/gsp.cpp | 5 - src/video_core/command_processor.cpp | 2 + src/video_core/debug_utils/debug_utils.cpp | 55 ++++++++ src/video_core/debug_utils/debug_utils.h | 21 +++ src/video_core/gpu_debugger.h | 63 --------- 8 files changed, 142 insertions(+), 194 deletions(-) diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp index e98560a19..71dd166cd 100644 --- a/src/citra_qt/debugger/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics_cmdlists.cpp @@ -2,53 +2,21 @@ // Licensed under GPLv2 // Refer to the license.txt file included. 
-#include "graphics_cmdlists.hxx" +#include +#include +#include #include -extern GraphicsDebugger g_debugger; +#include "graphics_cmdlists.hxx" -GPUCommandListModel::GPUCommandListModel(QObject* parent) : QAbstractItemModel(parent) +GPUCommandListModel::GPUCommandListModel(QObject* parent) : QAbstractListModel(parent) { - root_item = new TreeItem(TreeItem::ROOT, 0, NULL, this); - connect(this, SIGNAL(CommandListCalled()), this, SLOT(OnCommandListCalledInternal()), Qt::UniqueConnection); -} - -QModelIndex GPUCommandListModel::index(int row, int column, const QModelIndex& parent) const -{ - TreeItem* item; - - if (!parent.isValid()) { - item = root_item; - } else { - item = (TreeItem*)parent.internalPointer(); - } - - return createIndex(row, column, item->children[row]); -} - -QModelIndex GPUCommandListModel::parent(const QModelIndex& child) const -{ - if (!child.isValid()) - return QModelIndex(); - - TreeItem* item = (TreeItem*)child.internalPointer(); - - if (item->parent == NULL) - return QModelIndex(); - - return createIndex(item->parent->index, 0, item->parent); } int GPUCommandListModel::rowCount(const QModelIndex& parent) const { - TreeItem* item; - if (!parent.isValid()) { - item = root_item; - } else { - item = (TreeItem*)parent.internalPointer(); - } - return item->children.size(); + return pica_trace.writes.size(); } int GPUCommandListModel::columnCount(const QModelIndex& parent) const @@ -61,79 +29,67 @@ QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const if (!index.isValid()) return QVariant(); - const TreeItem* item = (const TreeItem*)index.internalPointer(); + const auto& writes = pica_trace.writes; + const Pica::CommandProcessor::CommandHeader cmd{writes[index.row()].Id()}; + const u32 val{writes[index.row()].Value()}; - if (item->type == TreeItem::COMMAND_LIST) - { - const GraphicsDebugger::PicaCommandList& cmdlist = command_lists[item->index].second; - u32 address = command_lists[item->index].first; - - if (role == 
Qt::DisplayRole && index.column() == 0) - { - return QVariant(QString("0x%1 bytes at 0x%2").arg(cmdlist.size(), 0, 16).arg(address, 8, 16, QLatin1Char('0'))); + if (role == Qt::DisplayRole) { + QString content; + if (index.column() == 0) { + content = QString::fromLatin1(Pica::Regs::GetCommandName(cmd.cmd_id).c_str()); + content.append(" "); + } else if (index.column() == 1) { + content.append(QString("%1 ").arg(cmd.hex, 8, 16, QLatin1Char('0'))); + content.append(QString("%1 ").arg(val, 8, 16, QLatin1Char('0'))); } - } - else - { - // index refers to a specific command - const GraphicsDebugger::PicaCommandList& cmdlist = command_lists[item->parent->index].second; - const GraphicsDebugger::PicaCommand& cmd = cmdlist[item->index]; - const Pica::CommandProcessor::CommandHeader& header = cmd.GetHeader(); - if (role == Qt::DisplayRole) { - QString content; - if (index.column() == 0) { - content = QString::fromLatin1(Pica::Regs::GetCommandName(header.cmd_id).c_str()); - content.append(" "); - } else if (index.column() == 1) { - for (int j = 0; j < cmd.size(); ++j) - content.append(QString("%1 ").arg(cmd[j], 8, 16, QLatin1Char('0'))); - } - - return QVariant(content); - } + return QVariant(content); } return QVariant(); } -void GPUCommandListModel::OnCommandListCalled(const GraphicsDebugger::PicaCommandList& lst, bool is_new) -{ - emit CommandListCalled(); -} - - -void GPUCommandListModel::OnCommandListCalledInternal() +void GPUCommandListModel::OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace& trace) { beginResetModel(); - command_lists = GetDebugger()->GetCommandLists(); - - // delete root item and rebuild tree - delete root_item; - root_item = new TreeItem(TreeItem::ROOT, 0, NULL, this); - - for (int command_list_idx = 0; command_list_idx < command_lists.size(); ++command_list_idx) { - TreeItem* command_list_item = new TreeItem(TreeItem::COMMAND_LIST, command_list_idx, root_item, root_item); - root_item->children.push_back(command_list_item); - - const 
GraphicsDebugger::PicaCommandList& command_list = command_lists[command_list_idx].second; - for (int command_idx = 0; command_idx < command_list.size(); ++command_idx) { - TreeItem* command_item = new TreeItem(TreeItem::COMMAND, command_idx, command_list_item, command_list_item); - command_list_item->children.push_back(command_item); - } - } + pica_trace = trace; endResetModel(); } + GPUCommandListWidget::GPUCommandListWidget(QWidget* parent) : QDockWidget(tr("Pica Command List"), parent) { GPUCommandListModel* model = new GPUCommandListModel(this); - g_debugger.RegisterObserver(model); - QTreeView* tree_widget = new QTreeView; - tree_widget->setModel(model); - tree_widget->setFont(QFont("monospace")); - setWidget(tree_widget); + QWidget* main_widget = new QWidget; + + QTreeView* list_widget = new QTreeView; + list_widget->setModel(model); + list_widget->setFont(QFont("monospace")); + list_widget->setRootIsDecorated(false); + + QPushButton* toggle_tracing = new QPushButton(tr("Start Tracing")); + + connect(toggle_tracing, SIGNAL(clicked()), this, SLOT(OnToggleTracing())); + connect(this, SIGNAL(TracingFinished(const Pica::DebugUtils::PicaTrace&)), + model, SLOT(OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace&))); + + QVBoxLayout* main_layout = new QVBoxLayout; + main_layout->addWidget(list_widget); + main_layout->addWidget(toggle_tracing); + main_widget->setLayout(main_layout); + + setWidget(main_widget); +} + +void GPUCommandListWidget::OnToggleTracing() +{ + if (!Pica::DebugUtils::IsPicaTracing()) { + Pica::DebugUtils::StartPicaTracing(); + } else { + pica_trace = Pica::DebugUtils::FinishPicaTracing(); + emit TracingFinished(*pica_trace); + } } diff --git a/src/citra_qt/debugger/graphics_cmdlists.hxx b/src/citra_qt/debugger/graphics_cmdlists.hxx index b4e6e3c8a..479ef0326 100644 --- a/src/citra_qt/debugger/graphics_cmdlists.hxx +++ b/src/citra_qt/debugger/graphics_cmdlists.hxx @@ -4,53 +4,28 @@ #pragma once -#include +#include #include #include 
"video_core/gpu_debugger.h" +#include "video_core/debug_utils/debug_utils.h" -// TODO: Rename class, since it's not actually a list model anymore... -class GPUCommandListModel : public QAbstractItemModel, public GraphicsDebugger::DebuggerObserver +class GPUCommandListModel : public QAbstractListModel { Q_OBJECT public: GPUCommandListModel(QObject* parent); - QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const; - QModelIndex parent(const QModelIndex& child) const; int columnCount(const QModelIndex& parent = QModelIndex()) const; int rowCount(const QModelIndex& parent = QModelIndex()) const override; QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override; -public: - void OnCommandListCalled(const GraphicsDebugger::PicaCommandList& lst, bool is_new) override; - public slots: - void OnCommandListCalledInternal(); - -signals: - void CommandListCalled(); + void OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace& trace); private: - struct TreeItem : public QObject - { - enum Type { - ROOT, - COMMAND_LIST, - COMMAND - }; - - TreeItem(Type type, int index, TreeItem* item_parent, QObject* parent) : QObject(parent), type(type), index(index), parent(item_parent) {} - - Type type; - int index; - std::vector children; - TreeItem* parent; - }; - - std::vector> command_lists; - TreeItem* root_item; + Pica::DebugUtils::PicaTrace pica_trace; }; class GPUCommandListWidget : public QDockWidget @@ -60,5 +35,12 @@ class GPUCommandListWidget : public QDockWidget public: GPUCommandListWidget(QWidget* parent = 0); +public slots: + void OnToggleTracing(); + +signals: + void TracingFinished(const Pica::DebugUtils::PicaTrace&); + private: + std::unique_ptr pica_trace; }; diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index 9a16cf92d..a6b87f781 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp @@ -52,11 +52,11 @@ GMainWindow::GMainWindow() graphicsWidget = new GPUCommandStreamWidget(this); 
addDockWidget(Qt::RightDockWidgetArea, graphicsWidget); - callstackWidget->hide(); + graphicsWidget ->hide(); graphicsCommandsWidget = new GPUCommandListWidget(this); addDockWidget(Qt::RightDockWidgetArea, graphicsCommandsWidget); - callstackWidget->hide(); + graphicsCommandsWidget->hide(); QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging")); debug_menu->addAction(disasmWidget->toggleViewAction()); diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index 027ba5a37..46c5a8ddd 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp @@ -230,11 +230,6 @@ void ExecuteCommand(const Command& command, u32 thread_id) { // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though WriteGPURegister(GPU_REG_INDEX(command_processor_config.trigger), 1); - // TODO: Move this to GPU - // TODO: Not sure what units the size is measured in - g_debugger.CommandListCalled(params.address, - (u32*)Memory::GetPointer(params.address), - params.size); SignalInterrupt(InterruptId::P3D); break; } diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index f7a412bc1..76fdb4e4a 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -33,6 +33,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { u32 old_value = registers[id]; registers[id] = (old_value & ~mask) | (value & mask); + DebugUtils::OnPicaRegWrite(id, registers[id]); + switch(id) { // It seems like these trigger vertex rendering case PICA_REG_INDEX(trigger_draw): diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index f41249eac..1bbc0330c 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include "video_core/pica.h" @@ -260,6 +261,60 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* 
swizzle_data } } +static std::unique_ptr pica_trace; +static std::mutex pica_trace_mutex; +static int is_pica_tracing = false; + +void StartPicaTracing() +{ + if (is_pica_tracing) { + ERROR_LOG(GPU, "StartPicaTracing called even though tracing already running!"); + return; + } + + pica_trace_mutex.lock(); + pica_trace = std::unique_ptr(new PicaTrace); + + is_pica_tracing = true; + pica_trace_mutex.unlock(); +} + +bool IsPicaTracing() +{ + return is_pica_tracing; +} + +void OnPicaRegWrite(u32 id, u32 value) +{ + // Double check for is_pica_tracing to avoid pointless locking overhead + if (!is_pica_tracing) + return; + + std::unique_lock lock(pica_trace_mutex); + + if (!is_pica_tracing) + return; + + pica_trace->writes.push_back({id, value}); +} + +std::unique_ptr FinishPicaTracing() +{ + if (!is_pica_tracing) { + ERROR_LOG(GPU, "FinishPicaTracing called even though tracing isn't running!"); + return {}; + } + + // signalize that no further tracing should be performed + is_pica_tracing = false; + + // Wait until running tracing is finished + pica_trace_mutex.lock(); + std::unique_ptr ret(std::move(pica_trace)); + pica_trace_mutex.unlock(); + return ret; +} + } // namespace } // namespace diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index bd7a0a89b..023500066 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include "video_core/pica.h" @@ -38,6 +39,26 @@ private: void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, u32 main_offset, const Regs::VSOutputAttributes* output_attributes); + +// Utility class to log Pica commands. 
+struct PicaTrace { + struct Write : public std::pair { + Write(u32 id, u32 value) : std::pair(id, value) {} + + u32& Id() { return first; } + const u32& Id() const { return first; } + + u32& Value() { return second; } + const u32& Value() const { return second; } + }; + std::vector writes; +}; + +void StartPicaTracing(); +bool IsPicaTracing(); +void OnPicaRegWrite(u32 id, u32 value); +std::unique_ptr FinishPicaTracing(); + } // namespace } // namespace diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h index 2ba873457..5a81fcfcb 100644 --- a/src/video_core/gpu_debugger.h +++ b/src/video_core/gpu_debugger.h @@ -18,19 +18,6 @@ class GraphicsDebugger { public: - // A few utility structs used to expose data - // A vector of commands represented by their raw byte sequence - struct PicaCommand : public std::vector - { - const Pica::CommandProcessor::CommandHeader& GetHeader() const - { - const u32& val = at(1); - return *(Pica::CommandProcessor::CommandHeader*)&val; - } - }; - - typedef std::vector PicaCommandList; - // Base class for all objects which need to be notified about GPU events class DebuggerObserver { @@ -55,16 +42,6 @@ public: ERROR_LOG(GSP, "Received command: id=%x", (int)cmd.id.Value()); } - /** - * @param lst command list which triggered this call - * @param is_new true if the command list was called for the first time - * @todo figure out how to make sure called functions don't keep references around beyond their life time - */ - virtual void OnCommandListCalled(const PicaCommandList& lst, bool is_new) - { - ERROR_LOG(GSP, "Command list called: %d", (int)is_new); - } - protected: const GraphicsDebugger* GetDebugger() const { @@ -93,49 +70,12 @@ public: } ); } - void CommandListCalled(u32 address, u32* command_list, u32 size_in_words) - { - if (observers.empty()) - return; - - PicaCommandList cmdlist; - for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;) - { - const 
Pica::CommandProcessor::CommandHeader& header = *(Pica::CommandProcessor::CommandHeader*)(&parse_pointer[1]); - - cmdlist.push_back(PicaCommand()); - auto& cmd = cmdlist.back(); - - size_t size = 2 + header.extra_data_length; - size = (size + 1) / 2 * 2; // align to 8 bytes - cmd.reserve(size); - std::copy(parse_pointer, parse_pointer + size, std::back_inserter(cmd)); - - parse_pointer += size; - } - - auto obj = std::pair(address, cmdlist); - auto it = std::find(command_lists.begin(), command_lists.end(), obj); - bool is_new = (it == command_lists.end()); - if (is_new) - command_lists.push_back(obj); - - ForEachObserver([&](DebuggerObserver* observer) { - observer->OnCommandListCalled(obj.second, is_new); - } ); - } - const GSP_GPU::Command& ReadGXCommandHistory(int index) const { // TODO: Is this thread-safe? return gx_command_history[index]; } - const std::vector>& GetCommandLists() const - { - return command_lists; - } - void RegisterObserver(DebuggerObserver* observer) { // TODO: Check for duplicates @@ -158,7 +98,4 @@ private: std::vector observers; std::vector gx_command_history; - - // vector of pairs of command lists and their storage address - std::vector> command_lists; }; From 62c36a4ef0a37fe83bb8f8680f928970bead545b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 14 Aug 2014 23:28:55 +0200 Subject: [PATCH 08/18] Pica/VertexShader: Fix a bug in the bitfield definitions and add the "negate" field for swizzlers. 
--- src/video_core/vertex_shader.cpp | 28 +++++++----- src/video_core/vertex_shader.h | 78 ++++++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 14 deletions(-) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 8df14b51f..cdecbff3c 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -65,26 +65,32 @@ static void ProcessShaderCode(VertexShaderState& state) { const Instruction& instr = *(const Instruction*)state.program_counter; state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); - const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] - : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x - : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x - : nullptr; - const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2] - : &state.temporary_registers[instr.common.src2-0x10].x; - // TODO: Unsure about the limit values - float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest] - : (instr.common.dest <= 0x3C) ? nullptr - : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4] + const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()] + : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x + : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x : nullptr; + const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()] + : &state.temporary_registers[instr.common.src2.GetIndex()].x; + float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] + : (instr.common.dest < 0x10) ? nullptr + : (instr.common.dest < 0x20) ? 
&state.temporary_registers[instr.common.dest.GetIndex()][0] + : nullptr; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; + const bool negate_src1 = swizzle.negate; - const float24 src1[4] = { + float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)], src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)], }; + if (negate_src1) { + src1[0] = src1[0] * float24::FromFloat32(-1); + src1[1] = src1[1] * float24::FromFloat32(-1); + src1[2] = src1[2] * float24::FromFloat32(-1); + src1[3] = src1[3] * float24::FromFloat32(-1); + } const float24 src2[4] = { src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index 1b71e367b..f0a8a5b60 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -117,9 +117,78 @@ union Instruction { // while "dest" addresses individual floats. 
union { BitField<0x00, 0x5, u32> operand_desc_id; - BitField<0x07, 0x5, u32> src2; - BitField<0x0c, 0x7, u32> src1; - BitField<0x13, 0x7, u32> dest; + + template + struct SourceRegister : BitFieldType { + enum RegisterType { + Input, + Temporary, + FloatUniform + }; + + RegisterType GetRegisterType() const { + if (BitFieldType::Value() < 0x10) + return Input; + else if (BitFieldType::Value() < 0x20) + return Temporary; + else + return FloatUniform; + } + + int GetIndex() const { + if (GetRegisterType() == Input) + return BitFieldType::Value(); + else if (GetRegisterType() == Temporary) + return BitFieldType::Value() - 0x10; + else // GetRegisterType() == FloatUniform + return BitFieldType::Value() - 0x20; + } + + std::string GetRegisterName() const { + std::map type = { + { Input, "i" }, + { Temporary, "t" }, + { FloatUniform, "f" }, + }; + return type[GetRegisterType()] + std::to_string(GetIndex()); + } + }; + + SourceRegister> src2; + SourceRegister> src1; + + struct : BitField<0x15, 0x5, u32> + { + enum RegisterType { + Output, + Temporary, + Unknown + }; + RegisterType GetRegisterType() const { + if (Value() < 0x8) + return Output; + else if (Value() < 0x10) + return Unknown; + else + return Temporary; + } + int GetIndex() const { + if (GetRegisterType() == Output) + return Value(); + else if (GetRegisterType() == Temporary) + return Value() - 0x10; + else + return Value(); + } + std::string GetRegisterName() const { + std::map type = { + { Output, "o" }, + { Temporary, "t" }, + { Unknown, "u" } + }; + return type[GetRegisterType()] + std::to_string(GetIndex()); + } + } dest; } common; // Format used for flow control instructions ("if") @@ -128,6 +197,7 @@ union Instruction { BitField<0x0a, 0xc, u32> offset_words; } flow_control; }; +static_assert(std::is_standard_layout::value, "Structure is not using standard layout!"); union SwizzlePattern { u32 hex; @@ -185,6 +255,8 @@ union SwizzlePattern { // Components of "dest" that should be written to: LSB=dest.w, 
MSB=dest.x BitField< 0, 4, u32> dest_mask; + BitField< 4, 1, u32> negate; // negates src1 + BitField< 5, 2, Selector> src1_selector_3; BitField< 7, 2, Selector> src1_selector_2; BitField< 9, 2, Selector> src1_selector_1; From 162d641a301d87d5e25ca5d677b7f8f07f29e748 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 12 Aug 2014 20:04:28 +0200 Subject: [PATCH 09/18] Pica/Math: Improved the design of the Vec2/Vec3/Vec4 classes and simplified rasterizer code accordingly. - Swizzlers now return const objects so that things like "first_vec4.xyz() = some_vec3" now will fail to compile (ideally we should support some vector holding references to make this actually work). - The methods "InsertBeforeX/Y/Z" and "Append" have been replaced by more versions of MakeVec, which now also supports building new vectors from vectors. - Vector library now follows C++ type promotion rules (hence, the result of Vec2<u8> with another Vec2<u8> is now a Vec2<int>). --- src/video_core/math.h | 196 +++++++++++++++++++-------------- src/video_core/rasterizer.cpp | 32 +++--- src/video_core/vertex_shader.h | 3 +- 3 files changed, 133 insertions(+), 98 deletions(-) diff --git a/src/video_core/math.h b/src/video_core/math.h index 7030f2cfb..ca1fb0df2 100644 --- a/src/video_core/math.h +++ b/src/video_core/math.h @@ -39,6 +39,13 @@ template class Vec2; template class Vec3; template class Vec4; +template +static inline Vec2 MakeVec(const T& x, const T& y); +template +static inline Vec3 MakeVec(const T& x, const T& y, const T& z); +template +static inline Vec4 MakeVec(const T& x, const T& y, const T& z, const T& w); + template class Vec2 { @@ -68,34 +75,34 @@ public: a[0] = x; a[1] = y; } - Vec2 operator +(const Vec2& other) const + Vec2 operator +(const Vec2& other) const { - 
return Vec2(x-other.x, y-other.y); + return MakeVec(x-other.x, y-other.y); } void operator -= (const Vec2& other) { x-=other.x; y-=other.y; } - Vec2 operator -() const + Vec2 operator -() const { - return Vec2(-x,-y); + return MakeVec(-x,-y); } - Vec2 operator * (const Vec2& other) const + Vec2 operator * (const Vec2& other) const { - return Vec2(x*other.x, y*other.y); + return MakeVec(x*other.x, y*other.y); } template - Vec2 operator * (const V& f) const + Vec2 operator * (const V& f) const { - return Vec2(x*f,y*f); + return MakeVec(x*f,y*f); } template void operator *= (const V& f) @@ -103,9 +110,9 @@ public: x*=f; y*=f; } template - Vec2 operator / (const V& f) const + Vec2 operator / (const V& f) const { - return Vec2(x/f,y/f); + return MakeVec(x/f,y/f); } template void operator /= (const V& f) @@ -152,20 +159,9 @@ public: const T& t() const { return y; } // swizzlers - create a subvector of specific components - Vec2 yx() const { return Vec2(y, x); } - Vec2 vu() const { return Vec2(y, x); } - Vec2 ts() const { return Vec2(y, x); } - - // Inserters to add new elements to effectively create larger vectors containing this Vec2 - Vec3 InsertBeforeX(const T& value) { - return Vec3(value, x, y); - } - Vec3 InsertBeforeY(const T& value) { - return Vec3(x, value, y); - } - Vec3 Append(const T& value) { - return Vec3(x, y, value); - } + const Vec2 yx() const { return Vec2(y, x); } + const Vec2 vu() const { return Vec2(y, x); } + const Vec2 ts() const { return Vec2(y, x); } }; template @@ -193,7 +189,7 @@ public: template Vec3 Cast() const { - return Vec3((T2)x, (T2)y, (T2)z); + return MakeVec((T2)x, (T2)y, (T2)z); } // Only implemented for T=int and T=float @@ -202,7 +198,7 @@ public: static Vec3 AssignToAll(const T& f) { - return Vec3(f, f, f); + return MakeVec(f, f, f); } void Write(T a[3]) @@ -210,34 +206,34 @@ public: a[0] = x; a[1] = y; a[2] = z; } - Vec3 operator +(const Vec3 &other) const + Vec3 operator +(const Vec3 &other) const { - return Vec3(x+other.x, 
y+other.y, z+other.z); + return MakeVec(x+other.x, y+other.y, z+other.z); } void operator += (const Vec3 &other) { x+=other.x; y+=other.y; z+=other.z; } - Vec3 operator -(const Vec3 &other) const + Vec3 operator -(const Vec3 &other) const { - return Vec3(x-other.x, y-other.y, z-other.z); + return MakeVec(x-other.x, y-other.y, z-other.z); } void operator -= (const Vec3 &other) { x-=other.x; y-=other.y; z-=other.z; } - Vec3 operator -() const + Vec3 operator -() const { - return Vec3(-x,-y,-z); + return MakeVec(-x,-y,-z); } - Vec3 operator * (const Vec3 &other) const + Vec3 operator * (const Vec3 &other) const { - return Vec3(x*other.x, y*other.y, z*other.z); + return MakeVec(x*other.x, y*other.y, z*other.z); } template - Vec3 operator * (const V& f) const + Vec3 operator * (const V& f) const { - return Vec3(x*f,y*f,z*f); + return MakeVec(x*f,y*f,z*f); } template void operator *= (const V& f) @@ -245,9 +241,9 @@ public: x*=f; y*=f; z*=f; } template - Vec3 operator / (const V& f) const + Vec3 operator / (const V& f) const { - return Vec3(x/f,y/f,z/f); + return MakeVec(x/f,y/f,z/f); } template void operator /= (const V& f) @@ -310,7 +306,7 @@ public: // swizzlers - create a subvector of specific components // e.g. 
Vec2 uv() { return Vec2(x,y); } // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) -#define _DEFINE_SWIZZLER2(a, b, name) Vec2 name() const { return Vec2(a, b); } +#define _DEFINE_SWIZZLER2(a, b, name) const Vec2 name() const { return Vec2(a, b); } #define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \ _DEFINE_SWIZZLER2(a, b, a##b); \ _DEFINE_SWIZZLER2(a, b, a2##b2); \ @@ -326,20 +322,6 @@ public: DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q); #undef DEFINE_SWIZZLER2 #undef _DEFINE_SWIZZLER2 - - // Inserters to add new elements to effectively create larger vectors containing this Vec2 - Vec4 InsertBeforeX(const T& value) { - return Vec4(value, x, y, z); - } - Vec4 InsertBeforeY(const T& value) { - return Vec4(x, value, y, z); - } - Vec4 InsertBeforeZ(const T& value) { - return Vec4(x, y, value, z); - } - Vec4 Append(const T& value) { - return Vec4(x, y, z, value); - } }; template @@ -383,34 +365,34 @@ public: a[0] = x; a[1] = y; a[2] = z; a[3] = w; } - Vec4 operator +(const Vec4& other) const + Vec4 operator +(const Vec4& other) const { - return Vec4(x+other.x, y+other.y, z+other.z, w+other.w); + return MakeVec(x+other.x, y+other.y, z+other.z, w+other.w); } void operator += (const Vec4& other) { x+=other.x; y+=other.y; z+=other.z; w+=other.w; } - Vec4 operator -(const Vec4 &other) const + Vec4 operator -(const Vec4 &other) const { - return Vec4(x-other.x, y-other.y, z-other.z, w-other.w); + return MakeVec(x-other.x, y-other.y, z-other.z, w-other.w); } void operator -= (const Vec4 &other) { x-=other.x; y-=other.y; z-=other.z; w-=other.w; } - Vec4 operator -() const + Vec4 operator -() const { - return Vec4(-x,-y,-z,-w); + return MakeVec(-x,-y,-z,-w); } - Vec4 operator * (const Vec4 &other) const + Vec4 operator * (const Vec4 &other) const { - return Vec4(x*other.x, y*other.y, z*other.z, w*other.w); + return MakeVec(x*other.x, y*other.y, z*other.z, w*other.w); } template - 
Vec4 operator * (const V& f) const + Vec4 operator * (const V& f) const { - return Vec4(x*f,y*f,z*f,w*f); + return MakeVec(x*f,y*f,z*f,w*f); } template void operator *= (const V& f) @@ -418,9 +400,9 @@ public: x*=f; y*=f; z*=f; w*=f; } template - Vec4 operator / (const V& f) const + Vec4 operator / (const V& f) const { - return Vec4(x/f,y/f,z/f,w/f); + return MakeVec(x/f,y/f,z/f,w/f); } template void operator /= (const V& f) @@ -469,7 +451,7 @@ public: // swizzlers - create a subvector of specific components // e.g. Vec2 uv() { return Vec2(x,y); } // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) -#define _DEFINE_SWIZZLER2(a, b, name) Vec2 name() const { return Vec2(a, b); } +#define _DEFINE_SWIZZLER2(a, b, name) const Vec2 name() const { return Vec2(a, b); } #define DEFINE_SWIZZLER2(a, b, a2, b2) \ _DEFINE_SWIZZLER2(a, b, a##b); \ _DEFINE_SWIZZLER2(a, b, a2##b2); \ @@ -485,7 +467,7 @@ public: #undef DEFINE_SWIZZLER2 #undef _DEFINE_SWIZZLER2 -#define _DEFINE_SWIZZLER3(a, b, c, name) Vec3 name() const { return Vec3(a, b, c); } +#define _DEFINE_SWIZZLER3(a, b, c, name) const Vec3 name() const { return Vec3(a, b, c); } #define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \ _DEFINE_SWIZZLER3(a, b, c, a##b##c); \ _DEFINE_SWIZZLER3(a, c, b, a##c##b); \ @@ -510,69 +492,121 @@ public: template -Vec4 operator * (const V& f, const Vec4& vec) +Vec4 operator * (const V& f, const Vec4& vec) { - return Vec4(f*vec.x,f*vec.y,f*vec.z,f*vec.w); + return MakeVec(f*vec.x,f*vec.y,f*vec.z,f*vec.w); } typedef Vec4 Vec4f; template -static inline T Dot(const Vec2& a, const Vec2& b) +static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec2& a, const Vec2& b) { return a.x*b.x + a.y*b.y; } template -static inline T Dot(const Vec3& a, const Vec3& b) +static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec3& a, const Vec3& b) { return a.x*b.x + a.y*b.y + a.z*b.z; } template -static inline T Dot(const Vec4& 
a, const Vec4& b) +static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec4& a, const Vec4& b) { return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; } template -static inline Vec3 Cross(const Vec3& a, const Vec3& b) +static inline Vec3 Cross(const Vec3& a, const Vec3& b) { - return Vec3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); + return MakeVec(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); } // linear interpolation via float: 0.0=begin, 1.0=end template -static inline X Lerp(const X& begin, const X& end, const float t) +static inline decltype(X{}*float{}+X{}*float{}) Lerp(const X& begin, const X& end, const float t) { return begin*(1.f-t) + end*t; } // linear interpolation via int: 0=begin, base=end template -static inline X LerpInt(const X& begin, const X& end, const int t) +static inline decltype((X{}*int{}+X{}*int{}) / base) LerpInt(const X& begin, const X& end, const int t) { return (begin*(base-t) + end*t) / base; } // Utility vector factories template -static inline Vec2 MakeVec2(const T& x, const T& y) +static inline Vec2 MakeVec(const T& x, const T& y) { return Vec2{x, y}; } template -static inline Vec3 MakeVec3(const T& x, const T& y, const T& z) +static inline Vec3 MakeVec(const T& x, const T& y, const T& z) { return Vec3{x, y, z}; } template -static inline Vec4 MakeVec4(const T& x, const T& y, const T& z, const T& w) +static inline Vec4 MakeVec(const T& x, const T& y, const Vec2& zw) +{ + return MakeVec(x, y, zw[0], zw[1]); +} + +template +static inline Vec3 MakeVec(const Vec2& xy, const T& z) +{ + return MakeVec(xy[0], xy[1], z); +} + +template +static inline Vec3 MakeVec(const T& x, const Vec2& yz) +{ + return MakeVec(x, yz[0], yz[1]); +} + +template +static inline Vec4 MakeVec(const T& x, const T& y, const T& z, const T& w) { return Vec4{x, y, z, w}; } +template +static inline Vec4 MakeVec(const Vec2& xy, const T& z, const T& w) +{ + return MakeVec(xy[0], xy[1], z, w); +} + +template +static inline Vec4 MakeVec(const T& x, const Vec2& yz, 
const T& w) +{ + return MakeVec(x, yz[0], yz[1], w); +} + +// NOTE: This has priority over "Vec2> MakeVec(const Vec2& x, const Vec2& y)". +// Even if someone wanted to use an odd object like Vec2>, the compiler would error +// out soon enough due to misuse of the returned structure. +template +static inline Vec4 MakeVec(const Vec2& xy, const Vec2& zw) +{ + return MakeVec(xy[0], xy[1], zw[0], zw[1]); +} + +template +static inline Vec4 MakeVec(const Vec3& xyz, const T& w) +{ + return MakeVec(xyz[0], xyz[1], xyz[2], w); +} + +template +static inline Vec4 MakeVec(const T& x, const Vec2& yzw) +{ + return MakeVec(x, yzw[0], yzw[1], yzw[2]); +} + + } // namespace diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a7c1bab3e..f418518a1 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -78,10 +78,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); - min_x = min_x & Fix12P4::IntMask(); - min_y = min_y & Fix12P4::IntMask(); - max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask(); - max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask(); + min_x &= Fix12P4::IntMask(); + min_y &= Fix12P4::IntMask(); + max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask()); + max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask()); // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not // drawn. Pixels on any other triangle border are drawn. 
This is implemented with three bias @@ -112,10 +112,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, auto orient2d = [](const Math::Vec2& vtx1, const Math::Vec2& vtx2, const Math::Vec2& vtx3) { - const auto vec1 = (vtx2.Cast() - vtx1.Cast()).Append(0); - const auto vec2 = (vtx3.Cast() - vtx1.Cast()).Append(0); + const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); + const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); // TODO: There is a very small chance this will overflow for sizeof(int) == 4 - return Cross(vec1, vec2).z; + return Math::Cross(vec1, vec2).z; }; int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); @@ -143,15 +143,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, // // The generalization to three vertices is straightforward in baricentric coordinates. auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { - auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w, - attr1 / v1.pos.w, - attr2 / v2.pos.w); - auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w, - float24::FromFloat32(1.f) / v1.pos.w, - float24::FromFloat32(1.f) / v2.pos.w); - auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0), - float24::FromFloat32(w1), - float24::FromFloat32(w2)); + auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w, + attr1 / v1.pos.w, + attr2 / v2.pos.w); + auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w, + float24::FromFloat32(1.f) / v1.pos.w, + float24::FromFloat32(1.f) / v2.pos.w); + auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(w0), + float24::FromFloat32(w1), + float24::FromFloat32(w2)); float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index f0a8a5b60..847fdc450 100644 --- a/src/video_core/vertex_shader.h +++ 
b/src/video_core/vertex_shader.h @@ -27,7 +27,6 @@ struct OutputVertex { Math::Vec4 dummy; // quaternions (not implemented, yet) Math::Vec4 color; Math::Vec2 tc0; - float24 tc0_v; // Padding for optimal alignment float24 pad[14]; @@ -36,6 +35,7 @@ struct OutputVertex { // position after perspective divide Math::Vec3 screenpos; + float24 pad2; // Linear interpolation // factor: 0=this, 1=vtx @@ -59,6 +59,7 @@ struct OutputVertex { } }; static_assert(std::is_pod::value, "Structure is not POD"); +static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); union Instruction { enum class OpCode : u32 { From c4691b784bd7746c5845df00a82ea0909b37ec0f Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 16 Aug 2014 14:06:40 +0200 Subject: [PATCH 10/18] Pica: Add support for dumping textures. --- CMakeLists.txt | 5 + src/citra/CMakeLists.txt | 2 +- src/citra_qt/CMakeLists.txt | 2 +- src/video_core/debug_utils/debug_utils.cpp | 131 +++++++++++++++++++++ src/video_core/debug_utils/debug_utils.h | 2 + src/video_core/pica.h | 45 ++++++- 6 files changed, 184 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 56f5d02b0..4c7b3dd73 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,11 @@ add_definitions(-Wno-attributes) add_definitions(-DSINGLETHREADED) add_definitions(${CXX_COMPILE_FLAGS}) +find_package(PNG) +if (PNG_FOUND) + add_definitions(-DHAVE_PNG) +endif () + # dependency checking list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules/") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/CMakeTests) diff --git a/src/citra/CMakeLists.txt b/src/citra/CMakeLists.txt index b1ab67598..1cbe22cc0 100644 --- a/src/citra/CMakeLists.txt +++ b/src/citra/CMakeLists.txt @@ -12,7 +12,7 @@ add_executable(citra ${SRCS} ${HEADERS}) if (APPLE) target_link_libraries(citra core common video_core iconv pthread ${COREFOUNDATION_LIBRARY} ${OPENGL_LIBRARIES} ${GLEW_LIBRARY} 
${GLFW_LIBRARIES}) else() - target_link_libraries(citra core common video_core GLEW pthread X11 Xxf86vm Xi Xcursor ${OPENGL_LIBRARIES} ${GLFW_LIBRARIES} rt ${X11_Xrandr_LIB} ${X11_xv86vmode_LIB}) + target_link_libraries(citra core common video_core GLEW pthread X11 Xxf86vm Xi Xcursor ${OPENGL_LIBRARIES} ${GLFW_LIBRARIES} rt ${X11_Xrandr_LIB} ${X11_xv86vmode_LIB} ${PNG_LIBRARIES}) endif() #install(TARGETS citra RUNTIME DESTINATION ${bindir}) diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index ff1fbc460..8ad6759b7 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt @@ -47,7 +47,7 @@ else() set(RT_LIBRARY rt) endif() -target_link_libraries(citra-qt core common video_core qhexedit ${ICONV_LIBRARY} ${COREFOUNDATION_LIBRARY} ${QT_LIBRARIES} ${OPENGL_LIBRARIES} ${RT_LIBRARY} ${GLEW_LIBRARY}) +target_link_libraries(citra-qt core common video_core qhexedit ${ICONV_LIBRARY} ${COREFOUNDATION_LIBRARY} ${QT_LIBRARIES} ${OPENGL_LIBRARIES} ${RT_LIBRARY} ${GLEW_LIBRARY} ${PNG_LIBRARIES}) if(USE_QT5) target_link_libraries(citra-qt Qt5::Gui Qt5::Widgets Qt5::OpenGL) endif() diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 1bbc0330c..753de41e8 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -3,10 +3,17 @@ // Refer to the license.txt file included. #include +#include #include #include #include +#ifdef HAVE_PNG +#include +#endif + +#include "common/file_util.h" + #include "video_core/pica.h" #include "debug_utils.h" @@ -315,6 +322,130 @@ std::unique_ptr FinishPicaTracing() return std::move(ret); } +void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { + // NOTE: Permanently enabling this just trashes hard disks for no reason. + // Hence, this is currently disabled. 
+ return; + +#ifndef HAVE_PNG + return; +#else + if (!data) + return; + + // Write data to file + static int dump_index = 0; + std::string filename = std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png"); + u32 row_stride = texture_config.width * 3; + + u8* buf; + + char title[] = "Citra texture dump"; + char title_key[] = "Title"; + png_structp png_ptr = nullptr; + png_infop info_ptr = nullptr; + + // Open file for writing (binary mode) + File::IOFile fp(filename, "wb"); + + // Initialize write structure + png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); + if (png_ptr == nullptr) { + ERROR_LOG(GPU, "Could not allocate write struct\n"); + goto finalise; + + } + + // Initialize info structure + info_ptr = png_create_info_struct(png_ptr); + if (info_ptr == nullptr) { + ERROR_LOG(GPU, "Could not allocate info struct\n"); + goto finalise; + } + + // Setup Exception handling + if (setjmp(png_jmpbuf(png_ptr))) { + ERROR_LOG(GPU, "Error during png creation\n"); + goto finalise; + } + + png_init_io(png_ptr, fp.GetHandle()); + + // Write header (8 bit colour depth) + png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, + 8, PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); + + png_text title_text; + title_text.compression = PNG_TEXT_COMPRESSION_NONE; + title_text.key = title_key; + title_text.text = title; + png_set_text(png_ptr, info_ptr, &title_text, 1); + + png_write_info(png_ptr, info_ptr); + + buf = new u8[row_stride * texture_config.height]; + for (int y = 0; y < texture_config.height; ++y) { + for (int x = 0; x < texture_config.width; ++x) { + // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each + // of which is composed of four 2x2 subtiles each of which is composed of four texels. + // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. 
+ // texels are laid out in a 2x2 subtile like this: + // 2 3 + // 0 1 + // + // The full 8x8 tile has the texels arranged like this: + // + // 42 43 46 47 58 59 62 63 + // 40 41 44 45 56 57 60 61 + // 34 35 38 39 50 51 54 55 + // 32 33 36 37 48 49 52 53 + // 10 11 14 15 26 27 30 31 + // 08 09 12 13 24 25 28 29 + // 02 03 06 07 18 19 22 23 + // 00 01 04 05 16 17 20 21 + int texel_index_within_tile = 0; + for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { + int sub_tile_width = 1 << block_size_index; + int sub_tile_height = 1 << block_size_index; + + int sub_tile_index = (x & sub_tile_width) << block_size_index; + sub_tile_index += 2 * ((y & sub_tile_height) << block_size_index); + texel_index_within_tile += sub_tile_index; + } + + const int block_width = 8; + const int block_height = 8; + + int coarse_x = (x / block_width) * block_width; + int coarse_y = (y / block_height) * block_height; + + u8* source_ptr = (u8*)data + coarse_x * block_height * 3 + coarse_y * row_stride + texel_index_within_tile * 3; + buf[3 * x + y * row_stride ] = source_ptr[2]; + buf[3 * x + y * row_stride + 1] = source_ptr[1]; + buf[3 * x + y * row_stride + 2] = source_ptr[0]; + } + } + + // Write image data + for (auto y = 0; y < texture_config.height; ++y) + { + u8* row_ptr = (u8*)buf + y * row_stride; + u8* ptr = row_ptr; + png_write_row(png_ptr, row_ptr); + } + + delete[] buf; + + // End write + png_write_end(png_ptr, nullptr); + +finalise: + if (info_ptr != nullptr) png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1); + if (png_ptr != nullptr) png_destroy_write_struct(&png_ptr, (png_infopp)nullptr); +#endif +} + } // namespace } // namespace diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 023500066..49b19b28a 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -59,6 +59,8 @@ bool IsPicaTracing(); void OnPicaRegWrite(u32 id, u32 value); std::unique_ptr 
FinishPicaTracing(); +void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); + } // namespace } // namespace diff --git a/src/video_core/pica.h b/src/video_core/pica.h index fe886c16f..f288615b8 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -94,7 +94,46 @@ struct Regs { BitField<16, 16, u32> y; } viewport_corner; - INSERT_PADDING_WORDS(0xa7); + INSERT_PADDING_WORDS(0x18); + + struct TextureConfig { + INSERT_PADDING_WORDS(0x1); + + union { + BitField< 0, 16, u32> height; + BitField<16, 16, u32> width; + }; + + INSERT_PADDING_WORDS(0x2); + + u32 address; + + u32 GetPhysicalAddress() { + return DecodeAddressRegister(address) - Memory::FCRAM_PADDR + Memory::HEAP_GSP_VADDR; + } + + // texture1 and texture2 store the texture format directly after the address + // whereas texture0 inserts some additional flags inbetween. + // Hence, we store the format separately so that all other parameters can be described + // in a single structure. + }; + + enum class TextureFormat : u32 { + RGBA8 = 0, + RGB8 = 1, + RGBA5551 = 2, + RGB565 = 3, + RGBA4 = 4, + + // TODO: Support for the other formats is not implemented, yet. + // Seems like they are luminance formats and compressed textures. 
+ }; + + TextureConfig texture0; + INSERT_PADDING_WORDS(0x8); + BitField<0, 4, TextureFormat> texture0_format; + + INSERT_PADDING_WORDS(0x81); struct { enum ColorFormat : u32 { @@ -403,6 +442,8 @@ struct Regs { ADD_FIELD(viewport_depth_range); ADD_FIELD(viewport_depth_far_plane); ADD_FIELD(viewport_corner); + ADD_FIELD(texture0); + ADD_FIELD(texture0_format); ADD_FIELD(framebuffer); ADD_FIELD(vertex_attributes); ADD_FIELD(index_array); @@ -460,6 +501,8 @@ ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); ASSERT_REG_POSITION(viewport_corner, 0x68); +ASSERT_REG_POSITION(texture0, 0x81); +ASSERT_REG_POSITION(texture0_format, 0x8e); ASSERT_REG_POSITION(framebuffer, 0x110); ASSERT_REG_POSITION(vertex_attributes, 0x200); ASSERT_REG_POSITION(index_array, 0x227); From 27cab6477e7e72771d0661418d71cce3c2721723 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 15 Aug 2014 16:33:17 +0200 Subject: [PATCH 11/18] Pica/Rasterizer: Add initial implementation of texture combiners. --- src/video_core/pica.h | 105 ++++++++++++++++++++++++++++- src/video_core/rasterizer.cpp | 122 +++++++++++++++++++++++++++++++++- 2 files changed, 225 insertions(+), 2 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index f288615b8..7bd4388b5 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include #include @@ -133,7 +134,97 @@ struct Regs { INSERT_PADDING_WORDS(0x8); BitField<0, 4, TextureFormat> texture0_format; - INSERT_PADDING_WORDS(0x81); + INSERT_PADDING_WORDS(0x31); + + // 0xc0-0xff: Texture Combiner (akin to glTexEnv) + struct TevStageConfig { + enum class Source : u32 { + PrimaryColor = 0x0, + Texture0 = 0x3, + Texture1 = 0x4, + Texture2 = 0x5, + Texture3 = 0x6, + // 0x7-0xc = primary color?? 
+ Constant = 0xe, + Previous = 0xf, + }; + + enum class ColorModifier : u32 { + SourceColor = 0, + OneMinusSourceColor = 1, + SourceAlpha = 2, + OneMinusSourceAlpha = 3, + + // Other values seem to be non-standard extensions + }; + + enum class AlphaModifier : u32 { + SourceAlpha = 0, + OneMinusSourceAlpha = 1, + + // Other values seem to be non-standard extensions + }; + + enum class Operation : u32 { + Replace = 0, + Modulate = 1, + Add = 2, + AddSigned = 3, + Lerp = 4, + Subtract = 5, + }; + + union { + BitField< 0, 4, Source> color_source1; + BitField< 4, 4, Source> color_source2; + BitField< 8, 4, Source> color_source3; + BitField<16, 4, Source> alpha_source1; + BitField<20, 4, Source> alpha_source2; + BitField<24, 4, Source> alpha_source3; + }; + + union { + BitField< 0, 4, ColorModifier> color_modifier1; + BitField< 4, 4, ColorModifier> color_modifier2; + BitField< 8, 4, ColorModifier> color_modifier3; + BitField<12, 3, AlphaModifier> alpha_modifier1; + BitField<16, 3, AlphaModifier> alpha_modifier2; + BitField<20, 3, AlphaModifier> alpha_modifier3; + }; + + union { + BitField< 0, 4, Operation> color_op; + BitField<16, 4, Operation> alpha_op; + }; + + union { + BitField< 0, 8, u32> const_r; + BitField< 8, 8, u32> const_g; + BitField<16, 8, u32> const_b; + BitField<24, 8, u32> const_a; + }; + + INSERT_PADDING_WORDS(0x1); + }; + + TevStageConfig tev_stage0; + INSERT_PADDING_WORDS(0x3); + TevStageConfig tev_stage1; + INSERT_PADDING_WORDS(0x3); + TevStageConfig tev_stage2; + INSERT_PADDING_WORDS(0x3); + TevStageConfig tev_stage3; + INSERT_PADDING_WORDS(0x13); + TevStageConfig tev_stage4; + INSERT_PADDING_WORDS(0x3); + TevStageConfig tev_stage5; + INSERT_PADDING_WORDS(0x13); + + const std::array GetTevStages() const { + return { tev_stage0, tev_stage1, + tev_stage2, tev_stage3, + tev_stage4, tev_stage5 }; + }; struct { enum ColorFormat : u32 { @@ -444,6 +535,12 @@ struct Regs { ADD_FIELD(viewport_corner); ADD_FIELD(texture0); ADD_FIELD(texture0_format); + 
ADD_FIELD(tev_stage0); + ADD_FIELD(tev_stage1); + ADD_FIELD(tev_stage2); + ADD_FIELD(tev_stage3); + ADD_FIELD(tev_stage4); + ADD_FIELD(tev_stage5); ADD_FIELD(framebuffer); ADD_FIELD(vertex_attributes); ADD_FIELD(index_array); @@ -503,6 +600,12 @@ ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); ASSERT_REG_POSITION(viewport_corner, 0x68); ASSERT_REG_POSITION(texture0, 0x81); ASSERT_REG_POSITION(texture0_format, 0x8e); +ASSERT_REG_POSITION(tev_stage0, 0xc0); +ASSERT_REG_POSITION(tev_stage1, 0xc8); +ASSERT_REG_POSITION(tev_stage2, 0xd0); +ASSERT_REG_POSITION(tev_stage3, 0xd8); +ASSERT_REG_POSITION(tev_stage4, 0xf0); +ASSERT_REG_POSITION(tev_stage5, 0xf8); ASSERT_REG_POSITION(framebuffer, 0x110); ASSERT_REG_POSITION(vertex_attributes, 0x200); ASSERT_REG_POSITION(index_array, 0x227); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index f418518a1..5a4155c84 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -165,12 +165,132 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) }; + // Texture environment - consists of 6 stages of color and alpha combining. + // + // Color combiners take three input color values from some source (e.g. interpolated + // vertex color, texture color, previous stage, etc), perform some very simple + // operations on each of them (e.g. inversion) and then calculate the output color + // with some basic arithmetic. Alpha combiners can be configured separately but work + // analogously. 
+ Math::Vec4 combiner_output; + for (auto tev_stage : registers.GetTevStages()) { + using Source = Regs::TevStageConfig::Source; + using ColorModifier = Regs::TevStageConfig::ColorModifier; + using AlphaModifier = Regs::TevStageConfig::AlphaModifier; + using Operation = Regs::TevStageConfig::Operation; + + auto GetColorSource = [&](Source source) -> Math::Vec3 { + switch (source) { + case Source::PrimaryColor: + return primary_color.rgb(); + + case Source::Constant: + return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; + + case Source::Previous: + return combiner_output.rgb(); + + default: + ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source); + return {}; + } + }; + + auto GetAlphaSource = [&](Source source) -> u8 { + switch (source) { + case Source::PrimaryColor: + return primary_color.a(); + + case Source::Constant: + return tev_stage.const_a; + + case Source::Previous: + return combiner_output.a(); + + default: + ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source); + return 0; + } + }; + + auto GetColorModifier = [](ColorModifier factor, const Math::Vec3& values) -> Math::Vec3 { + switch (factor) + { + case ColorModifier::SourceColor: + return values; + default: + ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); + return {}; + } + }; + + auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { + switch (factor) { + case AlphaModifier::SourceAlpha: + return value; + default: + ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); + return 0; + } + }; + + auto ColorCombine = [](Operation op, const Math::Vec3 input[3]) -> Math::Vec3 { + switch (op) { + case Operation::Replace: + return input[0]; + + case Operation::Modulate: + return ((input[0] * input[1]) / 255).Cast(); + + default: + ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op); + return {}; + } + }; + + auto AlphaCombine = [](Operation op, const std::array& input) -> u8 { + switch (op) { + case Operation::Replace: + return 
input[0]; + + case Operation::Modulate: + return input[0] * input[1] / 255; + + default: + ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op); + return 0; + } + }; + + // color combiner + // NOTE: Not sure if the alpha combiner might use the color output of the previous + // stage as input. Hence, we currently don't directly write the result to + // combiner_output.rgb(), but instead store it in a temporary variable until + // alpha combining has been done. + Math::Vec3 color_result[3] = { + GetColorModifier(tev_stage.color_modifier1, GetColorSource(tev_stage.color_source1)), + GetColorModifier(tev_stage.color_modifier2, GetColorSource(tev_stage.color_source2)), + GetColorModifier(tev_stage.color_modifier3, GetColorSource(tev_stage.color_source3)) + }; + auto color_output = ColorCombine(tev_stage.color_op, color_result); + + // alpha combiner + std::array alpha_result = { + GetAlphaModifier(tev_stage.alpha_modifier1, GetAlphaSource(tev_stage.alpha_source1)), + GetAlphaModifier(tev_stage.alpha_modifier2, GetAlphaSource(tev_stage.alpha_source2)), + GetAlphaModifier(tev_stage.alpha_modifier3, GetAlphaSource(tev_stage.alpha_source3)) + }; + auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); + + combiner_output = Math::MakeVec(color_output, alpha_output); + } + u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + (float)v1.screenpos[2].ToFloat32() * w1 + (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536? SetDepth(x >> 4, y >> 4, z); - DrawPixel(x >> 4, y >> 4, primary_color); + DrawPixel(x >> 4, y >> 4, combiner_output); } } } From 34fa0b6d9cd9e4027198ce11562da6c03375cd70 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 17 Aug 2014 12:17:32 +0200 Subject: [PATCH 12/18] Pica/DebugUtils: Add convenient tev setup printer. 
--- src/video_core/command_processor.cpp | 2 + src/video_core/debug_utils/debug_utils.cpp | 97 ++++++++++++++++++++++ src/video_core/debug_utils/debug_utils.h | 2 + 3 files changed, 101 insertions(+) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 76fdb4e4a..8da030601 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -40,6 +40,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX(trigger_draw): case PICA_REG_INDEX(trigger_draw_indexed): { + DebugUtils::DumpTevStageConfig(registers.GetTevStages()); + const auto& attribute_config = registers.vertex_attributes; const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress()); diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 753de41e8..619c0fbe5 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -446,6 +446,103 @@ finalise: #endif } +void DumpTevStageConfig(const std::array& stages) +{ + using Source = Pica::Regs::TevStageConfig::Source; + using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; + using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; + using Operation = Pica::Regs::TevStageConfig::Operation; + + std::string stage_info = "Tev setup:\n"; + for (int index = 0; index < stages.size(); ++index) { + const auto& tev_stage = stages[index]; + + const std::map source_map = { + { Source::PrimaryColor, "PrimaryColor" }, + { Source::Texture0, "Texture0" }, + { Source::Constant, "Constant" }, + { Source::Previous, "Previous" }, + }; + + const std::map color_modifier_map = { + { ColorModifier::SourceColor, { "%source.rgb" } } + }; + const std::map alpha_modifier_map = { + { AlphaModifier::SourceAlpha, "%source.a" } + }; + + std::map combiner_map = { + { Operation::Replace, "%source1" }, + { Operation::Modulate, "(%source1 * %source2) / 255" 
}, + }; + + auto ReplacePattern = + [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string { + size_t start = input.find(pattern); + if (start == std::string::npos) + return input; + + std::string ret = input; + ret.replace(start, pattern.length(), replacement); + return ret; + }; + auto GetColorSourceStr = + [&source_map,&color_modifier_map,&ReplacePattern](const Source& src, const ColorModifier& modifier) { + auto src_it = source_map.find(src); + std::string src_str = "Unknown"; + if (src_it != source_map.end()) + src_str = src_it->second; + + auto modifier_it = color_modifier_map.find(modifier); + std::string modifier_str = "%source.????"; + if (modifier_it != color_modifier_map.end()) + modifier_str = modifier_it->second; + + return ReplacePattern(modifier_str, "%source", src_str); + }; + auto GetColorCombinerStr = + [&](const Regs::TevStageConfig& tev_stage) { + auto op_it = combiner_map.find(tev_stage.color_op); + std::string op_str = "Unknown op (%source1, %source2, %source3)"; + if (op_it != combiner_map.end()) + op_str = op_it->second; + + op_str = ReplacePattern(op_str, "%source1", GetColorSourceStr(tev_stage.color_source1, tev_stage.color_modifier1)); + op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2)); + return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3)); + }; + auto GetAlphaSourceStr = + [&source_map,&alpha_modifier_map,&ReplacePattern](const Source& src, const AlphaModifier& modifier) { + auto src_it = source_map.find(src); + std::string src_str = "Unknown"; + if (src_it != source_map.end()) + src_str = src_it->second; + + auto modifier_it = alpha_modifier_map.find(modifier); + std::string modifier_str = "%source.????"; + if (modifier_it != alpha_modifier_map.end()) + modifier_str = modifier_it->second; + + return ReplacePattern(modifier_str, "%source", src_str); + }; + auto 
GetAlphaCombinerStr = + [&](const Regs::TevStageConfig& tev_stage) { + auto op_it = combiner_map.find(tev_stage.alpha_op); + std::string op_str = "Unknown op (%source1, %source2, %source3)"; + if (op_it != combiner_map.end()) + op_str = op_it->second; + + op_str = ReplacePattern(op_str, "%source1", GetAlphaSourceStr(tev_stage.alpha_source1, tev_stage.alpha_modifier1)); + op_str = ReplacePattern(op_str, "%source2", GetAlphaSourceStr(tev_stage.alpha_source2, tev_stage.alpha_modifier2)); + return ReplacePattern(op_str, "%source3", GetAlphaSourceStr(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); + }; + + stage_info += "Stage " + std::to_string(index) + ": " + GetColorCombinerStr(tev_stage) + " " + GetAlphaCombinerStr(tev_stage) + "\n"; + } + + DEBUG_LOG(GPU, "%s", stage_info.c_str()); +} + } // namespace } // namespace diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 49b19b28a..53c33c96e 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -61,6 +61,8 @@ std::unique_ptr FinishPicaTracing(); void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); +void DumpTevStageConfig(const std::array& stages); + } // namespace } // namespace From 9679d231df0bc8fac9e0e596ab78750bb38ef248 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 17 Aug 2014 12:31:19 +0200 Subject: [PATCH 13/18] Pica/Rasterizer: Add texturing support. 
--- src/video_core/debug_utils/debug_utils.cpp | 18 +----- src/video_core/pica.h | 5 +- src/video_core/rasterizer.cpp | 64 ++++++++++++++++++++++ 3 files changed, 69 insertions(+), 18 deletions(-) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 619c0fbe5..f7d9455be 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -387,23 +387,7 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { buf = new u8[row_stride * texture_config.height]; for (int y = 0; y < texture_config.height; ++y) { for (int x = 0; x < texture_config.width; ++x) { - // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each - // of which is composed of four 2x2 subtiles each of which is composed of four texels. - // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. - // texels are laid out in a 2x2 subtile like this: - // 2 3 - // 0 1 - // - // The full 8x8 tile has the texels arranged like this: - // - // 42 43 46 47 58 59 62 63 - // 40 41 44 45 56 57 60 61 - // 34 35 38 39 50 51 54 55 - // 32 33 36 37 48 49 52 53 - // 10 11 14 15 26 27 30 31 - // 08 09 12 13 24 25 28 29 - // 02 03 06 07 18 19 22 23 - // 00 01 04 05 16 17 20 21 + // Cf. rasterizer code for an explanation of this algorithm. int texel_index_within_tile = 0; for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { int sub_tile_width = 1 << block_size_index; diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 7bd4388b5..cfdc9b934 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -95,7 +95,7 @@ struct Regs { BitField<16, 16, u32> y; } viewport_corner; - INSERT_PADDING_WORDS(0x18); + INSERT_PADDING_WORDS(0x17); struct TextureConfig { INSERT_PADDING_WORDS(0x1); @@ -130,6 +130,7 @@ struct Regs { // Seems like they are luminance formats and compressed textures. 
}; + BitField<0, 1, u32> texturing_enable; TextureConfig texture0; INSERT_PADDING_WORDS(0x8); BitField<0, 4, TextureFormat> texture0_format; @@ -533,6 +534,7 @@ struct Regs { ADD_FIELD(viewport_depth_range); ADD_FIELD(viewport_depth_far_plane); ADD_FIELD(viewport_corner); + ADD_FIELD(texturing_enable); ADD_FIELD(texture0); ADD_FIELD(texture0_format); ADD_FIELD(tev_stage0); @@ -598,6 +600,7 @@ ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); ASSERT_REG_POSITION(viewport_corner, 0x68); +ASSERT_REG_POSITION(texturing_enable, 0x80); ASSERT_REG_POSITION(texture0, 0x81); ASSERT_REG_POSITION(texture0_format, 0x8e); ASSERT_REG_POSITION(tev_stage0, 0xc0); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 5a4155c84..1bd32e8d0 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -11,6 +11,8 @@ #include "rasterizer.h" #include "vertex_shader.h" +#include "debug_utils/debug_utils.h" + namespace Pica { namespace Rasterizer { @@ -165,6 +167,62 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) }; + Math::Vec4 texture_color{}; + float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); + float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); + if (registers.texturing_enable) { + // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each + // of which is composed of four 2x2 subtiles each of which is composed of four texels. + // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. 
+ // texels are laid out in a 2x2 subtile like this: + // 2 3 + // 0 1 + // + // The full 8x8 tile has the texels arranged like this: + // + // 42 43 46 47 58 59 62 63 + // 40 41 44 45 56 57 60 61 + // 34 35 38 39 50 51 54 55 + // 32 33 36 37 48 49 52 53 + // 10 11 14 15 26 27 30 31 + // 08 09 12 13 24 25 28 29 + // 02 03 06 07 18 19 22 23 + // 00 01 04 05 16 17 20 21 + + // TODO: This is currently hardcoded for RGB8 + u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress()); + + // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. + // To be flexible in case different but similar patterns are used, we keep this + // somewhat inefficient code around for now. + int s = (int)(u * float24::FromFloat32(registers.texture0.width)).ToFloat32(); + int t = (int)(v * float24::FromFloat32(registers.texture0.height)).ToFloat32(); + int texel_index_within_tile = 0; + for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { + int sub_tile_width = 1 << block_size_index; + int sub_tile_height = 1 << block_size_index; + + int sub_tile_index = (s & sub_tile_width) << block_size_index; + sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index); + texel_index_within_tile += sub_tile_index; + } + + const int block_width = 8; + const int block_height = 8; + + int coarse_s = (s / block_width) * block_width; + int coarse_t = (t / block_height) * block_height; + + const int row_stride = registers.texture0.width * 3; + u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3; + texture_color.r() = source_ptr[2]; + texture_color.g() = source_ptr[1]; + texture_color.b() = source_ptr[0]; + texture_color.a() = 0xFF; + + DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data); + } + // Texture environment - consists of 6 stages of color and alpha combining. // // Color combiners take three input color values from some source (e.g. 
interpolated @@ -184,6 +242,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, case Source::PrimaryColor: return primary_color.rgb(); + case Source::Texture0: + return texture_color.rgb(); + case Source::Constant: return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; @@ -201,6 +262,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, case Source::PrimaryColor: return primary_color.a(); + case Source::Texture0: + return texture_color.a(); + case Source::Constant: return tev_stage.const_a; From 2f1c129f6407fe2d5c8c3e57c6717d5668570de5 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 17 Aug 2014 17:44:55 +0200 Subject: [PATCH 14/18] Pica: Consolidate the primitive assembly code in PrimitiveAssembly and GeometryDumper. --- src/video_core/command_processor.cpp | 18 ++++++++-- src/video_core/debug_utils/debug_utils.cpp | 22 ++++--------- src/video_core/debug_utils/debug_utils.h | 12 +++---- src/video_core/primitive_assembly.cpp | 28 +++++++++------- src/video_core/primitive_assembly.h | 38 +++++++++++++++++----- 5 files changed, 73 insertions(+), 45 deletions(-) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 8da030601..9567a9849 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 // Refer to the license.txt file included. 
+#include "clipper.h" #include "command_processor.h" #include "math.h" #include "pica.h" @@ -79,6 +80,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { bool index_u16 = (bool)index_info.format; DebugUtils::GeometryDumper geometry_dumper; + PrimitiveAssembler clipper_primitive_assembler(registers.triangle_topology.Value()); + PrimitiveAssembler dumping_primitive_assembler(registers.triangle_topology.Value()); for (int index = 0; index < registers.num_vertices; ++index) { @@ -108,16 +111,25 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { } } - // NOTE: For now, we simply assume that the first input attribute corresponds to the position. - geometry_dumper.AddVertex({input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32()}, registers.triangle_topology); + // NOTE: When dumping geometry, we simply assume that the first input attribute + // corresponds to the position for now. + DebugUtils::GeometryDumper::Vertex dumped_vertex = { + input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32() + }; + using namespace std::placeholders; + dumping_primitive_assembler.SubmitVertex(dumped_vertex, + std::bind(&DebugUtils::GeometryDumper::AddTriangle, + &geometry_dumper, _1, _2, _3)); + // Send to vertex shader VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); if (is_indexed) { // TODO: Add processed vertex to vertex cache! 
} - PrimitiveAssembly::SubmitVertex(output); + // Send to triangle clipper + clipper_primitive_assembler.SubmitVertex(output, Clipper::ProcessTriangle); } geometry_dumper.Dump(); break; diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index f7d9455be..48e6dd182 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -22,27 +22,17 @@ namespace Pica { namespace DebugUtils { -void GeometryDumper::AddVertex(std::array pos, TriangleTopology topology) { - vertices.push_back({pos[0], pos[1], pos[2]}); +void GeometryDumper::AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2) { + vertices.push_back(v0); + vertices.push_back(v1); + vertices.push_back(v2); int num_vertices = vertices.size(); - - switch (topology) { - case TriangleTopology::List: - case TriangleTopology::ListIndexed: - if (0 == (num_vertices % 3)) - faces.push_back({ num_vertices-3, num_vertices-2, num_vertices-1 }); - break; - - default: - ERROR_LOG(GPU, "Unknown triangle topology %x", (int)topology); - exit(0); - break; - } + faces.push_back({ num_vertices-3, num_vertices-2, num_vertices-1 }); } void GeometryDumper::Dump() { - // NOTE: Permanently enabling this just trashes hard disks for no reason. + // NOTE: Permanently enabling this just trashes the hard disk for no reason. // Hence, this is currently disabled. 
return; diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 53c33c96e..8b1499bf2 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -14,20 +14,18 @@ namespace Pica { namespace DebugUtils { -using TriangleTopology = Regs::TriangleTopology; - // Simple utility class for dumping geometry data to an OBJ file class GeometryDumper { public: - void AddVertex(std::array pos, TriangleTopology topology); - - void Dump(); - -private: struct Vertex { std::array pos; }; + void AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2); + + void Dump(); + +private: struct Face { int index[3]; }; diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index 2354ffb99..dabf2d1a3 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -2,21 +2,23 @@ // Licensed under GPLv2 // Refer to the license.txt file included. -#include "clipper.h" #include "pica.h" #include "primitive_assembly.h" #include "vertex_shader.h" +#include "video_core/debug_utils/debug_utils.h" + namespace Pica { -namespace PrimitiveAssembly { +template +PrimitiveAssembler::PrimitiveAssembler(Regs::TriangleTopology topology) + : topology(topology), buffer_index(0) { +} -static OutputVertex buffer[2]; -static int buffer_index = 0; // TODO: reset this on emulation restart - -void SubmitVertex(OutputVertex& vtx) +template +void PrimitiveAssembler::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) { - switch (registers.triangle_topology) { + switch (topology) { case Regs::TriangleTopology::List: case Regs::TriangleTopology::ListIndexed: if (buffer_index < 2) { @@ -24,7 +26,7 @@ void SubmitVertex(OutputVertex& vtx) } else { buffer_index = 0; - Clipper::ProcessTriangle(buffer[0], buffer[1], vtx); + triangle_handler(buffer[0], buffer[1], vtx); } break; @@ -32,7 +34,7 @@ void SubmitVertex(OutputVertex& vtx) if (buffer_index == 2) { 
buffer_index = 0; - Clipper::ProcessTriangle(buffer[0], buffer[1], vtx); + triangle_handler(buffer[0], buffer[1], vtx); buffer[1] = vtx; } else { @@ -41,11 +43,15 @@ void SubmitVertex(OutputVertex& vtx) break; default: - ERROR_LOG(GPU, "Unknown triangle mode %x:", (int)registers.triangle_topology.Value()); + ERROR_LOG(GPU, "Unknown triangle topology %x:", (int)topology); break; } } -} // namespace +// explicitly instantiate use cases +template +struct PrimitiveAssembler; +template +struct PrimitiveAssembler; } // namespace diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index 2a2b0c170..ea2e2f61e 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h @@ -4,18 +4,40 @@ #pragma once +#include + +#include "video_core/pica.h" + +#include "video_core/vertex_shader.h" + namespace Pica { -namespace VertexShader { - struct OutputVertex; -} +/* + * Utility class to build triangles from a series of vertices, + * according to a given triangle topology. + */ +template +struct PrimitiveAssembler { + using TriangleHandler = std::function; -namespace PrimitiveAssembly { + PrimitiveAssembler(Regs::TriangleTopology topology); -using VertexShader::OutputVertex; + /* + * Queues a vertex, builds primitives from the vertex queue according to the given + * triangle topology, and calls triangle_handler for each generated primitive. + * NOTE: We could specify the triangle handler in the constructor, but this way we can + * keep event and handler code next to each other. + */ + void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler); + +private: + Regs::TriangleTopology topology; + + int buffer_index; + VertexType buffer[2]; +}; -void SubmitVertex(OutputVertex& vtx); - -} // namespace } // namespace From 941762a573365884a2f7ec1d9a5c2634b9d97995 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 23 Aug 2014 13:42:34 +0200 Subject: [PATCH 15/18] GPU: Fix a compiler warning about redundant semicolons. 
--- src/core/hw/gpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index d20311a00..7186bfa84 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -42,7 +42,7 @@ struct Regs { // depending on the current source line to make sure variable names are unique. #define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y #define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y) -#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)]; +#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)] // helper macro to make sure the defined structures are of the expected size. #if defined(_MSC_VER) @@ -53,7 +53,7 @@ struct Regs { #else #define ASSERT_MEMBER_SIZE(name, size_in_bytes) \ static_assert(sizeof(name) == size_in_bytes, \ - "Structure size and register block length don't match"); + "Structure size and register block length don't match") #endif enum class FramebufferFormat : u32 { From b5b3aeb57699e531bdc0b8be156aa6476ea4f15d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 23 Aug 2014 13:48:00 +0200 Subject: [PATCH 16/18] Math: Warning fixes. 
--- src/video_core/math.h | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/video_core/math.h b/src/video_core/math.h index ca1fb0df2..83ba81235 100644 --- a/src/video_core/math.h +++ b/src/video_core/math.h @@ -50,9 +50,8 @@ static inline Vec4 MakeVec(const T& x, const T& y, const T& z, const T& w); template class Vec2 { public: - struct { - T x,y; - }; + T x; + T y; T* AsArray() { return &x; } @@ -176,10 +175,9 @@ template class Vec3 { public: - struct - { - T x,y,z; - }; + T x; + T y; + T z; T* AsArray() { return &x; } @@ -315,7 +313,7 @@ public: _DEFINE_SWIZZLER2(b, a, b##a); \ _DEFINE_SWIZZLER2(b, a, b2##a2); \ _DEFINE_SWIZZLER2(b, a, b3##a3); \ - _DEFINE_SWIZZLER2(b, a, b4##a4); + _DEFINE_SWIZZLER2(b, a, b4##a4) DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t); DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q); @@ -330,16 +328,27 @@ Vec3 operator * (const V& f, const Vec3& vec) return Vec3(f*vec.x,f*vec.y,f*vec.z); } +template<> +inline float Vec3::Length() const { + return std::sqrt(x * x + y * y + z * z); +} + +template<> +inline Vec3 Vec3::Normalized() const { + return *this / Length(); +} + + typedef Vec3 Vec3f; template class Vec4 { public: - struct - { - T x,y,z,w; - }; + T x; + T y; + T z; + T w; T* AsArray() { return &x; } @@ -456,7 +465,7 @@ public: _DEFINE_SWIZZLER2(a, b, a##b); \ _DEFINE_SWIZZLER2(a, b, a2##b2); \ _DEFINE_SWIZZLER2(b, a, b##a); \ - _DEFINE_SWIZZLER2(b, a, b2##a2); + _DEFINE_SWIZZLER2(b, a, b2##a2) DEFINE_SWIZZLER2(x, y, r, g); DEFINE_SWIZZLER2(x, z, r, b); @@ -480,7 +489,7 @@ public: _DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \ _DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \ _DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \ - _DEFINE_SWIZZLER3(c, b, a, c2##b2##a2); + _DEFINE_SWIZZLER3(c, b, a, c2##b2##a2) DEFINE_SWIZZLER3(x, y, z, r, g, b); DEFINE_SWIZZLER3(x, y, w, r, g, a); From 697b50c3ff1b0d179c5862efa56cb19fa640824c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 23 Aug 2014 14:14:34 +0200 
Subject: [PATCH 17/18] Pica/VertexShader: Fix a bug in the call stack handling. --- src/video_core/vertex_shader.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index cdecbff3c..db8244317 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -191,8 +191,9 @@ static void ProcessShaderCode(VertexShaderState& state) { if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { exit_loop = true; } else { - state.program_counter = &shader_memory[*state.call_stack_pointer--]; - *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS; + // Jump back to call stack position, invalidate call stack entry, move up call stack pointer + state.program_counter = &shader_memory[*state.call_stack_pointer]; + *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS; } break; From a79644c9baaeaa88e79db9837e9ed6e2b74e9889 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 25 Aug 2014 21:45:49 +0200 Subject: [PATCH 18/18] Pica/Rasterizer: Clarify a TODO. --- src/video_core/rasterizer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 1bd32e8d0..cdfdb6215 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -349,9 +349,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, combiner_output = Math::MakeVec(color_output, alpha_output); } + // TODO: Not sure if the multiplication by 65535 has already been taken care + // of when transforming to screen coordinates or not. u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + (float)v1.screenpos[2].ToFloat32() * w1 + - (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536? + (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); SetDepth(x >> 4, y >> 4, z); DrawPixel(x >> 4, y >> 4, combiner_output);