mirror of
https://github.com/Lime3DS/Lime3DS
synced 2024-12-27 01:22:37 -06:00
Merge pull request #933 from neobrain/shader_debugger
Shader debugger improvements
This commit is contained in:
commit
7312894a6a
11 changed files with 674 additions and 97 deletions
|
@ -6,9 +6,16 @@
|
|||
#include <sstream>
|
||||
|
||||
#include <QBoxLayout>
|
||||
#include <QFileDialog>
|
||||
#include <QGroupBox>
|
||||
#include <QLabel>
|
||||
#include <QLineEdit>
|
||||
#include <QPushButton>
|
||||
#include <QSignalMapper>
|
||||
#include <QSpinBox>
|
||||
#include <QTreeView>
|
||||
|
||||
#include "video_core/shader/shader_interpreter.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
|
||||
#include "graphics_vertex_shader.h"
|
||||
|
||||
|
@ -17,7 +24,7 @@ using nihstro::Instruction;
|
|||
using nihstro::SourceRegister;
|
||||
using nihstro::SwizzlePattern;
|
||||
|
||||
GraphicsVertexShaderModel::GraphicsVertexShaderModel(QObject* parent): QAbstractItemModel(parent) {
|
||||
GraphicsVertexShaderModel::GraphicsVertexShaderModel(GraphicsVertexShaderWidget* parent): QAbstractItemModel(parent), par(parent) {
|
||||
|
||||
}
|
||||
|
||||
|
@ -34,7 +41,7 @@ int GraphicsVertexShaderModel::columnCount(const QModelIndex& parent) const {
|
|||
}
|
||||
|
||||
int GraphicsVertexShaderModel::rowCount(const QModelIndex& parent) const {
|
||||
return static_cast<int>(info.code.size());
|
||||
return static_cast<int>(par->info.code.size());
|
||||
}
|
||||
|
||||
QVariant GraphicsVertexShaderModel::headerData(int section, Qt::Orientation orientation, int role) const {
|
||||
|
@ -62,21 +69,21 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
|
|||
{
|
||||
switch (index.column()) {
|
||||
case 0:
|
||||
if (info.HasLabel(index.row()))
|
||||
return QString::fromStdString(info.GetLabel(index.row()));
|
||||
if (par->info.HasLabel(index.row()))
|
||||
return QString::fromStdString(par->info.GetLabel(index.row()));
|
||||
|
||||
return QString("%1").arg(4*index.row(), 4, 16, QLatin1Char('0'));
|
||||
|
||||
case 1:
|
||||
return QString("%1").arg(info.code[index.row()].hex, 8, 16, QLatin1Char('0'));
|
||||
return QString("%1").arg(par->info.code[index.row()].hex, 8, 16, QLatin1Char('0'));
|
||||
|
||||
case 2:
|
||||
{
|
||||
std::stringstream output;
|
||||
output.flags(std::ios::hex);
|
||||
|
||||
Instruction instr = info.code[index.row()];
|
||||
const SwizzlePattern& swizzle = info.swizzle_info[instr.common.operand_desc_id].pattern;
|
||||
Instruction instr = par->info.code[index.row()];
|
||||
const SwizzlePattern& swizzle = par->info.swizzle_info[instr.common.operand_desc_id].pattern;
|
||||
|
||||
// longest known instruction name: "setemit "
|
||||
output << std::setw(8) << std::left << instr.opcode.Value().GetInfo().name;
|
||||
|
@ -130,13 +137,13 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
|
|||
|
||||
print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(0,1), instr.common.AddressRegisterName());
|
||||
output << " " << instr.common.compare_op.ToString(instr.common.compare_op.x) << " ";
|
||||
print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false).substr(0,1));
|
||||
print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true).substr(0,1));
|
||||
|
||||
output << ", ";
|
||||
|
||||
print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(1,1), instr.common.AddressRegisterName());
|
||||
output << " " << instr.common.compare_op.ToString(instr.common.compare_op.y) << " ";
|
||||
print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false).substr(1,1));
|
||||
print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true).substr(1,1));
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -167,7 +174,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
|
|||
// TODO: In some cases, the Address Register is used as an index for SRC2 instead of SRC1
|
||||
if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Src2) {
|
||||
SourceRegister src2 = instr.common.GetSrc2(src_is_inverted);
|
||||
print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false));
|
||||
print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -240,6 +247,18 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
|
|||
case Qt::FontRole:
|
||||
return QFont("monospace");
|
||||
|
||||
case Qt::BackgroundRole:
|
||||
// Highlight instructions which have no debug data associated to them
|
||||
for (const auto& record : par->debug_data.records)
|
||||
if (index.row() == record.instruction_offset)
|
||||
return QVariant();
|
||||
|
||||
return QBrush(QColor(255, 255, 127));
|
||||
|
||||
|
||||
// TODO: Draw arrows for each "reachable" instruction to visualize control flow
|
||||
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -247,53 +266,232 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
|
|||
return QVariant();
|
||||
}
|
||||
|
||||
void GraphicsVertexShaderModel::OnUpdate()
|
||||
{
|
||||
beginResetModel();
|
||||
void GraphicsVertexShaderWidget::DumpShader() {
|
||||
QString filename = QFileDialog::getSaveFileName(this, tr("Save Shader Dump"), "shader_dump.shbin",
|
||||
tr("Shader Binary (*.shbin)"));
|
||||
|
||||
info.Clear();
|
||||
if (filename.isEmpty()) {
|
||||
// If the user canceled the dialog, don't dump anything.
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto instr : Pica::g_state.vs.program_code)
|
||||
info.code.push_back({instr});
|
||||
auto& setup = Pica::g_state.vs;
|
||||
auto& config = Pica::g_state.regs.vs;
|
||||
|
||||
for (auto pattern : Pica::g_state.vs.swizzle_data)
|
||||
info.swizzle_info.push_back({pattern});
|
||||
|
||||
info.labels.insert({ Pica::g_state.regs.vs.main_offset, "main" });
|
||||
|
||||
endResetModel();
|
||||
Pica::DebugUtils::DumpShader(filename.toStdString(), config, setup, Pica::g_state.regs.vs_output_attributes);
|
||||
}
|
||||
|
||||
|
||||
GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::DebugContext > debug_context,
|
||||
QWidget* parent)
|
||||
: BreakPointObserverDock(debug_context, "Pica Vertex Shader", parent) {
|
||||
setObjectName("PicaVertexShader");
|
||||
|
||||
auto binary_model = new GraphicsVertexShaderModel(this);
|
||||
auto binary_list = new QTreeView;
|
||||
binary_list->setModel(binary_model);
|
||||
auto input_data_mapper = new QSignalMapper(this);
|
||||
|
||||
// TODO: Support inputting data in hexadecimal raw format
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(input_data); ++i) {
|
||||
input_data[i] = new QLineEdit;
|
||||
input_data[i]->setValidator(new QDoubleValidator(input_data[i]));
|
||||
}
|
||||
|
||||
breakpoint_warning = new QLabel(tr("(data only available at VertexLoaded breakpoints)"));
|
||||
|
||||
// TODO: Add some button for jumping to the shader entry point
|
||||
|
||||
model = new GraphicsVertexShaderModel(this);
|
||||
binary_list = new QTreeView;
|
||||
binary_list->setModel(model);
|
||||
binary_list->setRootIsDecorated(false);
|
||||
binary_list->setAlternatingRowColors(true);
|
||||
|
||||
connect(this, SIGNAL(Update()), binary_model, SLOT(OnUpdate()));
|
||||
auto dump_shader = new QPushButton(QIcon::fromTheme("document-save"), tr("Dump"));
|
||||
|
||||
instruction_description = new QLabel;
|
||||
|
||||
cycle_index = new QSpinBox;
|
||||
|
||||
connect(this, SIGNAL(SelectCommand(const QModelIndex&, QItemSelectionModel::SelectionFlags)),
|
||||
binary_list->selectionModel(), SLOT(select(const QModelIndex&, QItemSelectionModel::SelectionFlags)));
|
||||
|
||||
connect(dump_shader, SIGNAL(clicked()), this, SLOT(DumpShader()));
|
||||
|
||||
connect(cycle_index, SIGNAL(valueChanged(int)), this, SLOT(OnCycleIndexChanged(int)));
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(input_data); ++i) {
|
||||
connect(input_data[i], SIGNAL(textEdited(const QString&)), input_data_mapper, SLOT(map()));
|
||||
input_data_mapper->setMapping(input_data[i], i);
|
||||
}
|
||||
connect(input_data_mapper, SIGNAL(mapped(int)), this, SLOT(OnInputAttributeChanged(int)));
|
||||
|
||||
auto main_widget = new QWidget;
|
||||
auto main_layout = new QVBoxLayout;
|
||||
{
|
||||
auto input_data_group = new QGroupBox(tr("Input Data"));
|
||||
|
||||
// For each vertex attribute, add a QHBoxLayout consisting of:
|
||||
// - A QLabel denoting the source attribute index
|
||||
// - Four QLineEdits for showing and manipulating attribute data
|
||||
// - A QLabel denoting the shader input attribute index
|
||||
auto sub_layout = new QVBoxLayout;
|
||||
for (unsigned i = 0; i < 16; ++i) {
|
||||
// Create an HBoxLayout to store the widgets used to specify a particular attribute
|
||||
// and store it in a QWidget to allow for easy hiding and unhiding.
|
||||
auto row_layout = new QHBoxLayout;
|
||||
row_layout->addWidget(new QLabel(tr("Attribute %1").arg(i, 2)));
|
||||
for (unsigned comp = 0; comp < 4; ++comp)
|
||||
row_layout->addWidget(input_data[4 * i + comp]);
|
||||
|
||||
row_layout->addWidget(input_data_mapping[i] = new QLabel);
|
||||
|
||||
input_data_container[i] = new QWidget;
|
||||
input_data_container[i]->setLayout(row_layout);
|
||||
input_data_container[i]->hide();
|
||||
|
||||
sub_layout->addWidget(input_data_container[i]);
|
||||
}
|
||||
|
||||
sub_layout->addWidget(breakpoint_warning);
|
||||
breakpoint_warning->hide();
|
||||
|
||||
input_data_group->setLayout(sub_layout);
|
||||
main_layout->addWidget(input_data_group);
|
||||
}
|
||||
{
|
||||
auto sub_layout = new QHBoxLayout;
|
||||
sub_layout->addWidget(binary_list);
|
||||
main_layout->addLayout(sub_layout);
|
||||
}
|
||||
main_layout->addWidget(dump_shader);
|
||||
{
|
||||
auto sub_layout = new QHBoxLayout;
|
||||
sub_layout->addWidget(new QLabel(tr("Cycle Index:")));
|
||||
sub_layout->addWidget(cycle_index);
|
||||
main_layout->addLayout(sub_layout);
|
||||
}
|
||||
main_layout->addWidget(instruction_description);
|
||||
main_layout->addStretch();
|
||||
main_widget->setLayout(main_layout);
|
||||
setWidget(main_widget);
|
||||
|
||||
widget()->setEnabled(false);
|
||||
}
|
||||
|
||||
/**
 * Breakpoint handler: reloads the widget from the current PICA state and enables it.
 * @param event Kind of breakpoint that was hit
 * @param data  Event payload. It is only forwarded to Reload() for VertexLoaded events,
 *              where Reload() copies it into the stored input vertex.
 */
void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) {
    emit Update();

    // Vertex data is only retrievable at VertexLoaded breakpoints. For every other event
    // nullptr is passed so that the currently stored vertex data gets invalidated.
    const bool has_vertex_data = (event == Pica::DebugContext::Event::VertexLoaded);
    Reload(true, has_vertex_data ? data : nullptr);

    widget()->setEnabled(true);
}
|
||||
|
||||
void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_data) {
|
||||
model->beginResetModel();
|
||||
|
||||
if (replace_vertex_data) {
|
||||
if (vertex_data) {
|
||||
memcpy(&input_vertex, vertex_data, sizeof(input_vertex));
|
||||
for (unsigned attr = 0; attr < 16; ++attr) {
|
||||
for (unsigned comp = 0; comp < 4; ++comp) {
|
||||
input_data[4 * attr + comp]->setText(QString("%1").arg(input_vertex.attr[attr][comp].ToFloat32()));
|
||||
}
|
||||
}
|
||||
breakpoint_warning->hide();
|
||||
} else {
|
||||
for (unsigned attr = 0; attr < 16; ++attr) {
|
||||
for (unsigned comp = 0; comp < 4; ++comp) {
|
||||
input_data[4 * attr + comp]->setText(QString("???"));
|
||||
}
|
||||
}
|
||||
breakpoint_warning->show();
|
||||
}
|
||||
}
|
||||
|
||||
// Reload shader code
|
||||
info.Clear();
|
||||
|
||||
auto& shader_setup = Pica::g_state.vs;
|
||||
auto& shader_config = Pica::g_state.regs.vs;
|
||||
for (auto instr : shader_setup.program_code)
|
||||
info.code.push_back({instr});
|
||||
|
||||
for (auto pattern : shader_setup.swizzle_data)
|
||||
info.swizzle_info.push_back({pattern});
|
||||
|
||||
u32 entry_point = Pica::g_state.regs.vs.main_offset;
|
||||
info.labels.insert({ entry_point, "main" });
|
||||
|
||||
// Generate debug information
|
||||
debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, 1, shader_config, shader_setup);
|
||||
|
||||
// Reload widget state
|
||||
|
||||
// Only show input attributes which are used as input to the shader
|
||||
for (unsigned int attr = 0; attr < 16; ++attr) {
|
||||
input_data_container[attr]->setVisible(false);
|
||||
}
|
||||
for (unsigned int attr = 0; attr < Pica::g_state.regs.vertex_attributes.GetNumTotalAttributes(); ++attr) {
|
||||
unsigned source_attr = shader_config.input_register_map.GetRegisterForAttribute(attr);
|
||||
input_data_mapping[source_attr]->setText(QString("-> v%1").arg(attr));
|
||||
input_data_container[source_attr]->setVisible(true);
|
||||
}
|
||||
|
||||
// Initialize debug info text for current cycle count
|
||||
cycle_index->setMaximum(debug_data.records.size() - 1);
|
||||
OnCycleIndexChanged(cycle_index->value());
|
||||
|
||||
model->endResetModel();
|
||||
}
|
||||
|
||||
// Disables the dock's content widget; OnBreakPointHit enables it again.
void GraphicsVertexShaderWidget::OnResumed() {
|
||||
widget()->setEnabled(false);
|
||||
}
|
||||
|
||||
void GraphicsVertexShaderWidget::OnInputAttributeChanged(int index) {
|
||||
float value = input_data[index]->text().toFloat();
|
||||
Reload();
|
||||
}
|
||||
|
||||
void GraphicsVertexShaderWidget::OnCycleIndexChanged(int index) {
|
||||
QString text;
|
||||
|
||||
auto& record = debug_data.records[index];
|
||||
if (record.mask & Pica::Shader::DebugDataRecord::SRC1)
|
||||
text += tr("SRC1: %1, %2, %3, %4\n").arg(record.src1.x.ToFloat32()).arg(record.src1.y.ToFloat32()).arg(record.src1.z.ToFloat32()).arg(record.src1.w.ToFloat32());
|
||||
if (record.mask & Pica::Shader::DebugDataRecord::SRC2)
|
||||
text += tr("SRC2: %1, %2, %3, %4\n").arg(record.src2.x.ToFloat32()).arg(record.src2.y.ToFloat32()).arg(record.src2.z.ToFloat32()).arg(record.src2.w.ToFloat32());
|
||||
if (record.mask & Pica::Shader::DebugDataRecord::SRC3)
|
||||
text += tr("SRC3: %1, %2, %3, %4\n").arg(record.src3.x.ToFloat32()).arg(record.src3.y.ToFloat32()).arg(record.src3.z.ToFloat32()).arg(record.src3.w.ToFloat32());
|
||||
if (record.mask & Pica::Shader::DebugDataRecord::DEST_IN)
|
||||
text += tr("DEST_IN: %1, %2, %3, %4\n").arg(record.dest_in.x.ToFloat32()).arg(record.dest_in.y.ToFloat32()).arg(record.dest_in.z.ToFloat32()).arg(record.dest_in.w.ToFloat32());
|
||||
if (record.mask & Pica::Shader::DebugDataRecord::DEST_OUT)
|
||||
text += tr("DEST_OUT: %1, %2, %3, %4\n").arg(record.dest_out.x.ToFloat32()).arg(record.dest_out.y.ToFloat32()).arg(record.dest_out.z.ToFloat32()).arg(record.dest_out.w.ToFloat32());
|
||||
|
||||
if (record.mask & Pica::Shader::DebugDataRecord::ADDR_REG_OUT)
|
||||
text += tr("Addres Registers: %1, %2\n").arg(record.address_registers[0]).arg(record.address_registers[1]);
|
||||
if (record.mask & Pica::Shader::DebugDataRecord::CMP_RESULT)
|
||||
text += tr("Compare Result: %1, %2\n").arg(record.conditional_code[0] ? "true" : "false").arg(record.conditional_code[1] ? "true" : "false");
|
||||
|
||||
if (record.mask & Pica::Shader::DebugDataRecord::COND_BOOL_IN)
|
||||
text += tr("Static Condition: %1\n").arg(record.cond_bool ? "true" : "false");
|
||||
if (record.mask & Pica::Shader::DebugDataRecord::COND_CMP_IN)
|
||||
text += tr("Dynamic Conditions: %1, %2\n").arg(record.cond_cmp[0] ? "true" : "false").arg(record.cond_cmp[1] ? "true" : "false");
|
||||
if (record.mask & Pica::Shader::DebugDataRecord::LOOP_INT_IN)
|
||||
text += tr("Loop Parameters: %1 (repeats), %2 (initializer), %3 (increment), %4\n").arg(record.loop_int.x).arg(record.loop_int.y).arg(record.loop_int.z).arg(record.loop_int.w);
|
||||
|
||||
text += tr("Instruction offset: 0x%1").arg(4 * record.instruction_offset, 4, 16, QLatin1Char('0'));
|
||||
if (record.mask & Pica::Shader::DebugDataRecord::NEXT_INSTR) {
|
||||
text += tr(" -> 0x%2").arg(4 * record.next_instruction, 4, 16, QLatin1Char('0'));
|
||||
} else {
|
||||
text += tr(" (last instruction)");
|
||||
}
|
||||
|
||||
instruction_description->setText(text);
|
||||
|
||||
// Scroll to current instruction
|
||||
const QModelIndex& instr_index = model->index(record.instruction_offset, 0);
|
||||
emit SelectCommand(instr_index, QItemSelectionModel::ClearAndSelect | QItemSelectionModel::Rows);
|
||||
binary_list->scrollTo(instr_index, QAbstractItemView::EnsureVisible);
|
||||
}
|
||||
|
|
|
@ -10,11 +10,18 @@
|
|||
|
||||
#include "nihstro/parser_shbin.h"
|
||||
|
||||
#include "video_core/shader/shader.h"
|
||||
|
||||
class QLabel;
|
||||
class QSpinBox;
|
||||
|
||||
class GraphicsVertexShaderWidget;
|
||||
|
||||
class GraphicsVertexShaderModel : public QAbstractItemModel {
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
GraphicsVertexShaderModel(QObject* parent);
|
||||
GraphicsVertexShaderModel(GraphicsVertexShaderWidget* parent);
|
||||
|
||||
QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override;
|
||||
QModelIndex parent(const QModelIndex& child) const override;
|
||||
|
@ -23,11 +30,10 @@ public:
|
|||
QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override;
|
||||
QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override;
|
||||
|
||||
public slots:
|
||||
void OnUpdate();
|
||||
|
||||
private:
|
||||
nihstro::ShaderInfo info;
|
||||
GraphicsVertexShaderWidget* par;
|
||||
|
||||
friend class GraphicsVertexShaderWidget;
|
||||
};
|
||||
|
||||
class GraphicsVertexShaderWidget : public BreakPointObserverDock {
|
||||
|
@ -43,9 +49,42 @@ private slots:
|
|||
void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override;
|
||||
void OnResumed() override;
|
||||
|
||||
void OnInputAttributeChanged(int index);
|
||||
|
||||
void OnCycleIndexChanged(int index);
|
||||
|
||||
void DumpShader();
|
||||
|
||||
/**
|
||||
* Reload widget based on the current PICA200 state
|
||||
* @param replace_vertex_data If true, invalidate all current vertex data
|
||||
* @param vertex_data New vertex data to use, as passed to OnBreakPointHit. May be nullptr to specify that no valid vertex data can be retrieved currently. Only used if replace_vertex_data is true.
|
||||
*/
|
||||
void Reload(bool replace_vertex_data = false, void* vertex_data = nullptr);
|
||||
|
||||
|
||||
signals:
|
||||
void Update();
|
||||
// Call this to change the current command selection in the disassembly view
|
||||
void SelectCommand(const QModelIndex&, QItemSelectionModel::SelectionFlags);
|
||||
|
||||
private:
|
||||
QLabel* instruction_description;
|
||||
QTreeView* binary_list;
|
||||
GraphicsVertexShaderModel* model;
|
||||
|
||||
/// TODO: Move these into a single struct
|
||||
std::array<QLineEdit*, 4*16> input_data; // A text box for each of the 4 components of up to 16 vertex attributes
|
||||
std::array<QWidget*, 16> input_data_container; // QWidget containing the QLayout containing each vertex attribute
|
||||
std::array<QLabel*, 16> input_data_mapping; // A QLabel denoting the shader input attribute which the vertex attribute maps to
|
||||
|
||||
// Text to be shown when input vertex data is not retrievable
|
||||
QLabel* breakpoint_warning;
|
||||
|
||||
QSpinBox* cycle_index;
|
||||
|
||||
nihstro::ShaderInfo info;
|
||||
Pica::Shader::DebugData<true> debug_data;
|
||||
Pica::Shader::InputVertex input_vertex;
|
||||
|
||||
friend class GraphicsVertexShaderModel;
|
||||
};
|
||||
|
|
|
@ -215,7 +215,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||
unsigned int vertex_cache_pos = 0;
|
||||
vertex_cache_ids.fill(-1);
|
||||
|
||||
Shader::UnitState shader_unit;
|
||||
Shader::UnitState<false> shader_unit;
|
||||
Shader::Setup(shader_unit);
|
||||
|
||||
for (unsigned int index = 0; index < regs.num_vertices; ++index)
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include <png.h>
|
||||
#endif
|
||||
|
||||
#include <nihstro/float24.h>
|
||||
#include <nihstro/shader_binary.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
|
@ -110,8 +111,7 @@ void GeometryDumper::Dump() {
|
|||
}
|
||||
|
||||
|
||||
void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size,
|
||||
u32 main_offset, const Regs::VSOutputAttributes* output_attributes)
|
||||
void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes)
|
||||
{
|
||||
struct StuffToWrite {
|
||||
u8* pointer;
|
||||
|
@ -131,11 +131,14 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data
|
|||
// into shbin format (separate type and component mask).
|
||||
union OutputRegisterInfo {
|
||||
enum Type : u64 {
|
||||
POSITION = 0,
|
||||
COLOR = 2,
|
||||
TEXCOORD0 = 3,
|
||||
TEXCOORD1 = 5,
|
||||
TEXCOORD2 = 6,
|
||||
POSITION = 0,
|
||||
QUATERNION = 1,
|
||||
COLOR = 2,
|
||||
TEXCOORD0 = 3,
|
||||
TEXCOORD1 = 5,
|
||||
TEXCOORD2 = 6,
|
||||
|
||||
VIEW = 8,
|
||||
};
|
||||
|
||||
BitField< 0, 64, u64> hex;
|
||||
|
@ -157,6 +160,10 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data
|
|||
{ OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} },
|
||||
{ OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} },
|
||||
{ OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} },
|
||||
{ OutputAttributes::QUATERNION_X, { OutputRegisterInfo::QUATERNION, 1} },
|
||||
{ OutputAttributes::QUATERNION_Y, { OutputRegisterInfo::QUATERNION, 2} },
|
||||
{ OutputAttributes::QUATERNION_Z, { OutputRegisterInfo::QUATERNION, 4} },
|
||||
{ OutputAttributes::QUATERNION_W, { OutputRegisterInfo::QUATERNION, 8} },
|
||||
{ OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} },
|
||||
{ OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} },
|
||||
{ OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} },
|
||||
|
@ -166,7 +173,10 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data
|
|||
{ OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} },
|
||||
{ OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} },
|
||||
{ OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} },
|
||||
{ OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} }
|
||||
{ OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} },
|
||||
{ OutputAttributes::VIEW_X, { OutputRegisterInfo::VIEW, 1} },
|
||||
{ OutputAttributes::VIEW_Y, { OutputRegisterInfo::VIEW, 2} },
|
||||
{ OutputAttributes::VIEW_Z, { OutputRegisterInfo::VIEW, 4} }
|
||||
};
|
||||
|
||||
for (const auto& semantic : std::vector<OutputAttributes::Semantic>{
|
||||
|
@ -221,28 +231,69 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data
|
|||
|
||||
// TODO: Reduce the amount of binary code written to relevant portions
|
||||
dvlp.binary_offset = write_offset - dvlp_offset;
|
||||
dvlp.binary_size_words = binary_size;
|
||||
QueueForWriting((u8*)binary_data, binary_size * sizeof(u32));
|
||||
dvlp.binary_size_words = setup.program_code.size();
|
||||
QueueForWriting((u8*)setup.program_code.data(), setup.program_code.size() * sizeof(u32));
|
||||
|
||||
dvlp.swizzle_info_offset = write_offset - dvlp_offset;
|
||||
dvlp.swizzle_info_num_entries = swizzle_size;
|
||||
dvlp.swizzle_info_num_entries = setup.swizzle_data.size();
|
||||
u32 dummy = 0;
|
||||
for (unsigned int i = 0; i < swizzle_size; ++i) {
|
||||
QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i]));
|
||||
for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) {
|
||||
QueueForWriting((u8*)&setup.swizzle_data[i], sizeof(setup.swizzle_data[i]));
|
||||
QueueForWriting((u8*)&dummy, sizeof(dummy));
|
||||
}
|
||||
|
||||
dvle.main_offset_words = main_offset;
|
||||
dvle.main_offset_words = config.main_offset;
|
||||
dvle.output_register_table_offset = write_offset - dvlb.dvle_offset;
|
||||
dvle.output_register_table_size = static_cast<u32>(output_info_table.size());
|
||||
QueueForWriting((u8*)output_info_table.data(), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo)));
|
||||
|
||||
// TODO: Create a label table for "main"
|
||||
|
||||
std::vector<nihstro::ConstantInfo> constant_table;
|
||||
for (unsigned i = 0; i < setup.uniforms.b.size(); ++i) {
|
||||
nihstro::ConstantInfo constant;
|
||||
memset(&constant, 0, sizeof(constant));
|
||||
constant.type = nihstro::ConstantInfo::Bool;
|
||||
constant.regid = i;
|
||||
constant.b = setup.uniforms.b[i];
|
||||
constant_table.emplace_back(constant);
|
||||
}
|
||||
for (unsigned i = 0; i < setup.uniforms.i.size(); ++i) {
|
||||
nihstro::ConstantInfo constant;
|
||||
memset(&constant, 0, sizeof(constant));
|
||||
constant.type = nihstro::ConstantInfo::Int;
|
||||
constant.regid = i;
|
||||
constant.i.x = setup.uniforms.i[i].x;
|
||||
constant.i.y = setup.uniforms.i[i].y;
|
||||
constant.i.z = setup.uniforms.i[i].z;
|
||||
constant.i.w = setup.uniforms.i[i].w;
|
||||
constant_table.emplace_back(constant);
|
||||
}
|
||||
for (unsigned i = 0; i < sizeof(setup.uniforms.f) / sizeof(setup.uniforms.f[0]); ++i) {
|
||||
nihstro::ConstantInfo constant;
|
||||
memset(&constant, 0, sizeof(constant));
|
||||
constant.type = nihstro::ConstantInfo::Float;
|
||||
constant.regid = i;
|
||||
constant.f.x = nihstro::to_float24(setup.uniforms.f[i].x.ToFloat32());
|
||||
constant.f.y = nihstro::to_float24(setup.uniforms.f[i].y.ToFloat32());
|
||||
constant.f.z = nihstro::to_float24(setup.uniforms.f[i].z.ToFloat32());
|
||||
constant.f.w = nihstro::to_float24(setup.uniforms.f[i].w.ToFloat32());
|
||||
|
||||
// Store constant if it's different from zero..
|
||||
if (setup.uniforms.f[i].x.ToFloat32() != 0.0 ||
|
||||
setup.uniforms.f[i].y.ToFloat32() != 0.0 ||
|
||||
setup.uniforms.f[i].z.ToFloat32() != 0.0 ||
|
||||
setup.uniforms.f[i].w.ToFloat32() != 0.0)
|
||||
constant_table.emplace_back(constant);
|
||||
}
|
||||
dvle.constant_table_offset = write_offset - dvlb.dvle_offset;
|
||||
dvle.constant_table_size = constant_table.size();
|
||||
for (const auto& constant : constant_table) {
|
||||
QueueForWriting((uint8_t*)&constant, sizeof(constant));
|
||||
}
|
||||
|
||||
// Write data to file
|
||||
static int dump_index = 0;
|
||||
std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin");
|
||||
std::ofstream file(filename, std::ios_base::out | std::ios_base::binary);
|
||||
|
||||
for (auto& chunk : writing_queue) {
|
||||
|
|
|
@ -158,7 +158,6 @@ extern std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this g
|
|||
namespace DebugUtils {
|
||||
|
||||
#define PICA_DUMP_GEOMETRY 0
|
||||
#define PICA_DUMP_SHADERS 0
|
||||
#define PICA_DUMP_TEXTURES 0
|
||||
#define PICA_LOG_TEV 0
|
||||
|
||||
|
@ -182,8 +181,8 @@ private:
|
|||
std::vector<Face> faces;
|
||||
};
|
||||
|
||||
void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size,
|
||||
u32 main_offset, const Regs::VSOutputAttributes* output_attributes);
|
||||
void DumpShader(const std::string& filename, const Regs::ShaderConfig& config,
|
||||
const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes);
|
||||
|
||||
|
||||
// Utility class to log Pica commands.
|
||||
|
|
|
@ -80,6 +80,11 @@ struct Regs {
|
|||
POSITION_Z = 2,
|
||||
POSITION_W = 3,
|
||||
|
||||
QUATERNION_X = 4,
|
||||
QUATERNION_Y = 5,
|
||||
QUATERNION_Z = 6,
|
||||
QUATERNION_W = 7,
|
||||
|
||||
COLOR_R = 8,
|
||||
COLOR_G = 9,
|
||||
COLOR_B = 10,
|
||||
|
@ -89,6 +94,12 @@ struct Regs {
|
|||
TEXCOORD0_V = 13,
|
||||
TEXCOORD1_U = 14,
|
||||
TEXCOORD1_V = 15,
|
||||
|
||||
// TODO: Not verified
|
||||
VIEW_X = 18,
|
||||
VIEW_Y = 19,
|
||||
VIEW_Z = 20,
|
||||
|
||||
TEXCOORD2_U = 22,
|
||||
TEXCOORD2_V = 23,
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <boost/range/algorithm/fill.hpp>
|
||||
|
||||
#include "common/hash.h"
|
||||
#include "common/make_unique.h"
|
||||
#include "common/profiler.h"
|
||||
|
@ -30,7 +32,7 @@ static JitCompiler jit;
|
|||
static CompiledShader* jit_shader;
|
||||
#endif // ARCHITECTURE_x86_64
|
||||
|
||||
void Setup(UnitState& state) {
|
||||
void Setup(UnitState<false>& state) {
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
if (VideoCore::g_shader_jit_enabled) {
|
||||
u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
|
||||
|
@ -54,9 +56,8 @@ void Shutdown() {
|
|||
|
||||
static Common::Profiling::TimingCategory shader_category("Vertex Shader");
|
||||
|
||||
OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) {
|
||||
OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
|
||||
auto& config = g_state.regs.vs;
|
||||
auto& setup = g_state.vs;
|
||||
|
||||
Common::Profiling::ScopeTimer timer(shader_category);
|
||||
|
||||
|
@ -67,6 +68,8 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes)
|
|||
// Setup input register table
|
||||
const auto& attribute_register_map = config.input_register_map;
|
||||
|
||||
// TODO: Instead of this cumbersome logic, just load the input data directly like
|
||||
// for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; }
|
||||
if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
|
||||
if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
|
||||
if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
|
||||
|
@ -96,12 +99,6 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes)
|
|||
RunInterpreter(state);
|
||||
#endif // ARCHITECTURE_x86_64
|
||||
|
||||
#if PICA_DUMP_SHADERS
|
||||
DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(),
|
||||
state.debug.max_opdesc_id, config.main_offset,
|
||||
g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here
|
||||
#endif
|
||||
|
||||
// Setup output data
|
||||
OutputVertex ret;
|
||||
// TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
|
||||
|
@ -132,14 +129,52 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes)
|
|||
std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
|
||||
}
|
||||
|
||||
LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
|
||||
LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
|
||||
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
|
||||
ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
|
||||
ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
|
||||
ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Runs the shader interpreter once on the given vertex with full debugging enabled and hands
 * back the debug data the interpreter collected.
 *
 * @param input Input vertex whose attributes are loaded into the shader input registers
 * @param num_attributes Number of leading attributes of `input` to load (at most 16 are used)
 * @param config Shader configuration; supplies the entry point and the attribute-to-register map
 * @param setup Shader setup object of the pipeline
 * @return Debug data recorded while interpreting the shader for this vertex
 */
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) {
    UnitState<true> state;

    // NOTE(review): `setup` is not consulted here. RunInterpreter reads the program code, swizzle
    // data and uniforms from g_state.vs, so the caller is expected to pass the matching setup.
    (void)setup;

    state.program_counter = config.main_offset;
    state.debug.max_offset = 0;
    state.debug.max_opdesc_id = 0;

    // Setup input register table
    const auto& attribute_register_map = config.input_register_map;

    // Registers that receive no attribute are preset to zero. The placeholder has to provide four
    // initialized components: a lone, uninitialized float24 would be read past its end when it is
    // converted into a four-component register.
    const float24 zero = float24::FromFloat32(0.0f);
    const float24 dummy_register[4] = { zero, zero, zero, zero };
    boost::fill(state.registers.input, &dummy_register[0]);

    // Load the attributes by value, exactly like the regular Run() path does.
    if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
    if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
    if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
    if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3];
    if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4];
    if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5];
    if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6];
    if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7];
    if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8];
    if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9];
    if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10];
    if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11];
    if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12];
    if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13];
    if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14];
    if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15];

    state.conditional_code[0] = false;
    state.conditional_code[1] = false;

    RunInterpreter(state);
    return state.debug;
}
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
} // namespace Pica
|
||||
|
|
|
@ -4,7 +4,10 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include <nihstro/shader_binary.h>
|
||||
|
||||
#include "common/common_funcs.h"
|
||||
|
@ -72,12 +75,185 @@ struct OutputVertex {
|
|||
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
|
||||
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
|
||||
|
||||
|
||||
// Helper structure used to keep track of data useful for inspection of shader emulation
|
||||
template<bool full_debugging>
|
||||
struct DebugData;
|
||||
|
||||
template<>
|
||||
struct DebugData<false> {
|
||||
// TODO: Hide these behind and interface and move them to DebugData<true>
|
||||
u32 max_offset; // maximum program counter ever reached
|
||||
u32 max_opdesc_id; // maximum swizzle pattern index ever used
|
||||
};
|
||||
|
||||
template<>
|
||||
struct DebugData<true> {
|
||||
// Records store the input and output operands of a particular instruction.
|
||||
struct Record {
|
||||
enum Type {
|
||||
// Floating point arithmetic operands
|
||||
SRC1 = 0x1,
|
||||
SRC2 = 0x2,
|
||||
SRC3 = 0x4,
|
||||
|
||||
// Initial and final output operand value
|
||||
DEST_IN = 0x8,
|
||||
DEST_OUT = 0x10,
|
||||
|
||||
// Current and next instruction offset (in words)
|
||||
CUR_INSTR = 0x20,
|
||||
NEXT_INSTR = 0x40,
|
||||
|
||||
// Output address register value
|
||||
ADDR_REG_OUT = 0x80,
|
||||
|
||||
// Result of a comparison instruction
|
||||
CMP_RESULT = 0x100,
|
||||
|
||||
// Input values for conditional flow control instructions
|
||||
COND_BOOL_IN = 0x200,
|
||||
COND_CMP_IN = 0x400,
|
||||
|
||||
// Input values for a loop
|
||||
LOOP_INT_IN = 0x800,
|
||||
};
|
||||
|
||||
Math::Vec4<float24> src1;
|
||||
Math::Vec4<float24> src2;
|
||||
Math::Vec4<float24> src3;
|
||||
|
||||
Math::Vec4<float24> dest_in;
|
||||
Math::Vec4<float24> dest_out;
|
||||
|
||||
s32 address_registers[2];
|
||||
bool conditional_code[2];
|
||||
bool cond_bool;
|
||||
bool cond_cmp[2];
|
||||
Math::Vec4<u8> loop_int;
|
||||
|
||||
u32 instruction_offset;
|
||||
u32 next_instruction;
|
||||
|
||||
// set of enabled fields (as a combination of Type flags)
|
||||
unsigned mask = 0;
|
||||
};
|
||||
|
||||
u32 max_offset; // maximum program counter ever reached
|
||||
u32 max_opdesc_id; // maximum swizzle pattern index ever used
|
||||
|
||||
// List of records for each executed shader instruction
|
||||
std::vector<DebugData<true>::Record> records;
|
||||
};
|
||||
|
||||
// Type alias for better readability
|
||||
using DebugDataRecord = DebugData<true>::Record;
|
||||
|
||||
// Helper function to set a DebugData<true>::Record field based on the template enum parameter.
|
||||
template<DebugDataRecord::Type type, typename ValueType>
|
||||
inline void SetField(DebugDataRecord& record, ValueType value);
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) {
|
||||
record.src1.x = value[0];
|
||||
record.src1.y = value[1];
|
||||
record.src1.z = value[2];
|
||||
record.src1.w = value[3];
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) {
|
||||
record.src2.x = value[0];
|
||||
record.src2.y = value[1];
|
||||
record.src2.z = value[2];
|
||||
record.src2.w = value[3];
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) {
|
||||
record.src3.x = value[0];
|
||||
record.src3.y = value[1];
|
||||
record.src3.z = value[2];
|
||||
record.src3.w = value[3];
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
|
||||
record.dest_in.x = value[0];
|
||||
record.dest_in.y = value[1];
|
||||
record.dest_in.z = value[2];
|
||||
record.dest_in.w = value[3];
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) {
|
||||
record.dest_out.x = value[0];
|
||||
record.dest_out.y = value[1];
|
||||
record.dest_out.z = value[2];
|
||||
record.dest_out.w = value[3];
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) {
|
||||
record.address_registers[0] = value[0];
|
||||
record.address_registers[1] = value[1];
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) {
|
||||
record.conditional_code[0] = value[0];
|
||||
record.conditional_code[1] = value[1];
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) {
|
||||
record.cond_bool = value;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) {
|
||||
record.cond_cmp[0] = value[0];
|
||||
record.cond_cmp[1] = value[1];
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) {
|
||||
record.loop_int = value;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) {
|
||||
record.instruction_offset = value;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) {
|
||||
record.next_instruction = value;
|
||||
}
|
||||
|
||||
// Helper function to set debug information on the current shader iteration.
|
||||
template<DebugDataRecord::Type type, typename ValueType>
|
||||
inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) {
|
||||
// Debugging disabled => nothing to do
|
||||
}
|
||||
|
||||
template<DebugDataRecord::Type type, typename ValueType>
|
||||
inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) {
|
||||
if (offset >= debug_data.records.size())
|
||||
debug_data.records.resize(offset + 1);
|
||||
|
||||
SetField<type, ValueType>(debug_data.records[offset], value);
|
||||
debug_data.records[offset].mask |= type;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This structure contains the state information that needs to be unique for a shader unit. The 3DS
|
||||
* has four shader units that process shaders in parallel. At the present, Citra only implements a
|
||||
* single shader unit that processes all shaders serially. Putting the state information in a struct
|
||||
* here will make it easier for us to parallelize the shader processing later.
|
||||
*/
|
||||
template<bool Debug>
|
||||
struct UnitState {
|
||||
struct Registers {
|
||||
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
|
||||
|
@ -111,10 +287,7 @@ struct UnitState {
|
|||
// TODO: Is there a maximal size for this?
|
||||
boost::container::static_vector<CallStackElement, 16> call_stack;
|
||||
|
||||
struct {
|
||||
u32 max_offset; // maximum program counter ever reached
|
||||
u32 max_opdesc_id; // maximum swizzle pattern index ever used
|
||||
} debug;
|
||||
DebugData<Debug> debug;
|
||||
|
||||
static int InputOffset(const SourceRegister& reg) {
|
||||
switch (reg.GetRegisterType()) {
|
||||
|
@ -150,7 +323,7 @@ struct UnitState {
|
|||
* vertex, which would happen within the `Run` function).
|
||||
* @param state Shader unit state, must be setup per shader and per shader unit
|
||||
*/
|
||||
void Setup(UnitState& state);
|
||||
void Setup(UnitState<false>& state);
|
||||
|
||||
/// Performs any cleanup when the emulator is shutdown
|
||||
void Shutdown();
|
||||
|
@ -162,7 +335,17 @@ void Shutdown();
|
|||
* @param num_attributes The number of vertex shader attributes
|
||||
* @return The output vertex, after having been processed by the vertex shader
|
||||
*/
|
||||
OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes);
|
||||
OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
|
||||
|
||||
/**
|
||||
* Produce debug information based on the given shader and input vertex
|
||||
* @param input Input vertex into the shader
|
||||
* @param num_attributes The number of vertex shader attributes
|
||||
* @param config Configuration object for the shader pipeline
|
||||
* @param setup Setup object for the shader pipeline
|
||||
* @return Debug information for this shader with regards to the given vertex
|
||||
*/
|
||||
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup);
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
|
|
|
@ -21,7 +21,8 @@ namespace Pica {
|
|||
|
||||
namespace Shader {
|
||||
|
||||
void RunInterpreter(UnitState& state) {
|
||||
template<bool Debug>
|
||||
void RunInterpreter(UnitState<Debug>& state) {
|
||||
const auto& uniforms = g_state.vs.uniforms;
|
||||
const auto& swizzle_data = g_state.vs.swizzle_data;
|
||||
const auto& program_code = g_state.vs.program_code;
|
||||
|
@ -29,7 +30,9 @@ void RunInterpreter(UnitState& state) {
|
|||
// Placeholder for invalid inputs
|
||||
static float24 dummy_vec4_float24[4];
|
||||
|
||||
while (true) {
|
||||
unsigned iteration = 0;
|
||||
bool exit_loop = false;
|
||||
while (!exit_loop) {
|
||||
if (!state.call_stack.empty()) {
|
||||
auto& top = state.call_stack.back();
|
||||
if (state.program_counter == top.final_address) {
|
||||
|
@ -47,16 +50,19 @@ void RunInterpreter(UnitState& state) {
|
|||
}
|
||||
}
|
||||
|
||||
bool exit_loop = false;
|
||||
const Instruction instr = { program_code[state.program_counter] };
|
||||
const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
|
||||
|
||||
static auto call = [](UnitState& state, u32 offset, u32 num_instructions,
|
||||
static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions,
|
||||
u32 return_offset, u8 repeat_count, u8 loop_increment) {
|
||||
state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
|
||||
ASSERT(state.call_stack.size() < state.call_stack.capacity());
|
||||
state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
|
||||
};
|
||||
Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter);
|
||||
if (iteration > 0)
|
||||
Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter);
|
||||
|
||||
state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter);
|
||||
|
||||
auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
|
||||
|
@ -123,58 +129,78 @@ void RunInterpreter(UnitState& state) {
|
|||
switch (instr.opcode.Value().EffectiveOpCode()) {
|
||||
case OpCode::Id::ADD:
|
||||
{
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = src1[i] + src2[i];
|
||||
}
|
||||
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::MUL:
|
||||
{
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = src1[i] * src2[i];
|
||||
}
|
||||
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::FLR:
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32()));
|
||||
}
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
break;
|
||||
|
||||
case OpCode::Id::MAX:
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = std::max(src1[i], src2[i]);
|
||||
}
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
break;
|
||||
|
||||
case OpCode::Id::MIN:
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = std::min(src1[i], src2[i]);
|
||||
}
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
break;
|
||||
|
||||
case OpCode::Id::DP3:
|
||||
case OpCode::Id::DP4:
|
||||
{
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
float24 dot = float24::FromFloat32(0.f);
|
||||
int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4;
|
||||
for (int i = 0; i < num_components; ++i)
|
||||
|
@ -186,12 +212,15 @@ void RunInterpreter(UnitState& state) {
|
|||
|
||||
dest[i] = dot;
|
||||
}
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
break;
|
||||
}
|
||||
|
||||
// Reciprocal
|
||||
case OpCode::Id::RCP:
|
||||
{
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
@ -200,13 +229,15 @@ void RunInterpreter(UnitState& state) {
|
|||
// TODO: I think this might be wrong... we should only use one component here
|
||||
dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32());
|
||||
}
|
||||
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
break;
|
||||
}
|
||||
|
||||
// Reciprocal Square Root
|
||||
case OpCode::Id::RSQ:
|
||||
{
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
@ -215,12 +246,13 @@ void RunInterpreter(UnitState& state) {
|
|||
// TODO: I think this might be wrong... we should only use one component here
|
||||
dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32()));
|
||||
}
|
||||
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::MOVA:
|
||||
{
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
@ -228,32 +260,41 @@ void RunInterpreter(UnitState& state) {
|
|||
// TODO: Figure out how the rounding is done on hardware
|
||||
state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
|
||||
}
|
||||
|
||||
Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::MOV:
|
||||
{
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = src1[i];
|
||||
}
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::SLT:
|
||||
case OpCode::Id::SLTI:
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
|
||||
}
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
break;
|
||||
|
||||
case OpCode::Id::CMP:
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
// TODO: Can you restrict to one compare via dest masking?
|
||||
|
||||
|
@ -261,27 +302,27 @@ void RunInterpreter(UnitState& state) {
|
|||
auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
|
||||
|
||||
switch (op) {
|
||||
case compare_op.Equal:
|
||||
case Instruction::Common::CompareOpType::Equal:
|
||||
state.conditional_code[i] = (src1[i] == src2[i]);
|
||||
break;
|
||||
|
||||
case compare_op.NotEqual:
|
||||
case Instruction::Common::CompareOpType::NotEqual:
|
||||
state.conditional_code[i] = (src1[i] != src2[i]);
|
||||
break;
|
||||
|
||||
case compare_op.LessThan:
|
||||
case Instruction::Common::CompareOpType::LessThan:
|
||||
state.conditional_code[i] = (src1[i] < src2[i]);
|
||||
break;
|
||||
|
||||
case compare_op.LessEqual:
|
||||
case Instruction::Common::CompareOpType::LessEqual:
|
||||
state.conditional_code[i] = (src1[i] <= src2[i]);
|
||||
break;
|
||||
|
||||
case compare_op.GreaterThan:
|
||||
case Instruction::Common::CompareOpType::GreaterThan:
|
||||
state.conditional_code[i] = (src1[i] > src2[i]);
|
||||
break;
|
||||
|
||||
case compare_op.GreaterEqual:
|
||||
case Instruction::Common::CompareOpType::GreaterEqual:
|
||||
state.conditional_code[i] = (src1[i] >= src2[i]);
|
||||
break;
|
||||
|
||||
|
@ -290,6 +331,7 @@ void RunInterpreter(UnitState& state) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -359,12 +401,17 @@ void RunInterpreter(UnitState& state) {
|
|||
: (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
|
||||
: dummy_vec4_float24;
|
||||
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
|
||||
Record<DebugDataRecord::SRC3>(state.debug, iteration, src3);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = src1[i] * src2[i] + src3[i];
|
||||
}
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
} else {
|
||||
LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x",
|
||||
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
|
||||
|
@ -374,7 +421,7 @@ void RunInterpreter(UnitState& state) {
|
|||
|
||||
default:
|
||||
{
|
||||
static auto evaluate_condition = [](const UnitState& state, bool refx, bool refy, Instruction::FlowControlType flow_control) {
|
||||
static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) {
|
||||
bool results[2] = { refx == state.conditional_code[0],
|
||||
refy == state.conditional_code[1] };
|
||||
|
||||
|
@ -400,12 +447,14 @@ void RunInterpreter(UnitState& state) {
|
|||
break;
|
||||
|
||||
case OpCode::Id::JMPC:
|
||||
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
|
||||
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
|
||||
state.program_counter = instr.flow_control.dest_offset - 1;
|
||||
}
|
||||
break;
|
||||
|
||||
case OpCode::Id::JMPU:
|
||||
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
||||
if (uniforms.b[instr.flow_control.bool_uniform_id]) {
|
||||
state.program_counter = instr.flow_control.dest_offset - 1;
|
||||
}
|
||||
|
@ -419,6 +468,7 @@ void RunInterpreter(UnitState& state) {
|
|||
break;
|
||||
|
||||
case OpCode::Id::CALLU:
|
||||
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
||||
if (uniforms.b[instr.flow_control.bool_uniform_id]) {
|
||||
call(state,
|
||||
instr.flow_control.dest_offset,
|
||||
|
@ -428,6 +478,7 @@ void RunInterpreter(UnitState& state) {
|
|||
break;
|
||||
|
||||
case OpCode::Id::CALLC:
|
||||
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
|
||||
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
|
||||
call(state,
|
||||
instr.flow_control.dest_offset,
|
||||
|
@ -440,6 +491,7 @@ void RunInterpreter(UnitState& state) {
|
|||
break;
|
||||
|
||||
case OpCode::Id::IFU:
|
||||
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
||||
if (uniforms.b[instr.flow_control.bool_uniform_id]) {
|
||||
call(state,
|
||||
state.program_counter + 1,
|
||||
|
@ -458,6 +510,7 @@ void RunInterpreter(UnitState& state) {
|
|||
{
|
||||
// TODO: Do we need to consider swizzlers here?
|
||||
|
||||
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
|
||||
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
|
||||
call(state,
|
||||
state.program_counter + 1,
|
||||
|
@ -475,14 +528,19 @@ void RunInterpreter(UnitState& state) {
|
|||
|
||||
case OpCode::Id::LOOP:
|
||||
{
|
||||
state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y;
|
||||
Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x,
|
||||
uniforms.i[instr.flow_control.int_uniform_id].y,
|
||||
uniforms.i[instr.flow_control.int_uniform_id].z,
|
||||
uniforms.i[instr.flow_control.int_uniform_id].w);
|
||||
state.address_registers[2] = loop_param.y;
|
||||
|
||||
Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
|
||||
call(state,
|
||||
state.program_counter + 1,
|
||||
instr.flow_control.dest_offset - state.program_counter + 1,
|
||||
instr.flow_control.dest_offset + 1,
|
||||
uniforms.i[instr.flow_control.int_uniform_id].x,
|
||||
uniforms.i[instr.flow_control.int_uniform_id].z);
|
||||
loop_param.x,
|
||||
loop_param.z);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -497,12 +555,14 @@ void RunInterpreter(UnitState& state) {
|
|||
}
|
||||
|
||||
++state.program_counter;
|
||||
|
||||
if (exit_loop)
|
||||
break;
|
||||
++iteration;
|
||||
}
|
||||
}
|
||||
|
||||
// Explicit instantiation
|
||||
template void RunInterpreter(UnitState<false>& state);
|
||||
template void RunInterpreter(UnitState<true>& state);
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -12,7 +12,8 @@ namespace Pica {
|
|||
|
||||
namespace Shader {
|
||||
|
||||
void RunInterpreter(UnitState& state);
|
||||
template<bool Debug>
|
||||
void RunInterpreter(UnitState<Debug>& state);
|
||||
|
||||
} // namespace
|
||||
|
||||
|
|
|
@ -141,7 +141,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source
|
|||
src_offset = src_reg.GetIndex() * sizeof(float24) * 4;
|
||||
} else {
|
||||
src_ptr = REGISTERS;
|
||||
src_offset = UnitState::InputOffset(src_reg);
|
||||
src_offset = UnitState<false>::InputOffset(src_reg);
|
||||
}
|
||||
|
||||
unsigned operand_desc_id;
|
||||
|
@ -217,11 +217,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
|
|||
// If all components are enabled, write the result to the destination register
|
||||
if (swiz.dest_mask == NO_DEST_REG_MASK) {
|
||||
// Store dest back to memory
|
||||
MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), src);
|
||||
MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), src);
|
||||
|
||||
} else {
|
||||
// Not all components are enabled, so mask the result when storing to the destination register...
|
||||
MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState::OutputOffset(dest)));
|
||||
MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)));
|
||||
|
||||
if (Common::GetCPUCaps().sse4_1) {
|
||||
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
|
||||
|
@ -240,7 +240,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
|
|||
}
|
||||
|
||||
// Store dest back to memory
|
||||
MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), SCRATCH);
|
||||
MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), SCRATCH);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue