mirror of
https://github.com/Lime3DS/Lime3DS
synced 2024-12-28 01:52:29 -06:00
VideoCore/Shader: Split interpreter and JIT into separate ShaderEngines
This commit is contained in:
parent
8eefc62833
commit
114d6b2f97
8 changed files with 160 additions and 104 deletions
|
@ -50,9 +50,11 @@ set(HEADERS
|
||||||
|
|
||||||
if(ARCHITECTURE_x86_64)
|
if(ARCHITECTURE_x86_64)
|
||||||
set(SRCS ${SRCS}
|
set(SRCS ${SRCS}
|
||||||
|
shader/shader_jit_x64.cpp
|
||||||
shader/shader_jit_x64_compiler.cpp)
|
shader/shader_jit_x64_compiler.cpp)
|
||||||
|
|
||||||
set(HEADERS ${HEADERS}
|
set(HEADERS ${HEADERS}
|
||||||
|
shader/shader_jit_x64.h
|
||||||
shader/shader_jit_x64_compiler.h)
|
shader/shader_jit_x64_compiler.h)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -499,7 +499,7 @@ void Init() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Shutdown() {
|
void Shutdown() {
|
||||||
Shader::ClearCache();
|
Shader::Shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|
|
@ -2,14 +2,8 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <unordered_map>
|
|
||||||
#include <utility>
|
|
||||||
#include <boost/range/algorithm/fill.hpp>
|
|
||||||
#include "common/bit_field.h"
|
|
||||||
#include "common/hash.h"
|
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "video_core/pica.h"
|
#include "video_core/pica.h"
|
||||||
|
@ -17,7 +11,7 @@
|
||||||
#include "video_core/shader/shader.h"
|
#include "video_core/shader/shader.h"
|
||||||
#include "video_core/shader/shader_interpreter.h"
|
#include "video_core/shader/shader_interpreter.h"
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
#include "video_core/shader/shader_jit_x64_compiler.h"
|
#include "video_core/shader/shader_jit_x64.h"
|
||||||
#endif // ARCHITECTURE_x86_64
|
#endif // ARCHITECTURE_x86_64
|
||||||
#include "video_core/video_core.h"
|
#include "video_core/video_core.h"
|
||||||
|
|
||||||
|
@ -87,91 +81,31 @@ void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) {
|
||||||
conditional_code[1] = false;
|
conditional_code[1] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
class MergedShaderEngine : public ShaderEngine {
|
|
||||||
public:
|
|
||||||
void SetupBatch(const ShaderSetup* setup) override;
|
|
||||||
void Run(UnitState& state, unsigned int entry_point) const override;
|
|
||||||
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
|
|
||||||
unsigned int entry_point) const override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
const ShaderSetup* setup = nullptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
|
||||||
static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
|
|
||||||
static const JitShader* jit_shader;
|
|
||||||
#endif // ARCHITECTURE_x86_64
|
|
||||||
|
|
||||||
void ClearCache() {
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
|
||||||
shader_map.clear();
|
|
||||||
#endif // ARCHITECTURE_x86_64
|
|
||||||
}
|
|
||||||
|
|
||||||
void MergedShaderEngine::SetupBatch(const ShaderSetup* setup_) {
|
|
||||||
setup = setup_;
|
|
||||||
if (setup == nullptr)
|
|
||||||
return;
|
|
||||||
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
|
||||||
if (VideoCore::g_shader_jit_enabled) {
|
|
||||||
u64 code_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code));
|
|
||||||
u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data));
|
|
||||||
|
|
||||||
u64 cache_key = code_hash ^ swizzle_hash;
|
|
||||||
auto iter = shader_map.find(cache_key);
|
|
||||||
if (iter != shader_map.end()) {
|
|
||||||
jit_shader = iter->second.get();
|
|
||||||
} else {
|
|
||||||
auto shader = std::make_unique<JitShader>();
|
|
||||||
shader->Compile();
|
|
||||||
jit_shader = shader.get();
|
|
||||||
shader_map[cache_key] = std::move(shader);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif // ARCHITECTURE_x86_64
|
|
||||||
}
|
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
|
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
|
||||||
|
|
||||||
void MergedShaderEngine::Run(UnitState& state, unsigned int entry_point) const {
|
|
||||||
ASSERT(setup != nullptr);
|
|
||||||
ASSERT(entry_point < 1024);
|
|
||||||
|
|
||||||
MICROPROFILE_SCOPE(GPU_Shader);
|
|
||||||
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
if (VideoCore::g_shader_jit_enabled) {
|
static std::unique_ptr<JitX64Engine> jit_engine;
|
||||||
jit_shader->Run(*setup, state, entry_point);
|
|
||||||
} else {
|
|
||||||
DebugData<false> dummy_debug_data;
|
|
||||||
RunInterpreter(*setup, state, dummy_debug_data, entry_point);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
DebugData<false> dummy_debug_data;
|
|
||||||
RunInterpreter(*setup, state, dummy_debug_data, entry_point);
|
|
||||||
#endif // ARCHITECTURE_x86_64
|
#endif // ARCHITECTURE_x86_64
|
||||||
}
|
static InterpreterEngine interpreter_engine;
|
||||||
|
|
||||||
DebugData<true> MergedShaderEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes,
|
|
||||||
unsigned int entry_point) const {
|
|
||||||
ASSERT(setup != nullptr);
|
|
||||||
ASSERT(entry_point < 1024);
|
|
||||||
|
|
||||||
UnitState state;
|
|
||||||
DebugData<true> debug_data;
|
|
||||||
|
|
||||||
// Setup input register table
|
|
||||||
boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero()));
|
|
||||||
state.LoadInputVertex(input, num_attributes);
|
|
||||||
RunInterpreter(*setup, state, debug_data, entry_point);
|
|
||||||
return debug_data;
|
|
||||||
}
|
|
||||||
|
|
||||||
ShaderEngine* GetEngine() {
|
ShaderEngine* GetEngine() {
|
||||||
static MergedShaderEngine merged_engine;
|
#ifdef ARCHITECTURE_x86_64
|
||||||
return &merged_engine;
|
// TODO(yuriks): Re-initialize on each change rather than being persistent
|
||||||
|
if (VideoCore::g_shader_jit_enabled) {
|
||||||
|
if (jit_engine == nullptr) {
|
||||||
|
jit_engine = std::make_unique<JitX64Engine>();
|
||||||
|
}
|
||||||
|
return jit_engine.get();
|
||||||
|
}
|
||||||
|
#endif // ARCHITECTURE_x86_64
|
||||||
|
|
||||||
|
return &interpreter_engine;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Shutdown() {
|
||||||
|
#ifdef ARCHITECTURE_x86_64
|
||||||
|
jit_engine = nullptr;
|
||||||
|
#endif // ARCHITECTURE_x86_64
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
|
|
@ -6,7 +6,6 @@
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <memory>
|
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <nihstro/shader_bytecode.h>
|
#include <nihstro/shader_bytecode.h>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
@ -152,9 +151,6 @@ struct UnitState {
|
||||||
void LoadInputVertex(const InputVertex& input, int num_attributes);
|
void LoadInputVertex(const InputVertex& input, int num_attributes);
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Clears the shader cache
|
|
||||||
void ClearCache();
|
|
||||||
|
|
||||||
struct ShaderSetup {
|
struct ShaderSetup {
|
||||||
struct {
|
struct {
|
||||||
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
|
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
|
||||||
|
@ -210,6 +206,7 @@ public:
|
||||||
|
|
||||||
// TODO(yuriks): Remove and make it non-global state somewhere
|
// TODO(yuriks): Remove and make it non-global state somewhere
|
||||||
ShaderEngine* GetEngine();
|
ShaderEngine* GetEngine();
|
||||||
|
void Shutdown();
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
|
||||||
|
|
|
@ -7,10 +7,12 @@
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
|
#include <boost/range/algorithm/fill.hpp>
|
||||||
#include <nihstro/shader_bytecode.h>
|
#include <nihstro/shader_bytecode.h>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
|
#include "common/microprofile.h"
|
||||||
#include "common/vector_math.h"
|
#include "common/vector_math.h"
|
||||||
#include "video_core/pica_state.h"
|
#include "video_core/pica_state.h"
|
||||||
#include "video_core/pica_types.h"
|
#include "video_core/pica_types.h"
|
||||||
|
@ -37,8 +39,8 @@ struct CallStackElement {
|
||||||
};
|
};
|
||||||
|
|
||||||
template <bool Debug>
|
template <bool Debug>
|
||||||
void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
|
static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
|
||||||
unsigned offset) {
|
unsigned offset) {
|
||||||
// TODO: Is there a maximal size for this?
|
// TODO: Is there a maximal size for this?
|
||||||
boost::container::static_vector<CallStackElement, 16> call_stack;
|
boost::container::static_vector<CallStackElement, 16> call_stack;
|
||||||
u32 program_counter = offset;
|
u32 program_counter = offset;
|
||||||
|
@ -647,9 +649,36 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Explicit instantiation
|
void InterpreterEngine::SetupBatch(const ShaderSetup* setup_) {
|
||||||
template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<false>&, unsigned offset);
|
setup = setup_;
|
||||||
template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<true>&, unsigned offset);
|
}
|
||||||
|
|
||||||
|
MICROPROFILE_DECLARE(GPU_Shader);
|
||||||
|
|
||||||
|
void InterpreterEngine::Run(UnitState& state, unsigned int entry_point) const {
|
||||||
|
ASSERT(setup != nullptr);
|
||||||
|
ASSERT(entry_point < 1024);
|
||||||
|
|
||||||
|
MICROPROFILE_SCOPE(GPU_Shader);
|
||||||
|
|
||||||
|
DebugData<false> dummy_debug_data;
|
||||||
|
RunInterpreter(*setup, state, dummy_debug_data, entry_point);
|
||||||
|
}
|
||||||
|
|
||||||
|
DebugData<true> InterpreterEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes,
|
||||||
|
unsigned int entry_point) const {
|
||||||
|
ASSERT(setup != nullptr);
|
||||||
|
ASSERT(entry_point < 1024);
|
||||||
|
|
||||||
|
UnitState state;
|
||||||
|
DebugData<true> debug_data;
|
||||||
|
|
||||||
|
// Setup input register table
|
||||||
|
boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero()));
|
||||||
|
state.LoadInputVertex(input, num_attributes);
|
||||||
|
RunInterpreter(*setup, state, debug_data, entry_point);
|
||||||
|
return debug_data;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
|
|
@ -4,19 +4,22 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "video_core/shader/shader.h"
|
||||||
|
|
||||||
namespace Pica {
|
namespace Pica {
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
struct ShaderSetup;
|
class InterpreterEngine final : public ShaderEngine {
|
||||||
struct UnitState;
|
public:
|
||||||
|
void SetupBatch(const ShaderSetup* setup) override;
|
||||||
|
void Run(UnitState& state, unsigned int entry_point) const override;
|
||||||
|
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
|
||||||
|
unsigned int entry_point) const override;
|
||||||
|
|
||||||
template <bool Debug>
|
private:
|
||||||
struct DebugData;
|
const ShaderSetup* setup = nullptr;
|
||||||
|
};
|
||||||
template <bool Debug>
|
|
||||||
void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
|
|
||||||
unsigned offset);
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
|
56
src/video_core/shader/shader_jit_x64.cpp
Normal file
56
src/video_core/shader/shader_jit_x64.cpp
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
// Copyright 2016 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "common/hash.h"
|
||||||
|
#include "common/microprofile.h"
|
||||||
|
#include "video_core/shader/shader.h"
|
||||||
|
#include "video_core/shader/shader_jit_x64.h"
|
||||||
|
#include "video_core/shader/shader_jit_x64_compiler.h"
|
||||||
|
|
||||||
|
namespace Pica {
|
||||||
|
namespace Shader {
|
||||||
|
|
||||||
|
JitX64Engine::JitX64Engine() = default;
|
||||||
|
JitX64Engine::~JitX64Engine() = default;
|
||||||
|
|
||||||
|
void JitX64Engine::SetupBatch(const ShaderSetup* setup_) {
|
||||||
|
cached_shader = nullptr;
|
||||||
|
setup = setup_;
|
||||||
|
if (setup == nullptr)
|
||||||
|
return;
|
||||||
|
|
||||||
|
u64 code_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code));
|
||||||
|
u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data));
|
||||||
|
|
||||||
|
u64 cache_key = code_hash ^ swizzle_hash;
|
||||||
|
auto iter = cache.find(cache_key);
|
||||||
|
if (iter != cache.end()) {
|
||||||
|
cached_shader = iter->second.get();
|
||||||
|
} else {
|
||||||
|
auto shader = std::make_unique<JitShader>();
|
||||||
|
shader->Compile();
|
||||||
|
cached_shader = shader.get();
|
||||||
|
cache.emplace_hint(iter, cache_key, std::move(shader));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MICROPROFILE_DECLARE(GPU_Shader);
|
||||||
|
|
||||||
|
void JitX64Engine::Run(UnitState& state, unsigned int entry_point) const {
|
||||||
|
ASSERT(setup != nullptr);
|
||||||
|
ASSERT(cached_shader != nullptr);
|
||||||
|
ASSERT(entry_point < 1024);
|
||||||
|
|
||||||
|
MICROPROFILE_SCOPE(GPU_Shader);
|
||||||
|
|
||||||
|
cached_shader->Run(*setup, state, entry_point);
|
||||||
|
}
|
||||||
|
|
||||||
|
DebugData<true> JitX64Engine::ProduceDebugInfo(const InputVertex& input, int num_attributes,
|
||||||
|
unsigned int entry_point) const {
|
||||||
|
UNIMPLEMENTED_MSG("Shader tracing/debugging is not supported by the JIT.");
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Shader
|
||||||
|
} // namespace Pica
|
35
src/video_core/shader/shader_jit_x64.h
Normal file
35
src/video_core/shader/shader_jit_x64.h
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
// Copyright 2016 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/shader/shader.h"
|
||||||
|
|
||||||
|
namespace Pica {
|
||||||
|
namespace Shader {
|
||||||
|
|
||||||
|
class JitShader;
|
||||||
|
|
||||||
|
class JitX64Engine final : public ShaderEngine {
|
||||||
|
public:
|
||||||
|
JitX64Engine();
|
||||||
|
~JitX64Engine() override;
|
||||||
|
|
||||||
|
void SetupBatch(const ShaderSetup* setup) override;
|
||||||
|
void Run(UnitState& state, unsigned int entry_point) const override;
|
||||||
|
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
|
||||||
|
unsigned int entry_point) const override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const ShaderSetup* setup = nullptr;
|
||||||
|
|
||||||
|
std::unordered_map<u64, std::unique_ptr<JitShader>> cache;
|
||||||
|
const JitShader* cached_shader = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Shader
|
||||||
|
} // namespace Pica
|
Loading…
Reference in a new issue