mirror of
https://git.suyu.dev/suyu/suyu
synced 2025-01-09 16:03:21 +00:00
Merge pull request #328 from Subv/constbuffers
GPU: Upload the shader Constant Buffers as SSBOs to the GPU
This commit is contained in:
commit
bb0c3fc828
8 changed files with 104 additions and 16 deletions
|
@ -301,5 +301,26 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
|
||||||
return regs.reg_array[method];
|
return regs.reg_array[method];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns whether the specified shader stage is enabled or not.
///
/// The Vertex stage is always reported as enabled. Every other stage is reported according to the
/// `enable` field of the `regs.shader_config` entry of the shader program with the same name.
///
/// @param stage Shader stage to query.
/// @return true if the stage is enabled, false otherwise.
bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const {
    // Reads the enable field of the shader program that backs a stage.
    const auto is_program_enabled = [this](Regs::ShaderProgram program) {
        return regs.shader_config[static_cast<size_t>(program)].enable != 0;
    };

    switch (stage) {
    case Regs::ShaderStage::Vertex:
        // The Vertex stage is always enabled.
        return true;
    case Regs::ShaderStage::TesselationControl:
        return is_program_enabled(Regs::ShaderProgram::TesselationControl);
    case Regs::ShaderStage::TesselationEval:
        return is_program_enabled(Regs::ShaderProgram::TesselationEval);
    case Regs::ShaderStage::Geometry:
        return is_program_enabled(Regs::ShaderProgram::Geometry);
    case Regs::ShaderStage::Fragment:
        return is_program_enabled(Regs::ShaderProgram::Fragment);
    }

    UNREACHABLE();
    // NOTE(review): UNREACHABLE() is defined outside this view. If it can ever return (assertions
    // compiled out, execution resumed from a debugger), flowing off the end of this non-void
    // function would be undefined behavior, so report unknown stages as disabled instead.
    return false;
}
|
||||||
|
|
||||||
} // namespace Engines
|
} // namespace Engines
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
||||||
|
|
|
@ -518,6 +518,9 @@ public:
|
||||||
/// Returns a list of enabled textures for the specified shader stage.
|
/// Returns a list of enabled textures for the specified shader stage.
|
||||||
std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
|
std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
|
||||||
|
|
||||||
|
/// Returns whether the specified shader stage is enabled or not.
|
||||||
|
bool IsShaderStageEnabled(Regs::ShaderStage stage) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::unordered_map<u32, std::vector<u32>> uploaded_macros;
|
std::unordered_map<u32, std::vector<u32>> uploaded_macros;
|
||||||
|
|
||||||
|
|
|
@ -46,6 +46,14 @@ RasterizerOpenGL::RasterizerOpenGL() {
|
||||||
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
|
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create SSBOs
|
||||||
|
for (size_t stage = 0; stage < ssbos.size(); ++stage) {
|
||||||
|
for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) {
|
||||||
|
ssbos[stage][buffer].Create();
|
||||||
|
state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
GLint ext_num;
|
GLint ext_num;
|
||||||
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num);
|
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num);
|
||||||
for (GLint i = 0; i < ext_num; i++) {
|
for (GLint i = 0; i < ext_num; i++) {
|
||||||
|
@ -191,8 +199,9 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size
|
||||||
auto& shader_config = gpu.regs.shader_config[index];
|
auto& shader_config = gpu.regs.shader_config[index];
|
||||||
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
|
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
|
||||||
|
|
||||||
// VertexB program is always enabled, despite bit setting
|
const auto& stage = index - 1; // Stage indices are 0 - 5
|
||||||
const bool is_enabled{shader_config.enable || program == Maxwell::ShaderProgram::VertexB};
|
|
||||||
|
const bool is_enabled = gpu.IsShaderStageEnabled(static_cast<Maxwell::ShaderStage>(stage));
|
||||||
|
|
||||||
// Skip stages that are not enabled
|
// Skip stages that are not enabled
|
||||||
if (!is_enabled) {
|
if (!is_enabled) {
|
||||||
|
@ -200,7 +209,6 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size
|
||||||
}
|
}
|
||||||
|
|
||||||
// Upload uniform data as one UBO per stage
|
// Upload uniform data as one UBO per stage
|
||||||
const auto& stage = index - 1; // Stage indices are 0 - 5
|
|
||||||
const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
|
const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
|
||||||
copy_buffer(uniform_buffers[stage].handle, ubo_offset,
|
copy_buffer(uniform_buffers[stage].handle, ubo_offset,
|
||||||
sizeof(GLShader::MaxwellUniformData));
|
sizeof(GLShader::MaxwellUniformData));
|
||||||
|
@ -298,6 +306,8 @@ void RasterizerOpenGL::DrawArrays() {
|
||||||
|
|
||||||
// Sync and bind the texture surfaces
|
// Sync and bind the texture surfaces
|
||||||
BindTextures();
|
BindTextures();
|
||||||
|
// Configure the constant buffer objects
|
||||||
|
SetupConstBuffers();
|
||||||
|
|
||||||
// Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
|
// Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
|
||||||
// scissor test to prevent drawing outside of the framebuffer region
|
// scissor test to prevent drawing outside of the framebuffer region
|
||||||
|
@ -380,7 +390,7 @@ void RasterizerOpenGL::DrawArrays() {
|
||||||
|
|
||||||
void RasterizerOpenGL::BindTextures() {
|
void RasterizerOpenGL::BindTextures() {
|
||||||
using Regs = Tegra::Engines::Maxwell3D::Regs;
|
using Regs = Tegra::Engines::Maxwell3D::Regs;
|
||||||
auto maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();
|
auto& maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();
|
||||||
|
|
||||||
// Each Maxwell shader stage can have an arbitrary number of textures, but we're limited to a
|
// Each Maxwell shader stage can have an arbitrary number of textures, but we're limited to a
|
||||||
// certain number in OpenGL. We try to only use the minimum amount of host textures by not
|
// certain number in OpenGL. We try to only use the minimum amount of host textures by not
|
||||||
|
@ -527,6 +537,41 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::SetupConstBuffers() {
|
||||||
|
using Regs = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
auto& gpu = Core::System::GetInstance().GPU();
|
||||||
|
auto& maxwell3d = gpu.Get3DEngine();
|
||||||
|
|
||||||
|
// Upload only the enabled buffers from the 16 constbuffers of each shader stage
|
||||||
|
u32 current_bindpoint = 0;
|
||||||
|
for (u32 stage = 0; stage < Regs::MaxShaderStage; ++stage) {
|
||||||
|
auto& shader_stage = maxwell3d.state.shader_stages[stage];
|
||||||
|
bool stage_enabled = maxwell3d.IsShaderStageEnabled(static_cast<Regs::ShaderStage>(stage));
|
||||||
|
|
||||||
|
for (u32 buffer_id = 0; buffer_id < Regs::MaxConstBuffers; ++buffer_id) {
|
||||||
|
const auto& buffer = shader_stage.const_buffers[buffer_id];
|
||||||
|
|
||||||
|
state.draw.const_buffers[stage][buffer_id].enabled = buffer.enabled && stage_enabled;
|
||||||
|
|
||||||
|
if (buffer.enabled && stage_enabled) {
|
||||||
|
state.draw.const_buffers[stage][buffer_id].bindpoint = current_bindpoint;
|
||||||
|
current_bindpoint++;
|
||||||
|
|
||||||
|
VAddr addr = gpu.memory_manager->PhysicalToVirtualAddress(buffer.address);
|
||||||
|
const u8* data = Memory::GetPointer(addr);
|
||||||
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER,
|
||||||
|
state.draw.const_buffers[stage][buffer_id].ssbo);
|
||||||
|
glBufferData(GL_SHADER_STORAGE_BUFFER, buffer.size, data, GL_DYNAMIC_DRAW);
|
||||||
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
|
||||||
|
} else {
|
||||||
|
state.draw.const_buffers[stage][buffer_id].bindpoint = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
state.Apply();
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
|
void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
|
||||||
const Surface& depth_surface, bool has_stencil) {
|
const Surface& depth_surface, bool has_stencil) {
|
||||||
state.draw.draw_framebuffer = framebuffer.handle;
|
state.draw.draw_framebuffer = framebuffer.handle;
|
||||||
|
|
|
@ -87,6 +87,9 @@ private:
|
||||||
/// Binds the required textures to OpenGL before drawing a batch.
|
/// Binds the required textures to OpenGL before drawing a batch.
|
||||||
void BindTextures();
|
void BindTextures();
|
||||||
|
|
||||||
|
/// Configures the current constbuffers to use for the draw command.
|
||||||
|
void SetupConstBuffers();
|
||||||
|
|
||||||
/// Syncs the viewport to match the guest state
|
/// Syncs the viewport to match the guest state
|
||||||
void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale);
|
void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale);
|
||||||
|
|
||||||
|
@ -129,6 +132,10 @@ private:
|
||||||
std::array<bool, 16> hw_vao_enabled_attributes;
|
std::array<bool, 16> hw_vao_enabled_attributes;
|
||||||
|
|
||||||
std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
|
std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
|
||||||
|
std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>,
|
||||||
|
Tegra::Engines::Maxwell3D::Regs::MaxShaderStage>
|
||||||
|
ssbos;
|
||||||
|
|
||||||
static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
|
static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||||
std::unique_ptr<OGLStreamBuffer> vertex_buffer;
|
std::unique_ptr<OGLStreamBuffer> vertex_buffer;
|
||||||
OGLBuffer uniform_buffer;
|
OGLBuffer uniform_buffer;
|
||||||
|
|
|
@ -53,13 +53,6 @@ void SetShaderSamplerBindings(GLuint shader) {
|
||||||
|
|
||||||
} // namespace Impl
|
} // namespace Impl
|
||||||
|
|
||||||
void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
|
void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {}
|
||||||
const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager;
|
|
||||||
for (unsigned index = 0; index < shader_stage.const_buffers.size(); ++index) {
|
|
||||||
const auto& const_buffer = shader_stage.const_buffers[index];
|
|
||||||
const VAddr vaddr = memory_manager->PhysicalToVirtualAddress(const_buffer.address);
|
|
||||||
Memory::ReadBlock(vaddr, const_buffers[index].data(), sizeof(ConstBuffer));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace GLShader
|
} // namespace GLShader
|
||||||
|
|
|
@ -30,11 +30,10 @@ void SetShaderSamplerBindings(GLuint shader);
|
||||||
// Not following that rule will cause problems on some AMD drivers.
|
// Not following that rule will cause problems on some AMD drivers.
|
||||||
struct MaxwellUniformData {
|
struct MaxwellUniformData {
|
||||||
void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
|
void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
|
||||||
|
// TODO(Subv): Use this for something.
|
||||||
using ConstBuffer = std::array<GLvec4, 4>;
|
|
||||||
alignas(16) std::array<ConstBuffer, Maxwell3D::Regs::MaxConstBuffers> const_buffers;
|
|
||||||
};
|
};
|
||||||
static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is incorrect");
|
// static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is
|
||||||
|
// incorrect");
|
||||||
static_assert(sizeof(MaxwellUniformData) < 16384,
|
static_assert(sizeof(MaxwellUniformData) < 16384,
|
||||||
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
|
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
|
||||||
|
|
||||||
|
|
|
@ -202,6 +202,20 @@ void OpenGLState::Apply() const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Constbuffers
|
||||||
|
for (u32 stage = 0; stage < draw.const_buffers.size(); ++stage) {
|
||||||
|
for (u32 buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) {
|
||||||
|
auto& current = cur_state.draw.const_buffers[stage][buffer_id];
|
||||||
|
auto& new_state = draw.const_buffers[stage][buffer_id];
|
||||||
|
if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint ||
|
||||||
|
current.ssbo != new_state.ssbo) {
|
||||||
|
if (new_state.enabled) {
|
||||||
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, new_state.bindpoint, new_state.ssbo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Lighting LUTs
|
// Lighting LUTs
|
||||||
if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) {
|
if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) {
|
||||||
glActiveTexture(TextureUnits::LightingLUT.Enum());
|
glActiveTexture(TextureUnits::LightingLUT.Enum());
|
||||||
|
|
|
@ -123,6 +123,12 @@ public:
|
||||||
GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
|
GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
|
||||||
GLuint shader_program; // GL_CURRENT_PROGRAM
|
GLuint shader_program; // GL_CURRENT_PROGRAM
|
||||||
GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
|
GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
|
||||||
|
/// Per-slot constant buffer state tracked for one (shader stage, buffer index) pair.
struct ConstBufferConfig {
|
||||||
|
/// Whether this slot is in use; OpenGLState::Apply only binds slots with this set.
bool enabled;
|
||||||
|
/// Index passed to glBindBufferBase for this slot. SetupConstBuffers assigns consecutive values
/// to used slots and stores -1 (converted to GLuint) for unused ones.
GLuint bindpoint;
|
||||||
|
/// Buffer object bound at `bindpoint`; the rasterizer constructor stores the handle of the
/// matching entry of its `ssbos` array here.
GLuint ssbo;
|
||||||
|
};
|
||||||
|
std::array<std::array<ConstBufferConfig, 16>, 5> const_buffers{};
|
||||||
} draw;
|
} draw;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
|
|
Loading…
Reference in a new issue