From ec85648af3316d5e43c7b57fca55d0dad3d03f96 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 25 Sep 2019 21:46:34 -0300 Subject: [PATCH] gl_shader_disk_cache: Store and load fast BRX --- .../engines/const_buffer_engine_interface.h | 35 ++-- .../renderer_opengl/gl_shader_cache.cpp | 19 +- .../renderer_opengl/gl_shader_cache.h | 7 +- .../renderer_opengl/gl_shader_disk_cache.cpp | 183 +++++++++++++++--- .../renderer_opengl/gl_shader_disk_cache.h | 12 +- src/video_core/shader/const_buffer_locker.cpp | 4 +- 6 files changed, 210 insertions(+), 50 deletions(-) diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index c0e3a3a173..80f470777d 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -4,6 +4,7 @@ #pragma once +#include #include "common/bit_field.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" @@ -29,51 +30,49 @@ struct SamplerDescriptor { u32 raw{}; }; + bool operator==(const SamplerDescriptor& rhs) const noexcept { + return raw == rhs.raw; + } + static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { - SamplerDescriptor result{}; + SamplerDescriptor result; switch (tic_texture_type) { - case Tegra::Texture::TextureType::Texture1D: { + case Tegra::Texture::TextureType::Texture1D: result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); result.is_array.Assign(0); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - case Tegra::Texture::TextureType::Texture2D: { + case Tegra::Texture::TextureType::Texture2D: result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); result.is_array.Assign(0); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - case Tegra::Texture::TextureType::Texture3D: { + case Tegra::Texture::TextureType::Texture3D: 
result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); result.is_array.Assign(0); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - case Tegra::Texture::TextureType::TextureCubemap: { + case Tegra::Texture::TextureType::TextureCubemap: result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); result.is_array.Assign(0); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - case Tegra::Texture::TextureType::Texture1DArray: { + case Tegra::Texture::TextureType::Texture1DArray: result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); result.is_array.Assign(1); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - case Tegra::Texture::TextureType::Texture2DArray: { + case Tegra::Texture::TextureType::Texture2DArray: result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); result.is_array.Assign(1); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } case Tegra::Texture::TextureType::Texture1DBuffer: { result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); result.is_array.Assign(0); @@ -81,30 +80,28 @@ struct SamplerDescriptor { result.is_shadow.Assign(0); return result; } - case Tegra::Texture::TextureType::Texture2DNoMipmap: { + case Tegra::Texture::TextureType::Texture2DNoMipmap: result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); result.is_array.Assign(0); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - case Tegra::Texture::TextureType::TextureCubeArray: { + case Tegra::Texture::TextureType::TextureCubeArray: result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); result.is_array.Assign(1); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - default: { + default: result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); result.is_array.Assign(0); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; } - } } }; 
+static_assert(std::is_trivially_copyable_v); class ConstBufferEngineInterface { public: diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6402d67632..7e7aea15f3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -392,7 +392,7 @@ std::tuple CachedShader::GetProgramHandle(const ProgramVar ConstBufferLocker locker(GetEnginesShaderType(program_type), *engine); program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, variant, locker); - disk_cache.SaveUsage(GetUsage(variant)); + disk_cache.SaveUsage(GetUsage(variant, locker)); LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); } @@ -408,10 +408,14 @@ std::tuple CachedShader::GetProgramHandle(const ProgramVar return {program->handle, base_bindings}; } -ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { +ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, + const ConstBufferLocker& locker) const { ShaderDiskCacheUsage usage; usage.unique_identifier = unique_identifier; usage.variant = variant; + usage.keys = locker.GetKeys(); + usage.bound_samplers = locker.GetBoundSamplers(); + usage.bindless_samplers = locker.GetBindlessSamplers(); return usage; } @@ -472,6 +476,17 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } if (!shader) { ConstBufferLocker locker(GetEnginesShaderType(unspecialized.program_type)); + for (const auto& key : usage.keys) { + const auto [buffer, offset] = key.first; + locker.InsertKey(buffer, offset, key.second); + } + for (const auto& [offset, sampler] : usage.bound_samplers) { + locker.InsertBoundSampler(offset, sampler); + } + for (const auto& [key, sampler] : usage.bindless_samplers) { + const auto [buffer, offset] = key; + locker.InsertBindlessSampler(buffer, offset, sampler); + } shader = BuildShader(device, 
usage.unique_identifier, unspecialized.program_type, unspecialized.code, unspecialized.code_b, usage.variant, locker, true); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 700a838533..2935e68312 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -31,6 +31,10 @@ namespace Core::Frontend { class EmuWindow; } +namespace VideoCommon::Shader { +class ConstBufferLocker; +} + namespace OpenGL { class CachedShader; @@ -92,7 +96,8 @@ private: GLShader::ShaderEntries entries, ProgramCode program_code, ProgramCode program_code_b); - ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; + ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, + const VideoCommon::Shader::ConstBufferLocker& locker) const; Core::System& system; ShaderDiskCacheOpenGL& disk_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index ddc19dccde..184a565e60 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -22,6 +22,29 @@ namespace OpenGL { +using VideoCommon::Shader::BindlessSamplerMap; +using VideoCommon::Shader::BoundSamplerMap; +using VideoCommon::Shader::KeyMap; + +namespace { + +struct ConstBufferKey { + u32 cbuf; + u32 offset; + u32 value; +}; + +struct BoundSamplerKey { + u32 offset; + Tegra::Engines::SamplerDescriptor sampler; +}; + +struct BindlessSamplerKey { + u32 cbuf; + u32 offset; + Tegra::Engines::SamplerDescriptor sampler; +}; + using ShaderCacheVersionHash = std::array; enum class TransferableEntryKind : u32 { @@ -33,9 +56,6 @@ constexpr u32 NativeVersion = 5; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ShaderDiskCacheUsage) == 40); - -namespace { ShaderCacheVersionHash 
GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; @@ -121,13 +141,13 @@ ShaderDiskCacheOpenGL::LoadTransferable() { u32 version{}; if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { LOG_ERROR(Render_OpenGL, - "Failed to get transferable cache version for title id={} - skipping", + "Failed to get transferable cache version for title id={}, skipping", GetTitleID()); return {}; } if (version < NativeVersion) { - LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing"); + LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); file.Close(); InvalidateTransferable(); is_usable = true; @@ -135,17 +155,18 @@ ShaderDiskCacheOpenGL::LoadTransferable() { } if (version > NativeVersion) { LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " - "of the emulator - skipping"); + "of the emulator, skipping"); return {}; } // Version is valid, load the shaders + constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; std::vector raws; std::vector usages; while (file.Tell() < file.GetSize()) { TransferableEntryKind kind{}; if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { - LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping"); + LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); return {}; } @@ -153,7 +174,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() { case TransferableEntryKind::Raw: { ShaderDiskCacheRaw entry; if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping"); + LOG_ERROR(Render_OpenGL, error_loading); return {}; } transferable.insert({entry.GetUniqueIdentifier(), {}}); @@ -161,16 +182,45 @@ ShaderDiskCacheOpenGL::LoadTransferable() { break; } case TransferableEntryKind::Usage: { - ShaderDiskCacheUsage usage{}; - if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping"); 
+ ShaderDiskCacheUsage usage; + + u32 num_keys{}; + u32 num_bound_samplers{}; + u32 num_bindless_samplers{}; + if (file.ReadArray(&usage.unique_identifier, 1) != 1 || + file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || + file.ReadArray(&num_bound_samplers, 1) != 1 || + file.ReadArray(&num_bindless_samplers, 1) != 1) { + LOG_ERROR(Render_OpenGL, error_loading); return {}; } + + std::vector keys(num_keys); + std::vector bound_samplers(num_bound_samplers); + std::vector bindless_samplers(num_bindless_samplers); + if (file.ReadArray(keys.data(), keys.size()) != keys.size() || + file.ReadArray(bound_samplers.data(), bound_samplers.size()) != + bound_samplers.size() || + file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) != + bindless_samplers.size()) { + LOG_ERROR(Render_OpenGL, error_loading); + return {}; + } + for (const auto& key : keys) { + usage.keys.insert({{key.cbuf, key.offset}, key.value}); + } + for (const auto& key : bound_samplers) { + usage.bound_samplers.emplace(key.offset, key.sampler); + } + for (const auto& key : bindless_samplers) { + usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); + } + usages.push_back(std::move(usage)); break; } default: - LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping", + LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping", static_cast(kind)); return {}; } @@ -197,7 +247,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { const auto result = LoadPrecompiledFile(file); if (!result) { LOG_INFO(Render_OpenGL, - "Failed to load precompiled cache for game with title id={} - removing", + "Failed to load precompiled cache for game with title id={}, removing", GetTitleID()); file.Close(); InvalidatePrecompiled(); @@ -228,10 +278,35 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { ShaderDumpsMap dumps; while (precompiled_cache_virtual_file_offset < 
precompiled_cache_virtual_file.GetSize()) { + u32 num_keys{}; + u32 num_bound_samplers{}; + u32 num_bindless_samplers{}; ShaderDiskCacheUsage usage; - if (!LoadObjectFromPrecompiled(usage)) { + if (!LoadObjectFromPrecompiled(usage.unique_identifier) || + !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || + !LoadObjectFromPrecompiled(num_bound_samplers) || + !LoadObjectFromPrecompiled(num_bindless_samplers)) { return {}; } + std::vector keys(num_keys); + std::vector bound_samplers(num_bound_samplers); + std::vector bindless_samplers(num_bindless_samplers); + // Each call is treated as a success flag, matching the keys read above. Comparing the negated + // flag against an element count would reject every valid non-empty sampler array. + if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) || + !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) || + !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size())) { + return {}; + } + for (const auto& key : keys) { + usage.keys.insert({{key.cbuf, key.offset}, key.value}); + } + for (const auto& key : bound_samplers) { + usage.bound_samplers.emplace(key.offset, key.sampler); + } + for (const auto& key : bindless_samplers) { + usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); + } ShaderDiskCacheDump dump; if (!LoadObjectFromPrecompiled(dump.binary_format)) { @@ -248,7 +323,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - dumps.emplace(usage, dump); + dumps.emplace(std::move(usage), dump); } return dumps; } @@ -282,10 +357,11 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { } FileUtil::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) + if (!file.IsOpen()) { return; + } if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { - LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing"); + LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); file.Close(); InvalidateTransferable(); return; @@ -311,13
+387,40 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { FileUtil::IOFile file = AppendTransferableFile(); if (!file.IsOpen()) return; - - if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) { - LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing"); + const auto Close = [&] { + LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing"); file.Close(); InvalidateTransferable(); + }; + + if (file.WriteObject(TransferableEntryKind::Usage) != 1 || + file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || + file.WriteObject(static_cast(usage.keys.size())) != 1 || + file.WriteObject(static_cast(usage.bound_samplers.size())) != 1 || + file.WriteObject(static_cast(usage.bindless_samplers.size())) != 1) { + Close(); return; } + for (const auto& [pair, value] : usage.keys) { + const auto [cbuf, offset] = pair; + if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) { + Close(); + return; + } + } + for (const auto& [offset, sampler] : usage.bound_samplers) { + if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) { + Close(); + return; + } + } + for (const auto& [pair, sampler] : usage.bindless_samplers) { + const auto [cbuf, offset] = pair; + if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { + Close(); + return; + } + } } void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { @@ -339,15 +442,45 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p std::vector binary(binary_length); glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - if (!SaveObjectToPrecompiled(usage) || - !SaveObjectToPrecompiled(static_cast(binary_format)) || - !SaveObjectToPrecompiled(static_cast(binary_length)) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - LOG_ERROR(Render_OpenGL, "Failed to save 
binary program file in shader={:016x} - removing", + const auto Close = [&] { + LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", usage.unique_identifier); InvalidatePrecompiled(); + }; + + if (!SaveObjectToPrecompiled(usage.unique_identifier) || + !SaveObjectToPrecompiled(usage.variant) || + !SaveObjectToPrecompiled(static_cast(usage.keys.size())) || + !SaveObjectToPrecompiled(static_cast(usage.bound_samplers.size())) || + !SaveObjectToPrecompiled(static_cast(usage.bindless_samplers.size()))) { + Close(); return; } + for (const auto& [pair, value] : usage.keys) { + const auto [cbuf, offset] = pair; + if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) { + Close(); + return; + } + } + for (const auto& [offset, sampler] : usage.bound_samplers) { + if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) { + Close(); + return; + } + } + for (const auto& [pair, sampler] : usage.bindless_samplers) { + const auto [cbuf, offset] = pair; + if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { + Close(); + return; + } + } + if (!SaveObjectToPrecompiled(static_cast(binary_format)) || + !SaveObjectToPrecompiled(static_cast(binary_length)) || + !SaveArrayToPrecompiled(binary.data(), binary.size())) { + Close(); + } } FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 61b46d728d..db23ada93a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -19,6 +20,7 @@ #include "common/common_types.h" #include "core/file_sys/vfs_vector.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/shader/const_buffer_locker.h" namespace Core { class System; @@ -53,6 +55,7 @@ 
struct BaseBindings { return !operator==(rhs); } }; +static_assert(std::is_trivially_copyable_v); /// Describes the different variants a single program can be compiled. struct ProgramVariant { @@ -70,13 +73,20 @@ struct ProgramVariant { } }; +static_assert(std::is_trivially_copyable_v); + /// Describes how a shader is used. struct ShaderDiskCacheUsage { u64 unique_identifier{}; ProgramVariant variant; + VideoCommon::Shader::KeyMap keys; + VideoCommon::Shader::BoundSamplerMap bound_samplers; + VideoCommon::Shader::BindlessSamplerMap bindless_samplers; bool operator==(const ShaderDiskCacheUsage& rhs) const { - return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant); + return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) == + std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers, + rhs.bindless_samplers); } bool operator!=(const ShaderDiskCacheUsage& rhs) const { diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index ebeba102d0..fda9e3c384 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -90,14 +90,14 @@ bool ConstBufferLocker::IsConsistent() const { [this](const auto& sampler) { const auto [key, value] = sampler; const auto other_value = engine->AccessBoundSampler(stage, key); - return value.raw == other_value.raw; + return value == other_value; }) && std::all_of( bindless_samplers.begin(), bindless_samplers.end(), [this](const auto& sampler) { const auto [cbuf, offset] = sampler.first; const auto value = sampler.second; const auto other_value = engine->AccessBindlessSampler(stage, cbuf, offset); - return value.raw == other_value.raw; + return value == other_value; }); }