From 4539073ce1d8fd6df03263e826d3805b4909e055 Mon Sep 17 00:00:00 2001
From: ameerj
Date: Thu, 30 Jul 2020 15:41:11 -0400
Subject: [PATCH] Address feedback. Bruteforce delete duplicates

---
 .../renderer_vulkan/vk_graphics_pipeline.h |   2 +-
 .../renderer_vulkan/vk_pipeline_cache.cpp  |  16 ++-
 .../renderer_vulkan/vk_pipeline_cache.h    |  19 ++-
 .../renderer_vulkan/vk_rasterizer.cpp      |  18 +--
 .../renderer_vulkan/vk_rasterizer.h        |   2 +-
 src/video_core/shader/async_shaders.cpp    | 133 ++++++++++--------
 src/video_core/shader/async_shaders.h      |   4 +-
 7 files changed, 115 insertions(+), 79 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 39c73a139f..d50bd347c1 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -54,7 +54,7 @@ public:
         return renderpass;
     }
 
-    const GraphicsPipelineCacheKey& GetCacheKey() {
+    const GraphicsPipelineCacheKey& GetCacheKey() const {
         return m_key;
     }
 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 45d4dcb8c5..a7ce621cae 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -205,20 +205,20 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
     return last_shaders = shaders;
 }
 
-VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
+VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
     const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
     if (last_graphics_pipeline && last_graphics_key == key) {
-        return *last_graphics_pipeline;
+        return last_graphics_pipeline;
     }
     last_graphics_key = key;
 
     if (device.UseAsynchronousShaders()) {
         auto work = async_shaders.GetCompletedWork();
-        for (std::size_t i = 0; i < work.size(); ++i) {
-            auto& entry = graphics_cache.at(work[i].pipeline->GetCacheKey());
-            entry = std::move(work[i].pipeline);
+        for (auto& w : work) {
+            auto& entry = graphics_cache.at(w.pipeline->GetCacheKey());
+            entry = std::move(w.pipeline);
         }
         const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
         if (is_cache_miss) {
@@ -227,7 +227,8 @@ VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
             async_shaders.QueueVulkanShader(this, bindings, program, key.renderpass_params,
                                             key.padding, key.shaders, key.fixed_state);
         }
-        return *(last_graphics_pipeline = graphics_cache.at(key).get());
+        last_graphics_pipeline = graphics_cache.at(key).get();
+        return last_graphics_pipeline;
     }
 
     const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
@@ -239,7 +240,8 @@ VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
                                                      update_descriptor_queue, renderpass_cache, key, bindings, program);
     }
 
-    return *(last_graphics_pipeline = entry.get());
+    last_graphics_pipeline = entry.get();
+    return last_graphics_pipeline;
 }
 
 VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index c70da6da40..404f2b3f78 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -153,31 +153,46 @@ public:
 
     std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
 
-    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
+    VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
                                             VideoCommon::Shader::AsyncShaders& async_shaders);
 
     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
 
-    const VKDevice& GetDevice() {
+    const VKDevice& GetDevice() const {
         return device;
     }
 
     VKScheduler& GetScheduler() {
         return scheduler;
     }
+    const VKScheduler& GetScheduler() const {
+        return scheduler;
+    }
 
     VKDescriptorPool& GetDescriptorPool() {
         return descriptor_pool;
    }
 
+    const VKDescriptorPool& GetDescriptorPool() const {
+        return descriptor_pool;
+    }
+
     VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() {
         return update_descriptor_queue;
     }
 
+    const VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() const {
+        return update_descriptor_queue;
+    }
+
     VKRenderPassCache& GetRenderpassCache() {
         return renderpass_cache;
     }
 
+    const VKRenderPassCache& GetRenderpassCache() const {
+        return renderpass_cache;
+    }
+
 protected:
     void OnShaderRemoval(Shader* shader) final;
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 6310e898ca..fc1b51a96a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -404,10 +404,12 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
       wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
     scheduler.SetQueryCache(query_cache);
     if (device.UseAsynchronousShaders()) {
+        // The following is subject to move into an allocate-workers method, to be API agnostic
+
         // Max worker threads we should allow
-        constexpr auto MAX_THREADS = 2u;
+        constexpr u32 MAX_THREADS = 4;
         // Amount of threads we should reserve for other parts of yuzu
-        constexpr auto RESERVED_THREADS = 6u;
+        constexpr u32 RESERVED_THREADS = 6;
         // Get the amount of threads we can use(this can return zero)
         const auto cpu_thread_count =
             std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
@@ -456,16 +458,16 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     key.renderpass_params = GetRenderPassParams(texceptions);
     key.padding = 0;
 
-    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
-    if (&pipeline == nullptr || pipeline.GetHandle() == VK_NULL_HANDLE) {
+    auto pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+    if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
         // Async graphics pipeline was not ready.
         system.GPU().TickWork();
         return;
     }
-    scheduler.BindGraphicsPipeline(pipeline.GetHandle());
+    scheduler.BindGraphicsPipeline(pipeline->GetHandle());
 
-    const auto renderpass = pipeline.GetRenderPass();
+    const auto renderpass = pipeline->GetRenderPass();
     const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
     scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
 
@@ -475,8 +477,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     BeginTransformFeedback();
 
-    const auto pipeline_layout = pipeline.GetLayout();
-    const auto descriptor_set = pipeline.CommitDescriptorSet();
+    const auto pipeline_layout = pipeline->GetLayout();
+    const auto descriptor_set = pipeline->CommitDescriptorSet();
     scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
         if (descriptor_set) {
             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 27604b9a3c..f640ba649d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -287,7 +287,6 @@ private:
     VKMemoryManager& memory_manager;
     StateTracker& state_tracker;
     VKScheduler& scheduler;
-    VideoCommon::Shader::AsyncShaders async_shaders;
 
     VKStagingBufferPool staging_pool;
     VKDescriptorPool descriptor_pool;
@@ -307,6 +306,7 @@ private:
     vk::Buffer default_buffer;
     VKMemoryCommit default_buffer_commit;
     vk::Event wfi_event;
+    VideoCommon::Shader::AsyncShaders async_shaders;
 
     std::array<View, Maxwell::NumRenderTargets> color_attachments;
     View zeta_attachment;
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 335a0d05b3..c536b025b8 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -111,20 +111,19 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                      VideoCommon::Shader::CompilerSettings compiler_settings,
                                      const VideoCommon::Shader::Registry& registry,
                                      VAddr cpu_addr) {
-    WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM
-                                                    : AsyncShaders::Backend::OpenGL,
-                        &device,
-                        shader_type,
-                        uid,
-                        std::move(code),
-                        std::move(code_b),
-                        main_offset,
-                        compiler_settings,
-                        &registry,
-                        cpu_addr};
-
+    auto p = std::make_unique<WorkerParams>();
+    p->backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL;
+    p->device = &device;
+    p->shader_type = shader_type;
+    p->uid = uid;
+    p->code = std::move(code);
+    p->code_b = std::move(code_b);
+    p->main_offset = main_offset;
+    p->compiler_settings = compiler_settings;
+    p->registry = &registry;
+    p->cpu_address = cpu_addr;
     std::unique_lock lock(queue_mutex);
-    pending_queue.push_back(std::move(params));
+    pending_queue.push(std::move(p));
     cv.notify_one();
 }
 
@@ -134,19 +133,19 @@ void AsyncShaders::QueueVulkanShader(
     std::array shaders, Vulkan::FixedPipelineState fixed_state) {
-    WorkerParams params{
-        .backend = AsyncShaders::Backend::Vulkan,
-        .pp_cache = pp_cache,
-        .bindings = bindings,
-        .program = program,
-        .renderpass_params = renderpass_params,
-        .padding = padding,
-        .shaders = shaders,
-        .fixed_state = fixed_state,
-    };
+    auto p = std::make_unique<WorkerParams>();
+
+    p->backend = Backend::Vulkan;
+    p->pp_cache = pp_cache;
+    p->bindings = bindings;
+    p->program = program;
+    p->renderpass_params = renderpass_params;
+    p->padding = padding;
+    p->shaders = shaders;
+    p->fixed_state = fixed_state;
 
     std::unique_lock lock(queue_mutex);
-    pending_queue.push_back(std::move(params));
+    pending_queue.push(std::move(p));
     cv.notify_one();
 }
 
@@ -168,64 +167,82 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
         if (pending_queue.empty()) {
             continue;
         }
-        // Pull work from queue
-        WorkerParams work = std::move(pending_queue.front());
-        pending_queue.pop_front();
+        // Pull work from queue
+        auto work = std::move(pending_queue.front());
+        pending_queue.pop();
         lock.unlock();
 
-        if (work.backend == AsyncShaders::Backend::OpenGL ||
-            work.backend == AsyncShaders::Backend::GLASM) {
-            VideoCommon::Shader::Registry registry = *work.registry;
-            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, registry);
+        if (work->backend == Backend::OpenGL || work->backend == Backend::GLASM) {
+            VideoCommon::Shader::Registry registry = *work->registry;
+            const ShaderIR ir(work->code, work->main_offset, work->compiler_settings, registry);
             const auto scope = context->Acquire();
             auto program =
-                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, registry);
+                OpenGL::BuildShader(*work->device, work->shader_type, work->uid, ir, registry);
             Result result{};
-            result.backend = work.backend;
-            result.cpu_address = work.cpu_address;
-            result.uid = work.uid;
-            result.code = std::move(work.code);
-            result.code_b = std::move(work.code_b);
-            result.shader_type = work.shader_type;
+            result.backend = work->backend;
+            result.cpu_address = work->cpu_address;
+            result.uid = work->uid;
+            result.code = std::move(work->code);
+            result.code_b = std::move(work->code_b);
+            result.shader_type = work->shader_type;
+            // LOG_CRITICAL(Render_Vulkan, "Shader has been compiled \t0x{:016X} id {}",
+            //              result.uid, id);
 
-            if (work.backend == AsyncShaders::Backend::OpenGL) {
+            if (work->backend == Backend::OpenGL) {
                 result.program.opengl = std::move(program->source_program);
-            } else if (work.backend == AsyncShaders::Backend::GLASM) {
+            } else if (work->backend == Backend::GLASM) {
                 result.program.glasm = std::move(program->assembly_program);
             }
+            work.reset();
 
             {
                 std::unique_lock complete_lock(completed_mutex);
                 finished_work.push_back(std::move(result));
             }
-
-        } else if (work.backend == AsyncShaders::Backend::Vulkan) {
+        } else if (work->backend == Backend::Vulkan) {
             Vulkan::GraphicsPipelineCacheKey params_key{
-                work.renderpass_params,
-                work.padding,
-                work.shaders,
-                work.fixed_state,
+                .renderpass_params = work->renderpass_params,
+                .padding = work->padding,
+                .shaders = work->shaders,
+                .fixed_state = work->fixed_state,
             };
+
+            {
+                std::unique_lock find_lock{completed_mutex};
+                for (size_t i = 0; i < finished_work.size(); ++i) {
+                    // This loop deletes duplicate pipelines in finished_work
+                    // in favor of the pipeline about to be created
+
+                    if (finished_work[i].pipeline &&
+                        finished_work[i].pipeline->GetCacheKey().Hash() == params_key.Hash()) {
+                        LOG_CRITICAL(Render_Vulkan,
+                                     "Pipeline was already here \t0x{:016X} matches 0x{:016X} ",
+                                     params_key.Hash(),
+                                     finished_work[i].pipeline->GetCacheKey().Hash());
+                        finished_work.erase(finished_work.begin() + i);
+                    }
+                }
+                find_lock.unlock();
+            }
+
+            auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
+                work->pp_cache->GetDevice(), work->pp_cache->GetScheduler(),
+                work->pp_cache->GetDescriptorPool(), work->pp_cache->GetUpdateDescriptorQueue(),
+                work->pp_cache->GetRenderpassCache(), params_key, work->bindings, work->program);
+
             {
                 std::unique_lock complete_lock(completed_mutex);
-
-                // Duplicate creation of pipelines leads to instability and crashing, caused by a
-                // race condition but band-aid solution is locking the making of the pipeline
-                // results in only one pipeline created at a time.
                 Result result{
-                    .backend = work.backend,
-                    .pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
-                        work.pp_cache->GetDevice(), work.pp_cache->GetScheduler(),
-                        work.pp_cache->GetDescriptorPool(),
-                        work.pp_cache->GetUpdateDescriptorQueue(),
-                        work.pp_cache->GetRenderpassCache(), params_key, work.bindings,
-                        work.program),
+                    .backend = Backend::Vulkan,
+                    .pipeline = std::move(pipeline),
                 };
-
                 finished_work.push_back(std::move(result));
+                complete_lock.unlock();
             }
         }
+        // Give a chance for another thread to get work. Lessens duplicates
+        std::this_thread::yield();
     }
 }
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 702026ce24..d4eeb8fb6b 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -100,7 +100,7 @@ private:
     bool HasWorkQueued();
 
     struct WorkerParams {
-        AsyncShaders::Backend backend;
+        Backend backend;
         // For OGL
         const OpenGL::Device* device;
         Tegra::Engines::ShaderType shader_type;
@@ -128,7 +128,7 @@ private:
     std::atomic<bool> is_thread_exiting{};
     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
     std::vector<std::thread> worker_threads;
-    std::deque<WorkerParams> pending_queue;
+    std::queue<std::unique_ptr<WorkerParams>> pending_queue;
     std::vector<Result> finished_work;
     Core::Frontend::EmuWindow& emu_window;
 };
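
Note: the Vulkan branch above prunes stale duplicates out of finished_work before publishing a freshly built pipeline. The sketch below restates that idea in isolation; it is a minimal, hypothetical example (Result, key_hash and PublishResult are stand-ins, not yuzu's actual types), and unlike the hunk above its loop only advances the index when nothing was erased, so back-to-back duplicates are not skipped.

    // Standalone sketch of the "brute-force delete duplicates" publish step.
    // Result, key_hash and PublishResult are illustrative stand-ins, not yuzu code.
    #include <cstddef>
    #include <cstdint>
    #include <mutex>
    #include <utility>
    #include <vector>

    struct Result {
        std::uint64_t key_hash{}; // stands in for pipeline->GetCacheKey().Hash()
    };

    std::mutex completed_mutex;
    std::vector<Result> finished_work;

    // Erase any already-finished entry with the same key, then publish the new
    // result, all under one lock, so a consumer never sees two results per key.
    void PublishResult(Result result) {
        std::scoped_lock lock{completed_mutex};
        for (std::size_t i = 0; i < finished_work.size();) {
            if (finished_work[i].key_hash == result.key_hash) {
                finished_work.erase(finished_work.begin() + i);
            } else {
                ++i;
            }
        }
        finished_work.push_back(std::move(result));
    }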