From f2e026a1d8fb2384c1ece24e6dd32062b4f390a2 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Mon, 10 Jun 2019 08:36:22 -0400
Subject: [PATCH] gpu_asynch: Simplify synchronization to a simpler consumer->producer scheme.

---
 src/video_core/gpu_thread.cpp | 18 +++---------------
 src/video_core/gpu_thread.h   | 32 --------------------------------
 2 files changed, 3 insertions(+), 47 deletions(-)

diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 6926553957..b87938fdd5 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -21,7 +21,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
     MicroProfileOnThreadCreate("GpuThread");
 
     // Wait for first GPU command before acquiring the window context
-    state.WaitForCommands();
+    while (state.queue.Empty());
 
     // If emulation was stopped during disk shader loading, abort before trying to acquire context
     if (!state.is_running) {
@@ -32,7 +32,6 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
 
     CommandDataContainer next;
     while (state.is_running) {
-        state.WaitForCommands();
         while (!state.queue.Empty()) {
             state.queue.Pop(next);
             if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
@@ -49,8 +48,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
             } else {
                 UNREACHABLE();
             }
-            state.signaled_fence = next.fence;
-            state.TrySynchronize();
+            state.signaled_fence.store(next.fence);
         }
     }
 }
@@ -100,22 +98,12 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
 u64 ThreadManager::PushCommand(CommandData&& command_data) {
     const u64 fence{++state.last_fence};
     state.queue.Push(CommandDataContainer(std::move(command_data), fence));
-    state.SignalCommands();
     return fence;
 }
 
 MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 void SynchState::WaitForSynchronization(u64 fence) {
-    if (signaled_fence >= fence) {
-        return;
-    }
-
-    // Wait for the GPU to be idle (all commands to be executed)
-    {
-        MICROPROFILE_SCOPE(GPU_wait);
-        std::unique_lock lock{synchronization_mutex};
-        synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
-    }
+    while (signaled_fence.load() < fence);
 }
 
 } // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 05a168a726..1d9d0c39e3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -88,41 +88,9 @@ struct CommandDataContainer {
 /// Struct used to synchronize the GPU thread
 struct SynchState final {
     std::atomic_bool is_running{true};
-    std::atomic_int queued_frame_count{};
-    std::mutex synchronization_mutex;
-    std::mutex commands_mutex;
-    std::condition_variable commands_condition;
-    std::condition_variable synchronization_condition;
-
-    /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
-    /// synchronized. This is entirely empirical.
-    bool IsSynchronized() const {
-        constexpr std::size_t max_queue_gap{5};
-        return queue.Size() <= max_queue_gap;
-    }
-
-    void TrySynchronize() {
-        if (IsSynchronized()) {
-            std::lock_guard lock{synchronization_mutex};
-            synchronization_condition.notify_one();
-        }
-    }
 
     void WaitForSynchronization(u64 fence);
 
-    void SignalCommands() {
-        if (queue.Empty()) {
-            return;
-        }
-
-        commands_condition.notify_one();
-    }
-
-    void WaitForCommands() {
-        std::unique_lock lock{commands_mutex};
-        commands_condition.wait(lock, [this] { return !queue.Empty(); });
-    }
-
     using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
     CommandQueue queue;
     u64 last_fence{};
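
For reference, a minimal self-contained sketch of the consumer->producer scheme the patch switches to: the producer pushes commands tagged with a monotonically increasing fence and spins in WaitForSynchronization() until the consumer has caught up, while the consumer thread pops commands and publishes the fence of the last executed command through an atomic. The names Command, SynchState::Push, and RunConsumer below are illustrative only, and a mutex-guarded std::queue stands in for yuzu's lock-free Common::SPSCQueue.

// Illustrative sketch only; not part of the patch. A mutex-guarded std::queue
// stands in for the single-producer/single-consumer queue used by the GPU thread.
#include <atomic>
#include <cstdint>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>

struct Command {
    std::uint64_t fence{};
};

struct SynchState {
    std::atomic_bool is_running{true};
    std::atomic<std::uint64_t> signaled_fence{};
    std::uint64_t last_fence{}; // only touched by the producer thread

    std::mutex queue_mutex;
    std::queue<Command> queue;

    // Producer side: enqueue a command and return the fence that identifies it.
    std::uint64_t Push(Command command) {
        const std::uint64_t fence{++last_fence};
        command.fence = fence;
        std::lock_guard lock{queue_mutex};
        queue.push(command);
        return fence;
    }

    // Producer side: spin until the consumer has executed the command for `fence`.
    void WaitForSynchronization(std::uint64_t fence) {
        while (signaled_fence.load() < fence) {
        }
    }
};

// Consumer side: drain the queue and publish progress through signaled_fence.
void RunConsumer(SynchState& state) {
    while (state.is_running) {
        Command next;
        {
            std::lock_guard lock{state.queue_mutex};
            if (state.queue.empty()) {
                continue; // busy-wait for more work, like the patched GPU thread
            }
            next = state.queue.front();
            state.queue.pop();
        }
        // ... execute the command here ...
        state.signaled_fence.store(next.fence);
    }
}

int main() {
    SynchState state;
    std::thread consumer{RunConsumer, std::ref(state)};

    const std::uint64_t fence = state.Push(Command{});
    state.WaitForSynchronization(fence); // returns once the consumer signals the fence

    state.is_running = false;
    consumer.join();
}

As in the patch, both sides busy-wait rather than block on condition variables, trading idle CPU time for lower synchronization latency.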