From 4e9f975935b4208b29e158dabe62f8ad1122a447 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 25 Sep 2019 16:08:33 -0400 Subject: [PATCH 01/11] Nvdrv: Correct Async regression and avoid signaling empty buffer vsyncs --- src/core/hle/service/nvdrv/nvdrv.cpp | 4 ++-- src/core/hle/service/nvflinger/nvflinger.cpp | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 307a7e9281..7bfb99e343 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -40,8 +40,8 @@ Module::Module(Core::System& system) { auto& kernel = system.Kernel(); for (u32 i = 0; i < MaxNvEvents; i++) { std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); - events_interface.events[i] = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::Automatic, event_label); + events_interface.events[i] = + Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, event_label); events_interface.status[i] = EventState::Free; events_interface.registered[i] = false; } diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 2e4d707b96..3b251f8c8f 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -170,8 +170,13 @@ const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { void NVFlinger::Compose() { for (auto& display : displays) { + bool trigger_event = false; // Trigger vsync for this display at the end of drawing - SCOPE_EXIT({ display.SignalVSyncEvent(); }); + SCOPE_EXIT({ + if (trigger_event) { + display.SignalVSyncEvent(); + } + }); // Don't do anything for displays without layers. if (!display.HasLayers()) @@ -194,6 +199,7 @@ void NVFlinger::Compose() { } const auto& igbp_buffer = buffer->get().igbp_buffer; + trigger_event = true; // Now send the buffer to the GPU for drawing. // TODO(Subv): Support more than just disp0. The display device selection is probably based From 5b5e60ffeca1a718cd980e74f0528d6ab91788cf Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 25 Sep 2019 19:43:23 -0400 Subject: [PATCH 02/11] GPU_Async: Correct fences, display events and more. This commit uses guest fences on vSync event instead of an articial fake fence we had. It also corrects to keep signaling display events while loading the game as the OS is suppose to send buffers to vSync during that time. --- src/core/hle/service/nvflinger/nvflinger.cpp | 21 ++++++++++++++++++-- src/core/hle/service/nvflinger/nvflinger.h | 2 ++ src/video_core/gpu.cpp | 13 ++++++++++++ src/video_core/gpu.h | 3 +++ src/video_core/gpu_thread.cpp | 14 +------------ src/video_core/gpu_thread.h | 6 ------ 6 files changed, 38 insertions(+), 21 deletions(-) diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 3b251f8c8f..86a90526cb 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -36,6 +36,10 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) { displays.emplace_back(3, "Internal", system); displays.emplace_back(4, "Null", system); + for (auto& display : displays) { + display.SignalVSyncEvent(); + } + // Schedule the screen composition events composition_event = system.CoreTiming().RegisterEvent( "ScreenComposition", [this](u64 userdata, s64 cycles_late) { @@ -173,7 +177,13 @@ void NVFlinger::Compose() { bool trigger_event = false; // Trigger vsync for this display at the end of drawing SCOPE_EXIT({ - if (trigger_event) { + // TODO(Blinkhawk): Correctly send buffers through nvflinger while + // loading the game thorugh the OS. + // During loading, the OS takes care of sending buffers to vsync, + // thus it triggers, since this is not properly emulated due to + // HLE complications, we allow it to signal until the game enqueues + // it's first buffer. + if (trigger_event || !first_buffer_enqueued) { display.SignalVSyncEvent(); } }); @@ -193,13 +203,20 @@ void NVFlinger::Compose() { if (!buffer) { // There was no queued buffer to draw, render previous frame - system.GetPerfStats().EndGameFrame(); system.GPU().SwapBuffers({}); continue; } const auto& igbp_buffer = buffer->get().igbp_buffer; trigger_event = true; + first_buffer_enqueued = true; + + const auto& gpu = system.GPU(); + const auto& multi_fence = buffer->get().multi_fence; + for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { + const auto& fence = multi_fence.fences[fence_id]; + gpu.WaitFence(fence.id, fence.value); + } // Now send the buffer to the GPU for drawing. // TODO(Subv): Support more than just disp0. The display device selection is probably based diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 5d7e3bfb8e..95d7278f55 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -102,6 +102,8 @@ private: u32 swap_interval = 1; + bool first_buffer_enqueued{}; + /// Event that handles screen composition. Core::Timing::EventType* composition_event; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 76cfe81070..d94be9c9df 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "common/assert.h" +#include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" @@ -17,6 +18,8 @@ namespace Tegra { +MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); + GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) : system{system}, renderer{renderer}, is_async{is_async} { auto& rasterizer{renderer.Rasterizer()}; @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::WaitFence(u32 syncpoint_id, u32 value) const { + // Synced GPU, is always in sync + if (!is_async) { + return; + } + MICROPROFILE_SCOPE(GPU_wait); + while (syncpoints[syncpoint_id].load() < value) { + } +} + void GPU::IncrementSyncPoint(const u32 syncpoint_id) { syncpoints[syncpoint_id]++; std::lock_guard lock{sync_mutex}; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95bd..e20b0687a8 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -177,6 +177,9 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. + void WaitFence(u32 syncpoint_id, u32 value) const; + void IncrementSyncPoint(u32 syncpoint_id); u32 GetSyncpointValue(u32 syncpoint_id) const; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fdc..d7048b6ae7 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,8 +5,6 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/frontend/scope_acquire_window_context.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; - synchronization_event = system.CoreTiming().RegisterEvent( - "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { - const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; - const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; - system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); + PushCommand(SubmitListCommand(std::move(entries))); } void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { @@ -102,10 +96,4 @@ u64 ThreadManager::PushCommand(CommandData&& command_data) { return fence; } -MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -void SynchState::WaitForSynchronization(u64 fence) { - while (signaled_fence.load() < fence) - ; -} - } // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f33..108f456bd1 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -21,9 +21,6 @@ class DmaPusher; namespace Core { class System; -namespace Timing { -struct EventType; -} // namespace Timing } // namespace Core namespace VideoCommon::GPUThread { @@ -89,8 +86,6 @@ struct CommandDataContainer { struct SynchState final { std::atomic_bool is_running{true}; - void WaitForSynchronization(u64 fence); - using CommandQueue = Common::SPSCQueue; CommandQueue queue; u64 last_fence{}; @@ -128,7 +123,6 @@ private: private: SynchState state; Core::System& system; - Core::Timing::EventType* synchronization_event{}; std::thread thread; std::thread::id thread_id; }; From 976d9ef43c6eb431a1963a2e5857c100eeacd952 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 26 Sep 2019 08:46:22 -0400 Subject: [PATCH 03/11] NvFlinger: Don't swap buffers if a frame is missing and always trigger event in sync gpu. --- src/core/hle/service/nvflinger/nvflinger.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 86a90526cb..3c08ac9a30 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -203,7 +203,9 @@ void NVFlinger::Compose() { if (!buffer) { // There was no queued buffer to draw, render previous frame - system.GPU().SwapBuffers({}); + auto& gpu = system.GPU(); + // Always trigger on sync GPU. + trigger_event = !gpu.IsAsync(); continue; } From ffc2ce89a03d8160c408922cd72a1f45e333c0fe Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 26 Sep 2019 10:12:05 -0400 Subject: [PATCH 04/11] Nvdrv: Do framelimiting only in the CPU Thread --- src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 4 ++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index f764388bcf..3f7b8e6704 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/core_timing.h" #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" #include "core/hle/service/nvdrv/devices/nvmap.h" #include "core/perf_stats.h" @@ -38,7 +39,10 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 transform, crop_rect}; system.GetPerfStats().EndGameFrame(); + system.GetPerfStats().EndSystemFrame(); system.GPU().SwapBuffers(&framebuffer); + system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); + system.GetPerfStats().BeginSystemFrame(); } } // namespace Service::Nvidia::Devices diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1e6ef66abd..7f6ff0857f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -135,9 +135,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.PollEvents(); - system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); - system.GetPerfStats().BeginSystemFrame(); - // Restore the rasterizer state prev_state.AllDirty(); prev_state.Apply(); From 782b7a0ca45b08d981e48a6d9a7af73cbed13281 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 26 Sep 2019 15:41:10 -0400 Subject: [PATCH 05/11] NVFlinger: Reverse the change that only signaled events on buffer acquire. This has been hardware tested and it seems that NVFlinger will still signal even if there are no buffers to present. --- src/core/hle/service/nvflinger/nvflinger.cpp | 19 +------------------ src/core/hle/service/nvflinger/nvflinger.h | 2 -- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 3c08ac9a30..6eee20efeb 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -174,19 +174,8 @@ const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { void NVFlinger::Compose() { for (auto& display : displays) { - bool trigger_event = false; // Trigger vsync for this display at the end of drawing - SCOPE_EXIT({ - // TODO(Blinkhawk): Correctly send buffers through nvflinger while - // loading the game thorugh the OS. - // During loading, the OS takes care of sending buffers to vsync, - // thus it triggers, since this is not properly emulated due to - // HLE complications, we allow it to signal until the game enqueues - // it's first buffer. - if (trigger_event || !first_buffer_enqueued) { - display.SignalVSyncEvent(); - } - }); + SCOPE_EXIT({ display.SignalVSyncEvent(); }); // Don't do anything for displays without layers. if (!display.HasLayers()) @@ -202,16 +191,10 @@ void NVFlinger::Compose() { MicroProfileFlip(); if (!buffer) { - // There was no queued buffer to draw, render previous frame - auto& gpu = system.GPU(); - // Always trigger on sync GPU. - trigger_event = !gpu.IsAsync(); continue; } const auto& igbp_buffer = buffer->get().igbp_buffer; - trigger_event = true; - first_buffer_enqueued = true; const auto& gpu = system.GPU(); const auto& multi_fence = buffer->get().multi_fence; diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 95d7278f55..5d7e3bfb8e 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -102,8 +102,6 @@ private: u32 swap_interval = 1; - bool first_buffer_enqueued{}; - /// Event that handles screen composition. Core::Timing::EventType* composition_event; From 69fa2e652560dd72f7b53d44fec9d7fe4aa0ffb9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 26 Sep 2019 18:11:27 -0400 Subject: [PATCH 06/11] Nvdrv: Correct Event setup in Nvdrv Events are supposed to be cleared on quering. This fixes that issue. --- .../hle/service/nvdrv/devices/nvhost_ctrl.cpp | 33 +++++++------------ src/core/hle/service/nvdrv/interface.cpp | 4 ++- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index ff6b1abaeb..708469bd5f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -62,16 +62,26 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector& input, std::vector& return NvResult::BadParameter; } + u32 event_id = params.value & 0x00FF; + + if (event_id >= MaxNvEvents) { + std::memcpy(output.data(), ¶ms, sizeof(params)); + return NvResult::BadParameter; + } + + auto event = events_interface.events[event_id]; auto& gpu = system.GPU(); // This is mostly to take into account unimplemented features. As synced // gpu is always synced. if (!gpu.IsAsync()) { + event.writable->Signal(); return NvResult::Success; } auto lock = gpu.LockSync(); const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); const s32 diff = current_syncpoint_value - params.threshold; if (diff >= 0) { + event.writable->Signal(); params.value = current_syncpoint_value; std::memcpy(output.data(), ¶ms, sizeof(params)); return NvResult::Success; @@ -87,27 +97,6 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector& input, std::vector& return NvResult::Timeout; } - u32 event_id; - if (is_async) { - event_id = params.value & 0x00FF; - if (event_id >= MaxNvEvents) { - std::memcpy(output.data(), ¶ms, sizeof(params)); - return NvResult::BadParameter; - } - } else { - if (ctrl.fresh_call) { - const auto result = events_interface.GetFreeEvent(); - if (result) { - event_id = *result; - } else { - LOG_CRITICAL(Service_NVDRV, "No Free Events available!"); - event_id = params.value & 0x00FF; - } - } else { - event_id = ctrl.event_id; - } - } - EventState status = events_interface.status[event_id]; if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { events_interface.SetEventStatus(event_id, EventState::Waiting); @@ -119,7 +108,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector& input, std::vector& params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; } params.value |= event_id; - events_interface.events[event_id].writable->Clear(); + event.writable->Clear(); gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); if (!is_async && ctrl.fresh_call) { ctrl.must_delay = true; diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index 5e0c23602f..68d139cfb8 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp @@ -134,7 +134,9 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 3, 1}; rb.Push(RESULT_SUCCESS); if (event_id < MaxNvEvents) { - rb.PushCopyObjects(nvdrv->GetEvent(event_id)); + auto event = nvdrv->GetEvent(event_id); + event->Clear(); + rb.PushCopyObjects(event); rb.Push(NvResult::Success); } else { rb.Push(0); From 3f104464dec13f9ba90eaca5dafca87ee4116a60 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 26 Sep 2019 19:08:22 -0400 Subject: [PATCH 07/11] Core: Wait for GPU to be idle before shutting down. --- src/core/core.cpp | 2 ++ src/video_core/gpu.h | 3 +++ src/video_core/gpu_asynch.cpp | 4 ++++ src/video_core/gpu_asynch.h | 1 + src/video_core/gpu_synch.h | 1 + src/video_core/gpu_thread.cpp | 5 +++++ src/video_core/gpu_thread.h | 3 +++ 7 files changed, 19 insertions(+) diff --git a/src/core/core.cpp b/src/core/core.cpp index 75a7ffb973..8f68bdbad8 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -252,6 +252,8 @@ struct System::Impl { is_powered_on = false; exit_lock = false; + gpu_core->WaitIdle(); + // Shutdown emulation session renderer.reset(); GDBStub::Shutdown(); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e20b0687a8..dbca19f35b 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -177,6 +177,9 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + // Waits for the GPU to finish working + virtual void WaitIdle() const = 0; + /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. void WaitFence(u32 syncpoint_id, u32 value) const; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index f2a3a390e1..04222d0607 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } +void GPUAsynch::WaitIdle() const { + gpu_thread.WaitIdle(); +} + } // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index a12f9bac41..1241ade1d7 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -25,6 +25,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override; protected: void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 5eb1c461c2..c71baee89e 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -24,6 +24,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override {} protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index d7048b6ae7..4a42634d2c 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -90,6 +90,11 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { InvalidateRegion(addr, size); } +void ThreadManager::WaitIdle() const { + while (state.last_fence > state.signaled_fence.load()) { + } +} + u64 ThreadManager::PushCommand(CommandData&& command_data) { const u64 fence{++state.last_fence}; state.queue.Push(CommandDataContainer(std::move(command_data), fence)); diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 108f456bd1..08dc96bb37 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -116,6 +116,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(CacheAddr addr, u64 size); + // Wait until the gpu thread is idle. + void WaitIdle() const; + private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data); From 9f2719d1a43e04cc2f5296d0f9719aab9626f730 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 27 Sep 2019 09:39:42 -0400 Subject: [PATCH 08/11] Gl_Rasterizer: Protect CPU Memory mapping from multiple threads. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 1 + src/video_core/renderer_opengl/gl_rasterizer.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6a17bed723..deb3e10a5a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { } void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { + std::lock_guard lock{pages_mutex}; const u64 page_start{addr >> Memory::PAGE_BITS}; const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9c10ebda36..c24a02d71e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -230,6 +231,8 @@ private: using CachedPageMap = boost::icl::interval_map; CachedPageMap cached_pages; + + std::mutex pages_mutex; }; } // namespace OpenGL From 75395605d69f3515c72437135ebafee80eba775c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 27 Sep 2019 12:00:34 -0400 Subject: [PATCH 09/11] NvFlinger: Remove leftover from corrections and clang format. --- src/core/hle/service/nvflinger/nvflinger.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 6eee20efeb..e00b665ea2 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -36,10 +36,6 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) { displays.emplace_back(3, "Internal", system); displays.emplace_back(4, "Null", system); - for (auto& display : displays) { - display.SignalVSyncEvent(); - } - // Schedule the screen composition events composition_event = system.CoreTiming().RegisterEvent( "ScreenComposition", [this](u64 userdata, s64 cycles_late) { From 538f5880fff5e29fd5539eea371c3131013908e4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 29 Sep 2019 10:12:28 -0400 Subject: [PATCH 10/11] GL_Renderer: Remove lefting snippet. --- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7f6ff0857f..4bbd17b122 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst RendererOpenGL::~RendererOpenGL() = default; void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - system.GetPerfStats().EndSystemFrame(); - // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.AllDirty(); From cfc2f30dc409cbbb36c19c74a98f3017e6d722f2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 11 Oct 2019 13:41:15 -0400 Subject: [PATCH 11/11] AsyncGpu: Address Feedback --- src/video_core/gpu.cpp | 2 +- src/video_core/gpu_thread.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index d94be9c9df..da7359d4d4 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -72,7 +72,7 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) const { return; } MICROPROFILE_SCOPE(GPU_wait); - while (syncpoints[syncpoint_id].load() < value) { + while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { } } diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 4a42634d2c..758a37f14c 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -91,7 +91,7 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { } void ThreadManager::WaitIdle() const { - while (state.last_fence > state.signaled_fence.load()) { + while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { } }