mirror of
https://git.suyu.dev/suyu/suyu
synced 2025-01-09 16:03:21 +00:00
Memory Tracking: Optimize tracking to only use atomic writes when contested with the host GPU
This commit is contained in:
parent
47d0d292d5
commit
da440da9f5
19 changed files with 153 additions and 38 deletions
|
@ -55,7 +55,6 @@
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
#include "video_core/video_core.h"
|
#include "video_core/video_core.h"
|
||||||
|
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64));
|
MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64));
|
||||||
MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64));
|
MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64));
|
||||||
MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64));
|
MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64));
|
||||||
|
@ -132,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
|
||||||
struct System::Impl {
|
struct System::Impl {
|
||||||
explicit Impl(System& system)
|
explicit Impl(System& system)
|
||||||
: kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{},
|
: kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{},
|
||||||
cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {}
|
cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system},
|
||||||
|
gpu_dirty_memory_write_manager{} {
|
||||||
|
memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
|
||||||
|
}
|
||||||
|
|
||||||
void Initialize(System& system) {
|
void Initialize(System& system) {
|
||||||
device_memory = std::make_unique<Core::DeviceMemory>();
|
device_memory = std::make_unique<Core::DeviceMemory>();
|
||||||
|
@ -236,6 +238,8 @@ struct System::Impl {
|
||||||
// Setting changes may require a full system reinitialization (e.g., disabling multicore).
|
// Setting changes may require a full system reinitialization (e.g., disabling multicore).
|
||||||
ReinitializeIfNecessary(system);
|
ReinitializeIfNecessary(system);
|
||||||
|
|
||||||
|
memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
|
||||||
|
|
||||||
kernel.Initialize();
|
kernel.Initialize();
|
||||||
cpu_manager.Initialize();
|
cpu_manager.Initialize();
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
@ -59,8 +62,7 @@ public:
|
||||||
mask = mask >> empty_bits;
|
mask = mask >> empty_bits;
|
||||||
|
|
||||||
const size_t continuous_bits = std::countr_one(mask);
|
const size_t continuous_bits = std::countr_one(mask);
|
||||||
callback((transform.address << Memory::YUZU_PAGEBITS) + offset,
|
callback((transform.address << page_bits) + offset, continuous_bits << align_bits);
|
||||||
continuous_bits << align_bits);
|
|
||||||
mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
|
mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
|
||||||
offset += continuous_bits << align_bits;
|
offset += continuous_bits << align_bits;
|
||||||
}
|
}
|
||||||
|
@ -74,6 +76,10 @@ private:
|
||||||
u64 mask;
|
u64 mask;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
constexpr static size_t page_bits = Memory::YUZU_PAGEBITS;
|
||||||
|
constexpr static size_t page_size = 1ULL << page_bits;
|
||||||
|
constexpr static size_t page_mask = page_size - 1;
|
||||||
|
|
||||||
constexpr static size_t align_bits = 6U;
|
constexpr static size_t align_bits = 6U;
|
||||||
constexpr static size_t align_size = 1U << align_bits;
|
constexpr static size_t align_size = 1U << align_bits;
|
||||||
constexpr static size_t align_mask = align_size - 1;
|
constexpr static size_t align_mask = align_size - 1;
|
||||||
|
@ -94,11 +100,11 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
TransformAddress BuildTransform(VAddr address, size_t size) {
|
TransformAddress BuildTransform(VAddr address, size_t size) {
|
||||||
const size_t minor_address = address & Memory::YUZU_PAGEMASK;
|
const size_t minor_address = address & page_mask;
|
||||||
const size_t minor_bit = minor_address >> align_bits;
|
const size_t minor_bit = minor_address >> align_bits;
|
||||||
const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
|
const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
|
||||||
TransformAddress result{};
|
TransformAddress result{};
|
||||||
result.address = address >> Memory::YUZU_PAGEBITS;
|
result.address = address >> page_bits;
|
||||||
result.mask = CreateMask<u64>(top_bit, minor_bit);
|
result.mask = CreateMask<u64>(top_bit, minor_bit);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <span>
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/atomic_ops.h"
|
#include "common/atomic_ops.h"
|
||||||
|
@ -679,7 +680,7 @@ struct Memory::Impl {
|
||||||
LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
|
LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
|
||||||
GetInteger(vaddr), static_cast<u64>(data));
|
GetInteger(vaddr), static_cast<u64>(data));
|
||||||
},
|
},
|
||||||
[&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
|
[&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
|
||||||
if (ptr) {
|
if (ptr) {
|
||||||
std::memcpy(ptr, &data, sizeof(T));
|
std::memcpy(ptr, &data, sizeof(T));
|
||||||
}
|
}
|
||||||
|
@ -693,7 +694,7 @@ struct Memory::Impl {
|
||||||
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
|
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
|
||||||
sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
|
sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
|
||||||
},
|
},
|
||||||
[&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
|
[&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
|
||||||
if (ptr) {
|
if (ptr) {
|
||||||
const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
|
const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
|
||||||
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
|
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
|
||||||
|
@ -708,7 +709,7 @@ struct Memory::Impl {
|
||||||
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
|
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
|
||||||
GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
|
GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
|
||||||
},
|
},
|
||||||
[&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); });
|
[&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); });
|
||||||
if (ptr) {
|
if (ptr) {
|
||||||
const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
|
const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
|
||||||
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
|
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
|
||||||
|
@ -718,7 +719,7 @@ struct Memory::Impl {
|
||||||
|
|
||||||
void HandleRasterizerDownload(VAddr address, size_t size) {
|
void HandleRasterizerDownload(VAddr address, size_t size) {
|
||||||
const size_t core = system.GetCurrentHostThreadID();
|
const size_t core = system.GetCurrentHostThreadID();
|
||||||
auto& current_area = rasterizer_areas[core];
|
auto& current_area = rasterizer_read_areas[core];
|
||||||
const VAddr end_address = address + size;
|
const VAddr end_address = address + size;
|
||||||
if (current_area.start_address <= address && end_address <= current_area.end_address)
|
if (current_area.start_address <= address && end_address <= current_area.end_address)
|
||||||
[[likely]] {
|
[[likely]] {
|
||||||
|
@ -727,9 +728,31 @@ struct Memory::Impl {
|
||||||
current_area = system.GPU().OnCPURead(address, size);
|
current_area = system.GPU().OnCPURead(address, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
Common::PageTable* current_page_table = nullptr;
|
void HandleRasterizerWrite(VAddr address, size_t size) {
|
||||||
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{};
|
const size_t core = system.GetCurrentHostThreadID();
|
||||||
|
auto& current_area = rasterizer_write_areas[core];
|
||||||
|
VAddr subaddress = address >> YUZU_PAGEBITS;
|
||||||
|
bool do_collection = current_area.last_address == subaddress;
|
||||||
|
if (!do_collection) [[unlikely]] {
|
||||||
|
do_collection = system.GPU().OnCPUWrite(address, size);
|
||||||
|
if (!do_collection) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
current_area.last_address = subaddress;
|
||||||
|
}
|
||||||
|
gpu_dirty_managers[core].Collect(address, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct GPUDirtyState {
|
||||||
|
VAddr last_address;
|
||||||
|
};
|
||||||
|
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
|
Common::PageTable* current_page_table = nullptr;
|
||||||
|
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
|
||||||
|
rasterizer_read_areas{};
|
||||||
|
std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
|
||||||
|
std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
|
||||||
};
|
};
|
||||||
|
|
||||||
Memory::Memory(Core::System& system_) : system{system_} {
|
Memory::Memory(Core::System& system_) : system{system_} {
|
||||||
|
@ -877,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size)
|
||||||
impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size);
|
impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) {
|
||||||
|
impl->gpu_dirty_managers = managers;
|
||||||
|
}
|
||||||
|
|
||||||
Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) {
|
Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) {
|
||||||
return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size);
|
return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size);
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <span>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "common/typed_address.h"
|
#include "common/typed_address.h"
|
||||||
#include "core/hle/result.h"
|
#include "core/hle/result.h"
|
||||||
|
@ -15,7 +16,8 @@ struct PageTable;
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
class System;
|
class System;
|
||||||
}
|
class GPUDirtyMemoryManager;
|
||||||
|
} // namespace Core
|
||||||
|
|
||||||
namespace Kernel {
|
namespace Kernel {
|
||||||
class PhysicalMemory;
|
class PhysicalMemory;
|
||||||
|
@ -458,6 +460,8 @@ public:
|
||||||
*/
|
*/
|
||||||
void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
|
void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
|
||||||
|
|
||||||
|
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
|
|
||||||
|
|
|
@ -132,6 +132,19 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
|
||||||
InlineMemoryImplementation(cpu_addr, size, tmp_buffer);
|
InlineMemoryImplementation(cpu_addr, size, tmp_buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) {
|
||||||
|
const bool is_dirty = IsRegionRegistered(cpu_addr, size);
|
||||||
|
if (!is_dirty) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
WriteMemory(cpu_addr, size);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr,
|
std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr,
|
||||||
u64 size) {
|
u64 size) {
|
||||||
|
@ -1574,7 +1587,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
|
void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
|
||||||
std::span<const u8> inlined_buffer) {
|
std::span<const u8> inlined_buffer) {
|
||||||
const IntervalType subtract_interval{dest_address, dest_address + copy_size};
|
const IntervalType subtract_interval{dest_address, dest_address + copy_size};
|
||||||
ClearDownload(subtract_interval);
|
ClearDownload(subtract_interval);
|
||||||
common_ranges.subtract(subtract_interval);
|
common_ranges.subtract(subtract_interval);
|
||||||
|
|
|
@ -245,6 +245,8 @@ public:
|
||||||
|
|
||||||
void CachedWriteMemory(VAddr cpu_addr, u64 size);
|
void CachedWriteMemory(VAddr cpu_addr, u64 size);
|
||||||
|
|
||||||
|
bool OnCPUWrite(VAddr cpu_addr, u64 size);
|
||||||
|
|
||||||
void DownloadMemory(VAddr cpu_addr, u64 size);
|
void DownloadMemory(VAddr cpu_addr, u64 size);
|
||||||
|
|
||||||
std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size);
|
std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size);
|
||||||
|
@ -543,7 +545,8 @@ private:
|
||||||
|
|
||||||
void ClearDownload(IntervalType subtract_interval);
|
void ClearDownload(IntervalType subtract_interval);
|
||||||
|
|
||||||
void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
|
void InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
|
||||||
|
std::span<const u8> inlined_buffer);
|
||||||
|
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
Core::Memory::Memory& cpu_memory;
|
Core::Memory::Memory& cpu_memory;
|
||||||
|
|
|
@ -69,7 +69,6 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
void SignalFence(std::function<void()>&& func) {
|
void SignalFence(std::function<void()>&& func) {
|
||||||
rasterizer.InvalidateGPUCache();
|
|
||||||
bool delay_fence = Settings::IsGPULevelHigh();
|
bool delay_fence = Settings::IsGPULevelHigh();
|
||||||
if constexpr (!can_async_check) {
|
if constexpr (!can_async_check) {
|
||||||
TryReleasePendingFences<false>();
|
TryReleasePendingFences<false>();
|
||||||
|
@ -96,6 +95,7 @@ public:
|
||||||
guard.unlock();
|
guard.unlock();
|
||||||
cv.notify_all();
|
cv.notify_all();
|
||||||
}
|
}
|
||||||
|
rasterizer.InvalidateGPUCache();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SignalSyncPoint(u32 value) {
|
void SignalSyncPoint(u32 value) {
|
||||||
|
|
|
@ -96,7 +96,7 @@ struct GPU::Impl {
|
||||||
/// Synchronizes CPU writes with Host GPU memory.
|
/// Synchronizes CPU writes with Host GPU memory.
|
||||||
void InvalidateGPUCache() {
|
void InvalidateGPUCache() {
|
||||||
std::function<void(VAddr, size_t)> callback_writes(
|
std::function<void(VAddr, size_t)> callback_writes(
|
||||||
[this](VAddr address, size_t size) { rasterizer->OnCPUWrite(address, size); });
|
[this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
|
||||||
system.GatherGPUDirtyMemory(callback_writes);
|
system.GatherGPUDirtyMemory(callback_writes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -301,6 +301,10 @@ struct GPU::Impl {
|
||||||
gpu_thread.InvalidateRegion(addr, size);
|
gpu_thread.InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool OnCPUWrite(VAddr addr, u64 size) {
|
||||||
|
return rasterizer->OnCPUWrite(addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
void FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||||
gpu_thread.FlushAndInvalidateRegion(addr, size);
|
gpu_thread.FlushAndInvalidateRegion(addr, size);
|
||||||
|
@ -563,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) {
|
||||||
impl->InvalidateRegion(addr, size);
|
impl->InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool GPU::OnCPUWrite(VAddr addr, u64 size) {
|
||||||
|
return impl->OnCPUWrite(addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||||
impl->FlushAndInvalidateRegion(addr, size);
|
impl->FlushAndInvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
|
@ -250,6 +250,10 @@ public:
|
||||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||||
void InvalidateRegion(VAddr addr, u64 size);
|
void InvalidateRegion(VAddr addr, u64 size);
|
||||||
|
|
||||||
|
/// Notify rasterizer that CPU is trying to write this area. It returns true if the area is
|
||||||
|
/// sensitive (the write must be tracked as GPU-dirty), false otherwise
|
||||||
|
bool OnCPUWrite(VAddr addr, u64 size);
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||||
void FlushAndInvalidateRegion(VAddr addr, u64 size);
|
void FlushAndInvalidateRegion(VAddr addr, u64 size);
|
||||||
|
|
||||||
|
|
|
@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
|
||||||
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
|
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
|
||||||
rasterizer->FlushRegion(flush->addr, flush->size);
|
rasterizer->FlushRegion(flush->addr, flush->size);
|
||||||
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
||||||
rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
|
rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size);
|
||||||
} else {
|
} else {
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
}
|
}
|
||||||
|
@ -102,12 +102,12 @@ void ThreadManager::TickGPU() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
||||||
rasterizer->OnCPUWrite(addr, size);
|
rasterizer->OnCacheInvalidation(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||||
// Skip flush on async mode, as FlushAndInvalidateRegion is not used for anything too important
|
// Skip flush on async mode, as FlushAndInvalidateRegion is not used for anything too important
|
||||||
rasterizer->OnCPUWrite(addr, size);
|
rasterizer->OnCacheInvalidation(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
|
u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
|
||||||
|
|
|
@ -109,7 +109,9 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region are out of sync with the guest
|
/// Notify rasterizer that any caches of the specified region are out of sync with the guest
|
||||||
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
|
virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
|
virtual bool OnCPUWrite(VAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
/// Sync memory between guest and host.
|
/// Sync memory between guest and host.
|
||||||
virtual void InvalidateGPUCache() = 0;
|
virtual void InvalidateGPUCache() = 0;
|
||||||
|
|
|
@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
|
void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
|
||||||
void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {}
|
bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {}
|
||||||
VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) {
|
VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) {
|
||||||
VideoCore::RasterizerDownloadArea new_area{
|
VideoCore::RasterizerDownloadArea new_area{
|
||||||
.start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE),
|
.start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE),
|
||||||
|
|
|
@ -53,7 +53,8 @@ public:
|
||||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||||
void InvalidateRegion(VAddr addr, u64 size,
|
void InvalidateRegion(VAddr addr, u64 size,
|
||||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
void OnCacheInvalidation(VAddr addr, u64 size) override;
|
||||||
|
bool OnCPUWrite(VAddr addr, u64 size) override;
|
||||||
VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
|
VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
|
||||||
void InvalidateGPUCache() override;
|
void InvalidateGPUCache() override;
|
||||||
void UnmapMemory(VAddr addr, u64 size) override;
|
void UnmapMemory(VAddr addr, u64 size) override;
|
||||||
|
|
|
@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
|
bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
|
if (addr == 0 || size == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::scoped_lock lock{buffer_cache.mutex};
|
||||||
|
if (buffer_cache.OnCPUWrite(addr, size)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::scoped_lock lock{texture_cache.mutex};
|
||||||
|
texture_cache.WriteMemory(addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
shader_cache.InvalidateRegion(addr, size);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
if (addr == 0 || size == 0) {
|
if (addr == 0 || size == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
shader_cache.OnCPUWrite(addr, size);
|
|
||||||
{
|
{
|
||||||
std::scoped_lock lock{texture_cache.mutex};
|
std::scoped_lock lock{texture_cache.mutex};
|
||||||
texture_cache.WriteMemory(addr, size);
|
texture_cache.WriteMemory(addr, size);
|
||||||
|
@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
|
||||||
std::scoped_lock lock{buffer_cache.mutex};
|
std::scoped_lock lock{buffer_cache.mutex};
|
||||||
buffer_cache.CachedWriteMemory(addr, size);
|
buffer_cache.CachedWriteMemory(addr, size);
|
||||||
}
|
}
|
||||||
|
shader_cache.InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::InvalidateGPUCache() {
|
void RasterizerOpenGL::InvalidateGPUCache() {
|
||||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
gpu.InvalidateGPUCache();
|
||||||
shader_cache.SyncGuestHost();
|
|
||||||
{
|
|
||||||
std::scoped_lock lock{buffer_cache.mutex};
|
|
||||||
buffer_cache.FlushCachedWrites();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
|
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
|
||||||
|
@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
|
||||||
std::scoped_lock lock{buffer_cache.mutex};
|
std::scoped_lock lock{buffer_cache.mutex};
|
||||||
buffer_cache.WriteMemory(addr, size);
|
buffer_cache.WriteMemory(addr, size);
|
||||||
}
|
}
|
||||||
shader_cache.OnCPUWrite(addr, size);
|
shader_cache.OnCacheInvalidation(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
|
void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
|
||||||
|
|
|
@ -98,7 +98,8 @@ public:
|
||||||
VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
|
VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
|
||||||
void InvalidateRegion(VAddr addr, u64 size,
|
void InvalidateRegion(VAddr addr, u64 size,
|
||||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
void OnCacheInvalidation(VAddr addr, u64 size) override;
|
||||||
|
bool OnCPUWrite(VAddr addr, u64 size) override;
|
||||||
void InvalidateGPUCache() override;
|
void InvalidateGPUCache() override;
|
||||||
void UnmapMemory(VAddr addr, u64 size) override;
|
void UnmapMemory(VAddr addr, u64 size) override;
|
||||||
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
|
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
|
||||||
|
|
|
@ -566,7 +566,28 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
|
bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
|
||||||
|
if (addr == 0 || size == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::scoped_lock lock{buffer_cache.mutex};
|
||||||
|
if (buffer_cache.OnCPUWrite(addr, size)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::scoped_lock lock{texture_cache.mutex};
|
||||||
|
texture_cache.WriteMemory(addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
pipeline_cache.InvalidateRegion(addr, size);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
|
||||||
if (addr == 0 || size == 0) {
|
if (addr == 0 || size == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -595,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
|
||||||
std::scoped_lock lock{buffer_cache.mutex};
|
std::scoped_lock lock{buffer_cache.mutex};
|
||||||
buffer_cache.WriteMemory(addr, size);
|
buffer_cache.WriteMemory(addr, size);
|
||||||
}
|
}
|
||||||
pipeline_cache.OnCPUWrite(addr, size);
|
pipeline_cache.OnCacheInvalidation(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
|
void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
|
||||||
|
|
|
@ -96,7 +96,8 @@ public:
|
||||||
void InvalidateRegion(VAddr addr, u64 size,
|
void InvalidateRegion(VAddr addr, u64 size,
|
||||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||||
void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
|
void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
|
||||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
void OnCacheInvalidation(VAddr addr, u64 size) override;
|
||||||
|
bool OnCPUWrite(VAddr addr, u64 size) override;
|
||||||
void InvalidateGPUCache() override;
|
void InvalidateGPUCache() override;
|
||||||
void UnmapMemory(VAddr addr, u64 size) override;
|
void UnmapMemory(VAddr addr, u64 size) override;
|
||||||
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
|
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
|
||||||
|
|
|
@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
|
||||||
RemovePendingShaders();
|
RemovePendingShaders();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
|
void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) {
|
||||||
std::scoped_lock lock{invalidation_mutex};
|
std::scoped_lock lock{invalidation_mutex};
|
||||||
InvalidatePagesInRegion(addr, size);
|
InvalidatePagesInRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,7 +62,7 @@ public:
|
||||||
/// @brief Unmarks a memory region as cached and marks it for removal
|
/// @brief Unmarks a memory region as cached and marks it for removal
|
||||||
/// @param addr Start address of the CPU write operation
|
/// @param addr Start address of the CPU write operation
|
||||||
/// @param size Number of bytes of the CPU write operation
|
/// @param size Number of bytes of the CPU write operation
|
||||||
void OnCPUWrite(VAddr addr, size_t size);
|
void OnCacheInvalidation(VAddr addr, size_t size);
|
||||||
|
|
||||||
/// @brief Flushes delayed removal operations
|
/// @brief Flushes delayed removal operations
|
||||||
void SyncGuestHost();
|
void SyncGuestHost();
|
||||||
|
|
Loading…
Reference in a new issue