From d86f9cd70910d4b96ec301e7d532b11d18a290a4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 17:30:36 -0400 Subject: [PATCH] Change texture_cache chaching from GPUAddr to CacheAddr This also reverses the changes to make invalidation and flushing through the GPU address. --- src/video_core/memory_manager.cpp | 2 +- src/video_core/rasterizer_interface.h | 4 - .../renderer_opengl/gl_rasterizer.cpp | 16 --- .../renderer_opengl/gl_rasterizer.h | 2 - src/video_core/texture_cache/surface_base.cpp | 5 +- src/video_core/texture_cache/surface_base.h | 30 +++--- src/video_core/texture_cache/texture_cache.h | 102 +++++++----------- 7 files changed, 60 insertions(+), 101 deletions(-) diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 74a1441e39..5d8d126c18 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -69,7 +69,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { const u64 aligned_size{Common::AlignUp(size, page_size)}; const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; - rasterizer.FlushAndInvalidateRegionEx(gpu_addr, cache_addr, aligned_size); + rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); UnmapRange(gpu_addr, aligned_size); return gpu_addr; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index d5505ef9c8..3c18d3b1f8 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -49,10 +49,6 @@ public: /// and invalidated virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; - /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - /// and invalidated - virtual void FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) = 0; - /// Attempt to use a faster method to perform a surface copy virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 482d0428c9..77ac963b40 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -737,27 +737,11 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { buffer_cache.InvalidateRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - if (!addr || !size) { - return; - } - texture_cache.InvalidateRegionEx(gpu_addr, size); - shader_cache.InvalidateRegion(addr, size); - global_cache.InvalidateRegion(addr, size); - buffer_cache.InvalidateRegion(addr, size); -} - void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { FlushRegion(addr, size); InvalidateRegion(addr, size); } -void RasterizerOpenGL::FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { - FlushRegion(addr, size); - InvalidateRegionEx(gpu_addr, addr, size); -} - bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Common::Rectangle& src_rect, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 971a38ab7d..5c37d3bfa6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -64,9 +64,7 @@ public: void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; - void InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size); void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Common::Rectangle& src_rect, diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 5273fcb444..0de0bc6561 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -25,7 +25,6 @@ SurfaceBaseImpl::SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& u32 offset = 0; mipmap_offsets.resize(params.num_levels); mipmap_sizes.resize(params.num_levels); - gpu_addr_end = gpu_addr + memory_size; for (u32 i = 0; i < params.num_levels; i++) { mipmap_offsets[i] = offset; mipmap_sizes[i] = params.GetGuestMipmapSize(i); @@ -99,8 +98,10 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } -void SurfaceBaseImpl::FlushBuffer(std::vector& staging_buffer) { +void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, + std::vector& staging_buffer) { MICROPROFILE_SCOPE(GPU_Flush_Texture); + auto host_ptr = memory_manager.GetPointer(gpu_addr); if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 9c048eb881..74be3237d2 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -45,40 +45,40 @@ class SurfaceBaseImpl { public: void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); - void FlushBuffer(std::vector& staging_buffer); + void FlushBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); GPUVAddr GetGpuAddr() const { return gpu_addr; } - GPUVAddr GetGpuAddrEnd() const { - return gpu_addr_end; - } - - bool Overlaps(const GPUVAddr start, const GPUVAddr end) const { - return (gpu_addr < end) && (gpu_addr_end > start); + bool Overlaps(const CacheAddr start, const CacheAddr end) const { + return (cache_addr < end) && (cache_addr_end > start); } // Use only when recycling a surface void SetGpuAddr(const GPUVAddr new_addr) { gpu_addr = new_addr; - gpu_addr_end = new_addr + memory_size; } VAddr GetCpuAddr() const { - return gpu_addr; + return cpu_addr; } void SetCpuAddr(const VAddr new_addr) { cpu_addr = new_addr; } - u8* GetHostPtr() const { - return host_ptr; + CacheAddr GetCacheAddr() const { + return cache_addr; } - void SetHostPtr(u8* new_addr) { - host_ptr = new_addr; + CacheAddr GetCacheAddrEnd() const { + return cache_addr_end; + } + + void SetCacheAddr(const CacheAddr new_addr) { + cache_addr = new_addr; + cache_addr_end = new_addr + memory_size; } const SurfaceParams& GetSurfaceParams() const { @@ -201,13 +201,13 @@ protected: const SurfaceParams params; GPUVAddr gpu_addr{}; - GPUVAddr gpu_addr_end{}; std::vector mipmap_sizes; std::vector mipmap_offsets; const std::size_t layer_size; const std::size_t memory_size; const std::size_t host_memory_size; - u8* host_ptr; + CacheAddr cache_addr; + CacheAddr cache_addr_end{}; VAddr cpu_addr; private: diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f3b28453a7..43aaec0112 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -60,12 +60,6 @@ public: } } - void InvalidateRegionEx(GPUVAddr addr, std::size_t size) { - for (const auto& surface : GetSurfacesInRegionInner(addr, size)) { - Unregister(surface); - } - } - TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { const auto gpu_addr{config.tic.Address()}; @@ -154,9 +148,19 @@ public: return GetSurface(gpu_addr, params, true).second; } - TSurface TryFindFramebufferSurface(const u8* host_ptr) const { - const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; - return it != registered_surfaces.end() ? *it->second.begin() : nullptr; + TSurface TryFindFramebufferSurface(const u8* host_ptr) { + const CacheAddr cache_addr = ToCacheAddr(host_ptr); + if (!cache_addr) { + return nullptr; + } + const CacheAddr page = cache_addr >> registry_page_bits; + std::list& list = registry[page]; + for (auto& s : list) { + if (s->GetCacheAddr() == cache_addr) { + return s; + } + } + return nullptr; } u64 Tick() { @@ -181,30 +185,28 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); - u8* host_ptr = memory_manager->GetPointer(gpu_addr); + const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); const std::optional cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); - if (!host_ptr || !cpu_addr) { + if (!cache_ptr || !cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); return; } - surface->SetHostPtr(host_ptr); + surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); - registered_surfaces.add({GetInterval(host_ptr, size), {surface}}); - rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); RegisterInnerCache(surface); surface->MarkAsRegistered(true); + rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } void Unregister(TSurface surface) { if (surface->IsProtected()) return; const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const void* host_ptr = surface->GetHostPtr(); + const CacheAddr cache_ptr = surface->GetCacheAddr(); const std::size_t size = surface->GetSizeInBytes(); const VAddr cpu_addr = surface->GetCpuAddr(); - registered_surfaces.erase(GetInterval(host_ptr, size)); rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); UnregisterInnerCache(surface); surface->MarkAsRegistered(false); @@ -280,7 +282,7 @@ private: } } - std::pair RebuildMirage(TSurface current_surface, + std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); @@ -297,7 +299,7 @@ private: const SurfaceParams& params) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { - return RebuildMirage(current_surface, params); + return RebuildSurface(current_surface, params); } const bool matches_target = current_surface->MatchTarget(params.target); if (matches_target) { @@ -356,7 +358,7 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)}; + auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -378,7 +380,7 @@ private: if (s_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params); } else { - return RebuildMirage(current_surface, params); + return RebuildSurface(current_surface, params); } } if (current_surface->GetSizeInBytes() <= candidate_size) { @@ -429,58 +431,40 @@ private: } staging_buffer.resize(surface->GetHostSizeInBytes()); surface->DownloadTexture(staging_buffer); - surface->FlushBuffer(staging_buffer); + surface->FlushBuffer(*memory_manager, staging_buffer); surface->MarkAsModified(false, Tick()); } - std::vector GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { - if (size == 0) { - return {}; - } - const IntervalType interval{cache_addr, cache_addr + size}; - - std::vector surfaces; - for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { - for (auto& s : pair.second) { - if (!s || !s->IsRegistered()) { - continue; - } - surfaces.push_back(s); - } - } - return surfaces; - } - void RegisterInnerCache(TSurface& surface) { - GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; - const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - inner_cache[start].push_back(surface); + registry[start].push_back(surface); start++; } } void UnregisterInnerCache(TSurface& surface) { - GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; - const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - inner_cache[start].remove(surface); + registry[start].remove(surface); start++; } } - std::vector GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) { + std::vector GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { if (size == 0) { return {}; } - const GPUVAddr gpu_addr_end = gpu_addr + size; - GPUVAddr start = gpu_addr >> inner_cache_page_bits; - const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits; + const CacheAddr cache_addr_end = cache_addr + size; + CacheAddr start = cache_addr >> registry_page_bits; + const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; std::vector surfaces; while (start <= end) { - std::list& list = inner_cache[start]; + std::list& list = registry[start]; for (auto& s : list) { - if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) { + if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { s->MarkAsPicked(true); surfaces.push_back(s); } @@ -510,11 +494,6 @@ private: return {}; } - IntervalType GetInterval(const void* host_ptr, const std::size_t size) const { - const CacheAddr addr = ToCacheAddr(host_ptr); - return IntervalType::right_open(addr, addr + size); - } - struct RenderInfo { RenderTargetConfig config; TSurface target; @@ -531,11 +510,12 @@ private: u64 ticks{}; - IntervalMap registered_surfaces; - - static constexpr u64 inner_cache_page_bits{20}; - static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits}; - std::unordered_map> inner_cache; + // The internal Cache is different for the Texture Cache. It's based on buckets + // of 1MB. This fits better for the purpose of this cache as textures are normaly + // large in size. + static constexpr u64 registry_page_bits{20}; + static constexpr u64 registry_page_size{1 << registry_page_bits}; + std::unordered_map> registry; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and