From b347543e8341ae323ea232d47df2c144fe21c739 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 18:27:29 -0400 Subject: [PATCH] Reduce amount of size calculations. --- src/common/common_funcs.h | 11 ++++ .../renderer_opengl/gl_texture_cache.cpp | 1 - .../renderer_opengl/gl_texture_cache.h | 2 +- src/video_core/texture_cache/surface_base.cpp | 22 +++++--- src/video_core/texture_cache/surface_base.h | 28 ++++------- .../texture_cache/surface_params.cpp | 31 +----------- src/video_core/texture_cache/surface_params.h | 50 +++++++++++++++---- src/video_core/texture_cache/texture_cache.h | 40 +++++++-------- 8 files changed, 97 insertions(+), 88 deletions(-) diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 8b0d34da6e..00a5698f3e 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -4,6 +4,7 @@ #pragma once +#include #include #if !defined(ARCHITECTURE_x86_64) @@ -60,4 +61,14 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) { return a | b << 8 | c << 16 | d << 24; } +template > +ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { + // Note: BOTH type T and the type after ForwardIt is dereferenced + // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. + // This is stricter than lower_bound requirement (see above) + + first = std::lower_bound(first, last, value, comp); + return first != last && !comp(value, *first) ? first : last; +} + } // namespace Common diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a58e3a816e..32cb08963e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -240,7 +240,6 @@ CachedSurface::~CachedSurface() { } void CachedSurface::DownloadTexture(std::vector& staging_buffer) { - LOG_CRITICAL(Render_OpenGL, "Flushing"); MICROPROFILE_SCOPE(OpenGL_Texture_Download); // TODO(Rodrigo): Optimize alignment diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1ad01137b4..0a1b57014d 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -133,7 +133,7 @@ protected: const VideoCommon::CopyParams& copy_params) override; void ImageBlit(Surface src_surface, Surface dst_surface, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) override; + const Common::Rectangle& dst_rect) override; private: OGLFramebuffer src_framebuffer; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index dc50132403..36ca72b4a2 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -19,19 +19,27 @@ using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) - : params{params}, gpu_addr{gpu_addr}, layer_size{params.GetGuestLayerSize()}, - guest_memory_size{params.GetGuestSizeInBytes()}, host_memory_size{ - params.GetHostSizeInBytes()} { - mipmap_offsets.reserve(params.num_levels); - mipmap_sizes.reserve(params.num_levels); + : params{params}, mipmap_sizes(params.num_levels), + mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ + params.GetHostSizeInBytes()} { std::size_t offset = 0; for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; - mipmap_sizes.push_back(mipmap_size); - mipmap_offsets.push_back(offset); + mipmap_sizes[level] = mipmap_size; + mipmap_offsets[level] = offset; offset += mipmap_size; } + layer_size = offset; + if (params.is_layered) { + if (params.is_tiled) { + layer_size = + SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); + } + guest_memory_size = layer_size * params.depth; + } else { + guest_memory_size = layer_size; + } } void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 179e80ddba..095deb6026 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -9,6 +9,7 @@ #include #include "common/assert.h" +#include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/morton.h" @@ -16,16 +17,6 @@ #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" -template > -ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { - // Note: BOTH type T and the type after ForwardIt is dereferenced - // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. - // This is stricter than lower_bound requirement (see above) - - first = std::lower_bound(first, last, value, comp); - return first != last && !comp(value, *first) ? first : last; -} - namespace Tegra { class MemoryManager; } @@ -153,7 +144,7 @@ public: const auto layer{static_cast(relative_address / layer_size)}; const GPUVAddr mipmap_address = relative_address - layer_size * layer; const auto mipmap_it = - binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); if (mipmap_it == mipmap_offsets.end()) { return {}; } @@ -172,8 +163,8 @@ protected: virtual void DecorateSurfaceName() = 0; const SurfaceParams params; - const std::size_t layer_size; - const std::size_t guest_memory_size; + std::size_t layer_size; + std::size_t guest_memory_size; const std::size_t host_memory_size; GPUVAddr gpu_addr{}; CacheAddr cache_addr{}; @@ -268,9 +259,11 @@ public: return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } - std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { - if (view_addr < gpu_addr || params.target == SurfaceTarget::Texture3D || - params.num_levels == 1 || view_params.target == SurfaceTarget::Texture3D) { + std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, + const std::size_t candidate_size) { + if (params.target == SurfaceTarget::Texture3D || + (params.num_levels == 1 && !params.is_layered) || + view_params.target == SurfaceTarget::Texture3D) { return {}; } const auto layer_mipmap{GetLayerMipmap(view_addr)}; @@ -279,8 +272,7 @@ public: } const u32 layer{layer_mipmap->first}; const u32 mipmap{layer_mipmap->second}; - const std::size_t size{view_params.GetGuestSizeInBytes()}; - if (GetMipmapSize(mipmap) != size) { + if (GetMipmapSize(mipmap) != candidate_size) { // TODO: The view may cover many mimaps, this case can still go on. // This edge-case can be safely be ignored since it will just result in worse // performance. diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index d9052152cc..b537b26e2d 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -4,13 +4,12 @@ #include -#include "common/cityhash.h" #include "common/alignment.h" +#include "common/cityhash.h" #include "core/core.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/surface.h" #include "video_core/texture_cache/surface_params.h" -#include "video_core/textures/decoders.h" namespace VideoCommon { @@ -169,18 +168,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( return params; } -u32 SurfaceParams::GetMipWidth(u32 level) const { - return std::max(1U, width >> level); -} - -u32 SurfaceParams::GetMipHeight(u32 level) const { - return std::max(1U, height >> level); -} - -u32 SurfaceParams::GetMipDepth(u32 level) const { - return is_layered ? depth : std::max(1U, depth >> level); -} - bool SurfaceParams::IsLayered() const { switch (target) { case SurfaceTarget::Texture1DArray: @@ -275,22 +262,6 @@ std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { return GetInnerMipmapMemorySize(level, true, false); } -u32 SurfaceParams::GetDefaultBlockWidth() const { - return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); -} - -u32 SurfaceParams::GetDefaultBlockHeight() const { - return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); -} - -u32 SurfaceParams::GetBitsPerPixel() const { - return VideoCore::Surface::GetFormatBpp(pixel_format); -} - -u32 SurfaceParams::GetBytesPerPixel() const { - return VideoCore::Surface::GetBytesPerPixel(pixel_format); -} - bool SurfaceParams::IsPixelFormatZeta() const { return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index ec8efa210e..e0ec1be0ee 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -10,8 +10,9 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/surface.h" #include "video_core/shader/shader_ir.h" +#include "video_core/surface.h" +#include "video_core/textures/decoders.h" namespace VideoCommon { @@ -50,10 +51,17 @@ public: std::size_t GetHostSizeInBytes() const { std::size_t host_size_in_bytes; if (IsPixelFormatASTC(pixel_format)) { + constexpr std::size_t rgb8_bpp = 4ULL; // ASTC is uncompressed in software, in emulated as RGBA8 - host_size_in_bytes = static_cast(Common::AlignUp(width, GetDefaultBlockWidth())) * - static_cast(Common::AlignUp(height, GetDefaultBlockHeight())) * - static_cast(depth) * 4ULL; + host_size_in_bytes = 0; + for (std::size_t level = 0; level < num_levels; level++) { + const std::size_t width = + Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); + const std::size_t height = + Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); + const std::size_t depth = is_layered ? depth : GetMipDepth(level); + host_size_in_bytes += width * height * depth * rgb8_bpp; + } } else { host_size_in_bytes = GetInnerMemorySize(true, false, false); } @@ -65,13 +73,19 @@ public: } /// Returns the width of a given mipmap level. - u32 GetMipWidth(u32 level) const; + u32 GetMipWidth(u32 level) const { + return std::max(1U, width >> level); + } /// Returns the height of a given mipmap level. - u32 GetMipHeight(u32 level) const; + u32 GetMipHeight(u32 level) const { + return std::max(1U, height >> level); + } /// Returns the depth of a given mipmap level. - u32 GetMipDepth(u32 level) const; + u32 GetMipDepth(u32 level) const { + return is_layered ? depth : std::max(1U, depth >> level); + } /// Returns the block height of a given mipmap level. u32 GetMipBlockHeight(u32 level) const; @@ -79,6 +93,12 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; + // Helper used for out of class size calculations + static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, + const u32 block_depth) { + return Common::AlignUp(out_size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + } + /// Returns the offset in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapLevelOffset(u32 level) const; @@ -98,16 +118,24 @@ public: std::size_t GetHostLayerSize(u32 level) const; /// Returns the default block width. - u32 GetDefaultBlockWidth() const; + u32 GetDefaultBlockWidth() const { + return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); + } /// Returns the default block height. - u32 GetDefaultBlockHeight() const; + u32 GetDefaultBlockHeight() const { + return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); + } /// Returns the bits per pixel. - u32 GetBitsPerPixel() const; + u32 GetBitsPerPixel() const { + return VideoCore::Surface::GetFormatBpp(pixel_format); + } /// Returns the bytes per pixel. - u32 GetBytesPerPixel() const; + u32 GetBytesPerPixel() const { + return VideoCore::Surface::GetBytesPerPixel(pixel_format); + } /// Returns true if the pixel format is a depth and/or stencil format. bool IsPixelFormatZeta() const; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 96d1081476..fbfd1ff0ba 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -120,10 +120,6 @@ public: return {}; } - if (regs.color_mask[index].raw == 0) { - return {}; - } - auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) @@ -165,7 +161,9 @@ public: const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) { - ImageBlit(GetFermiSurface(src_config), GetFermiSurface(dst_config), src_rect, dst_rect); + TSurface dst_surface = GetFermiSurface(dst_config); + ImageBlit(GetFermiSurface(src_config), dst_surface, src_rect, dst_rect); + dst_surface->MarkAsModified(true, Tick()); } TSurface TryFindFramebufferSurface(const u8* host_ptr) { @@ -270,10 +268,6 @@ private: RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { - // Untopological decision - if (untopological) { - return RecycleStrategy::Ignore; - } // 3D Textures decision if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -284,12 +278,16 @@ private: return RecycleStrategy::Flush; } } + // Untopological decision + if (untopological) { + return RecycleStrategy::Ignore; + } return RecycleStrategy::Ignore; } std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, - const u8* host_ptr, const bool preserve_contents, + const bool preserve_contents, const bool untopological) { for (auto surface : overlaps) { Unregister(surface); @@ -328,6 +326,7 @@ private: } Unregister(current_surface); Register(new_surface); + new_surface->MarkAsModified(current_surface->IsModified(), Tick()); return {new_surface, new_surface->GetMainView()}; } @@ -351,6 +350,7 @@ private: if (params.target == SurfaceTarget::Texture3D) { return {}; } + bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); for (auto surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); @@ -358,7 +358,7 @@ private: // We send this cases to recycle as they are more complex to handle return {}; } - const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); + const std::size_t candidate_size = surface->GetSizeInBytes(); auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { return {}; @@ -368,6 +368,7 @@ private: if (new_surface->GetMipmapSize(mipmap) != candidate_size) { return {}; } + modified |= surface->IsModified(); // Now we got all the data set up const u32 dst_width{params.GetMipWidth(mipmap)}; const u32 dst_height{params.GetMipHeight(mipmap)}; @@ -381,6 +382,7 @@ private: force_reconfiguration |= surface->IsProtected(); Unregister(surface, true); } + new_surface->MarkAsModified(modified, Tick()); Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; } @@ -399,8 +401,7 @@ private: for (auto surface : overlaps) { if (!surface->MatchesTopology(params)) { - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - true); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); } } @@ -418,27 +419,26 @@ private: } } if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } - std::optional view = current_surface->EmplaceView(params, gpu_addr); + std::optional view = + current_surface->EmplaceView(params, gpu_addr, candidate_size); if (view.has_value()) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } return {current_surface, *view}; } - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } else { std::optional> view = ReconstructSurface(overlaps, params, gpu_addr, host_ptr); if (view.has_value()) { return *view; } - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } }