mirror of
https://git.suyu.dev/suyu/suyu
synced 2025-01-09 16:03:21 +00:00
Merge pull request #3278 from ReinUsesLisp/vk-memory-manager
renderer_vulkan: Buffer cache, stream buffer and memory manager changes
This commit is contained in:
commit
ee9b4a7f9a
6 changed files with 426 additions and 320 deletions
|
@ -2,124 +2,145 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
#include "common/alignment.h"
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "core/memory.h"
|
#include "common/bit_util.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "core/core.h"
|
||||||
#include "video_core/renderer_vulkan/declarations.h"
|
#include "video_core/renderer_vulkan/declarations.h"
|
||||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_device.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
|
namespace {
|
||||||
std::size_t alignment, u8* host_ptr)
|
|
||||||
: RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
|
|
||||||
alignment{alignment} {}
|
|
||||||
|
|
||||||
VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
|
const auto BufferUsage =
|
||||||
Memory::Memory& cpu_memory_,
|
vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
|
||||||
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
|
vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
|
||||||
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
|
|
||||||
: RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{
|
const auto UploadPipelineStage =
|
||||||
cpu_memory_} {
|
vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput |
|
||||||
const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
|
vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
|
||||||
vk::BufferUsageFlagBits::eIndexBuffer |
|
vk::PipelineStageFlagBits::eComputeShader;
|
||||||
vk::BufferUsageFlagBits::eUniformBuffer;
|
|
||||||
const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
|
const auto UploadAccessBarriers =
|
||||||
vk::AccessFlagBits::eUniformRead;
|
vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead |
|
||||||
stream_buffer =
|
vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead |
|
||||||
std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
|
vk::AccessFlagBits::eIndexRead;
|
||||||
vk::PipelineStageFlagBits::eAllCommands);
|
|
||||||
buffer_handle = stream_buffer->GetBuffer();
|
auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
|
||||||
|
return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
|
||||||
|
CacheAddr cache_addr, std::size_t size)
|
||||||
|
: VideoCommon::BufferBlock{cache_addr, size} {
|
||||||
|
const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
|
||||||
|
BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
|
||||||
|
vk::BufferUsageFlagBits::eTransferDst,
|
||||||
|
vk::SharingMode::eExclusive, 0, nullptr);
|
||||||
|
|
||||||
|
const auto& dld{device.GetDispatchLoader()};
|
||||||
|
const auto dev{device.GetLogical()};
|
||||||
|
buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld);
|
||||||
|
buffer.commit = memory_manager.Commit(*buffer.handle, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
CachedBufferBlock::~CachedBufferBlock() = default;
|
||||||
|
|
||||||
|
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||||
|
const VKDevice& device, VKMemoryManager& memory_manager,
|
||||||
|
VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
|
||||||
|
: VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
|
||||||
|
CreateStreamBuffer(device,
|
||||||
|
scheduler)},
|
||||||
|
device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
|
||||||
|
staging_pool} {}
|
||||||
|
|
||||||
VKBufferCache::~VKBufferCache() = default;
|
VKBufferCache::~VKBufferCache() = default;
|
||||||
|
|
||||||
u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
|
Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
|
||||||
const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
|
return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
|
||||||
ASSERT_MSG(cpu_addr, "Invalid GPU address");
|
|
||||||
|
|
||||||
// Cache management is a big overhead, so only cache entries with a given size.
|
|
||||||
// TODO: Figure out which size is the best for given games.
|
|
||||||
cache &= size >= 2048;
|
|
||||||
|
|
||||||
u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)};
|
|
||||||
if (cache) {
|
|
||||||
const auto entry = TryGet(host_ptr);
|
|
||||||
if (entry) {
|
|
||||||
if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
|
|
||||||
return entry->GetOffset();
|
|
||||||
}
|
|
||||||
Unregister(entry);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
AlignBuffer(alignment);
|
const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
|
||||||
const u64 uploaded_offset = buffer_offset;
|
return buffer->GetHandle();
|
||||||
|
|
||||||
if (host_ptr == nullptr) {
|
|
||||||
return uploaded_offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::memcpy(buffer_ptr, host_ptr, size);
|
const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
|
||||||
buffer_ptr += size;
|
size = std::max(size, std::size_t(4));
|
||||||
buffer_offset += size;
|
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
|
||||||
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
if (cache) {
|
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
|
||||||
auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
|
cmdbuf.fillBuffer(buffer, 0, size, 0, dld);
|
||||||
alignment, host_ptr);
|
});
|
||||||
Register(entry);
|
return &*empty.handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
return uploaded_offset;
|
void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||||
|
const u8* data) {
|
||||||
|
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
||||||
|
std::memcpy(staging.commit->Map(size), data, size);
|
||||||
|
|
||||||
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
|
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
|
||||||
|
size](auto cmdbuf, auto& dld) {
|
||||||
|
cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld);
|
||||||
|
cmdbuf.pipelineBarrier(
|
||||||
|
vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
|
||||||
|
{vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
|
||||||
|
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
|
||||||
|
offset, size)},
|
||||||
|
{}, dld);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
|
void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||||
AlignBuffer(alignment);
|
u8* data) {
|
||||||
std::memcpy(buffer_ptr, raw_pointer, size);
|
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
||||||
const u64 uploaded_offset = buffer_offset;
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
|
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
|
||||||
|
size](auto cmdbuf, auto& dld) {
|
||||||
|
cmdbuf.pipelineBarrier(
|
||||||
|
vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
|
||||||
|
vk::PipelineStageFlagBits::eComputeShader,
|
||||||
|
vk::PipelineStageFlagBits::eTransfer, {}, {},
|
||||||
|
{vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
|
||||||
|
vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
|
||||||
|
{}, dld);
|
||||||
|
cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
|
||||||
|
});
|
||||||
|
scheduler.Finish();
|
||||||
|
|
||||||
buffer_ptr += size;
|
std::memcpy(data, staging.commit->Map(size), size);
|
||||||
buffer_offset += size;
|
|
||||||
return uploaded_offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
|
void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
||||||
AlignBuffer(alignment);
|
std::size_t dst_offset, std::size_t size) {
|
||||||
u8* const uploaded_ptr = buffer_ptr;
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
const u64 uploaded_offset = buffer_offset;
|
scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
|
||||||
|
dst_offset, size](auto cmdbuf, auto& dld) {
|
||||||
buffer_ptr += size;
|
cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
|
||||||
buffer_offset += size;
|
cmdbuf.pipelineBarrier(
|
||||||
return {uploaded_ptr, uploaded_offset};
|
vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
|
||||||
}
|
{vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
|
||||||
|
vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
|
||||||
void VKBufferCache::Reserve(std::size_t max_size) {
|
VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
|
||||||
bool invalidate;
|
vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
|
||||||
std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
|
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
|
||||||
buffer_offset = buffer_offset_base;
|
dst_offset, size)},
|
||||||
|
{}, dld);
|
||||||
if (invalidate) {
|
});
|
||||||
InvalidateAll();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void VKBufferCache::Send() {
|
|
||||||
stream_buffer->Send(buffer_offset - buffer_offset_base);
|
|
||||||
}
|
|
||||||
|
|
||||||
void VKBufferCache::AlignBuffer(std::size_t alignment) {
|
|
||||||
// Align the offset, not the mapped pointer
|
|
||||||
const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
|
|
||||||
buffer_ptr += offset_aligned - buffer_offset;
|
|
||||||
buffer_offset = offset_aligned;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -5,105 +5,74 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <tuple>
|
#include <unordered_map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/buffer_cache/buffer_cache.h"
|
||||||
#include "video_core/rasterizer_cache.h"
|
#include "video_core/rasterizer_cache.h"
|
||||||
#include "video_core/renderer_vulkan/declarations.h"
|
#include "video_core/renderer_vulkan/declarations.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||||
|
|
||||||
namespace Memory {
|
namespace Core {
|
||||||
class Memory;
|
class System;
|
||||||
}
|
|
||||||
|
|
||||||
namespace Tegra {
|
|
||||||
class MemoryManager;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
class VKDevice;
|
class VKDevice;
|
||||||
class VKFence;
|
|
||||||
class VKMemoryManager;
|
class VKMemoryManager;
|
||||||
class VKStreamBuffer;
|
class VKScheduler;
|
||||||
|
|
||||||
class CachedBufferEntry final : public RasterizerCacheObject {
|
class CachedBufferBlock final : public VideoCommon::BufferBlock {
|
||||||
public:
|
public:
|
||||||
explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
|
explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
|
||||||
u8* host_ptr);
|
CacheAddr cache_addr, std::size_t size);
|
||||||
|
~CachedBufferBlock();
|
||||||
|
|
||||||
VAddr GetCpuAddr() const override {
|
const vk::Buffer* GetHandle() const {
|
||||||
return cpu_addr;
|
return &*buffer.handle;
|
||||||
}
|
|
||||||
|
|
||||||
std::size_t GetSizeInBytes() const override {
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::size_t GetSize() const {
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
u64 GetOffset() const {
|
|
||||||
return offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::size_t GetAlignment() const {
|
|
||||||
return alignment;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
VAddr cpu_addr{};
|
VKBuffer buffer;
|
||||||
std::size_t size{};
|
|
||||||
u64 offset{};
|
|
||||||
std::size_t alignment{};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
|
using Buffer = std::shared_ptr<CachedBufferBlock>;
|
||||||
|
|
||||||
|
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
|
||||||
public:
|
public:
|
||||||
explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_,
|
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||||
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
|
const VKDevice& device, VKMemoryManager& memory_manager,
|
||||||
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
|
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
|
||||||
~VKBufferCache();
|
~VKBufferCache();
|
||||||
|
|
||||||
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
|
const vk::Buffer* GetEmptyBuffer(std::size_t size) override;
|
||||||
/// allocated.
|
|
||||||
u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
|
|
||||||
|
|
||||||
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
|
|
||||||
u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
|
|
||||||
|
|
||||||
/// Reserves memory to be used by host's CPU. Returns mapped address and offset.
|
|
||||||
std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
|
|
||||||
|
|
||||||
/// Reserves a region of memory to be used in subsequent upload/reserve operations.
|
|
||||||
void Reserve(std::size_t max_size);
|
|
||||||
|
|
||||||
/// Ensures that the set data is sent to the device.
|
|
||||||
void Send();
|
|
||||||
|
|
||||||
/// Returns the buffer cache handle.
|
|
||||||
vk::Buffer GetBuffer() const {
|
|
||||||
return buffer_handle;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// We do not have to flush this cache as things in it are never modified by us.
|
void WriteBarrier() override {}
|
||||||
void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
|
|
||||||
|
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
|
||||||
|
|
||||||
|
const vk::Buffer* ToHandle(const Buffer& buffer) override;
|
||||||
|
|
||||||
|
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||||
|
const u8* data) override;
|
||||||
|
|
||||||
|
void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||||
|
u8* data) override;
|
||||||
|
|
||||||
|
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
||||||
|
std::size_t dst_offset, std::size_t size) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void AlignBuffer(std::size_t alignment);
|
const VKDevice& device;
|
||||||
|
VKMemoryManager& memory_manager;
|
||||||
Tegra::MemoryManager& tegra_memory_manager;
|
VKScheduler& scheduler;
|
||||||
Memory::Memory& cpu_memory;
|
VKStagingBufferPool& staging_pool;
|
||||||
|
|
||||||
std::unique_ptr<VKStreamBuffer> stream_buffer;
|
|
||||||
vk::Buffer buffer_handle;
|
|
||||||
|
|
||||||
u8* buffer_ptr = nullptr;
|
|
||||||
u64 buffer_offset = 0;
|
|
||||||
u64 buffer_offset_base = 0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
@ -16,34 +17,32 @@
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
// TODO(Rodrigo): Fine tune this number
|
namespace {
|
||||||
constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
|
|
||||||
|
u64 GetAllocationChunkSize(u64 required_size) {
|
||||||
|
static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
|
||||||
|
auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
|
||||||
|
return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
class VKMemoryAllocation final {
|
class VKMemoryAllocation final {
|
||||||
public:
|
public:
|
||||||
explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
|
explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
|
||||||
vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
|
vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type)
|
||||||
: device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
|
: device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size},
|
||||||
shifted_type{ShiftType(type)}, is_mappable{properties &
|
shifted_type{ShiftType(type)} {}
|
||||||
vk::MemoryPropertyFlagBits::eHostVisible} {
|
|
||||||
if (is_mappable) {
|
|
||||||
const auto dev = device.GetLogical();
|
|
||||||
const auto& dld = device.GetDispatchLoader();
|
|
||||||
base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
~VKMemoryAllocation() {
|
~VKMemoryAllocation() {
|
||||||
const auto dev = device.GetLogical();
|
const auto dev = device.GetLogical();
|
||||||
const auto& dld = device.GetDispatchLoader();
|
const auto& dld = device.GetDispatchLoader();
|
||||||
if (is_mappable)
|
|
||||||
dev.unmapMemory(memory, dld);
|
|
||||||
dev.free(memory, nullptr, dld);
|
dev.free(memory, nullptr, dld);
|
||||||
}
|
}
|
||||||
|
|
||||||
VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
|
VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
|
||||||
auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
|
auto found = TryFindFreeSection(free_iterator, allocation_size,
|
||||||
static_cast<u64>(alignment));
|
static_cast<u64>(commit_size), static_cast<u64>(alignment));
|
||||||
if (!found) {
|
if (!found) {
|
||||||
found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
|
found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
|
||||||
static_cast<u64>(alignment));
|
static_cast<u64>(alignment));
|
||||||
|
@ -52,8 +51,7 @@ public:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
u8* address = is_mappable ? base_address + *found : nullptr;
|
auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
|
||||||
auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
|
|
||||||
*found + commit_size);
|
*found + commit_size);
|
||||||
commits.push_back(commit.get());
|
commits.push_back(commit.get());
|
||||||
|
|
||||||
|
@ -65,12 +63,10 @@ public:
|
||||||
|
|
||||||
void Free(const VKMemoryCommitImpl* commit) {
|
void Free(const VKMemoryCommitImpl* commit) {
|
||||||
ASSERT(commit);
|
ASSERT(commit);
|
||||||
const auto it =
|
|
||||||
std::find_if(commits.begin(), commits.end(),
|
const auto it = std::find(std::begin(commits), std::end(commits), commit);
|
||||||
[&](const auto& stored_commit) { return stored_commit == commit; });
|
|
||||||
if (it == commits.end()) {
|
if (it == commits.end()) {
|
||||||
LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
|
UNREACHABLE_MSG("Freeing unallocated commit!");
|
||||||
UNREACHABLE();
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
commits.erase(it);
|
commits.erase(it);
|
||||||
|
@ -88,11 +84,11 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A memory allocator, it may return a free region between "start" and "end" with the solicited
|
/// A memory allocator, it may return a free region between "start" and "end" with the solicited
|
||||||
/// requeriments.
|
/// requirements.
|
||||||
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
|
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
|
||||||
u64 iterator = start;
|
u64 iterator = Common::AlignUp(start, alignment);
|
||||||
while (iterator + size < end) {
|
while (iterator + size <= end) {
|
||||||
const u64 try_left = Common::AlignUp(iterator, alignment);
|
const u64 try_left = iterator;
|
||||||
const u64 try_right = try_left + size;
|
const u64 try_right = try_left + size;
|
||||||
|
|
||||||
bool overlap = false;
|
bool overlap = false;
|
||||||
|
@ -100,7 +96,7 @@ private:
|
||||||
const auto [commit_left, commit_right] = commit->interval;
|
const auto [commit_left, commit_right] = commit->interval;
|
||||||
if (try_left < commit_right && commit_left < try_right) {
|
if (try_left < commit_right && commit_left < try_right) {
|
||||||
// There's an overlap, continue the search where the overlapping commit ends.
|
// There's an overlap, continue the search where the overlapping commit ends.
|
||||||
iterator = commit_right;
|
iterator = Common::AlignUp(commit_right, alignment);
|
||||||
overlap = true;
|
overlap = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -110,6 +106,7 @@ private:
|
||||||
return try_left;
|
return try_left;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// No free regions were found, return an empty optional.
|
// No free regions were found, return an empty optional.
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
@ -117,12 +114,8 @@ private:
|
||||||
const VKDevice& device; ///< Vulkan device.
|
const VKDevice& device; ///< Vulkan device.
|
||||||
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
|
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
|
||||||
const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
|
const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
|
||||||
const u64 alloc_size; ///< Size of this allocation.
|
const u64 allocation_size; ///< Size of this allocation.
|
||||||
const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
|
const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
|
||||||
const bool is_mappable; ///< Whether the allocation is mappable.
|
|
||||||
|
|
||||||
/// Base address of the mapped pointer.
|
|
||||||
u8* base_address{};
|
|
||||||
|
|
||||||
/// Hints where the next free region is likely going to be.
|
/// Hints where the next free region is likely going to be.
|
||||||
u64 free_iterator{};
|
u64 free_iterator{};
|
||||||
|
@ -132,13 +125,15 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
VKMemoryManager::VKMemoryManager(const VKDevice& device)
|
VKMemoryManager::VKMemoryManager(const VKDevice& device)
|
||||||
: device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
|
: device{device}, properties{device.GetPhysical().getMemoryProperties(
|
||||||
is_memory_unified{GetMemoryUnified(props)} {}
|
device.GetDispatchLoader())},
|
||||||
|
is_memory_unified{GetMemoryUnified(properties)} {}
|
||||||
|
|
||||||
VKMemoryManager::~VKMemoryManager() = default;
|
VKMemoryManager::~VKMemoryManager() = default;
|
||||||
|
|
||||||
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
|
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements,
|
||||||
ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
|
bool host_visible) {
|
||||||
|
const u64 chunk_size = GetAllocationChunkSize(requirements.size);
|
||||||
|
|
||||||
// When a host visible commit is asked, search for host visible and coherent, otherwise search
|
// When a host visible commit is asked, search for host visible and coherent, otherwise search
|
||||||
// for a fast device local type.
|
// for a fast device local type.
|
||||||
|
@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
|
||||||
? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
|
? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
|
||||||
: vk::MemoryPropertyFlagBits::eDeviceLocal;
|
: vk::MemoryPropertyFlagBits::eDeviceLocal;
|
||||||
|
|
||||||
const auto TryCommit = [&]() -> VKMemoryCommit {
|
if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
|
||||||
for (auto& alloc : allocs) {
|
|
||||||
if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
|
|
||||||
return commit;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return {};
|
|
||||||
};
|
|
||||||
|
|
||||||
if (auto commit = TryCommit(); commit) {
|
|
||||||
return commit;
|
return commit;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Commit has failed, allocate more memory.
|
// Commit has failed, allocate more memory.
|
||||||
if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
|
if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
|
||||||
// TODO(Rodrigo): Try to use host memory.
|
// TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
|
||||||
LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
|
// Allocation has failed, panic.
|
||||||
UNREACHABLE();
|
UNREACHABLE_MSG("Ran out of VRAM!");
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Commit again, this time it won't fail since there's a fresh allocation above. If it does,
|
// Commit again, this time it won't fail since there's a fresh allocation above. If it does,
|
||||||
// there's a bug.
|
// there's a bug.
|
||||||
auto commit = TryCommit();
|
auto commit = TryAllocCommit(requirements, wanted_properties);
|
||||||
ASSERT(commit);
|
ASSERT(commit);
|
||||||
return commit;
|
return commit;
|
||||||
}
|
}
|
||||||
|
@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
|
||||||
VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
|
VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
|
||||||
const auto dev = device.GetLogical();
|
const auto dev = device.GetLogical();
|
||||||
const auto& dld = device.GetDispatchLoader();
|
const auto& dld = device.GetDispatchLoader();
|
||||||
const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
|
auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible);
|
||||||
auto commit = Commit(requeriments, host_visible);
|
|
||||||
dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
|
dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
|
||||||
return commit;
|
return commit;
|
||||||
}
|
}
|
||||||
|
@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
|
||||||
VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
|
VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
|
||||||
const auto dev = device.GetLogical();
|
const auto dev = device.GetLogical();
|
||||||
const auto& dld = device.GetDispatchLoader();
|
const auto& dld = device.GetDispatchLoader();
|
||||||
const auto requeriments = dev.getImageMemoryRequirements(image, dld);
|
auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible);
|
||||||
auto commit = Commit(requeriments, host_visible);
|
|
||||||
dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
|
dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
|
||||||
return commit;
|
return commit;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
|
bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
|
||||||
u64 size) {
|
u64 size) {
|
||||||
const u32 type = [&]() {
|
const u32 type = [&] {
|
||||||
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
|
for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
|
||||||
const auto flags = props.memoryTypes[type_index].propertyFlags;
|
const auto flags = properties.memoryTypes[type_index].propertyFlags;
|
||||||
if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
|
if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
|
||||||
// The type matches in type and in the wanted properties.
|
// The type matches in type and in the wanted properties.
|
||||||
return type_index;
|
return type_index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
|
UNREACHABLE_MSG("Couldn't find a compatible memory type!");
|
||||||
UNREACHABLE();
|
return 0U;
|
||||||
return 0u;
|
|
||||||
}();
|
}();
|
||||||
|
|
||||||
const auto dev = device.GetLogical();
|
const auto dev = device.GetLogical();
|
||||||
|
@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
|
||||||
// Try to allocate found type.
|
// Try to allocate found type.
|
||||||
const vk::MemoryAllocateInfo memory_ai(size, type);
|
const vk::MemoryAllocateInfo memory_ai(size, type);
|
||||||
vk::DeviceMemory memory;
|
vk::DeviceMemory memory;
|
||||||
if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
|
if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
|
||||||
res != vk::Result::eSuccess) {
|
res != vk::Result::eSuccess) {
|
||||||
LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
|
LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
allocs.push_back(
|
allocations.push_back(
|
||||||
std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
|
std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
|
VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements,
|
||||||
for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
|
vk::MemoryPropertyFlags wanted_properties) {
|
||||||
if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
|
for (auto& allocation : allocations) {
|
||||||
|
if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
|
||||||
|
return commit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
/*static*/ bool VKMemoryManager::GetMemoryUnified(
|
||||||
|
const vk::PhysicalDeviceMemoryProperties& properties) {
|
||||||
|
for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
|
||||||
|
if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
|
||||||
// Memory is considered unified when heaps are device local only.
|
// Memory is considered unified when heaps are device local only.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
|
VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
|
||||||
u8* data, u64 begin, u64 end)
|
vk::DeviceMemory memory, u64 begin, u64 end)
|
||||||
: interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
|
: device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {}
|
||||||
|
|
||||||
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
|
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
|
||||||
allocation->Free(this);
|
allocation->Free(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
u8* VKMemoryCommitImpl::GetData() const {
|
MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
|
||||||
ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
|
const auto dev = device.GetLogical();
|
||||||
return data;
|
const auto address = reinterpret_cast<u8*>(
|
||||||
|
dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader()));
|
||||||
|
return MemoryMap{this, address};
|
||||||
|
}
|
||||||
|
|
||||||
|
void VKMemoryCommitImpl::Unmap() const {
|
||||||
|
const auto dev = device.GetLogical();
|
||||||
|
dev.unmapMemory(memory, device.GetDispatchLoader());
|
||||||
|
}
|
||||||
|
|
||||||
|
MemoryMap VKMemoryCommitImpl::Map() const {
|
||||||
|
return Map(interval.second - interval.first);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
|
class MemoryMap;
|
||||||
class VKDevice;
|
class VKDevice;
|
||||||
class VKMemoryAllocation;
|
class VKMemoryAllocation;
|
||||||
class VKMemoryCommitImpl;
|
class VKMemoryCommitImpl;
|
||||||
|
@ -21,11 +22,12 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
|
||||||
class VKMemoryManager final {
|
class VKMemoryManager final {
|
||||||
public:
|
public:
|
||||||
explicit VKMemoryManager(const VKDevice& device);
|
explicit VKMemoryManager(const VKDevice& device);
|
||||||
|
VKMemoryManager(const VKMemoryManager&) = delete;
|
||||||
~VKMemoryManager();
|
~VKMemoryManager();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Commits a memory with the specified requirements.
|
* Commits a memory with the specified requirements.
|
||||||
* @param reqs Requeriments returned from a Vulkan call.
|
* @param requirements Requirements returned from a Vulkan call.
|
||||||
* @param host_visible Signals the allocator that it *must* use host visible and coherent
|
* @param host_visible Signals the allocator that it *must* use host visible and coherent
|
||||||
* memory. When passing false, it will try to allocate device local memory.
|
* memory. When passing false, it will try to allocate device local memory.
|
||||||
* @returns A memory commit.
|
* @returns A memory commit.
|
||||||
|
@ -47,25 +49,35 @@ private:
|
||||||
/// Allocates a chunk of memory.
|
/// Allocates a chunk of memory.
|
||||||
bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
|
bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
|
||||||
|
|
||||||
|
/// Tries to allocate a memory commit.
|
||||||
|
VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements,
|
||||||
|
vk::MemoryPropertyFlags wanted_properties);
|
||||||
|
|
||||||
/// Returns true if the device uses a unified memory model.
|
/// Returns true if the device uses a unified memory model.
|
||||||
static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
|
static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties);
|
||||||
|
|
||||||
const VKDevice& device; ///< Device handler.
|
const VKDevice& device; ///< Device handler.
|
||||||
const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
|
const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties.
|
||||||
const bool is_memory_unified; ///< True if memory model is unified.
|
const bool is_memory_unified; ///< True if memory model is unified.
|
||||||
std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
|
std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
|
||||||
};
|
};
|
||||||
|
|
||||||
class VKMemoryCommitImpl final {
|
class VKMemoryCommitImpl final {
|
||||||
friend VKMemoryAllocation;
|
friend VKMemoryAllocation;
|
||||||
|
friend MemoryMap;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
|
explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
|
||||||
u64 begin, u64 end);
|
vk::DeviceMemory memory, u64 begin, u64 end);
|
||||||
~VKMemoryCommitImpl();
|
~VKMemoryCommitImpl();
|
||||||
|
|
||||||
/// Returns the writeable memory map. The commit has to be mappable.
|
/// Maps a memory region and returns a pointer to it.
|
||||||
u8* GetData() const;
|
/// It's illegal to have more than one memory map at the same time.
|
||||||
|
MemoryMap Map(u64 size, u64 offset = 0) const;
|
||||||
|
|
||||||
|
/// Maps the whole commit and returns a pointer to it.
|
||||||
|
/// It's illegal to have more than one memory map at the same time.
|
||||||
|
MemoryMap Map() const;
|
||||||
|
|
||||||
/// Returns the Vulkan memory handler.
|
/// Returns the Vulkan memory handler.
|
||||||
vk::DeviceMemory GetMemory() const {
|
vk::DeviceMemory GetMemory() const {
|
||||||
|
@ -78,10 +90,46 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/// Unmaps memory.
|
||||||
|
void Unmap() const;
|
||||||
|
|
||||||
|
const VKDevice& device; ///< Vulkan device.
|
||||||
std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
|
std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
|
||||||
vk::DeviceMemory memory; ///< Vulkan device memory handler.
|
vk::DeviceMemory memory; ///< Vulkan device memory handler.
|
||||||
VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
|
VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
|
||||||
u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
|
};
|
||||||
|
|
||||||
|
/// Holds ownership of a memory map.
|
||||||
|
class MemoryMap final {
|
||||||
|
public:
|
||||||
|
explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address)
|
||||||
|
: commit{commit}, address{address} {}
|
||||||
|
|
||||||
|
~MemoryMap() {
|
||||||
|
if (commit) {
|
||||||
|
commit->Unmap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Prematurely releases the memory map.
|
||||||
|
void Release() {
|
||||||
|
commit->Unmap();
|
||||||
|
commit = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the address of the memory map.
|
||||||
|
u8* GetAddress() const {
|
||||||
|
return address;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the address of the memory map.
|
||||||
|
operator u8*() const {
|
||||||
|
return address;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
|
||||||
|
u8* address{}; ///< Address to the mapped memory.
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -3,86 +3,144 @@
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <memory>
|
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <tuple>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common/alignment.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "video_core/renderer_vulkan/declarations.h"
|
#include "video_core/renderer_vulkan/declarations.h"
|
||||||
#include "video_core/renderer_vulkan/vk_device.h"
|
#include "video_core/renderer_vulkan/vk_device.h"
|
||||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
|
||||||
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
|
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
|
||||||
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
|
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
|
||||||
|
|
||||||
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
|
constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
|
||||||
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
|
|
||||||
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
|
std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
|
||||||
: device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
|
vk::MemoryPropertyFlags wanted) {
|
||||||
pipeline_stage} {
|
const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader());
|
||||||
CreateBuffers(memory_manager, usage);
|
for (u32 i = 0; i < properties.memoryTypeCount; i++) {
|
||||||
ReserveWatches(WATCHES_INITIAL_RESERVE);
|
if (!(filter & (1 << i))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
|
||||||
|
vk::BufferUsageFlags usage)
|
||||||
|
: device{device}, scheduler{scheduler} {
|
||||||
|
CreateBuffers(usage);
|
||||||
|
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
|
||||||
|
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
|
||||||
}
|
}
|
||||||
|
|
||||||
VKStreamBuffer::~VKStreamBuffer() = default;
|
VKStreamBuffer::~VKStreamBuffer() = default;
|
||||||
|
|
||||||
std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
|
std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
|
||||||
ASSERT(size <= buffer_size);
|
ASSERT(size <= STREAM_BUFFER_SIZE);
|
||||||
mapped_size = size;
|
mapped_size = size;
|
||||||
|
|
||||||
if (offset + size > buffer_size) {
|
if (alignment > 0) {
|
||||||
// The buffer would overflow, save the amount of used buffers, signal an invalidation and
|
offset = Common::AlignUp(offset, alignment);
|
||||||
// reset the state.
|
}
|
||||||
invalidation_mark = used_watches;
|
|
||||||
used_watches = 0;
|
WaitPendingOperations(offset);
|
||||||
|
|
||||||
|
bool invalidated = false;
|
||||||
|
if (offset + size > STREAM_BUFFER_SIZE) {
|
||||||
|
// The buffer would overflow, save the amount of used watches and reset the state.
|
||||||
|
invalidation_mark = current_watch_cursor;
|
||||||
|
current_watch_cursor = 0;
|
||||||
offset = 0;
|
offset = 0;
|
||||||
}
|
|
||||||
|
|
||||||
return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
|
// Swap watches and reset waiting cursors.
|
||||||
}
|
std::swap(previous_watches, current_watches);
|
||||||
|
wait_cursor = 0;
|
||||||
|
wait_bound = 0;
|
||||||
|
|
||||||
void VKStreamBuffer::Send(u64 size) {
|
// Ensure that we don't wait for uncommitted fences.
|
||||||
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
|
|
||||||
|
|
||||||
if (invalidation_mark) {
|
|
||||||
// TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
|
|
||||||
scheduler.Flush();
|
scheduler.Flush();
|
||||||
std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
|
|
||||||
[&](auto& resource) { resource->Wait(); });
|
|
||||||
invalidation_mark = std::nullopt;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (used_watches + 1 >= watches.size()) {
|
invalidated = true;
|
||||||
// Ensure that there are enough watches.
|
|
||||||
ReserveWatches(WATCHES_RESERVE_CHUNK);
|
|
||||||
}
|
}
|
||||||
// Add a watch for this allocation.
|
|
||||||
watches[used_watches++]->Watch(scheduler.GetFence());
|
|
||||||
|
|
||||||
offset += size;
|
|
||||||
}
|
|
||||||
|
|
||||||
void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
|
|
||||||
const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
|
|
||||||
nullptr);
|
|
||||||
|
|
||||||
const auto dev = device.GetLogical();
|
const auto dev = device.GetLogical();
|
||||||
const auto& dld = device.GetDispatchLoader();
|
const auto& dld = device.GetDispatchLoader();
|
||||||
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
|
const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld));
|
||||||
commit = memory_manager.Commit(*buffer, true);
|
return {pointer, offset, invalidated};
|
||||||
mapped_pointer = commit->GetData();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
|
void VKStreamBuffer::Unmap(u64 size) {
|
||||||
const std::size_t previous_size = watches.size();
|
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
|
||||||
watches.resize(previous_size + grow_size);
|
|
||||||
std::generate(watches.begin() + previous_size, watches.end(),
|
const auto dev = device.GetLogical();
|
||||||
[]() { return std::make_unique<VKFenceWatch>(); });
|
dev.unmapMemory(*memory, device.GetDispatchLoader());
|
||||||
|
|
||||||
|
offset += size;
|
||||||
|
|
||||||
|
if (current_watch_cursor + 1 >= current_watches.size()) {
|
||||||
|
// Ensure that there are enough watches.
|
||||||
|
ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
|
||||||
|
}
|
||||||
|
auto& watch = current_watches[current_watch_cursor++];
|
||||||
|
watch.upper_bound = offset;
|
||||||
|
watch.fence.Watch(scheduler.GetFence());
|
||||||
|
}
|
||||||
|
|
||||||
|
void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) {
|
||||||
|
const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive,
|
||||||
|
0, nullptr);
|
||||||
|
const auto dev = device.GetLogical();
|
||||||
|
const auto& dld = device.GetDispatchLoader();
|
||||||
|
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
|
||||||
|
|
||||||
|
const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld);
|
||||||
|
// Prefer device local host visible allocations (this should hit AMD's pinned memory).
|
||||||
|
auto type = FindMemoryType(device, requirements.memoryTypeBits,
|
||||||
|
vk::MemoryPropertyFlagBits::eHostVisible |
|
||||||
|
vk::MemoryPropertyFlagBits::eHostCoherent |
|
||||||
|
vk::MemoryPropertyFlagBits::eDeviceLocal);
|
||||||
|
if (!type) {
|
||||||
|
// Otherwise search for a host visible allocation.
|
||||||
|
type = FindMemoryType(device, requirements.memoryTypeBits,
|
||||||
|
vk::MemoryPropertyFlagBits::eHostVisible |
|
||||||
|
vk::MemoryPropertyFlagBits::eHostCoherent);
|
||||||
|
ASSERT_MSG(type, "No host visible and coherent memory type found");
|
||||||
|
}
|
||||||
|
const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type);
|
||||||
|
memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld);
|
||||||
|
|
||||||
|
dev.bindBufferMemory(*buffer, *memory, 0, dld);
|
||||||
|
}
|
||||||
|
|
||||||
|
void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
|
||||||
|
watches.resize(watches.size() + grow_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
|
||||||
|
if (!invalidation_mark) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
|
||||||
|
auto& watch = previous_watches[wait_cursor];
|
||||||
|
wait_bound = watch.upper_bound;
|
||||||
|
watch.fence.Wait();
|
||||||
|
++wait_cursor;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -4,28 +4,24 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <memory>
|
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/renderer_vulkan/declarations.h"
|
#include "video_core/renderer_vulkan/declarations.h"
|
||||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
class VKDevice;
|
class VKDevice;
|
||||||
class VKFence;
|
class VKFence;
|
||||||
class VKFenceWatch;
|
class VKFenceWatch;
|
||||||
class VKResourceManager;
|
|
||||||
class VKScheduler;
|
class VKScheduler;
|
||||||
|
|
||||||
class VKStreamBuffer {
|
class VKStreamBuffer final {
|
||||||
public:
|
public:
|
||||||
explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
|
explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
|
||||||
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
|
vk::BufferUsageFlags usage);
|
||||||
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
|
|
||||||
~VKStreamBuffer();
|
~VKStreamBuffer();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -34,39 +30,47 @@ public:
|
||||||
* @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
|
* @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
|
||||||
* offset and a boolean that's true when buffer has been invalidated.
|
* offset and a boolean that's true when buffer has been invalidated.
|
||||||
*/
|
*/
|
||||||
std::tuple<u8*, u64, bool> Reserve(u64 size);
|
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
|
||||||
|
|
||||||
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
|
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
|
||||||
void Send(u64 size);
|
void Unmap(u64 size);
|
||||||
|
|
||||||
vk::Buffer GetBuffer() const {
|
vk::Buffer GetHandle() const {
|
||||||
return *buffer;
|
return *buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
struct Watch final {
|
||||||
|
VKFenceWatch fence;
|
||||||
|
u64 upper_bound{};
|
||||||
|
};
|
||||||
|
|
||||||
/// Creates Vulkan buffer handles committing the required memory.
|
/// Creates Vulkan buffer handles committing the required memory.
|
||||||
void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
|
void CreateBuffers(vk::BufferUsageFlags usage);
|
||||||
|
|
||||||
/// Increases the amount of watches available.
|
/// Increases the amount of watches available.
|
||||||
void ReserveWatches(std::size_t grow_size);
|
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
|
||||||
|
|
||||||
|
void WaitPendingOperations(u64 requested_upper_bound);
|
||||||
|
|
||||||
const VKDevice& device; ///< Vulkan device manager.
|
const VKDevice& device; ///< Vulkan device manager.
|
||||||
VKScheduler& scheduler; ///< Command scheduler.
|
VKScheduler& scheduler; ///< Command scheduler.
|
||||||
const u64 buffer_size; ///< Total size of the stream buffer.
|
|
||||||
const vk::AccessFlags access; ///< Access usage of this stream buffer.
|
const vk::AccessFlags access; ///< Access usage of this stream buffer.
|
||||||
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
|
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
|
||||||
|
|
||||||
UniqueBuffer buffer; ///< Mapped buffer.
|
UniqueBuffer buffer; ///< Mapped buffer.
|
||||||
VKMemoryCommit commit; ///< Memory commit.
|
UniqueDeviceMemory memory; ///< Memory allocation.
|
||||||
u8* mapped_pointer{}; ///< Pointer to the host visible commit
|
|
||||||
|
|
||||||
u64 offset{}; ///< Buffer iterator.
|
u64 offset{}; ///< Buffer iterator.
|
||||||
u64 mapped_size{}; ///< Size reserved for the current copy.
|
u64 mapped_size{}; ///< Size reserved for the current copy.
|
||||||
|
|
||||||
std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
|
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
|
||||||
std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
|
std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
|
||||||
std::optional<std::size_t>
|
std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
|
||||||
invalidation_mark{}; ///< Number of watches used in the current invalidation.
|
|
||||||
|
std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
|
||||||
|
std::size_t wait_cursor{}; ///< Last watch being waited for completion.
|
||||||
|
u64 wait_bound{}; ///< Highest offset being watched for completion.
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
Loading…
Reference in a new issue