Query Cache: address issues

This commit is contained in:
Fernando Sahmkow 2023-08-06 09:38:16 +02:00
parent aa6587d854
commit 282ae8fa51
21 changed files with 270 additions and 214 deletions

View file

@ -276,9 +276,8 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad
} }
template <class P> template <class P>
std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(VAddr cpu_addr, u32 size, std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
ObtainBufferSynchronize sync_info, VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) {
ObtainBufferOperation post_op) {
const BufferId buffer_id = FindBuffer(cpu_addr, size); const BufferId buffer_id = FindBuffer(cpu_addr, size);
Buffer& buffer = slot_buffers[buffer_id]; Buffer& buffer = slot_buffers[buffer_id];

View file

@ -596,12 +596,6 @@ void Maxwell3D::ProcessCounterReset() {
case Regs::ClearReport::ZPassPixelCount: case Regs::ClearReport::ZPassPixelCount:
rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
break; break;
case Regs::ClearReport::PrimitivesGenerated:
rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount);
break;
case Regs::ClearReport::VtgPrimitivesOut:
rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount);
break;
default: default:
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value);
break; break;

View file

@ -82,7 +82,8 @@ void Puller::ProcessSemaphoreTriggerMethod() {
if (op == GpuSemaphoreOperation::WriteLong) { if (op == GpuSemaphoreOperation::WriteLong) {
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
const u32 payload = regs.semaphore_sequence; const u32 payload = regs.semaphore_sequence;
rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload,
VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0);
} else { } else {
do { do {
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
@ -117,7 +118,8 @@ void Puller::ProcessSemaphoreTriggerMethod() {
void Puller::ProcessSemaphoreRelease() { void Puller::ProcessSemaphoreRelease() {
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
const u32 payload = regs.semaphore_release; const u32 payload = regs.semaphore_release;
rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload,
VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0);
} }
void Puller::ProcessSemaphoreAcquire() { void Puller::ProcessSemaphoreAcquire() {

View file

@ -55,6 +55,9 @@ public:
// Unlike other fences, this one doesn't // Unlike other fences, this one doesn't
void SignalOrdering() { void SignalOrdering() {
if constexpr (!can_async_check) {
TryReleasePendingFences<false>();
}
std::scoped_lock lock{buffer_cache.mutex}; std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.AccumulateFlushes(); buffer_cache.AccumulateFlushes();
} }
@ -104,13 +107,9 @@ public:
SignalFence(std::move(func)); SignalFence(std::move(func));
} }
void WaitPendingFences(bool force) { void WaitPendingFences([[maybe_unused]] bool force) {
if constexpr (!can_async_check) { if constexpr (!can_async_check) {
if (force) {
TryReleasePendingFences<true>(); TryReleasePendingFences<true>();
} else {
TryReleasePendingFences<false>();
}
} else { } else {
if (!force) { if (!force) {
return; return;
@ -125,7 +124,8 @@ public:
}); });
SignalFence(std::move(func)); SignalFence(std::move(func));
std::unique_lock lk(wait_mutex); std::unique_lock lk(wait_mutex);
wait_cv.wait(lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); }); wait_cv.wait(
lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); });
} }
} }

View file

@ -7,21 +7,19 @@
#include <deque> #include <deque>
#include <utility> #include <utility>
#include "common/common_types.h" #include "common/common_types.h"
namespace VideoCommon { namespace VideoCommon {
class BankBase { class BankBase {
protected: protected:
const size_t base_bank_size; const size_t base_bank_size{};
size_t bank_size; size_t bank_size{};
std::atomic<size_t> references; std::atomic<size_t> references{};
size_t current_slot; size_t current_slot{};
public: public:
BankBase(size_t bank_size_) explicit BankBase(size_t bank_size_) : base_bank_size{bank_size_}, bank_size(bank_size_) {}
: base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {}
virtual ~BankBase() = default; virtual ~BankBase() = default;
@ -58,11 +56,11 @@ public:
bank_size = current_slot; bank_size = current_slot;
} }
constexpr bool IsClosed() { bool IsClosed() const {
return current_slot >= bank_size; return current_slot >= bank_size;
} }
bool IsDead() { bool IsDead() const {
return IsClosed() && references == 0; return IsClosed() && references == 0;
} }
}; };

View file

@ -9,7 +9,7 @@
namespace VideoCommon { namespace VideoCommon {
enum class QueryFlagBits : u32 { enum class QueryFlagBits : u32 {
HasTimestamp = 1 << 0, ///< Indicates if this query has a tiemstamp. HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp.
IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
@ -24,13 +24,13 @@ DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
class QueryBase { class QueryBase {
public: public:
VAddr guest_address; VAddr guest_address{};
QueryFlagBits flags; QueryFlagBits flags{};
u64 value; u64 value{};
protected: protected:
// Default constructor // Default constructor
QueryBase() : guest_address(0), flags{}, value{} {} QueryBase() = default;
// Parameterized constructor // Parameterized constructor
QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) QueryBase(VAddr address, QueryFlagBits flags_, u64 value_)
@ -51,23 +51,21 @@ public:
class HostQueryBase : public QueryBase { class HostQueryBase : public QueryBase {
public: public:
// Default constructor // Default constructor
HostQueryBase() HostQueryBase() : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0) {}
: QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{},
size_banks{}, start_slot{}, size_slots{} {}
// Parameterized constructor // Parameterized constructor
HostQueryBase(bool isLong, VAddr address) HostQueryBase(bool has_timestamp, VAddr address)
: QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{}, : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{},
start_slot{}, size_slots{} { start_slot{}, size_slots{} {
if (isLong) { if (has_timestamp) {
flags |= QueryFlagBits::HasTimestamp; flags |= QueryFlagBits::HasTimestamp;
} }
} }
u32 start_bank_id; u32 start_bank_id{};
u32 size_banks; u32 size_banks{};
size_t start_slot; size_t start_slot{};
size_t size_slots; size_t size_slots{};
}; };
} // namespace VideoCommon } // namespace VideoCommon

View file

@ -54,7 +54,7 @@ public:
return new_id; return new_id;
} }
bool HasPendingSync() override { bool HasPendingSync() const override {
return !pending_sync.empty(); return !pending_sync.empty();
} }
@ -71,8 +71,10 @@ public:
continue; continue;
} }
query.flags |= QueryFlagBits::IsHostSynced; query.flags |= QueryFlagBits::IsHostSynced;
sync_values.emplace_back(query.guest_address, query.value, sync_values.emplace_back(SyncValuesStruct{
True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4); .address = query.guest_address,
.value = query.value,
.size = static_cast<u64>(True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4)});
} }
pending_sync.clear(); pending_sync.clear();
if (sync_values.size() > 0) { if (sync_values.size() > 0) {
@ -90,15 +92,20 @@ class StubStreamer : public GuestStreamer<Traits> {
public: public:
using RuntimeType = typename Traits::RuntimeType; using RuntimeType = typename Traits::RuntimeType;
StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer<Traits>(id_, runtime_) {} StubStreamer(size_t id_, RuntimeType& runtime_, u32 stub_value_)
: GuestStreamer<Traits>(id_, runtime_), stub_value{stub_value_} {}
~StubStreamer() override = default; ~StubStreamer() override = default;
size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value,
std::optional<u32> subreport = std::nullopt) override { std::optional<u32> subreport = std::nullopt) override {
size_t new_id = GuestStreamer<Traits>::WriteCounter(address, has_timestamp, 1U, subreport); size_t new_id =
GuestStreamer<Traits>::WriteCounter(address, has_timestamp, stub_value, subreport);
return new_id; return new_id;
} }
private:
u32 stub_value;
}; };
template <typename Traits> template <typename Traits>
@ -113,7 +120,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
if (streamers[i]) { if (streamers[i]) {
streamer_mask |= 1ULL << i; streamer_mask |= 1ULL << streamers[i]->GetId();
} }
} }
} }
@ -152,7 +159,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
QueryCacheBase<Traits>* owner; QueryCacheBase<Traits>* owner;
VideoCore::RasterizerInterface& rasterizer; VideoCore::RasterizerInterface& rasterizer;
Core::Memory::Memory& cpu_memory; Core::Memory::Memory& cpu_memory;
Traits::RuntimeType& runtime; RuntimeType& runtime;
Tegra::GPU& gpu; Tegra::GPU& gpu;
std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
u64 streamer_mask; u64 streamer_mask;
@ -223,15 +230,11 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence);
size_t streamer_id = static_cast<size_t>(counter_type); size_t streamer_id = static_cast<size_t>(counter_type);
auto* streamer = impl->streamers[streamer_id]; auto* streamer = impl->streamers[streamer_id];
if (!streamer) [[unlikely]] { if (streamer == nullptr) [[unlikely]] {
if (has_timestamp) { counter_type = QueryType::Payload;
u64 timestamp = impl->gpu.GetTicks(); payload = 1U;
gpu_memory->Write<u64>(addr + 8, timestamp); streamer_id = static_cast<size_t>(counter_type);
gpu_memory->Write<u64>(addr, 1ULL); streamer = impl->streamers[streamer_id];
} else {
gpu_memory->Write<u32>(addr, 1U);
}
return;
} }
auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr);
if (!cpu_addr_opt) [[unlikely]] { if (!cpu_addr_opt) [[unlikely]] {
@ -403,12 +406,6 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
impl->runtime.EndHostConditionalRendering(); impl->runtime.EndHostConditionalRendering();
return false; return false;
} }
/*if (!Settings::IsGPULevelHigh()) {
impl->runtime.EndHostConditionalRendering();
return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24,
VideoCommon::CacheType::BufferCache |
VideoCommon::CacheType::QueryCache);
}*/
const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode); const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode);
const GPUVAddr address = regs.render_enable.Address(); const GPUVAddr address = regs.render_enable.Address();
switch (mode) { switch (mode) {
@ -442,6 +439,9 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
// Async downloads // Async downloads
template <typename Traits> template <typename Traits>
void QueryCacheBase<Traits>::CommitAsyncFlushes() { void QueryCacheBase<Traits>::CommitAsyncFlushes() {
// Make sure to have the results synced in Host.
NotifyWFI();
u64 mask{}; u64 mask{};
{ {
std::scoped_lock lk(impl->flush_guard); std::scoped_lock lk(impl->flush_guard);
@ -458,8 +458,19 @@ void QueryCacheBase<Traits>::CommitAsyncFlushes() {
if (mask == 0) { if (mask == 0) {
return; return;
} }
impl->ForEachStreamerIn(mask, u64 ran_mask = ~mask;
[](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); }); while (mask) {
impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) {
u64 dep_mask = streamer->GetDependentMask();
if ((dep_mask & ~ran_mask) != 0) {
return;
}
u64 index = streamer->GetId();
ran_mask |= (1ULL << index);
mask &= ~(1ULL << index);
streamer->PushUnsyncedQueries();
});
}
} }
template <typename Traits> template <typename Traits>
@ -489,13 +500,11 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() {
if (mask == 0) { if (mask == 0) {
return; return;
} }
u64 ran_mask = 0; u64 ran_mask = ~mask;
u64 next_phase = 0;
while (mask) { while (mask) {
impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) { impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) {
u64 dep_mask = streamer->GetDependenceMask(); u64 dep_mask = streamer->GetDependenceMask();
if ((dep_mask & ~ran_mask) != 0) { if ((dep_mask & ~ran_mask) != 0) {
next_phase |= dep_mask;
return; return;
} }
u64 index = streamer->GetId(); u64 index = streamer->GetId();
@ -503,7 +512,6 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() {
mask &= ~(1ULL << index); mask &= ~(1ULL << index);
streamer->PopUnsyncedQueries(); streamer->PopUnsyncedQueries();
}); });
ran_mask |= next_phase;
} }
} }

View file

@ -47,7 +47,7 @@ public:
BitField<0, 27, u32> query_id; BitField<0, 27, u32> query_id;
u32 raw; u32 raw;
std::pair<size_t, size_t> unpack() { std::pair<size_t, size_t> unpack() const {
return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())}; return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())};
} }
}; };
@ -73,7 +73,7 @@ public:
} }
} }
static u64 BuildMask(std::span<QueryType> types) { static u64 BuildMask(std::span<const QueryType> types) {
u64 mask = 0; u64 mask = 0;
for (auto query_type : types) { for (auto query_type : types) {
mask |= 1ULL << (static_cast<u64>(query_type)); mask |= 1ULL << (static_cast<u64>(query_type));
@ -160,7 +160,7 @@ protected:
} }
} }
using ContentCache = typename std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>; using ContentCache = std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>;
void InvalidateQuery(QueryLocation location); void InvalidateQuery(QueryLocation location);
bool IsQueryDirty(QueryLocation location); bool IsQueryDirty(QueryLocation location);
@ -175,7 +175,7 @@ protected:
friend struct QueryCacheBaseImpl; friend struct QueryCacheBaseImpl;
friend RuntimeType; friend RuntimeType;
std::unique_ptr<struct QueryCacheBaseImpl> impl; std::unique_ptr<QueryCacheBaseImpl> impl;
}; };
} // namespace VideoCommon } // namespace VideoCommon

View file

@ -16,7 +16,7 @@ namespace VideoCommon {
class StreamerInterface { class StreamerInterface {
public: public:
StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {} explicit StreamerInterface(size_t id_) : id{id_}, dependence_mask{}, dependent_mask{} {}
virtual ~StreamerInterface() = default; virtual ~StreamerInterface() = default;
virtual QueryBase* GetQuery(size_t id) = 0; virtual QueryBase* GetQuery(size_t id) = 0;
@ -37,7 +37,7 @@ public:
/* Do Nothing */ /* Do Nothing */
} }
virtual bool HasPendingSync() { virtual bool HasPendingSync() const {
return false; return false;
} }
@ -52,7 +52,7 @@ public:
virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
std::optional<u32> subreport = std::nullopt) = 0; std::optional<u32> subreport = std::nullopt) = 0;
virtual bool HasUnsyncedQueries() { virtual bool HasUnsyncedQueries() const {
return false; return false;
} }
@ -71,18 +71,28 @@ public:
} }
u64 GetDependenceMask() const { u64 GetDependenceMask() const {
return dependance_mask; return dependence_mask;
}
/// Returns the bitmask of streamers that depend on this one.
/// Each bit position is a streamer id; the bits are set by MakeDependent() on
/// the streamer being depended on. This is the counterpart of
/// GetDependenceMask(), which reports the streamers this one depends on.
u64 GetDependentMask() const {
    // Must read dependent_mask: returning dependence_mask here would make this
    // accessor identical to GetDependenceMask() and leave dependent_mask unused.
    return dependent_mask;
}
protected: protected:
/// Records that this streamer depends on `depend_on`.
/// Sets the bit for `depend_on`'s id in this streamer's dependence mask and
/// the bit for this streamer's id in `depend_on`'s dependent mask.
void MakeDependent(StreamerInterface* depend_on) {
    const u64 own_bit = 1ULL << id;
    const u64 target_bit = 1ULL << depend_on->id;
    // Both sides of the relation are updated so it can be walked either way.
    depend_on->dependent_mask |= own_bit;
    dependence_mask |= target_bit;
}
const size_t id; const size_t id;
const u64 dependance_mask; u64 dependence_mask;
u64 dependent_mask;
}; };
template <typename QueryType> template <typename QueryType>
class SimpleStreamer : public StreamerInterface { class SimpleStreamer : public StreamerInterface {
public: public:
SimpleStreamer(size_t id_, u64 dependance_mask_ = 0) : StreamerInterface{id_, dependance_mask_} {} explicit SimpleStreamer(size_t id_) : StreamerInterface{id_} {}
virtual ~SimpleStreamer() = default; virtual ~SimpleStreamer() = default;
protected: protected:

View file

@ -9,10 +9,10 @@
#include <utility> #include <utility>
#include "common/common_types.h" #include "common/common_types.h"
#include "common/polyfill_thread.h" #include "common/polyfill_thread.h"
#include "video_core/query_cache/types.h"
#include "video_core/cache_types.h" #include "video_core/cache_types.h"
#include "video_core/engines/fermi_2d.h" #include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/query_cache/types.h"
#include "video_core/rasterizer_download_area.h" #include "video_core/rasterizer_download_area.h"
namespace Tegra { namespace Tegra {
@ -57,7 +57,8 @@ public:
virtual void ResetCounter(VideoCommon::QueryType type) = 0; virtual void ResetCounter(VideoCommon::QueryType type) = 0;
/// Records a GPU query and caches it /// Records a GPU query and caches it
virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0; virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0;
/// Signal an uniform buffer binding /// Signal an uniform buffer binding
virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,

View file

@ -43,7 +43,8 @@ public:
void Clear(u32 layer_count) override; void Clear(u32 layer_count) override;
void DispatchCompute() override; void DispatchCompute() override;
void ResetCounter(VideoCommon::QueryType type) override; void ResetCounter(VideoCommon::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override; void FlushAll() override;

View file

@ -405,8 +405,6 @@ void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
if (type == VideoCommon::QueryType::ZPassPixelCount64) { if (type == VideoCommon::QueryType::ZPassPixelCount64) {
std::optional<u64> timestamp{True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)
? std::make_optional<u64>(gpu.GetTicks()) : std:: nullopt };
if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()});
} else { } else {
@ -414,13 +412,23 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
} }
return; return;
} }
if (type != VideoCommon::QueryType::Payload) {
payload = 1u;
}
std::function<void()> func([this, gpu_addr, flags, memory_manager = gpu_memory, payload]() {
if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
u64 ticks = gpu.GetTicks(); u64 ticks = gpu.GetTicks();
gpu_memory->Write<u64>(gpu_addr + 8, ticks); memory_manager->Write<u64>(gpu_addr + 8, ticks);
gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload)); memory_manager->Write<u64>(gpu_addr, static_cast<u64>(payload));
} else { } else {
gpu_memory->Write<u32>(gpu_addr, payload); memory_manager->Write<u32>(gpu_addr, payload);
} }
});
if (True(flags & VideoCommon::QueryPropertiesFlags::IsAFence)) {
SignalFence(std::move(func));
return;
}
func();
} }
void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,

View file

@ -87,7 +87,8 @@ public:
void Clear(u32 layer_count) override; void Clear(u32 layer_count) override;
void DispatchCompute() override; void DispatchCompute() override;
void ResetCounter(VideoCommon::QueryType type) override; void ResetCounter(VideoCommon::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override; void FlushAll() override;

View file

@ -303,9 +303,9 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
return {staging.buffer, staging.offset}; return {staging.buffer, staging.offset};
} }
ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(const Device& device_, ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(
Scheduler& scheduler_, const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_) ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
: ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr, INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr,
RESOLVE_CONDITIONAL_RENDER_COMP_SPV), RESOLVE_CONDITIONAL_RENDER_COMP_SPV),

View file

@ -7,8 +7,8 @@
#include "video_core/fence_manager.h" #include "video_core/fence_manager.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
namespace Core { namespace Core {
class System; class System;

View file

@ -11,11 +11,9 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_set.hpp>
#include "common/common_types.h" #include "common/common_types.h"
#include "core/memory.h" #include "core/memory.h"
#include "video_core/engines/draw_manager.h"
#include "video_core/query_cache/query_cache.h" #include "video_core/query_cache/query_cache.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_compute_pass.h"
@ -30,6 +28,7 @@
namespace Vulkan { namespace Vulkan {
using Tegra::Engines::Maxwell3D;
using VideoCommon::QueryType; using VideoCommon::QueryType;
namespace { namespace {
@ -37,7 +36,7 @@ class SamplesQueryBank : public VideoCommon::BankBase {
public: public:
static constexpr size_t BANK_SIZE = 256; static constexpr size_t BANK_SIZE = 256;
static constexpr size_t QUERY_SIZE = 8; static constexpr size_t QUERY_SIZE = 8;
SamplesQueryBank(const Device& device_, size_t index_) explicit SamplesQueryBank(const Device& device_, size_t index_)
: BankBase(BANK_SIZE), device{device_}, index{index_} { : BankBase(BANK_SIZE), device{device_}, index{index_} {
const auto& dev = device.GetLogical(); const auto& dev = device.GetLogical();
query_pool = dev.CreateQueryPool({ query_pool = dev.CreateQueryPool({
@ -109,18 +108,19 @@ struct HostSyncValues {
static constexpr bool GeneratesBaseBuffer = false; static constexpr bool GeneratesBaseBuffer = false;
}; };
template <typename Traits>
class SamplesStreamer : public BaseStreamer { class SamplesStreamer : public BaseStreamer {
public: public:
SamplesStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_,
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
: BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_},
memory_allocator{memory_allocator_} { memory_allocator{memory_allocator_} {
BuildResolveBuffer(); BuildResolveBuffer();
current_bank = nullptr; current_bank = nullptr;
current_query = nullptr; current_query = nullptr;
} }
~SamplesStreamer() = default;
void StartCounter() override { void StartCounter() override {
if (has_started) { if (has_started) {
return; return;
@ -157,7 +157,7 @@ public:
PauseCounter(); PauseCounter();
} }
bool HasPendingSync() override { bool HasPendingSync() const override {
return !pending_sync.empty(); return !pending_sync.empty();
} }
@ -198,7 +198,7 @@ public:
} }
resolve_slots_remaining = resolve_slots; resolve_slots_remaining = resolve_slots;
sync_values_stash.emplace_back(); sync_values_stash.emplace_back();
sync_values = sync_values = &sync_values_stash.back(); sync_values = &sync_values_stash.back();
sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE); sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE);
} }
resolve_slots_remaining--; resolve_slots_remaining--;
@ -207,6 +207,7 @@ public:
const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE *
(resolve_slots - resolve_slots_remaining - 1); (resolve_slots - resolve_slots_remaining - 1);
VkQueryPool query_pool = bank->GetInnerPool(); VkQueryPool query_pool = bank->GetInnerPool();
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([start, amount, base_offset, query_pool, scheduler.Record([start, amount, base_offset, query_pool,
buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) { buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) {
size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE; size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE;
@ -284,7 +285,7 @@ public:
return index; return index;
} }
bool HasUnsyncedQueries() override { bool HasUnsyncedQueries() const override {
return !pending_flush_queries.empty(); return !pending_flush_queries.empty();
} }
@ -348,8 +349,8 @@ private:
for (auto q : queries) { for (auto q : queries) {
auto* query = GetQuery(q); auto* query = GetQuery(q);
ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) { ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) {
auto id = bank->GetIndex(); auto id_ = bank->GetIndex();
auto pair = indexer.try_emplace(id, std::numeric_limits<size_t>::max(), auto pair = indexer.try_emplace(id_, std::numeric_limits<size_t>::max(),
std::numeric_limits<size_t>::min()); std::numeric_limits<size_t>::min());
auto& current_pair = pair.first->second; auto& current_pair = pair.first->second;
current_pair.first = std::min(current_pair.first, start); current_pair.first = std::min(current_pair.first, start);
@ -434,13 +435,14 @@ private:
.pNext = nullptr, .pNext = nullptr,
.flags = 0, .flags = 0,
.size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots, .size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots,
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE, .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0, .queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr, .pQueueFamilyIndices = nullptr,
}; };
resolve_buffers.emplace_back( resolve_buffers.emplace_back(
std::move(memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal))); memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal));
} }
static constexpr size_t resolve_slots = 8; static constexpr size_t resolve_slots = 8;
@ -476,7 +478,8 @@ class TFBQueryBank : public VideoCommon::BankBase {
public: public:
static constexpr size_t BANK_SIZE = 1024; static constexpr size_t BANK_SIZE = 1024;
static constexpr size_t QUERY_SIZE = 4; static constexpr size_t QUERY_SIZE = 4;
TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, size_t index_) explicit TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator,
size_t index_)
: BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} { : BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} {
const VkBufferCreateInfo buffer_ci = { const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
@ -525,22 +528,21 @@ private:
vk::Buffer buffer; vk::Buffer buffer;
}; };
template <typename Traits>
class PrimitivesSucceededStreamer; class PrimitivesSucceededStreamer;
template <typename Traits>
class TFBCounterStreamer : public BaseStreamer { class TFBCounterStreamer : public BaseStreamer {
public: public:
TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, explicit TFBCounterStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_,
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
StagingBufferPool& staging_pool_) StagingBufferPool& staging_pool_)
: BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_},
memory_allocator{memory_allocator_}, staging_pool{staging_pool_} { memory_allocator{memory_allocator_}, staging_pool{staging_pool_} {
buffers_count = 0; buffers_count = 0;
current_bank = nullptr; current_bank = nullptr;
counter_buffers.fill(VK_NULL_HANDLE); counter_buffers.fill(VK_NULL_HANDLE);
offsets.fill(0); offsets.fill(0);
last_queries.fill(0); last_queries.fill(0);
last_queries_stride.fill(1);
const VkBufferCreateInfo buffer_ci = { const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
@ -564,6 +566,8 @@ public:
} }
} }
~TFBCounterStreamer() = default;
void StartCounter() override { void StartCounter() override {
FlushBeginTFB(); FlushBeginTFB();
has_started = true; has_started = true;
@ -581,15 +585,15 @@ public:
if (has_flushed_end_pending) { if (has_flushed_end_pending) {
FlushEndTFB(); FlushEndTFB();
} }
runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { runtime.View3DRegs([this](Maxwell3D& maxwell3d) {
if (regs.transform_feedback_enabled == 0) { if (maxwell3d.regs.transform_feedback_enabled == 0) {
streams_mask = 0; streams_mask = 0;
has_started = false; has_started = false;
} }
}); });
} }
bool HasPendingSync() override { bool HasPendingSync() const override {
return !pending_sync.empty(); return !pending_sync.empty();
} }
@ -650,14 +654,19 @@ public:
return index; return index;
} }
std::optional<VAddr> GetLastQueryStream(size_t stream) { std::optional<std::pair<VAddr, size_t>> GetLastQueryStream(size_t stream) {
if (last_queries[stream] != 0) { if (last_queries[stream] != 0) {
return {last_queries[stream]}; std::pair<VAddr, size_t> result(last_queries[stream], last_queries_stride[stream]);
return result;
} }
return std::nullopt; return std::nullopt;
} }
bool HasUnsyncedQueries() override { Maxwell3D::Regs::PrimitiveTopology GetOutputTopology() const {
return out_topology;
}
/// Reports whether any query indices are still waiting in `pending_flush_queries`.
/// @return true when `pending_flush_queries` is not empty.
bool HasUnsyncedQueries() const override {
    const bool nothing_pending = pending_flush_queries.empty();
    return !nothing_pending;
}
@ -762,15 +771,17 @@ private:
void UpdateBuffers() { void UpdateBuffers() {
last_queries.fill(0); last_queries.fill(0);
runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { last_queries_stride.fill(1);
runtime.View3DRegs([this](Maxwell3D& maxwell3d) {
buffers_count = 0; buffers_count = 0;
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; out_topology = maxwell3d.draw_manager->GetDrawState().topology;
i++) { for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) {
const auto& tf = regs.transform_feedback; const auto& tf = maxwell3d.regs.transform_feedback;
if (tf.buffers[i].enable == 0) { if (tf.buffers[i].enable == 0) {
continue; continue;
} }
const size_t stream = tf.controls[i].stream; const size_t stream = tf.controls[i].stream;
last_queries_stride[stream] = tf.controls[i].stride;
streams_mask |= 1ULL << stream; streams_mask |= 1ULL << stream;
buffers_count = std::max<size_t>(buffers_count, stream + 1); buffers_count = std::max<size_t>(buffers_count, stream + 1);
} }
@ -785,7 +796,8 @@ private:
}); });
current_bank = &bank_pool.GetBank(current_bank_id); current_bank = &bank_pool.GetBank(current_bank_id);
} }
auto [dont_care, slot] = current_bank->Reserve(); auto [dont_care, other] = current_bank->Reserve();
const size_t slot = other; // workaround to compile bug.
current_bank->AddReference(); current_bank->AddReference();
static constexpr VkMemoryBarrier READ_BARRIER{ static constexpr VkMemoryBarrier READ_BARRIER{
@ -818,11 +830,9 @@ private:
return {current_bank_id, slot}; return {current_bank_id, slot};
} }
template <typename Traits>
friend class PrimitivesSucceededStreamer; friend class PrimitivesSucceededStreamer;
static constexpr size_t NUM_STREAMS = 4; static constexpr size_t NUM_STREAMS = 4;
static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL;
QueryCacheRuntime& runtime; QueryCacheRuntime& runtime;
const Device& device; const Device& device;
@ -851,6 +861,8 @@ private:
std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; std::array<VkBuffer, NUM_STREAMS> counter_buffers{};
std::array<VkDeviceSize, NUM_STREAMS> offsets{}; std::array<VkDeviceSize, NUM_STREAMS> offsets{};
std::array<VAddr, NUM_STREAMS> last_queries; std::array<VAddr, NUM_STREAMS> last_queries;
std::array<size_t, NUM_STREAMS> last_queries_stride;
Maxwell3D::Regs::PrimitiveTopology out_topology;
u64 streams_mask; u64 streams_mask;
}; };
@ -858,32 +870,34 @@ class PrimitivesQueryBase : public VideoCommon::QueryBase {
public: public:
// Default constructor // Default constructor
PrimitivesQueryBase() PrimitivesQueryBase()
: VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {}
dependant_index{}, dependant_manage{} {}
// Parameterized constructor // Parameterized constructor
PrimitivesQueryBase(bool is_long, VAddr address) PrimitivesQueryBase(bool has_timestamp, VAddr address)
: VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) {
dependant_index{}, dependant_manage{} { if (has_timestamp) {
if (is_long) {
flags |= VideoCommon::QueryFlagBits::HasTimestamp; flags |= VideoCommon::QueryFlagBits::HasTimestamp;
} }
} }
u64 stride; u64 stride{};
VAddr dependant_address; VAddr dependant_address{};
size_t dependant_index; Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points};
bool dependant_manage; size_t dependant_index{};
bool dependant_manage{};
}; };
template <typename Traits>
class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> { class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> {
public: public:
PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_, explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_,
TFBCounterStreamer<QueryCacheParams>& tfb_streamer_, Core::Memory::Memory& cpu_memory_) TFBCounterStreamer& tfb_streamer_,
: VideoCommon::SimpleStreamer<PrimitivesQueryBase>( Core::Memory::Memory& cpu_memory_)
id, 1ULL << static_cast<u64>(VideoCommon::QueryType::StreamingByteCount)), : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_},
runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {} tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {
MakeDependent(&tfb_streamer);
}
~PrimitivesSucceededStreamer() = default;
size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
std::optional<u32> subreport_) override { std::optional<u32> subreport_) override {
@ -901,8 +915,11 @@ public:
const size_t subreport = static_cast<size_t>(*subreport_); const size_t subreport = static_cast<size_t>(*subreport_);
auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport); auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport);
bool must_manage_dependance = false; bool must_manage_dependance = false;
new_query->topology = tfb_streamer.GetOutputTopology();
if (dependant_address_opt) { if (dependant_address_opt) {
new_query->dependant_address = *dependant_address_opt; auto [dep_address, stride] = *dependant_address_opt;
new_query->dependant_address = dep_address;
new_query->stride = stride;
} else { } else {
new_query->dependant_index = new_query->dependant_index =
tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_); tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_);
@ -917,13 +934,13 @@ public:
} }
return index; return index;
} }
new_query->stride = 1;
runtime.View3DRegs([new_query, subreport](Maxwell3D& maxwell3d) {
for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) {
const auto& tf = maxwell3d.regs.transform_feedback;
if (tf.buffers[i].enable == 0) {
continue;
} }
new_query->dependant_manage = must_manage_dependance;
runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) {
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
i++) {
const auto& tf = regs.transform_feedback;
if (tf.controls[i].stream != subreport) { if (tf.controls[i].stream != subreport) {
continue; continue;
} }
@ -931,11 +948,14 @@ public:
break; break;
} }
}); });
}
new_query->dependant_manage = must_manage_dependance;
pending_flush_queries.push_back(index); pending_flush_queries.push_back(index);
return index; return index;
} }
bool HasUnsyncedQueries() override { bool HasUnsyncedQueries() const override {
return !pending_flush_queries.empty(); return !pending_flush_queries.empty();
} }
@ -960,22 +980,49 @@ public:
} }
query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
u64 num_vertices = 0;
if (query->dependant_manage) { if (query->dependant_manage) {
auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index); auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index);
query->value = dependant_query->value / query->stride; num_vertices = dependant_query->value / query->stride;
tfb_streamer.Free(query->dependant_index); tfb_streamer.Free(query->dependant_index);
} else { } else {
u8* pointer = cpu_memory.GetPointer(query->dependant_address); u8* pointer = cpu_memory.GetPointer(query->dependant_address);
u32 result; u32 result;
std::memcpy(&result, pointer, sizeof(u32)); std::memcpy(&result, pointer, sizeof(u32));
query->value = static_cast<u64>(result) / query->stride; num_vertices = static_cast<u64>(result) / query->stride;
} }
// Convert `num_vertices` into a primitive count according to `query->topology`.
query->value = [&]() -> u64 {
    using Topology = Maxwell3D::Regs::PrimitiveTopology;
    // `num_vertices` is unsigned: a plain `num_vertices - k` wraps around to a huge
    // value when fewer than k vertices were counted, so strip-like topologies clamp
    // the subtraction at zero.
    const auto saturating_sub = [](u64 lhs, u64 rhs) -> u64 {
        return lhs > rhs ? lhs - rhs : 0;
    };
    switch (query->topology) {
    case Topology::Points:
        return num_vertices;
    case Topology::Lines:
        return num_vertices / 2;
    case Topology::LineLoop:
        // NOTE(review): yields 1 even when num_vertices is 0 — confirm this is intended.
        return (num_vertices / 2) + 1;
    case Topology::LineStrip:
        return saturating_sub(num_vertices, 1);
    case Topology::Patches:
    case Topology::Triangles:
    case Topology::TrianglesAdjacency:
        return num_vertices / 3;
    case Topology::TriangleFan:
    case Topology::TriangleStrip:
    case Topology::TriangleStripAdjacency:
        return saturating_sub(num_vertices, 2);
    case Topology::Quads:
        return num_vertices / 4;
    case Topology::Polygon:
        // NOTE(review): reported as a single primitive regardless of num_vertices.
        return 1U;
    default:
        // Any topology not listed above reports the vertex count unchanged.
        return num_vertices;
    }
}();
} }
} }
private: private:
QueryCacheRuntime& runtime; QueryCacheRuntime& runtime;
TFBCounterStreamer<QueryCacheParams>& tfb_streamer; TFBCounterStreamer& tfb_streamer;
Core::Memory::Memory& cpu_memory; Core::Memory::Memory& cpu_memory;
// syncing queue // syncing queue
@ -1005,7 +1052,10 @@ struct QueryCacheRuntimeImpl {
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
scheduler, memory_allocator, staging_pool), scheduler, memory_allocator, staging_pool),
primitives_succeeded_streamer( primitives_succeeded_streamer(
static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_), static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer,
cpu_memory_),
primitives_needed_minus_suceeded_streamer(
static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u),
hcr_setup{}, hcr_is_set{}, is_hcr_running{} { hcr_setup{}, hcr_is_set{}, is_hcr_running{} {
hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
@ -1040,9 +1090,10 @@ struct QueryCacheRuntimeImpl {
// Streamers // Streamers
VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer; VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer;
SamplesStreamer<QueryCacheParams> sample_streamer; SamplesStreamer sample_streamer;
TFBCounterStreamer<QueryCacheParams> tfb_streamer; TFBCounterStreamer tfb_streamer;
PrimitivesSucceededStreamer<QueryCacheParams> primitives_succeeded_streamer; PrimitivesSucceededStreamer primitives_succeeded_streamer;
VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_suceeded_streamer;
std::vector<std::pair<VAddr, VAddr>> little_cache; std::vector<std::pair<VAddr, VAddr>> little_cache;
std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to;
@ -1059,7 +1110,7 @@ struct QueryCacheRuntimeImpl {
bool is_hcr_running; bool is_hcr_running;
// maxwell3d // maxwell3d
Tegra::Engines::Maxwell3D* maxwell3d; Maxwell3D* maxwell3d;
}; };
QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
@ -1074,13 +1125,13 @@ QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
staging_pool_, compute_pass_descriptor_queue, descriptor_pool); staging_pool_, compute_pass_descriptor_queue, descriptor_pool);
} }
void QueryCacheRuntime::Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d) { void QueryCacheRuntime::Bind3DEngine(Maxwell3D* maxwell3d) {
impl->maxwell3d = maxwell3d; impl->maxwell3d = maxwell3d;
} }
template <typename Func> template <typename Func>
void QueryCacheRuntime::View3DRegs(Func&& func) { void QueryCacheRuntime::View3DRegs(Func&& func) {
func(impl->maxwell3d->regs); func(*impl->maxwell3d);
} }
void QueryCacheRuntime::EndHostConditionalRendering() { void QueryCacheRuntime::EndHostConditionalRendering() {
@ -1240,8 +1291,12 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp
return &impl->sample_streamer; return &impl->sample_streamer;
case QueryType::StreamingByteCount: case QueryType::StreamingByteCount:
return &impl->tfb_streamer; return &impl->tfb_streamer;
case QueryType::StreamingPrimitivesNeeded:
case QueryType::VtgPrimitivesOut:
case QueryType::StreamingPrimitivesSucceeded: case QueryType::StreamingPrimitivesSucceeded:
return &impl->primitives_succeeded_streamer; return &impl->primitives_succeeded_streamer;
case QueryType::StreamingPrimitivesNeededMinusSucceeded:
return &impl->primitives_needed_minus_suceeded_streamer;
default: default:
return nullptr; return nullptr;
} }

View file

@ -49,7 +49,8 @@ public:
bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty); bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty);
bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1,
VideoCommon::LookupData object_2, bool qc_dirty, bool equal_check); VideoCommon::LookupData object_2, bool qc_dirty,
bool equal_check);
VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type); VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type);
@ -66,7 +67,7 @@ private:
}; };
struct QueryCacheParams { struct QueryCacheParams {
using RuntimeType = Vulkan::QueryCacheRuntime; using RuntimeType = typename Vulkan::QueryCacheRuntime;
}; };
using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>; using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>;

View file

@ -194,15 +194,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
query_cache.NotifySegment(true); query_cache.NotifySegment(true);
#if ANDROID
if (Settings::IsGPULevelHigh()) {
// This is problematic on Android, disable on GPU Normal.
// query_cache.UpdateCounters();
}
#else
// query_cache.UpdateCounters();
#endif
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) { if (!pipeline) {
return; return;
@ -294,15 +285,6 @@ void RasterizerVulkan::DrawTexture() {
query_cache.NotifySegment(true); query_cache.NotifySegment(true);
#if ANDROID
if (Settings::IsGPULevelHigh()) {
// This is problematic on Android, disable on GPU Normal.
// query_cache.UpdateCounters();
}
#else
// query_cache.UpdateCounters();
#endif
texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false); texture_cache.UpdateRenderTargets(false);
@ -332,15 +314,6 @@ void RasterizerVulkan::Clear(u32 layer_count) {
FlushWork(); FlushWork();
gpu_memory->FlushCaching(); gpu_memory->FlushCaching();
#if ANDROID
if (Settings::IsGPULevelHigh()) {
// This is problematic on Android, disable on GPU Normal.
// query_cache.UpdateCounters();
}
#else
// query_cache.UpdateCounters();
#endif
query_cache.NotifySegment(true); query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable); maxwell3d->regs.zpass_pixel_count_enable);

View file

@ -85,7 +85,8 @@ public:
void Clear(u32 layer_count) override; void Clear(u32 layer_count) override;
void DispatchCompute() override; void DispatchCompute() override;
void ResetCounter(VideoCommon::QueryType type) override; void ResetCounter(VideoCommon::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override; void FlushAll() override;

View file

@ -15,9 +15,13 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/polyfill_thread.h" #include "common/polyfill_thread.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/vulkan_common/vulkan_wrapper.h" #include "video_core/vulkan_common/vulkan_wrapper.h"
// Forward declaration of the query cache class template, so that this header can refer
// to VideoCommon::QueryCacheBase<...> by pointer and reference without needing the
// full definition of the template.
namespace VideoCommon {
template <typename Trait>
class QueryCacheBase;
}
namespace Vulkan { namespace Vulkan {
class CommandPool; class CommandPool;
@ -26,6 +30,8 @@ class Framebuffer;
class GraphicsPipeline; class GraphicsPipeline;
class StateTracker; class StateTracker;
struct QueryCacheParams;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do /// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers. /// OpenGL-like operations on Vulkan command buffers.
class Scheduler { class Scheduler {
@ -63,7 +69,7 @@ public:
void InvalidateState(); void InvalidateState();
/// Assigns the query cache. /// Assigns the query cache.
void SetQueryCache(QueryCache& query_cache_) { void SetQueryCache(VideoCommon::QueryCacheBase<QueryCacheParams>& query_cache_) {
query_cache = &query_cache_; query_cache = &query_cache_;
} }
@ -219,7 +225,7 @@ private:
std::unique_ptr<MasterSemaphore> master_semaphore; std::unique_ptr<MasterSemaphore> master_semaphore;
std::unique_ptr<CommandPool> command_pool; std::unique_ptr<CommandPool> command_pool;
QueryCache* query_cache = nullptr; VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf; vk::CommandBuffer current_cmdbuf;