mirror of
https://git.suyu.dev/suyu/suyu
synced 2024-12-27 03:42:51 -06:00
MacroHLE: Implement DrawIndexedIndirect & DrawArraysIndirect.
This commit is contained in:
parent
a5a94f52ff
commit
0f89828073
16 changed files with 252 additions and 72 deletions
|
@ -171,7 +171,9 @@ public:
|
||||||
bool is_written, bool is_image);
|
bool is_written, bool is_image);
|
||||||
|
|
||||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
|
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
|
||||||
bool synchronize, bool mark_as_written);
|
bool synchronize = true,
|
||||||
|
bool mark_as_written = false,
|
||||||
|
bool discard_downloads = false);
|
||||||
|
|
||||||
void FlushCachedWrites();
|
void FlushCachedWrites();
|
||||||
|
|
||||||
|
@ -203,6 +205,14 @@ public:
|
||||||
/// Return true when a CPU region is modified from the CPU
|
/// Return true when a CPU region is modified from the CPU
|
||||||
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
|
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
|
||||||
|
|
||||||
|
void SetDrawIndirect(const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
|
||||||
|
current_draw_indirect = current_draw_indirect_;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
|
||||||
|
|
||||||
|
[[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
|
||||||
|
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
Runtime& runtime;
|
Runtime& runtime;
|
||||||
|
|
||||||
|
@ -275,6 +285,8 @@ private:
|
||||||
|
|
||||||
void BindHostVertexBuffers();
|
void BindHostVertexBuffers();
|
||||||
|
|
||||||
|
void BindHostDrawIndirectBuffers();
|
||||||
|
|
||||||
void BindHostGraphicsUniformBuffers(size_t stage);
|
void BindHostGraphicsUniformBuffers(size_t stage);
|
||||||
|
|
||||||
void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
|
void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
|
||||||
|
@ -301,6 +313,8 @@ private:
|
||||||
|
|
||||||
void UpdateVertexBuffer(u32 index);
|
void UpdateVertexBuffer(u32 index);
|
||||||
|
|
||||||
|
void UpdateDrawIndirect();
|
||||||
|
|
||||||
void UpdateUniformBuffers(size_t stage);
|
void UpdateUniformBuffers(size_t stage);
|
||||||
|
|
||||||
void UpdateStorageBuffers(size_t stage);
|
void UpdateStorageBuffers(size_t stage);
|
||||||
|
@ -340,6 +354,8 @@ private:
|
||||||
|
|
||||||
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
|
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
|
||||||
|
|
||||||
|
bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
|
||||||
|
|
||||||
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
||||||
std::span<BufferCopy> copies);
|
std::span<BufferCopy> copies);
|
||||||
|
|
||||||
|
@ -375,6 +391,8 @@ private:
|
||||||
SlotVector<Buffer> slot_buffers;
|
SlotVector<Buffer> slot_buffers;
|
||||||
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
|
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
|
||||||
|
|
||||||
|
const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
|
||||||
|
|
||||||
u32 last_index_count = 0;
|
u32 last_index_count = 0;
|
||||||
|
|
||||||
Binding index_buffer;
|
Binding index_buffer;
|
||||||
|
@ -383,6 +401,8 @@ private:
|
||||||
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
|
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
|
||||||
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
|
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
|
||||||
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
|
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
|
||||||
|
Binding count_buffer_binding;
|
||||||
|
Binding indirect_buffer_binding;
|
||||||
|
|
||||||
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
|
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
|
||||||
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
|
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
|
||||||
|
@ -422,6 +442,7 @@ private:
|
||||||
|
|
||||||
std::vector<BufferId> cached_write_buffer_ids;
|
std::vector<BufferId> cached_write_buffer_ids;
|
||||||
|
|
||||||
|
IntervalSet discarded_ranges;
|
||||||
IntervalSet uncommitted_ranges;
|
IntervalSet uncommitted_ranges;
|
||||||
IntervalSet common_ranges;
|
IntervalSet common_ranges;
|
||||||
std::deque<IntervalSet> committed_ranges;
|
std::deque<IntervalSet> committed_ranges;
|
||||||
|
@ -579,13 +600,17 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
|
||||||
}};
|
}};
|
||||||
|
|
||||||
boost::container::small_vector<IntervalType, 4> tmp_intervals;
|
boost::container::small_vector<IntervalType, 4> tmp_intervals;
|
||||||
|
const bool is_high_accuracy =
|
||||||
|
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
|
||||||
auto mirror = [&](VAddr base_address, VAddr base_address_end) {
|
auto mirror = [&](VAddr base_address, VAddr base_address_end) {
|
||||||
const u64 size = base_address_end - base_address;
|
const u64 size = base_address_end - base_address;
|
||||||
const VAddr diff = base_address - *cpu_src_address;
|
const VAddr diff = base_address - *cpu_src_address;
|
||||||
const VAddr new_base_address = *cpu_dest_address + diff;
|
const VAddr new_base_address = *cpu_dest_address + diff;
|
||||||
const IntervalType add_interval{new_base_address, new_base_address + size};
|
const IntervalType add_interval{new_base_address, new_base_address + size};
|
||||||
uncommitted_ranges.add(add_interval);
|
|
||||||
tmp_intervals.push_back(add_interval);
|
tmp_intervals.push_back(add_interval);
|
||||||
|
if (is_high_accuracy) {
|
||||||
|
uncommitted_ranges.add(add_interval);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
ForEachWrittenRange(*cpu_src_address, amount, mirror);
|
ForEachWrittenRange(*cpu_src_address, amount, mirror);
|
||||||
// This subtraction in this order is important for overlapping copies.
|
// This subtraction in this order is important for overlapping copies.
|
||||||
|
@ -677,6 +702,9 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
|
||||||
}
|
}
|
||||||
BindHostVertexBuffers();
|
BindHostVertexBuffers();
|
||||||
BindHostTransformFeedbackBuffers();
|
BindHostTransformFeedbackBuffers();
|
||||||
|
if (current_draw_indirect) {
|
||||||
|
BindHostDrawIndirectBuffers();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -796,7 +824,8 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add
|
||||||
template <class P>
|
template <class P>
|
||||||
std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size,
|
std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size,
|
||||||
bool synchronize,
|
bool synchronize,
|
||||||
bool mark_as_written) {
|
bool mark_as_written,
|
||||||
|
bool discard_downloads) {
|
||||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||||
if (!cpu_addr) {
|
if (!cpu_addr) {
|
||||||
return {&slot_buffers[NULL_BUFFER_ID], 0};
|
return {&slot_buffers[NULL_BUFFER_ID], 0};
|
||||||
|
@ -804,11 +833,17 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad
|
||||||
const BufferId buffer_id = FindBuffer(*cpu_addr, size);
|
const BufferId buffer_id = FindBuffer(*cpu_addr, size);
|
||||||
Buffer& buffer = slot_buffers[buffer_id];
|
Buffer& buffer = slot_buffers[buffer_id];
|
||||||
if (synchronize) {
|
if (synchronize) {
|
||||||
SynchronizeBuffer(buffer, *cpu_addr, size);
|
// SynchronizeBuffer(buffer, *cpu_addr, size);
|
||||||
|
SynchronizeBufferNoModified(buffer, *cpu_addr, size);
|
||||||
}
|
}
|
||||||
if (mark_as_written) {
|
if (mark_as_written) {
|
||||||
MarkWrittenBuffer(buffer_id, *cpu_addr, size);
|
MarkWrittenBuffer(buffer_id, *cpu_addr, size);
|
||||||
}
|
}
|
||||||
|
if (discard_downloads) {
|
||||||
|
IntervalType interval{*cpu_addr, size};
|
||||||
|
ClearDownload(interval);
|
||||||
|
discarded_ranges.subtract(interval);
|
||||||
|
}
|
||||||
return {&buffer, buffer.Offset(*cpu_addr)};
|
return {&buffer, buffer.Offset(*cpu_addr)};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -827,10 +862,6 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::AccumulateFlushes() {
|
void BufferCache<P>::AccumulateFlushes() {
|
||||||
if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
|
|
||||||
uncommitted_ranges.clear();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (uncommitted_ranges.empty()) {
|
if (uncommitted_ranges.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -845,12 +876,15 @@ bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::CommitAsyncFlushesHigh() {
|
void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||||
AccumulateFlushes();
|
AccumulateFlushes();
|
||||||
|
|
||||||
|
for (const auto& interval : discarded_ranges) {
|
||||||
|
common_ranges.subtract(interval);
|
||||||
|
}
|
||||||
|
|
||||||
if (committed_ranges.empty()) {
|
if (committed_ranges.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
MICROPROFILE_SCOPE(GPU_DownloadMemory);
|
MICROPROFILE_SCOPE(GPU_DownloadMemory);
|
||||||
const bool is_accuracy_normal =
|
|
||||||
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
|
|
||||||
|
|
||||||
auto it = committed_ranges.begin();
|
auto it = committed_ranges.begin();
|
||||||
while (it != committed_ranges.end()) {
|
while (it != committed_ranges.end()) {
|
||||||
|
@ -875,9 +909,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||||
ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
|
ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
|
||||||
buffer.ForEachDownloadRangeAndClear(
|
buffer.ForEachDownloadRangeAndClear(
|
||||||
cpu_addr, size, [&](u64 range_offset, u64 range_size) {
|
cpu_addr, size, [&](u64 range_offset, u64 range_size) {
|
||||||
if (is_accuracy_normal) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const VAddr buffer_addr = buffer.CpuAddr();
|
const VAddr buffer_addr = buffer.CpuAddr();
|
||||||
const auto add_download = [&](VAddr start, VAddr end) {
|
const auto add_download = [&](VAddr start, VAddr end) {
|
||||||
const u64 new_offset = start - buffer_addr;
|
const u64 new_offset = start - buffer_addr;
|
||||||
|
@ -891,7 +922,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||||
buffer_id,
|
buffer_id,
|
||||||
});
|
});
|
||||||
// Align up to avoid cache conflicts
|
// Align up to avoid cache conflicts
|
||||||
constexpr u64 align = 256ULL;
|
constexpr u64 align = 8ULL;
|
||||||
constexpr u64 mask = ~(align - 1ULL);
|
constexpr u64 mask = ~(align - 1ULL);
|
||||||
total_size_bytes += (new_size + align - 1) & mask;
|
total_size_bytes += (new_size + align - 1) & mask;
|
||||||
largest_copy = std::max(largest_copy, new_size);
|
largest_copy = std::max(largest_copy, new_size);
|
||||||
|
@ -942,12 +973,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::CommitAsyncFlushes() {
|
void BufferCache<P>::CommitAsyncFlushes() {
|
||||||
if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) {
|
|
||||||
CommitAsyncFlushesHigh();
|
CommitAsyncFlushesHigh();
|
||||||
} else {
|
|
||||||
uncommitted_ranges.clear();
|
|
||||||
committed_ranges.clear();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -1063,6 +1089,19 @@ void BufferCache<P>::BindHostVertexBuffers() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void BufferCache<P>::BindHostDrawIndirectBuffers() {
|
||||||
|
const auto bind_buffer = [this](const Binding& binding) {
|
||||||
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
|
TouchBuffer(buffer, binding.buffer_id);
|
||||||
|
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
|
||||||
|
};
|
||||||
|
if (current_draw_indirect->include_count) {
|
||||||
|
bind_buffer(count_buffer_binding);
|
||||||
|
}
|
||||||
|
bind_buffer(indirect_buffer_binding);
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
|
void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
|
||||||
u32 dirty = ~0U;
|
u32 dirty = ~0U;
|
||||||
|
@ -1294,6 +1333,9 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
|
||||||
UpdateStorageBuffers(stage);
|
UpdateStorageBuffers(stage);
|
||||||
UpdateTextureBuffers(stage);
|
UpdateTextureBuffers(stage);
|
||||||
}
|
}
|
||||||
|
if (current_draw_indirect) {
|
||||||
|
UpdateDrawIndirect();
|
||||||
|
}
|
||||||
} while (has_deleted_buffers);
|
} while (has_deleted_buffers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1383,6 +1425,27 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void BufferCache<P>::UpdateDrawIndirect() {
|
||||||
|
const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) {
|
||||||
|
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||||
|
if (!cpu_addr) {
|
||||||
|
binding = NULL_BINDING;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
binding = Binding{
|
||||||
|
.cpu_addr = *cpu_addr,
|
||||||
|
.size = static_cast<u32>(size),
|
||||||
|
.buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
|
||||||
|
};
|
||||||
|
};
|
||||||
|
if (current_draw_indirect->include_count) {
|
||||||
|
update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding);
|
||||||
|
}
|
||||||
|
update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
|
||||||
|
indirect_buffer_binding);
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
|
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
|
||||||
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
||||||
|
@ -1704,6 +1767,51 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
|
||||||
|
boost::container::small_vector<BufferCopy, 4> copies;
|
||||||
|
u64 total_size_bytes = 0;
|
||||||
|
u64 largest_copy = 0;
|
||||||
|
IntervalSet found_sets{};
|
||||||
|
auto make_copies = [&] {
|
||||||
|
for (auto& interval : found_sets) {
|
||||||
|
const std::size_t sub_size = interval.upper() - interval.lower();
|
||||||
|
const VAddr cpu_addr = interval.lower();
|
||||||
|
copies.push_back(BufferCopy{
|
||||||
|
.src_offset = total_size_bytes,
|
||||||
|
.dst_offset = cpu_addr - buffer.CpuAddr(),
|
||||||
|
.size = sub_size,
|
||||||
|
});
|
||||||
|
total_size_bytes += sub_size;
|
||||||
|
largest_copy = std::max(largest_copy, sub_size);
|
||||||
|
}
|
||||||
|
const std::span<BufferCopy> copies_span(copies.data(), copies.size());
|
||||||
|
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
|
||||||
|
};
|
||||||
|
buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
|
||||||
|
const VAddr base_adr = buffer.CpuAddr() + range_offset;
|
||||||
|
const VAddr end_adr = base_adr + range_size;
|
||||||
|
const IntervalType add_interval{base_adr, end_adr};
|
||||||
|
found_sets.add(add_interval);
|
||||||
|
});
|
||||||
|
if (found_sets.empty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
const IntervalType search_interval{cpu_addr, cpu_addr + size};
|
||||||
|
auto it = common_ranges.lower_bound(search_interval);
|
||||||
|
auto it_end = common_ranges.upper_bound(search_interval);
|
||||||
|
if (it == common_ranges.end()) {
|
||||||
|
make_copies();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
while (it != it_end) {
|
||||||
|
found_sets.subtract(*it);
|
||||||
|
it++;
|
||||||
|
}
|
||||||
|
make_copies();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
||||||
std::span<BufferCopy> copies) {
|
std::span<BufferCopy> copies) {
|
||||||
|
@ -1963,4 +2071,16 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
|
||||||
|
auto& buffer = slot_buffers[count_buffer_binding.buffer_id];
|
||||||
|
return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
|
||||||
|
auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id];
|
||||||
|
return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
|
|
@ -97,6 +97,7 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
||||||
if (dma_state.non_incrementing) {
|
if (dma_state.non_incrementing) {
|
||||||
const u32 max_write = static_cast<u32>(
|
const u32 max_write = static_cast<u32>(
|
||||||
std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
|
std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
|
||||||
|
dma_state.dma_word_offset = static_cast<u32>(index * sizeof(u32));
|
||||||
CallMultiMethod(&command_header.argument, max_write);
|
CallMultiMethod(&command_header.argument, max_write);
|
||||||
dma_state.method_count -= max_write;
|
dma_state.method_count -= max_write;
|
||||||
dma_state.is_last_call = true;
|
dma_state.is_last_call = true;
|
||||||
|
@ -175,7 +176,7 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
|
||||||
dma_state.method_count);
|
dma_state.method_count);
|
||||||
} else {
|
} else {
|
||||||
auto subchannel = subchannels[dma_state.subchannel];
|
auto subchannel = subchannels[dma_state.subchannel];
|
||||||
subchannel->current_dma_segment = dma_state.dma_get;
|
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
|
||||||
subchannel->CallMultiMethod(dma_state.method, base_start, num_methods,
|
subchannel->CallMultiMethod(dma_state.method, base_start, num_methods,
|
||||||
dma_state.method_count);
|
dma_state.method_count);
|
||||||
}
|
}
|
||||||
|
|
|
@ -157,6 +157,7 @@ private:
|
||||||
u32 method_count; ///< Current method count
|
u32 method_count; ///< Current method count
|
||||||
u32 length_pending; ///< Large NI command length pending
|
u32 length_pending; ///< Large NI command length pending
|
||||||
GPUVAddr dma_get; ///< Currently read segment
|
GPUVAddr dma_get; ///< Currently read segment
|
||||||
|
u32 dma_word_offset; ///< Current word offset from address
|
||||||
bool non_incrementing; ///< Current command's NI flag
|
bool non_incrementing; ///< Current command's NI flag
|
||||||
bool is_last_call;
|
bool is_last_call;
|
||||||
};
|
};
|
||||||
|
|
|
@ -216,7 +216,7 @@ void DrawManager::ProcessDrawIndirect(bool draw_indexed) {
|
||||||
UpdateTopology();
|
UpdateTopology();
|
||||||
|
|
||||||
if (maxwell3d->ShouldExecute()) {
|
if (maxwell3d->ShouldExecute()) {
|
||||||
maxwell3d->rasterizer->DrawIndirect(draw_indexed);
|
maxwell3d->rasterizer->DrawIndirect();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace Tegra::Engines
|
} // namespace Tegra::Engines
|
||||||
|
|
|
@ -33,7 +33,10 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
struct IndirectParams {
|
struct IndirectParams {
|
||||||
GPUVAddr start_address;
|
bool is_indexed;
|
||||||
|
bool include_count;
|
||||||
|
GPUVAddr count_start_address;
|
||||||
|
GPUVAddr indirect_start_address;
|
||||||
size_t buffer_size;
|
size_t buffer_size;
|
||||||
size_t max_draw_counts;
|
size_t max_draw_counts;
|
||||||
size_t stride;
|
size_t stride;
|
||||||
|
|
|
@ -130,11 +130,15 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_params.insert(macro_params.end(), base_start, base_start + amount);
|
macro_params.insert(macro_params.end(), base_start, base_start + amount);
|
||||||
|
for (size_t i = 0; i < amount; i++) {
|
||||||
|
macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
|
||||||
|
}
|
||||||
|
|
||||||
// Call the macro when there are no more parameters in the command buffer
|
// Call the macro when there are no more parameters in the command buffer
|
||||||
if (is_last_call) {
|
if (is_last_call) {
|
||||||
CallMacroMethod(executing_macro, macro_params);
|
CallMacroMethod(executing_macro, macro_params);
|
||||||
macro_params.clear();
|
macro_params.clear();
|
||||||
|
macro_addresses.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3067,6 +3067,15 @@ public:
|
||||||
std::unique_ptr<DrawManager> draw_manager;
|
std::unique_ptr<DrawManager> draw_manager;
|
||||||
friend class DrawManager;
|
friend class DrawManager;
|
||||||
|
|
||||||
|
std::vector<u8> inline_index_draw_indexes;
|
||||||
|
std::vector<GPUVAddr> macro_addresses;
|
||||||
|
|
||||||
|
Core::System& system;
|
||||||
|
MemoryManager& memory_manager;
|
||||||
|
|
||||||
|
/// Handles a write to the CLEAR_BUFFERS register.
|
||||||
|
void ProcessClearBuffers(u32 layer_count);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void InitializeRegisterDefaults();
|
void InitializeRegisterDefaults();
|
||||||
|
|
||||||
|
@ -3126,9 +3135,6 @@ private:
|
||||||
/// Returns a query's value or an empty object if the value will be deferred through a cache.
|
/// Returns a query's value or an empty object if the value will be deferred through a cache.
|
||||||
std::optional<u64> GetQueryResult();
|
std::optional<u64> GetQueryResult();
|
||||||
|
|
||||||
Core::System& system;
|
|
||||||
MemoryManager& memory_manager;
|
|
||||||
|
|
||||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||||
|
|
||||||
/// Start offsets of each macro in macro_memory
|
/// Start offsets of each macro in macro_memory
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/macro/macro.h"
|
#include "video_core/macro/macro.h"
|
||||||
#include "video_core/macro/macro_hle.h"
|
#include "video_core/macro/macro_hle.h"
|
||||||
|
#include "video_core/memory_manager.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
@ -24,15 +25,14 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
|
||||||
parameters[4], parameters[1], parameters[3], parameters[5], instance_count);
|
parameters[4], parameters[1], parameters[3], parameters[5], instance_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
void HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
||||||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
|
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
|
||||||
maxwell3d.draw_manager->DrawArray(
|
maxwell3d.draw_manager->DrawArray(
|
||||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
||||||
parameters[3], parameters[1], parameters[4], instance_count);
|
parameters[3], parameters[1], parameters[4], instance_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
void HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
||||||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
|
|
||||||
const u32 element_base = parameters[4];
|
const u32 element_base = parameters[4];
|
||||||
const u32 base_instance = parameters[5];
|
const u32 base_instance = parameters[5];
|
||||||
maxwell3d.regs.vertex_id_base = element_base;
|
maxwell3d.regs.vertex_id_base = element_base;
|
||||||
|
@ -41,9 +41,18 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
|
||||||
maxwell3d.CallMethod(0x8e4, element_base, true);
|
maxwell3d.CallMethod(0x8e4, element_base, true);
|
||||||
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
||||||
|
|
||||||
maxwell3d.draw_manager->DrawIndex(
|
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
params.is_indexed = true;
|
||||||
parameters[3], parameters[1], element_base, base_instance, instance_count);
|
params.include_count = false;
|
||||||
|
params.count_start_address = 0;
|
||||||
|
params.indirect_start_address = maxwell3d.macro_addresses[1];
|
||||||
|
params.buffer_size = 5 * sizeof(u32);
|
||||||
|
params.max_draw_counts = 1;
|
||||||
|
params.stride = 0;
|
||||||
|
|
||||||
|
maxwell3d.draw_manager->DrawIndexedIndirect(
|
||||||
|
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), 0,
|
||||||
|
1U << 18);
|
||||||
|
|
||||||
maxwell3d.regs.vertex_id_base = 0x0;
|
maxwell3d.regs.vertex_id_base = 0x0;
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||||
|
@ -51,8 +60,9 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
|
||||||
maxwell3d.CallMethod(0x8e5, 0x0, true);
|
maxwell3d.CallMethod(0x8e5, 0x0, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Multidraw Indirect
|
// Multidraw Indexed Indirect
|
||||||
void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
void HLE_MultiDrawIndexedIndirect(Engines::Maxwell3D& maxwell3d,
|
||||||
|
const std::vector<u32>& parameters) {
|
||||||
const u32 start_indirect = parameters[0];
|
const u32 start_indirect = parameters[0];
|
||||||
const u32 end_indirect = parameters[1];
|
const u32 end_indirect = parameters[1];
|
||||||
if (start_indirect >= end_indirect) {
|
if (start_indirect >= end_indirect) {
|
||||||
|
@ -66,7 +76,6 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
|
||||||
// size of each indirect segment
|
// size of each indirect segment
|
||||||
const u32 indirect_words = 5 + padding;
|
const u32 indirect_words = 5 + padding;
|
||||||
const u32 stride = indirect_words * sizeof(u32);
|
const u32 stride = indirect_words * sizeof(u32);
|
||||||
const GPUVAddr start_address = maxwell3d.current_dma_segment + 4 * sizeof(u32);
|
|
||||||
const std::size_t draw_count = end_indirect - start_indirect;
|
const std::size_t draw_count = end_indirect - start_indirect;
|
||||||
u32 lowest_first = std::numeric_limits<u32>::max();
|
u32 lowest_first = std::numeric_limits<u32>::max();
|
||||||
u32 highest_limit = std::numeric_limits<u32>::min();
|
u32 highest_limit = std::numeric_limits<u32>::min();
|
||||||
|
@ -80,12 +89,16 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
|
||||||
|
|
||||||
const u32 base_vertex = parameters[8];
|
const u32 base_vertex = parameters[8];
|
||||||
const u32 base_instance = parameters[9];
|
const u32 base_instance = parameters[9];
|
||||||
|
maxwell3d.regs.vertex_id_base = base_vertex;
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||||
maxwell3d.CallMethod(0x8e4, base_vertex, true);
|
maxwell3d.CallMethod(0x8e4, base_vertex, true);
|
||||||
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
||||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||||
params.start_address = start_address;
|
params.is_indexed = true;
|
||||||
params.buffer_size = sizeof(u32) + stride * draw_count;
|
params.include_count = true;
|
||||||
|
params.count_start_address = maxwell3d.macro_addresses[4];
|
||||||
|
params.indirect_start_address = maxwell3d.macro_addresses[5];
|
||||||
|
params.buffer_size = stride * draw_count;
|
||||||
params.max_draw_counts = draw_count;
|
params.max_draw_counts = draw_count;
|
||||||
params.stride = stride;
|
params.stride = stride;
|
||||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
|
@ -93,7 +106,7 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
|
||||||
}
|
}
|
||||||
|
|
||||||
// Multi-layer Clear
|
// Multi-layer Clear
|
||||||
void HLE_EAD26C3E2109B06B(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
void HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
||||||
ASSERT(parameters.size() == 1);
|
ASSERT(parameters.size() == 1);
|
||||||
|
|
||||||
const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
|
const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
|
||||||
|
@ -107,10 +120,10 @@ void HLE_EAD26C3E2109B06B(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
|
||||||
|
|
||||||
constexpr std::array<std::pair<u64, HLEFunction>, 5> hle_funcs{{
|
constexpr std::array<std::pair<u64, HLEFunction>, 5> hle_funcs{{
|
||||||
{0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
|
{0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
|
||||||
{0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD},
|
{0x0D61FC9FAAC9FCAD, &HLE_DrawArraysIndirect},
|
||||||
{0x0217920100488FF7, &HLE_0217920100488FF7},
|
{0x0217920100488FF7, &HLE_DrawIndexedIndirect},
|
||||||
{0x3F5E74B9C9A50164, &HLE_3F5E74B9C9A50164},
|
{0x3F5E74B9C9A50164, &HLE_MultiDrawIndexedIndirect},
|
||||||
{0xEAD26C3E2109B06B, &HLE_EAD26C3E2109B06B},
|
{0xEAD26C3E2109B06B, &HLE_MultiLayerClear},
|
||||||
}};
|
}};
|
||||||
|
|
||||||
class HLEMacroImpl final : public CachedMacro {
|
class HLEMacroImpl final : public CachedMacro {
|
||||||
|
|
|
@ -43,7 +43,7 @@ public:
|
||||||
virtual void Draw(bool is_indexed, u32 instance_count) = 0;
|
virtual void Draw(bool is_indexed, u32 instance_count) = 0;
|
||||||
|
|
||||||
/// Dispatches an indirect draw invocation
|
/// Dispatches an indirect draw invocation
|
||||||
virtual void DrawIndirect(bool is_indexed) {}
|
virtual void DrawIndirect() {}
|
||||||
|
|
||||||
/// Clear the current framebuffer
|
/// Clear the current framebuffer
|
||||||
virtual void Clear(u32 layer_count) = 0;
|
virtual void Clear(u32 layer_count) = 0;
|
||||||
|
|
|
@ -56,7 +56,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
|
||||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||||
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
|
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
|
||||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
|
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
|
||||||
|
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
|
||||||
if (device.IsExtTransformFeedbackSupported()) {
|
if (device.IsExtTransformFeedbackSupported()) {
|
||||||
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
|
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
|
||||||
}
|
}
|
||||||
|
@ -516,6 +517,7 @@ void BufferCacheRuntime::ReserveNullBuffer() {
|
||||||
if (device.IsExtTransformFeedbackSupported()) {
|
if (device.IsExtTransformFeedbackSupported()) {
|
||||||
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
|
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
|
||||||
}
|
}
|
||||||
|
create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
|
||||||
null_buffer = device.GetLogical().CreateBuffer(create_info);
|
null_buffer = device.GetLogical().CreateBuffer(create_info);
|
||||||
if (device.HasDebuggingToolAttached()) {
|
if (device.HasDebuggingToolAttached()) {
|
||||||
null_buffer.SetObjectNameEXT("Null buffer");
|
null_buffer.SetObjectNameEXT("Null buffer");
|
||||||
|
|
|
@ -225,25 +225,40 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::DrawIndirect(bool is_indexed) {
|
void RasterizerVulkan::DrawIndirect() {
|
||||||
PrepareDraw(is_indexed, [this, is_indexed] {
|
const auto& params = maxwell3d->draw_manager->GetIndirectParams();
|
||||||
const auto params = maxwell3d->draw_manager->GetIndirectParams();
|
buffer_cache.SetDrawIndirect(&params);
|
||||||
const auto [buffer, offset] = buffer_cache.ObtainBuffer(
|
PrepareDraw(params.is_indexed, [this, &params] {
|
||||||
params.start_address, static_cast<u32>(params.buffer_size), true, false);
|
const auto [buffer, offset] = buffer_cache.GetDrawIndirectBuffer();
|
||||||
scheduler.Record([buffer_obj = buffer->Handle(), offset,
|
if (params.include_count) {
|
||||||
max_draw_counts = params.max_draw_counts, stride = params.stride,
|
const auto [draw_buffer, offset_base] = buffer_cache.GetDrawIndirectCount();
|
||||||
is_indexed](vk::CommandBuffer cmdbuf) {
|
scheduler.Record([draw_buffer_obj = draw_buffer->Handle(),
|
||||||
if (is_indexed) {
|
buffer_obj = buffer->Handle(), offset_base, offset,
|
||||||
cmdbuf.DrawIndexedIndirectCount(buffer_obj, offset + 4ULL, buffer_obj, offset,
|
params](vk::CommandBuffer cmdbuf) {
|
||||||
static_cast<u32>(max_draw_counts),
|
if (params.is_indexed) {
|
||||||
static_cast<u32>(stride));
|
cmdbuf.DrawIndexedIndirectCount(
|
||||||
|
buffer_obj, offset, draw_buffer_obj, offset_base,
|
||||||
|
static_cast<u32>(params.max_draw_counts), static_cast<u32>(params.stride));
|
||||||
} else {
|
} else {
|
||||||
cmdbuf.DrawIndirectCount(buffer_obj, offset + 4ULL, buffer_obj, offset,
|
cmdbuf.DrawIndirectCount(buffer_obj, offset, draw_buffer_obj, offset_base,
|
||||||
static_cast<u32>(max_draw_counts),
|
static_cast<u32>(params.max_draw_counts),
|
||||||
static_cast<u32>(stride));
|
static_cast<u32>(params.stride));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) {
|
||||||
|
if (params.is_indexed) {
|
||||||
|
cmdbuf.DrawIndexedIndirect(buffer_obj, offset,
|
||||||
|
static_cast<u32>(params.max_draw_counts),
|
||||||
|
static_cast<u32>(params.stride));
|
||||||
|
} else {
|
||||||
|
cmdbuf.DrawIndirect(buffer_obj, offset, static_cast<u32>(params.max_draw_counts),
|
||||||
|
static_cast<u32>(params.stride));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
buffer_cache.SetDrawIndirect(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::Clear(u32 layer_count) {
|
void RasterizerVulkan::Clear(u32 layer_count) {
|
||||||
|
@ -425,9 +440,6 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
|
||||||
|
|
||||||
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
|
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
|
||||||
std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
|
std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
|
||||||
if (!Settings::IsGPULevelHigh()) {
|
|
||||||
return buffer_cache.IsRegionGpuModified(addr, size);
|
|
||||||
}
|
|
||||||
return texture_cache.IsRegionGpuModified(addr, size) ||
|
return texture_cache.IsRegionGpuModified(addr, size) ||
|
||||||
buffer_cache.IsRegionGpuModified(addr, size);
|
buffer_cache.IsRegionGpuModified(addr, size);
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,7 +65,7 @@ public:
|
||||||
~RasterizerVulkan() override;
|
~RasterizerVulkan() override;
|
||||||
|
|
||||||
void Draw(bool is_indexed, u32 instance_count) override;
|
void Draw(bool is_indexed, u32 instance_count) override;
|
||||||
void DrawIndirect(bool is_indexed) override;
|
void DrawIndirect() override;
|
||||||
void Clear(u32 layer_count) override;
|
void Clear(u32 layer_count) override;
|
||||||
void DispatchCompute() override;
|
void DispatchCompute() override;
|
||||||
void ResetCounter(VideoCore::QueryType type) override;
|
void ResetCounter(VideoCore::QueryType type) override;
|
||||||
|
|
|
@ -351,7 +351,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||||
.dualSrcBlend = true,
|
.dualSrcBlend = true,
|
||||||
.logicOp = true,
|
.logicOp = true,
|
||||||
.multiDrawIndirect = true,
|
.multiDrawIndirect = true,
|
||||||
.drawIndirectFirstInstance = false,
|
.drawIndirectFirstInstance = true,
|
||||||
.depthClamp = true,
|
.depthClamp = true,
|
||||||
.depthBiasClamp = true,
|
.depthBiasClamp = true,
|
||||||
.fillModeNonSolid = true,
|
.fillModeNonSolid = true,
|
||||||
|
@ -1024,6 +1024,8 @@ void Device::CheckSuitability(bool requires_swapchain) const {
|
||||||
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
|
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
|
||||||
std::make_pair(features.imageCubeArray, "imageCubeArray"),
|
std::make_pair(features.imageCubeArray, "imageCubeArray"),
|
||||||
std::make_pair(features.independentBlend, "independentBlend"),
|
std::make_pair(features.independentBlend, "independentBlend"),
|
||||||
|
std::make_pair(features.multiDrawIndirect, "multiDrawIndirect"),
|
||||||
|
std::make_pair(features.drawIndirectFirstInstance, "drawIndirectFirstInstance"),
|
||||||
std::make_pair(features.depthClamp, "depthClamp"),
|
std::make_pair(features.depthClamp, "depthClamp"),
|
||||||
std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
|
std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
|
||||||
std::make_pair(features.largePoints, "largePoints"),
|
std::make_pair(features.largePoints, "largePoints"),
|
||||||
|
@ -1117,6 +1119,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
|
||||||
test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
|
test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
|
||||||
test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true);
|
test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true);
|
||||||
test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
|
test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
|
||||||
|
test(khr_draw_indirect_count, VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME, true);
|
||||||
test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
|
test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
|
||||||
test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
|
test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
|
||||||
test(has_ext_primitive_topology_list_restart,
|
test(has_ext_primitive_topology_list_restart,
|
||||||
|
|
|
@ -451,6 +451,7 @@ private:
|
||||||
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
|
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
|
||||||
bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2.
|
bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2.
|
||||||
bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough.
|
bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough.
|
||||||
|
bool khr_draw_indirect_count{}; ///< Support for VK_KHR_draw_indirect_count.
|
||||||
bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts.
|
bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts.
|
||||||
bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
|
bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
|
||||||
bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
|
bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
|
||||||
|
|
|
@ -94,8 +94,10 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
||||||
X(vkCmdDispatch);
|
X(vkCmdDispatch);
|
||||||
X(vkCmdDraw);
|
X(vkCmdDraw);
|
||||||
X(vkCmdDrawIndexed);
|
X(vkCmdDrawIndexed);
|
||||||
X(vkCmdDrawIndirectCount);
|
X(vkCmdDrawIndirect);
|
||||||
X(vkCmdDrawIndexedIndirectCount);
|
X(vkCmdDrawIndexedIndirect);
|
||||||
|
X(vkCmdDrawIndirectCountKHR);
|
||||||
|
X(vkCmdDrawIndexedIndirectCountKHR);
|
||||||
X(vkCmdEndQuery);
|
X(vkCmdEndQuery);
|
||||||
X(vkCmdEndRenderPass);
|
X(vkCmdEndRenderPass);
|
||||||
X(vkCmdEndTransformFeedbackEXT);
|
X(vkCmdEndTransformFeedbackEXT);
|
||||||
|
|
|
@ -213,8 +213,10 @@ struct DeviceDispatch : InstanceDispatch {
|
||||||
PFN_vkCmdDispatch vkCmdDispatch{};
|
PFN_vkCmdDispatch vkCmdDispatch{};
|
||||||
PFN_vkCmdDraw vkCmdDraw{};
|
PFN_vkCmdDraw vkCmdDraw{};
|
||||||
PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
|
PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
|
||||||
PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{};
|
PFN_vkCmdDrawIndirect vkCmdDrawIndirect{};
|
||||||
PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{};
|
PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{};
|
||||||
|
PFN_vkCmdDrawIndirectCountKHR vkCmdDrawIndirectCountKHR{};
|
||||||
|
PFN_vkCmdDrawIndexedIndirectCountKHR vkCmdDrawIndexedIndirectCountKHR{};
|
||||||
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
|
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
|
||||||
PFN_vkCmdEndQuery vkCmdEndQuery{};
|
PFN_vkCmdEndQuery vkCmdEndQuery{};
|
||||||
PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
|
PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
|
||||||
|
@ -1021,16 +1023,26 @@ public:
|
||||||
first_instance);
|
first_instance);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DrawIndirect(VkBuffer src_buffer, VkDeviceSize src_offset, u32 draw_count,
|
||||||
|
u32 stride) const noexcept {
|
||||||
|
dld->vkCmdDrawIndirect(handle, src_buffer, src_offset, draw_count, stride);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DrawIndexedIndirect(VkBuffer src_buffer, VkDeviceSize src_offset, u32 draw_count,
|
||||||
|
u32 stride) const noexcept {
|
||||||
|
dld->vkCmdDrawIndexedIndirect(handle, src_buffer, src_offset, draw_count, stride);
|
||||||
|
}
|
||||||
|
|
||||||
void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer,
|
void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer,
|
||||||
VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept {
|
VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept {
|
||||||
dld->vkCmdDrawIndirectCount(handle, src_buffer, src_offset, count_buffer, count_offset,
|
dld->vkCmdDrawIndirectCountKHR(handle, src_buffer, src_offset, count_buffer, count_offset,
|
||||||
draw_count, stride);
|
draw_count, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset,
|
void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset,
|
||||||
VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count,
|
VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count,
|
||||||
u32 stride) const noexcept {
|
u32 stride) const noexcept {
|
||||||
dld->vkCmdDrawIndexedIndirectCount(handle, src_buffer, src_offset, count_buffer,
|
dld->vkCmdDrawIndexedIndirectCountKHR(handle, src_buffer, src_offset, count_buffer,
|
||||||
count_offset, draw_count, stride);
|
count_offset, draw_count, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue