mirror of
https://git.suyu.dev/suyu/suyu
synced 2024-11-01 04:47:53 +00:00
Merge pull request #6540 from Kelebek1/nvdec
Slightly refactor NVDEC and codecs for readability and safety
This commit is contained in:
commit
c770fa9823
10 changed files with 544 additions and 378 deletions
|
@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
|
|||
case ThiMethod::SetMethod1:
|
||||
LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
|
||||
static_cast<u32>(nvdec_thi_state.method_0));
|
||||
nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0),
|
||||
data);
|
||||
nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
|
|
@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) {
|
|||
av_free(ptr);
|
||||
}
|
||||
|
||||
Codec::Codec(GPU& gpu_)
|
||||
: gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)),
|
||||
Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
|
||||
: gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
|
||||
vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
|
||||
|
||||
Codec::~Codec() {
|
||||
|
@ -43,46 +43,48 @@ Codec::~Codec() {
|
|||
avcodec_close(av_codec_ctx);
|
||||
}
|
||||
|
||||
void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
|
||||
if (current_codec != codec) {
|
||||
LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
|
||||
current_codec = codec;
|
||||
void Codec::Initialize() {
|
||||
AVCodecID codec{AV_CODEC_ID_NONE};
|
||||
switch (current_codec) {
|
||||
case NvdecCommon::VideoCodec::H264:
|
||||
codec = AV_CODEC_ID_H264;
|
||||
break;
|
||||
case NvdecCommon::VideoCodec::Vp9:
|
||||
codec = AV_CODEC_ID_VP9;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
av_codec = avcodec_find_decoder(codec);
|
||||
av_codec_ctx = avcodec_alloc_context3(av_codec);
|
||||
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
|
||||
|
||||
// TODO(ameerj): libavcodec gpu hw acceleration
|
||||
|
||||
const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
|
||||
if (av_error < 0) {
|
||||
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
|
||||
avcodec_close(av_codec_ctx);
|
||||
return;
|
||||
}
|
||||
initialized = true;
|
||||
return;
|
||||
}
|
||||
|
||||
void Codec::StateWrite(u32 offset, u64 arguments) {
|
||||
u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
|
||||
std::memcpy(state_offset, &arguments, sizeof(u64));
|
||||
void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
|
||||
if (current_codec != codec) {
|
||||
current_codec = codec;
|
||||
LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
|
||||
}
|
||||
}
|
||||
|
||||
void Codec::Decode() {
|
||||
bool is_first_frame = false;
|
||||
const bool is_first_frame = !initialized;
|
||||
if (!initialized) {
|
||||
if (current_codec == NvdecCommon::VideoCodec::H264) {
|
||||
av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
|
||||
} else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
|
||||
av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
|
||||
} else {
|
||||
LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec);
|
||||
return;
|
||||
}
|
||||
|
||||
av_codec_ctx = avcodec_alloc_context3(av_codec);
|
||||
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
|
||||
|
||||
// TODO(ameerj): libavcodec gpu hw acceleration
|
||||
|
||||
const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
|
||||
if (av_error < 0) {
|
||||
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
|
||||
avcodec_close(av_codec_ctx);
|
||||
return;
|
||||
}
|
||||
initialized = true;
|
||||
is_first_frame = true;
|
||||
Initialize();
|
||||
}
|
||||
bool vp9_hidden_frame = false;
|
||||
|
||||
bool vp9_hidden_frame = false;
|
||||
AVPacket packet{};
|
||||
av_init_packet(&packet);
|
||||
std::vector<u8> frame_data;
|
||||
|
@ -95,7 +97,7 @@ void Codec::Decode() {
|
|||
}
|
||||
|
||||
packet.data = frame_data.data();
|
||||
packet.size = static_cast<int>(frame_data.size());
|
||||
packet.size = static_cast<s32>(frame_data.size());
|
||||
|
||||
avcodec_send_packet(av_codec_ctx, &packet);
|
||||
|
||||
|
@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
|
|||
return current_codec;
|
||||
}
|
||||
|
||||
std::string_view Codec::GetCurrentCodecName() const {
|
||||
switch (current_codec) {
|
||||
case NvdecCommon::VideoCodec::None:
|
||||
return "None";
|
||||
case NvdecCommon::VideoCodec::H264:
|
||||
return "H264";
|
||||
case NvdecCommon::VideoCodec::Vp8:
|
||||
return "VP8";
|
||||
case NvdecCommon::VideoCodec::H265:
|
||||
return "H265";
|
||||
case NvdecCommon::VideoCodec::Vp9:
|
||||
return "VP9";
|
||||
default:
|
||||
return "Unknown";
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
|
@ -42,15 +42,15 @@ class VP9;
|
|||
|
||||
class Codec {
|
||||
public:
|
||||
explicit Codec(GPU& gpu);
|
||||
explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs);
|
||||
~Codec();
|
||||
|
||||
/// Initialize the codec, returning success or failure
|
||||
void Initialize();
|
||||
|
||||
/// Sets NVDEC video stream codec
|
||||
void SetTargetCodec(NvdecCommon::VideoCodec codec);
|
||||
|
||||
/// Populate NvdecRegisters state with argument value at the provided offset
|
||||
void StateWrite(u32 offset, u64 arguments);
|
||||
|
||||
/// Call decoders to construct headers, decode AVFrame with ffmpeg
|
||||
void Decode();
|
||||
|
||||
|
@ -59,6 +59,8 @@ public:
|
|||
|
||||
/// Returns the value of current_codec
|
||||
[[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
|
||||
/// Return name of the current codec
|
||||
[[nodiscard]] std::string_view GetCurrentCodecName() const;
|
||||
|
||||
private:
|
||||
bool initialized{};
|
||||
|
@ -68,10 +70,10 @@ private:
|
|||
AVCodecContext* av_codec_ctx{nullptr};
|
||||
|
||||
GPU& gpu;
|
||||
const NvdecCommon::NvdecRegisters& state;
|
||||
std::unique_ptr<Decoder::H264> h264_decoder;
|
||||
std::unique_ptr<Decoder::VP9> vp9_decoder;
|
||||
|
||||
NvdecCommon::NvdecRegisters state{};
|
||||
std::queue<AVFramePtr> av_frames{};
|
||||
};
|
||||
|
||||
|
|
|
@ -45,135 +45,130 @@ H264::~H264() = default;
|
|||
|
||||
const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,
|
||||
bool is_first_frame) {
|
||||
H264DecoderContext context{};
|
||||
H264DecoderContext context;
|
||||
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
|
||||
|
||||
const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff);
|
||||
const s64 frame_number = context.h264_parameter_set.frame_number.Value();
|
||||
if (!is_first_frame && frame_number != 0) {
|
||||
frame.resize(context.frame_data_size);
|
||||
|
||||
frame.resize(context.stream_len);
|
||||
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
|
||||
} else {
|
||||
/// Encode header
|
||||
H264BitWriter writer{};
|
||||
writer.WriteU(1, 24);
|
||||
writer.WriteU(0, 1);
|
||||
writer.WriteU(3, 2);
|
||||
writer.WriteU(7, 5);
|
||||
writer.WriteU(100, 8);
|
||||
writer.WriteU(0, 8);
|
||||
writer.WriteU(31, 8);
|
||||
writer.WriteUe(0);
|
||||
const auto chroma_format_idc =
|
||||
static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
|
||||
writer.WriteUe(chroma_format_idc);
|
||||
if (chroma_format_idc == 3) {
|
||||
writer.WriteBit(false);
|
||||
}
|
||||
|
||||
writer.WriteUe(0);
|
||||
writer.WriteUe(0);
|
||||
writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
|
||||
writer.WriteBit(false); // Scaling matrix present flag
|
||||
|
||||
const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
|
||||
writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
|
||||
writer.WriteUe(order_cnt_type);
|
||||
if (order_cnt_type == 0) {
|
||||
writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
|
||||
} else if (order_cnt_type == 1) {
|
||||
writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
|
||||
|
||||
writer.WriteSe(0);
|
||||
writer.WriteSe(0);
|
||||
writer.WriteUe(0);
|
||||
}
|
||||
|
||||
const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
|
||||
(context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
|
||||
|
||||
writer.WriteUe(16);
|
||||
writer.WriteBit(false);
|
||||
writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
|
||||
writer.WriteUe(pic_height - 1);
|
||||
writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
|
||||
|
||||
if (!context.h264_parameter_set.frame_mbs_only_flag) {
|
||||
writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
|
||||
}
|
||||
|
||||
writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0);
|
||||
writer.WriteBit(false); // Frame cropping flag
|
||||
writer.WriteBit(false); // VUI parameter present flag
|
||||
|
||||
writer.End();
|
||||
|
||||
// H264 PPS
|
||||
writer.WriteU(1, 24);
|
||||
writer.WriteU(0, 1);
|
||||
writer.WriteU(3, 2);
|
||||
writer.WriteU(8, 5);
|
||||
|
||||
writer.WriteUe(0);
|
||||
writer.WriteUe(0);
|
||||
|
||||
writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
|
||||
writer.WriteBit(false);
|
||||
writer.WriteUe(0);
|
||||
writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
|
||||
writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
|
||||
writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0);
|
||||
writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2);
|
||||
s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f);
|
||||
pic_init_qp = (pic_init_qp << 26) >> 26;
|
||||
writer.WriteSe(pic_init_qp);
|
||||
writer.WriteSe(0);
|
||||
s32 chroma_qp_index_offset =
|
||||
static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
|
||||
chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
|
||||
|
||||
writer.WriteSe(chroma_qp_index_offset);
|
||||
writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0);
|
||||
writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0);
|
||||
writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
|
||||
writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
|
||||
|
||||
writer.WriteBit(true);
|
||||
|
||||
for (s32 index = 0; index < 6; index++) {
|
||||
writer.WriteBit(true);
|
||||
const auto matrix_x4 =
|
||||
std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end());
|
||||
writer.WriteScalingList(matrix_x4, index * 16, 16);
|
||||
}
|
||||
|
||||
if (context.h264_parameter_set.transform_8x8_mode_flag) {
|
||||
for (s32 index = 0; index < 2; index++) {
|
||||
writer.WriteBit(true);
|
||||
const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
|
||||
context.scaling_matrix_8.end());
|
||||
|
||||
writer.WriteScalingList(matrix_x8, index * 64, 64);
|
||||
}
|
||||
}
|
||||
|
||||
s32 chroma_qp_index_offset2 =
|
||||
static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f);
|
||||
chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
|
||||
|
||||
writer.WriteSe(chroma_qp_index_offset2);
|
||||
|
||||
writer.End();
|
||||
|
||||
const auto& encoded_header = writer.GetByteArray();
|
||||
frame.resize(encoded_header.size() + context.frame_data_size);
|
||||
std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
|
||||
|
||||
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
|
||||
frame.data() + encoded_header.size(),
|
||||
context.frame_data_size);
|
||||
return frame;
|
||||
}
|
||||
|
||||
// Encode header
|
||||
H264BitWriter writer{};
|
||||
writer.WriteU(1, 24);
|
||||
writer.WriteU(0, 1);
|
||||
writer.WriteU(3, 2);
|
||||
writer.WriteU(7, 5);
|
||||
writer.WriteU(100, 8);
|
||||
writer.WriteU(0, 8);
|
||||
writer.WriteU(31, 8);
|
||||
writer.WriteUe(0);
|
||||
const u32 chroma_format_idc =
|
||||
static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value());
|
||||
writer.WriteUe(chroma_format_idc);
|
||||
if (chroma_format_idc == 3) {
|
||||
writer.WriteBit(false);
|
||||
}
|
||||
|
||||
writer.WriteUe(0);
|
||||
writer.WriteUe(0);
|
||||
writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
|
||||
writer.WriteBit(false); // Scaling matrix present flag
|
||||
|
||||
writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
|
||||
|
||||
const auto order_cnt_type =
|
||||
static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value());
|
||||
writer.WriteUe(order_cnt_type);
|
||||
if (order_cnt_type == 0) {
|
||||
writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
|
||||
} else if (order_cnt_type == 1) {
|
||||
writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
|
||||
|
||||
writer.WriteSe(0);
|
||||
writer.WriteSe(0);
|
||||
writer.WriteUe(0);
|
||||
}
|
||||
|
||||
const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
|
||||
(context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
|
||||
|
||||
writer.WriteUe(16);
|
||||
writer.WriteBit(false);
|
||||
writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
|
||||
writer.WriteUe(pic_height - 1);
|
||||
writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
|
||||
|
||||
if (!context.h264_parameter_set.frame_mbs_only_flag) {
|
||||
writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
|
||||
}
|
||||
|
||||
writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
|
||||
writer.WriteBit(false); // Frame cropping flag
|
||||
writer.WriteBit(false); // VUI parameter present flag
|
||||
|
||||
writer.End();
|
||||
|
||||
// H264 PPS
|
||||
writer.WriteU(1, 24);
|
||||
writer.WriteU(0, 1);
|
||||
writer.WriteU(3, 2);
|
||||
writer.WriteU(8, 5);
|
||||
|
||||
writer.WriteUe(0);
|
||||
writer.WriteUe(0);
|
||||
|
||||
writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
|
||||
writer.WriteBit(false);
|
||||
writer.WriteUe(0);
|
||||
writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
|
||||
writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
|
||||
writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0);
|
||||
writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2);
|
||||
s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value());
|
||||
writer.WriteSe(pic_init_qp);
|
||||
writer.WriteSe(0);
|
||||
s32 chroma_qp_index_offset =
|
||||
static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value());
|
||||
|
||||
writer.WriteSe(chroma_qp_index_offset);
|
||||
writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
|
||||
writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
|
||||
writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
|
||||
writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
|
||||
|
||||
writer.WriteBit(true);
|
||||
|
||||
for (s32 index = 0; index < 6; index++) {
|
||||
writer.WriteBit(true);
|
||||
std::span<const u8> matrix{context.weight_scale};
|
||||
writer.WriteScalingList(matrix, index * 16, 16);
|
||||
}
|
||||
|
||||
if (context.h264_parameter_set.transform_8x8_mode_flag) {
|
||||
for (s32 index = 0; index < 2; index++) {
|
||||
writer.WriteBit(true);
|
||||
std::span<const u8> matrix{context.weight_scale_8x8};
|
||||
writer.WriteScalingList(matrix, index * 64, 64);
|
||||
}
|
||||
}
|
||||
|
||||
s32 chroma_qp_index_offset2 =
|
||||
static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value());
|
||||
|
||||
writer.WriteSe(chroma_qp_index_offset2);
|
||||
|
||||
writer.End();
|
||||
|
||||
const auto& encoded_header = writer.GetByteArray();
|
||||
frame.resize(encoded_header.size() + context.stream_len);
|
||||
std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
|
||||
|
||||
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
|
||||
frame.data() + encoded_header.size(), context.stream_len);
|
||||
|
||||
return frame;
|
||||
}
|
||||
|
||||
|
@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) {
|
|||
WriteBits(state ? 1 : 0, 1);
|
||||
}
|
||||
|
||||
void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) {
|
||||
void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
|
||||
std::vector<u8> scan(count);
|
||||
if (count == 16) {
|
||||
std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
|
||||
|
|
|
@ -20,7 +20,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/command_classes/nvdec_common.h"
|
||||
|
@ -48,7 +50,7 @@ public:
|
|||
|
||||
/// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
|
||||
/// Writes the scaling matrices of the sream
|
||||
void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
|
||||
void WriteScalingList(std::span<const u8> list, s32 start, s32 count);
|
||||
|
||||
/// Return the bitstream as a vector.
|
||||
[[nodiscard]] std::vector<u8>& GetByteArray();
|
||||
|
@ -78,40 +80,110 @@ public:
|
|||
const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
|
||||
|
||||
private:
|
||||
struct H264ParameterSet {
|
||||
u32 log2_max_pic_order_cnt{};
|
||||
u32 delta_pic_order_always_zero_flag{};
|
||||
u32 frame_mbs_only_flag{};
|
||||
u32 pic_width_in_mbs{};
|
||||
u32 pic_height_in_map_units{};
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32 entropy_coding_mode_flag{};
|
||||
u32 bottom_field_pic_order_flag{};
|
||||
u32 num_refidx_l0_default_active{};
|
||||
u32 num_refidx_l1_default_active{};
|
||||
u32 deblocking_filter_control_flag{};
|
||||
u32 redundant_pic_count_flag{};
|
||||
u32 transform_8x8_mode_flag{};
|
||||
INSERT_PADDING_WORDS(9);
|
||||
u64 flags{};
|
||||
u32 frame_number{};
|
||||
u32 frame_number2{};
|
||||
};
|
||||
static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size");
|
||||
|
||||
struct H264DecoderContext {
|
||||
INSERT_PADDING_BYTES(0x48);
|
||||
u32 frame_data_size{};
|
||||
INSERT_PADDING_BYTES(0xc);
|
||||
H264ParameterSet h264_parameter_set{};
|
||||
INSERT_PADDING_BYTES(0x100);
|
||||
std::array<u8, 0x60> scaling_matrix_4;
|
||||
std::array<u8, 0x80> scaling_matrix_8;
|
||||
};
|
||||
static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size");
|
||||
|
||||
std::vector<u8> frame;
|
||||
GPU& gpu;
|
||||
|
||||
struct H264ParameterSet {
|
||||
s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
|
||||
s32 delta_pic_order_always_zero_flag; ///< 0x04
|
||||
s32 frame_mbs_only_flag; ///< 0x08
|
||||
u32 pic_width_in_mbs; ///< 0x0C
|
||||
u32 frame_height_in_map_units; ///< 0x10
|
||||
union { ///< 0x14
|
||||
BitField<0, 2, u32> tile_format;
|
||||
BitField<2, 3, u32> gob_height;
|
||||
};
|
||||
u32 entropy_coding_mode_flag; ///< 0x18
|
||||
s32 pic_order_present_flag; ///< 0x1C
|
||||
s32 num_refidx_l0_default_active; ///< 0x20
|
||||
s32 num_refidx_l1_default_active; ///< 0x24
|
||||
s32 deblocking_filter_control_present_flag; ///< 0x28
|
||||
s32 redundant_pic_cnt_present_flag; ///< 0x2C
|
||||
u32 transform_8x8_mode_flag; ///< 0x30
|
||||
u32 pitch_luma; ///< 0x34
|
||||
u32 pitch_chroma; ///< 0x38
|
||||
u32 luma_top_offset; ///< 0x3C
|
||||
u32 luma_bot_offset; ///< 0x40
|
||||
u32 luma_frame_offset; ///< 0x44
|
||||
u32 chroma_top_offset; ///< 0x48
|
||||
u32 chroma_bot_offset; ///< 0x4C
|
||||
u32 chroma_frame_offset; ///< 0x50
|
||||
u32 hist_buffer_size; ///< 0x54
|
||||
union { ///< 0x58
|
||||
union {
|
||||
BitField<0, 1, u64> mbaff_frame;
|
||||
BitField<1, 1, u64> direct_8x8_inference;
|
||||
BitField<2, 1, u64> weighted_pred;
|
||||
BitField<3, 1, u64> constrained_intra_pred;
|
||||
BitField<4, 1, u64> ref_pic;
|
||||
BitField<5, 1, u64> field_pic;
|
||||
BitField<6, 1, u64> bottom_field;
|
||||
BitField<7, 1, u64> second_field;
|
||||
} flags;
|
||||
BitField<8, 4, u64> log2_max_frame_num_minus4;
|
||||
BitField<12, 2, u64> chroma_format_idc;
|
||||
BitField<14, 2, u64> pic_order_cnt_type;
|
||||
BitField<16, 6, s64> pic_init_qp_minus26;
|
||||
BitField<22, 5, s64> chroma_qp_index_offset;
|
||||
BitField<27, 5, s64> second_chroma_qp_index_offset;
|
||||
BitField<32, 2, u64> weighted_bipred_idc;
|
||||
BitField<34, 7, u64> curr_pic_idx;
|
||||
BitField<41, 5, u64> curr_col_idx;
|
||||
BitField<46, 16, u64> frame_number;
|
||||
BitField<62, 1, u64> frame_surfaces;
|
||||
BitField<63, 1, u64> output_memory_layout;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
|
||||
|
||||
struct H264DecoderContext {
|
||||
INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000
|
||||
u32 stream_len; ///< 0x0048
|
||||
INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C
|
||||
H264ParameterSet h264_parameter_set; ///< 0x0058
|
||||
INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8
|
||||
std::array<u8, 0x60> weight_scale; ///< 0x01C0
|
||||
std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
|
||||
};
|
||||
static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size");
|
||||
|
||||
#define ASSERT_POSITION(field_name, position) \
|
||||
static_assert(offsetof(H264ParameterSet, field_name) == position, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
|
||||
ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
|
||||
ASSERT_POSITION(frame_mbs_only_flag, 0x08);
|
||||
ASSERT_POSITION(pic_width_in_mbs, 0x0C);
|
||||
ASSERT_POSITION(frame_height_in_map_units, 0x10);
|
||||
ASSERT_POSITION(tile_format, 0x14);
|
||||
ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
|
||||
ASSERT_POSITION(pic_order_present_flag, 0x1C);
|
||||
ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
|
||||
ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
|
||||
ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
|
||||
ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
|
||||
ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
|
||||
ASSERT_POSITION(pitch_luma, 0x34);
|
||||
ASSERT_POSITION(pitch_chroma, 0x38);
|
||||
ASSERT_POSITION(luma_top_offset, 0x3C);
|
||||
ASSERT_POSITION(luma_bot_offset, 0x40);
|
||||
ASSERT_POSITION(luma_frame_offset, 0x44);
|
||||
ASSERT_POSITION(chroma_top_offset, 0x48);
|
||||
ASSERT_POSITION(chroma_bot_offset, 0x4C);
|
||||
ASSERT_POSITION(chroma_frame_offset, 0x50);
|
||||
ASSERT_POSITION(hist_buffer_size, 0x54);
|
||||
ASSERT_POSITION(flags, 0x58);
|
||||
#undef ASSERT_POSITION
|
||||
|
||||
#define ASSERT_POSITION(field_name, position) \
|
||||
static_assert(offsetof(H264DecoderContext, field_name) == position, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_POSITION(stream_len, 0x48);
|
||||
ASSERT_POSITION(h264_parameter_set, 0x58);
|
||||
ASSERT_POSITION(weight_scale, 0x1C0);
|
||||
#undef ASSERT_POSITION
|
||||
};
|
||||
|
||||
} // namespace Decoder
|
||||
|
|
|
@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
|
|||
}
|
||||
|
||||
Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
|
||||
PictureInfo picture_info{};
|
||||
PictureInfo picture_info;
|
||||
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
|
||||
Vp9PictureInfo vp9_info = picture_info.Convert();
|
||||
|
||||
|
@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)
|
|||
}
|
||||
|
||||
void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
|
||||
EntropyProbs entropy{};
|
||||
EntropyProbs entropy;
|
||||
gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
|
||||
entropy.Convert(dst);
|
||||
}
|
||||
|
|
|
@ -15,10 +15,10 @@ class GPU;
|
|||
|
||||
namespace Decoder {
|
||||
struct Vp9FrameDimensions {
|
||||
s16 width{};
|
||||
s16 height{};
|
||||
s16 luma_pitch{};
|
||||
s16 chroma_pitch{};
|
||||
s16 width;
|
||||
s16 height;
|
||||
s16 luma_pitch;
|
||||
s16 chroma_pitch;
|
||||
};
|
||||
static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
|
||||
|
||||
|
@ -49,87 +49,87 @@ enum class TxMode {
|
|||
};
|
||||
|
||||
struct Segmentation {
|
||||
u8 enabled{};
|
||||
u8 update_map{};
|
||||
u8 temporal_update{};
|
||||
u8 abs_delta{};
|
||||
std::array<u32, 8> feature_mask{};
|
||||
std::array<std::array<s16, 4>, 8> feature_data{};
|
||||
u8 enabled;
|
||||
u8 update_map;
|
||||
u8 temporal_update;
|
||||
u8 abs_delta;
|
||||
std::array<u32, 8> feature_mask;
|
||||
std::array<std::array<s16, 4>, 8> feature_data;
|
||||
};
|
||||
static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
|
||||
|
||||
struct LoopFilter {
|
||||
u8 mode_ref_delta_enabled{};
|
||||
std::array<s8, 4> ref_deltas{};
|
||||
std::array<s8, 2> mode_deltas{};
|
||||
u8 mode_ref_delta_enabled;
|
||||
std::array<s8, 4> ref_deltas;
|
||||
std::array<s8, 2> mode_deltas;
|
||||
};
|
||||
static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
|
||||
|
||||
struct Vp9EntropyProbs {
|
||||
std::array<u8, 36> y_mode_prob{};
|
||||
std::array<u8, 64> partition_prob{};
|
||||
std::array<u8, 1728> coef_probs{};
|
||||
std::array<u8, 8> switchable_interp_prob{};
|
||||
std::array<u8, 28> inter_mode_prob{};
|
||||
std::array<u8, 4> intra_inter_prob{};
|
||||
std::array<u8, 5> comp_inter_prob{};
|
||||
std::array<u8, 10> single_ref_prob{};
|
||||
std::array<u8, 5> comp_ref_prob{};
|
||||
std::array<u8, 6> tx_32x32_prob{};
|
||||
std::array<u8, 4> tx_16x16_prob{};
|
||||
std::array<u8, 2> tx_8x8_prob{};
|
||||
std::array<u8, 3> skip_probs{};
|
||||
std::array<u8, 3> joints{};
|
||||
std::array<u8, 2> sign{};
|
||||
std::array<u8, 20> classes{};
|
||||
std::array<u8, 2> class_0{};
|
||||
std::array<u8, 20> prob_bits{};
|
||||
std::array<u8, 12> class_0_fr{};
|
||||
std::array<u8, 6> fr{};
|
||||
std::array<u8, 2> class_0_hp{};
|
||||
std::array<u8, 2> high_precision{};
|
||||
std::array<u8, 36> y_mode_prob; ///< 0x0000
|
||||
std::array<u8, 64> partition_prob; ///< 0x0024
|
||||
std::array<u8, 1728> coef_probs; ///< 0x0064
|
||||
std::array<u8, 8> switchable_interp_prob; ///< 0x0724
|
||||
std::array<u8, 28> inter_mode_prob; ///< 0x072C
|
||||
std::array<u8, 4> intra_inter_prob; ///< 0x0748
|
||||
std::array<u8, 5> comp_inter_prob; ///< 0x074C
|
||||
std::array<u8, 10> single_ref_prob; ///< 0x0751
|
||||
std::array<u8, 5> comp_ref_prob; ///< 0x075B
|
||||
std::array<u8, 6> tx_32x32_prob; ///< 0x0760
|
||||
std::array<u8, 4> tx_16x16_prob; ///< 0x0766
|
||||
std::array<u8, 2> tx_8x8_prob; ///< 0x076A
|
||||
std::array<u8, 3> skip_probs; ///< 0x076C
|
||||
std::array<u8, 3> joints; ///< 0x076F
|
||||
std::array<u8, 2> sign; ///< 0x0772
|
||||
std::array<u8, 20> classes; ///< 0x0774
|
||||
std::array<u8, 2> class_0; ///< 0x0788
|
||||
std::array<u8, 20> prob_bits; ///< 0x078A
|
||||
std::array<u8, 12> class_0_fr; ///< 0x079E
|
||||
std::array<u8, 6> fr; ///< 0x07AA
|
||||
std::array<u8, 2> class_0_hp; ///< 0x07B0
|
||||
std::array<u8, 2> high_precision; ///< 0x07B2
|
||||
};
|
||||
static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
|
||||
|
||||
struct Vp9PictureInfo {
|
||||
bool is_key_frame{};
|
||||
bool intra_only{};
|
||||
bool last_frame_was_key{};
|
||||
bool frame_size_changed{};
|
||||
bool error_resilient_mode{};
|
||||
bool last_frame_shown{};
|
||||
bool show_frame{};
|
||||
std::array<s8, 4> ref_frame_sign_bias{};
|
||||
s32 base_q_index{};
|
||||
s32 y_dc_delta_q{};
|
||||
s32 uv_dc_delta_q{};
|
||||
s32 uv_ac_delta_q{};
|
||||
bool lossless{};
|
||||
s32 transform_mode{};
|
||||
bool allow_high_precision_mv{};
|
||||
s32 interp_filter{};
|
||||
s32 reference_mode{};
|
||||
s8 comp_fixed_ref{};
|
||||
std::array<s8, 2> comp_var_ref{};
|
||||
s32 log2_tile_cols{};
|
||||
s32 log2_tile_rows{};
|
||||
bool segment_enabled{};
|
||||
bool segment_map_update{};
|
||||
bool segment_map_temporal_update{};
|
||||
s32 segment_abs_delta{};
|
||||
std::array<u32, 8> segment_feature_enable{};
|
||||
std::array<std::array<s16, 4>, 8> segment_feature_data{};
|
||||
bool mode_ref_delta_enabled{};
|
||||
bool use_prev_in_find_mv_refs{};
|
||||
std::array<s8, 4> ref_deltas{};
|
||||
std::array<s8, 2> mode_deltas{};
|
||||
Vp9EntropyProbs entropy{};
|
||||
Vp9FrameDimensions frame_size{};
|
||||
u8 first_level{};
|
||||
u8 sharpness_level{};
|
||||
u32 bitstream_size{};
|
||||
std::array<u64, 4> frame_offsets{};
|
||||
std::array<bool, 4> refresh_frame{};
|
||||
bool is_key_frame;
|
||||
bool intra_only;
|
||||
bool last_frame_was_key;
|
||||
bool frame_size_changed;
|
||||
bool error_resilient_mode;
|
||||
bool last_frame_shown;
|
||||
bool show_frame;
|
||||
std::array<s8, 4> ref_frame_sign_bias;
|
||||
s32 base_q_index;
|
||||
s32 y_dc_delta_q;
|
||||
s32 uv_dc_delta_q;
|
||||
s32 uv_ac_delta_q;
|
||||
bool lossless;
|
||||
s32 transform_mode;
|
||||
bool allow_high_precision_mv;
|
||||
s32 interp_filter;
|
||||
s32 reference_mode;
|
||||
s8 comp_fixed_ref;
|
||||
std::array<s8, 2> comp_var_ref;
|
||||
s32 log2_tile_cols;
|
||||
s32 log2_tile_rows;
|
||||
bool segment_enabled;
|
||||
bool segment_map_update;
|
||||
bool segment_map_temporal_update;
|
||||
s32 segment_abs_delta;
|
||||
std::array<u32, 8> segment_feature_enable;
|
||||
std::array<std::array<s16, 4>, 8> segment_feature_data;
|
||||
bool mode_ref_delta_enabled;
|
||||
bool use_prev_in_find_mv_refs;
|
||||
std::array<s8, 4> ref_deltas;
|
||||
std::array<s8, 2> mode_deltas;
|
||||
Vp9EntropyProbs entropy;
|
||||
Vp9FrameDimensions frame_size;
|
||||
u8 first_level;
|
||||
u8 sharpness_level;
|
||||
u32 bitstream_size;
|
||||
std::array<u64, 4> frame_offsets;
|
||||
std::array<bool, 4> refresh_frame;
|
||||
};
|
||||
|
||||
struct Vp9FrameContainer {
|
||||
|
@ -138,35 +138,35 @@ struct Vp9FrameContainer {
|
|||
};
|
||||
|
||||
struct PictureInfo {
|
||||
INSERT_PADDING_WORDS(12);
|
||||
u32 bitstream_size{};
|
||||
INSERT_PADDING_WORDS(5);
|
||||
Vp9FrameDimensions last_frame_size{};
|
||||
Vp9FrameDimensions golden_frame_size{};
|
||||
Vp9FrameDimensions alt_frame_size{};
|
||||
Vp9FrameDimensions current_frame_size{};
|
||||
u32 vp9_flags{};
|
||||
std::array<s8, 4> ref_frame_sign_bias{};
|
||||
u8 first_level{};
|
||||
u8 sharpness_level{};
|
||||
u8 base_q_index{};
|
||||
u8 y_dc_delta_q{};
|
||||
u8 uv_ac_delta_q{};
|
||||
u8 uv_dc_delta_q{};
|
||||
u8 lossless{};
|
||||
u8 tx_mode{};
|
||||
u8 allow_high_precision_mv{};
|
||||
u8 interp_filter{};
|
||||
u8 reference_mode{};
|
||||
s8 comp_fixed_ref{};
|
||||
std::array<s8, 2> comp_var_ref{};
|
||||
u8 log2_tile_cols{};
|
||||
u8 log2_tile_rows{};
|
||||
Segmentation segmentation{};
|
||||
LoopFilter loop_filter{};
|
||||
INSERT_PADDING_BYTES(5);
|
||||
u32 surface_params{};
|
||||
INSERT_PADDING_WORDS(3);
|
||||
INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00
|
||||
u32 bitstream_size; ///< 0x30
|
||||
INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34
|
||||
Vp9FrameDimensions last_frame_size; ///< 0x48
|
||||
Vp9FrameDimensions golden_frame_size; ///< 0x50
|
||||
Vp9FrameDimensions alt_frame_size; ///< 0x58
|
||||
Vp9FrameDimensions current_frame_size; ///< 0x60
|
||||
u32 vp9_flags; ///< 0x68
|
||||
std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
|
||||
u8 first_level; ///< 0x70
|
||||
u8 sharpness_level; ///< 0x71
|
||||
u8 base_q_index; ///< 0x72
|
||||
u8 y_dc_delta_q; ///< 0x73
|
||||
u8 uv_ac_delta_q; ///< 0x74
|
||||
u8 uv_dc_delta_q; ///< 0x75
|
||||
u8 lossless; ///< 0x76
|
||||
u8 tx_mode; ///< 0x77
|
||||
u8 allow_high_precision_mv; ///< 0x78
|
||||
u8 interp_filter; ///< 0x79
|
||||
u8 reference_mode; ///< 0x7A
|
||||
s8 comp_fixed_ref; ///< 0x7B
|
||||
std::array<s8, 2> comp_var_ref; ///< 0x7C
|
||||
u8 log2_tile_cols; ///< 0x7E
|
||||
u8 log2_tile_rows; ///< 0x7F
|
||||
Segmentation segmentation; ///< 0x80
|
||||
LoopFilter loop_filter; ///< 0xE4
|
||||
INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB
|
||||
u32 surface_params; ///< 0xF0
|
||||
INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4
|
||||
|
||||
[[nodiscard]] Vp9PictureInfo Convert() const {
|
||||
return {
|
||||
|
@ -176,6 +176,7 @@ struct PictureInfo {
|
|||
.frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
|
||||
.error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
|
||||
.last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
|
||||
.show_frame = false,
|
||||
.ref_frame_sign_bias = ref_frame_sign_bias,
|
||||
.base_q_index = base_q_index,
|
||||
.y_dc_delta_q = y_dc_delta_q,
|
||||
|
@ -204,45 +205,48 @@ struct PictureInfo {
|
|||
!(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
|
||||
.ref_deltas = loop_filter.ref_deltas,
|
||||
.mode_deltas = loop_filter.mode_deltas,
|
||||
.entropy{},
|
||||
.frame_size = current_frame_size,
|
||||
.first_level = first_level,
|
||||
.sharpness_level = sharpness_level,
|
||||
.bitstream_size = bitstream_size,
|
||||
.frame_offsets{},
|
||||
.refresh_frame{},
|
||||
};
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
|
||||
|
||||
struct EntropyProbs {
|
||||
INSERT_PADDING_BYTES(1024);
|
||||
std::array<u8, 28> inter_mode_prob{};
|
||||
std::array<u8, 4> intra_inter_prob{};
|
||||
INSERT_PADDING_BYTES(80);
|
||||
std::array<u8, 2> tx_8x8_prob{};
|
||||
std::array<u8, 4> tx_16x16_prob{};
|
||||
std::array<u8, 6> tx_32x32_prob{};
|
||||
std::array<u8, 4> y_mode_prob_e8{};
|
||||
std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{};
|
||||
INSERT_PADDING_BYTES(64);
|
||||
std::array<u8, 64> partition_prob{};
|
||||
INSERT_PADDING_BYTES(10);
|
||||
std::array<u8, 8> switchable_interp_prob{};
|
||||
std::array<u8, 5> comp_inter_prob{};
|
||||
std::array<u8, 3> skip_probs{};
|
||||
INSERT_PADDING_BYTES(1);
|
||||
std::array<u8, 3> joints{};
|
||||
std::array<u8, 2> sign{};
|
||||
std::array<u8, 2> class_0{};
|
||||
std::array<u8, 6> fr{};
|
||||
std::array<u8, 2> class_0_hp{};
|
||||
std::array<u8, 2> high_precision{};
|
||||
std::array<u8, 20> classes{};
|
||||
std::array<u8, 12> class_0_fr{};
|
||||
std::array<u8, 20> pred_bits{};
|
||||
std::array<u8, 10> single_ref_prob{};
|
||||
std::array<u8, 5> comp_ref_prob{};
|
||||
INSERT_PADDING_BYTES(17);
|
||||
std::array<u8, 2304> coef_probs{};
|
||||
INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000
|
||||
std::array<u8, 28> inter_mode_prob; ///< 0x0400
|
||||
std::array<u8, 4> intra_inter_prob; ///< 0x041C
|
||||
INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420
|
||||
std::array<u8, 2> tx_8x8_prob; ///< 0x0470
|
||||
std::array<u8, 4> tx_16x16_prob; ///< 0x0472
|
||||
std::array<u8, 6> tx_32x32_prob; ///< 0x0476
|
||||
std::array<u8, 4> y_mode_prob_e8; ///< 0x047C
|
||||
std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480
|
||||
INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0
|
||||
std::array<u8, 64> partition_prob; ///< 0x04E0
|
||||
INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520
|
||||
std::array<u8, 8> switchable_interp_prob; ///< 0x052A
|
||||
std::array<u8, 5> comp_inter_prob; ///< 0x0532
|
||||
std::array<u8, 3> skip_probs; ///< 0x0537
|
||||
INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A
|
||||
std::array<u8, 3> joints; ///< 0x053B
|
||||
std::array<u8, 2> sign; ///< 0x053E
|
||||
std::array<u8, 2> class_0; ///< 0x0540
|
||||
std::array<u8, 6> fr; ///< 0x0542
|
||||
std::array<u8, 2> class_0_hp; ///< 0x0548
|
||||
std::array<u8, 2> high_precision; ///< 0x054A
|
||||
std::array<u8, 20> classes; ///< 0x054C
|
||||
std::array<u8, 12> class_0_fr; ///< 0x0560
|
||||
std::array<u8, 20> pred_bits; ///< 0x056C
|
||||
std::array<u8, 10> single_ref_prob; ///< 0x0580
|
||||
std::array<u8, 5> comp_ref_prob; ///< 0x058A
|
||||
INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F
|
||||
std::array<u8, 2304> coef_probs; ///< 0x05A0
|
||||
|
||||
void Convert(Vp9EntropyProbs& fc) {
|
||||
fc.inter_mode_prob = inter_mode_prob;
|
||||
|
@ -293,10 +297,45 @@ struct RefPoolElement {
|
|||
};
|
||||
|
||||
struct FrameContexts {
|
||||
s64 from{};
|
||||
bool adapted{};
|
||||
Vp9EntropyProbs probs{};
|
||||
s64 from;
|
||||
bool adapted;
|
||||
Vp9EntropyProbs probs;
|
||||
};
|
||||
|
||||
#define ASSERT_POSITION(field_name, position) \
|
||||
static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_POSITION(partition_prob, 0x0024);
|
||||
ASSERT_POSITION(switchable_interp_prob, 0x0724);
|
||||
ASSERT_POSITION(sign, 0x0772);
|
||||
ASSERT_POSITION(class_0_fr, 0x079E);
|
||||
ASSERT_POSITION(high_precision, 0x07B2);
|
||||
#undef ASSERT_POSITION
|
||||
|
||||
#define ASSERT_POSITION(field_name, position) \
|
||||
static_assert(offsetof(PictureInfo, field_name) == position, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_POSITION(bitstream_size, 0x30);
|
||||
ASSERT_POSITION(last_frame_size, 0x48);
|
||||
ASSERT_POSITION(first_level, 0x70);
|
||||
ASSERT_POSITION(segmentation, 0x80);
|
||||
ASSERT_POSITION(loop_filter, 0xE4);
|
||||
ASSERT_POSITION(surface_params, 0xF0);
|
||||
#undef ASSERT_POSITION
|
||||
|
||||
#define ASSERT_POSITION(field_name, position) \
|
||||
static_assert(offsetof(EntropyProbs, field_name) == position, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_POSITION(inter_mode_prob, 0x400);
|
||||
ASSERT_POSITION(tx_8x8_prob, 0x470);
|
||||
ASSERT_POSITION(partition_prob, 0x4E0);
|
||||
ASSERT_POSITION(class_0, 0x540);
|
||||
ASSERT_POSITION(class_0_fr, 0x560);
|
||||
ASSERT_POSITION(coef_probs, 0x5A0);
|
||||
#undef ASSERT_POSITION
|
||||
|
||||
}; // namespace Decoder
|
||||
}; // namespace Tegra
|
||||
|
|
|
@ -8,22 +8,21 @@
|
|||
|
||||
namespace Tegra {
|
||||
|
||||
Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
|
||||
#define NVDEC_REG_INDEX(field_name) \
|
||||
(offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
|
||||
|
||||
Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {}
|
||||
|
||||
Nvdec::~Nvdec() = default;
|
||||
|
||||
void Nvdec::ProcessMethod(Method method, u32 argument) {
|
||||
if (method == Method::SetVideoCodec) {
|
||||
codec->StateWrite(static_cast<u32>(method), argument);
|
||||
} else {
|
||||
codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8);
|
||||
}
|
||||
void Nvdec::ProcessMethod(u32 method, u32 argument) {
|
||||
state.reg_array[method] = static_cast<u64>(argument) << 8;
|
||||
|
||||
switch (method) {
|
||||
case Method::SetVideoCodec:
|
||||
case NVDEC_REG_INDEX(set_codec_id):
|
||||
codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
|
||||
break;
|
||||
case Method::Execute:
|
||||
case NVDEC_REG_INDEX(execute):
|
||||
Execute();
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -14,16 +14,11 @@ class GPU;
|
|||
|
||||
class Nvdec {
|
||||
public:
|
||||
enum class Method : u32 {
|
||||
SetVideoCodec = 0x80,
|
||||
Execute = 0xc0,
|
||||
};
|
||||
|
||||
explicit Nvdec(GPU& gpu);
|
||||
~Nvdec();
|
||||
|
||||
/// Writes the method into the state, Invoke Execute() if encountered
|
||||
void ProcessMethod(Method method, u32 argument);
|
||||
void ProcessMethod(u32 method, u32 argument);
|
||||
|
||||
/// Return most recently decoded frame
|
||||
[[nodiscard]] AVFramePtr GetFrame();
|
||||
|
@ -33,6 +28,7 @@ private:
|
|||
void Execute();
|
||||
|
||||
GPU& gpu;
|
||||
NvdecCommon::NvdecRegisters state;
|
||||
std::unique_ptr<Codec> codec;
|
||||
};
|
||||
} // namespace Tegra
|
||||
|
|
|
@ -4,40 +4,13 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::NvdecCommon {
|
||||
|
||||
struct NvdecRegisters {
|
||||
INSERT_PADDING_WORDS(256);
|
||||
u64 set_codec_id{};
|
||||
INSERT_PADDING_WORDS(254);
|
||||
u64 set_platform_id{};
|
||||
u64 picture_info_offset{};
|
||||
u64 frame_bitstream_offset{};
|
||||
u64 frame_number{};
|
||||
u64 h264_slice_data_offsets{};
|
||||
u64 h264_mv_dump_offset{};
|
||||
INSERT_PADDING_WORDS(6);
|
||||
u64 frame_stats_offset{};
|
||||
u64 h264_last_surface_luma_offset{};
|
||||
u64 h264_last_surface_chroma_offset{};
|
||||
std::array<u64, 17> surface_luma_offset{};
|
||||
std::array<u64, 17> surface_chroma_offset{};
|
||||
INSERT_PADDING_WORDS(132);
|
||||
u64 vp9_entropy_probs_offset{};
|
||||
u64 vp9_backward_updates_offset{};
|
||||
u64 vp9_last_frame_segmap_offset{};
|
||||
u64 vp9_curr_frame_segmap_offset{};
|
||||
INSERT_PADDING_WORDS(2);
|
||||
u64 vp9_last_frame_mvs_offset{};
|
||||
u64 vp9_curr_frame_mvs_offset{};
|
||||
INSERT_PADDING_WORDS(2);
|
||||
};
|
||||
static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
|
||||
|
||||
enum class VideoCodec : u32 {
|
||||
enum class VideoCodec : u64 {
|
||||
None = 0x0,
|
||||
H264 = 0x3,
|
||||
Vp8 = 0x5,
|
||||
|
@ -45,4 +18,76 @@ enum class VideoCodec : u32 {
|
|||
Vp9 = 0x9,
|
||||
};
|
||||
|
||||
// NVDEC should use a 32-bit address space, but is mapped to 64-bit,
|
||||
// doubling the sizes here is compensating for that.
|
||||
struct NvdecRegisters {
|
||||
static constexpr std::size_t NUM_REGS = 0x178;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000
|
||||
VideoCodec set_codec_id; ///< 0x0400
|
||||
INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408
|
||||
u64 execute; ///< 0x0600
|
||||
INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608
|
||||
struct { ///< 0x0800
|
||||
union {
|
||||
BitField<0, 3, VideoCodec> codec;
|
||||
BitField<4, 1, u64> gp_timer_on;
|
||||
BitField<13, 1, u64> mb_timer_on;
|
||||
BitField<14, 1, u64> intra_frame_pslc;
|
||||
BitField<17, 1, u64> all_intra_frame;
|
||||
};
|
||||
} control_params;
|
||||
u64 picture_info_offset; ///< 0x0808
|
||||
u64 frame_bitstream_offset; ///< 0x0810
|
||||
u64 frame_number; ///< 0x0818
|
||||
u64 h264_slice_data_offsets; ///< 0x0820
|
||||
u64 h264_mv_dump_offset; ///< 0x0828
|
||||
INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830
|
||||
u64 frame_stats_offset; ///< 0x0848
|
||||
u64 h264_last_surface_luma_offset; ///< 0x0850
|
||||
u64 h264_last_surface_chroma_offset; ///< 0x0858
|
||||
std::array<u64, 17> surface_luma_offset; ///< 0x0860
|
||||
std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
|
||||
INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970
|
||||
u64 vp9_entropy_probs_offset; ///< 0x0B80
|
||||
u64 vp9_backward_updates_offset; ///< 0x0B88
|
||||
u64 vp9_last_frame_segmap_offset; ///< 0x0B90
|
||||
u64 vp9_curr_frame_segmap_offset; ///< 0x0B98
|
||||
INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0
|
||||
u64 vp9_last_frame_mvs_offset; ///< 0x0BA8
|
||||
u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0
|
||||
INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8
|
||||
};
|
||||
std::array<u64, NUM_REGS> reg_array;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(set_codec_id, 0x80);
|
||||
ASSERT_REG_POSITION(execute, 0xC0);
|
||||
ASSERT_REG_POSITION(control_params, 0x100);
|
||||
ASSERT_REG_POSITION(picture_info_offset, 0x101);
|
||||
ASSERT_REG_POSITION(frame_bitstream_offset, 0x102);
|
||||
ASSERT_REG_POSITION(frame_number, 0x103);
|
||||
ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104);
|
||||
ASSERT_REG_POSITION(frame_stats_offset, 0x109);
|
||||
ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
|
||||
ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
|
||||
ASSERT_REG_POSITION(surface_luma_offset, 0x10C);
|
||||
ASSERT_REG_POSITION(surface_chroma_offset, 0x11D);
|
||||
ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170);
|
||||
ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171);
|
||||
ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172);
|
||||
ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173);
|
||||
ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175);
|
||||
ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
} // namespace Tegra::NvdecCommon
|
||||
|
|
Loading…
Reference in a new issue