fix[sys_cfg]: Fix HAL layer buffer alignment issue

Platform:
RK3399, vp9/h265
RK3562/RK3576/RK3588, vp9

Error case:
The stride computed by sys_cfg does not match the stride
that the HAL layer calculates on its own.

Change-Id: I98f4e18fcb9af19850ca7dbd201bded8137e7861
Signed-off-by: Hongjin Li <vic.hong@rock-chips.com>
This commit is contained in:
Hongjin Li 2025-03-04 11:36:26 +08:00 committed by Herman Chen
parent 91812a40ba
commit f23096522b
5 changed files with 73 additions and 75 deletions

View file

@ -727,8 +727,6 @@ static void update_stream_buffer(MppBuffer streambuf, HalTaskInfo *syn)
MPP_RET hal_h265d_rkv_gen_regs(void *hal, HalTaskInfo *syn)
{
RK_S32 i = 0;
RK_S32 log2_min_cb_size;
RK_S32 width, height;
RK_S32 stride_y, stride_uv, virstrid_y, virstrid_yuv;
H265d_REGS_t *hw_regs;
RK_S32 ret = MPP_SUCCESS;
@ -739,6 +737,7 @@ MPP_RET hal_h265d_rkv_gen_regs(void *hal, HalTaskInfo *syn)
RK_U32 sw_ref_valid = 0;
RK_U32 stream_buf_size = 0;
HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
MppFrame mframe;
if (syn->dec.flags.parse_err ||
(syn->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
@ -797,20 +796,12 @@ MPP_RET hal_h265d_rkv_gen_regs(void *hal, HalTaskInfo *syn)
hw_regs = (H265d_REGS_t*)reg_ctx->hw_regs;
memset(hw_regs, 0, sizeof(H265d_REGS_t));
log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
stride_y = ((MPP_ALIGN(width, 64)
* (dxva_cxt->pp.bit_depth_luma_minus8 + 8)) >> 3);
stride_uv = ((MPP_ALIGN(width, 64)
* (dxva_cxt->pp.bit_depth_chroma_minus8 + 8)) >> 3);
stride_y = hevc_hor_align(stride_y);
stride_uv = hevc_hor_align(stride_uv);
virstrid_y = hevc_ver_align(height) * stride_y;
virstrid_yuv = virstrid_y + stride_uv * hevc_ver_align(height) / 2;
mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
SLOT_FRAME_PTR, &mframe);
stride_y = mpp_frame_get_hor_stride(mframe);
stride_uv = mpp_frame_get_hor_stride(mframe);
virstrid_y = mpp_frame_get_ver_stride(mframe) * stride_y;
virstrid_yuv = virstrid_y + stride_uv * mpp_frame_get_ver_stride(mframe) / 2;
hw_regs->sw_picparameter.sw_slice_num = dxva_cxt->slice_count;
hw_regs->sw_picparameter.sw_y_hor_virstride = stride_y >> 4;

View file

@ -248,7 +248,6 @@ MPP_RET hal_vp9d_rkv_gen_regs(void *hal, HalTaskInfo *task)
{
RK_S32 i;
RK_U8 bit_depth = 0;
RK_U32 pic_h[3] = { 0 };
RK_U32 ref_frame_width_y;
RK_U32 ref_frame_height_y;
RK_S32 stream_len = 0, aglin_offset = 0;
@ -261,12 +260,16 @@ MPP_RET hal_vp9d_rkv_gen_regs(void *hal, HalTaskInfo *task)
RK_U32 sw_uv_virstride;
RK_U32 sw_yuv_virstride ;
RK_U8 ref_idx = 0;
RK_U8 ref_frame_idx = 0;
RK_U32 *reg_ref_base = 0;
RK_S32 intraFlag = 0;
MppBuffer framebuf = NULL;
HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
Vp9dRkvCtx *hw_ctx = (Vp9dRkvCtx*)p_hal->hw_ctx;
DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;
MppFrame mframe = NULL;
mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
if (p_hal->fast_mode) {
for (i = 0; i < MAX_GEN_REG; i++) {
@ -304,15 +307,12 @@ MPP_RET hal_vp9d_rkv_gen_regs(void *hal, HalTaskInfo *task)
//--- caculate the yuv_frame_size and mv_size
bit_depth = pic_param->BitDepthMinus8Luma + 8;
pic_h[0] = vp9_ver_align(pic_param->height); //p_cm->height;
pic_h[1] = vp9_ver_align(pic_param->height) / 2; //(p_cm->height + 1) / 2;
pic_h[2] = pic_h[1];
sw_y_hor_virstride = (vp9_hor_align((pic_param->width * bit_depth) >> 3) >> 4);
sw_uv_hor_virstride = (vp9_hor_align((pic_param->width * bit_depth) >> 3) >> 4);
sw_y_virstride = pic_h[0] * sw_y_hor_virstride;
sw_y_hor_virstride = mpp_frame_get_hor_stride(mframe) >> 4;
sw_uv_hor_virstride = mpp_frame_get_hor_stride(mframe) >> 4;
sw_y_virstride = sw_y_hor_virstride * mpp_frame_get_ver_stride(mframe);
sw_uv_virstride = pic_h[1] * sw_uv_hor_virstride;
sw_uv_virstride = sw_uv_hor_virstride * mpp_frame_get_ver_stride(mframe) / 2;
sw_yuv_virstride = sw_y_virstride + sw_uv_virstride;
vp9_hw_regs->swreg3_picpar.sw_y_hor_virstride = sw_y_hor_virstride;
@ -359,24 +359,29 @@ MPP_RET hal_vp9d_rkv_gen_regs(void *hal, HalTaskInfo *task)
reg_ref_base = &vp9_hw_regs->swreg11_vp9_referlast_base;
for (i = 0; i < 3; i++) {
ref_idx = pic_param->frame_refs[i].Index7Bits;
ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits;
ref_frame_width_y = pic_param->ref_frame_coded_width[ref_idx];
ref_frame_height_y = pic_param->ref_frame_coded_height[ref_idx];
pic_h[0] = vp9_ver_align(ref_frame_height_y);
pic_h[1] = vp9_ver_align(ref_frame_height_y) / 2;
y_hor_virstride = (vp9_hor_align((ref_frame_width_y * bit_depth) >> 3) >> 4);
uv_hor_virstride = (vp9_hor_align((ref_frame_width_y * bit_depth) >> 3) >> 4);
y_virstride = y_hor_virstride * pic_h[0];
uv_virstride = uv_hor_virstride * pic_h[1];
if (ref_frame_idx < 0x7f) {
mpp_buf_slot_get_prop(p_hal->slots, ref_frame_idx, SLOT_FRAME_PTR, &mframe);
y_hor_virstride = mpp_frame_get_hor_stride(mframe) >> 4;
uv_hor_virstride = mpp_frame_get_hor_stride(mframe) >> 4;
y_virstride = y_hor_virstride * mpp_frame_get_ver_stride(mframe);
uv_virstride = uv_hor_virstride * mpp_frame_get_ver_stride(mframe) / 2;
} else {
y_hor_virstride = (vp9_hor_align((ref_frame_width_y * bit_depth) >> 3) >> 4);
uv_hor_virstride = (vp9_hor_align((ref_frame_width_y * bit_depth) >> 3) >> 4);
y_virstride = y_hor_virstride * vp9_ver_align(ref_frame_height_y);
uv_virstride = uv_hor_virstride * vp9_ver_align(ref_frame_height_y) / 2;
}
yuv_virstride = y_virstride + uv_virstride;
if (pic_param->ref_frame_map[ref_idx].Index7Bits < 0x7f) {
mpp_buf_slot_get_prop(p_hal->slots, pic_param->ref_frame_map[ref_idx].Index7Bits, SLOT_BUFFER, &framebuf);
}
if (ref_frame_idx < 0x7f)
mpp_buf_slot_get_prop(p_hal->slots, ref_frame_idx, SLOT_BUFFER, &framebuf);
if (pic_param->ref_frame_map[ref_idx].Index7Bits < 0x7f) {
if (ref_frame_idx < 0x7f) {
switch (i) {
case 0: {
vp9_hw_regs->swreg17_vp9_frame_size_last.sw_framewidth_last = ref_frame_width_y;
vp9_hw_regs->swreg17_vp9_frame_size_last.sw_frameheight_last = ref_frame_height_y;
vp9_hw_regs->swreg37_vp9_lastf_hor_virstride.sw_vp9_lastfy_hor_virstride = y_hor_virstride;
@ -403,7 +408,6 @@ MPP_RET hal_vp9d_rkv_gen_regs(void *hal, HalTaskInfo *task)
}
default:
break;
}
/*0 map to 11*/
@ -412,7 +416,7 @@ MPP_RET hal_vp9d_rkv_gen_regs(void *hal, HalTaskInfo *task)
if (framebuf != NULL) {
reg_ref_base[i] = mpp_buffer_get_fd(framebuf);
} else {
mpp_log("ref buff address is no valid used out as base slot index 0x%x", pic_param->ref_frame_map[ref_idx].Index7Bits);
mpp_log("ref buff address is no valid used out as base slot index 0x%x", ref_frame_idx);
reg_ref_base[i] = vp9_hw_regs->swreg7_decout_base; //set
}
} else {

View file

@ -413,7 +413,6 @@ static MPP_RET hal_vp9d_vdpu34x_gen_regs(void *hal, HalTaskInfo *task)
{
RK_S32 i;
RK_U8 bit_depth = 0;
RK_U32 pic_h[3] = { 0 };
RK_U32 ref_frame_width_y;
RK_U32 ref_frame_height_y;
RK_S32 stream_len = 0, aglin_offset = 0;
@ -621,9 +620,6 @@ static MPP_RET hal_vp9d_vdpu34x_gen_regs(void *hal, HalTaskInfo *task)
//--- caculate the yuv_frame_size and mv_size
bit_depth = pic_param->BitDepthMinus8Luma + 8;
pic_h[0] = vp9_ver_align(pic_param->height);
pic_h[1] = vp9_ver_align(pic_param->height) / 2;
pic_h[2] = pic_h[1];
{
MppFrame mframe = NULL;
@ -641,9 +637,9 @@ static MPP_RET hal_vp9d_vdpu34x_gen_regs(void *hal, HalTaskInfo *task)
vp9_hw_regs->common.reg019.uv_hor_virstride = fbc_hdr_stride >> 4;
vp9_hw_regs->common.reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4;
} else {
sw_y_hor_virstride = (vp9_hor_align((pic_param->width * bit_depth) >> 3) >> 4);
sw_uv_hor_virstride = (vp9_hor_align((pic_param->width * bit_depth) >> 3) >> 4);
sw_y_virstride = pic_h[0] * sw_y_hor_virstride;
sw_y_hor_virstride = mpp_frame_get_hor_stride(mframe) >> 4;
sw_uv_hor_virstride = sw_y_hor_virstride;
sw_y_virstride = mpp_frame_get_ver_stride(mframe) * sw_y_hor_virstride;
vp9_hw_regs->common.reg012.fbc_e = 0;
vp9_hw_regs->common.reg018.y_hor_virstride = sw_y_hor_virstride;
@ -682,8 +678,6 @@ static MPP_RET hal_vp9d_vdpu34x_gen_regs(void *hal, HalTaskInfo *task)
ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits;
ref_frame_width_y = pic_param->ref_frame_coded_width[ref_idx];
ref_frame_height_y = pic_param->ref_frame_coded_height[ref_idx];
pic_h[0] = vp9_ver_align(ref_frame_height_y);
pic_h[1] = vp9_ver_align(ref_frame_height_y) / 2;
if (ref_frame_idx < 0x7f)
mpp_buf_slot_get_prop(p_hal ->slots, ref_frame_idx, SLOT_FRAME_PTR, &frame);
@ -696,8 +690,13 @@ static MPP_RET hal_vp9d_vdpu34x_gen_regs(void *hal, HalTaskInfo *task)
y_hor_virstride = uv_hor_virstride = fbc_hdr_stride >> 4;
y_virstride = fbd_offset;
} else {
y_hor_virstride = uv_hor_virstride = (vp9_hor_align((ref_frame_width_y * bit_depth) >> 3) >> 4);
y_virstride = y_hor_virstride * pic_h[0];
if (frame) {
y_hor_virstride = uv_hor_virstride = mpp_frame_get_hor_stride(frame) >> 4;
y_virstride = y_hor_virstride * mpp_frame_get_ver_stride(frame);
} else {
y_hor_virstride = uv_hor_virstride = (vp9_hor_align((ref_frame_width_y * bit_depth) >> 3) >> 4);
y_virstride = y_hor_virstride * vp9_ver_align(ref_frame_height_y);
}
}
if (pic_param->ref_frame_map[ref_idx].Index7Bits < 0x7f) {

View file

@ -426,7 +426,6 @@ static MPP_RET hal_vp9d_vdpu382_gen_regs(void *hal, HalTaskInfo *task)
{
RK_S32 i;
RK_U8 bit_depth = 0;
RK_U32 pic_h[3] = { 0 };
RK_U32 ref_frame_width_y;
RK_U32 ref_frame_height_y;
RK_S32 stream_len = 0, aglin_offset = 0;
@ -631,9 +630,6 @@ static MPP_RET hal_vp9d_vdpu382_gen_regs(void *hal, HalTaskInfo *task)
//--- caculate the yuv_frame_size and mv_size
bit_depth = pic_param->BitDepthMinus8Luma + 8;
pic_h[0] = vp9_ver_align(pic_param->height);
pic_h[1] = vp9_ver_align(pic_param->height) / 2;
pic_h[2] = pic_h[1];
{
MppFrame mframe = NULL;
@ -651,9 +647,9 @@ static MPP_RET hal_vp9d_vdpu382_gen_regs(void *hal, HalTaskInfo *task)
vp9_hw_regs->common.reg019.uv_hor_virstride = fbc_hdr_stride >> 4;
vp9_hw_regs->common.reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4;
} else {
sw_y_hor_virstride = (vp9_hor_align((pic_param->width * bit_depth) >> 3) >> 4);
sw_uv_hor_virstride = (vp9_hor_align((pic_param->width * bit_depth) >> 3) >> 4);
sw_y_virstride = pic_h[0] * sw_y_hor_virstride;
sw_y_hor_virstride = mpp_frame_get_hor_stride(mframe) >> 4;
sw_uv_hor_virstride = sw_y_hor_virstride;
sw_y_virstride = mpp_frame_get_ver_stride(mframe) * sw_y_hor_virstride;
vp9_hw_regs->common.reg012.fbc_e = 0;
vp9_hw_regs->common.reg018.y_hor_virstride = sw_y_hor_virstride;
@ -692,8 +688,6 @@ static MPP_RET hal_vp9d_vdpu382_gen_regs(void *hal, HalTaskInfo *task)
ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits;
ref_frame_width_y = pic_param->ref_frame_coded_width[ref_idx];
ref_frame_height_y = pic_param->ref_frame_coded_height[ref_idx];
pic_h[0] = vp9_ver_align(ref_frame_height_y);
pic_h[1] = vp9_ver_align(ref_frame_height_y) / 2;
if (ref_frame_idx < 0x7f)
mpp_buf_slot_get_prop(p_hal ->slots, ref_frame_idx, SLOT_FRAME_PTR, &frame);
@ -706,8 +700,13 @@ static MPP_RET hal_vp9d_vdpu382_gen_regs(void *hal, HalTaskInfo *task)
y_hor_virstride = uv_hor_virstride = fbc_hdr_stride >> 4;
y_virstride = fbd_offset;
} else {
y_hor_virstride = uv_hor_virstride = (vp9_hor_align((ref_frame_width_y * bit_depth) >> 3) >> 4);
y_virstride = y_hor_virstride * pic_h[0];
if (frame) {
y_hor_virstride = uv_hor_virstride = mpp_frame_get_hor_stride(frame) >> 4;
y_virstride = y_hor_virstride * mpp_frame_get_ver_stride(frame);
} else {
y_hor_virstride = uv_hor_virstride = (vp9_hor_align((ref_frame_width_y * bit_depth) >> 3) >> 4);
y_virstride = y_hor_virstride * vp9_ver_align(ref_frame_height_y);
}
}
if (pic_param->ref_frame_map[ref_idx].Index7Bits < 0x7f) {

View file

@ -684,7 +684,6 @@ static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
{
RK_S32 i;
RK_U8 bit_depth = 0;
RK_U32 pic_h[3] = { 0 };
RK_U32 ref_frame_width_y;
RK_U32 ref_frame_height_y;
RK_S32 stream_len = 0, aglin_offset = 0;
@ -696,6 +695,7 @@ static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
RK_U32 sw_y_virstride;
RK_U32 sw_uv_virstride;
RK_U8 ref_idx = 0;
RK_U8 ref_frame_idx = 0;
RK_U32 *reg_ref_base = NULL;
RK_U32 *reg_payload_ref_base = NULL;
RK_S32 intraFlag = 0;
@ -711,6 +711,7 @@ static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
RK_S32 mv_size = pic_param->width * pic_param->height / 2;
RK_U32 frame_ctx_id = pic_param->frame_context_idx;
MppFrame mframe;
MppFrame ref_frame = NULL;
if (p_hal->fast_mode) {
for (i = 0; i < MAX_GEN_REG; i++) {
@ -847,9 +848,6 @@ static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
//--- caculate the yuv_frame_size and mv_size
bit_depth = pic_param->BitDepthMinus8Luma + 8;
pic_h[0] = vp9_ver_align(pic_param->height);
pic_h[1] = vp9_ver_align(pic_param->height) / 2;
pic_h[2] = pic_h[1];
{
mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
@ -867,9 +865,9 @@ static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
/* error stride */
vp9_hw_regs->vp9d_paras.reg80_error_ref_hor_virstride = fbc_hdr_stride / 64;
} else {
sw_y_hor_virstride = (mpp_align_128_odd_plus_64((pic_param->width * bit_depth) >> 3) >> 4);
sw_uv_hor_virstride = (mpp_align_128_odd_plus_64((pic_param->width * bit_depth) >> 3) >> 4);
sw_y_virstride = pic_h[0] * sw_y_hor_virstride;
sw_y_hor_virstride = mpp_frame_get_hor_stride(mframe) >> 4;
sw_uv_hor_virstride = sw_y_hor_virstride;
sw_y_virstride = mpp_frame_get_ver_stride(mframe) * sw_y_hor_virstride;
sw_uv_virstride = sw_y_virstride / 2;
vp9_hw_regs->ctrl_regs.reg9.fbc_e = 0;
@ -953,23 +951,30 @@ static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
reg_payload_ref_base = vp9_hw_regs->vp9d_addrs.reg195_210_payload_st_ref_base;
for (i = 0; i < 3; i++) {
ref_idx = pic_param->frame_refs[i].Index7Bits;
ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits;
ref_frame_width_y = pic_param->ref_frame_coded_width[ref_idx];
ref_frame_height_y = pic_param->ref_frame_coded_height[ref_idx];
pic_h[0] = vp9_ver_align(ref_frame_height_y);
pic_h[1] = vp9_ver_align(ref_frame_height_y) / 2;
if (ref_frame_idx < 0x7f)
mpp_buf_slot_get_prop(p_hal ->slots, ref_frame_idx, SLOT_FRAME_PTR, &ref_frame);
if (fbc_en) {
y_hor_virstride = uv_hor_virstride = MPP_ALIGN(ref_frame_width_y, 64) / 64;
if (*compat_ext_fbc_hdr_256_odd)
y_hor_virstride = uv_hor_virstride = (MPP_ALIGN(ref_frame_width_y, 256) | 256) / 64;
} else {
y_hor_virstride = uv_hor_virstride = (mpp_align_128_odd_plus_64((ref_frame_width_y * bit_depth) >> 3) >> 4);
if (ref_frame)
y_hor_virstride = uv_hor_virstride = (mpp_frame_get_hor_stride(ref_frame) >> 4);
else
y_hor_virstride = uv_hor_virstride = (mpp_align_128_odd_plus_64((ref_frame_width_y * bit_depth) >> 3) >> 4);
}
y_virstride = y_hor_virstride * pic_h[0];
if (ref_frame)
y_virstride = y_hor_virstride * mpp_frame_get_ver_stride(ref_frame);
else
y_virstride = y_hor_virstride * vp9_ver_align(ref_frame_height_y);
if (pic_param->ref_frame_map[ref_idx].Index7Bits < 0x7f) {
mpp_buf_slot_get_prop(p_hal ->slots, pic_param->ref_frame_map[ref_idx].Index7Bits, SLOT_BUFFER, &framebuf);
if (ref_frame_idx < 0x7f) {
mpp_buf_slot_get_prop(p_hal ->slots, ref_frame_idx, SLOT_BUFFER, &framebuf);
if (hw_ctx->origin_bufs && mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY) {
origin_buf = hal_bufs_get_buf(hw_ctx->origin_bufs, pic_param->ref_frame_map[ref_idx].Index7Bits);
origin_buf = hal_bufs_get_buf(hw_ctx->origin_bufs, ref_frame_idx);
framebuf = origin_buf->buf[0];
}
@ -1000,11 +1005,11 @@ static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
reg_ref_base[i] = mpp_buffer_get_fd(framebuf);
reg_payload_ref_base[i] = mpp_buffer_get_fd(framebuf);
} else {
mpp_log("ref buff address is no valid used out as base slot index 0x%x", pic_param->ref_frame_map[ref_idx].Index7Bits);
mpp_log("ref buff address is no valid used out as base slot index 0x%x", ref_frame_idx);
reg_ref_base[i] = vp9_hw_regs->vp9d_addrs.reg168_decout_base;
reg_payload_ref_base[i] = vp9_hw_regs->vp9d_addrs.reg168_decout_base;
}
mv_buf = hal_bufs_get_buf(hw_ctx->cmv_bufs, pic_param->ref_frame_map[ref_idx].Index7Bits);
mv_buf = hal_bufs_get_buf(hw_ctx->cmv_bufs, ref_frame_idx);
} else {
reg_ref_base[i] = vp9_hw_regs->vp9d_addrs.reg168_decout_base;
reg_payload_ref_base[i] = vp9_hw_regs->vp9d_addrs.reg168_decout_base;