feat[vepu510]: Add smart v3 interface

Change-Id: Ib2dea60f794f1ef404cf91b75053d8cf2639fd6b
Signed-off-by: timkingh.huang <timkingh.huang@rock-chips.com>
This commit is contained in:
timkingh.huang 2025-05-13 10:56:52 +08:00 committed by Herman Chen
parent fb6f170362
commit 560ac10baf
17 changed files with 248 additions and 90 deletions

View file

@ -29,7 +29,7 @@
/* Debug category flags passed to hal_h265e_dbg() by the hal_h265e_dbg_*() wrappers. */
/*
 * NOTE(review): HAL_H265E_DBG_RDO_REGS (0xC0) is the bitwise OR of
 * HAL_H265E_DBG_RCKUT_REGS (0x40) and HAL_H265E_DBG_WGT_REGS (0x80), so it is
 * not an independent bit. If hal_h265e_dbg() tests these as a bitmask, the
 * three categories cannot be enabled separately - confirm this is intended.
 */
#define HAL_H265E_DBG_RCKUT_REGS (0x00000040)
#define HAL_H265E_DBG_WGT_REGS (0x00000080)
#define HAL_H265E_DBG_RDO_REGS (0x000000C0)
#define HAL_H265E_DBG_ST_REGS (0x00000100) /* status registers */
#define HAL_H265E_DBG_INPUT (0x00020000)
#define HAL_H265E_DBG_OUTPUT (0x00040000)
@ -49,6 +49,7 @@
/*
 * Category-specific logging wrappers: each one forwards its printf-style
 * arguments to hal_h265e_dbg() together with the matching HAL_H265E_DBG_* flag.
 */
#define hal_h265e_dbg_rckut(fmt, ...) hal_h265e_dbg(HAL_H265E_DBG_RCKUT_REGS, fmt, ## __VA_ARGS__)
#define hal_h265e_dbg_wgt(fmt, ...) hal_h265e_dbg(HAL_H265E_DBG_WGT_REGS, fmt, ## __VA_ARGS__)
#define hal_h265e_dbg_rdo(fmt, ...) hal_h265e_dbg(HAL_H265E_DBG_RDO_REGS, fmt, ## __VA_ARGS__)
#define hal_h265e_dbg_st(fmt, ...) hal_h265e_dbg(HAL_H265E_DBG_ST_REGS, fmt, ## __VA_ARGS__)
#define hal_h265e_dbg_input(fmt, ...) hal_h265e_dbg(HAL_H265E_DBG_INPUT, fmt, ## __VA_ARGS__)
#define hal_h265e_dbg_output(fmt, ...) hal_h265e_dbg(HAL_H265E_DBG_OUTPUT, fmt, ## __VA_ARGS__)

View file

@ -2205,6 +2205,12 @@ typedef struct Vepu510H265RoiBlkCfg {
RK_U32 mdc_adju_intra : 4;
} Vepu510H265RoiBlkCfg;
/* Object information produced by the NPU and consumed by the VEPU510 tuning code. */
typedef struct Vepu510NpuOut_t {
/* NOTE(review): presumably the number of detected objects (or a found flag) - confirm with the producer */
RK_S32 found_objects;
/* npu object flag: one byte for each block16x16 */
RK_U8 *object_seg_map;
} Vepu510NpuOut;
#ifdef __cplusplus
extern "C" {
#endif

View file

@ -124,8 +124,8 @@ typedef struct HalH264eVepu510Ctx_t {
/* roi */
void *roi_data;
MppBufferGroup roi_grp;
MppBuffer roi_base_cfg_buf;
RK_S32 roi_base_buf_size;
MppBuffer roir_buf;
RK_S32 roir_buf_size;
/* two-pass deflicker */
MppBuffer buf_pass1;
@ -160,6 +160,7 @@ typedef struct HalH264eVepu510Ctx_t {
void *tune;
RK_S32 smart_en;
RK_S32 qpmap_en;
RK_S32 sp_enc_en;
} HalH264eVepu510Ctx;
#include "hal_h264e_vepu510_tune.c"
@ -241,10 +242,10 @@ static MPP_RET hal_h264e_vepu510_deinit(void *hal)
p->hw_recn = NULL;
}
if (p->roi_base_cfg_buf) {
mpp_buffer_put(p->roi_base_cfg_buf);
p->roi_base_cfg_buf = NULL;
p->roi_base_buf_size = 0;
if (p->roir_buf) {
mpp_buffer_put(p->roir_buf);
p->roir_buf = NULL;
p->roir_buf_size = 0;
}
if (p->roi_grp) {
@ -555,6 +556,7 @@ static MPP_RET hal_h264e_vepu510_get_task(void *hal, HalEncTask *task)
ctx->smart_en = (ctx->cfg->rc.rc_mode == MPP_ENC_RC_MODE_SMTRC);
ctx->qpmap_en = ctx->cfg->tune.deblur_en;
ctx->sp_enc_en = ctx->cfg->rc.rc_mode == MPP_ENC_RC_MODE_SE;
if (updated & SYN_TYPE_FLAG(H264E_SYN_CFG))
setup_hal_bufs(ctx);
@ -1224,7 +1226,7 @@ static void setup_vepu510_rc_base(HalVepu510RegSet *regs, HalH264eVepu510Ctx *ct
reg_frm->common.rc_qp.rc_min_qp = qp_min;
reg_frm->common.rc_tgt.ctu_ebit = mb_target_bits_mul_16;
if (rc->rc_mode == MPP_ENC_RC_MODE_SMTRC) {
if (rc->rc_mode == MPP_ENC_RC_MODE_SMTRC || rc->rc_mode == MPP_ENC_RC_MODE_SE) {
reg_frm->common.rc_qp.rc_qp_range = 0;
} else {
reg_frm->common.rc_qp.rc_qp_range = (slice->slice_type == H264_I_SLICE) ?
@ -1419,7 +1421,7 @@ static MPP_RET setup_vepu510_intra_refresh(HalVepu510RegSet *regs, HalH264eVepu5
RK_U32 refresh_num = ctx->cfg->rc.refresh_num;
RK_U32 stride_h = MPP_ALIGN(mb_w, 4);
RK_U32 stride_v = MPP_ALIGN(mb_h, 4);
RK_U32 roi_base_buf_size = stride_h * stride_v * 8;
RK_U32 roir_buf_size = stride_h * stride_v * 8;
RK_U32 i = 0;
hal_h264e_dbg_func("enter\n");
@ -1429,15 +1431,15 @@ static MPP_RET setup_vepu510_intra_refresh(HalVepu510RegSet *regs, HalH264eVepu5
goto RET;
}
if (NULL == ctx->roi_base_cfg_buf) {
if (NULL == ctx->roir_buf) {
if (NULL == ctx->roi_grp)
mpp_buffer_group_get_internal(&ctx->roi_grp, MPP_BUFFER_TYPE_ION);
mpp_buffer_get(ctx->roi_grp, &ctx->roi_base_cfg_buf, roi_base_buf_size);
ctx->roi_base_buf_size = roi_base_buf_size;
mpp_buffer_get(ctx->roi_grp, &ctx->roir_buf, roir_buf_size);
ctx->roir_buf_size = roir_buf_size;
}
mpp_assert(ctx->roi_base_cfg_buf);
void *base_cfg_buf = mpp_buffer_get_ptr(ctx->roi_base_cfg_buf);
mpp_assert(ctx->roir_buf);
void *base_cfg_buf = mpp_buffer_get_ptr(ctx->roir_buf);
Vepu510RoiH264BsCfg base_cfg;
Vepu510RoiH264BsCfg *base_cfg_ptr = (Vepu510RoiH264BsCfg *)base_cfg_buf;
@ -1523,7 +1525,7 @@ static void setup_vepu510_recn_refr(HalH264eVepu510Ctx *ctx, HalVepu510RegSet *r
if (refr && refr->cnt) {
MppBuffer buf_pixel = refr->buf[0];
MppBuffer buf_thumb = refr->buf[1];
MppBuffer buf_smear = curr->buf[2];
MppBuffer buf_smear = refr->buf[2];
RK_S32 fd = mpp_buffer_get_fd(buf_pixel);
mpp_assert(buf_pixel);
@ -2195,7 +2197,7 @@ static MPP_RET hal_h264e_vepu510_gen_regs(void *hal, HalEncTask *task)
setup_vepu510_recn_refr(ctx, regs);
reg_frm->common.meiw_addr = task->md_info ? mpp_buffer_get_fd(task->md_info) : 0;
reg_frm->common.enc_pic.mei_stor = 0;
reg_frm->common.enc_pic.mei_stor = task->md_info ? 1 : 0;
reg_frm->common.pic_ofst.pic_ofst_y = mpp_frame_get_offset_y(task->frame);
reg_frm->common.pic_ofst.pic_ofst_x = mpp_frame_get_offset_x(task->frame);
@ -2212,7 +2214,7 @@ static MPP_RET hal_h264e_vepu510_gen_regs(void *hal, HalEncTask *task)
vepu510_set_roi(&ctx->regs_set->reg_rc_roi.roi_cfg, ctx->roi_data,
ctx->cfg->prep.width, ctx->cfg->prep.height);
vepu510_h264e_tune_reg_patch(ctx->tune);
vepu510_h264e_tune_reg_patch(ctx->tune, task);
/* two pass register patch */
if (frm->save_pass1)

View file

@ -23,13 +23,17 @@
/* Tuning state attached to one H.264 VEPU510 HAL context. */
typedef struct HalH264eVepu510Tune_t {
/* HAL context this tuning state belongs to; the tuning helpers reach the HAL through it */
HalH264eVepu510Ctx *ctx;
/* allocated on first use by the qpmap init helper and released in vepu510_h264e_tune_deinit() */
RK_U8 *qm_mv_buf; /* qpmap mv buffer */
/* size of qm_mv_buf in bytes */
RK_U32 qm_mv_buf_size;
Vepu510NpuOut *obj_out; /* object map from npu */
/* NOTE(review): presumably MADP / MADI statistics kept from earlier frames - usage is not visible here */
RK_S32 pre_madp[2];
RK_S32 pre_madi[2];
} HalH264eVepu510Tune;
static HalH264eVepu510Tune *vepu510_h264e_tune_init(HalH264eVepu510Ctx *ctx)
{
HalH264eVepu510Tune *tune = mpp_malloc(HalH264eVepu510Tune, 1);
HalH264eVepu510Tune *tune = mpp_calloc(HalH264eVepu510Tune, 1);
if (NULL == tune)
return tune;
@ -43,15 +47,84 @@ static HalH264eVepu510Tune *vepu510_h264e_tune_init(HalH264eVepu510Ctx *ctx)
static void vepu510_h264e_tune_deinit(void *tune)
{
HalH264eVepu510Tune * t = (HalH264eVepu510Tune *)tune;
MPP_FREE(t->qm_mv_buf);
MPP_FREE(tune);
}
static void vepu510_h264e_tune_reg_patch(void *p)
/*
 * Lazily prepare the buffers used by the H.264 qpmap tuning path.
 *
 * - Without external ROI data (ctx->roi_data == NULL) the hardware buffer
 *   ctx->roir_buf is allocated from ctx->roi_grp on first use (4 bytes per
 *   16x16 block of the 64x16 aligned picture) and its fd is written to
 *   reg_frm->common.adr_roir.
 * - tune->qm_mv_buf (one zeroed byte per 16x16 block) is allocated on first use.
 *
 * Returns MPP_OK on success and MPP_ERR_MALLOC when a buffer is missing.
 */
static MPP_RET vepu510_h264e_tune_qpmap_init(HalH264eVepu510Tune *tune)
{
    HalH264eVepu510Ctx *ctx = tune->ctx;
    HalVepu510RegSet *regs = ctx->regs_set;
    H264eVepu510Frame *reg_frm = &regs->reg_frm;
    RK_S32 w64 = MPP_ALIGN(ctx->cfg->prep.width, 64);
    RK_S32 h16 = MPP_ALIGN(ctx->cfg->prep.height, 16);
    RK_S32 roir_buf_fd = -1;

    if (ctx->roi_data) {
        //TODO: external qpmap buffer
    } else {
        if (NULL == ctx->roir_buf) {
            if (NULL == ctx->roi_grp)
                mpp_buffer_group_get_internal(&ctx->roi_grp, MPP_BUFFER_TYPE_ION);

            //TODO: bmap_mdc_dpth = 1 ???
            ctx->roir_buf_size = w64 * h16 / 256 * 4;
            mpp_buffer_get(ctx->roi_grp, &ctx->roir_buf, ctx->roir_buf_size);

            /* check the allocation before the buffer is queried for its fd */
            if (NULL == ctx->roir_buf) {
                ctx->roir_buf_size = 0;
                mpp_err("failed to get roir_buf\n");
                return MPP_ERR_MALLOC;
            }
        }

        roir_buf_fd = mpp_buffer_get_fd(ctx->roir_buf);
    }

    if (ctx->roir_buf == NULL) {
        mpp_err("failed to get roir_buf\n");
        return MPP_ERR_MALLOC;
    }

    /*
     * Only patch the register with a real fd. On the external ROI path no fd
     * is resolved yet (see TODO above) and -1 must not reach the register.
     */
    if (roir_buf_fd >= 0)
        reg_frm->common.adr_roir = roir_buf_fd;

    if (tune->qm_mv_buf == NULL) {
        tune->qm_mv_buf_size = w64 * h16 / 256;
        tune->qm_mv_buf = mpp_calloc(RK_U8, tune->qm_mv_buf_size);
        if (NULL == tune->qm_mv_buf) {
            mpp_err("failed to get qm_mv_buf\n");
            return MPP_ERR_MALLOC;
        }
    }

    hal_h264e_dbg_detail("roir_buf_fd %d, size %d qm_mv_buf %p size %d\n",
                         roir_buf_fd, ctx->roir_buf_size, tune->qm_mv_buf,
                         tune->qm_mv_buf_size);

    return MPP_OK;
}
/*
 * Run the qpmap tuning step for one task.
 *
 * Currently this only makes sure the qpmap buffers exist; the task argument
 * is not used yet. A failed buffer setup is logged and the step is abandoned.
 */
static void vepu510_h264e_tune_qpmap(void *p, HalEncTask *task)
{
    HalH264eVepu510Tune *tune_state = (HalH264eVepu510Tune *)p;

    (void)task;

    hal_h264e_dbg_func("enter\n");

    if (vepu510_h264e_tune_qpmap_init(tune_state) == MPP_OK)
        hal_h264e_dbg_func("leave\n");
    else
        mpp_err("failed to init qpmap\n");
}
/*
 * Apply tuning related register patches for the given task.
 *
 * Does nothing when no tuning state exists. The qpmap step runs only when
 * qpmap is enabled on the HAL context and the task carries an md_info buffer.
 */
static void vepu510_h264e_tune_reg_patch(void *p, HalEncTask *task)
{
    HalH264eVepu510Tune *tune_state = (HalH264eVepu510Tune *)p;

    if (!tune_state)
        return;

    /* qpmap needs both the feature switch and the motion info buffer */
    if (!tune_state->ctx->qpmap_en)
        return;

    if (NULL == task->md_info)
        return;

    vepu510_h264e_tune_qpmap(tune_state, task);
}
static void vepu510_h264e_tune_stat_update(void *p, HalEncTask *task)

View file

@ -154,6 +154,7 @@ typedef struct H265eV510HalContext_t {
RK_S32 qpmap_en;
RK_S32 smart_en;
RK_S32 sp_enc_en;
/* external line buffer over 3K */
MppBufferGroup ext_line_buf_grp;
@ -861,7 +862,7 @@ static void vepu510_h265_smear_cfg(H265eVepu510Sqi *reg, H265eV510HalContext *ct
reg->smear_opt_cfg0.anti_smear_en = 1;
if (deblur_en == 0)
reg->smear_opt_cfg0.anti_smear_en = 0;
reg->smear_opt_cfg0.smear_strength = smear_strength[deblur_str] + smear_flag_bndry_wgt[deblur_en];
reg->smear_opt_cfg0.smear_strength = smear_strength[deblur_str] + smear_flag_bndry_wgt[flag_cover];
reg->smear_opt_cfg0.thre_mv_inconfor_cime = 8;
reg->smear_opt_cfg0.thre_mv_confor_cime = 2;
reg->smear_opt_cfg0.thre_mv_inconfor_cime_gmv = 8;
@ -961,7 +962,7 @@ static void vepu510_h265_global_cfg_set(H265eV510HalContext *ctx, H265eV510RegSe
if (hw->qbias_en) {
reg_param->qnt_bias_comb.qnt_f_bias_i = hw->qbias_i;
reg_param->qnt_bias_comb.qnt_f_bias_p = hw->qbias_p;
} else if (ctx->smart_en) {
} else if (ctx->smart_en || ctx->sp_enc_en) {
reg_param->qnt_bias_comb.qnt_f_bias_i = 144;
}
@ -1335,7 +1336,7 @@ static MPP_RET vepu510_h265_set_rc_regs(H265eV510HalContext *ctx, H265eV510RegSe
reg_frm->common.rc_qp.rc_min_qp = rc_cfg->quality_min;
reg_frm->common.rc_tgt.ctu_ebit = ctu_target_bits_mul_16;
if (ctx->smart_en) {
if (ctx->smart_en || ctx->sp_enc_en) {
reg_frm->common.rc_qp.rc_qp_range = 0;
} else {
reg_frm->common.rc_qp.rc_qp_range = (ctx->frame_type == INTRA_FRAME) ?
@ -2508,6 +2509,7 @@ MPP_RET hal_h265e_v510_get_task(void *hal, HalEncTask *task)
ctx->dpb = (H265eDpb*)ctx->syn->dpb;
ctx->smart_en = (ctx->cfg->rc.rc_mode == MPP_ENC_RC_MODE_SMTRC);
ctx->qpmap_en = ctx->cfg->tune.deblur_en;
ctx->sp_enc_en = ctx->cfg->rc.rc_mode == MPP_ENC_RC_MODE_SE;
if (vepu510_h265_setup_hal_bufs(ctx)) {
hal_h265e_err("vepu541_h265_allocate_buffers failed, free buffers and return\n");

View file

@ -22,6 +22,7 @@ typedef struct HalH265eVepu510Tune_t {
RK_U8 *qm_mv_buf; /* qpmap move flag buffer */
RK_U32 qm_mv_buf_size;
Vepu510NpuOut *obj_out; /* object map from npu */
RK_S32 pre_madp[2];
RK_S32 pre_madi[2];
@ -144,17 +145,86 @@ static void vepu510_h265e_tune_aq(HalH265eVepu510Tune *tune)
r->aq_clip.aq8_rnge = 10;
r->aq_clip.aq16_dif0 = 12;
r->aq_clip.aq16_dif1 = 12;
r->aq_clip.aq_rme_en = 1;
r->aq_clip.aq_cme_en = 1;
}
/*
 * Lazily prepare the buffers used by the H.265 qpmap tuning path.
 *
 * - Without external ROI data (frm->roi_data == NULL) the hardware buffer
 *   frm->roir_buf is allocated from ctx->roi_grp on first use (4 bytes per
 *   16x16 block of the 32x32 aligned picture) and its fd is written to
 *   reg_frm->common.adr_roir.
 * - tune->qm_mv_buf (one zeroed byte per 16x16 block) is allocated on first use.
 *
 * Returns MPP_OK on success and MPP_ERR_MALLOC when a buffer is missing.
 */
static MPP_RET vepu510_h265e_tune_qpmap_init(HalH265eVepu510Tune *tune)
{
    H265eV510HalContext *ctx = tune->ctx;
    Vepu510H265eFrmCfg *frm = ctx->frm;
    H265eV510RegSet *regs = frm->regs_set;
    H265eVepu510Frame *reg_frm = &regs->reg_frm;
    RK_S32 w32 = MPP_ALIGN(ctx->cfg->prep.width, 32);
    RK_S32 h32 = MPP_ALIGN(ctx->cfg->prep.height, 32);
    RK_S32 roir_buf_fd = -1;

    if (frm->roi_data) {
        //TODO: external qpmap buffer
    } else {
        if (NULL == frm->roir_buf) {
            if (NULL == ctx->roi_grp)
                mpp_buffer_group_get_internal(&ctx->roi_grp, MPP_BUFFER_TYPE_ION);

            //TODO: bmap_mdc_dpth = 1 ???
            frm->roir_buf_size = w32 * h32 / 256 * 4;
            mpp_buffer_get(ctx->roi_grp, &frm->roir_buf, frm->roir_buf_size);

            /* check the allocation before the buffer is queried for its fd */
            if (NULL == frm->roir_buf) {
                frm->roir_buf_size = 0;
                mpp_err("failed to get roir_buf\n");
                return MPP_ERR_MALLOC;
            }
        }

        roir_buf_fd = mpp_buffer_get_fd(frm->roir_buf);
    }

    if (frm->roir_buf == NULL) {
        mpp_err("failed to get roir_buf\n");
        return MPP_ERR_MALLOC;
    }

    /*
     * Only patch the register with a real fd. On the external ROI path no fd
     * is resolved yet (see TODO above) and -1 must not reach the register.
     */
    if (roir_buf_fd >= 0)
        reg_frm->common.adr_roir = roir_buf_fd;

    if (tune->qm_mv_buf == NULL) {
        tune->qm_mv_buf_size = w32 * h32 / 256;
        tune->qm_mv_buf = mpp_calloc(RK_U8, tune->qm_mv_buf_size);
        if (NULL == tune->qm_mv_buf) {
            mpp_err("failed to get qm_mv_buf\n");
            return MPP_ERR_MALLOC;
        }
    }

    hal_h265e_dbg_ctl("roir_buf_fd %d, size %d qm_mv_buf %p size %d\n",
                      roir_buf_fd, frm->roir_buf_size, tune->qm_mv_buf,
                      tune->qm_mv_buf_size);

    return MPP_OK;
}
/*
 * Run the qpmap tuning step for one task.
 *
 * Currently this only makes sure the qpmap buffers exist; the task argument
 * is not used yet. A failed buffer setup is logged and the step is abandoned.
 */
static void vepu510_h265e_tune_qpmap(void *p, HalEncTask *task)
{
    HalH265eVepu510Tune *tune_state = (HalH265eVepu510Tune *)p;

    (void)task;

    hal_h265e_dbg_func("enter\n");

    if (vepu510_h265e_tune_qpmap_init(tune_state) == MPP_OK)
        hal_h265e_dbg_func("leave\n");
    else
        mpp_err("failed to init qpmap\n");
}
/*
 * Apply tuning related register patches for the given task.
 *
 * Does nothing when no tuning state exists. The AQ patch is always applied;
 * the qpmap step runs only when qpmap is enabled on the HAL context and the
 * task carries an md_info buffer.
 */
static void vepu510_h265e_tune_reg_patch(void *p, HalEncTask *task)
{
    HalH265eVepu510Tune *tune = (HalH265eVepu510Tune *)p;

    if (NULL == tune)
        return;

    H265eV510HalContext *ctx = tune->ctx;

    vepu510_h265e_tune_aq(tune);

    /* task is really used here, so it carries no "unused" marker */
    if (ctx->qpmap_en && (task->md_info != NULL)) {
        vepu510_h265e_tune_qpmap(tune, task);
    }
}
static void vepu510_h265e_tune_stat_update(void *p, HalEncTask *task)
@ -165,6 +235,7 @@ static void vepu510_h265e_tune_stat_update(void *p, HalEncTask *task)
if (NULL == tune)
return;
hal_h265e_dbg_func("enter\n");
H265eV510HalContext *ctx = tune->ctx;;
RK_S32 task_idx = task->flags.reg_idx;
Vepu510H265eFrmCfg *frm = ctx->frms[task_idx];
@ -172,9 +243,10 @@ static void vepu510_h265e_tune_stat_update(void *p, HalEncTask *task)
H265eV510RegSet *regs_set = frm->regs_set;
H265eV510StatusElem *elem = frm->regs_ret;
MppEncCfgSet *cfg = ctx->cfg;
RK_S32 w32 = MPP_ALIGN(cfg->prep.width, 32);
RK_S32 h32 = MPP_ALIGN(cfg->prep.height, 32);
RK_U32 b16_num = MPP_ALIGN(cfg->prep.width, 16) * MPP_ALIGN(cfg->prep.height, 16) / 256;
RK_U32 madi_cnt = 0, madp_cnt = 0;
RK_S32 i = 0;
RK_U32 madi_th_cnt0 = elem->st.st_madi_lt_num0.madi_th_lt_cnt0 +
elem->st.st_madi_rt_num0.madi_th_rt_cnt0 +
@ -231,8 +303,8 @@ static void vepu510_h265e_tune_stat_update(void *p, HalEncTask *task)
motion_level = 0;
hal_rc_ret->motion_level = motion_level;
}
hal_h265e_dbg_output("complex_level %d motion_level %d\n",
hal_rc_ret->complex_level, hal_rc_ret->motion_level);
hal_h265e_dbg_st("frame %d complex_level %d motion_level %d\n",
ctx->frame_num - 1, hal_rc_ret->complex_level, hal_rc_ret->motion_level);
fb->st_madi = madi_th_cnt0 * regs_set->reg_rc_roi.madi_st_thd.madi_th0 +
madi_th_cnt1 * (regs_set->reg_rc_roi.madi_st_thd.madi_th0 +
@ -259,51 +331,13 @@ static void vepu510_h265e_tune_stat_update(void *p, HalEncTask *task)
fb->st_mb_num += elem->st.st_bnum_b16.num_b16;
fb->frame_type = task->rc_task->frm.is_intra ? INTRA_FRAME : INTER_P_FRAME;
hal_rc_ret->bit_real += fb->out_strm_size * 8;
hal_h265e_dbg_output("bit_real %d quality_real %d\n",
hal_rc_ret->bit_real, hal_rc_ret->quality_real);
{
/* This code snippet may be unnecessary, but it is kept for rv1103b compatibility. */
RK_S32 bit_tgt = hal_rc_ret->bit_target;
RK_S32 bit_real = hal_rc_ret->bit_real;
RK_S32 real_lvl = 0;
hal_rc_ret->madi = elem->st.madi16_sum / fb->st_mb_num;
hal_rc_ret->madp = elem->st.madp16_sum / fb->st_mb_num;
hal_rc_ret->dsp_y_avg = elem->st.dsp_y_sum / (w32 / 4 * h32 / 4);
memcpy(fb->tgt_sub_real_lvl, ctx->last_frame_fb.tgt_sub_real_lvl, 6 * sizeof(RK_S8));
for (i = 3; i >= 0; i--)
fb->tgt_sub_real_lvl[i + 1] = fb->tgt_sub_real_lvl[i];
hal_h265e_dbg_st("frame %d bit_real %d quality_real %d dsp_y_avg %3d\n", ctx->frame_num - 1,
hal_rc_ret->bit_real, hal_rc_ret->quality_real, hal_rc_ret->dsp_y_avg);
if (bit_tgt > bit_real) {
fb->tgt_sub_real_lvl[0] = (bit_tgt > bit_real * 6 / 4) ? 3 :
(bit_tgt > bit_real * 5 / 4) ? 2 :
(bit_tgt > bit_real * 9 / 8) ? 1 : 0;
} else {
fb->tgt_sub_real_lvl[0] = (bit_real > bit_tgt * 2) ? -5 :
(bit_real > bit_tgt * 7 / 4) ? -4 :
(bit_real > bit_tgt * 6 / 4) ? -3 :
(bit_real > bit_tgt * 5 / 4) ? -2 : -1;
}
for (i = 0; i < 5; i ++)
real_lvl += fb->tgt_sub_real_lvl[i];
if (task->rc_task->frm.is_intra)
fb->tgt_sub_real_lvl[5] = 0;
if (real_lvl < -9)
fb->tgt_sub_real_lvl[5] = 2;
else if (real_lvl < -2 && fb->tgt_sub_real_lvl[5] < 2)
fb->tgt_sub_real_lvl[5] = 1;
}
if (fb->st_mb_num)
fb->st_madi = fb->st_madi / fb->st_mb_num;
else
fb->st_madi = 0;
if (fb->st_ctu_num)
fb->st_madp = fb->st_madp / fb->st_ctu_num;
else
fb->st_madp = 0;
hal_rc_ret->madi = fb->st_madi;
hal_rc_ret->madp = fb->st_madp; /* unused ?? */
hal_h265e_dbg_func("leave\n");
}