From: Tomasz Figa Date: Fri, 17 Jul 2015 10:31:54 +0000 (+0900) Subject: CHROMIUM: [media] rk3288-vpu: Workaround for encode after decode X-Git-Tag: firefly_0821_release~2302 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=60a3c09c3eef10c99b7bb4a916bc2cfa2ae78fff;p=firefly-linux-kernel-4.4.55.git CHROMIUM: [media] rk3288-vpu: Workaround for encode after decode On RK3288 there is an issue with certain hardware state being corrupted while decoding certain streams, which affects encoding task run directly after that decoding task. To reinitialize the state properly, a dummy encoding of a single 64x64 pixels keyframe must be performed before the real encoding is run. This patch adds necessary workaround code to the driver, which makes it execute an encoding task using dummy buffers with static parameters manually selected for lowest performance overhead and to assure that aforementioned hardware state is reinitialized. BUG=chrome-os-partner:41585 TEST=AppRTC loopback Signed-off-by: Tomasz Figa Reviewed-on: https://chromium-review.googlesource.com/286284 Reviewed-by: Pawel Osciak Change-Id: I019d1983633ec2cf2818956a7bf988314d853cdf Signed-off-by: Jeffy Chen Signed-off-by: Yakir Yang --- diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu.c b/drivers/media/platform/rk3288-vpu/rk3288_vpu.c index 17728371896b..dc92f2b9471e 100644 --- a/drivers/media/platform/rk3288-vpu/rk3288_vpu.c +++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu.c @@ -93,6 +93,17 @@ static void __rk3288_vpu_dequeue_run_locked(struct rk3288_vpu_ctx *ctx) ctx->run.dst = dst; } +static struct rk3288_vpu_ctx * +rk3288_vpu_encode_after_decode_war(struct rk3288_vpu_ctx *ctx) +{ + struct rk3288_vpu_dev *dev = ctx->dev; + + if (dev->was_decoding && rk3288_vpu_ctx_is_encoder(ctx)) + return dev->dummy_encode_ctx; + + return ctx; +} + static void rk3288_vpu_try_run(struct rk3288_vpu_dev *dev) { struct rk3288_vpu_ctx *ctx = NULL; @@ -115,10 +126,24 @@ static void rk3288_vpu_try_run(struct 
rk3288_vpu_dev *dev) goto out; ctx = list_entry(dev->ready_ctxs.next, struct rk3288_vpu_ctx, list); - list_del_init(&ctx->list); + + /* + * WAR for corrupted hardware state when encoding directly after + * certain decoding runs. + * + * If previous context was decoding and currently picked one is + * encoding then we need to execute a dummy encode with proper + * settings to reinitialize certain internal hardware state. + */ + ctx = rk3288_vpu_encode_after_decode_war(ctx); + + if (!rk3288_vpu_ctx_is_dummy_encode(ctx)) { + list_del_init(&ctx->list); + __rk3288_vpu_dequeue_run_locked(ctx); + } dev->current_ctx = ctx; - __rk3288_vpu_dequeue_run_locked(ctx); + dev->was_decoding = !rk3288_vpu_ctx_is_encoder(ctx); out: spin_unlock_irqrestore(&dev->irqlock, flags); @@ -145,8 +170,6 @@ static void __rk3288_vpu_try_context_locked(struct rk3288_vpu_dev *dev, void rk3288_vpu_run_done(struct rk3288_vpu_ctx *ctx, enum vb2_buffer_state result) { - struct vb2_buffer *src = &ctx->run.src->b; - struct vb2_buffer *dst = &ctx->run.dst->b; struct rk3288_vpu_dev *dev = ctx->dev; unsigned long flags; @@ -155,9 +178,14 @@ void rk3288_vpu_run_done(struct rk3288_vpu_ctx *ctx, if (ctx->run_ops->run_done) ctx->run_ops->run_done(ctx, result); - dst->v4l2_buf.timestamp = src->v4l2_buf.timestamp; - vb2_buffer_done(&ctx->run.src->b, result); - vb2_buffer_done(&ctx->run.dst->b, result); + if (!rk3288_vpu_ctx_is_dummy_encode(ctx)) { + struct vb2_buffer *src = &ctx->run.src->b; + struct vb2_buffer *dst = &ctx->run.dst->b; + + dst->v4l2_buf.timestamp = src->v4l2_buf.timestamp; + vb2_buffer_done(&ctx->run.src->b, result); + vb2_buffer_done(&ctx->run.dst->b, result); + } dev->current_ctx = NULL; wake_up_all(&dev->run_wq); @@ -603,6 +631,12 @@ static int rk3288_vpu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, vpu); + ret = rk3288_vpu_enc_init_dummy_ctx(vpu); + if (ret) { + dev_err(&pdev->dev, "Failed to create dummy encode context\n"); + goto err_dummy_enc; + } + /* encoder */ vfd = 
video_device_alloc(); if (!vfd) { @@ -674,6 +708,8 @@ err_dec_alloc: err_enc_reg: video_device_release(vpu->vfd_enc); err_enc_alloc: + rk3288_vpu_enc_free_dummy_ctx(vpu); +err_dummy_enc: v4l2_device_unregister(&vpu->v4l2_dev); err_v4l2_dev_reg: vb2_dma_contig_cleanup_ctx(vpu->alloc_ctx_vm); @@ -704,6 +740,7 @@ static int rk3288_vpu_remove(struct platform_device *pdev) video_unregister_device(vpu->vfd_dec); video_unregister_device(vpu->vfd_enc); + rk3288_vpu_enc_free_dummy_ctx(vpu); v4l2_device_unregister(&vpu->v4l2_dev); vb2_dma_contig_cleanup_ctx(vpu->alloc_ctx_vm); vb2_dma_contig_cleanup_ctx(vpu->alloc_ctx); diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu_common.h b/drivers/media/platform/rk3288-vpu/rk3288_vpu_common.h index 155693cb79e4..9ac44e82c42f 100644 --- a/drivers/media/platform/rk3288-vpu/rk3288_vpu_common.h +++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu_common.h @@ -156,6 +156,11 @@ enum rk3288_vpu_state { * @current_ctx: Context being currently processed by hardware. * @run_wq: Wait queue to wait for run completion. * @watchdog_work: Delayed work for hardware timeout handling. + * @dummy_encode_ctx: Context used to run dummy frame encoding to initialize + * encoder hardware state. + * @dummy_encode_src: Source buffers used for dummy frame encoding. + * @dummy_encode_dst: Destination buffer used for dummy frame encoding. + * @was_decoding: Indicates whether last run context was a decoder. 
*/
 struct rk3288_vpu_dev {
 	struct v4l2_device v4l2_dev;
@@ -180,6 +185,10 @@ struct rk3288_vpu_dev {
 	struct rk3288_vpu_ctx *current_ctx;
 	wait_queue_head_t run_wq;
 	struct delayed_work watchdog_work;
+	struct rk3288_vpu_ctx *dummy_encode_ctx;
+	struct rk3288_vpu_aux_buf dummy_encode_src[VIDEO_MAX_PLANES];
+	struct rk3288_vpu_aux_buf dummy_encode_dst;
+	bool was_decoding;
 };
 
 /**
@@ -439,6 +448,13 @@ static inline bool rk3288_vpu_ctx_is_encoder(struct rk3288_vpu_ctx *ctx)
 	return ctx->vpu_dst_fmt->codec_mode != RK_VPU_CODEC_NONE;
 }
 
+static inline bool rk3288_vpu_ctx_is_dummy_encode(struct rk3288_vpu_ctx *ctx)
+{
+	struct rk3288_vpu_dev *dev = ctx->dev;
+
+	return ctx == dev->dummy_encode_ctx;
+}
+
 int rk3288_vpu_ctrls_setup(struct rk3288_vpu_ctx *ctx,
 			   const struct v4l2_ctrl_ops *ctrl_ops,
 			   struct rk3288_vpu_control *controls,
diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.c b/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.c
index 9a0a6ac9e5fc..1f6e9225a2e8 100644
--- a/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.c
+++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.c
@@ -1391,4 +1391,140 @@ void rk3288_vpu_enc_exit(struct rk3288_vpu_ctx *ctx)
 
 	rk3288_vpu_aux_buf_free(vpu, &ctx->run.priv_dst);
 	rk3288_vpu_aux_buf_free(vpu, &ctx->run.priv_src);
+}
+
+/*
+ * WAR for encoder state corruption after decoding
+ */
+
+static const struct rk3288_vpu_run_ops dummy_encode_run_ops = {
+	/* No ops needed for dummy encoding. */
+};
+
+#define DUMMY_W		64
+#define DUMMY_H		64
+#define DUMMY_SRC_FMT	V4L2_PIX_FMT_YUYV
+#define DUMMY_DST_FMT	V4L2_PIX_FMT_VP8
+#define DUMMY_DST_SIZE	(32 * 1024)
+
+/**
+ * rk3288_vpu_enc_init_dummy_ctx() - Create the dummy encode context.
+ * @dev: VPU device that will hold the dummy context and its buffers.
+ *
+ * Sets up a DUMMY_W x DUMMY_H encoder context (DUMMY_SRC_FMT source,
+ * DUMMY_DST_FMT destination) using the static dummy register parameters,
+ * allocates zero-filled source and destination buffers for it and, on
+ * success, publishes it as @dev->dummy_encode_ctx.
+ *
+ * Return: 0 on success; -ENOMEM or the error code of the failing buffer
+ * allocation or rk3288_vpu_init() call otherwise.
+ */
+int rk3288_vpu_enc_init_dummy_ctx(struct rk3288_vpu_dev *dev)
+{
+	struct rk3288_vpu_ctx *ctx;
+	int ret;
+	int i;
+
+	ctx = devm_kzalloc(dev->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->dev = dev;
+
+	ctx->vpu_src_fmt = find_format(DUMMY_SRC_FMT, false);
+	ctx->src_fmt.width = DUMMY_W;
+	ctx->src_fmt.height = DUMMY_H;
+	ctx->src_fmt.pixelformat = ctx->vpu_src_fmt->fourcc;
+	ctx->src_fmt.num_planes = ctx->vpu_src_fmt->num_planes;
+
+	calculate_plane_sizes(ctx->vpu_src_fmt, ctx->src_fmt.width,
+				ctx->src_fmt.height, &ctx->src_fmt);
+
+	ctx->vpu_dst_fmt = find_format(DUMMY_DST_FMT, true);
+	ctx->dst_fmt.width = ctx->src_fmt.width;
+	ctx->dst_fmt.height = ctx->src_fmt.height;
+	ctx->dst_fmt.pixelformat = ctx->vpu_dst_fmt->fourcc;
+	ctx->dst_fmt.plane_fmt[0].sizeimage = DUMMY_DST_SIZE;
+	ctx->dst_fmt.plane_fmt[0].bytesperline = 0;
+	ctx->dst_fmt.num_planes = 1;
+
+	INIT_LIST_HEAD(&ctx->src_queue);
+
+	/* The crop rectangle spans the whole dummy frame. */
+	ctx->src_crop.left = 0;
+	ctx->src_crop.top = 0;
+	ctx->src_crop.width = ctx->src_fmt.width;
+	ctx->src_crop.height = ctx->src_fmt.height;
+
+	INIT_LIST_HEAD(&ctx->dst_queue);
+	INIT_LIST_HEAD(&ctx->list);
+
+	ctx->run.vp8e.reg_params = rk3288_vpu_vp8e_get_dummy_params();
+	ctx->run_ops = &dummy_encode_run_ops;
+
+	ctx->run.dst = devm_kzalloc(dev->dev, sizeof(*ctx->run.dst),
+					GFP_KERNEL);
+	if (!ctx->run.dst)
+		return -ENOMEM;
+
+	ret = rk3288_vpu_aux_buf_alloc(dev, &ctx->run.priv_src,
+					RK3288_HW_PARAMS_SIZE);
+	if (ret)
+		return ret;
+
+	ret = rk3288_vpu_aux_buf_alloc(dev, &ctx->run.priv_dst,
+					RK3288_RET_PARAMS_SIZE);
+	if (ret)
+		goto err_free_priv_src;
+
+	for (i = 0; i < ctx->src_fmt.num_planes; ++i) {
+		ret = rk3288_vpu_aux_buf_alloc(dev, &dev->dummy_encode_src[i],
+					ctx->src_fmt.plane_fmt[i].sizeimage);
+		if (ret)
+			goto err_free_src;
+
+		memset(dev->dummy_encode_src[i].cpu, 0,
+			dev->dummy_encode_src[i].size);
+	}
+
+	ret = rk3288_vpu_aux_buf_alloc(dev, &dev->dummy_encode_dst,
+					ctx->dst_fmt.plane_fmt[0].sizeimage);
+	if (ret)
+		goto err_free_src;
+
+	memset(dev->dummy_encode_dst.cpu, 0, dev->dummy_encode_dst.size);
+
+	ret = rk3288_vpu_init(ctx);
+	if (ret)
+		goto err_free_dst;
+
+	dev->dummy_encode_ctx = ctx;
+
+	return 0;
+
+err_free_dst:
+	rk3288_vpu_aux_buf_free(dev, &dev->dummy_encode_dst);
+err_free_src:
+	for (i = 0; i < ctx->src_fmt.num_planes; ++i)
+		if (dev->dummy_encode_src[i].cpu)
+			rk3288_vpu_aux_buf_free(dev, &dev->dummy_encode_src[i]);
+	rk3288_vpu_aux_buf_free(dev, &ctx->run.priv_dst);
+err_free_priv_src:
+	rk3288_vpu_aux_buf_free(dev, &ctx->run.priv_src);
+
+	return ret;
+}
+
+void rk3288_vpu_enc_free_dummy_ctx(struct rk3288_vpu_dev *dev)
+{
+	struct rk3288_vpu_ctx *ctx = dev->dummy_encode_ctx;
+	int i;
+
+	rk3288_vpu_deinit(ctx);
+
+	for (i = 0; i < ctx->src_fmt.num_planes; ++i)
+		rk3288_vpu_aux_buf_free(dev, &dev->dummy_encode_src[i]);
+	rk3288_vpu_aux_buf_free(dev, &dev->dummy_encode_dst);
+	rk3288_vpu_aux_buf_free(dev, &ctx->run.priv_src);
+	rk3288_vpu_aux_buf_free(dev, &ctx->run.priv_dst);
 }
diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.h b/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.h
index 80b71c2a5979..4b1979d5d2ef 100644
--- a/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.h
+++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.h
@@ -30,5 +30,7 @@ const struct v4l2_ioctl_ops *get_enc_v4l2_ioctl_ops(void);
 struct rk3288_vpu_fmt *get_enc_def_fmt(bool src);
 int rk3288_vpu_enc_init(struct rk3288_vpu_ctx *ctx);
 void rk3288_vpu_enc_exit(struct rk3288_vpu_ctx *ctx);
+int rk3288_vpu_enc_init_dummy_ctx(struct rk3288_vpu_dev *dev);
+void rk3288_vpu_enc_free_dummy_ctx(struct rk3288_vpu_dev *dev);
 
 #endif /* RK3288_VPU_ENC_H_ */
diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw.h b/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw.h
index 
ee5a1466bb05..f8325536295b 100644 --- a/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw.h +++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw.h @@ -171,6 +171,7 @@ void rk3288_vpu_vp8e_exit(struct rk3288_vpu_ctx *ctx); void rk3288_vpu_vp8e_run(struct rk3288_vpu_ctx *ctx); void rk3288_vpu_vp8e_done(struct rk3288_vpu_ctx *ctx, enum vb2_buffer_state result); +const struct rk3288_vp8e_reg_params *rk3288_vpu_vp8e_get_dummy_params(void); void rk3288_vpu_vp8e_assemble_bitstream(struct rk3288_vpu_ctx *ctx, struct rk3288_vpu_buf *dst_buf); diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_vp8e.c b/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_vp8e.c index 25684d32c233..ce02712dc9fc 100644 --- a/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_vp8e.c +++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_vp8e.c @@ -201,8 +201,13 @@ static void rk3288_vpu_vp8e_set_buffers(struct rk3288_vpu_dev *vpu, rec_buf_dma += rounded_size * 3 / 2; ctx->hw.vp8e.ref_rec_ptr ^= 1; - dst_dma = vb2_dma_contig_plane_dma_addr(&ctx->run.dst->b, 0); - dst_size = vb2_plane_size(&ctx->run.dst->b, 0); + if (rk3288_vpu_ctx_is_dummy_encode(ctx)) { + dst_dma = vpu->dummy_encode_dst.dma; + dst_size = vpu->dummy_encode_dst.size; + } else { + dst_dma = vb2_dma_contig_plane_dma_addr(&ctx->run.dst->b, 0); + dst_size = vb2_plane_size(&ctx->run.dst->b, 0); + } /* * stream addr-->| @@ -266,12 +271,24 @@ static void rk3288_vpu_vp8e_set_buffers(struct rk3288_vpu_dev *vpu, VEPU_REG_ADDR_REC_CHROMA); /* Source buffer. 
*/ - vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(&ctx->run.src->b, - PLANE_Y), VEPU_REG_ADDR_IN_LUMA); - vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(&ctx->run.src->b, - PLANE_CB), VEPU_REG_ADDR_IN_CB); - vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(&ctx->run.src->b, - PLANE_CR), VEPU_REG_ADDR_IN_CR); + if (rk3288_vpu_ctx_is_dummy_encode(ctx)) { + vepu_write_relaxed(vpu, vpu->dummy_encode_src[PLANE_Y].dma, + VEPU_REG_ADDR_IN_LUMA); + vepu_write_relaxed(vpu, vpu->dummy_encode_src[PLANE_CB].dma, + VEPU_REG_ADDR_IN_CB); + vepu_write_relaxed(vpu, vpu->dummy_encode_src[PLANE_CR].dma, + VEPU_REG_ADDR_IN_CR); + } else { + vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr( + &ctx->run.src->b, PLANE_Y), + VEPU_REG_ADDR_IN_LUMA); + vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr( + &ctx->run.src->b, PLANE_CB), + VEPU_REG_ADDR_IN_CB); + vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr( + &ctx->run.src->b, PLANE_CR), + VEPU_REG_ADDR_IN_CR); + } /* Source parameters. 
*/ vepu_write_relaxed(vpu, enc_in_img_ctrl(ctx), VEPU_REG_IN_IMG_CTRL); @@ -408,3 +425,110 @@ void rk3288_vpu_vp8e_done(struct rk3288_vpu_ctx *ctx, rk3288_vpu_run_done(ctx, result); } + +/* + * WAR for encoder state corruption after decoding + */ + +static const struct rk3288_vp8e_reg_params dummy_encode_reg_params = { + /* 00000014 */ .hdr_len = 0x00000000, + /* 00000038 */ .enc_ctrl = VEPU_REG_ENC_CTRL_KEYFRAME_BIT, + /* 00000040 */ .enc_ctrl0 = 0x00000000, + /* 00000044 */ .enc_ctrl1 = 0x00000000, + /* 00000048 */ .enc_ctrl2 = 0x00040014, + /* 0000004c */ .enc_ctrl3 = 0x404083c0, + /* 00000050 */ .enc_ctrl5 = 0x01006bff, + /* 00000054 */ .enc_ctrl4 = 0x00000039, + /* 00000058 */ .str_hdr_rem_msb = 0x85848805, + /* 0000005c */ .str_hdr_rem_lsb = 0x02000000, + /* 00000064 */ .mad_ctrl = 0x00000000, + /* 0000006c */ .qp_val = { + /* 0000006c */ 0x020213b1, + /* 00000070 */ 0x02825249, + /* 00000074 */ 0x048409d8, + /* 00000078 */ 0x03834c30, + /* 0000007c */ 0x020213b1, + /* 00000080 */ 0x02825249, + /* 00000084 */ 0x00340e0d, + /* 00000088 */ 0x401c1a15, + }, + /* 0000008c */ .bool_enc = 0x00018140, + /* 00000090 */ .vp8_ctrl0 = 0x000695c0, + /* 00000094 */ .rlc_ctrl = 0x14000000, + /* 00000098 */ .mb_ctrl = 0x00000000, + /* 000000d4 */ .rgb_yuv_coeff = { + /* 000000d4 */ 0x962b4c85, + /* 000000d8 */ 0x90901d50, + }, + /* 000000dc */ .rgb_mask_msb = 0x0000b694, + /* 000000e0 */ .intra_area_ctrl = 0xffffffff, + /* 000000e4 */ .cir_intra_ctrl = 0x00000000, + /* 000000f0 */ .first_roi_area = 0xffffffff, + /* 000000f4 */ .second_roi_area = 0xffffffff, + /* 000000f8 */ .mvc_ctrl = 0x01780000, + /* 00000100 */ .intra_penalty = { + /* 00000100 */ 0x00010005, + /* 00000104 */ 0x00015011, + /* 00000108 */ 0x0000c005, + /* 0000010c */ 0x00016010, + /* 00000110 */ 0x0001a018, + /* 00000114 */ 0x00018015, + /* 00000118 */ 0x0001d01a, + }, + /* 00000120 */ .seg_qp = { + /* 00000120 */ 0x020213b1, + /* 00000124 */ 0x02825249, + /* 00000128 */ 0x048409d8, + /* 0000012c */ 
0x03834c30, + /* 00000130 */ 0x020213b1, + /* 00000134 */ 0x02825249, + /* 00000138 */ 0x00340e0d, + /* 0000013c */ 0x341c1a15, + /* 00000140 */ 0x020213b1, + /* 00000144 */ 0x02825249, + /* 00000148 */ 0x048409d8, + /* 0000014c */ 0x03834c30, + /* 00000150 */ 0x020213b1, + /* 00000154 */ 0x02825249, + /* 00000158 */ 0x00340e0d, + /* 0000015c */ 0x341c1a15, + /* 00000160 */ 0x020213b1, + /* 00000164 */ 0x02825249, + /* 00000168 */ 0x048409d8, + /* 0000016c */ 0x03834c30, + /* 00000170 */ 0x020213b1, + /* 00000174 */ 0x02825249, + /* 00000178 */ 0x00340e0d, + /* 0000017c */ 0x341c1a15, + }, + /* 00000180 */ .dmv_4p_1p_penalty = { + /* 00000180 */ 0x00020406, + /* 00000184 */ 0x080a0c0e, + /* 00000188 */ 0x10121416, + /* 0000018c */ 0x181a1c1e, + /* 00000190 */ 0x20222426, + /* 00000194 */ 0x282a2c2e, + /* 00000198 */ 0x30323436, + /* 0000019c */ 0x383a3c3e, + /* 000001a0 */ 0x40424446, + /* 000001a4 */ 0x484a4c4e, + /* 000001a8 */ 0x50525456, + /* 000001ac */ 0x585a5c5e, + /* 000001b0 */ 0x60626466, + /* 000001b4 */ 0x686a6c6e, + /* 000001b8 */ 0x70727476, + /* NOTE: Further 17 registers set to 0. */ + }, + /* + * NOTE: Following registers all set to 0: + * - dmv_qpel_penalty, + * - vp8_ctrl1, + * - bit_cost_golden, + * - loop_flt_delta. + */ +}; + +const struct rk3288_vp8e_reg_params *rk3288_vpu_vp8e_get_dummy_params(void) +{ + return &dummy_encode_reg_params; +}