xref: /linux/drivers/media/platform/verisilicon/hantro_postproc.c (revision ab93e0dd72c37d378dd936f031ffb83ff2bd87ce)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Hantro G1 post-processor support
4  *
5  * Copyright (C) 2019 Collabora, Ltd.
6  */
7 
8 #include <linux/dma-mapping.h>
9 #include <linux/types.h>
10 
11 #include "hantro.h"
12 #include "hantro_hw.h"
13 #include "hantro_g1_regs.h"
14 #include "hantro_g2_regs.h"
15 #include "hantro_v4l2.h"
16 
/*
 * Write @val into the G1 post-processor register field named @reg_name
 * (a member of hantro_g1_postproc_regs) using hantro_reg_write().
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and stays safe inside unbraced if/else bodies.
 */
#define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
do { \
	hantro_reg_write((vpu), \
			 &hantro_g1_postproc_regs.reg_name, \
			 (val)); \
} while (0)
23 
/*
 * Same as the non-relaxed G1 post-processor field write, but goes through
 * hantro_reg_write_relaxed().
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and stays safe inside unbraced if/else bodies.
 */
#define HANTRO_PP_REG_WRITE_RELAXED(vpu, reg_name, val) \
do { \
	hantro_reg_write_relaxed((vpu), \
				 &hantro_g1_postproc_regs.reg_name, \
				 (val)); \
} while (0)
30 
/* Values written to the post-processor input_fmt register field. */
enum {
	VPU_PP_IN_YUYV		= 0x0,
	VPU_PP_IN_NV12		= 0x1,
	VPU_PP_IN_YUV420	= 0x2,
	/* NOTE(review): "YUV240" looks like a transposition of "YUV420" - confirm. */
	VPU_PP_IN_YUV240_TILED	= 0x5,
};

/* Values written to the post-processor output_fmt register field. */
enum {
	VPU_PP_OUT_RGB		= 0x0,
	VPU_PP_OUT_YUYV		= 0x3,
};
37 
38 static const struct hantro_postproc_regs hantro_g1_postproc_regs = {
39 	.pipeline_en = {G1_REG_PP_INTERRUPT, 1, 0x1},
40 	.max_burst = {G1_REG_PP_DEV_CONFIG, 0, 0x1f},
41 	.clk_gate = {G1_REG_PP_DEV_CONFIG, 1, 0x1},
42 	.out_swap32 = {G1_REG_PP_DEV_CONFIG, 5, 0x1},
43 	.out_endian = {G1_REG_PP_DEV_CONFIG, 6, 0x1},
44 	.out_luma_base = {G1_REG_PP_OUT_LUMA_BASE, 0, 0xffffffff},
45 	.input_width = {G1_REG_PP_INPUT_SIZE, 0, 0x1ff},
46 	.input_height = {G1_REG_PP_INPUT_SIZE, 9, 0x1ff},
47 	.output_width = {G1_REG_PP_CONTROL, 4, 0x7ff},
48 	.output_height = {G1_REG_PP_CONTROL, 15, 0x7ff},
49 	.input_fmt = {G1_REG_PP_CONTROL, 29, 0x7},
50 	.output_fmt = {G1_REG_PP_CONTROL, 26, 0x7},
51 	.orig_width = {G1_REG_PP_MASK1_ORIG_WIDTH, 23, 0x1ff},
52 	.display_width = {G1_REG_PP_DISPLAY_WIDTH_IN_EXT, 0, 0xfff},
53 	.input_width_ext = {G1_REG_PP_DISPLAY_WIDTH_IN_EXT, 26, 0x7},
54 	.input_height_ext = {G1_REG_PP_DISPLAY_WIDTH_IN_EXT, 29, 0x7},
55 };
56 
hantro_needs_postproc(const struct hantro_ctx * ctx,const struct hantro_fmt * fmt)57 bool hantro_needs_postproc(const struct hantro_ctx *ctx,
58 			   const struct hantro_fmt *fmt)
59 {
60 	if (ctx->is_encoder)
61 		return false;
62 
63 	if (ctx->need_postproc)
64 		return true;
65 
66 	return fmt->postprocessed;
67 }
68 
hantro_postproc_g1_enable(struct hantro_ctx * ctx)69 static void hantro_postproc_g1_enable(struct hantro_ctx *ctx)
70 {
71 	struct hantro_dev *vpu = ctx->dev;
72 	struct vb2_v4l2_buffer *dst_buf;
73 	u32 src_pp_fmt, dst_pp_fmt;
74 	dma_addr_t dst_dma;
75 
76 	/* Turn on pipeline mode. Must be done first. */
77 	HANTRO_PP_REG_WRITE(vpu, pipeline_en, 0x1);
78 
79 	src_pp_fmt = VPU_PP_IN_NV12;
80 
81 	switch (ctx->vpu_dst_fmt->fourcc) {
82 	case V4L2_PIX_FMT_YUYV:
83 		dst_pp_fmt = VPU_PP_OUT_YUYV;
84 		break;
85 	default:
86 		WARN(1, "output format %d not supported by the post-processor, this wasn't expected.",
87 		     ctx->vpu_dst_fmt->fourcc);
88 		dst_pp_fmt = 0;
89 		break;
90 	}
91 
92 	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
93 	dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
94 
95 	HANTRO_PP_REG_WRITE(vpu, clk_gate, 0x1);
96 	HANTRO_PP_REG_WRITE(vpu, out_endian, 0x1);
97 	HANTRO_PP_REG_WRITE(vpu, out_swap32, 0x1);
98 	HANTRO_PP_REG_WRITE(vpu, max_burst, 16);
99 	HANTRO_PP_REG_WRITE(vpu, out_luma_base, dst_dma);
100 	HANTRO_PP_REG_WRITE(vpu, input_width, MB_WIDTH(ctx->dst_fmt.width));
101 	HANTRO_PP_REG_WRITE(vpu, input_height, MB_HEIGHT(ctx->dst_fmt.height));
102 	HANTRO_PP_REG_WRITE(vpu, input_fmt, src_pp_fmt);
103 	HANTRO_PP_REG_WRITE(vpu, output_fmt, dst_pp_fmt);
104 	HANTRO_PP_REG_WRITE(vpu, output_width, ctx->dst_fmt.width);
105 	HANTRO_PP_REG_WRITE(vpu, output_height, ctx->dst_fmt.height);
106 	HANTRO_PP_REG_WRITE(vpu, orig_width, MB_WIDTH(ctx->dst_fmt.width));
107 	HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width);
108 	HANTRO_PP_REG_WRITE(vpu, input_width_ext, MB_WIDTH(ctx->dst_fmt.width) >> 9);
109 	HANTRO_PP_REG_WRITE(vpu, input_height_ext, MB_HEIGHT(ctx->dst_fmt.height >> 8));
110 }
111 
down_scale_factor(struct hantro_ctx * ctx)112 static int down_scale_factor(struct hantro_ctx *ctx)
113 {
114 	if (ctx->src_fmt.width <= ctx->dst_fmt.width)
115 		return 0;
116 
117 	return DIV_ROUND_CLOSEST(ctx->src_fmt.width, ctx->dst_fmt.width);
118 }
119 
hantro_postproc_g2_enable(struct hantro_ctx * ctx)120 static void hantro_postproc_g2_enable(struct hantro_ctx *ctx)
121 {
122 	struct hantro_dev *vpu = ctx->dev;
123 	struct vb2_v4l2_buffer *dst_buf;
124 	int down_scale = down_scale_factor(ctx);
125 	int out_depth;
126 	size_t chroma_offset;
127 	dma_addr_t dst_dma;
128 
129 	dst_buf = hantro_get_dst_buf(ctx);
130 	dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
131 	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
132 			ctx->dst_fmt.height;
133 
134 	if (down_scale) {
135 		hantro_reg_write(vpu, &g2_down_scale_e, 1);
136 		hantro_reg_write(vpu, &g2_down_scale_y, down_scale >> 2);
137 		hantro_reg_write(vpu, &g2_down_scale_x, down_scale >> 2);
138 		hantro_write_addr(vpu, G2_DS_DST, dst_dma);
139 		hantro_write_addr(vpu, G2_DS_DST_CHR, dst_dma + (chroma_offset >> down_scale));
140 	} else {
141 		hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma);
142 		hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma + chroma_offset);
143 	}
144 
145 	out_depth = hantro_get_format_depth(ctx->dst_fmt.pixelformat);
146 	if (ctx->dev->variant->legacy_regs) {
147 		u8 pp_shift = 0;
148 
149 		if (out_depth > 8)
150 			pp_shift = 16 - out_depth;
151 
152 		hantro_reg_write(ctx->dev, &g2_rs_out_bit_depth, out_depth);
153 		hantro_reg_write(ctx->dev, &g2_pp_pix_shift, pp_shift);
154 	} else {
155 		hantro_reg_write(vpu, &g2_output_8_bits, out_depth > 8 ? 0 : 1);
156 		hantro_reg_write(vpu, &g2_output_format, out_depth > 8 ? 1 : 0);
157 	}
158 	hantro_reg_write(vpu, &g2_out_rs_e, 1);
159 }
160 
hantro_postproc_g2_enum_framesizes(struct hantro_ctx * ctx,struct v4l2_frmsizeenum * fsize)161 static int hantro_postproc_g2_enum_framesizes(struct hantro_ctx *ctx,
162 					      struct v4l2_frmsizeenum *fsize)
163 {
164 	/**
165 	 * G2 scaler can scale down by 0, 2, 4 or 8
166 	 * use fsize->index has power of 2 diviser
167 	 **/
168 	if (fsize->index > 3)
169 		return -EINVAL;
170 
171 	if (!ctx->src_fmt.width || !ctx->src_fmt.height)
172 		return -EINVAL;
173 
174 	fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE;
175 	fsize->discrete.width = ctx->src_fmt.width >> fsize->index;
176 	fsize->discrete.height = ctx->src_fmt.height >> fsize->index;
177 
178 	return 0;
179 }
180 
hantro_postproc_free(struct hantro_ctx * ctx)181 void hantro_postproc_free(struct hantro_ctx *ctx)
182 {
183 	struct hantro_dev *vpu = ctx->dev;
184 	struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
185 	struct vb2_queue *queue = &m2m_ctx->cap_q_ctx.q;
186 	unsigned int i;
187 
188 	for (i = 0; i < queue->max_num_buffers; ++i) {
189 		struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
190 
191 		if (priv->cpu) {
192 			dma_free_attrs(vpu->dev, priv->size, priv->cpu,
193 				       priv->dma, priv->attrs);
194 			priv->cpu = NULL;
195 		}
196 	}
197 }
198 
hantro_postproc_buffer_size(struct hantro_ctx * ctx)199 static unsigned int hantro_postproc_buffer_size(struct hantro_ctx *ctx)
200 {
201 	unsigned int buf_size;
202 
203 	buf_size = ctx->ref_fmt.plane_fmt[0].sizeimage;
204 	if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
205 		buf_size += hantro_h264_mv_size(ctx->ref_fmt.width,
206 						ctx->ref_fmt.height);
207 	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
208 		buf_size += hantro_vp9_mv_size(ctx->ref_fmt.width,
209 					       ctx->ref_fmt.height);
210 	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE) {
211 		buf_size += hantro_hevc_mv_size(ctx->ref_fmt.width,
212 						ctx->ref_fmt.height);
213 		if (ctx->hevc_dec.use_compression)
214 			buf_size += hantro_hevc_compressed_size(ctx->ref_fmt.width,
215 								ctx->ref_fmt.height);
216 	}
217 	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_AV1_FRAME)
218 		buf_size += hantro_av1_mv_size(ctx->ref_fmt.width,
219 					       ctx->ref_fmt.height);
220 
221 	return buf_size;
222 }
223 
hantro_postproc_alloc(struct hantro_ctx * ctx,int index)224 static int hantro_postproc_alloc(struct hantro_ctx *ctx, int index)
225 {
226 	struct hantro_dev *vpu = ctx->dev;
227 	struct hantro_aux_buf *priv = &ctx->postproc.dec_q[index];
228 	unsigned int buf_size = hantro_postproc_buffer_size(ctx);
229 
230 	if (!buf_size)
231 		return -EINVAL;
232 
233 	/*
234 	 * The buffers on this queue are meant as intermediate
235 	 * buffers for the decoder, so no mapping is needed.
236 	 */
237 	priv->attrs = DMA_ATTR_NO_KERNEL_MAPPING;
238 	priv->cpu = dma_alloc_attrs(vpu->dev, buf_size, &priv->dma,
239 				    GFP_KERNEL, priv->attrs);
240 	if (!priv->cpu)
241 		return -ENOMEM;
242 	priv->size = buf_size;
243 
244 	return 0;
245 }
246 
hantro_postproc_init(struct hantro_ctx * ctx)247 int hantro_postproc_init(struct hantro_ctx *ctx)
248 {
249 	struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
250 	struct vb2_queue *cap_queue = &m2m_ctx->cap_q_ctx.q;
251 	unsigned int num_buffers = vb2_get_num_buffers(cap_queue);
252 	unsigned int i;
253 	int ret;
254 
255 	for (i = 0; i < num_buffers; i++) {
256 		ret = hantro_postproc_alloc(ctx, i);
257 		if (ret) {
258 			hantro_postproc_free(ctx);
259 			return ret;
260 		}
261 	}
262 
263 	return 0;
264 }
265 
266 dma_addr_t
hantro_postproc_get_dec_buf_addr(struct hantro_ctx * ctx,int index)267 hantro_postproc_get_dec_buf_addr(struct hantro_ctx *ctx, int index)
268 {
269 	struct hantro_aux_buf *priv = &ctx->postproc.dec_q[index];
270 	unsigned int buf_size = hantro_postproc_buffer_size(ctx);
271 	struct hantro_dev *vpu = ctx->dev;
272 	int ret;
273 
274 	if (priv->size < buf_size && priv->cpu) {
275 		/* buffer is too small, release it */
276 		dma_free_attrs(vpu->dev, priv->size, priv->cpu,
277 			       priv->dma, priv->attrs);
278 		priv->cpu = NULL;
279 	}
280 
281 	if (!priv->cpu) {
282 		/* buffer not already allocated, try getting a new one */
283 		ret = hantro_postproc_alloc(ctx, index);
284 		if (ret)
285 			return 0;
286 	}
287 
288 	if (!priv->cpu)
289 		return 0;
290 
291 	return priv->dma;
292 }
293 
hantro_postproc_g1_disable(struct hantro_ctx * ctx)294 static void hantro_postproc_g1_disable(struct hantro_ctx *ctx)
295 {
296 	struct hantro_dev *vpu = ctx->dev;
297 
298 	HANTRO_PP_REG_WRITE(vpu, pipeline_en, 0x0);
299 }
300 
hantro_postproc_g2_disable(struct hantro_ctx * ctx)301 static void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
302 {
303 	struct hantro_dev *vpu = ctx->dev;
304 
305 	hantro_reg_write(vpu, &g2_out_rs_e, 0);
306 }
307 
hantro_postproc_disable(struct hantro_ctx * ctx)308 void hantro_postproc_disable(struct hantro_ctx *ctx)
309 {
310 	struct hantro_dev *vpu = ctx->dev;
311 
312 	if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->disable)
313 		vpu->variant->postproc_ops->disable(ctx);
314 }
315 
hantro_postproc_enable(struct hantro_ctx * ctx)316 void hantro_postproc_enable(struct hantro_ctx *ctx)
317 {
318 	struct hantro_dev *vpu = ctx->dev;
319 
320 	if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->enable)
321 		vpu->variant->postproc_ops->enable(ctx);
322 }
323 
hanto_postproc_enum_framesizes(struct hantro_ctx * ctx,struct v4l2_frmsizeenum * fsize)324 int hanto_postproc_enum_framesizes(struct hantro_ctx *ctx,
325 				   struct v4l2_frmsizeenum *fsize)
326 {
327 	struct hantro_dev *vpu = ctx->dev;
328 
329 	if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->enum_framesizes)
330 		return vpu->variant->postproc_ops->enum_framesizes(ctx, fsize);
331 
332 	return -EINVAL;
333 }
334 
335 const struct hantro_postproc_ops hantro_g1_postproc_ops = {
336 	.enable = hantro_postproc_g1_enable,
337 	.disable = hantro_postproc_g1_disable,
338 };
339 
340 const struct hantro_postproc_ops hantro_g2_postproc_ops = {
341 	.enable = hantro_postproc_g2_enable,
342 	.disable = hantro_postproc_g2_disable,
343 	.enum_framesizes = hantro_postproc_g2_enum_framesizes,
344 };
345