xref: /linux/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c (revision d01ca8708d95a561f6462a15cad94a2c0bec7042)
1 // SPDX-License-Identifier: MIT
2 //
3 // Copyright 2024 Advanced Micro Devices, Inc.
4 
5 #include "spl_fixpt31_32.h"
6 
7 static const struct spl_fixed31_32 spl_fixpt_two_pi = { 26986075409LL };
8 static const struct spl_fixed31_32 spl_fixpt_ln2 = { 2977044471LL };
9 static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { 1488522236LL };
10 
abs_i64(long long arg)11 static inline unsigned long long abs_i64(
12 	long long arg)
13 {
14 	if (arg > 0)
15 		return (unsigned long long)arg;
16 	else
17 		return (unsigned long long)(-arg);
18 }
19 
20 /*
21  * @brief
22  * result = dividend / divisor
23  * *remainder = dividend % divisor
24  */
spl_complete_integer_division_u64(unsigned long long dividend,unsigned long long divisor,unsigned long long * remainder)25 static inline unsigned long long spl_complete_integer_division_u64(
26 	unsigned long long dividend,
27 	unsigned long long divisor,
28 	unsigned long long *remainder)
29 {
30 	unsigned long long result;
31 
32 	result = spl_div64_u64_rem(dividend, divisor, remainder);
33 
34 	return result;
35 }
36 
37 
38 #define FRACTIONAL_PART_MASK \
39 	((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)
40 
41 #define GET_INTEGER_PART(x) \
42 	((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)
43 
44 #define GET_FRACTIONAL_PART(x) \
45 	(FRACTIONAL_PART_MASK & (x))
46 
spl_fixpt_from_fraction(long long numerator,long long denominator)47 struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator)
48 {
49 	struct spl_fixed31_32 res;
50 
51 	bool arg1_negative = numerator < 0;
52 	bool arg2_negative = denominator < 0;
53 
54 	unsigned long long arg1_value = arg1_negative ? -numerator : numerator;
55 	unsigned long long arg2_value = arg2_negative ? -denominator : denominator;
56 
57 	unsigned long long remainder;
58 
59 	/* determine integer part */
60 
61 	unsigned long long res_value = spl_complete_integer_division_u64(
62 		arg1_value, arg2_value, &remainder);
63 
64 	SPL_ASSERT(res_value <= (unsigned long long)LONG_MAX);
65 
66 	/* determine fractional part */
67 	{
68 		unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
69 
70 		do {
71 			remainder <<= 1;
72 
73 			res_value <<= 1;
74 
75 			if (remainder >= arg2_value) {
76 				res_value |= 1;
77 				remainder -= arg2_value;
78 			}
79 		} while (--i != 0);
80 	}
81 
82 	/* round up LSB */
83 	{
84 		unsigned long long summand = (remainder << 1) >= arg2_value;
85 
86 		SPL_ASSERT(res_value <= (unsigned long long)LLONG_MAX - summand);
87 
88 		res_value += summand;
89 	}
90 
91 	res.value = (long long)res_value;
92 
93 	if (arg1_negative ^ arg2_negative)
94 		res.value = -res.value;
95 
96 	return res;
97 }
98 
spl_fixpt_mul(struct spl_fixed31_32 arg1,struct spl_fixed31_32 arg2)99 struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
100 {
101 	struct spl_fixed31_32 res;
102 
103 	bool arg1_negative = arg1.value < 0;
104 	bool arg2_negative = arg2.value < 0;
105 
106 	unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value;
107 	unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value;
108 
109 	unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
110 	unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);
111 
112 	unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
113 	unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);
114 
115 	unsigned long long tmp;
116 
117 	res.value = arg1_int * arg2_int;
118 
119 	SPL_ASSERT(res.value <= (long long)LONG_MAX);
120 
121 	res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
122 
123 	tmp = arg1_int * arg2_fra;
124 
125 	SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
126 
127 	res.value += tmp;
128 
129 	tmp = arg2_int * arg1_fra;
130 
131 	SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
132 
133 	res.value += tmp;
134 
135 	tmp = arg1_fra * arg2_fra;
136 
137 	tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
138 		(tmp >= (unsigned long long)spl_fixpt_half.value);
139 
140 	SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
141 
142 	res.value += tmp;
143 
144 	if (arg1_negative ^ arg2_negative)
145 		res.value = -res.value;
146 
147 	return res;
148 }
149 
spl_fixpt_sqr(struct spl_fixed31_32 arg)150 struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg)
151 {
152 	struct spl_fixed31_32 res;
153 
154 	unsigned long long arg_value = abs_i64(arg.value);
155 
156 	unsigned long long arg_int = GET_INTEGER_PART(arg_value);
157 
158 	unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);
159 
160 	unsigned long long tmp;
161 
162 	res.value = arg_int * arg_int;
163 
164 	SPL_ASSERT(res.value <= (long long)LONG_MAX);
165 
166 	res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
167 
168 	tmp = arg_int * arg_fra;
169 
170 	SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
171 
172 	res.value += tmp;
173 
174 	SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
175 
176 	res.value += tmp;
177 
178 	tmp = arg_fra * arg_fra;
179 
180 	tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
181 		(tmp >= (unsigned long long)spl_fixpt_half.value);
182 
183 	SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
184 
185 	res.value += tmp;
186 
187 	return res;
188 }
189 
spl_fixpt_recip(struct spl_fixed31_32 arg)190 struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg)
191 {
192 	/*
193 	 * @note
194 	 * Good idea to use Newton's method
195 	 */
196 
197 	return spl_fixpt_from_fraction(
198 		spl_fixpt_one.value,
199 		arg.value);
200 }
201 
spl_fixpt_sinc(struct spl_fixed31_32 arg)202 struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg)
203 {
204 	struct spl_fixed31_32 square;
205 
206 	struct spl_fixed31_32 res = spl_fixpt_one;
207 
208 	int n = 27;
209 
210 	struct spl_fixed31_32 arg_norm = arg;
211 
212 	if (spl_fixpt_le(
213 		spl_fixpt_two_pi,
214 		spl_fixpt_abs(arg))) {
215 		arg_norm = spl_fixpt_sub(
216 			arg_norm,
217 			spl_fixpt_mul_int(
218 				spl_fixpt_two_pi,
219 				(int)spl_div64_s64(
220 					arg_norm.value,
221 					spl_fixpt_two_pi.value)));
222 	}
223 
224 	square = spl_fixpt_sqr(arg_norm);
225 
226 	do {
227 		res = spl_fixpt_sub(
228 			spl_fixpt_one,
229 			spl_fixpt_div_int(
230 				spl_fixpt_mul(
231 					square,
232 					res),
233 				n * (n - 1)));
234 
235 		n -= 2;
236 	} while (n > 2);
237 
238 	if (arg.value != arg_norm.value)
239 		res = spl_fixpt_div(
240 			spl_fixpt_mul(res, arg_norm),
241 			arg);
242 
243 	return res;
244 }
245 
spl_fixpt_sin(struct spl_fixed31_32 arg)246 struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg)
247 {
248 	return spl_fixpt_mul(
249 		arg,
250 		spl_fixpt_sinc(arg));
251 }
252 
spl_fixpt_cos(struct spl_fixed31_32 arg)253 struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg)
254 {
255 	/* TODO implement argument normalization */
256 
257 	const struct spl_fixed31_32 square = spl_fixpt_sqr(arg);
258 
259 	struct spl_fixed31_32 res = spl_fixpt_one;
260 
261 	int n = 26;
262 
263 	do {
264 		res = spl_fixpt_sub(
265 			spl_fixpt_one,
266 			spl_fixpt_div_int(
267 				spl_fixpt_mul(
268 					square,
269 					res),
270 				n * (n - 1)));
271 
272 		n -= 2;
273 	} while (n != 0);
274 
275 	return res;
276 }
277 
278 /*
279  * @brief
280  * result = exp(arg),
281  * where abs(arg) < 1
282  *
283  * Calculated as Taylor series.
284  */
spl_fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg)285 static struct spl_fixed31_32 spl_fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg)
286 {
287 	unsigned int n = 9;
288 
289 	struct spl_fixed31_32 res = spl_fixpt_from_fraction(
290 		n + 2,
291 		n + 1);
292 	/* TODO find correct res */
293 
294 	SPL_ASSERT(spl_fixpt_lt(arg, spl_fixpt_one));
295 
296 	do
297 		res = spl_fixpt_add(
298 			spl_fixpt_one,
299 			spl_fixpt_div_int(
300 				spl_fixpt_mul(
301 					arg,
302 					res),
303 				n));
304 	while (--n != 1);
305 
306 	return spl_fixpt_add(
307 		spl_fixpt_one,
308 		spl_fixpt_mul(
309 			arg,
310 			res));
311 }
312 
spl_fixpt_exp(struct spl_fixed31_32 arg)313 struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg)
314 {
315 	/*
316 	 * @brief
317 	 * Main equation is:
318 	 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
319 	 * where m = round(x / ln(2)), r = x - m * ln(2)
320 	 */
321 
322 	if (spl_fixpt_le(
323 		spl_fixpt_ln2_div_2,
324 		spl_fixpt_abs(arg))) {
325 		int m = spl_fixpt_round(
326 			spl_fixpt_div(
327 				arg,
328 				spl_fixpt_ln2));
329 
330 		struct spl_fixed31_32 r = spl_fixpt_sub(
331 			arg,
332 			spl_fixpt_mul_int(
333 				spl_fixpt_ln2,
334 				m));
335 
336 		SPL_ASSERT(m != 0);
337 
338 		SPL_ASSERT(spl_fixpt_lt(
339 			spl_fixpt_abs(r),
340 			spl_fixpt_one));
341 
342 		if (m > 0)
343 			return spl_fixpt_shl(
344 				spl_fixed31_32_exp_from_taylor_series(r),
345 				(unsigned int)m);
346 		else
347 			return spl_fixpt_div_int(
348 				spl_fixed31_32_exp_from_taylor_series(r),
349 				1LL << -m);
350 	} else if (arg.value != 0)
351 		return spl_fixed31_32_exp_from_taylor_series(arg);
352 	else
353 		return spl_fixpt_one;
354 }
355 
spl_fixpt_log(struct spl_fixed31_32 arg)356 struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg)
357 {
358 	struct spl_fixed31_32 res = spl_fixpt_neg(spl_fixpt_one);
359 	/* TODO improve 1st estimation */
360 
361 	struct spl_fixed31_32 error;
362 
363 	SPL_ASSERT(arg.value > 0);
364 	/* TODO if arg is negative, return NaN */
365 	/* TODO if arg is zero, return -INF */
366 
367 	do {
368 		struct spl_fixed31_32 res1 = spl_fixpt_add(
369 			spl_fixpt_sub(
370 				res,
371 				spl_fixpt_one),
372 			spl_fixpt_div(
373 				arg,
374 				spl_fixpt_exp(res)));
375 
376 		error = spl_fixpt_sub(
377 			res,
378 			res1);
379 
380 		res = res1;
381 		/* TODO determine max_allowed_error based on quality of exp() */
382 	} while (abs_i64(error.value) > 100ULL);
383 
384 	return res;
385 }
386 
387 
388 /* this function is a generic helper to translate fixed point value to
389  * specified integer format that will consist of integer_bits integer part and
390  * fractional_bits fractional part. For example it is used in
391  * spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
392  * part in 32 bits. It is used in hw programming (scaler)
393  */
394 
spl_ux_dy(long long value,unsigned int integer_bits,unsigned int fractional_bits)395 static inline unsigned int spl_ux_dy(
396 	long long value,
397 	unsigned int integer_bits,
398 	unsigned int fractional_bits)
399 {
400 	/* 1. create mask of integer part */
401 	unsigned int result = (1 << integer_bits) - 1;
402 	/* 2. mask out fractional part */
403 	unsigned int fractional_part = FRACTIONAL_PART_MASK & value;
404 	/* 3. shrink fixed point integer part to be of integer_bits width*/
405 	result &= GET_INTEGER_PART(value);
406 	/* 4. make space for fractional part to be filled in after integer */
407 	result <<= fractional_bits;
408 	/* 5. shrink fixed point fractional part to of fractional_bits width*/
409 	fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
410 	/* 6. merge the result */
411 	return result | fractional_part;
412 }
413 
spl_clamp_ux_dy(long long value,unsigned int integer_bits,unsigned int fractional_bits,unsigned int min_clamp)414 static inline unsigned int spl_clamp_ux_dy(
415 	long long value,
416 	unsigned int integer_bits,
417 	unsigned int fractional_bits,
418 	unsigned int min_clamp)
419 {
420 	unsigned int truncated_val = spl_ux_dy(value, integer_bits, fractional_bits);
421 
422 	if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
423 		return (1 << (integer_bits + fractional_bits)) - 1;
424 	else if (truncated_val > min_clamp)
425 		return truncated_val;
426 	else
427 		return min_clamp;
428 }
429 
spl_fixpt_u4d19(struct spl_fixed31_32 arg)430 unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg)
431 {
432 	return spl_ux_dy(arg.value, 4, 19);
433 }
434 
spl_fixpt_u3d19(struct spl_fixed31_32 arg)435 unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg)
436 {
437 	return spl_ux_dy(arg.value, 3, 19);
438 }
439 
spl_fixpt_u2d19(struct spl_fixed31_32 arg)440 unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg)
441 {
442 	return spl_ux_dy(arg.value, 2, 19);
443 }
444 
spl_fixpt_u0d19(struct spl_fixed31_32 arg)445 unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg)
446 {
447 	return spl_ux_dy(arg.value, 0, 19);
448 }
449 
spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg)450 unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg)
451 {
452 	return spl_clamp_ux_dy(arg.value, 0, 14, 1);
453 }
454 
spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg)455 unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg)
456 {
457 	return spl_clamp_ux_dy(arg.value, 0, 10, 1);
458 }
459 
spl_fixpt_s4d19(struct spl_fixed31_32 arg)460 int spl_fixpt_s4d19(struct spl_fixed31_32 arg)
461 {
462 	if (arg.value < 0)
463 		return -(int)spl_ux_dy(spl_fixpt_abs(arg).value, 4, 19);
464 	else
465 		return spl_ux_dy(arg.value, 4, 19);
466 }
467 
spl_fixpt_from_ux_dy(unsigned int value,unsigned int integer_bits,unsigned int fractional_bits)468 struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value,
469 	unsigned int integer_bits,
470 	unsigned int fractional_bits)
471 {
472 	struct spl_fixed31_32 fixpt_value = spl_fixpt_zero;
473 	struct spl_fixed31_32 fixpt_int_value = spl_fixpt_zero;
474 	long long frac_mask = ((long long)1 << (long long)integer_bits) - 1;
475 
476 	fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
477 	frac_mask = frac_mask << fractional_bits;
478 	fixpt_int_value.value = value & frac_mask;
479 	fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
480 	fixpt_value.value |= fixpt_int_value.value;
481 	return fixpt_value;
482 }
483 
spl_fixpt_from_int_dy(unsigned int int_value,unsigned int frac_value,unsigned int integer_bits,unsigned int fractional_bits)484 struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value,
485 	unsigned int frac_value,
486 	unsigned int integer_bits,
487 	unsigned int fractional_bits)
488 {
489 	struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(int_value);
490 
491 	fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
492 	return fixpt_value;
493 }
494