1 // SPDX-License-Identifier: MIT
2 //
3 // Copyright 2024 Advanced Micro Devices, Inc.
4
5 #include "spl_fixpt31_32.h"
6
7 static const struct spl_fixed31_32 spl_fixpt_two_pi = { 26986075409LL };
8 static const struct spl_fixed31_32 spl_fixpt_ln2 = { 2977044471LL };
9 static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { 1488522236LL };
10
abs_i64(long long arg)11 static inline unsigned long long abs_i64(
12 long long arg)
13 {
14 if (arg > 0)
15 return (unsigned long long)arg;
16 else
17 return (unsigned long long)(-arg);
18 }
19
20 /*
21 * @brief
22 * result = dividend / divisor
23 * *remainder = dividend % divisor
24 */
spl_complete_integer_division_u64(unsigned long long dividend,unsigned long long divisor,unsigned long long * remainder)25 static inline unsigned long long spl_complete_integer_division_u64(
26 unsigned long long dividend,
27 unsigned long long divisor,
28 unsigned long long *remainder)
29 {
30 unsigned long long result;
31
32 result = spl_div64_u64_rem(dividend, divisor, remainder);
33
34 return result;
35 }
36
37
38 #define FRACTIONAL_PART_MASK \
39 ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)
40
41 #define GET_INTEGER_PART(x) \
42 ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)
43
44 #define GET_FRACTIONAL_PART(x) \
45 (FRACTIONAL_PART_MASK & (x))
46
spl_fixpt_from_fraction(long long numerator,long long denominator)47 struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator)
48 {
49 struct spl_fixed31_32 res;
50
51 bool arg1_negative = numerator < 0;
52 bool arg2_negative = denominator < 0;
53
54 unsigned long long arg1_value = arg1_negative ? -numerator : numerator;
55 unsigned long long arg2_value = arg2_negative ? -denominator : denominator;
56
57 unsigned long long remainder;
58
59 /* determine integer part */
60
61 unsigned long long res_value = spl_complete_integer_division_u64(
62 arg1_value, arg2_value, &remainder);
63
64 SPL_ASSERT(res_value <= (unsigned long long)LONG_MAX);
65
66 /* determine fractional part */
67 {
68 unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
69
70 do {
71 remainder <<= 1;
72
73 res_value <<= 1;
74
75 if (remainder >= arg2_value) {
76 res_value |= 1;
77 remainder -= arg2_value;
78 }
79 } while (--i != 0);
80 }
81
82 /* round up LSB */
83 {
84 unsigned long long summand = (remainder << 1) >= arg2_value;
85
86 SPL_ASSERT(res_value <= (unsigned long long)LLONG_MAX - summand);
87
88 res_value += summand;
89 }
90
91 res.value = (long long)res_value;
92
93 if (arg1_negative ^ arg2_negative)
94 res.value = -res.value;
95
96 return res;
97 }
98
spl_fixpt_mul(struct spl_fixed31_32 arg1,struct spl_fixed31_32 arg2)99 struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
100 {
101 struct spl_fixed31_32 res;
102
103 bool arg1_negative = arg1.value < 0;
104 bool arg2_negative = arg2.value < 0;
105
106 unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value;
107 unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value;
108
109 unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
110 unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);
111
112 unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
113 unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);
114
115 unsigned long long tmp;
116
117 res.value = arg1_int * arg2_int;
118
119 SPL_ASSERT(res.value <= (long long)LONG_MAX);
120
121 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
122
123 tmp = arg1_int * arg2_fra;
124
125 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
126
127 res.value += tmp;
128
129 tmp = arg2_int * arg1_fra;
130
131 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
132
133 res.value += tmp;
134
135 tmp = arg1_fra * arg2_fra;
136
137 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
138 (tmp >= (unsigned long long)spl_fixpt_half.value);
139
140 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
141
142 res.value += tmp;
143
144 if (arg1_negative ^ arg2_negative)
145 res.value = -res.value;
146
147 return res;
148 }
149
spl_fixpt_sqr(struct spl_fixed31_32 arg)150 struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg)
151 {
152 struct spl_fixed31_32 res;
153
154 unsigned long long arg_value = abs_i64(arg.value);
155
156 unsigned long long arg_int = GET_INTEGER_PART(arg_value);
157
158 unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);
159
160 unsigned long long tmp;
161
162 res.value = arg_int * arg_int;
163
164 SPL_ASSERT(res.value <= (long long)LONG_MAX);
165
166 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
167
168 tmp = arg_int * arg_fra;
169
170 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
171
172 res.value += tmp;
173
174 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
175
176 res.value += tmp;
177
178 tmp = arg_fra * arg_fra;
179
180 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
181 (tmp >= (unsigned long long)spl_fixpt_half.value);
182
183 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
184
185 res.value += tmp;
186
187 return res;
188 }
189
spl_fixpt_recip(struct spl_fixed31_32 arg)190 struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg)
191 {
192 /*
193 * @note
194 * Good idea to use Newton's method
195 */
196
197 return spl_fixpt_from_fraction(
198 spl_fixpt_one.value,
199 arg.value);
200 }
201
spl_fixpt_sinc(struct spl_fixed31_32 arg)202 struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg)
203 {
204 struct spl_fixed31_32 square;
205
206 struct spl_fixed31_32 res = spl_fixpt_one;
207
208 int n = 27;
209
210 struct spl_fixed31_32 arg_norm = arg;
211
212 if (spl_fixpt_le(
213 spl_fixpt_two_pi,
214 spl_fixpt_abs(arg))) {
215 arg_norm = spl_fixpt_sub(
216 arg_norm,
217 spl_fixpt_mul_int(
218 spl_fixpt_two_pi,
219 (int)spl_div64_s64(
220 arg_norm.value,
221 spl_fixpt_two_pi.value)));
222 }
223
224 square = spl_fixpt_sqr(arg_norm);
225
226 do {
227 res = spl_fixpt_sub(
228 spl_fixpt_one,
229 spl_fixpt_div_int(
230 spl_fixpt_mul(
231 square,
232 res),
233 n * (n - 1)));
234
235 n -= 2;
236 } while (n > 2);
237
238 if (arg.value != arg_norm.value)
239 res = spl_fixpt_div(
240 spl_fixpt_mul(res, arg_norm),
241 arg);
242
243 return res;
244 }
245
spl_fixpt_sin(struct spl_fixed31_32 arg)246 struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg)
247 {
248 return spl_fixpt_mul(
249 arg,
250 spl_fixpt_sinc(arg));
251 }
252
spl_fixpt_cos(struct spl_fixed31_32 arg)253 struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg)
254 {
255 /* TODO implement argument normalization */
256
257 const struct spl_fixed31_32 square = spl_fixpt_sqr(arg);
258
259 struct spl_fixed31_32 res = spl_fixpt_one;
260
261 int n = 26;
262
263 do {
264 res = spl_fixpt_sub(
265 spl_fixpt_one,
266 spl_fixpt_div_int(
267 spl_fixpt_mul(
268 square,
269 res),
270 n * (n - 1)));
271
272 n -= 2;
273 } while (n != 0);
274
275 return res;
276 }
277
278 /*
279 * @brief
280 * result = exp(arg),
281 * where abs(arg) < 1
282 *
283 * Calculated as Taylor series.
284 */
spl_fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg)285 static struct spl_fixed31_32 spl_fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg)
286 {
287 unsigned int n = 9;
288
289 struct spl_fixed31_32 res = spl_fixpt_from_fraction(
290 n + 2,
291 n + 1);
292 /* TODO find correct res */
293
294 SPL_ASSERT(spl_fixpt_lt(arg, spl_fixpt_one));
295
296 do
297 res = spl_fixpt_add(
298 spl_fixpt_one,
299 spl_fixpt_div_int(
300 spl_fixpt_mul(
301 arg,
302 res),
303 n));
304 while (--n != 1);
305
306 return spl_fixpt_add(
307 spl_fixpt_one,
308 spl_fixpt_mul(
309 arg,
310 res));
311 }
312
spl_fixpt_exp(struct spl_fixed31_32 arg)313 struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg)
314 {
315 /*
316 * @brief
317 * Main equation is:
318 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
319 * where m = round(x / ln(2)), r = x - m * ln(2)
320 */
321
322 if (spl_fixpt_le(
323 spl_fixpt_ln2_div_2,
324 spl_fixpt_abs(arg))) {
325 int m = spl_fixpt_round(
326 spl_fixpt_div(
327 arg,
328 spl_fixpt_ln2));
329
330 struct spl_fixed31_32 r = spl_fixpt_sub(
331 arg,
332 spl_fixpt_mul_int(
333 spl_fixpt_ln2,
334 m));
335
336 SPL_ASSERT(m != 0);
337
338 SPL_ASSERT(spl_fixpt_lt(
339 spl_fixpt_abs(r),
340 spl_fixpt_one));
341
342 if (m > 0)
343 return spl_fixpt_shl(
344 spl_fixed31_32_exp_from_taylor_series(r),
345 (unsigned int)m);
346 else
347 return spl_fixpt_div_int(
348 spl_fixed31_32_exp_from_taylor_series(r),
349 1LL << -m);
350 } else if (arg.value != 0)
351 return spl_fixed31_32_exp_from_taylor_series(arg);
352 else
353 return spl_fixpt_one;
354 }
355
spl_fixpt_log(struct spl_fixed31_32 arg)356 struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg)
357 {
358 struct spl_fixed31_32 res = spl_fixpt_neg(spl_fixpt_one);
359 /* TODO improve 1st estimation */
360
361 struct spl_fixed31_32 error;
362
363 SPL_ASSERT(arg.value > 0);
364 /* TODO if arg is negative, return NaN */
365 /* TODO if arg is zero, return -INF */
366
367 do {
368 struct spl_fixed31_32 res1 = spl_fixpt_add(
369 spl_fixpt_sub(
370 res,
371 spl_fixpt_one),
372 spl_fixpt_div(
373 arg,
374 spl_fixpt_exp(res)));
375
376 error = spl_fixpt_sub(
377 res,
378 res1);
379
380 res = res1;
381 /* TODO determine max_allowed_error based on quality of exp() */
382 } while (abs_i64(error.value) > 100ULL);
383
384 return res;
385 }
386
387
388 /* this function is a generic helper to translate fixed point value to
389 * specified integer format that will consist of integer_bits integer part and
390 * fractional_bits fractional part. For example it is used in
391 * spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
392 * part in 32 bits. It is used in hw programming (scaler)
393 */
394
spl_ux_dy(long long value,unsigned int integer_bits,unsigned int fractional_bits)395 static inline unsigned int spl_ux_dy(
396 long long value,
397 unsigned int integer_bits,
398 unsigned int fractional_bits)
399 {
400 /* 1. create mask of integer part */
401 unsigned int result = (1 << integer_bits) - 1;
402 /* 2. mask out fractional part */
403 unsigned int fractional_part = FRACTIONAL_PART_MASK & value;
404 /* 3. shrink fixed point integer part to be of integer_bits width*/
405 result &= GET_INTEGER_PART(value);
406 /* 4. make space for fractional part to be filled in after integer */
407 result <<= fractional_bits;
408 /* 5. shrink fixed point fractional part to of fractional_bits width*/
409 fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
410 /* 6. merge the result */
411 return result | fractional_part;
412 }
413
spl_clamp_ux_dy(long long value,unsigned int integer_bits,unsigned int fractional_bits,unsigned int min_clamp)414 static inline unsigned int spl_clamp_ux_dy(
415 long long value,
416 unsigned int integer_bits,
417 unsigned int fractional_bits,
418 unsigned int min_clamp)
419 {
420 unsigned int truncated_val = spl_ux_dy(value, integer_bits, fractional_bits);
421
422 if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
423 return (1 << (integer_bits + fractional_bits)) - 1;
424 else if (truncated_val > min_clamp)
425 return truncated_val;
426 else
427 return min_clamp;
428 }
429
spl_fixpt_u4d19(struct spl_fixed31_32 arg)430 unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg)
431 {
432 return spl_ux_dy(arg.value, 4, 19);
433 }
434
spl_fixpt_u3d19(struct spl_fixed31_32 arg)435 unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg)
436 {
437 return spl_ux_dy(arg.value, 3, 19);
438 }
439
spl_fixpt_u2d19(struct spl_fixed31_32 arg)440 unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg)
441 {
442 return spl_ux_dy(arg.value, 2, 19);
443 }
444
spl_fixpt_u0d19(struct spl_fixed31_32 arg)445 unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg)
446 {
447 return spl_ux_dy(arg.value, 0, 19);
448 }
449
spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg)450 unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg)
451 {
452 return spl_clamp_ux_dy(arg.value, 0, 14, 1);
453 }
454
spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg)455 unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg)
456 {
457 return spl_clamp_ux_dy(arg.value, 0, 10, 1);
458 }
459
spl_fixpt_s4d19(struct spl_fixed31_32 arg)460 int spl_fixpt_s4d19(struct spl_fixed31_32 arg)
461 {
462 if (arg.value < 0)
463 return -(int)spl_ux_dy(spl_fixpt_abs(arg).value, 4, 19);
464 else
465 return spl_ux_dy(arg.value, 4, 19);
466 }
467
spl_fixpt_from_ux_dy(unsigned int value,unsigned int integer_bits,unsigned int fractional_bits)468 struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value,
469 unsigned int integer_bits,
470 unsigned int fractional_bits)
471 {
472 struct spl_fixed31_32 fixpt_value = spl_fixpt_zero;
473 struct spl_fixed31_32 fixpt_int_value = spl_fixpt_zero;
474 long long frac_mask = ((long long)1 << (long long)integer_bits) - 1;
475
476 fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
477 frac_mask = frac_mask << fractional_bits;
478 fixpt_int_value.value = value & frac_mask;
479 fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
480 fixpt_value.value |= fixpt_int_value.value;
481 return fixpt_value;
482 }
483
spl_fixpt_from_int_dy(unsigned int int_value,unsigned int frac_value,unsigned int integer_bits,unsigned int fractional_bits)484 struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value,
485 unsigned int frac_value,
486 unsigned int integer_bits,
487 unsigned int fractional_bits)
488 {
489 struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(int_value);
490
491 fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
492 return fixpt_value;
493 }
494