Home
last modified time | relevance | path

Searched refs:float32x4_t (Results 1 – 25 of 51) sorted by relevance

123

/src/contrib/arm-optimized-routines/math/include/
H A Dmathlib.h114 __vpcs float32x4_t _ZGVnN4v_acosf (float32x4_t);
115 __vpcs float32x4_t _ZGVnN4v_acoshf (float32x4_t);
116 __vpcs float32x4_t _ZGVnN4v_asinf (float32x4_t);
117 __vpcs float32x4_t _ZGVnN4v_asinhf (float32x4_t);
118 __vpcs float32x4_t _ZGVnN4v_atanf (float32x4_t);
119 __vpcs float32x4_t _ZGVnN4v_atanhf (float32x4_t);
120 __vpcs float32x4_t _ZGVnN4v_cbrtf (float32x4_t);
121 __vpcs float32x4_t _ZGVnN4v_cosf (float32x4_t);
122 __vpcs float32x4_t _ZGVnN4v_coshf (float32x4_t);
123 __vpcs float32x4_t _ZGVnN4v_cospif (float32x4_t);
[all …]
/src/contrib/arm-optimized-routines/math/aarch64/advsimd/
H A Dexp10f.c18 float32x4_t c0, c1, c3;
20 float32x4_t inv_log10_2, special_bound;
23 float32x4_t scale_thresh;
56 static float32x4_t VPCS_ATTR NOINLINE
57 special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) in special_case()
68 static float32x4_t VPCS_ATTR NOINLINE
69 special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, in special_case()
70 float32x4_t scale, const struct data *d) in special_case()
74 float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); in special_case()
75 float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); in special_case()
[all …]
H A Dexp2f.c14 float32x4_t c1, c3;
17 float32x4_t scale_thresh, special_bound;
42 static float32x4_t VPCS_ATTR NOINLINE
43 special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) in special_case()
52 static float32x4_t VPCS_ATTR NOINLINE
53 special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, in special_case()
54 float32x4_t scale, const struct data *d) in special_case()
58 float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); in special_case()
59 float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); in special_case()
61 float32x4_t r2 = vmulq_f32 (s1, s1); in special_case()
[all …]
H A Derfcf.c15 float32x4_t max, shift;
17 float32x4_t third, two_over_five, tenth;
19 float32x4_t uflow_bound;
46 float32x4_t erfc;
47 float32x4_t scale;
62 float32x4_t e1 = vcombine_f32 (t0, t1); in lookup()
63 float32x4_t e2 = vcombine_f32 (t2, t3); in lookup()
70 static float32x4_t VPCS_ATTR NOINLINE
71 special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) in special_case()
94 NOINLINE VPCS_ATTR float32x4_t V_NAME_F1 (erfc) (float32x4_t x) in erfc()
[all …]
H A Dexpf.c13 float32x4_t c1, c3, c4, inv_ln2;
17 float32x4_t special_bound, scale_thresh;
46 static float32x4_t VPCS_ATTR NOINLINE
47 special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) in special_case()
56 static float32x4_t VPCS_ATTR NOINLINE
57 special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, in special_case()
58 float32x4_t scale, const struct data *d) in special_case()
62 float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); in special_case()
63 float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); in special_case()
65 float32x4_t r2 = vmulq_f32 (s1, s1); in special_case()
[all …]
H A Dtanf.c15 float32x4_t poly[6];
17 float32x4_t shift;
19 float32x4_t range_val;
39 static float32x4_t VPCS_ATTR NOINLINE
40 special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) in special_case()
46 static inline float32x4_t
47 eval_poly (float32x4_t z, const struct data *d) in eval_poly()
49 float32x4_t z2 = vmulq_f32 (z, z); in eval_poly()
59 float32x4_t z4 = vmulq_f32 (z2, z2); in eval_poly()
67 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tan) (float32x4_t x) in tan()
[all …]
H A Datan2f.c14 float32x4_t c0, pi_over_2, c4, c6, c2;
31 static float32x4_t VPCS_ATTR NOINLINE
32 special_case (float32x4_t y, float32x4_t x, float32x4_t ret, in special_case()
53 float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) in atan2()
67 float32x4_t ax = vabsq_f32 (x); in atan2()
68 float32x4_t ay = vabsq_f32 (y); in atan2()
74 float32x4_t n = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay); in atan2()
75 float32x4_t q = vbslq_f32 (pred_aygtax, ay, ax); in atan2()
76 float32x4_t z = vdivq_f32 (n, q); in atan2()
79 float32x4_t shift = vreinterpretq_f32_u32 ( in atan2()
[all …]
H A Dlog2f.c14 float32x4_t c0, c2, c4, c6, c8;
41 static float32x4_t VPCS_ATTR NOINLINE
42 special_case (float32x4_t n, uint32x4_t u_off, float32x4_t p, float32x4_t r, in special_case()
55 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x) in log2()
66 float32x4_t n = vcvtq_f32_s32 ( in log2()
73 float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); in log2()
76 float32x4_t r2 = vmulq_f32 (r, r); in log2()
78 float32x4_t c1357 = vld1q_f32 (&d->c1); in log2()
79 float32x4_t c01 = vfmaq_laneq_f32 (d->c0, r, c1357, 0); in log2()
80 float32x4_t c23 = vfmaq_laneq_f32 (d->c2, r, c1357, 1); in log2()
[all …]
H A Dv_log1pf_inline.h20 float32x4_t c4, c6, c1, c2, ln2;
35 static inline float32x4_t
36 eval_poly (float32x4_t m, const struct v_log1pf_data *d) in eval_poly()
39 float32x4_t c0357 = vld1q_f32 (&d->c0); in eval_poly()
40 float32x4_t q = vfmaq_laneq_f32 (v_f32 (-0.5), m, c0357, 0); in eval_poly()
41 float32x4_t m2 = vmulq_f32 (m, m); in eval_poly()
42 float32x4_t p67 = vfmaq_laneq_f32 (d->c6, m, c0357, 3); in eval_poly()
43 float32x4_t p45 = vfmaq_laneq_f32 (d->c4, m, c0357, 2); in eval_poly()
44 float32x4_t p23 = vfmaq_laneq_f32 (d->c2, m, c0357, 1); in eval_poly()
45 float32x4_t p = vfmaq_f32 (p45, m2, p67); in eval_poly()
[all …]
H A Dtanpif.c14 float32x4_t c0, c2, c4, c6;
27 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanpi) (float32x4_t x) in tanpi()
31 float32x4_t n = vrndnq_f32 (x); in tanpi()
34 float32x4_t xr = vsubq_f32 (x, n); in tanpi()
35 float32x4_t ar = vabdq_f32 (x, n); in tanpi()
37 float32x4_t r = vbslq_f32 (flip, vsubq_f32 (v_f32 (0.5f), ar), ar); in tanpi()
40 float32x4_t r2 = vmulq_f32 (r, r); in tanpi()
41 float32x4_t r4 = vmulq_f32 (r2, r2); in tanpi()
43 float32x4_t odd_coeffs = vld1q_f32 (&d->c1); in tanpi()
44 float32x4_t p01 = vfmaq_laneq_f32 (d->c0, r2, odd_coeffs, 0); in tanpi()
[all …]
H A Dexpf_1u.c12 float32x4_t shift, inv_ln2;
14 float32x4_t c1, c2, c3, c4;
15 float32x4_t special_bound, scale_thresh;
36 static float32x4_t VPCS_ATTR NOINLINE
37 specialcase (float32x4_t p, float32x4_t n, uint32x4_t e, const struct data *d) in specialcase()
41 float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); in specialcase()
42 float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); in specialcase()
44 float32x4_t r1 = vmulq_f32 (s1, s1); in specialcase()
45 float32x4_t r0 = vmulq_f32 (vmulq_f32 (p, s1), s2); in specialcase()
50 float32x4_t VPCS_ATTR
[all …]
H A Dlog10f.c14 float32x4_t c0, c2, c4, c6, inv_ln10, ln2;
41 static float32x4_t VPCS_ATTR NOINLINE
42 special_case (float32x4_t y, uint32x4_t u_off, float32x4_t p, float32x4_t r2, in special_case()
56 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x) in log10()
59 float32x4_t c1357 = vld1q_f32 (&d->c1); in log10()
67 float32x4_t n = vcvtq_f32_s32 ( in log10()
74 float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); in log10()
77 float32x4_t r2 = vmulq_f32 (r, r); in log10()
79 float32x4_t c01 = vfmaq_laneq_f32 (d->c0, r, c1357, 0); in log10()
80 float32x4_t c23 = vfmaq_laneq_f32 (d->c2, r, c1357, 1); in log10()
[all …]
H A Dcbrtf.c15 float32x4_t poly[4], one_third;
33 static float32x4_t VPCS_ATTR NOINLINE
34 special_case (float32x4_t x, float32x4_t y, uint16x4_t special) in special_case()
39 static inline float32x4_t
42 return (float32x4_t){ table[i[0] + 2], table[i[1] + 2], table[i[2] + 2], in shifted_lookup()
52 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cbrt) (float32x4_t x) in cbrt()
63 float32x4_t m = vbslq_f32 (MantissaMask, x, v_f32 (0.5)); in cbrt()
70 float32x4_t p = v_pairwise_poly_3_f32 (m, vmulq_f32 (m, m), d->poly); in cbrt()
72 float32x4_t one_third = d->one_third; in cbrt()
73 float32x4_t two_thirds = vaddq_f32 (one_third, one_third); in cbrt()
[all …]
H A Dexp2f_1u.c13 float32x4_t c0, c1, c2, c3, c4, c5, shift;
15 float32x4_t special_bound, scale_thresh;
33 static float32x4_t VPCS_ATTR NOINLINE
34 specialcase (float32x4_t p, float32x4_t n, uint32x4_t e, const struct data *d) in specialcase()
38 float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); in specialcase()
39 float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); in specialcase()
41 float32x4_t r1 = vmulq_f32 (s1, s1); in specialcase()
42 float32x4_t r0 = vmulq_f32 (vmulq_f32 (p, s1), s2); in specialcase()
47 float32x4_t VPCS_ATTR
48 _ZGVnN4v_exp2f_1u (float32x4_t x) in _ZGVnN4v_exp2f_1u()
[all …]
H A Dacosf.c15 float32x4_t poly[5];
16 float32x4_t pi_over_2f, pif;
32 static float32x4_t VPCS_ATTR NOINLINE
33 special_case (float32x4_t x, float32x4_t y, uint32x4_t special) in special_case()
61 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (acos) (float32x4_t x) in acos()
76 float32x4_t ax = vreinterpretq_f32_u32 (ia); in acos()
82 float32x4_t z2 = vbslq_f32 (a_le_half, vmulq_f32 (x, x), in acos()
84 float32x4_t z = vbslq_f32 (a_le_half, ax, vsqrtq_f32 (z2)); in acos()
87 float32x4_t p = v_horner_4_f32 (z2, d->poly); in acos()
94 float32x4_t y = vbslq_f32 (v_u32 (AbsMask), p, x); in acos()
[all …]
H A Dv_sincospif_common.h13 float32x4_t poly[6], range_val;
29 v_sincospif_inline (float32x4_t x, const struct v_sincospif_data *d) in v_sincospif_inline()
38 float32x4_t sr = vsubq_f32 (x, vrndaq_f32 (x)); in v_sincospif_inline()
40 float32x4_t cr = vsubq_f32 (v_f32 (0.5f), vabsq_f32 (sr)); in v_sincospif_inline()
43 float32x4_t sr2 = vmulq_f32 (sr, sr); in v_sincospif_inline()
44 float32x4_t sr4 = vmulq_f32 (sr2, sr2); in v_sincospif_inline()
45 float32x4_t cr2 = vmulq_f32 (cr, cr); in v_sincospif_inline()
46 float32x4_t cr4 = vmulq_f32 (cr2, cr2); in v_sincospif_inline()
48 float32x4_t ss = vmulq_f32 (v_pw_horner_5_f32 (sr2, sr4, d->poly), sr); in v_sincospif_inline()
49 float32x4_t cc = vmulq_f32 (v_pw_horner_5_f32 (cr2, cr4, d->poly), cr); in v_sincospif_inline()
[all …]
H A Derff.c14 float32x4_t max, shift, third;
16 float32x4_t tiny_bound, scale_minus_one;
32 float32x4_t erf;
33 float32x4_t scale;
44 float32x4_t e1 = vcombine_f32 (t0, t1); in lookup()
45 float32x4_t e2 = vcombine_f32 (t2, t3); in lookup()
64 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (erf) (float32x4_t x) in erf()
71 float32x4_t xm = x; in erf()
79 float32x4_t a = vabsq_f32 (x); in erf()
84 float32x4_t shift = dat->shift; in erf()
[all …]
H A Dv_expf_inline.h17 float32x4_t inv_ln2, c1, c3, c4;
31 static inline float32x4_t
32 v_expf_inline (float32x4_t x, const struct v_expf_data *d) in v_expf_inline()
40 float32x4_t ax = vabsq_f32 (x); in v_expf_inline()
41 float32x4_t ln2_c02 = vld1q_f32 (&d->ln2_hi); in v_expf_inline()
42 float32x4_t n = vrndaq_f32 (vmulq_f32 (ax, d->inv_ln2)); in v_expf_inline()
43 float32x4_t r = vfmsq_laneq_f32 (ax, n, ln2_c02, 0); in v_expf_inline()
46 float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); in v_expf_inline()
49 float32x4_t r2 = vmulq_f32 (r, r); in v_expf_inline()
50 float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c02, 2); in v_expf_inline()
[all …]
H A Dv_expm1f_inline.h16 float32x4_t c0, c2;
32 static inline float32x4_t
33 expm1f_inline (float32x4_t x, const struct v_expm1f_data *d) in expm1f_inline()
38 float32x4_t lane_consts = vld1q_f32 (&d->c1); in expm1f_inline()
41 float32x4_t j = vrndaq_f32 (vmulq_laneq_f32 (x, lane_consts, 2)); in expm1f_inline()
43 float32x4_t f = vfmsq_lane_f32 (x, j, ln2, 0); in expm1f_inline()
47 float32x4_t f2 = vmulq_f32 (f, f); in expm1f_inline()
48 float32x4_t f4 = vmulq_f32 (f2, f2); in expm1f_inline()
49 float32x4_t p01 = vfmaq_laneq_f32 (d->c0, f, lane_consts, 0); in expm1f_inline()
50 float32x4_t p23 = vfmaq_laneq_f32 (d->c2, f, lane_consts, 1); in expm1f_inline()
[all …]
H A Dhypotf.c31 static float32x4_t VPCS_ATTR NOINLINE
32 special_case (float32x4_t x, float32x4_t y, float32x4_t sqsum, in special_case()
44 float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y) in hypot()
48 float32x4_t ax = vabsq_f32 (x); in hypot()
49 float32x4_t ay = vabsq_f32 (y); in hypot()
62 float32x4_t sqsum = vfmaq_f32 (vmulq_f32 (ax, ax), ay, ay); in hypot()
71 float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y) in hypot()
75 float32x4_t sqsum = vfmaq_f32 (vmulq_f32 (x, x), y, y); in hypot()
H A Dlogf.c13 float32x4_t c2, c4, c6, ln2;
36 static float32x4_t VPCS_ATTR NOINLINE
37 special_case (float32x4_t p, uint32x4_t u_off, float32x4_t y, float32x4_t r2, in special_case()
45 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x) in log()
48 float32x4_t c1350 = vld1q_f32 (&d->c1); in log()
56 float32x4_t n = vcvtq_f32_s32 ( in log()
62 float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); in log()
65 float32x4_t r2 = vmulq_f32 (r, r); in log()
67 float32x4_t p = vfmaq_laneq_f32 (d->c2, r, c1350, 0); in log()
68 float32x4_t q = vfmaq_laneq_f32 (d->c4, r, c1350, 1); in log()
[all …]
H A Datanf.c15 float32x4_t poly[8];
16 float32x4_t pi_over_2;
35 static float32x4_t VPCS_ATTR NOINLINE
36 special_case (float32x4_t x, float32x4_t y, uint32x4_t special) in special_case()
46 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x) in atan()
71 float32x4_t z = vbslq_f32 (red, vdivq_f32 (v_f32 (1.0f), x), x); in atan()
72 float32x4_t shift = vreinterpretq_f32_u32 ( in atan()
75 float32x4_t az = vbslq_f32 ( in atan()
85 float32x4_t z2 = vmulq_f32 (z, z); in atan()
86 float32x4_t z4 = vmulq_f32 (z2, z2); in atan()
[all …]
H A Dcospif.c16 float32x4_t poly[6];
17 float32x4_t range_val;
25 static float32x4_t VPCS_ATTR NOINLINE
26 special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp) in special_case()
36 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cospi) (float32x4_t x) in cospi()
41 float32x4_t r = vabsq_f32 (x); in cospi()
50 float32x4_t r = x; in cospi()
65 float32x4_t r2 = vmulq_f32 (r, r); in cospi()
66 float32x4_t r4 = vmulq_f32 (r2, r2); in cospi()
67 float32x4_t y = vmulq_f32 (v_pw_horner_5_f32 (r2, r4, d->poly), r); in cospi()
H A Dv_sincosf_common.h12 float32x4_t poly_sin[3], poly_cos[3], pio2[3], inv_pio2, shift, range_val;
25 check_ge_rangeval (float32x4_t x, const struct v_sincosf_data *d) in check_ge_rangeval()
38 v_sincosf_inline (float32x4_t x, const struct v_sincosf_data *d) in v_sincosf_inline()
41 float32x4_t shift = d->shift; in v_sincosf_inline()
42 float32x4_t q = vfmaq_f32 (shift, x, d->inv_pio2); in v_sincosf_inline()
47 float32x4_t r = x; in v_sincosf_inline()
53 float32x4_t r2 = vmulq_f32 (r, r), r3 = vmulq_f32 (r, r2); in v_sincosf_inline()
54 float32x4_t s = vfmaq_f32 (d->poly_sin[1], r2, d->poly_sin[2]); in v_sincosf_inline()
59 float32x4_t r4 = vmulq_f32 (r2, r2); in v_sincosf_inline()
60 float32x4_t p = vfmaq_f32 (d->poly_cos[1], r2, d->poly_cos[2]); in v_sincosf_inline()
[all …]
/src/contrib/arm-optimized-routines/math/aarch64/experimental/advsimd/
H A Derfinvf_5u.c28 float32x4_t P29_3, tailshift;
29 float32x4_t P_50[6], Q_50[2];
30 float32x4_t P_10[3], Q_10[3];
49 static inline float32x4_t
50 special (float32x4_t x, const struct data *d) in special()
58 float32x4_t t = vdivq_f32 ( in special()
61 float32x4_t ts = vbslq_f32 (v_u32 (0x7fffffff), t, x); in special()
62 float32x4_t q = vfmaq_f32 (d->Q_50[0], vaddq_f32 (t, d->Q_50[1]), t); in special()
66 static inline float32x4_t
67 notails (float32x4_t x, const struct data *d) in notails()
[all …]

123