| /src/contrib/arm-optimized-routines/math/include/ |
| H A D | mathlib.h | 150 __vpcs float64x2_t _ZGVnN2v_acos (float64x2_t); 151 __vpcs float64x2_t _ZGVnN2v_acosh (float64x2_t); 152 __vpcs float64x2_t _ZGVnN2v_asin (float64x2_t); 153 __vpcs float64x2_t _ZGVnN2v_asinh (float64x2_t); 154 __vpcs float64x2_t _ZGVnN2v_atan (float64x2_t); 155 __vpcs float64x2_t _ZGVnN2v_atanh (float64x2_t); 156 __vpcs float64x2_t _ZGVnN2v_cbrt (float64x2_t); 157 __vpcs float64x2_t _ZGVnN2v_cos (float64x2_t); 158 __vpcs float64x2_t _ZGVnN2v_cosh (float64x2_t); 159 __vpcs float64x2_t _ZGVnN2v_cospi (float64x2_t); [all …]
|
| /src/contrib/arm-optimized-routines/math/aarch64/advsimd/ |
| H A D | atan2.c | 14 float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; 15 float64x2_t pi_over_2; 49 static float64x2_t VPCS_ATTR NOINLINE 50 special_case (float64x2_t y, float64x2_t x, float64x2_t ret, in special_case() 72 float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) in atan2() 86 float64x2_t ax = vabsq_f64 (x); in atan2() 87 float64x2_t ay = vabsq_f64 (y); in atan2() 93 float64x2_t n = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay); in atan2() 94 float64x2_t q = vbslq_f64 (pred_aygtax, ay, ax); in atan2() 95 float64x2_t z = vdivq_f64 (n, q); in atan2() [all …]
|
| H A D | pow.c | 23 float64x2_t small_powx; 26 float64x2_t log_c0, log_c2, log_c4, log_c5; 32 float64x2_t exp_c0, exp_c1; 75 static inline float64x2_t 78 return (float64x2_t){ in v_masked_lookup_f64() 87 static inline float64x2_t 88 v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d) in v_log_inline() 96 float64x2_t z = vreinterpretq_f64_u64 (iz); in v_log_inline() 97 float64x2_t kd = vcvtq_f64_s64 (k); in v_log_inline() 99 float64x2_t invc = v_masked_lookup_f64 (__v_pow_log_data.invc, tmp); in v_log_inline() [all …]
|
| H A D | atan.c | 14 float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; 15 float64x2_t pi_over_2; 42 float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x) in atan() 45 float64x2_t c13 = vld1q_f64 (&d->c1); in atan() 46 float64x2_t c57 = vld1q_f64 (&d->c5); in atan() 47 float64x2_t c911 = vld1q_f64 (&d->c9); in atan() 48 float64x2_t c1315 = vld1q_f64 (&d->c13); in atan() 49 float64x2_t c1719 = vld1q_f64 (&d->c17); in atan() 72 float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (1.0), x), x); in atan() 73 float64x2_t shift = vreinterpretq_f64_u64 ( in atan() [all …]
|
| H A D | erfc.c | 15 float64x2_t max, shift; 16 float64x2_t p20, p40, p41, p51; 20 float64x2_t uflow_bound; 52 float64x2_t erfc; 53 float64x2_t scale; 60 float64x2_t e1 in lookup() 62 float64x2_t e2 in lookup() 70 static float64x2_t VPCS_ATTR NOINLINE 71 special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) in special_case() 104 float64x2_t V_NAME_D1 (erfc) (float64x2_t x) in erfc() [all …]
|
| H A D | asinh.c | 16 float64x2_t tiny_bound; 18 float64x2_t lc0, lc2; 21 float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c17; 65 static float64x2_t NOINLINE VPCS_ATTR 66 special_case (float64x2_t x, float64x2_t y, uint64x2_t abs_mask, in special_case() 79 float64x2_t invc; 80 float64x2_t logc; 90 float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc); in lookup() 91 float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc); in lookup() 97 static inline float64x2_t [all …]
|
| H A D | asin.c | 14 float64x2_t c0, c2, c4, c6, c8, c10; 15 float64x2_t pi_over_2; 35 static float64x2_t VPCS_ATTR NOINLINE 36 special_case (float64x2_t x, float64x2_t y, uint64x2_t special) in special_case() 62 float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x) in asin() 65 float64x2_t ax = vabsq_f64 (x); in asin() 82 float64x2_t z2 = vbslq_f64 (a_lt_half, vmulq_f64 (x, x), in asin() 84 float64x2_t z = vbslq_f64 (a_lt_half, ax, vsqrtq_f64 (z2)); in asin() 87 float64x2_t z4 = vmulq_f64 (z2, z2); in asin() 88 float64x2_t z8 = vmulq_f64 (z4, z4); in asin() [all …]
|
| H A D | v_log1p_inline.h | 15 float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16; 43 static inline float64x2_t 44 eval_poly (float64x2_t m, float64x2_t m2, const struct v_log1p_data *d) in eval_poly() 47 float64x2_t c13 = vld1q_f64 (&d->c1); in eval_poly() 48 float64x2_t c57 = vld1q_f64 (&d->c5); in eval_poly() 49 float64x2_t c911 = vld1q_f64 (&d->c9); in eval_poly() 50 float64x2_t c1315 = vld1q_f64 (&d->c13); in eval_poly() 51 float64x2_t c1718 = vld1q_f64 (&d->c17); in eval_poly() 52 float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, m, c1718, 0); in eval_poly() 53 float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, m, c1315, 1); in eval_poly() [all …]
|
| H A D | v_expm1_inline.h | 16 float64x2_t c2, c4, c6, c8; 17 float64x2_t invln2; 36 static inline float64x2_t 37 expm1_inline (float64x2_t x, const struct v_expm1_data *d) in expm1_inline() 41 float64x2_t ln2 = vld1q_f64 (&d->ln2[0]); in expm1_inline() 48 float64x2_t n = vrndaq_f64 (vmulq_f64 (x, d->invln2)); in expm1_inline() 50 float64x2_t f = vfmsq_laneq_f64 (x, n, ln2, 0); in expm1_inline() 58 float64x2_t f2 = vmulq_f64 (f, f); in expm1_inline() 59 float64x2_t f4 = vmulq_f64 (f2, f2); in expm1_inline() 60 float64x2_t lane_consts_13 = vld1q_f64 (&d->c1); in expm1_inline() [all …]
|
| H A D | tanpi.c | 14 float64x2_t c0, c2, c4, c6, c8, c10, c12; 35 float64x2_t VPCS_ATTR V_NAME_D1 (tanpi) (float64x2_t x) in tanpi() 39 float64x2_t n = vrndnq_f64 (x); in tanpi() 42 float64x2_t xr = vsubq_f64 (x, n); in tanpi() 43 float64x2_t ar = vabdq_f64 (x, n); in tanpi() 45 float64x2_t r = vbslq_f64 (flip, vsubq_f64 (v_f64 (0.5), ar), ar); in tanpi() 48 float64x2_t r2 = vmulq_f64 (r, r); in tanpi() 49 float64x2_t r4 = vmulq_f64 (r2, r2); in tanpi() 51 float64x2_t c_1_3 = vld1q_f64 (&d->c1); in tanpi() 52 float64x2_t c_5_7 = vld1q_f64 (&d->c5); in tanpi() [all …]
|
| H A D | cosh.c | 14 float64x2_t poly[3]; 15 float64x2_t inv_ln2; 17 float64x2_t shift, thres; 34 static float64x2_t NOINLINE VPCS_ATTR 35 special_case (float64x2_t x, float64x2_t y, uint64x2_t special) in special_case() 42 static inline float64x2_t 43 exp_inline (float64x2_t x) in exp_inline() 48 float64x2_t z = vfmaq_f64 (d->shift, x, d->inv_ln2); in exp_inline() 50 float64x2_t n = vsubq_f64 (z, d->shift); in exp_inline() 53 float64x2_t ln2 = vld1q_f64 (d->ln2); in exp_inline() [all …]
|
| H A D | exp10.c | 21 float64x2_t poly[4]; 22 float64x2_t log10_2, log2_10_hi, log2_10_lo, shift; 24 float64x2_t special_bound, scale_thresh; 52 static float64x2_t VPCS_ATTR NOINLINE 53 special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) in special_case() 67 static inline float64x2_t VPCS_ATTR 68 special_case (float64x2_t s, float64x2_t y, float64x2_t n, in special_case() 73 float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (SpecialBias1, b)); in special_case() 74 float64x2_t s2 = vreinterpretq_f64_u64 ( in special_case() 77 float64x2_t r1 = vmulq_f64 (s1, s1); in special_case() [all …]
|
| H A D | erf.c | 14 float64x2_t third; 15 float64x2_t tenth, two_over_five, two_over_nine; 17 float64x2_t max, shift; 20 float64x2_t tiny_bound, huge_bound, scale_minus_one; 43 float64x2_t erf; 44 float64x2_t scale; 51 float64x2_t e1 = vld1q_f64 (&__v_erf_data.tab[vgetq_lane_u64 (i, 0)].erf), in lookup() 75 float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x) in erf() 79 float64x2_t a = vabsq_f64 (x); in erf() 102 float64x2_t shift = dat->shift; in erf() [all …]
|
| H A D | exp2.c | 21 float64x2_t poly[4]; 22 float64x2_t shift, scale_big_bound, scale_uoflow_bound; 45 static float64x2_t VPCS_ATTR NOINLINE 46 special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special) in special_case() 58 static inline float64x2_t VPCS_ATTR 59 special_case (float64x2_t s, float64x2_t y, float64x2_t n, in special_case() 64 float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (v_u64 (SpecialBias1), b)); in special_case() 65 float64x2_t s2 = vreinterpretq_f64_u64 (vaddq_u64 ( in special_case() 68 float64x2_t r1 = vmulq_f64 (s1, s1); in special_case() 69 float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, s2, y), s1); in special_case() [all …]
|
| H A D | log.c | 16 float64x2_t c0, c2; 40 float64x2_t invc; 41 float64x2_t logc; 51 float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc); in lookup() 52 float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc); in lookup() 58 static float64x2_t VPCS_ATTR NOINLINE 59 special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2, in special_case() 62 float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off)); in special_case() 70 float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x) in log() 85 float64x2_t z = vreinterpretq_f64_u64 (iz); in log() [all …]
|
| H A D | log10.c | 18 float64x2_t c0, c2, c4; 43 float64x2_t invc; 44 float64x2_t log10c; 55 float64x2_t e0 = vld1q_f64 (&__v_log10_data.table[i0].invc); in lookup() 56 float64x2_t e1 = vld1q_f64 (&__v_log10_data.table[i1].invc); in lookup() 62 static float64x2_t VPCS_ATTR NOINLINE 63 special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2, in special_case() 66 float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off)); in special_case() 76 float64x2_t VPCS_ATTR V_NAME_D1 (log10) (float64x2_t x) in log10() 91 float64x2_t z = vreinterpretq_f64_u64 (iz); in log10() [all …]
|
| H A D | log2.c | 16 float64x2_t c0, c2; 42 float64x2_t invc; 43 float64x2_t log2c; 54 float64x2_t e0 = vld1q_f64 (&__v_log2_data.table[i0].invc); in lookup() 55 float64x2_t e1 = vld1q_f64 (&__v_log2_data.table[i1].invc); in lookup() 61 static float64x2_t VPCS_ATTR NOINLINE 62 special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2, in special_case() 65 float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off)); in special_case() 74 float64x2_t VPCS_ATTR V_NAME_D1 (log2) (float64x2_t x) in log2() 89 float64x2_t z = vreinterpretq_f64_u64 (iz); in log2() [all …]
|
| H A D | acos.c | 15 float64x2_t poly[12]; 16 float64x2_t pi, pi_over_2; 37 static float64x2_t VPCS_ATTR NOINLINE 38 special_case (float64x2_t x, float64x2_t y, uint64x2_t special) in special_case() 67 float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x) in acos() 71 float64x2_t ax = vabsq_f64 (x); in acos() 87 float64x2_t z2 = vbslq_f64 (a_le_half, vmulq_f64 (x, x), in acos() 89 float64x2_t z = vbslq_f64 (a_le_half, ax, vsqrtq_f64 (z2)); in acos() 92 float64x2_t z4 = vmulq_f64 (z2, z2); in acos() 93 float64x2_t z8 = vmulq_f64 (z4, z4); in acos() [all …]
|
| H A D | cbrt.c | 15 float64x2_t poly[4], one_third, shift; 37 static float64x2_t NOINLINE VPCS_ATTR 38 special_case (float64x2_t x, float64x2_t y, uint32x2_t special) in special_case() 57 VPCS_ATTR float64x2_t V_NAME_D1 (cbrt) (float64x2_t x) in cbrt() 69 float64x2_t m = vbslq_f64 (MantissaMask, x, v_f64 (0.5)); in cbrt() 76 float64x2_t p = v_pairwise_poly_3_f64 (m, vmulq_f64 (m, m), d->poly); in cbrt() 77 float64x2_t one_third = d->one_third; in cbrt() 79 float64x2_t m_by_3 = vmulq_f64 (m, one_third); in cbrt() 80 float64x2_t two_thirds = vaddq_f64 (one_third, one_third); in cbrt() 81 float64x2_t a in cbrt() [all …]
|
| H A D | powf.c | 34 float64x2_t log2_poly[4]; 36 float64x2_t exp2f_poly[3]; 97 static inline float64x2_t 98 ylogx_core (const struct data *d, float64x2_t iz, float64x2_t k, in ylogx_core() 99 float64x2_t invc, float64x2_t logc, float64x2_t y) in ylogx_core() 103 float64x2_t r = vfmaq_f64 (v_f64 (-1.0), iz, invc); in ylogx_core() 104 float64x2_t y0 = vaddq_f64 (logc, k); in ylogx_core() 107 float64x2_t logx = vfmaq_f64 (A[1], r, A[0]); in ylogx_core() 115 static inline float64x2_t 129 powf_core (const struct data *d, float64x2_t ylogx) in powf_core() [all …]
|
| H A D | exp.c | 18 float64x2_t poly[3]; 19 float64x2_t inv_ln2, ln2_hi, ln2_lo, shift; 21 float64x2_t special_bound, scale_thresh; 48 static float64x2_t VPCS_ATTR NOINLINE 49 special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) in special_case() 63 static inline float64x2_t VPCS_ATTR 64 special_case (float64x2_t s, float64x2_t y, float64x2_t n) in special_case() 68 float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (SpecialBias1, b)); in special_case() 69 float64x2_t s2 = vreinterpretq_f64_u64 ( in special_case() 72 float64x2_t r1 = vmulq_f64 (s1, s1); in special_case() [all …]
|
| H A D | v_sincospi_common.h | 12 float64x2_t poly[10], range_val; 35 v_sincospi_inline (float64x2_t x, const struct v_sincospi_data *d) in v_sincospi_inline() 44 float64x2_t sr = vsubq_f64 (x, vrndaq_f64 (x)); in v_sincospi_inline() 46 float64x2_t cr = vsubq_f64 (v_f64 (0.5), vabsq_f64 (sr)); in v_sincospi_inline() 49 float64x2_t sr2 = vmulq_f64 (sr, sr); in v_sincospi_inline() 50 float64x2_t sr4 = vmulq_f64 (sr2, sr2); in v_sincospi_inline() 51 float64x2_t cr2 = vmulq_f64 (cr, cr); in v_sincospi_inline() 52 float64x2_t cr4 = vmulq_f64 (cr2, cr2); in v_sincospi_inline() 54 float64x2_t ss = vmulq_f64 (v_pw_horner_9_f64 (sr2, sr4, d->poly), sr); in v_sincospi_inline() 55 float64x2_t cc = vmulq_f64 (v_pw_horner_9_f64 (cr2, cr4, d->poly), cr); in v_sincospi_inline() [all …]
|
| H A D | hypot.c | 31 static float64x2_t VPCS_ATTR NOINLINE 32 special_case (float64x2_t x, float64x2_t y, float64x2_t sqsum, in special_case() 45 float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y) in hypot() 49 float64x2_t ax = vabsq_f64 (x); in hypot() 50 float64x2_t ay = vabsq_f64 (y); in hypot() 63 float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (ax, ax), ay, ay); in hypot() 72 float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y) in hypot() 76 float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (x, x), y, y); in hypot()
|
| H A D | tan.c | 15 float64x2_t poly[9]; 17 float64x2_t two_over_pi, shift; 19 float64x2_t range_val; 41 static float64x2_t VPCS_ATTR NOINLINE 42 special_case (float64x2_t x) in special_case() 51 float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) in tan() 68 float64x2_t q in tan() 74 float64x2_t r = x; in tan() 75 float64x2_t half_pi = vld1q_f64 (dat->half_pi); in tan() 88 float64x2_t r2 = vmulq_f64 (r, r), r4 = vmulq_f64 (r2, r2), in tan() [all …]
|
| /src/contrib/arm-optimized-routines/math/aarch64/experimental/advsimd/ |
| H A D | erfinv_25u.c | 24 float64x2_t tailshift; 27 float64x2_t P_57[9], Q_57[10], P_17[7], Q_17[6]; 64 static inline float64x2_t 65 special (float64x2_t x, const struct data *d) in special() 73 float64x2_t t = vnegq_f64 ( in special() 76 float64x2_t ts = vbslq_f64 (v_u64 (0x7fffffffffffffff), t, x); in special() 81 static inline float64x2_t 84 float64x2_t x = vld1q_f64 (c); in lookup() 88 static inline float64x2_t VPCS_ATTR 89 notails (float64x2_t x, const struct data *d) in notails() [all …]
|