aarch64: Optimise vecmath logs

Message ID 20231004093857.48835-1-Joe.Ramsay@arm.com
State Committed
Commit 5a4b6f8e4b7e2a76c71b713200a80181d745c93d
Headers
Series aarch64: Optimise vecmath logs |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Testing passed
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Testing passed

Commit Message

Joe Ramsay Oct. 4, 2023, 9:38 a.m. UTC
  * Transpose table layout for improved memory access
* Use half-vector special comparisons for AdvSIMD
* Improve register use near special-case branches
  - Due to the presence of a function call, return value would get
    mov-d out of x0 in order to facilitate PCS. By moving the final
    computation after the branch this can be avoided

Also change SVE routines to use overloaded intrinsics for readability.
---
Thanks,
Joe
 sysdeps/aarch64/fpu/log_advsimd.c    |  36 ++--
 sysdeps/aarch64/fpu/log_sve.c        |  52 +++---
 sysdeps/aarch64/fpu/logf_advsimd.c   |  26 +--
 sysdeps/aarch64/fpu/logf_sve.c       |  40 ++---
 sysdeps/aarch64/fpu/v_log_data.c     | 260 +++++++++++++--------------
 sysdeps/aarch64/fpu/v_math.h         |  21 ++-
 sysdeps/aarch64/fpu/vecmath_config.h |   6 +-
 7 files changed, 226 insertions(+), 215 deletions(-)
  

Comments

Szabolcs Nagy Oct. 5, 2023, 3:58 p.m. UTC | #1
The 10/04/2023 10:38, Joe Ramsay wrote:
> * Transpose table layout for improved memory access
> * Use half-vector special comparisons for AdvSIMD
> * Improve register use near special-case branches
>   - Due to the presence of a function call, return value would get
>     mov-d out of x0 in order to facilitate PCS. By moving the final
>     computation after the branch this can be avoided
> 
> Also change SVE routines to use overloaded intrinsics for readability.


looks good. committed.


> ---
> Thanks,
> Joe
>  sysdeps/aarch64/fpu/log_advsimd.c    |  36 ++--
>  sysdeps/aarch64/fpu/log_sve.c        |  52 +++---
>  sysdeps/aarch64/fpu/logf_advsimd.c   |  26 +--
>  sysdeps/aarch64/fpu/logf_sve.c       |  40 ++---
>  sysdeps/aarch64/fpu/v_log_data.c     | 260 +++++++++++++--------------
>  sysdeps/aarch64/fpu/v_math.h         |  21 ++-
>  sysdeps/aarch64/fpu/vecmath_config.h |   6 +-
>  7 files changed, 226 insertions(+), 215 deletions(-)
> 
> diff --git a/sysdeps/aarch64/fpu/log_advsimd.c b/sysdeps/aarch64/fpu/log_advsimd.c
> index 434737f2a9..8b32d1cfe1 100644
> --- a/sysdeps/aarch64/fpu/log_advsimd.c
> +++ b/sysdeps/aarch64/fpu/log_advsimd.c
> @@ -21,9 +21,11 @@
>  
>  static const struct data
>  {
> +  uint64x2_t min_norm;
> +  uint32x4_t special_bound;
>    float64x2_t poly[5];
>    float64x2_t ln2;
> -  uint64x2_t min_norm, special_bound, sign_exp_mask;
> +  uint64x2_t sign_exp_mask;
>  } data = {
>    /* Worst-case error: 1.17 + 0.5 ulp.
>       Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ].  */
> @@ -32,7 +34,7 @@ static const struct data
>  	    V2 (-0x1.554e550bd501ep-3) },
>    .ln2 = V2 (0x1.62e42fefa39efp-1),
>    .min_norm = V2 (0x0010000000000000),
> -  .special_bound = V2 (0x7fe0000000000000), /* asuint64(inf) - min_norm.  */
> +  .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm.  */
>    .sign_exp_mask = V2 (0xfff0000000000000)
>  };
>  
> @@ -52,29 +54,34 @@ lookup (uint64x2_t i)
>  {
>    /* Since N is a power of 2, n % N = n & (N - 1).  */
>    struct entry e;
> -  e.invc[0] = __v_log_data.invc[i[0] & IndexMask];
> -  e.logc[0] = __v_log_data.logc[i[0] & IndexMask];
> -  e.invc[1] = __v_log_data.invc[i[1] & IndexMask];
> -  e.logc[1] = __v_log_data.logc[i[1] & IndexMask];
> +  uint64_t i0 = (i[0] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
> +  uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
> +  float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc);
> +  float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc);
> +  e.invc = vuzp1q_f64 (e0, e1);
> +  e.logc = vuzp2q_f64 (e0, e1);
>    return e;
>  }
>  
>  static float64x2_t VPCS_ATTR NOINLINE
> -special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
> +special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2,
> +	      uint32x2_t cmp)
>  {
> -  return v_call_f64 (log, x, y, cmp);
> +  return v_call_f64 (log, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (cmp));
>  }
>  
>  float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x)
>  {
>    const struct data *d = ptr_barrier (&data);
>    float64x2_t z, r, r2, p, y, kd, hi;
> -  uint64x2_t ix, iz, tmp, cmp;
> +  uint64x2_t ix, iz, tmp;
> +  uint32x2_t cmp;
>    int64x2_t k;
>    struct entry e;
>  
>    ix = vreinterpretq_u64_f64 (x);
> -  cmp = vcgeq_u64 (vsubq_u64 (ix, d->min_norm), d->special_bound);
> +  cmp = vcge_u32 (vsubhn_u64 (ix, d->min_norm),
> +		  vget_low_u32 (d->special_bound));
>  
>    /* x = 2^k z; where z is in range [Off,2*Off) and exact.
>       The range is split into N subintervals.
> @@ -83,7 +90,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x)
>    k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift.  */
>    iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask));
>    z = vreinterpretq_f64_u64 (iz);
> -  e = lookup (vshrq_n_u64 (tmp, 52 - V_LOG_TABLE_BITS));
> +  e = lookup (tmp);
>  
>    /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
>    r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
> @@ -97,9 +104,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x)
>    p = vfmaq_f64 (A (0), A (1), r);
>    y = vfmaq_f64 (y, A (4), r2);
>    y = vfmaq_f64 (p, y, r2);
> -  y = vfmaq_f64 (hi, y, r2);
>  
> -  if (__glibc_unlikely (v_any_u64 (cmp)))
> -    return special_case (x, y, cmp);
> -  return y;
> +  if (__glibc_unlikely (v_any_u32h (cmp)))
> +    return special_case (x, y, hi, r2, cmp);
> +  return vfmaq_f64 (hi, y, r2);
>  }
> diff --git a/sysdeps/aarch64/fpu/log_sve.c b/sysdeps/aarch64/fpu/log_sve.c
> index 93c4f1c018..0c171a4d01 100644
> --- a/sysdeps/aarch64/fpu/log_sve.c
> +++ b/sysdeps/aarch64/fpu/log_sve.c
> @@ -38,43 +38,39 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
>  					 want 0x1.ffffff1cca045p-2.  */
>  svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg)
>  {
> -  svuint64_t ix = svreinterpret_u64_f64 (x);
> -  svuint64_t top = svlsr_n_u64_x (pg, ix, 52);
> -  svbool_t cmp
> -      = svcmpge_u64 (pg, svsub_n_u64_x (pg, top, MinTop), sv_u64 (ThreshTop));
> +  svuint64_t ix = svreinterpret_u64 (x);
> +  svuint64_t top = svlsr_x (pg, ix, 52);
> +  svbool_t cmp = svcmpge (pg, svsub_x (pg, top, MinTop), sv_u64 (ThreshTop));
>  
>    /* x = 2^k z; where z is in range [Off,2*Off) and exact.
>       The range is split into N subintervals.
>       The ith subinterval contains z and c is near its center.  */
> -  svuint64_t tmp = svsub_n_u64_x (pg, ix, Off);
> -  /* Equivalent to (tmp >> (52 - V_LOG_TABLE_BITS)) % N, since N is a power
> -     of 2.  */
> -  svuint64_t i = svand_n_u64_x (
> -      pg, svlsr_n_u64_x (pg, tmp, (52 - V_LOG_TABLE_BITS)), N - 1);
> -  svint64_t k = svasr_n_s64_x (pg, svreinterpret_s64_u64 (tmp),
> -			       52); /* Arithmetic shift.  */
> -  svuint64_t iz
> -      = svsub_u64_x (pg, ix, svand_n_u64_x (pg, tmp, 0xfffULL << 52));
> -  svfloat64_t z = svreinterpret_f64_u64 (iz);
> +  svuint64_t tmp = svsub_x (pg, ix, Off);
> +  /* Calculate table index = (tmp >> (52 - V_LOG_TABLE_BITS)) % N.
> +     The actual value of i is double this due to table layout.  */
> +  svuint64_t i
> +      = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1);
> +  svint64_t k
> +      = svasr_x (pg, svreinterpret_s64 (tmp), 52); /* Arithmetic shift.  */
> +  svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52));
> +  svfloat64_t z = svreinterpret_f64 (iz);
>    /* Lookup in 2 global lists (length N).  */
> -  svfloat64_t invc = svld1_gather_u64index_f64 (pg, __v_log_data.invc, i);
> -  svfloat64_t logc = svld1_gather_u64index_f64 (pg, __v_log_data.logc, i);
> +  svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i);
> +  svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i);
>  
>    /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
> -  svfloat64_t r = svmad_n_f64_x (pg, invc, z, -1);
> -  svfloat64_t kd = svcvt_f64_s64_x (pg, k);
> +  svfloat64_t r = svmad_x (pg, invc, z, -1);
> +  svfloat64_t kd = svcvt_f64_x (pg, k);
>    /* hi = r + log(c) + k*Ln2.  */
> -  svfloat64_t hi
> -      = svmla_n_f64_x (pg, svadd_f64_x (pg, logc, r), kd, __v_log_data.ln2);
> +  svfloat64_t hi = svmla_x (pg, svadd_x (pg, logc, r), kd, __v_log_data.ln2);
>    /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
> -  svfloat64_t r2 = svmul_f64_x (pg, r, r);
> -  svfloat64_t y = svmla_f64_x (pg, P (2), r, P (3));
> -  svfloat64_t p = svmla_f64_x (pg, P (0), r, P (1));
> -  y = svmla_f64_x (pg, y, r2, P (4));
> -  y = svmla_f64_x (pg, p, r2, y);
> -  y = svmla_f64_x (pg, hi, r2, y);
> +  svfloat64_t r2 = svmul_x (pg, r, r);
> +  svfloat64_t y = svmla_x (pg, P (2), r, P (3));
> +  svfloat64_t p = svmla_x (pg, P (0), r, P (1));
> +  y = svmla_x (pg, y, r2, P (4));
> +  y = svmla_x (pg, p, r2, y);
>  
>    if (__glibc_unlikely (svptest_any (pg, cmp)))
> -    return special_case (x, y, cmp);
> -  return y;
> +    return special_case (x, svmla_x (svnot_z (pg, cmp), hi, r2, y), cmp);
> +  return svmla_x (pg, hi, r2, y);
>  }
> diff --git a/sysdeps/aarch64/fpu/logf_advsimd.c b/sysdeps/aarch64/fpu/logf_advsimd.c
> index 375ad28b9f..93903c7962 100644
> --- a/sysdeps/aarch64/fpu/logf_advsimd.c
> +++ b/sysdeps/aarch64/fpu/logf_advsimd.c
> @@ -21,9 +21,11 @@
>  
>  static const struct data
>  {
> +  uint32x4_t min_norm;
> +  uint16x8_t special_bound;
>    float32x4_t poly[7];
>    float32x4_t ln2, tiny_bound;
> -  uint32x4_t min_norm, special_bound, off, mantissa_mask;
> +  uint32x4_t off, mantissa_mask;
>  } data = {
>    /* 3.34 ulp error.  */
>    .poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f),
> @@ -32,28 +34,31 @@ static const struct data
>    .ln2 = V4 (0x1.62e43p-1f),
>    .tiny_bound = V4 (0x1p-126),
>    .min_norm = V4 (0x00800000),
> -  .special_bound = V4 (0x7f000000), /* asuint32(inf) - min_norm.  */
> -  .off = V4 (0x3f2aaaab),	    /* 0.666667.  */
> +  .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm.  */
> +  .off = V4 (0x3f2aaaab),	/* 0.666667.  */
>    .mantissa_mask = V4 (0x007fffff)
>  };
>  
>  #define P(i) d->poly[7 - i]
>  
>  static float32x4_t VPCS_ATTR NOINLINE
> -special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
> +special_case (float32x4_t x, float32x4_t y, float32x4_t r2, float32x4_t p,
> +	      uint16x4_t cmp)
>  {
>    /* Fall back to scalar code.  */
> -  return v_call_f32 (logf, x, y, cmp);
> +  return v_call_f32 (logf, x, vfmaq_f32 (p, y, r2), vmovl_u16 (cmp));
>  }
>  
>  float32x4_t VPCS_ATTR V_NAME_F1 (log) (float32x4_t x)
>  {
>    const struct data *d = ptr_barrier (&data);
>    float32x4_t n, p, q, r, r2, y;
> -  uint32x4_t u, cmp;
> +  uint32x4_t u;
> +  uint16x4_t cmp;
>  
>    u = vreinterpretq_u32_f32 (x);
> -  cmp = vcgeq_u32 (vsubq_u32 (u, d->min_norm), d->special_bound);
> +  cmp = vcge_u16 (vsubhn_u32 (u, d->min_norm),
> +		  vget_low_u16 (d->special_bound));
>  
>    /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
>    u = vsubq_u32 (u, d->off);
> @@ -73,9 +78,8 @@ float32x4_t VPCS_ATTR V_NAME_F1 (log) (float32x4_t x)
>    q = vfmaq_f32 (q, p, r2);
>    y = vfmaq_f32 (y, q, r2);
>    p = vfmaq_f32 (r, d->ln2, n);
> -  y = vfmaq_f32 (p, y, r2);
>  
> -  if (__glibc_unlikely (v_any_u32 (cmp)))
> -    return special_case (x, y, cmp);
> -  return y;
> +  if (__glibc_unlikely (v_any_u16h (cmp)))
> +    return special_case (x, y, r2, p, cmp);
> +  return vfmaq_f32 (p, y, r2);
>  }
> diff --git a/sysdeps/aarch64/fpu/logf_sve.c b/sysdeps/aarch64/fpu/logf_sve.c
> index 46c6e7c461..c02761188a 100644
> --- a/sysdeps/aarch64/fpu/logf_sve.c
> +++ b/sysdeps/aarch64/fpu/logf_sve.c
> @@ -55,33 +55,31 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
>  {
>    const struct data *d = ptr_barrier (&data);
>  
> -  svuint32_t u = svreinterpret_u32_f32 (x);
> -  svbool_t cmp = svcmpge_n_u32 (pg, svsub_n_u32_x (pg, u, Min), Thresh);
> +  svuint32_t u = svreinterpret_u32 (x);
> +  svbool_t cmp = svcmpge (pg, svsub_x (pg, u, Min), Thresh);
>  
>    /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
> -  u = svsub_n_u32_x (pg, u, Off);
> -  svfloat32_t n
> -      = svcvt_f32_s32_x (pg, svasr_n_s32_x (pg, svreinterpret_s32_u32 (u),
> -					    23)); /* Sign-extend.  */
> -  u = svand_n_u32_x (pg, u, Mask);
> -  u = svadd_n_u32_x (pg, u, Off);
> -  svfloat32_t r = svsub_n_f32_x (pg, svreinterpret_f32_u32 (u), 1.0f);
> +  u = svsub_x (pg, u, Off);
> +  svfloat32_t n = svcvt_f32_x (
> +      pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend.  */
> +  u = svand_x (pg, u, Mask);
> +  u = svadd_x (pg, u, Off);
> +  svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f);
>  
>    /* y = log(1+r) + n*ln2.  */
> -  svfloat32_t r2 = svmul_f32_x (pg, r, r);
> +  svfloat32_t r2 = svmul_x (pg, r, r);
>    /* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))).  */
> -  svfloat32_t p_0135 = svld1rq_f32 (svptrue_b32 (), &d->poly_0135[0]);
> -  svfloat32_t p = svmla_lane_f32 (sv_f32 (d->poly_246[0]), r, p_0135, 1);
> -  svfloat32_t q = svmla_lane_f32 (sv_f32 (d->poly_246[1]), r, p_0135, 2);
> -  svfloat32_t y = svmla_lane_f32 (sv_f32 (d->poly_246[2]), r, p_0135, 3);
> -  p = svmla_lane_f32 (p, r2, p_0135, 0);
> +  svfloat32_t p_0135 = svld1rq (svptrue_b32 (), &d->poly_0135[0]);
> +  svfloat32_t p = svmla_lane (sv_f32 (d->poly_246[0]), r, p_0135, 1);
> +  svfloat32_t q = svmla_lane (sv_f32 (d->poly_246[1]), r, p_0135, 2);
> +  svfloat32_t y = svmla_lane (sv_f32 (d->poly_246[2]), r, p_0135, 3);
> +  p = svmla_lane (p, r2, p_0135, 0);
>  
> -  q = svmla_f32_x (pg, q, r2, p);
> -  y = svmla_f32_x (pg, y, r2, q);
> -  p = svmla_n_f32_x (pg, r, n, d->ln2);
> -  y = svmla_f32_x (pg, p, r2, y);
> +  q = svmla_x (pg, q, r2, p);
> +  y = svmla_x (pg, y, r2, q);
> +  p = svmla_x (pg, r, n, d->ln2);
>  
>    if (__glibc_unlikely (svptest_any (pg, cmp)))
> -    return special_case (x, y, cmp);
> -  return y;
> +    return special_case (x, svmla_x (svnot_z (pg, cmp), p, r2, y), cmp);
> +  return svmla_x (pg, p, r2, y);
>  }
> diff --git a/sysdeps/aarch64/fpu/v_log_data.c b/sysdeps/aarch64/fpu/v_log_data.c
> index 6fd6f43695..99506e3cde 100644
> --- a/sysdeps/aarch64/fpu/v_log_data.c
> +++ b/sysdeps/aarch64/fpu/v_log_data.c
> @@ -34,140 +34,140 @@ const struct v_log_data __v_log_data = {
>       N=128) and log(c) and 1/c for the ith subinterval comes from two lookup
>       tables:
>  
> -	invc[i] = 1/c
> -	logc[i] = (double)log(c)
> +	table[i].invc = 1/c
> +	table[i].logc = (double)log(c)
>  
>       where c is near the center of the subinterval and is chosen by trying
>       several floating point invc candidates around 1/center and selecting one
>       for which the error in (double)log(c) is minimized (< 0x1p-74), except the
>       subinterval that contains 1 and the previous one got tweaked to avoid
>       cancellation.  */
> -  .invc = { 0x1.6a133d0dec120p+0, 0x1.6815f2f3e42edp+0,
> -	    0x1.661e39be1ac9ep+0, 0x1.642bfa30ac371p+0,
> -	    0x1.623f1d916f323p+0, 0x1.60578da220f65p+0,
> -	    0x1.5e75349dea571p+0, 0x1.5c97fd387a75ap+0,
> -	    0x1.5abfd2981f200p+0, 0x1.58eca051dc99cp+0,
> -	    0x1.571e526d9df12p+0, 0x1.5554d555b3fcbp+0,
> -	    0x1.539015e2a20cdp+0, 0x1.51d0014ee0164p+0,
> -	    0x1.50148538cd9eep+0, 0x1.4e5d8f9f698a1p+0,
> -	    0x1.4cab0edca66bep+0, 0x1.4afcf1a9db874p+0,
> -	    0x1.495327136e16fp+0, 0x1.47ad9e84af28fp+0,
> -	    0x1.460c47b39ae15p+0, 0x1.446f12b278001p+0,
> -	    0x1.42d5efdd720ecp+0, 0x1.4140cfe001a0fp+0,
> -	    0x1.3fafa3b421f69p+0, 0x1.3e225c9c8ece5p+0,
> -	    0x1.3c98ec29a211ap+0, 0x1.3b13442a413fep+0,
> -	    0x1.399156baa3c54p+0, 0x1.38131639b4cdbp+0,
> -	    0x1.36987540fbf53p+0, 0x1.352166b648f61p+0,
> -	    0x1.33adddb3eb575p+0, 0x1.323dcd99fc1d3p+0,
> -	    0x1.30d129fefc7d2p+0, 0x1.2f67e6b72fe7dp+0,
> -	    0x1.2e01f7cf8b187p+0, 0x1.2c9f518ddc86ep+0,
> -	    0x1.2b3fe86e5f413p+0, 0x1.29e3b1211b25cp+0,
> -	    0x1.288aa08b373cfp+0, 0x1.2734abcaa8467p+0,
> -	    0x1.25e1c82459b81p+0, 0x1.2491eb1ad59c5p+0,
> -	    0x1.23450a54048b5p+0, 0x1.21fb1bb09e578p+0,
> -	    0x1.20b415346d8f7p+0, 0x1.1f6fed179a1acp+0,
> -	    0x1.1e2e99b93c7b3p+0, 0x1.1cf011a7a882ap+0,
> -	    0x1.1bb44b97dba5ap+0, 0x1.1a7b3e66cdd4fp+0,
> -	    0x1.1944e11dc56cdp+0, 0x1.18112aebb1a6ep+0,
> -	    0x1.16e013231b7e9p+0, 0x1.15b1913f156cfp+0,
> -	    0x1.14859cdedde13p+0, 0x1.135c2dc68cfa4p+0,
> -	    0x1.12353bdb01684p+0, 0x1.1110bf25b85b4p+0,
> -	    0x1.0feeafd2f8577p+0, 0x1.0ecf062c51c3bp+0,
> -	    0x1.0db1baa076c8bp+0, 0x1.0c96c5bb3048ep+0,
> -	    0x1.0b7e20263e070p+0, 0x1.0a67c2acd0ce3p+0,
> -	    0x1.0953a6391e982p+0, 0x1.0841c3caea380p+0,
> -	    0x1.07321489b13eap+0, 0x1.062491aee9904p+0,
> -	    0x1.05193497a7cc5p+0, 0x1.040ff6b5f5e9fp+0,
> -	    0x1.0308d19aa6127p+0, 0x1.0203beedb0c67p+0,
> -	    0x1.010037d38bcc2p+0, 1.0,
> -	    0x1.fc06d493cca10p-1, 0x1.f81e6ac3b918fp-1,
> -	    0x1.f44546ef18996p-1, 0x1.f07b10382c84bp-1,
> -	    0x1.ecbf7070e59d4p-1, 0x1.e91213f715939p-1,
> -	    0x1.e572a9a75f7b7p-1, 0x1.e1e0e2c530207p-1,
> -	    0x1.de5c72d8a8be3p-1, 0x1.dae50fa5658ccp-1,
> -	    0x1.d77a71145a2dap-1, 0x1.d41c51166623ep-1,
> -	    0x1.d0ca6ba0bb29fp-1, 0x1.cd847e8e59681p-1,
> -	    0x1.ca4a499693e00p-1, 0x1.c71b8e399e821p-1,
> -	    0x1.c3f80faf19077p-1, 0x1.c0df92dc2b0ecp-1,
> -	    0x1.bdd1de3cbb542p-1, 0x1.baceb9e1007a3p-1,
> -	    0x1.b7d5ef543e55ep-1, 0x1.b4e749977d953p-1,
> -	    0x1.b20295155478ep-1, 0x1.af279f8e82be2p-1,
> -	    0x1.ac5638197fdf3p-1, 0x1.a98e2f102e087p-1,
> -	    0x1.a6cf5606d05c1p-1, 0x1.a4197fc04d746p-1,
> -	    0x1.a16c80293dc01p-1, 0x1.9ec82c4dc5bc9p-1,
> -	    0x1.9c2c5a491f534p-1, 0x1.9998e1480b618p-1,
> -	    0x1.970d9977c6c2dp-1, 0x1.948a5c023d212p-1,
> -	    0x1.920f0303d6809p-1, 0x1.8f9b698a98b45p-1,
> -	    0x1.8d2f6b81726f6p-1, 0x1.8acae5bb55badp-1,
> -	    0x1.886db5d9275b8p-1, 0x1.8617ba567c13cp-1,
> -	    0x1.83c8d27487800p-1, 0x1.8180de3c5dbe7p-1,
> -	    0x1.7f3fbe71cdb71p-1, 0x1.7d055498071c1p-1,
> -	    0x1.7ad182e54f65ap-1, 0x1.78a42c3c90125p-1,
> -	    0x1.767d342f76944p-1, 0x1.745c7ef26b00ap-1,
> -	    0x1.7241f15769d0fp-1, 0x1.702d70d396e41p-1,
> -	    0x1.6e1ee3700cd11p-1, 0x1.6c162fc9cbe02p-1 },
> -  .logc = { -0x1.62fe995eb963ap-2, -0x1.5d5a48dad6b67p-2,
> -	    -0x1.57bde257d2769p-2, -0x1.52294fbf2af55p-2,
> -	    -0x1.4c9c7b598aa38p-2, -0x1.47174fc5ff560p-2,
> -	    -0x1.4199b7fa7b5cap-2, -0x1.3c239f48cfb99p-2,
> -	    -0x1.36b4f154d2aebp-2, -0x1.314d9a0ff32fbp-2,
> -	    -0x1.2bed85cca3cffp-2, -0x1.2694a11421af9p-2,
> -	    -0x1.2142d8d014fb2p-2, -0x1.1bf81a2c77776p-2,
> -	    -0x1.16b452a39c6a4p-2, -0x1.11776ffa6c67ep-2,
> -	    -0x1.0c416035020e0p-2, -0x1.071211aa10fdap-2,
> -	    -0x1.01e972e293b1bp-2, -0x1.f98ee587fd434p-3,
> -	    -0x1.ef5800ad716fbp-3, -0x1.e52e160484698p-3,
> -	    -0x1.db1104b19352ep-3, -0x1.d100ac59e0bd6p-3,
> -	    -0x1.c6fced287c3bdp-3, -0x1.bd05a7b317c29p-3,
> -	    -0x1.b31abd229164fp-3, -0x1.a93c0edadb0a3p-3,
> -	    -0x1.9f697ee30d7ddp-3, -0x1.95a2efa9aa40ap-3,
> -	    -0x1.8be843d796044p-3, -0x1.82395ecc477edp-3,
> -	    -0x1.7896240966422p-3, -0x1.6efe77aca8c55p-3,
> -	    -0x1.65723e117ec5cp-3, -0x1.5bf15c0955706p-3,
> -	    -0x1.527bb6c111da1p-3, -0x1.491133c939f8fp-3,
> -	    -0x1.3fb1b90c7fc58p-3, -0x1.365d2cc485f8dp-3,
> -	    -0x1.2d13758970de7p-3, -0x1.23d47a721fd47p-3,
> -	    -0x1.1aa0229f25ec2p-3, -0x1.117655ddebc3bp-3,
> -	    -0x1.0856fbf83ab6bp-3, -0x1.fe83fabbaa106p-4,
> -	    -0x1.ec6e8507a56cdp-4, -0x1.da6d68c7cc2eap-4,
> -	    -0x1.c88078462be0cp-4, -0x1.b6a786a423565p-4,
> -	    -0x1.a4e2676ac7f85p-4, -0x1.9330eea777e76p-4,
> -	    -0x1.8192f134d5ad9p-4, -0x1.70084464f0538p-4,
> -	    -0x1.5e90bdec5cb1fp-4, -0x1.4d2c3433c5536p-4,
> -	    -0x1.3bda7e219879ap-4, -0x1.2a9b732d27194p-4,
> -	    -0x1.196eeb2b10807p-4, -0x1.0854be8ef8a7ep-4,
> -	    -0x1.ee998cb277432p-5, -0x1.ccadb79919fb9p-5,
> -	    -0x1.aae5b1d8618b0p-5, -0x1.89413015d7442p-5,
> -	    -0x1.67bfe7bf158dep-5, -0x1.46618f83941bep-5,
> -	    -0x1.2525df1b0618ap-5, -0x1.040c8e2f77c6ap-5,
> -	    -0x1.c62aad39f738ap-6, -0x1.847fe3bdead9cp-6,
> -	    -0x1.43183683400acp-6, -0x1.01f31c4e1d544p-6,
> -	    -0x1.82201d1e6b69ap-7, -0x1.00dd0f3e1bfd6p-7,
> -	    -0x1.ff6fe1feb4e53p-9, 0.0,
> -	    0x1.fe91885ec8e20p-8,  0x1.fc516f716296dp-7,
> -	    0x1.7bb4dd70a015bp-6,  0x1.f84c99b34b674p-6,
> -	    0x1.39f9ce4fb2d71p-5,  0x1.7756c0fd22e78p-5,
> -	    0x1.b43ee82db8f3ap-5,  0x1.f0b3fced60034p-5,
> -	    0x1.165bd78d4878ep-4,  0x1.3425d2715ebe6p-4,
> -	    0x1.51b8bd91b7915p-4,  0x1.6f15632c76a47p-4,
> -	    0x1.8c3c88ecbe503p-4,  0x1.a92ef077625dap-4,
> -	    0x1.c5ed5745fa006p-4,  0x1.e27876de1c993p-4,
> -	    0x1.fed104fce4cdcp-4,  0x1.0d7bd9c17d78bp-3,
> -	    0x1.1b76986cef97bp-3,  0x1.295913d24f750p-3,
> -	    0x1.37239fa295d17p-3,  0x1.44d68dd78714bp-3,
> -	    0x1.52722ebe5d780p-3,  0x1.5ff6d12671f98p-3,
> -	    0x1.6d64c2389484bp-3,  0x1.7abc4da40fddap-3,
> -	    0x1.87fdbda1e8452p-3,  0x1.95295b06a5f37p-3,
> -	    0x1.a23f6d34abbc5p-3,  0x1.af403a28e04f2p-3,
> -	    0x1.bc2c06a85721ap-3,  0x1.c903161240163p-3,
> -	    0x1.d5c5aa93287ebp-3,  0x1.e274051823fa9p-3,
> -	    0x1.ef0e656300c16p-3,  0x1.fb9509f05aa2ap-3,
> -	    0x1.04041821f37afp-2,  0x1.0a340a49b3029p-2,
> -	    0x1.105a7918a126dp-2,  0x1.1677819812b84p-2,
> -	    0x1.1c8b405b40c0ep-2,  0x1.2295d16cfa6b1p-2,
> -	    0x1.28975066318a2p-2,  0x1.2e8fd855d86fcp-2,
> -	    0x1.347f83d605e59p-2,  0x1.3a666d1244588p-2,
> -	    0x1.4044adb6f8ec4p-2,  0x1.461a5f077558cp-2,
> -	    0x1.4be799e20b9c8p-2,  0x1.51ac76a6b79dfp-2,
> -	    0x1.57690d5744a45p-2,  0x1.5d1d758e45217p-2 }
> +  .table = { { 0x1.6a133d0dec120p+0, -0x1.62fe995eb963ap-2 },
> +	     { 0x1.6815f2f3e42edp+0, -0x1.5d5a48dad6b67p-2 },
> +	     { 0x1.661e39be1ac9ep+0, -0x1.57bde257d2769p-2 },
> +	     { 0x1.642bfa30ac371p+0, -0x1.52294fbf2af55p-2 },
> +	     { 0x1.623f1d916f323p+0, -0x1.4c9c7b598aa38p-2 },
> +	     { 0x1.60578da220f65p+0, -0x1.47174fc5ff560p-2 },
> +	     { 0x1.5e75349dea571p+0, -0x1.4199b7fa7b5cap-2 },
> +	     { 0x1.5c97fd387a75ap+0, -0x1.3c239f48cfb99p-2 },
> +	     { 0x1.5abfd2981f200p+0, -0x1.36b4f154d2aebp-2 },
> +	     { 0x1.58eca051dc99cp+0, -0x1.314d9a0ff32fbp-2 },
> +	     { 0x1.571e526d9df12p+0, -0x1.2bed85cca3cffp-2 },
> +	     { 0x1.5554d555b3fcbp+0, -0x1.2694a11421af9p-2 },
> +	     { 0x1.539015e2a20cdp+0, -0x1.2142d8d014fb2p-2 },
> +	     { 0x1.51d0014ee0164p+0, -0x1.1bf81a2c77776p-2 },
> +	     { 0x1.50148538cd9eep+0, -0x1.16b452a39c6a4p-2 },
> +	     { 0x1.4e5d8f9f698a1p+0, -0x1.11776ffa6c67ep-2 },
> +	     { 0x1.4cab0edca66bep+0, -0x1.0c416035020e0p-2 },
> +	     { 0x1.4afcf1a9db874p+0, -0x1.071211aa10fdap-2 },
> +	     { 0x1.495327136e16fp+0, -0x1.01e972e293b1bp-2 },
> +	     { 0x1.47ad9e84af28fp+0, -0x1.f98ee587fd434p-3 },
> +	     { 0x1.460c47b39ae15p+0, -0x1.ef5800ad716fbp-3 },
> +	     { 0x1.446f12b278001p+0, -0x1.e52e160484698p-3 },
> +	     { 0x1.42d5efdd720ecp+0, -0x1.db1104b19352ep-3 },
> +	     { 0x1.4140cfe001a0fp+0, -0x1.d100ac59e0bd6p-3 },
> +	     { 0x1.3fafa3b421f69p+0, -0x1.c6fced287c3bdp-3 },
> +	     { 0x1.3e225c9c8ece5p+0, -0x1.bd05a7b317c29p-3 },
> +	     { 0x1.3c98ec29a211ap+0, -0x1.b31abd229164fp-3 },
> +	     { 0x1.3b13442a413fep+0, -0x1.a93c0edadb0a3p-3 },
> +	     { 0x1.399156baa3c54p+0, -0x1.9f697ee30d7ddp-3 },
> +	     { 0x1.38131639b4cdbp+0, -0x1.95a2efa9aa40ap-3 },
> +	     { 0x1.36987540fbf53p+0, -0x1.8be843d796044p-3 },
> +	     { 0x1.352166b648f61p+0, -0x1.82395ecc477edp-3 },
> +	     { 0x1.33adddb3eb575p+0, -0x1.7896240966422p-3 },
> +	     { 0x1.323dcd99fc1d3p+0, -0x1.6efe77aca8c55p-3 },
> +	     { 0x1.30d129fefc7d2p+0, -0x1.65723e117ec5cp-3 },
> +	     { 0x1.2f67e6b72fe7dp+0, -0x1.5bf15c0955706p-3 },
> +	     { 0x1.2e01f7cf8b187p+0, -0x1.527bb6c111da1p-3 },
> +	     { 0x1.2c9f518ddc86ep+0, -0x1.491133c939f8fp-3 },
> +	     { 0x1.2b3fe86e5f413p+0, -0x1.3fb1b90c7fc58p-3 },
> +	     { 0x1.29e3b1211b25cp+0, -0x1.365d2cc485f8dp-3 },
> +	     { 0x1.288aa08b373cfp+0, -0x1.2d13758970de7p-3 },
> +	     { 0x1.2734abcaa8467p+0, -0x1.23d47a721fd47p-3 },
> +	     { 0x1.25e1c82459b81p+0, -0x1.1aa0229f25ec2p-3 },
> +	     { 0x1.2491eb1ad59c5p+0, -0x1.117655ddebc3bp-3 },
> +	     { 0x1.23450a54048b5p+0, -0x1.0856fbf83ab6bp-3 },
> +	     { 0x1.21fb1bb09e578p+0, -0x1.fe83fabbaa106p-4 },
> +	     { 0x1.20b415346d8f7p+0, -0x1.ec6e8507a56cdp-4 },
> +	     { 0x1.1f6fed179a1acp+0, -0x1.da6d68c7cc2eap-4 },
> +	     { 0x1.1e2e99b93c7b3p+0, -0x1.c88078462be0cp-4 },
> +	     { 0x1.1cf011a7a882ap+0, -0x1.b6a786a423565p-4 },
> +	     { 0x1.1bb44b97dba5ap+0, -0x1.a4e2676ac7f85p-4 },
> +	     { 0x1.1a7b3e66cdd4fp+0, -0x1.9330eea777e76p-4 },
> +	     { 0x1.1944e11dc56cdp+0, -0x1.8192f134d5ad9p-4 },
> +	     { 0x1.18112aebb1a6ep+0, -0x1.70084464f0538p-4 },
> +	     { 0x1.16e013231b7e9p+0, -0x1.5e90bdec5cb1fp-4 },
> +	     { 0x1.15b1913f156cfp+0, -0x1.4d2c3433c5536p-4 },
> +	     { 0x1.14859cdedde13p+0, -0x1.3bda7e219879ap-4 },
> +	     { 0x1.135c2dc68cfa4p+0, -0x1.2a9b732d27194p-4 },
> +	     { 0x1.12353bdb01684p+0, -0x1.196eeb2b10807p-4 },
> +	     { 0x1.1110bf25b85b4p+0, -0x1.0854be8ef8a7ep-4 },
> +	     { 0x1.0feeafd2f8577p+0, -0x1.ee998cb277432p-5 },
> +	     { 0x1.0ecf062c51c3bp+0, -0x1.ccadb79919fb9p-5 },
> +	     { 0x1.0db1baa076c8bp+0, -0x1.aae5b1d8618b0p-5 },
> +	     { 0x1.0c96c5bb3048ep+0, -0x1.89413015d7442p-5 },
> +	     { 0x1.0b7e20263e070p+0, -0x1.67bfe7bf158dep-5 },
> +	     { 0x1.0a67c2acd0ce3p+0, -0x1.46618f83941bep-5 },
> +	     { 0x1.0953a6391e982p+0, -0x1.2525df1b0618ap-5 },
> +	     { 0x1.0841c3caea380p+0, -0x1.040c8e2f77c6ap-5 },
> +	     { 0x1.07321489b13eap+0, -0x1.c62aad39f738ap-6 },
> +	     { 0x1.062491aee9904p+0, -0x1.847fe3bdead9cp-6 },
> +	     { 0x1.05193497a7cc5p+0, -0x1.43183683400acp-6 },
> +	     { 0x1.040ff6b5f5e9fp+0, -0x1.01f31c4e1d544p-6 },
> +	     { 0x1.0308d19aa6127p+0, -0x1.82201d1e6b69ap-7 },
> +	     { 0x1.0203beedb0c67p+0, -0x1.00dd0f3e1bfd6p-7 },
> +	     { 0x1.010037d38bcc2p+0, -0x1.ff6fe1feb4e53p-9 },
> +	     { 1.0, 0.0 },
> +	     { 0x1.fc06d493cca10p-1, 0x1.fe91885ec8e20p-8 },
> +	     { 0x1.f81e6ac3b918fp-1, 0x1.fc516f716296dp-7 },
> +	     { 0x1.f44546ef18996p-1, 0x1.7bb4dd70a015bp-6 },
> +	     { 0x1.f07b10382c84bp-1, 0x1.f84c99b34b674p-6 },
> +	     { 0x1.ecbf7070e59d4p-1, 0x1.39f9ce4fb2d71p-5 },
> +	     { 0x1.e91213f715939p-1, 0x1.7756c0fd22e78p-5 },
> +	     { 0x1.e572a9a75f7b7p-1, 0x1.b43ee82db8f3ap-5 },
> +	     { 0x1.e1e0e2c530207p-1, 0x1.f0b3fced60034p-5 },
> +	     { 0x1.de5c72d8a8be3p-1, 0x1.165bd78d4878ep-4 },
> +	     { 0x1.dae50fa5658ccp-1, 0x1.3425d2715ebe6p-4 },
> +	     { 0x1.d77a71145a2dap-1, 0x1.51b8bd91b7915p-4 },
> +	     { 0x1.d41c51166623ep-1, 0x1.6f15632c76a47p-4 },
> +	     { 0x1.d0ca6ba0bb29fp-1, 0x1.8c3c88ecbe503p-4 },
> +	     { 0x1.cd847e8e59681p-1, 0x1.a92ef077625dap-4 },
> +	     { 0x1.ca4a499693e00p-1, 0x1.c5ed5745fa006p-4 },
> +	     { 0x1.c71b8e399e821p-1, 0x1.e27876de1c993p-4 },
> +	     { 0x1.c3f80faf19077p-1, 0x1.fed104fce4cdcp-4 },
> +	     { 0x1.c0df92dc2b0ecp-1, 0x1.0d7bd9c17d78bp-3 },
> +	     { 0x1.bdd1de3cbb542p-1, 0x1.1b76986cef97bp-3 },
> +	     { 0x1.baceb9e1007a3p-1, 0x1.295913d24f750p-3 },
> +	     { 0x1.b7d5ef543e55ep-1, 0x1.37239fa295d17p-3 },
> +	     { 0x1.b4e749977d953p-1, 0x1.44d68dd78714bp-3 },
> +	     { 0x1.b20295155478ep-1, 0x1.52722ebe5d780p-3 },
> +	     { 0x1.af279f8e82be2p-1, 0x1.5ff6d12671f98p-3 },
> +	     { 0x1.ac5638197fdf3p-1, 0x1.6d64c2389484bp-3 },
> +	     { 0x1.a98e2f102e087p-1, 0x1.7abc4da40fddap-3 },
> +	     { 0x1.a6cf5606d05c1p-1, 0x1.87fdbda1e8452p-3 },
> +	     { 0x1.a4197fc04d746p-1, 0x1.95295b06a5f37p-3 },
> +	     { 0x1.a16c80293dc01p-1, 0x1.a23f6d34abbc5p-3 },
> +	     { 0x1.9ec82c4dc5bc9p-1, 0x1.af403a28e04f2p-3 },
> +	     { 0x1.9c2c5a491f534p-1, 0x1.bc2c06a85721ap-3 },
> +	     { 0x1.9998e1480b618p-1, 0x1.c903161240163p-3 },
> +	     { 0x1.970d9977c6c2dp-1, 0x1.d5c5aa93287ebp-3 },
> +	     { 0x1.948a5c023d212p-1, 0x1.e274051823fa9p-3 },
> +	     { 0x1.920f0303d6809p-1, 0x1.ef0e656300c16p-3 },
> +	     { 0x1.8f9b698a98b45p-1, 0x1.fb9509f05aa2ap-3 },
> +	     { 0x1.8d2f6b81726f6p-1, 0x1.04041821f37afp-2 },
> +	     { 0x1.8acae5bb55badp-1, 0x1.0a340a49b3029p-2 },
> +	     { 0x1.886db5d9275b8p-1, 0x1.105a7918a126dp-2 },
> +	     { 0x1.8617ba567c13cp-1, 0x1.1677819812b84p-2 },
> +	     { 0x1.83c8d27487800p-1, 0x1.1c8b405b40c0ep-2 },
> +	     { 0x1.8180de3c5dbe7p-1, 0x1.2295d16cfa6b1p-2 },
> +	     { 0x1.7f3fbe71cdb71p-1, 0x1.28975066318a2p-2 },
> +	     { 0x1.7d055498071c1p-1, 0x1.2e8fd855d86fcp-2 },
> +	     { 0x1.7ad182e54f65ap-1, 0x1.347f83d605e59p-2 },
> +	     { 0x1.78a42c3c90125p-1, 0x1.3a666d1244588p-2 },
> +	     { 0x1.767d342f76944p-1, 0x1.4044adb6f8ec4p-2 },
> +	     { 0x1.745c7ef26b00ap-1, 0x1.461a5f077558cp-2 },
> +	     { 0x1.7241f15769d0fp-1, 0x1.4be799e20b9c8p-2 },
> +	     { 0x1.702d70d396e41p-1, 0x1.51ac76a6b79dfp-2 },
> +	     { 0x1.6e1ee3700cd11p-1, 0x1.57690d5744a45p-2 },
> +	     { 0x1.6c162fc9cbe02p-1, 0x1.5d1d758e45217p-2 } }
>  };
> diff --git a/sysdeps/aarch64/fpu/v_math.h b/sysdeps/aarch64/fpu/v_math.h
> index 43efd8f99d..cfc87f8dd0 100644
> --- a/sysdeps/aarch64/fpu/v_math.h
> +++ b/sysdeps/aarch64/fpu/v_math.h
> @@ -30,15 +30,15 @@
>  #define V_NAME_D2(fun) _ZGVnN2vv_##fun
>  
>  /* Shorthand helpers for declaring constants.  */
> -#define V2(x)                                                                  \
> -  {                                                                            \
> -    x, x                                                                       \
> -  }
> +#define V2(X) { X, X }
> +#define V4(X) { X, X, X, X }
> +#define V8(X) { X, X, X, X, X, X, X, X }
>  
> -#define V4(x)                                                                  \
> -  {                                                                            \
> -    x, x, x, x                                                                 \
> -  }
> +static inline int
> +v_any_u16h (uint16x4_t x)
> +{
> +  return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0;
> +}
>  
>  static inline float32x4_t
>  v_f32 (float x)
> @@ -63,6 +63,11 @@ v_any_u32 (uint32x4_t x)
>    /* assume elements in x are either 0 or -1u.  */
>    return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
>  }
> +static inline int
> +v_any_u32h (uint32x2_t x)
> +{
> +  return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0;
> +}
>  static inline float32x4_t
>  v_lookup_f32 (const float *tab, uint32x4_t idx)
>  {
> diff --git a/sysdeps/aarch64/fpu/vecmath_config.h b/sysdeps/aarch64/fpu/vecmath_config.h
> index e7d30b477f..0abfd8b701 100644
> --- a/sysdeps/aarch64/fpu/vecmath_config.h
> +++ b/sysdeps/aarch64/fpu/vecmath_config.h
> @@ -42,8 +42,10 @@ extern const struct v_log_data
>    /* Shared data for vector log and log-derived routines (e.g. asinh).  */
>    double poly[V_LOG_POLY_ORDER - 1];
>    double ln2;
> -  double invc[1 << V_LOG_TABLE_BITS];
> -  double logc[1 << V_LOG_TABLE_BITS];
> +  struct
> +  {
> +    double invc, logc;
> +  } table[1 << V_LOG_TABLE_BITS];
>  } __v_log_data attribute_hidden;
>  
>  #define V_EXP_TABLE_BITS 7
> -- 
> 2.27.0
>
  

Patch

diff --git a/sysdeps/aarch64/fpu/log_advsimd.c b/sysdeps/aarch64/fpu/log_advsimd.c
index 434737f2a9..8b32d1cfe1 100644
--- a/sysdeps/aarch64/fpu/log_advsimd.c
+++ b/sysdeps/aarch64/fpu/log_advsimd.c
@@ -21,9 +21,11 @@ 
 
 static const struct data
 {
+  uint64x2_t min_norm;
+  uint32x4_t special_bound;
   float64x2_t poly[5];
   float64x2_t ln2;
-  uint64x2_t min_norm, special_bound, sign_exp_mask;
+  uint64x2_t sign_exp_mask;
 } data = {
   /* Worst-case error: 1.17 + 0.5 ulp.
      Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ].  */
@@ -32,7 +34,7 @@  static const struct data
 	    V2 (-0x1.554e550bd501ep-3) },
   .ln2 = V2 (0x1.62e42fefa39efp-1),
   .min_norm = V2 (0x0010000000000000),
-  .special_bound = V2 (0x7fe0000000000000), /* asuint64(inf) - min_norm.  */
+  .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm.  */
   .sign_exp_mask = V2 (0xfff0000000000000)
 };
 
@@ -52,29 +54,34 @@  lookup (uint64x2_t i)
 {
   /* Since N is a power of 2, n % N = n & (N - 1).  */
   struct entry e;
-  e.invc[0] = __v_log_data.invc[i[0] & IndexMask];
-  e.logc[0] = __v_log_data.logc[i[0] & IndexMask];
-  e.invc[1] = __v_log_data.invc[i[1] & IndexMask];
-  e.logc[1] = __v_log_data.logc[i[1] & IndexMask];
+  uint64_t i0 = (i[0] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+  uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+  float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc);
+  float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc);
+  e.invc = vuzp1q_f64 (e0, e1);
+  e.logc = vuzp2q_f64 (e0, e1);
   return e;
 }
 
 static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
+special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2,
+	      uint32x2_t cmp)
 {
-  return v_call_f64 (log, x, y, cmp);
+  return v_call_f64 (log, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (cmp));
 }
 
 float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
   float64x2_t z, r, r2, p, y, kd, hi;
-  uint64x2_t ix, iz, tmp, cmp;
+  uint64x2_t ix, iz, tmp;
+  uint32x2_t cmp;
   int64x2_t k;
   struct entry e;
 
   ix = vreinterpretq_u64_f64 (x);
-  cmp = vcgeq_u64 (vsubq_u64 (ix, d->min_norm), d->special_bound);
+  cmp = vcge_u32 (vsubhn_u64 (ix, d->min_norm),
+		  vget_low_u32 (d->special_bound));
 
   /* x = 2^k z; where z is in range [Off,2*Off) and exact.
      The range is split into N subintervals.
@@ -83,7 +90,7 @@  float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x)
   k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift.  */
   iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask));
   z = vreinterpretq_f64_u64 (iz);
-  e = lookup (vshrq_n_u64 (tmp, 52 - V_LOG_TABLE_BITS));
+  e = lookup (tmp);
 
   /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
   r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
@@ -97,9 +104,8 @@  float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x)
   p = vfmaq_f64 (A (0), A (1), r);
   y = vfmaq_f64 (y, A (4), r2);
   y = vfmaq_f64 (p, y, r2);
-  y = vfmaq_f64 (hi, y, r2);
 
-  if (__glibc_unlikely (v_any_u64 (cmp)))
-    return special_case (x, y, cmp);
-  return y;
+  if (__glibc_unlikely (v_any_u32h (cmp)))
+    return special_case (x, y, hi, r2, cmp);
+  return vfmaq_f64 (hi, y, r2);
 }
diff --git a/sysdeps/aarch64/fpu/log_sve.c b/sysdeps/aarch64/fpu/log_sve.c
index 93c4f1c018..0c171a4d01 100644
--- a/sysdeps/aarch64/fpu/log_sve.c
+++ b/sysdeps/aarch64/fpu/log_sve.c
@@ -38,43 +38,39 @@  special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
 					 want 0x1.ffffff1cca045p-2.  */
 svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg)
 {
-  svuint64_t ix = svreinterpret_u64_f64 (x);
-  svuint64_t top = svlsr_n_u64_x (pg, ix, 52);
-  svbool_t cmp
-      = svcmpge_u64 (pg, svsub_n_u64_x (pg, top, MinTop), sv_u64 (ThreshTop));
+  svuint64_t ix = svreinterpret_u64 (x);
+  svuint64_t top = svlsr_x (pg, ix, 52);
+  svbool_t cmp = svcmpge (pg, svsub_x (pg, top, MinTop), sv_u64 (ThreshTop));
 
   /* x = 2^k z; where z is in range [Off,2*Off) and exact.
      The range is split into N subintervals.
      The ith subinterval contains z and c is near its center.  */
-  svuint64_t tmp = svsub_n_u64_x (pg, ix, Off);
-  /* Equivalent to (tmp >> (52 - V_LOG_TABLE_BITS)) % N, since N is a power
-     of 2.  */
-  svuint64_t i = svand_n_u64_x (
-      pg, svlsr_n_u64_x (pg, tmp, (52 - V_LOG_TABLE_BITS)), N - 1);
-  svint64_t k = svasr_n_s64_x (pg, svreinterpret_s64_u64 (tmp),
-			       52); /* Arithmetic shift.  */
-  svuint64_t iz
-      = svsub_u64_x (pg, ix, svand_n_u64_x (pg, tmp, 0xfffULL << 52));
-  svfloat64_t z = svreinterpret_f64_u64 (iz);
+  svuint64_t tmp = svsub_x (pg, ix, Off);
+  /* Calculate table index = (tmp >> (52 - V_LOG_TABLE_BITS)) % N.
+     The actual value of i is double this due to table layout.  */
+  svuint64_t i
+      = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1);
+  svint64_t k
+      = svasr_x (pg, svreinterpret_s64 (tmp), 52); /* Arithmetic shift.  */
+  svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52));
+  svfloat64_t z = svreinterpret_f64 (iz);
   /* Lookup in 2 global lists (length N).  */
-  svfloat64_t invc = svld1_gather_u64index_f64 (pg, __v_log_data.invc, i);
-  svfloat64_t logc = svld1_gather_u64index_f64 (pg, __v_log_data.logc, i);
+  svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i);
+  svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i);
 
   /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
-  svfloat64_t r = svmad_n_f64_x (pg, invc, z, -1);
-  svfloat64_t kd = svcvt_f64_s64_x (pg, k);
+  svfloat64_t r = svmad_x (pg, invc, z, -1);
+  svfloat64_t kd = svcvt_f64_x (pg, k);
   /* hi = r + log(c) + k*Ln2.  */
-  svfloat64_t hi
-      = svmla_n_f64_x (pg, svadd_f64_x (pg, logc, r), kd, __v_log_data.ln2);
+  svfloat64_t hi = svmla_x (pg, svadd_x (pg, logc, r), kd, __v_log_data.ln2);
   /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
-  svfloat64_t r2 = svmul_f64_x (pg, r, r);
-  svfloat64_t y = svmla_f64_x (pg, P (2), r, P (3));
-  svfloat64_t p = svmla_f64_x (pg, P (0), r, P (1));
-  y = svmla_f64_x (pg, y, r2, P (4));
-  y = svmla_f64_x (pg, p, r2, y);
-  y = svmla_f64_x (pg, hi, r2, y);
+  svfloat64_t r2 = svmul_x (pg, r, r);
+  svfloat64_t y = svmla_x (pg, P (2), r, P (3));
+  svfloat64_t p = svmla_x (pg, P (0), r, P (1));
+  y = svmla_x (pg, y, r2, P (4));
+  y = svmla_x (pg, p, r2, y);
 
   if (__glibc_unlikely (svptest_any (pg, cmp)))
-    return special_case (x, y, cmp);
-  return y;
+    return special_case (x, svmla_x (svnot_z (pg, cmp), hi, r2, y), cmp);
+  return svmla_x (pg, hi, r2, y);
 }
diff --git a/sysdeps/aarch64/fpu/logf_advsimd.c b/sysdeps/aarch64/fpu/logf_advsimd.c
index 375ad28b9f..93903c7962 100644
--- a/sysdeps/aarch64/fpu/logf_advsimd.c
+++ b/sysdeps/aarch64/fpu/logf_advsimd.c
@@ -21,9 +21,11 @@ 
 
 static const struct data
 {
+  uint32x4_t min_norm;
+  uint16x8_t special_bound;
   float32x4_t poly[7];
   float32x4_t ln2, tiny_bound;
-  uint32x4_t min_norm, special_bound, off, mantissa_mask;
+  uint32x4_t off, mantissa_mask;
 } data = {
   /* 3.34 ulp error.  */
   .poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f),
@@ -32,28 +34,31 @@  static const struct data
   .ln2 = V4 (0x1.62e43p-1f),
   .tiny_bound = V4 (0x1p-126),
   .min_norm = V4 (0x00800000),
-  .special_bound = V4 (0x7f000000), /* asuint32(inf) - min_norm.  */
-  .off = V4 (0x3f2aaaab),	    /* 0.666667.  */
+  .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm.  */
+  .off = V4 (0x3f2aaaab),	/* 0.666667.  */
   .mantissa_mask = V4 (0x007fffff)
 };
 
 #define P(i) d->poly[7 - i]
 
 static float32x4_t VPCS_ATTR NOINLINE
-special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
+special_case (float32x4_t x, float32x4_t y, float32x4_t r2, float32x4_t p,
+	      uint16x4_t cmp)
 {
   /* Fall back to scalar code.  */
-  return v_call_f32 (logf, x, y, cmp);
+  return v_call_f32 (logf, x, vfmaq_f32 (p, y, r2), vmovl_u16 (cmp));
 }
 
 float32x4_t VPCS_ATTR V_NAME_F1 (log) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
   float32x4_t n, p, q, r, r2, y;
-  uint32x4_t u, cmp;
+  uint32x4_t u;
+  uint16x4_t cmp;
 
   u = vreinterpretq_u32_f32 (x);
-  cmp = vcgeq_u32 (vsubq_u32 (u, d->min_norm), d->special_bound);
+  cmp = vcge_u16 (vsubhn_u32 (u, d->min_norm),
+		  vget_low_u16 (d->special_bound));
 
   /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
   u = vsubq_u32 (u, d->off);
@@ -73,9 +78,8 @@  float32x4_t VPCS_ATTR V_NAME_F1 (log) (float32x4_t x)
   q = vfmaq_f32 (q, p, r2);
   y = vfmaq_f32 (y, q, r2);
   p = vfmaq_f32 (r, d->ln2, n);
-  y = vfmaq_f32 (p, y, r2);
 
-  if (__glibc_unlikely (v_any_u32 (cmp)))
-    return special_case (x, y, cmp);
-  return y;
+  if (__glibc_unlikely (v_any_u16h (cmp)))
+    return special_case (x, y, r2, p, cmp);
+  return vfmaq_f32 (p, y, r2);
 }
diff --git a/sysdeps/aarch64/fpu/logf_sve.c b/sysdeps/aarch64/fpu/logf_sve.c
index 46c6e7c461..c02761188a 100644
--- a/sysdeps/aarch64/fpu/logf_sve.c
+++ b/sysdeps/aarch64/fpu/logf_sve.c
@@ -55,33 +55,31 @@  svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  svuint32_t u = svreinterpret_u32_f32 (x);
-  svbool_t cmp = svcmpge_n_u32 (pg, svsub_n_u32_x (pg, u, Min), Thresh);
+  svuint32_t u = svreinterpret_u32 (x);
+  svbool_t cmp = svcmpge (pg, svsub_x (pg, u, Min), Thresh);
 
   /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  u = svsub_n_u32_x (pg, u, Off);
-  svfloat32_t n
-      = svcvt_f32_s32_x (pg, svasr_n_s32_x (pg, svreinterpret_s32_u32 (u),
-					    23)); /* Sign-extend.  */
-  u = svand_n_u32_x (pg, u, Mask);
-  u = svadd_n_u32_x (pg, u, Off);
-  svfloat32_t r = svsub_n_f32_x (pg, svreinterpret_f32_u32 (u), 1.0f);
+  u = svsub_x (pg, u, Off);
+  svfloat32_t n = svcvt_f32_x (
+      pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend.  */
+  u = svand_x (pg, u, Mask);
+  u = svadd_x (pg, u, Off);
+  svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f);
 
   /* y = log(1+r) + n*ln2.  */
-  svfloat32_t r2 = svmul_f32_x (pg, r, r);
+  svfloat32_t r2 = svmul_x (pg, r, r);
   /* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))).  */
-  svfloat32_t p_0135 = svld1rq_f32 (svptrue_b32 (), &d->poly_0135[0]);
-  svfloat32_t p = svmla_lane_f32 (sv_f32 (d->poly_246[0]), r, p_0135, 1);
-  svfloat32_t q = svmla_lane_f32 (sv_f32 (d->poly_246[1]), r, p_0135, 2);
-  svfloat32_t y = svmla_lane_f32 (sv_f32 (d->poly_246[2]), r, p_0135, 3);
-  p = svmla_lane_f32 (p, r2, p_0135, 0);
+  svfloat32_t p_0135 = svld1rq (svptrue_b32 (), &d->poly_0135[0]);
+  svfloat32_t p = svmla_lane (sv_f32 (d->poly_246[0]), r, p_0135, 1);
+  svfloat32_t q = svmla_lane (sv_f32 (d->poly_246[1]), r, p_0135, 2);
+  svfloat32_t y = svmla_lane (sv_f32 (d->poly_246[2]), r, p_0135, 3);
+  p = svmla_lane (p, r2, p_0135, 0);
 
-  q = svmla_f32_x (pg, q, r2, p);
-  y = svmla_f32_x (pg, y, r2, q);
-  p = svmla_n_f32_x (pg, r, n, d->ln2);
-  y = svmla_f32_x (pg, p, r2, y);
+  q = svmla_x (pg, q, r2, p);
+  y = svmla_x (pg, y, r2, q);
+  p = svmla_x (pg, r, n, d->ln2);
 
   if (__glibc_unlikely (svptest_any (pg, cmp)))
-    return special_case (x, y, cmp);
-  return y;
+    return special_case (x, svmla_x (svnot_z (pg, cmp), p, r2, y), cmp);
+  return svmla_x (pg, p, r2, y);
 }
diff --git a/sysdeps/aarch64/fpu/v_log_data.c b/sysdeps/aarch64/fpu/v_log_data.c
index 6fd6f43695..99506e3cde 100644
--- a/sysdeps/aarch64/fpu/v_log_data.c
+++ b/sysdeps/aarch64/fpu/v_log_data.c
@@ -34,140 +34,140 @@  const struct v_log_data __v_log_data = {
      N=128) and log(c) and 1/c for the ith subinterval comes from two lookup
      tables:
 
-	invc[i] = 1/c
-	logc[i] = (double)log(c)
+	table[i].invc = 1/c
+	table[i].logc = (double)log(c)
 
      where c is near the center of the subinterval and is chosen by trying
      several floating point invc candidates around 1/center and selecting one
      for which the error in (double)log(c) is minimized (< 0x1p-74), except the
      subinterval that contains 1 and the previous one got tweaked to avoid
      cancellation.  */
-  .invc = { 0x1.6a133d0dec120p+0, 0x1.6815f2f3e42edp+0,
-	    0x1.661e39be1ac9ep+0, 0x1.642bfa30ac371p+0,
-	    0x1.623f1d916f323p+0, 0x1.60578da220f65p+0,
-	    0x1.5e75349dea571p+0, 0x1.5c97fd387a75ap+0,
-	    0x1.5abfd2981f200p+0, 0x1.58eca051dc99cp+0,
-	    0x1.571e526d9df12p+0, 0x1.5554d555b3fcbp+0,
-	    0x1.539015e2a20cdp+0, 0x1.51d0014ee0164p+0,
-	    0x1.50148538cd9eep+0, 0x1.4e5d8f9f698a1p+0,
-	    0x1.4cab0edca66bep+0, 0x1.4afcf1a9db874p+0,
-	    0x1.495327136e16fp+0, 0x1.47ad9e84af28fp+0,
-	    0x1.460c47b39ae15p+0, 0x1.446f12b278001p+0,
-	    0x1.42d5efdd720ecp+0, 0x1.4140cfe001a0fp+0,
-	    0x1.3fafa3b421f69p+0, 0x1.3e225c9c8ece5p+0,
-	    0x1.3c98ec29a211ap+0, 0x1.3b13442a413fep+0,
-	    0x1.399156baa3c54p+0, 0x1.38131639b4cdbp+0,
-	    0x1.36987540fbf53p+0, 0x1.352166b648f61p+0,
-	    0x1.33adddb3eb575p+0, 0x1.323dcd99fc1d3p+0,
-	    0x1.30d129fefc7d2p+0, 0x1.2f67e6b72fe7dp+0,
-	    0x1.2e01f7cf8b187p+0, 0x1.2c9f518ddc86ep+0,
-	    0x1.2b3fe86e5f413p+0, 0x1.29e3b1211b25cp+0,
-	    0x1.288aa08b373cfp+0, 0x1.2734abcaa8467p+0,
-	    0x1.25e1c82459b81p+0, 0x1.2491eb1ad59c5p+0,
-	    0x1.23450a54048b5p+0, 0x1.21fb1bb09e578p+0,
-	    0x1.20b415346d8f7p+0, 0x1.1f6fed179a1acp+0,
-	    0x1.1e2e99b93c7b3p+0, 0x1.1cf011a7a882ap+0,
-	    0x1.1bb44b97dba5ap+0, 0x1.1a7b3e66cdd4fp+0,
-	    0x1.1944e11dc56cdp+0, 0x1.18112aebb1a6ep+0,
-	    0x1.16e013231b7e9p+0, 0x1.15b1913f156cfp+0,
-	    0x1.14859cdedde13p+0, 0x1.135c2dc68cfa4p+0,
-	    0x1.12353bdb01684p+0, 0x1.1110bf25b85b4p+0,
-	    0x1.0feeafd2f8577p+0, 0x1.0ecf062c51c3bp+0,
-	    0x1.0db1baa076c8bp+0, 0x1.0c96c5bb3048ep+0,
-	    0x1.0b7e20263e070p+0, 0x1.0a67c2acd0ce3p+0,
-	    0x1.0953a6391e982p+0, 0x1.0841c3caea380p+0,
-	    0x1.07321489b13eap+0, 0x1.062491aee9904p+0,
-	    0x1.05193497a7cc5p+0, 0x1.040ff6b5f5e9fp+0,
-	    0x1.0308d19aa6127p+0, 0x1.0203beedb0c67p+0,
-	    0x1.010037d38bcc2p+0, 1.0,
-	    0x1.fc06d493cca10p-1, 0x1.f81e6ac3b918fp-1,
-	    0x1.f44546ef18996p-1, 0x1.f07b10382c84bp-1,
-	    0x1.ecbf7070e59d4p-1, 0x1.e91213f715939p-1,
-	    0x1.e572a9a75f7b7p-1, 0x1.e1e0e2c530207p-1,
-	    0x1.de5c72d8a8be3p-1, 0x1.dae50fa5658ccp-1,
-	    0x1.d77a71145a2dap-1, 0x1.d41c51166623ep-1,
-	    0x1.d0ca6ba0bb29fp-1, 0x1.cd847e8e59681p-1,
-	    0x1.ca4a499693e00p-1, 0x1.c71b8e399e821p-1,
-	    0x1.c3f80faf19077p-1, 0x1.c0df92dc2b0ecp-1,
-	    0x1.bdd1de3cbb542p-1, 0x1.baceb9e1007a3p-1,
-	    0x1.b7d5ef543e55ep-1, 0x1.b4e749977d953p-1,
-	    0x1.b20295155478ep-1, 0x1.af279f8e82be2p-1,
-	    0x1.ac5638197fdf3p-1, 0x1.a98e2f102e087p-1,
-	    0x1.a6cf5606d05c1p-1, 0x1.a4197fc04d746p-1,
-	    0x1.a16c80293dc01p-1, 0x1.9ec82c4dc5bc9p-1,
-	    0x1.9c2c5a491f534p-1, 0x1.9998e1480b618p-1,
-	    0x1.970d9977c6c2dp-1, 0x1.948a5c023d212p-1,
-	    0x1.920f0303d6809p-1, 0x1.8f9b698a98b45p-1,
-	    0x1.8d2f6b81726f6p-1, 0x1.8acae5bb55badp-1,
-	    0x1.886db5d9275b8p-1, 0x1.8617ba567c13cp-1,
-	    0x1.83c8d27487800p-1, 0x1.8180de3c5dbe7p-1,
-	    0x1.7f3fbe71cdb71p-1, 0x1.7d055498071c1p-1,
-	    0x1.7ad182e54f65ap-1, 0x1.78a42c3c90125p-1,
-	    0x1.767d342f76944p-1, 0x1.745c7ef26b00ap-1,
-	    0x1.7241f15769d0fp-1, 0x1.702d70d396e41p-1,
-	    0x1.6e1ee3700cd11p-1, 0x1.6c162fc9cbe02p-1 },
-  .logc = { -0x1.62fe995eb963ap-2, -0x1.5d5a48dad6b67p-2,
-	    -0x1.57bde257d2769p-2, -0x1.52294fbf2af55p-2,
-	    -0x1.4c9c7b598aa38p-2, -0x1.47174fc5ff560p-2,
-	    -0x1.4199b7fa7b5cap-2, -0x1.3c239f48cfb99p-2,
-	    -0x1.36b4f154d2aebp-2, -0x1.314d9a0ff32fbp-2,
-	    -0x1.2bed85cca3cffp-2, -0x1.2694a11421af9p-2,
-	    -0x1.2142d8d014fb2p-2, -0x1.1bf81a2c77776p-2,
-	    -0x1.16b452a39c6a4p-2, -0x1.11776ffa6c67ep-2,
-	    -0x1.0c416035020e0p-2, -0x1.071211aa10fdap-2,
-	    -0x1.01e972e293b1bp-2, -0x1.f98ee587fd434p-3,
-	    -0x1.ef5800ad716fbp-3, -0x1.e52e160484698p-3,
-	    -0x1.db1104b19352ep-3, -0x1.d100ac59e0bd6p-3,
-	    -0x1.c6fced287c3bdp-3, -0x1.bd05a7b317c29p-3,
-	    -0x1.b31abd229164fp-3, -0x1.a93c0edadb0a3p-3,
-	    -0x1.9f697ee30d7ddp-3, -0x1.95a2efa9aa40ap-3,
-	    -0x1.8be843d796044p-3, -0x1.82395ecc477edp-3,
-	    -0x1.7896240966422p-3, -0x1.6efe77aca8c55p-3,
-	    -0x1.65723e117ec5cp-3, -0x1.5bf15c0955706p-3,
-	    -0x1.527bb6c111da1p-3, -0x1.491133c939f8fp-3,
-	    -0x1.3fb1b90c7fc58p-3, -0x1.365d2cc485f8dp-3,
-	    -0x1.2d13758970de7p-3, -0x1.23d47a721fd47p-3,
-	    -0x1.1aa0229f25ec2p-3, -0x1.117655ddebc3bp-3,
-	    -0x1.0856fbf83ab6bp-3, -0x1.fe83fabbaa106p-4,
-	    -0x1.ec6e8507a56cdp-4, -0x1.da6d68c7cc2eap-4,
-	    -0x1.c88078462be0cp-4, -0x1.b6a786a423565p-4,
-	    -0x1.a4e2676ac7f85p-4, -0x1.9330eea777e76p-4,
-	    -0x1.8192f134d5ad9p-4, -0x1.70084464f0538p-4,
-	    -0x1.5e90bdec5cb1fp-4, -0x1.4d2c3433c5536p-4,
-	    -0x1.3bda7e219879ap-4, -0x1.2a9b732d27194p-4,
-	    -0x1.196eeb2b10807p-4, -0x1.0854be8ef8a7ep-4,
-	    -0x1.ee998cb277432p-5, -0x1.ccadb79919fb9p-5,
-	    -0x1.aae5b1d8618b0p-5, -0x1.89413015d7442p-5,
-	    -0x1.67bfe7bf158dep-5, -0x1.46618f83941bep-5,
-	    -0x1.2525df1b0618ap-5, -0x1.040c8e2f77c6ap-5,
-	    -0x1.c62aad39f738ap-6, -0x1.847fe3bdead9cp-6,
-	    -0x1.43183683400acp-6, -0x1.01f31c4e1d544p-6,
-	    -0x1.82201d1e6b69ap-7, -0x1.00dd0f3e1bfd6p-7,
-	    -0x1.ff6fe1feb4e53p-9, 0.0,
-	    0x1.fe91885ec8e20p-8,  0x1.fc516f716296dp-7,
-	    0x1.7bb4dd70a015bp-6,  0x1.f84c99b34b674p-6,
-	    0x1.39f9ce4fb2d71p-5,  0x1.7756c0fd22e78p-5,
-	    0x1.b43ee82db8f3ap-5,  0x1.f0b3fced60034p-5,
-	    0x1.165bd78d4878ep-4,  0x1.3425d2715ebe6p-4,
-	    0x1.51b8bd91b7915p-4,  0x1.6f15632c76a47p-4,
-	    0x1.8c3c88ecbe503p-4,  0x1.a92ef077625dap-4,
-	    0x1.c5ed5745fa006p-4,  0x1.e27876de1c993p-4,
-	    0x1.fed104fce4cdcp-4,  0x1.0d7bd9c17d78bp-3,
-	    0x1.1b76986cef97bp-3,  0x1.295913d24f750p-3,
-	    0x1.37239fa295d17p-3,  0x1.44d68dd78714bp-3,
-	    0x1.52722ebe5d780p-3,  0x1.5ff6d12671f98p-3,
-	    0x1.6d64c2389484bp-3,  0x1.7abc4da40fddap-3,
-	    0x1.87fdbda1e8452p-3,  0x1.95295b06a5f37p-3,
-	    0x1.a23f6d34abbc5p-3,  0x1.af403a28e04f2p-3,
-	    0x1.bc2c06a85721ap-3,  0x1.c903161240163p-3,
-	    0x1.d5c5aa93287ebp-3,  0x1.e274051823fa9p-3,
-	    0x1.ef0e656300c16p-3,  0x1.fb9509f05aa2ap-3,
-	    0x1.04041821f37afp-2,  0x1.0a340a49b3029p-2,
-	    0x1.105a7918a126dp-2,  0x1.1677819812b84p-2,
-	    0x1.1c8b405b40c0ep-2,  0x1.2295d16cfa6b1p-2,
-	    0x1.28975066318a2p-2,  0x1.2e8fd855d86fcp-2,
-	    0x1.347f83d605e59p-2,  0x1.3a666d1244588p-2,
-	    0x1.4044adb6f8ec4p-2,  0x1.461a5f077558cp-2,
-	    0x1.4be799e20b9c8p-2,  0x1.51ac76a6b79dfp-2,
-	    0x1.57690d5744a45p-2,  0x1.5d1d758e45217p-2 }
+  .table = { { 0x1.6a133d0dec120p+0, -0x1.62fe995eb963ap-2 },
+	     { 0x1.6815f2f3e42edp+0, -0x1.5d5a48dad6b67p-2 },
+	     { 0x1.661e39be1ac9ep+0, -0x1.57bde257d2769p-2 },
+	     { 0x1.642bfa30ac371p+0, -0x1.52294fbf2af55p-2 },
+	     { 0x1.623f1d916f323p+0, -0x1.4c9c7b598aa38p-2 },
+	     { 0x1.60578da220f65p+0, -0x1.47174fc5ff560p-2 },
+	     { 0x1.5e75349dea571p+0, -0x1.4199b7fa7b5cap-2 },
+	     { 0x1.5c97fd387a75ap+0, -0x1.3c239f48cfb99p-2 },
+	     { 0x1.5abfd2981f200p+0, -0x1.36b4f154d2aebp-2 },
+	     { 0x1.58eca051dc99cp+0, -0x1.314d9a0ff32fbp-2 },
+	     { 0x1.571e526d9df12p+0, -0x1.2bed85cca3cffp-2 },
+	     { 0x1.5554d555b3fcbp+0, -0x1.2694a11421af9p-2 },
+	     { 0x1.539015e2a20cdp+0, -0x1.2142d8d014fb2p-2 },
+	     { 0x1.51d0014ee0164p+0, -0x1.1bf81a2c77776p-2 },
+	     { 0x1.50148538cd9eep+0, -0x1.16b452a39c6a4p-2 },
+	     { 0x1.4e5d8f9f698a1p+0, -0x1.11776ffa6c67ep-2 },
+	     { 0x1.4cab0edca66bep+0, -0x1.0c416035020e0p-2 },
+	     { 0x1.4afcf1a9db874p+0, -0x1.071211aa10fdap-2 },
+	     { 0x1.495327136e16fp+0, -0x1.01e972e293b1bp-2 },
+	     { 0x1.47ad9e84af28fp+0, -0x1.f98ee587fd434p-3 },
+	     { 0x1.460c47b39ae15p+0, -0x1.ef5800ad716fbp-3 },
+	     { 0x1.446f12b278001p+0, -0x1.e52e160484698p-3 },
+	     { 0x1.42d5efdd720ecp+0, -0x1.db1104b19352ep-3 },
+	     { 0x1.4140cfe001a0fp+0, -0x1.d100ac59e0bd6p-3 },
+	     { 0x1.3fafa3b421f69p+0, -0x1.c6fced287c3bdp-3 },
+	     { 0x1.3e225c9c8ece5p+0, -0x1.bd05a7b317c29p-3 },
+	     { 0x1.3c98ec29a211ap+0, -0x1.b31abd229164fp-3 },
+	     { 0x1.3b13442a413fep+0, -0x1.a93c0edadb0a3p-3 },
+	     { 0x1.399156baa3c54p+0, -0x1.9f697ee30d7ddp-3 },
+	     { 0x1.38131639b4cdbp+0, -0x1.95a2efa9aa40ap-3 },
+	     { 0x1.36987540fbf53p+0, -0x1.8be843d796044p-3 },
+	     { 0x1.352166b648f61p+0, -0x1.82395ecc477edp-3 },
+	     { 0x1.33adddb3eb575p+0, -0x1.7896240966422p-3 },
+	     { 0x1.323dcd99fc1d3p+0, -0x1.6efe77aca8c55p-3 },
+	     { 0x1.30d129fefc7d2p+0, -0x1.65723e117ec5cp-3 },
+	     { 0x1.2f67e6b72fe7dp+0, -0x1.5bf15c0955706p-3 },
+	     { 0x1.2e01f7cf8b187p+0, -0x1.527bb6c111da1p-3 },
+	     { 0x1.2c9f518ddc86ep+0, -0x1.491133c939f8fp-3 },
+	     { 0x1.2b3fe86e5f413p+0, -0x1.3fb1b90c7fc58p-3 },
+	     { 0x1.29e3b1211b25cp+0, -0x1.365d2cc485f8dp-3 },
+	     { 0x1.288aa08b373cfp+0, -0x1.2d13758970de7p-3 },
+	     { 0x1.2734abcaa8467p+0, -0x1.23d47a721fd47p-3 },
+	     { 0x1.25e1c82459b81p+0, -0x1.1aa0229f25ec2p-3 },
+	     { 0x1.2491eb1ad59c5p+0, -0x1.117655ddebc3bp-3 },
+	     { 0x1.23450a54048b5p+0, -0x1.0856fbf83ab6bp-3 },
+	     { 0x1.21fb1bb09e578p+0, -0x1.fe83fabbaa106p-4 },
+	     { 0x1.20b415346d8f7p+0, -0x1.ec6e8507a56cdp-4 },
+	     { 0x1.1f6fed179a1acp+0, -0x1.da6d68c7cc2eap-4 },
+	     { 0x1.1e2e99b93c7b3p+0, -0x1.c88078462be0cp-4 },
+	     { 0x1.1cf011a7a882ap+0, -0x1.b6a786a423565p-4 },
+	     { 0x1.1bb44b97dba5ap+0, -0x1.a4e2676ac7f85p-4 },
+	     { 0x1.1a7b3e66cdd4fp+0, -0x1.9330eea777e76p-4 },
+	     { 0x1.1944e11dc56cdp+0, -0x1.8192f134d5ad9p-4 },
+	     { 0x1.18112aebb1a6ep+0, -0x1.70084464f0538p-4 },
+	     { 0x1.16e013231b7e9p+0, -0x1.5e90bdec5cb1fp-4 },
+	     { 0x1.15b1913f156cfp+0, -0x1.4d2c3433c5536p-4 },
+	     { 0x1.14859cdedde13p+0, -0x1.3bda7e219879ap-4 },
+	     { 0x1.135c2dc68cfa4p+0, -0x1.2a9b732d27194p-4 },
+	     { 0x1.12353bdb01684p+0, -0x1.196eeb2b10807p-4 },
+	     { 0x1.1110bf25b85b4p+0, -0x1.0854be8ef8a7ep-4 },
+	     { 0x1.0feeafd2f8577p+0, -0x1.ee998cb277432p-5 },
+	     { 0x1.0ecf062c51c3bp+0, -0x1.ccadb79919fb9p-5 },
+	     { 0x1.0db1baa076c8bp+0, -0x1.aae5b1d8618b0p-5 },
+	     { 0x1.0c96c5bb3048ep+0, -0x1.89413015d7442p-5 },
+	     { 0x1.0b7e20263e070p+0, -0x1.67bfe7bf158dep-5 },
+	     { 0x1.0a67c2acd0ce3p+0, -0x1.46618f83941bep-5 },
+	     { 0x1.0953a6391e982p+0, -0x1.2525df1b0618ap-5 },
+	     { 0x1.0841c3caea380p+0, -0x1.040c8e2f77c6ap-5 },
+	     { 0x1.07321489b13eap+0, -0x1.c62aad39f738ap-6 },
+	     { 0x1.062491aee9904p+0, -0x1.847fe3bdead9cp-6 },
+	     { 0x1.05193497a7cc5p+0, -0x1.43183683400acp-6 },
+	     { 0x1.040ff6b5f5e9fp+0, -0x1.01f31c4e1d544p-6 },
+	     { 0x1.0308d19aa6127p+0, -0x1.82201d1e6b69ap-7 },
+	     { 0x1.0203beedb0c67p+0, -0x1.00dd0f3e1bfd6p-7 },
+	     { 0x1.010037d38bcc2p+0, -0x1.ff6fe1feb4e53p-9 },
+	     { 1.0, 0.0 },
+	     { 0x1.fc06d493cca10p-1, 0x1.fe91885ec8e20p-8 },
+	     { 0x1.f81e6ac3b918fp-1, 0x1.fc516f716296dp-7 },
+	     { 0x1.f44546ef18996p-1, 0x1.7bb4dd70a015bp-6 },
+	     { 0x1.f07b10382c84bp-1, 0x1.f84c99b34b674p-6 },
+	     { 0x1.ecbf7070e59d4p-1, 0x1.39f9ce4fb2d71p-5 },
+	     { 0x1.e91213f715939p-1, 0x1.7756c0fd22e78p-5 },
+	     { 0x1.e572a9a75f7b7p-1, 0x1.b43ee82db8f3ap-5 },
+	     { 0x1.e1e0e2c530207p-1, 0x1.f0b3fced60034p-5 },
+	     { 0x1.de5c72d8a8be3p-1, 0x1.165bd78d4878ep-4 },
+	     { 0x1.dae50fa5658ccp-1, 0x1.3425d2715ebe6p-4 },
+	     { 0x1.d77a71145a2dap-1, 0x1.51b8bd91b7915p-4 },
+	     { 0x1.d41c51166623ep-1, 0x1.6f15632c76a47p-4 },
+	     { 0x1.d0ca6ba0bb29fp-1, 0x1.8c3c88ecbe503p-4 },
+	     { 0x1.cd847e8e59681p-1, 0x1.a92ef077625dap-4 },
+	     { 0x1.ca4a499693e00p-1, 0x1.c5ed5745fa006p-4 },
+	     { 0x1.c71b8e399e821p-1, 0x1.e27876de1c993p-4 },
+	     { 0x1.c3f80faf19077p-1, 0x1.fed104fce4cdcp-4 },
+	     { 0x1.c0df92dc2b0ecp-1, 0x1.0d7bd9c17d78bp-3 },
+	     { 0x1.bdd1de3cbb542p-1, 0x1.1b76986cef97bp-3 },
+	     { 0x1.baceb9e1007a3p-1, 0x1.295913d24f750p-3 },
+	     { 0x1.b7d5ef543e55ep-1, 0x1.37239fa295d17p-3 },
+	     { 0x1.b4e749977d953p-1, 0x1.44d68dd78714bp-3 },
+	     { 0x1.b20295155478ep-1, 0x1.52722ebe5d780p-3 },
+	     { 0x1.af279f8e82be2p-1, 0x1.5ff6d12671f98p-3 },
+	     { 0x1.ac5638197fdf3p-1, 0x1.6d64c2389484bp-3 },
+	     { 0x1.a98e2f102e087p-1, 0x1.7abc4da40fddap-3 },
+	     { 0x1.a6cf5606d05c1p-1, 0x1.87fdbda1e8452p-3 },
+	     { 0x1.a4197fc04d746p-1, 0x1.95295b06a5f37p-3 },
+	     { 0x1.a16c80293dc01p-1, 0x1.a23f6d34abbc5p-3 },
+	     { 0x1.9ec82c4dc5bc9p-1, 0x1.af403a28e04f2p-3 },
+	     { 0x1.9c2c5a491f534p-1, 0x1.bc2c06a85721ap-3 },
+	     { 0x1.9998e1480b618p-1, 0x1.c903161240163p-3 },
+	     { 0x1.970d9977c6c2dp-1, 0x1.d5c5aa93287ebp-3 },
+	     { 0x1.948a5c023d212p-1, 0x1.e274051823fa9p-3 },
+	     { 0x1.920f0303d6809p-1, 0x1.ef0e656300c16p-3 },
+	     { 0x1.8f9b698a98b45p-1, 0x1.fb9509f05aa2ap-3 },
+	     { 0x1.8d2f6b81726f6p-1, 0x1.04041821f37afp-2 },
+	     { 0x1.8acae5bb55badp-1, 0x1.0a340a49b3029p-2 },
+	     { 0x1.886db5d9275b8p-1, 0x1.105a7918a126dp-2 },
+	     { 0x1.8617ba567c13cp-1, 0x1.1677819812b84p-2 },
+	     { 0x1.83c8d27487800p-1, 0x1.1c8b405b40c0ep-2 },
+	     { 0x1.8180de3c5dbe7p-1, 0x1.2295d16cfa6b1p-2 },
+	     { 0x1.7f3fbe71cdb71p-1, 0x1.28975066318a2p-2 },
+	     { 0x1.7d055498071c1p-1, 0x1.2e8fd855d86fcp-2 },
+	     { 0x1.7ad182e54f65ap-1, 0x1.347f83d605e59p-2 },
+	     { 0x1.78a42c3c90125p-1, 0x1.3a666d1244588p-2 },
+	     { 0x1.767d342f76944p-1, 0x1.4044adb6f8ec4p-2 },
+	     { 0x1.745c7ef26b00ap-1, 0x1.461a5f077558cp-2 },
+	     { 0x1.7241f15769d0fp-1, 0x1.4be799e20b9c8p-2 },
+	     { 0x1.702d70d396e41p-1, 0x1.51ac76a6b79dfp-2 },
+	     { 0x1.6e1ee3700cd11p-1, 0x1.57690d5744a45p-2 },
+	     { 0x1.6c162fc9cbe02p-1, 0x1.5d1d758e45217p-2 } }
 };
diff --git a/sysdeps/aarch64/fpu/v_math.h b/sysdeps/aarch64/fpu/v_math.h
index 43efd8f99d..cfc87f8dd0 100644
--- a/sysdeps/aarch64/fpu/v_math.h
+++ b/sysdeps/aarch64/fpu/v_math.h
@@ -30,15 +30,15 @@ 
 #define V_NAME_D2(fun) _ZGVnN2vv_##fun
 
 /* Shorthand helpers for declaring constants.  */
-#define V2(x)                                                                  \
-  {                                                                            \
-    x, x                                                                       \
-  }
+#define V2(X) { X, X }
+#define V4(X) { X, X, X, X }
+#define V8(X) { X, X, X, X, X, X, X, X }
 
-#define V4(x)                                                                  \
-  {                                                                            \
-    x, x, x, x                                                                 \
-  }
+static inline int
+v_any_u16h (uint16x4_t x)
+{
+  return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0;
+}
 
 static inline float32x4_t
 v_f32 (float x)
@@ -63,6 +63,11 @@  v_any_u32 (uint32x4_t x)
   /* assume elements in x are either 0 or -1u.  */
   return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
 }
+static inline int
+v_any_u32h (uint32x2_t x)
+{
+  return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0;
+}
 static inline float32x4_t
 v_lookup_f32 (const float *tab, uint32x4_t idx)
 {
diff --git a/sysdeps/aarch64/fpu/vecmath_config.h b/sysdeps/aarch64/fpu/vecmath_config.h
index e7d30b477f..0abfd8b701 100644
--- a/sysdeps/aarch64/fpu/vecmath_config.h
+++ b/sysdeps/aarch64/fpu/vecmath_config.h
@@ -42,8 +42,10 @@  extern const struct v_log_data
   /* Shared data for vector log and log-derived routines (e.g. asinh).  */
   double poly[V_LOG_POLY_ORDER - 1];
   double ln2;
-  double invc[1 << V_LOG_TABLE_BITS];
-  double logc[1 << V_LOG_TABLE_BITS];
+  struct
+  {
+    double invc, logc;
+  } table[1 << V_LOG_TABLE_BITS];
 } __v_log_data attribute_hidden;
 
 #define V_EXP_TABLE_BITS 7