[v2,3/4] aarch64: Add vector implementations of log routines

Message ID 20230619145634.18801-3-Joe.Ramsay@arm.com
State Superseded
Headers
Series [v2,1/4] aarch64: Add vector implementations of cos routines |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Testing passed
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Testing passed

Commit Message

Joe Ramsay June 19, 2023, 2:56 p.m. UTC
  Optimised implementations for single and double precision, Advanced
SIMD and SVE, copied from Arm Optimized Routines. Log lookup table
added as HIDDEN symbol to allow it to be shared between AdvSIMD and
SVE variants.

As previously, data tables are marked volatile or writable to prevent
overly aggressive constant inlining. Special-case handlers are marked
NOINLINE to avoid incurring the penalty of switching call standards
unnecessarily.
---
Changes to v1:
 * Use __glibc_unlikely
 * Use attribute_hidden instead of HIDDEN
 * Explain data table storage

 sysdeps/aarch64/fpu/Makefile                  |  11 +-
 sysdeps/aarch64/fpu/Versions                  |   4 +
 sysdeps/aarch64/fpu/bits/math-vector.h        |   4 +
 sysdeps/aarch64/fpu/log_advsimd.c             | 104 +++++++++++
 sysdeps/aarch64/fpu/log_sve.c                 |  80 ++++++++
 sysdeps/aarch64/fpu/logf_advsimd.c            |  80 ++++++++
 sysdeps/aarch64/fpu/logf_sve.c                |  85 +++++++++
 .../fpu/test-double-advsimd-wrappers.c        |   1 +
 .../aarch64/fpu/test-double-sve-wrappers.c    |   1 +
 .../aarch64/fpu/test-float-advsimd-wrappers.c |   1 +
 sysdeps/aarch64/fpu/test-float-sve-wrappers.c |   1 +
 sysdeps/aarch64/fpu/v_log_data.c              | 173 ++++++++++++++++++
 sysdeps/aarch64/fpu/vecmath_config.h          |  10 +
 sysdeps/aarch64/libm-test-ulps                |   8 +
 .../unix/sysv/linux/aarch64/libmvec.abilist   |   4 +
 15 files changed, 562 insertions(+), 5 deletions(-)
 create mode 100644 sysdeps/aarch64/fpu/log_advsimd.c
 create mode 100644 sysdeps/aarch64/fpu/log_sve.c
 create mode 100644 sysdeps/aarch64/fpu/logf_advsimd.c
 create mode 100644 sysdeps/aarch64/fpu/logf_sve.c
 create mode 100644 sysdeps/aarch64/fpu/v_log_data.c
  

Patch

diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index b3285542ea..3f9cd2d000 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -1,16 +1,17 @@ 
-float-advsimd-funcs = cos sin
+float-advsimd-funcs = cos sin log
 
-double-advsimd-funcs = cos sin
+double-advsimd-funcs = cos sin log
 
-float-sve-funcs = cos sin
+float-sve-funcs = cos sin log
 
-double-sve-funcs = cos sin
+double-sve-funcs = cos sin log
 
 ifeq ($(subdir),mathvec)
 libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \
                   $(addsuffix _advsimd,$(double-advsimd-funcs)) \
                   $(addsuffix f_sve,$(float-sve-funcs)) \
-                  $(addsuffix _sve,$(double-sve-funcs))
+                  $(addsuffix _sve,$(double-sve-funcs)) \
+                  v_log_data
 endif
 
 sve-cflags = -march=armv8-a+sve
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index d26b3968a9..902446f40d 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -1,11 +1,15 @@ 
 libmvec {
   GLIBC_2.38 {
     _ZGVnN2v_cos;
+    _ZGVnN2v_log;
     _ZGVnN2v_sin;
     _ZGVnN4v_cosf;
+    _ZGVnN4v_logf;
     _ZGVnN4v_sinf;
     _ZGVsMxv_cos;
     _ZGVsMxv_cosf;
+    _ZGVsMxv_log;
+    _ZGVsMxv_logf;
     _ZGVsMxv_sin;
     _ZGVsMxv_sinf;
   }
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index ad9c9945e8..70c737338e 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -50,9 +50,11 @@  typedef __SVBool_t __sv_bool_t;
 #  define __vpcs __attribute__ ((__aarch64_vector_pcs__))
 
 __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
 
 __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
 
 #  undef __ADVSIMD_VEC_MATH_SUPPORTED
@@ -61,9 +63,11 @@  __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
 #ifdef __SVE_VEC_MATH_SUPPORTED
 
 __sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_logf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t);
 
 __sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_log (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t);
 
 #  undef __SVE_VEC_MATH_SUPPORTED
diff --git a/sysdeps/aarch64/fpu/log_advsimd.c b/sysdeps/aarch64/fpu/log_advsimd.c
new file mode 100644
index 0000000000..681b77fab5
--- /dev/null
+++ b/sysdeps/aarch64/fpu/log_advsimd.c
@@ -0,0 +1,104 @@ 
+/* Double-precision vector (Advanced SIMD) log function.
+
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const volatile struct
+{
+  float64x2_t poly[5];
+  float64x2_t ln2;
+  uint64x2_t min_norm, special_bound, sign_exp_mask;
+} data = {
+  /* Worst-case error: 1.17 + 0.5 ulp.
+     Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ].  */
+  .poly = { V2 (-0x1.ffffffffffff7p-2), V2 (0x1.55555555170d4p-2),
+	    V2 (-0x1.0000000399c27p-2), V2 (0x1.999b2e90e94cap-3),
+	    V2 (-0x1.554e550bd501ep-3) },
+  .ln2 = V2 (0x1.62e42fefa39efp-1),
+  .min_norm = V2 (0x0010000000000000),
+  .special_bound = V2 (0x7fe0000000000000), /* asuint64(inf) - min_norm.  */
+  .sign_exp_mask = V2 (0xfff0000000000000)
+};
+
+#define A(i) data.poly[i]
+#define N (1 << V_LOG_TABLE_BITS)
+#define IndexMask (N - 1)
+#define Off v_u64 (0x3fe6900900000000)
+
+struct entry
+{
+  float64x2_t invc;
+  float64x2_t logc;
+};
+
+static inline struct entry
+lookup (uint64x2_t i)
+{
+  /* Since N is a power of 2, n % N = n & (N - 1).  */
+  struct entry e;
+  e.invc[0] = __v_log_data.invc[i[0] & IndexMask];
+  e.logc[0] = __v_log_data.logc[i[0] & IndexMask];
+  e.invc[1] = __v_log_data.invc[i[1] & IndexMask];
+  e.logc[1] = __v_log_data.logc[i[1] & IndexMask];
+  return e;
+}
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
+{
+  return v_call_f64 (log, x, y, cmp);
+}
+
+float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x)
+{
+  float64x2_t z, r, r2, p, y, kd, hi;
+  uint64x2_t ix, iz, tmp, cmp;
+  int64x2_t k;
+  struct entry e;
+
+  ix = vreinterpretq_u64_f64 (x);
+  cmp = vcgeq_u64 (vsubq_u64 (ix, data.min_norm), data.special_bound);
+
+  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = vsubq_u64 (ix, Off);
+  k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift.  */
+  iz = vsubq_u64 (ix, vandq_u64 (tmp, data.sign_exp_mask));
+  z = vreinterpretq_f64_u64 (iz);
+  e = lookup (vshrq_n_u64 (tmp, 52 - V_LOG_TABLE_BITS));
+
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
+  r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
+  kd = vcvtq_f64_s64 (k);
+
+  /* hi = r + log(c) + k*Ln2.  */
+  hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, data.ln2);
+  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
+  r2 = vmulq_f64 (r, r);
+  y = vfmaq_f64 (A (2), A (3), r);
+  p = vfmaq_f64 (A (0), A (1), r);
+  y = vfmaq_f64 (y, A (4), r2);
+  y = vfmaq_f64 (p, y, r2);
+  y = vfmaq_f64 (hi, y, r2);
+
+  if (__glibc_unlikely (v_any_u64 (cmp)))
+    return special_case (x, y, cmp);
+  return y;
+}
diff --git a/sysdeps/aarch64/fpu/log_sve.c b/sysdeps/aarch64/fpu/log_sve.c
new file mode 100644
index 0000000000..93c4f1c018
--- /dev/null
+++ b/sysdeps/aarch64/fpu/log_sve.c
@@ -0,0 +1,80 @@ 
+/* Double-precision vector (SVE) log function.
+
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+#define P(i) sv_f64 (__v_log_data.poly[i])
+#define N (1 << V_LOG_TABLE_BITS)
+#define Off (0x3fe6900900000000)
+#define MaxTop (0x7ff)
+#define MinTop (0x001)
+#define ThreshTop (0x7fe) /* MaxTop - MinTop.  */
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
+{
+  return sv_call_f64 (log, x, y, cmp);
+}
+
+/* SVE port of AdvSIMD log algorithm.
+   Maximum measured error is 2.17 ulp:
+   SV_NAME_D1 (log)(0x1.a6129884398a3p+0) got 0x1.ffffff1cca043p-2
+					 want 0x1.ffffff1cca045p-2.  */
+svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg)
+{
+  svuint64_t ix = svreinterpret_u64_f64 (x);
+  svuint64_t top = svlsr_n_u64_x (pg, ix, 52);
+  svbool_t cmp
+      = svcmpge_u64 (pg, svsub_n_u64_x (pg, top, MinTop), sv_u64 (ThreshTop));
+
+  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  svuint64_t tmp = svsub_n_u64_x (pg, ix, Off);
+  /* Equivalent to (tmp >> (52 - V_LOG_TABLE_BITS)) % N, since N is a power
+     of 2.  */
+  svuint64_t i = svand_n_u64_x (
+      pg, svlsr_n_u64_x (pg, tmp, (52 - V_LOG_TABLE_BITS)), N - 1);
+  svint64_t k = svasr_n_s64_x (pg, svreinterpret_s64_u64 (tmp),
+			       52); /* Arithmetic shift.  */
+  svuint64_t iz
+      = svsub_u64_x (pg, ix, svand_n_u64_x (pg, tmp, 0xfffULL << 52));
+  svfloat64_t z = svreinterpret_f64_u64 (iz);
+  /* Lookup in 2 global lists (length N).  */
+  svfloat64_t invc = svld1_gather_u64index_f64 (pg, __v_log_data.invc, i);
+  svfloat64_t logc = svld1_gather_u64index_f64 (pg, __v_log_data.logc, i);
+
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
+  svfloat64_t r = svmad_n_f64_x (pg, invc, z, -1);
+  svfloat64_t kd = svcvt_f64_s64_x (pg, k);
+  /* hi = r + log(c) + k*Ln2.  */
+  svfloat64_t hi
+      = svmla_n_f64_x (pg, svadd_f64_x (pg, logc, r), kd, __v_log_data.ln2);
+  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
+  svfloat64_t r2 = svmul_f64_x (pg, r, r);
+  svfloat64_t y = svmla_f64_x (pg, P (2), r, P (3));
+  svfloat64_t p = svmla_f64_x (pg, P (0), r, P (1));
+  y = svmla_f64_x (pg, y, r2, P (4));
+  y = svmla_f64_x (pg, p, r2, y);
+  y = svmla_f64_x (pg, hi, r2, y);
+
+  if (__glibc_unlikely (svptest_any (pg, cmp)))
+    return special_case (x, y, cmp);
+  return y;
+}
diff --git a/sysdeps/aarch64/fpu/logf_advsimd.c b/sysdeps/aarch64/fpu/logf_advsimd.c
new file mode 100644
index 0000000000..7058d9fd18
--- /dev/null
+++ b/sysdeps/aarch64/fpu/logf_advsimd.c
@@ -0,0 +1,80 @@ 
+/* Single-precision vector (Advanced SIMD) log function.
+
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const volatile struct
+{
+  float32x4_t poly[7];
+  float32x4_t ln2, tiny_bound;
+  uint32x4_t min_norm, special_bound, off, mantissa_mask;
+} data = {
+  /* 3.34 ulp error.  */
+  .poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f),
+	    V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f),
+	    V4 (-0x1.ffffc8p-2f) },
+  .ln2 = V4 (0x1.62e43p-1f),
+  .tiny_bound = V4 (0x1p-126),
+  .min_norm = V4 (0x00800000),
+  .special_bound = V4 (0x7f000000), /* asuint32(inf) - min_norm.  */
+  .off = V4 (0x3f2aaaab),	    /* 0.666667.  */
+  .mantissa_mask = V4 (0x007fffff)
+};
+
+#define P(i) data.poly[7 - i]
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
+{
+  /* Fall back to scalar code.  */
+  return v_call_f32 (logf, x, y, cmp);
+}
+
+float32x4_t VPCS_ATTR V_NAME_F1 (log) (float32x4_t x)
+{
+  float32x4_t n, p, q, r, r2, y;
+  uint32x4_t u, cmp;
+
+  u = vreinterpretq_u32_f32 (x);
+  cmp = vcgeq_u32 (vsubq_u32 (u, data.min_norm), data.special_bound);
+
+  /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
+  u = vsubq_u32 (u, data.off);
+  n = vcvtq_f32_s32 (
+      vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend.  */
+  u = vandq_u32 (u, data.mantissa_mask);
+  u = vaddq_u32 (u, data.off);
+  r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
+
+  /* y = log(1+r) + n*ln2.  */
+  r2 = vmulq_f32 (r, r);
+  /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))).  */
+  p = vfmaq_f32 (P (5), P (6), r);
+  q = vfmaq_f32 (P (3), P (4), r);
+  y = vfmaq_f32 (P (1), P (2), r);
+  p = vfmaq_f32 (p, P (7), r2);
+  q = vfmaq_f32 (q, p, r2);
+  y = vfmaq_f32 (y, q, r2);
+  p = vfmaq_f32 (r, data.ln2, n);
+  y = vfmaq_f32 (p, y, r2);
+
+  if (__glibc_unlikely (v_any_u32 (cmp)))
+    return special_case (x, y, cmp);
+  return y;
+}
diff --git a/sysdeps/aarch64/fpu/logf_sve.c b/sysdeps/aarch64/fpu/logf_sve.c
new file mode 100644
index 0000000000..01697a322e
--- /dev/null
+++ b/sysdeps/aarch64/fpu/logf_sve.c
@@ -0,0 +1,85 @@ 
+/* Single-precision vector (SVE) log function.
+
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static struct
+{
+  float poly_0135[4];
+  float poly_246[3];
+  float ln2;
+} data = {
+  .poly_0135 = {
+    /* Coefficients copied from the AdvSIMD routine in math/, then rearranged so
+       that coeffs 0, 1, 3 and 5 can be loaded as a single quad-word, hence used
+       with _lane variant of MLA intrinsic.  */
+    -0x1.3e737cp-3f, 0x1.5a9aa2p-3f, 0x1.961348p-3f, 0x1.555d7cp-2f
+  },
+  .poly_246 = { -0x1.4f9934p-3f, -0x1.00187cp-2f, -0x1.ffffc8p-2f },
+  .ln2 = 0x1.62e43p-1f
+};
+
+#define Min (0x00800000)
+#define Max (0x7f800000)
+#define Thresh (0x7f000000) /* Max - Min.  */
+#define Mask (0x007fffff)
+#define Off (0x3f2aaaab) /* 0.666667.  */
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
+{
+  return sv_call_f32 (logf, x, y, cmp);
+}
+
+/* Optimised implementation of SVE logf, using the same algorithm and
+   polynomial as the AdvSIMD routine. Maximum error is 3.34 ULPs:
+   SV_NAME_F1 (log)(0x1.557298p+0) got 0x1.26edecp-2
+				  want 0x1.26ede6p-2.  */
+svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
+{
+  svuint32_t u = svreinterpret_u32_f32 (x);
+  svbool_t cmp = svcmpge_n_u32 (pg, svsub_n_u32_x (pg, u, Min), Thresh);
+
+  /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
+  u = svsub_n_u32_x (pg, u, Off);
+  svfloat32_t n
+      = svcvt_f32_s32_x (pg, svasr_n_s32_x (pg, svreinterpret_s32_u32 (u),
+					    23)); /* Sign-extend.  */
+  u = svand_n_u32_x (pg, u, Mask);
+  u = svadd_n_u32_x (pg, u, Off);
+  svfloat32_t r = svsub_n_f32_x (pg, svreinterpret_f32_u32 (u), 1.0f);
+
+  /* y = log(1+r) + n*ln2.  */
+  svfloat32_t r2 = svmul_f32_x (pg, r, r);
+  /* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))).  */
+  svfloat32_t p_0135 = svld1rq_f32 (svptrue_b32 (), &data.poly_0135[0]);
+  svfloat32_t p = svmla_lane_f32 (sv_f32 (data.poly_246[0]), r, p_0135, 1);
+  svfloat32_t q = svmla_lane_f32 (sv_f32 (data.poly_246[1]), r, p_0135, 2);
+  svfloat32_t y = svmla_lane_f32 (sv_f32 (data.poly_246[2]), r, p_0135, 3);
+  p = svmla_lane_f32 (p, r2, p_0135, 0);
+
+  q = svmla_f32_x (pg, q, r2, p);
+  y = svmla_f32_x (pg, y, r2, q);
+  p = svmla_n_f32_x (pg, r, n, data.ln2);
+  y = svmla_f32_x (pg, p, r2, y);
+
+  if (__glibc_unlikely (svptest_any (pg, cmp)))
+    return special_case (x, y, cmp);
+  return y;
+}
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index 4af97a25a2..c5f6fcd7c4 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -24,4 +24,5 @@ 
 #define VEC_TYPE float64x2_t
 
 VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
+VPCS_VECTOR_WRAPPER (log_advsimd, _ZGVnN2v_log)
 VPCS_VECTOR_WRAPPER (sin_advsimd, _ZGVnN2v_sin)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index 64c790adc5..d5e2ec6dc5 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -33,4 +33,5 @@ 
   }
 
 SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
+SVE_VECTOR_WRAPPER (log_sve, _ZGVsMxv_log)
 SVE_VECTOR_WRAPPER (sin_sve, _ZGVsMxv_sin)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index 50e776b952..c240738837 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -24,4 +24,5 @@ 
 #define VEC_TYPE float32x4_t
 
 VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
+VPCS_VECTOR_WRAPPER (logf_advsimd, _ZGVnN4v_logf)
 VPCS_VECTOR_WRAPPER (sinf_advsimd, _ZGVnN4v_sinf)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index 7355032929..5a06b75857 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -33,4 +33,5 @@ 
   }
 
 SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
+SVE_VECTOR_WRAPPER (logf_sve, _ZGVsMxv_logf)
 SVE_VECTOR_WRAPPER (sinf_sve, _ZGVsMxv_sinf)
diff --git a/sysdeps/aarch64/fpu/v_log_data.c b/sysdeps/aarch64/fpu/v_log_data.c
new file mode 100644
index 0000000000..6fd6f43695
--- /dev/null
+++ b/sysdeps/aarch64/fpu/v_log_data.c
@@ -0,0 +1,173 @@ 
+/* Lookup table for double-precision log(x) vector function.
+
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "vecmath_config.h"
+
+const struct v_log_data __v_log_data = {
+  /* Worst-case error: 1.17 + 0.5 ulp.
+     Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ].  */
+  .poly = { -0x1.ffffffffffff7p-2, 0x1.55555555170d4p-2, -0x1.0000000399c27p-2,
+	    0x1.999b2e90e94cap-3, -0x1.554e550bd501ep-3 },
+  .ln2 = 0x1.62e42fefa39efp-1,
+  /* Algorithm:
+
+	x = 2^k z
+	log(x) = k ln2 + log(c) + poly(z/c - 1)
+
+     where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1,
+     N=128) and log(c) and 1/c for the ith subinterval comes from two lookup
+     tables:
+
+	invc[i] = 1/c
+	logc[i] = (double)log(c)
+
+     where c is near the center of the subinterval and is chosen by trying
+     several floating point invc candidates around 1/center and selecting one
+     for which the error in (double)log(c) is minimized (< 0x1p-74), except the
+     subinterval that contains 1 and the previous one got tweaked to avoid
+     cancellation.  */
+  .invc = { 0x1.6a133d0dec120p+0, 0x1.6815f2f3e42edp+0,
+	    0x1.661e39be1ac9ep+0, 0x1.642bfa30ac371p+0,
+	    0x1.623f1d916f323p+0, 0x1.60578da220f65p+0,
+	    0x1.5e75349dea571p+0, 0x1.5c97fd387a75ap+0,
+	    0x1.5abfd2981f200p+0, 0x1.58eca051dc99cp+0,
+	    0x1.571e526d9df12p+0, 0x1.5554d555b3fcbp+0,
+	    0x1.539015e2a20cdp+0, 0x1.51d0014ee0164p+0,
+	    0x1.50148538cd9eep+0, 0x1.4e5d8f9f698a1p+0,
+	    0x1.4cab0edca66bep+0, 0x1.4afcf1a9db874p+0,
+	    0x1.495327136e16fp+0, 0x1.47ad9e84af28fp+0,
+	    0x1.460c47b39ae15p+0, 0x1.446f12b278001p+0,
+	    0x1.42d5efdd720ecp+0, 0x1.4140cfe001a0fp+0,
+	    0x1.3fafa3b421f69p+0, 0x1.3e225c9c8ece5p+0,
+	    0x1.3c98ec29a211ap+0, 0x1.3b13442a413fep+0,
+	    0x1.399156baa3c54p+0, 0x1.38131639b4cdbp+0,
+	    0x1.36987540fbf53p+0, 0x1.352166b648f61p+0,
+	    0x1.33adddb3eb575p+0, 0x1.323dcd99fc1d3p+0,
+	    0x1.30d129fefc7d2p+0, 0x1.2f67e6b72fe7dp+0,
+	    0x1.2e01f7cf8b187p+0, 0x1.2c9f518ddc86ep+0,
+	    0x1.2b3fe86e5f413p+0, 0x1.29e3b1211b25cp+0,
+	    0x1.288aa08b373cfp+0, 0x1.2734abcaa8467p+0,
+	    0x1.25e1c82459b81p+0, 0x1.2491eb1ad59c5p+0,
+	    0x1.23450a54048b5p+0, 0x1.21fb1bb09e578p+0,
+	    0x1.20b415346d8f7p+0, 0x1.1f6fed179a1acp+0,
+	    0x1.1e2e99b93c7b3p+0, 0x1.1cf011a7a882ap+0,
+	    0x1.1bb44b97dba5ap+0, 0x1.1a7b3e66cdd4fp+0,
+	    0x1.1944e11dc56cdp+0, 0x1.18112aebb1a6ep+0,
+	    0x1.16e013231b7e9p+0, 0x1.15b1913f156cfp+0,
+	    0x1.14859cdedde13p+0, 0x1.135c2dc68cfa4p+0,
+	    0x1.12353bdb01684p+0, 0x1.1110bf25b85b4p+0,
+	    0x1.0feeafd2f8577p+0, 0x1.0ecf062c51c3bp+0,
+	    0x1.0db1baa076c8bp+0, 0x1.0c96c5bb3048ep+0,
+	    0x1.0b7e20263e070p+0, 0x1.0a67c2acd0ce3p+0,
+	    0x1.0953a6391e982p+0, 0x1.0841c3caea380p+0,
+	    0x1.07321489b13eap+0, 0x1.062491aee9904p+0,
+	    0x1.05193497a7cc5p+0, 0x1.040ff6b5f5e9fp+0,
+	    0x1.0308d19aa6127p+0, 0x1.0203beedb0c67p+0,
+	    0x1.010037d38bcc2p+0, 1.0,
+	    0x1.fc06d493cca10p-1, 0x1.f81e6ac3b918fp-1,
+	    0x1.f44546ef18996p-1, 0x1.f07b10382c84bp-1,
+	    0x1.ecbf7070e59d4p-1, 0x1.e91213f715939p-1,
+	    0x1.e572a9a75f7b7p-1, 0x1.e1e0e2c530207p-1,
+	    0x1.de5c72d8a8be3p-1, 0x1.dae50fa5658ccp-1,
+	    0x1.d77a71145a2dap-1, 0x1.d41c51166623ep-1,
+	    0x1.d0ca6ba0bb29fp-1, 0x1.cd847e8e59681p-1,
+	    0x1.ca4a499693e00p-1, 0x1.c71b8e399e821p-1,
+	    0x1.c3f80faf19077p-1, 0x1.c0df92dc2b0ecp-1,
+	    0x1.bdd1de3cbb542p-1, 0x1.baceb9e1007a3p-1,
+	    0x1.b7d5ef543e55ep-1, 0x1.b4e749977d953p-1,
+	    0x1.b20295155478ep-1, 0x1.af279f8e82be2p-1,
+	    0x1.ac5638197fdf3p-1, 0x1.a98e2f102e087p-1,
+	    0x1.a6cf5606d05c1p-1, 0x1.a4197fc04d746p-1,
+	    0x1.a16c80293dc01p-1, 0x1.9ec82c4dc5bc9p-1,
+	    0x1.9c2c5a491f534p-1, 0x1.9998e1480b618p-1,
+	    0x1.970d9977c6c2dp-1, 0x1.948a5c023d212p-1,
+	    0x1.920f0303d6809p-1, 0x1.8f9b698a98b45p-1,
+	    0x1.8d2f6b81726f6p-1, 0x1.8acae5bb55badp-1,
+	    0x1.886db5d9275b8p-1, 0x1.8617ba567c13cp-1,
+	    0x1.83c8d27487800p-1, 0x1.8180de3c5dbe7p-1,
+	    0x1.7f3fbe71cdb71p-1, 0x1.7d055498071c1p-1,
+	    0x1.7ad182e54f65ap-1, 0x1.78a42c3c90125p-1,
+	    0x1.767d342f76944p-1, 0x1.745c7ef26b00ap-1,
+	    0x1.7241f15769d0fp-1, 0x1.702d70d396e41p-1,
+	    0x1.6e1ee3700cd11p-1, 0x1.6c162fc9cbe02p-1 },
+  .logc = { -0x1.62fe995eb963ap-2, -0x1.5d5a48dad6b67p-2,
+	    -0x1.57bde257d2769p-2, -0x1.52294fbf2af55p-2,
+	    -0x1.4c9c7b598aa38p-2, -0x1.47174fc5ff560p-2,
+	    -0x1.4199b7fa7b5cap-2, -0x1.3c239f48cfb99p-2,
+	    -0x1.36b4f154d2aebp-2, -0x1.314d9a0ff32fbp-2,
+	    -0x1.2bed85cca3cffp-2, -0x1.2694a11421af9p-2,
+	    -0x1.2142d8d014fb2p-2, -0x1.1bf81a2c77776p-2,
+	    -0x1.16b452a39c6a4p-2, -0x1.11776ffa6c67ep-2,
+	    -0x1.0c416035020e0p-2, -0x1.071211aa10fdap-2,
+	    -0x1.01e972e293b1bp-2, -0x1.f98ee587fd434p-3,
+	    -0x1.ef5800ad716fbp-3, -0x1.e52e160484698p-3,
+	    -0x1.db1104b19352ep-3, -0x1.d100ac59e0bd6p-3,
+	    -0x1.c6fced287c3bdp-3, -0x1.bd05a7b317c29p-3,
+	    -0x1.b31abd229164fp-3, -0x1.a93c0edadb0a3p-3,
+	    -0x1.9f697ee30d7ddp-3, -0x1.95a2efa9aa40ap-3,
+	    -0x1.8be843d796044p-3, -0x1.82395ecc477edp-3,
+	    -0x1.7896240966422p-3, -0x1.6efe77aca8c55p-3,
+	    -0x1.65723e117ec5cp-3, -0x1.5bf15c0955706p-3,
+	    -0x1.527bb6c111da1p-3, -0x1.491133c939f8fp-3,
+	    -0x1.3fb1b90c7fc58p-3, -0x1.365d2cc485f8dp-3,
+	    -0x1.2d13758970de7p-3, -0x1.23d47a721fd47p-3,
+	    -0x1.1aa0229f25ec2p-3, -0x1.117655ddebc3bp-3,
+	    -0x1.0856fbf83ab6bp-3, -0x1.fe83fabbaa106p-4,
+	    -0x1.ec6e8507a56cdp-4, -0x1.da6d68c7cc2eap-4,
+	    -0x1.c88078462be0cp-4, -0x1.b6a786a423565p-4,
+	    -0x1.a4e2676ac7f85p-4, -0x1.9330eea777e76p-4,
+	    -0x1.8192f134d5ad9p-4, -0x1.70084464f0538p-4,
+	    -0x1.5e90bdec5cb1fp-4, -0x1.4d2c3433c5536p-4,
+	    -0x1.3bda7e219879ap-4, -0x1.2a9b732d27194p-4,
+	    -0x1.196eeb2b10807p-4, -0x1.0854be8ef8a7ep-4,
+	    -0x1.ee998cb277432p-5, -0x1.ccadb79919fb9p-5,
+	    -0x1.aae5b1d8618b0p-5, -0x1.89413015d7442p-5,
+	    -0x1.67bfe7bf158dep-5, -0x1.46618f83941bep-5,
+	    -0x1.2525df1b0618ap-5, -0x1.040c8e2f77c6ap-5,
+	    -0x1.c62aad39f738ap-6, -0x1.847fe3bdead9cp-6,
+	    -0x1.43183683400acp-6, -0x1.01f31c4e1d544p-6,
+	    -0x1.82201d1e6b69ap-7, -0x1.00dd0f3e1bfd6p-7,
+	    -0x1.ff6fe1feb4e53p-9, 0.0,
+	    0x1.fe91885ec8e20p-8,  0x1.fc516f716296dp-7,
+	    0x1.7bb4dd70a015bp-6,  0x1.f84c99b34b674p-6,
+	    0x1.39f9ce4fb2d71p-5,  0x1.7756c0fd22e78p-5,
+	    0x1.b43ee82db8f3ap-5,  0x1.f0b3fced60034p-5,
+	    0x1.165bd78d4878ep-4,  0x1.3425d2715ebe6p-4,
+	    0x1.51b8bd91b7915p-4,  0x1.6f15632c76a47p-4,
+	    0x1.8c3c88ecbe503p-4,  0x1.a92ef077625dap-4,
+	    0x1.c5ed5745fa006p-4,  0x1.e27876de1c993p-4,
+	    0x1.fed104fce4cdcp-4,  0x1.0d7bd9c17d78bp-3,
+	    0x1.1b76986cef97bp-3,  0x1.295913d24f750p-3,
+	    0x1.37239fa295d17p-3,  0x1.44d68dd78714bp-3,
+	    0x1.52722ebe5d780p-3,  0x1.5ff6d12671f98p-3,
+	    0x1.6d64c2389484bp-3,  0x1.7abc4da40fddap-3,
+	    0x1.87fdbda1e8452p-3,  0x1.95295b06a5f37p-3,
+	    0x1.a23f6d34abbc5p-3,  0x1.af403a28e04f2p-3,
+	    0x1.bc2c06a85721ap-3,  0x1.c903161240163p-3,
+	    0x1.d5c5aa93287ebp-3,  0x1.e274051823fa9p-3,
+	    0x1.ef0e656300c16p-3,  0x1.fb9509f05aa2ap-3,
+	    0x1.04041821f37afp-2,  0x1.0a340a49b3029p-2,
+	    0x1.105a7918a126dp-2,  0x1.1677819812b84p-2,
+	    0x1.1c8b405b40c0ep-2,  0x1.2295d16cfa6b1p-2,
+	    0x1.28975066318a2p-2,  0x1.2e8fd855d86fcp-2,
+	    0x1.347f83d605e59p-2,  0x1.3a666d1244588p-2,
+	    0x1.4044adb6f8ec4p-2,  0x1.461a5f077558cp-2,
+	    0x1.4be799e20b9c8p-2,  0x1.51ac76a6b79dfp-2,
+	    0x1.57690d5744a45p-2,  0x1.5d1d758e45217p-2 }
+};
diff --git a/sysdeps/aarch64/fpu/vecmath_config.h b/sysdeps/aarch64/fpu/vecmath_config.h
index 8976297fa9..ec53dde5dc 100644
--- a/sysdeps/aarch64/fpu/vecmath_config.h
+++ b/sysdeps/aarch64/fpu/vecmath_config.h
@@ -26,4 +26,14 @@ 
    supported in GLIBC, however we keep it for ease of development.  */
 #define WANT_SIMD_EXCEPT 0
 
+#define V_LOG_POLY_ORDER 6
+#define V_LOG_TABLE_BITS 7
+extern const struct v_log_data
+{
+  /* Shared data for vector log and log-derived routines (e.g. asinh).  */
+  double poly[V_LOG_POLY_ORDER - 1];
+  double ln2;
+  double invc[1 << V_LOG_TABLE_BITS];
+  double logc[1 << V_LOG_TABLE_BITS];
+} __v_log_data attribute_hidden;
 #endif
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 4145662b2d..7bdbf4614d 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1219,10 +1219,18 @@  double: 3
 float: 3
 ldouble: 1
 
+Function: "log_advsimd":
+double: 1
+float: 3
+
 Function: "log_downward":
 float: 2
 ldouble: 1
 
+Function: "log_sve":
+double: 1
+float: 3
+
 Function: "log_towardzero":
 float: 2
 ldouble: 2
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index a4c564859c..1922191886 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -1,8 +1,12 @@ 
 GLIBC_2.38 _ZGVnN2v_cos F
+GLIBC_2.38 _ZGVnN2v_log F
 GLIBC_2.38 _ZGVnN2v_sin F
 GLIBC_2.38 _ZGVnN4v_cosf F
+GLIBC_2.38 _ZGVnN4v_logf F
 GLIBC_2.38 _ZGVnN4v_sinf F
 GLIBC_2.38 _ZGVsMxv_cos F
 GLIBC_2.38 _ZGVsMxv_cosf F
+GLIBC_2.38 _ZGVsMxv_log F
+GLIBC_2.38 _ZGVsMxv_logf F
 GLIBC_2.38 _ZGVsMxv_sin F
 GLIBC_2.38 _ZGVsMxv_sinf F