[v5,14/18] x86-64: Add vector atanh/atanhf implementation to libmvec
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
Commit Message
Implement vectorized atanh/atanhf containing SSE, AVX, AVX2 and
AVX512 versions for libmvec as per vector ABI. It also contains
accuracy and ABI tests for vector atanh/atanhf with regenerated ulps.
---
bits/libm-simd-decl-stubs.h | 11 +
math/bits/mathcalls.h | 2 +-
.../unix/sysv/linux/x86_64/libmvec.abilist | 8 +
sysdeps/x86/fpu/bits/math-vector.h | 4 +
.../x86/fpu/finclude/math-vector-fortran.h | 4 +
sysdeps/x86_64/fpu/Makeconfig | 1 +
sysdeps/x86_64/fpu/Versions | 2 +
sysdeps/x86_64/fpu/libm-test-ulps | 20 +
.../fpu/multiarch/svml_d_atanh2_core-sse2.S | 20 +
.../x86_64/fpu/multiarch/svml_d_atanh2_core.c | 27 +
.../fpu/multiarch/svml_d_atanh2_core_sse4.S | 1519 +++++++++++++++++
.../fpu/multiarch/svml_d_atanh4_core-sse.S | 20 +
.../x86_64/fpu/multiarch/svml_d_atanh4_core.c | 27 +
.../fpu/multiarch/svml_d_atanh4_core_avx2.S | 1479 ++++++++++++++++
.../fpu/multiarch/svml_d_atanh8_core-avx2.S | 20 +
.../x86_64/fpu/multiarch/svml_d_atanh8_core.c | 27 +
.../fpu/multiarch/svml_d_atanh8_core_avx512.S | 401 +++++
.../fpu/multiarch/svml_s_atanhf16_core-avx2.S | 20 +
.../fpu/multiarch/svml_s_atanhf16_core.c | 28 +
.../multiarch/svml_s_atanhf16_core_avx512.S | 393 +++++
.../fpu/multiarch/svml_s_atanhf4_core-sse2.S | 20 +
.../fpu/multiarch/svml_s_atanhf4_core.c | 28 +
.../fpu/multiarch/svml_s_atanhf4_core_sse4.S | 361 ++++
.../fpu/multiarch/svml_s_atanhf8_core-sse.S | 20 +
.../fpu/multiarch/svml_s_atanhf8_core.c | 28 +
.../fpu/multiarch/svml_s_atanhf8_core_avx2.S | 335 ++++
sysdeps/x86_64/fpu/svml_d_atanh2_core.S | 29 +
sysdeps/x86_64/fpu/svml_d_atanh4_core.S | 29 +
sysdeps/x86_64/fpu/svml_d_atanh4_core_avx.S | 25 +
sysdeps/x86_64/fpu/svml_d_atanh8_core.S | 25 +
sysdeps/x86_64/fpu/svml_s_atanhf16_core.S | 25 +
sysdeps/x86_64/fpu/svml_s_atanhf4_core.S | 29 +
sysdeps/x86_64/fpu/svml_s_atanhf8_core.S | 29 +
sysdeps/x86_64/fpu/svml_s_atanhf8_core_avx.S | 25 +
.../fpu/test-double-libmvec-atanh-avx.c | 1 +
.../fpu/test-double-libmvec-atanh-avx2.c | 1 +
.../fpu/test-double-libmvec-atanh-avx512f.c | 1 +
.../x86_64/fpu/test-double-libmvec-atanh.c | 3 +
.../x86_64/fpu/test-double-vlen2-wrappers.c | 1 +
.../fpu/test-double-vlen4-avx2-wrappers.c | 1 +
.../x86_64/fpu/test-double-vlen4-wrappers.c | 1 +
.../x86_64/fpu/test-double-vlen8-wrappers.c | 1 +
.../fpu/test-float-libmvec-atanhf-avx.c | 1 +
.../fpu/test-float-libmvec-atanhf-avx2.c | 1 +
.../fpu/test-float-libmvec-atanhf-avx512f.c | 1 +
.../x86_64/fpu/test-float-libmvec-atanhf.c | 3 +
.../x86_64/fpu/test-float-vlen16-wrappers.c | 1 +
.../x86_64/fpu/test-float-vlen4-wrappers.c | 1 +
.../fpu/test-float-vlen8-avx2-wrappers.c | 1 +
.../x86_64/fpu/test-float-vlen8-wrappers.c | 1 +
50 files changed, 5060 insertions(+), 1 deletion(-)
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core-sse2.S
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core.c
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core_sse4.S
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core-sse.S
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core.c
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core_avx2.S
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core-avx2.S
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core.c
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core_avx512.S
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core-avx2.S
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core.c
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core_avx512.S
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core-sse2.S
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core.c
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core_sse4.S
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core-sse.S
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core.c
create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core_avx2.S
create mode 100644 sysdeps/x86_64/fpu/svml_d_atanh2_core.S
create mode 100644 sysdeps/x86_64/fpu/svml_d_atanh4_core.S
create mode 100644 sysdeps/x86_64/fpu/svml_d_atanh4_core_avx.S
create mode 100644 sysdeps/x86_64/fpu/svml_d_atanh8_core.S
create mode 100644 sysdeps/x86_64/fpu/svml_s_atanhf16_core.S
create mode 100644 sysdeps/x86_64/fpu/svml_s_atanhf4_core.S
create mode 100644 sysdeps/x86_64/fpu/svml_s_atanhf8_core.S
create mode 100644 sysdeps/x86_64/fpu/svml_s_atanhf8_core_avx.S
create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx.c
create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx2.c
create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx512f.c
create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-atanh.c
create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx.c
create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx2.c
create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx512f.c
create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-atanhf.c
Comments
On Tue, Dec 28, 2021 at 10:39:56PM -0800, Sunil K Pandey wrote:
> Implement vectorized atanh/atanhf containing SSE, AVX, AVX2 and
> AVX512 versions for libmvec as per vector ABI. It also contains
> accuracy and ABI tests for vector atanh/atanhf with regenerated ulps.
> ---
> bits/libm-simd-decl-stubs.h | 11 +
> math/bits/mathcalls.h | 2 +-
> .../unix/sysv/linux/x86_64/libmvec.abilist | 8 +
> sysdeps/x86/fpu/bits/math-vector.h | 4 +
> .../x86/fpu/finclude/math-vector-fortran.h | 4 +
> sysdeps/x86_64/fpu/Makeconfig | 1 +
> sysdeps/x86_64/fpu/Versions | 2 +
> sysdeps/x86_64/fpu/libm-test-ulps | 20 +
> .../fpu/multiarch/svml_d_atanh2_core-sse2.S | 20 +
> .../x86_64/fpu/multiarch/svml_d_atanh2_core.c | 27 +
> .../fpu/multiarch/svml_d_atanh2_core_sse4.S | 1519 +++++++++++++++++
> .../fpu/multiarch/svml_d_atanh4_core-sse.S | 20 +
> .../x86_64/fpu/multiarch/svml_d_atanh4_core.c | 27 +
> .../fpu/multiarch/svml_d_atanh4_core_avx2.S | 1479 ++++++++++++++++
> .../fpu/multiarch/svml_d_atanh8_core-avx2.S | 20 +
> .../x86_64/fpu/multiarch/svml_d_atanh8_core.c | 27 +
> .../fpu/multiarch/svml_d_atanh8_core_avx512.S | 401 +++++
> .../fpu/multiarch/svml_s_atanhf16_core-avx2.S | 20 +
> .../fpu/multiarch/svml_s_atanhf16_core.c | 28 +
> .../multiarch/svml_s_atanhf16_core_avx512.S | 393 +++++
> .../fpu/multiarch/svml_s_atanhf4_core-sse2.S | 20 +
> .../fpu/multiarch/svml_s_atanhf4_core.c | 28 +
> .../fpu/multiarch/svml_s_atanhf4_core_sse4.S | 361 ++++
> .../fpu/multiarch/svml_s_atanhf8_core-sse.S | 20 +
> .../fpu/multiarch/svml_s_atanhf8_core.c | 28 +
> .../fpu/multiarch/svml_s_atanhf8_core_avx2.S | 335 ++++
> sysdeps/x86_64/fpu/svml_d_atanh2_core.S | 29 +
> sysdeps/x86_64/fpu/svml_d_atanh4_core.S | 29 +
> sysdeps/x86_64/fpu/svml_d_atanh4_core_avx.S | 25 +
> sysdeps/x86_64/fpu/svml_d_atanh8_core.S | 25 +
> sysdeps/x86_64/fpu/svml_s_atanhf16_core.S | 25 +
> sysdeps/x86_64/fpu/svml_s_atanhf4_core.S | 29 +
> sysdeps/x86_64/fpu/svml_s_atanhf8_core.S | 29 +
> sysdeps/x86_64/fpu/svml_s_atanhf8_core_avx.S | 25 +
> .../fpu/test-double-libmvec-atanh-avx.c | 1 +
> .../fpu/test-double-libmvec-atanh-avx2.c | 1 +
> .../fpu/test-double-libmvec-atanh-avx512f.c | 1 +
> .../x86_64/fpu/test-double-libmvec-atanh.c | 3 +
> .../x86_64/fpu/test-double-vlen2-wrappers.c | 1 +
> .../fpu/test-double-vlen4-avx2-wrappers.c | 1 +
> .../x86_64/fpu/test-double-vlen4-wrappers.c | 1 +
> .../x86_64/fpu/test-double-vlen8-wrappers.c | 1 +
> .../fpu/test-float-libmvec-atanhf-avx.c | 1 +
> .../fpu/test-float-libmvec-atanhf-avx2.c | 1 +
> .../fpu/test-float-libmvec-atanhf-avx512f.c | 1 +
> .../x86_64/fpu/test-float-libmvec-atanhf.c | 3 +
> .../x86_64/fpu/test-float-vlen16-wrappers.c | 1 +
> .../x86_64/fpu/test-float-vlen4-wrappers.c | 1 +
> .../fpu/test-float-vlen8-avx2-wrappers.c | 1 +
> .../x86_64/fpu/test-float-vlen8-wrappers.c | 1 +
> 50 files changed, 5060 insertions(+), 1 deletion(-)
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core-sse2.S
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core.c
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core_sse4.S
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core-sse.S
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core.c
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core_avx2.S
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core-avx2.S
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core.c
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core_avx512.S
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core-avx2.S
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core.c
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core_avx512.S
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core-sse2.S
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core.c
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core_sse4.S
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core-sse.S
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core.c
> create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core_avx2.S
> create mode 100644 sysdeps/x86_64/fpu/svml_d_atanh2_core.S
> create mode 100644 sysdeps/x86_64/fpu/svml_d_atanh4_core.S
> create mode 100644 sysdeps/x86_64/fpu/svml_d_atanh4_core_avx.S
> create mode 100644 sysdeps/x86_64/fpu/svml_d_atanh8_core.S
> create mode 100644 sysdeps/x86_64/fpu/svml_s_atanhf16_core.S
> create mode 100644 sysdeps/x86_64/fpu/svml_s_atanhf4_core.S
> create mode 100644 sysdeps/x86_64/fpu/svml_s_atanhf8_core.S
> create mode 100644 sysdeps/x86_64/fpu/svml_s_atanhf8_core_avx.S
> create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx.c
> create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx2.c
> create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx512f.c
> create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-atanh.c
> create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx.c
> create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx2.c
> create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx512f.c
> create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-atanhf.c
>
> diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h
> index 845246fab9..bb7380a446 100644
> --- a/bits/libm-simd-decl-stubs.h
> +++ b/bits/libm-simd-decl-stubs.h
> @@ -252,4 +252,15 @@
> #define __DECL_SIMD_log1pf32x
> #define __DECL_SIMD_log1pf64x
> #define __DECL_SIMD_log1pf128x
> +
> +#define __DECL_SIMD_atanh
> +#define __DECL_SIMD_atanhf
> +#define __DECL_SIMD_atanhl
> +#define __DECL_SIMD_atanhf16
> +#define __DECL_SIMD_atanhf32
> +#define __DECL_SIMD_atanhf64
> +#define __DECL_SIMD_atanhf128
> +#define __DECL_SIMD_atanhf32x
> +#define __DECL_SIMD_atanhf64x
> +#define __DECL_SIMD_atanhf128x
> #endif
> diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h
> index aa4bc61aa4..04dd9c5d1b 100644
> --- a/math/bits/mathcalls.h
> +++ b/math/bits/mathcalls.h
> @@ -86,7 +86,7 @@ __MATHCALL (acosh,, (_Mdouble_ __x));
> /* Hyperbolic arc sine of X. */
> __MATHCALL (asinh,, (_Mdouble_ __x));
> /* Hyperbolic arc tangent of X. */
> -__MATHCALL (atanh,, (_Mdouble_ __x));
> +__MATHCALL_VEC (atanh,, (_Mdouble_ __x));
> #endif
>
> /* Exponential and logarithmic functions. */
> diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
> index 68b940606a..2d389912b1 100644
> --- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
> +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
> @@ -49,6 +49,7 @@ GLIBC_2.22 _ZGVeN8vvv_sincos F
> GLIBC_2.35 _ZGVbN2v_acos F
> GLIBC_2.35 _ZGVbN2v_asin F
> GLIBC_2.35 _ZGVbN2v_atan F
> +GLIBC_2.35 _ZGVbN2v_atanh F
> GLIBC_2.35 _ZGVbN2v_cbrt F
> GLIBC_2.35 _ZGVbN2v_cosh F
> GLIBC_2.35 _ZGVbN2v_exp10 F
> @@ -63,6 +64,7 @@ GLIBC_2.35 _ZGVbN2vv_hypot F
> GLIBC_2.35 _ZGVbN4v_acosf F
> GLIBC_2.35 _ZGVbN4v_asinf F
> GLIBC_2.35 _ZGVbN4v_atanf F
> +GLIBC_2.35 _ZGVbN4v_atanhf F
> GLIBC_2.35 _ZGVbN4v_cbrtf F
> GLIBC_2.35 _ZGVbN4v_coshf F
> GLIBC_2.35 _ZGVbN4v_exp10f F
> @@ -77,6 +79,7 @@ GLIBC_2.35 _ZGVbN4vv_hypotf F
> GLIBC_2.35 _ZGVcN4v_acos F
> GLIBC_2.35 _ZGVcN4v_asin F
> GLIBC_2.35 _ZGVcN4v_atan F
> +GLIBC_2.35 _ZGVcN4v_atanh F
> GLIBC_2.35 _ZGVcN4v_cbrt F
> GLIBC_2.35 _ZGVcN4v_cosh F
> GLIBC_2.35 _ZGVcN4v_exp10 F
> @@ -91,6 +94,7 @@ GLIBC_2.35 _ZGVcN4vv_hypot F
> GLIBC_2.35 _ZGVcN8v_acosf F
> GLIBC_2.35 _ZGVcN8v_asinf F
> GLIBC_2.35 _ZGVcN8v_atanf F
> +GLIBC_2.35 _ZGVcN8v_atanhf F
> GLIBC_2.35 _ZGVcN8v_cbrtf F
> GLIBC_2.35 _ZGVcN8v_coshf F
> GLIBC_2.35 _ZGVcN8v_exp10f F
> @@ -105,6 +109,7 @@ GLIBC_2.35 _ZGVcN8vv_hypotf F
> GLIBC_2.35 _ZGVdN4v_acos F
> GLIBC_2.35 _ZGVdN4v_asin F
> GLIBC_2.35 _ZGVdN4v_atan F
> +GLIBC_2.35 _ZGVdN4v_atanh F
> GLIBC_2.35 _ZGVdN4v_cbrt F
> GLIBC_2.35 _ZGVdN4v_cosh F
> GLIBC_2.35 _ZGVdN4v_exp10 F
> @@ -119,6 +124,7 @@ GLIBC_2.35 _ZGVdN4vv_hypot F
> GLIBC_2.35 _ZGVdN8v_acosf F
> GLIBC_2.35 _ZGVdN8v_asinf F
> GLIBC_2.35 _ZGVdN8v_atanf F
> +GLIBC_2.35 _ZGVdN8v_atanhf F
> GLIBC_2.35 _ZGVdN8v_cbrtf F
> GLIBC_2.35 _ZGVdN8v_coshf F
> GLIBC_2.35 _ZGVdN8v_exp10f F
> @@ -133,6 +139,7 @@ GLIBC_2.35 _ZGVdN8vv_hypotf F
> GLIBC_2.35 _ZGVeN16v_acosf F
> GLIBC_2.35 _ZGVeN16v_asinf F
> GLIBC_2.35 _ZGVeN16v_atanf F
> +GLIBC_2.35 _ZGVeN16v_atanhf F
> GLIBC_2.35 _ZGVeN16v_cbrtf F
> GLIBC_2.35 _ZGVeN16v_coshf F
> GLIBC_2.35 _ZGVeN16v_exp10f F
> @@ -147,6 +154,7 @@ GLIBC_2.35 _ZGVeN16vv_hypotf F
> GLIBC_2.35 _ZGVeN8v_acos F
> GLIBC_2.35 _ZGVeN8v_asin F
> GLIBC_2.35 _ZGVeN8v_atan F
> +GLIBC_2.35 _ZGVeN8v_atanh F
> GLIBC_2.35 _ZGVeN8v_cbrt F
> GLIBC_2.35 _ZGVeN8v_cosh F
> GLIBC_2.35 _ZGVeN8v_exp10 F
> diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h
> index 14c9db3bb3..4937b6811f 100644
> --- a/sysdeps/x86/fpu/bits/math-vector.h
> +++ b/sysdeps/x86/fpu/bits/math-vector.h
> @@ -114,6 +114,10 @@
> # define __DECL_SIMD_log1p __DECL_SIMD_x86_64
> # undef __DECL_SIMD_log1pf
> # define __DECL_SIMD_log1pf __DECL_SIMD_x86_64
> +# undef __DECL_SIMD_atanh
> +# define __DECL_SIMD_atanh __DECL_SIMD_x86_64
> +# undef __DECL_SIMD_atanhf
> +# define __DECL_SIMD_atanhf __DECL_SIMD_x86_64
>
> # endif
> #endif
> diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
> index 3dca196432..da39c08ba9 100644
> --- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h
> +++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
> @@ -56,6 +56,8 @@
> !GCC$ builtin (log2f) attributes simd (notinbranch) if('x86_64')
> !GCC$ builtin (log1p) attributes simd (notinbranch) if('x86_64')
> !GCC$ builtin (log1pf) attributes simd (notinbranch) if('x86_64')
> +!GCC$ builtin (atanh) attributes simd (notinbranch) if('x86_64')
> +!GCC$ builtin (atanhf) attributes simd (notinbranch) if('x86_64')
>
> !GCC$ builtin (cos) attributes simd (notinbranch) if('x32')
> !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32')
> @@ -97,3 +99,5 @@
> !GCC$ builtin (log2f) attributes simd (notinbranch) if('x32')
> !GCC$ builtin (log1p) attributes simd (notinbranch) if('x32')
> !GCC$ builtin (log1pf) attributes simd (notinbranch) if('x32')
> +!GCC$ builtin (atanh) attributes simd (notinbranch) if('x32')
> +!GCC$ builtin (atanhf) attributes simd (notinbranch) if('x32')
> diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig
> index 378cb06d37..de87544259 100644
> --- a/sysdeps/x86_64/fpu/Makeconfig
> +++ b/sysdeps/x86_64/fpu/Makeconfig
> @@ -26,6 +26,7 @@ libmvec-funcs = \
> asin \
> atan \
> atan2 \
> + atanh \
> cbrt \
> cos \
> cosh \
> diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions
> index 155fb115f3..df0ea83711 100644
> --- a/sysdeps/x86_64/fpu/Versions
> +++ b/sysdeps/x86_64/fpu/Versions
> @@ -17,6 +17,7 @@ libmvec {
> _ZGVbN2v_acos; _ZGVcN4v_acos; _ZGVdN4v_acos; _ZGVeN8v_acos;
> _ZGVbN2v_asin; _ZGVcN4v_asin; _ZGVdN4v_asin; _ZGVeN8v_asin;
> _ZGVbN2v_atan; _ZGVcN4v_atan; _ZGVdN4v_atan; _ZGVeN8v_atan;
> + _ZGVbN2v_atanh; _ZGVcN4v_atanh; _ZGVdN4v_atanh; _ZGVeN8v_atanh;
> _ZGVbN2v_cbrt; _ZGVcN4v_cbrt; _ZGVdN4v_cbrt; _ZGVeN8v_cbrt;
> _ZGVbN2v_cosh; _ZGVcN4v_cosh; _ZGVdN4v_cosh; _ZGVeN8v_cosh;
> _ZGVbN2v_exp10; _ZGVcN4v_exp10; _ZGVdN4v_exp10; _ZGVeN8v_exp10;
> @@ -31,6 +32,7 @@ libmvec {
> _ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf;
> _ZGVbN4v_asinf; _ZGVcN8v_asinf; _ZGVdN8v_asinf; _ZGVeN16v_asinf;
> _ZGVbN4v_atanf; _ZGVcN8v_atanf; _ZGVdN8v_atanf; _ZGVeN16v_atanf;
> + _ZGVbN4v_atanhf; _ZGVcN8v_atanhf; _ZGVdN8v_atanhf; _ZGVeN16v_atanhf;
> _ZGVbN4v_cbrtf; _ZGVcN8v_cbrtf; _ZGVdN8v_cbrtf; _ZGVeN16v_cbrtf;
> _ZGVbN4v_coshf; _ZGVcN8v_coshf; _ZGVdN8v_coshf; _ZGVeN16v_coshf;
> _ZGVbN4v_exp10f; _ZGVcN8v_exp10f; _ZGVdN8v_exp10f; _ZGVeN16v_exp10f;
> diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
> index a2b15a795b..09a46190b6 100644
> --- a/sysdeps/x86_64/fpu/libm-test-ulps
> +++ b/sysdeps/x86_64/fpu/libm-test-ulps
> @@ -248,6 +248,26 @@ float: 3
> float128: 4
> ldouble: 5
>
> +Function: "atanh_vlen16":
> +float: 1
> +
> +Function: "atanh_vlen2":
> +double: 1
> +
> +Function: "atanh_vlen4":
> +double: 1
> +float: 1
> +
> +Function: "atanh_vlen4_avx2":
> +double: 1
> +
> +Function: "atanh_vlen8":
> +double: 1
> +float: 1
> +
> +Function: "atanh_vlen8_avx2":
> +float: 1
> +
> Function: "cabs":
> double: 1
> float128: 1
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core-sse2.S
> new file mode 100644
> index 0000000000..b154ab8649
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core-sse2.S
> @@ -0,0 +1,20 @@
> +/* SSE2 version of vectorized atanh, vector length is 2.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define _ZGVbN2v_atanh _ZGVbN2v_atanh_sse2
> +#include "../svml_d_atanh2_core.S"
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core.c
> new file mode 100644
> index 0000000000..138190e568
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core.c
> @@ -0,0 +1,27 @@
> +/* Multiple versions of vectorized atanh, vector length is 2.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define SYMBOL_NAME _ZGVbN2v_atanh
> +#include "ifunc-mathvec-sse4_1.h"
> +
> +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
> +
> +#ifdef SHARED
> +__hidden_ver1 (_ZGVbN2v_atanh, __GI__ZGVbN2v_atanh, __redirect__ZGVbN2v_atanh)
> + __attribute__ ((visibility ("hidden")));
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core_sse4.S
> new file mode 100644
> index 0000000000..7e70b036f7
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh2_core_sse4.S
> @@ -0,0 +1,1519 @@
> +/* Function atanh vectorized with SSE4.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +/*
> + * ALGORITHM DESCRIPTION:
> + *
> + * Compute atanh(x) as 0.5 * log((1 + x)/(1 - x))
> + *
> + * Special cases:
> + *
> + * atanh(0) = 0
> + * atanh(+1) = +INF
> + * atanh(-1) = -INF
> + * atanh(x) = NaN if |x| > 1, or if x is a NaN or INF
> + *
> + */
> +
> +/* Offsets for data table __svml_datanh_data_internal
> + */
> +#define Log_HA_table 0
> +#define Log_LA_table 8208
> +#define poly_coeff 12320
> +#define ExpMask 12384
> +#define Two10 12400
> +#define MinLog1p 12416
> +#define MaxLog1p 12432
> +#define One 12448
> +#define SgnMask 12464
> +#define XThreshold 12480
> +#define XhMask 12496
> +#define Threshold 12512
> +#define Bias 12528
> +#define Bias1 12544
> +#define ExpMask0 12560
> +#define ExpMask2 12576
> +#define L2 12592
> +#define dHalf 12608
> +#define dSign 12624
> +#define dTopMask12 12640
> +#define dTopMask41 12656
> +#define TinyRange 12672
> +
> +/* Lookup bias for data table __svml_datanh_data_internal. */
> +#define Table_Lookup_Bias -0x405ff0
> +
> +#include <sysdep.h>
> +
> + .text
> + .section .text.sse4,"ax",@progbits
> +ENTRY(_ZGVbN2v_atanh_sse4)
> + pushq %rbp
> + cfi_def_cfa_offset(16)
> + movq %rsp, %rbp
> + cfi_def_cfa(6, 16)
> + cfi_offset(6, -16)
> + andq $-32, %rsp
> + subq $64, %rsp
> + movaps %xmm0, %xmm12
> + movups SgnMask+__svml_datanh_data_internal(%rip), %xmm7
> + lea Table_Lookup_Bias+__svml_datanh_data_internal(%rip), %rsi
> +
> +/* Load the constant 1 and a sign mask */
> + movups One+__svml_datanh_data_internal(%rip), %xmm11
> +
> +/* Strip off the sign, so treat X as positive until right at the end */
> + movaps %xmm7, %xmm14
> + andps %xmm12, %xmm14
> + movaps %xmm11, %xmm15
> + subpd %xmm14, %xmm15
> + movups dTopMask41+__svml_datanh_data_internal(%rip), %xmm2
> + movaps %xmm11, %xmm5
> + movaps %xmm2, %xmm0
> +
> +/*
> + * Compute V = 2 * X trivially, and UHi + ULo = 1 - X in two pieces,
> + * the upper part UHi being <= 41 bits long. Then we have
> + * atanh(X) = 1/2 * log((1 + X) / (1 - X)) = 1/2 * log1p(V / (UHi + ULo)).
> + */
> + movaps %xmm14, %xmm6
> + andps %xmm15, %xmm0
> +
> +/*
> + * Check whether |X| < 1, in which case we use the main function.
> + * Otherwise set the rangemask so that the callout will get used.
> + * Note that this will also use the callout for NaNs since not(NaN < 1).
> + */
> + movaps %xmm14, %xmm13
> +
> +/*
> + * Now compute R = 1/(UHi+ULo) * (1 - E) and the error term E
> + * The first FMR is exact (we force R to 12 bits just in case it
> + * isn't already, to make absolutely sure), and since E is ~ 2^-12,
> + * the rounding error in the other one is acceptable.
> + */
> + cvtpd2ps %xmm0, %xmm1
> + subpd %xmm15, %xmm5
> + addpd %xmm14, %xmm6
> + subpd %xmm0, %xmm15
> + cmpnltpd %xmm11, %xmm13
> + subpd %xmm14, %xmm5
> + movmskpd %xmm13, %edx
> + movlhps %xmm1, %xmm1
> + movaps %xmm14, %xmm9
> + rcpps %xmm1, %xmm4
> + addpd %xmm15, %xmm5
> + cmpltpd TinyRange+__svml_datanh_data_internal(%rip), %xmm9
> + cvtps2pd %xmm4, %xmm14
> + andps dTopMask12+__svml_datanh_data_internal(%rip), %xmm14
> + movaps %xmm11, %xmm13
> + mulpd %xmm14, %xmm0
> + mulpd %xmm14, %xmm5
> + subpd %xmm0, %xmm13
> +
> +/*
> + * Split V as well into upper 41 bits and lower part, so that we can get
> + * a preliminary quotient estimate without rounding error.
> + */
> + andps %xmm6, %xmm2
> +
> +/*
> + * Now we feed into the log1p code, using H in place of _VARG1 and
> + * later incorporating L into the reduced argument.
> + * compute 1+x as high, low parts
> + */
> + movaps %xmm11, %xmm0
> + subpd %xmm5, %xmm13
> + subpd %xmm2, %xmm6
> +
> +/* Hence get initial quotient estimate QHi + QLo = R * VHi + R * VLo */
> + mulpd %xmm14, %xmm2
> + mulpd %xmm6, %xmm14
> +
> +/*
> + * Compute D = E + E^2 + E^3 + E^4 + E^5
> + * = E + (E + E^2) (E + E * E^2)
> + */
> + movaps %xmm13, %xmm6
> + movaps %xmm13, %xmm3
> + mulpd %xmm13, %xmm6
> + mulpd %xmm6, %xmm3
> + addpd %xmm13, %xmm6
> + addpd %xmm13, %xmm3
> + mulpd %xmm3, %xmm6
> + addpd %xmm6, %xmm13
> +
> +/*
> + * Compute R * (VHi + VLo) * (1 + E + E^2 + E^3 + E^4 + E^5)
> + * = R * (VHi + VLo) * (1 + D)
> + * = QHi + (QHi * D + QLo + QLo * D)
> + */
> + movaps %xmm13, %xmm1
> + movaps %xmm11, %xmm5
> + mulpd %xmm14, %xmm13
> + mulpd %xmm2, %xmm1
> + addpd %xmm13, %xmm14
> + addpd %xmm14, %xmm1
> +
> +/*
> + * Now finally accumulate the high and low parts of the
> + * argument to log1p, H + L, with a final compensated summation.
> + */
> + addpd %xmm1, %xmm2
> + maxpd %xmm2, %xmm0
> + minpd %xmm2, %xmm5
> + andps %xmm7, %xmm2
> + movaps %xmm0, %xmm4
> + cmpltpd XThreshold+__svml_datanh_data_internal(%rip), %xmm2
> + addpd %xmm5, %xmm4
> + orps XhMask+__svml_datanh_data_internal(%rip), %xmm2
> + movaps %xmm12, %xmm10
> +
> +/* preserve mantissa, set input exponent to 2^(-10) */
> + movups ExpMask+__svml_datanh_data_internal(%rip), %xmm7
> + andps %xmm2, %xmm4
> + andps %xmm4, %xmm7
> +
> +/* exponent bits */
> + movaps %xmm4, %xmm6
> + orps Two10+__svml_datanh_data_internal(%rip), %xmm7
> + psrlq $20, %xmm6
> +
> +/* reciprocal approximation good to at least 11 bits */
> + cvtpd2ps %xmm7, %xmm1
> + subpd %xmm4, %xmm0
> + mulpd %xmm12, %xmm10
> + addpd %xmm0, %xmm5
> + addpd %xmm12, %xmm10
> + movlhps %xmm1, %xmm1
> + rcpps %xmm1, %xmm15
> + cvtps2pd %xmm15, %xmm3
> +
> +/* round reciprocal to nearest integer, will have 1+9 mantissa bits */
> + movups .FLT_21(%rip), %xmm1
> + addpd %xmm1, %xmm3
> + subpd %xmm1, %xmm3
> +
> +/* exponent of X needed to scale Xl */
> + movdqu ExpMask0+__svml_datanh_data_internal(%rip), %xmm0
> +
> +/*
> + * prepare table index
> + * table lookup
> + */
> + movaps %xmm3, %xmm13
> +
> +/* 2^ (-10-exp(X) ) */
> + movdqu ExpMask2+__svml_datanh_data_internal(%rip), %xmm2
> + pand %xmm4, %xmm0
> + psubq %xmm0, %xmm2
> +
> +/* scale DblRcp */
> + mulpd %xmm3, %xmm2
> +
> +/* argument reduction */
> + mulpd %xmm2, %xmm4
> + mulpd %xmm2, %xmm5
> + subpd %xmm11, %xmm4
> + addpd %xmm5, %xmm4
> +
> +/* polynomial */
> + movups poly_coeff+__svml_datanh_data_internal(%rip), %xmm11
> + psrlq $40, %xmm13
> + mulpd %xmm4, %xmm11
> + movd %xmm13, %eax
> + pshufd $221, %xmm6, %xmm7
> +
> +/* exponent*log(2.0) */
> + movups Threshold+__svml_datanh_data_internal(%rip), %xmm6
> + cmpltpd %xmm3, %xmm6
> + addpd poly_coeff+16+__svml_datanh_data_internal(%rip), %xmm11
> +
> +/* biased exponent in DP format */
> + cvtdq2pd %xmm7, %xmm1
> + movaps %xmm4, %xmm3
> + mulpd %xmm4, %xmm3
> + movups poly_coeff+32+__svml_datanh_data_internal(%rip), %xmm2
> + mulpd %xmm4, %xmm2
> + mulpd %xmm3, %xmm11
> + addpd poly_coeff+48+__svml_datanh_data_internal(%rip), %xmm2
> + addpd %xmm11, %xmm2
> +
> +/* reconstruction */
> + mulpd %xmm2, %xmm3
> + andps Bias+__svml_datanh_data_internal(%rip), %xmm6
> + orps Bias1+__svml_datanh_data_internal(%rip), %xmm6
> + pshufd $2, %xmm13, %xmm14
> + subpd %xmm6, %xmm1
> + addpd %xmm3, %xmm4
> + movd %xmm14, %ecx
> + mulpd L2+__svml_datanh_data_internal(%rip), %xmm1
> + movslq %eax, %rax
> + movslq %ecx, %rcx
> +
> +/* Record the sign for eventual reincorporation. */
> + movups dSign+__svml_datanh_data_internal(%rip), %xmm8
> + andps %xmm12, %xmm8
> + movsd (%rsi,%rax), %xmm0
> +
> +/* Or the sign bit in with the tiny result to handle atanh(-0) correctly */
> + orps %xmm8, %xmm10
> + movhpd (%rsi,%rcx), %xmm0
> + andps %xmm9, %xmm10
> + addpd %xmm4, %xmm0
> + addpd %xmm0, %xmm1
> +
> +/* Finally, halve the result and reincorporate the sign */
> + movups dHalf+__svml_datanh_data_internal(%rip), %xmm4
> + movaps %xmm9, %xmm0
> + pxor %xmm8, %xmm4
> + mulpd %xmm1, %xmm4
> + andnps %xmm4, %xmm0
> + orps %xmm10, %xmm0
> + testl %edx, %edx
> +
> +/* Go to special inputs processing branch */
> + jne L(SPECIAL_VALUES_BRANCH)
> + # LOE rbx r12 r13 r14 r15 edx xmm0 xmm12
> +
> +/* Restore registers
> + * and exit the function
> + */
> +
> +L(EXIT):
> + movq %rbp, %rsp
> + popq %rbp
> + cfi_def_cfa(7, 8)
> + cfi_restore(6)
> + ret
> + cfi_def_cfa(6, 16)
> + cfi_offset(6, -16)
> +
> +/* Branch to process
> + * special inputs
> + */
> +
> +L(SPECIAL_VALUES_BRANCH):
> + movups %xmm12, 32(%rsp)
> + movups %xmm0, 48(%rsp)
> + # LOE rbx r12 r13 r14 r15 edx
> +
> + xorl %eax, %eax
> + movq %r12, 16(%rsp)
> + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22
> + movl %eax, %r12d
> + movq %r13, 8(%rsp)
> + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22
> + movl %edx, %r13d
> + movq %r14, (%rsp)
> + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22
> + # LOE rbx r15 r12d r13d
> +
> +/* Range mask
> + * bits check
> + */
> +
> +L(RANGEMASK_CHECK):
> + btl %r12d, %r13d
> +
> +/* Call scalar math function */
> + jc L(SCALAR_MATH_CALL)
> + # LOE rbx r15 r12d r13d
> +
> +/* Special inputs
> + * processing loop
> + */
> +
> +L(SPECIAL_VALUES_LOOP):
> + incl %r12d
> + cmpl $2, %r12d
> +
> +/* Check bits in range mask */
> + jl L(RANGEMASK_CHECK)
> + # LOE rbx r15 r12d r13d
> +
> + movq 16(%rsp), %r12
> + cfi_restore(12)
> + movq 8(%rsp), %r13
> + cfi_restore(13)
> + movq (%rsp), %r14
> + cfi_restore(14)
> + movups 48(%rsp), %xmm0
> +
> +/* Go to exit */
> + jmp L(EXIT)
> + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22
> + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22
> + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22
> + # LOE rbx r12 r13 r14 r15 xmm0
> +
> +/* Scalar math function call
> + * to process special input
> + */
> +
> +L(SCALAR_MATH_CALL):
> + movl %r12d, %r14d
> + movsd 32(%rsp,%r14,8), %xmm0
> + call atanh@PLT
> + # LOE rbx r14 r15 r12d r13d xmm0
> +
> + movsd %xmm0, 48(%rsp,%r14,8)
> +
> +/* Process special inputs in loop */
> + jmp L(SPECIAL_VALUES_LOOP)
> + # LOE rbx r15 r12d r13d
> +END(_ZGVbN2v_atanh_sse4)
> +
> + .section .rodata, "a"
> + .align 16
> +
> +#ifdef __svml_datanh_data_internal_typedef /* NOTE(review): this macro is not defined in the visible part of the file, so the C view of the table layout below looks like documentation only -- confirm */
> +typedef unsigned int VUINT32; /* element type used by every field of the layout */
> +typedef struct {
> + __declspec(align(16)) VUINT32 Log_HA_table[(1<<10)+2][2]; /* 1026 rows of two VUINT32; the matching data emits 1026 .quad values, two per line */
> + __declspec(align(16)) VUINT32 Log_LA_table[(1<<9)+1][2]; /* 513 rows of two VUINT32; the matching data follows the Log_LA_table marker, one .quad per line */
> + __declspec(align(16)) VUINT32 poly_coeff[4][2][2]; /* four [2][2] groups */
> + __declspec(align(16)) VUINT32 ExpMask[2][2]; /* this and all remaining fields are [2][2]; presumably one 64-bit constant repeated for each lane of a 128-bit vector -- TODO confirm against the data */
> + __declspec(align(16)) VUINT32 Two10[2][2];
> + __declspec(align(16)) VUINT32 MinLog1p[2][2];
> + __declspec(align(16)) VUINT32 MaxLog1p[2][2];
> + __declspec(align(16)) VUINT32 One[2][2];
> + __declspec(align(16)) VUINT32 SgnMask[2][2];
> + __declspec(align(16)) VUINT32 XThreshold[2][2];
> + __declspec(align(16)) VUINT32 XhMask[2][2];
> + __declspec(align(16)) VUINT32 Threshold[2][2];
> + __declspec(align(16)) VUINT32 Bias[2][2];
> + __declspec(align(16)) VUINT32 Bias1[2][2];
> + __declspec(align(16)) VUINT32 ExpMask0[2][2];
> + __declspec(align(16)) VUINT32 ExpMask2[2][2];
> + __declspec(align(16)) VUINT32 L2[2][2];
> + __declspec(align(16)) VUINT32 dHalf[2][2];
> + __declspec(align(16)) VUINT32 dSign[2][2];
> + __declspec(align(16)) VUINT32 dTopMask12[2][2];
> + __declspec(align(16)) VUINT32 dTopMask41[2][2];
> + __declspec(align(16)) VUINT32 TinyRange[2][2];
> +} __svml_datanh_data_internal; /* same name as the data label that follows this block */
> +#endif
> +__svml_datanh_data_internal:
> + /* Log_HA_table */
> + .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100
> + .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a
> + .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff
> + .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a
> + .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb
> + .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e
> + .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b
> + .quad 0xc0862347acebaf68, 0xbe1cef3b152048af
> + .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e
> + .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4
> + .quad 0xc08623537ac30980, 0xbe1cefc4642ee597
> + .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16
> + .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6
> + .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362
> + .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557
> + .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b
> + .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed
> + .quad 0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed
> + .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f
> + .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce
> + .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7
> + .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1
> + .quad 0xc086238206e94218, 0xbe1ceee898588610
> + .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea
> + .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6
> + .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6
> + .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165
> + .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1
> + .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b
> + .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670
> + .quad 0xc08623a07b28ae60, 0xbe1cef359363787c
> + .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c
> + .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84
> + .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7
> + .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b
> + .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf
> + .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62
> + .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b
> + .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98
> + .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87
> + .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff
> + .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798
> + .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e
> + .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde
> + .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b
> + .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c
> + .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98
> + .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f
> + .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358
> + .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380
> + .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4
> + .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b
> + .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2
> + .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4
> + .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400
> + .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7
> + .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a
> + .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d
> + .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b
> + .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575
> + .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951
> + .quad 0xc086241263e87f50, 0xbe1cf16e74768529
> + .quad 0xc0862415f6193658, 0xbe1cefec64b8becb
> + .quad 0xc086241986b28f30, 0xbe1cf0838d210baa
> + .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11
> + .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805
> + .quad 0xc08624242f008380, 0xbe1ceea988c5a417
> + .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5
> + .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38
> + .quad 0xc086242ec92eaee8, 0xbe1cef0946455411
> + .quad 0xc08624324ecbaf98, 0xbe1cefea60907739
> + .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42
> + .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d
> + .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e
> + .quad 0xc086244055d2c968, 0xbe1cef345284c119
> + .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219
> + .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114
> + .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189
> + .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f
> + .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f
> + .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5
> + .quad 0xc0862458a789e250, 0xbe1cf0b173796a31
> + .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d
> + .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb
> + .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7
> + .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f
> + .quad 0xc0862469d9a591c0, 0xbe1cef503d947663
> + .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2
> + .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc
> + .quad 0xc086247419475160, 0xbe1cf03dd9922331
> + .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129
> + .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6
> + .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100
> + .quad 0xc0862481af27c528, 0xbe1cee8a6593278a
> + .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7
> + .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8
> + .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002
> + .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4
> + .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c
> + .quad 0xc0862495e5179270, 0xbe1cee757f20c326
> + .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4
> + .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97
> + .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb
> + .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e
> + .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b
> + .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80
> + .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71
> + .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9
> + .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139
> + .quad 0xc08624b72472a528, 0xbe1cf031c931c11f
> + .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7
> + .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d
> + .quad 0xc08624c103245238, 0xbe1cefd492f1ba75
> + .quad 0xc08624c44aacab08, 0xbe1cf1253e154466
> + .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55
> + .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe
> + .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f
> + .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968
> + .quad 0xc08624d49c4a4b78, 0xbe1cee97b556ed78
> + .quad 0xc08624d7dbd56750, 0xbe1cf1b14b6acb75
> + .quad 0xc08624db1a0f6b00, 0xbe1cef1e860623f2
> + .quad 0xc08624de56f96758, 0xbe1ceeaf4d156f3d
> + .quad 0xc08624e192946bf0, 0xbe1ceecc12b400ed
> + .quad 0xc08624e4cce18710, 0xbe1cf180c40c794f
> + .quad 0xc08624e805e1c5c8, 0xbe1cf185a08f7f65
> + .quad 0xc08624eb3d9633d8, 0xbe1cef45fc924078
> + .quad 0xc08624ee73ffdbb0, 0xbe1cf1e4f457f32a
> + .quad 0xc08624f1a91fc6a0, 0xbe1cf040147b8a5a
> + .quad 0xc08624f4dcf6fc98, 0xbe1cf1effca0dfb2
> + .quad 0xc08624f80f868468, 0xbe1cf0470146e5bc
> + .quad 0xc08624fb40cf6390, 0xbe1cef4dd186e501
> + .quad 0xc08624fe70d29e60, 0xbe1ceebe257f66c7
> + .quad 0xc08625019f9137f0, 0xbe1ceefb7a1c395c
> + .quad 0xc0862504cd0c3220, 0xbe1cf209dedfed8c
> + .quad 0xc0862507f9448db0, 0xbe1cf082da464994
> + .quad 0xc086250b243b4a18, 0xbe1cee88694a73cf
> + .quad 0xc086250e4df165a0, 0xbe1cf0b61e8f0531
> + .quad 0xc08625117667dd78, 0xbe1cf1106599c962
> + .quad 0xc08625149d9fad98, 0xbe1ceff1ee88af1f
> + .quad 0xc0862517c399d0c8, 0xbe1cf0f746994ef6
> + .quad 0xc086251ae85740b8, 0xbe1cefe8a1d077e4
> + .quad 0xc086251e0bd8f5e0, 0xbe1cf1a1da036092
> + .quad 0xc08625212e1fe7a8, 0xbe1cf0f8a7786fcd
> + .quad 0xc08625244f2d0c48, 0xbe1cefa1174a07a7
> + .quad 0xc08625276f0158d8, 0xbe1cef1043aa5b25
> + .quad 0xc086252a8d9dc150, 0xbe1cf15d521c169d
> + .quad 0xc086252dab033898, 0xbe1cf220bba8861f
> + .quad 0xc0862530c732b078, 0xbe1cef51e310eae2
> + .quad 0xc0862533e22d1988, 0xbe1cf222fcedd8ae
> + .quad 0xc0862536fbf36370, 0xbe1cefdb4da4bda8
> + .quad 0xc086253a14867ca0, 0xbe1ceeafc1112171
> + .quad 0xc086253d2be75280, 0xbe1cee99dfb4b408
> + .quad 0xc08625404216d160, 0xbe1cf22d2536f06b
> + .quad 0xc08625435715e498, 0xbe1cef6abbf2e268
> + .quad 0xc08625466ae57648, 0xbe1cf093a14789f5
> + .quad 0xc08625497d866fa0, 0xbe1cf0f93655603c
> + .quad 0xc086254c8ef9b8b8, 0xbe1cf1cc40c9aafc
> + .quad 0xc086254f9f4038a8, 0xbe1ceeea5f4e9157
> + .quad 0xc0862552ae5ad568, 0xbe1cefa9f52d4997
> + .quad 0xc0862555bc4a7400, 0xbe1cefa490a638ff
> + .quad 0xc0862558c90ff868, 0xbe1cef7fcf797d6f
> + .quad 0xc086255bd4ac4590, 0xbe1cf1b4c51113c9
> + .quad 0xc086255edf203d78, 0xbe1cef55e5b4a55d
> + .quad 0xc0862561e86cc100, 0xbe1cf0d37a25f9dc
> + .quad 0xc0862564f092b028, 0xbe1ceebe9efc19d9
> + .quad 0xc0862567f792e9d8, 0xbe1cee8ad30a57b5
> + .quad 0xc086256afd6e4c08, 0xbe1cef4e1817b90b
> + .quad 0xc086256e0225b3b8, 0xbe1cee7fa9229996
> + .quad 0xc086257105b9fce0, 0xbe1cf0b54963d945
> + .quad 0xc0862574082c0298, 0xbe1cee5f2f3c7995
> + .quad 0xc0862577097c9ee0, 0xbe1cf0828e303a2c
> + .quad 0xc086257a09acaae0, 0xbe1cf172c3078947
> + .quad 0xc086257d08bcfec0, 0xbe1cf189252afa22
> + .quad 0xc086258006ae71b8, 0xbe1cefdb80426923
> + .quad 0xc08625830381da08, 0xbe1ceef1391a0372
> + .quad 0xc0862585ff380d00, 0xbe1cf17720c78d13
> + .quad 0xc0862588f9d1df18, 0xbe1ceef1f9027d83
> + .quad 0xc086258bf35023b8, 0xbe1cf06fac99dec9
> + .quad 0xc086258eebb3ad78, 0xbe1cf1373eeb45c0
> + .quad 0xc0862591e2fd4e00, 0xbe1cef777536bb81
> + .quad 0xc0862594d92dd600, 0xbe1cf0f43ca40766
> + .quad 0xc0862597ce461558, 0xbe1cefb2cfc6766b
> + .quad 0xc086259ac246daf0, 0xbe1ceea49e64ffa2
> + .quad 0xc086259db530f4c8, 0xbe1cf250fa457dec
> + .quad 0xc08625a0a7053018, 0xbe1cf17d8bb2a44e
> + .quad 0xc08625a397c45918, 0xbe1cf1d5906d54b7
> + .quad 0xc08625a6876f3b30, 0xbe1cf08fe7b31780
> + .quad 0xc08625a97606a0e0, 0xbe1cef13edfc9d11
> + .quad 0xc08625ac638b53c8, 0xbe1cef9d2b107219
> + .quad 0xc08625af4ffe1cb0, 0xbe1cf1ddd4ff6160
> + .quad 0xc08625b23b5fc390, 0xbe1cefa02a996495
> + .quad 0xc08625b525b10f68, 0xbe1cf166a7e37ee5
> + .quad 0xc08625b80ef2c680, 0xbe1cef0b171068a5
> + .quad 0xc08625baf725ae28, 0xbe1cf05c80779283
> + .quad 0xc08625bdde4a8af0, 0xbe1cf1bbfbffb889
> + .quad 0xc08625c0c4622090, 0xbe1cf0b8666c0124
> + .quad 0xc08625c3a96d31e0, 0xbe1cf0a8fcf47a86
> + .quad 0xc08625c68d6c80f0, 0xbe1cef46e18cb092
> + .quad 0xc08625c97060cef0, 0xbe1cf1458a350efb
> + .quad 0xc08625cc524adc58, 0xbe1ceeea1dadce12
> + .quad 0xc08625cf332b68b0, 0xbe1cf0a1bfdc44c7
> + .quad 0xc08625d2130332d0, 0xbe1cef96d02da73e
> + .quad 0xc08625d4f1d2f8a8, 0xbe1cf2451c3c7701
> + .quad 0xc08625d7cf9b7778, 0xbe1cf10d08f83812
> + .quad 0xc08625daac5d6ba0, 0xbe1ceec5b4895c5e
> + .quad 0xc08625dd881990b0, 0xbe1cf14e1325c5e4
> + .quad 0xc08625e062d0a188, 0xbe1cf21d0904be12
> + .quad 0xc08625e33c835838, 0xbe1ceed0839bcf21
> + .quad 0xc08625e615326df0, 0xbe1cf1bb944889d2
> + .quad 0xc08625e8ecde9b48, 0xbe1cee738e85eece
> + .quad 0xc08625ebc38897e0, 0xbe1cf25c2bc6ef12
> + .quad 0xc08625ee99311ac8, 0xbe1cf132b70a41ad
> + .quad 0xc08625f16dd8da28, 0xbe1cf1984236a6e3
> + .quad 0xc08625f441808b78, 0xbe1cf19ae74998f9
> + .quad 0xc08625f71428e370, 0xbe1cef3e175d61a1
> + .quad 0xc08625f9e5d295f8, 0xbe1cf101f9868fd9
> + .quad 0xc08625fcb67e5658, 0xbe1cee69db83dcd2
> + .quad 0xc08625ff862cd6f8, 0xbe1cf081b636af51
> + .quad 0xc086260254dec9a8, 0xbe1cee62c7d59b3e
> + .quad 0xc08626052294df58, 0xbe1cf1b745c57716
> + .quad 0xc0862607ef4fc868, 0xbe1cef3d2800ea23
> + .quad 0xc086260abb103458, 0xbe1cef480ff1acd2
> + .quad 0xc086260d85d6d200, 0xbe1cf2424c9a17ef
> + .quad 0xc08626104fa44f90, 0xbe1cf12cfde90fd5
> + .quad 0xc086261318795a68, 0xbe1cf21f590dd5b6
> + .quad 0xc0862615e0569f48, 0xbe1cf0c50f9cd28a
> + .quad 0xc0862618a73cca30, 0xbe1ceedbdb520545
> + .quad 0xc086261b6d2c8668, 0xbe1cf0b030396011
> + .quad 0xc086261e32267e98, 0xbe1cf19917010e96
> + .quad 0xc0862620f62b5cb0, 0xbe1cf07331355985
> + .quad 0xc0862623b93bc9e8, 0xbe1cf01ae921a1c3
> + .quad 0xc08626267b586ed0, 0xbe1cefe5cf0dbf0c
> + .quad 0xc08626293c81f348, 0xbe1cf01b258aeb50
> + .quad 0xc086262bfcb8fe88, 0xbe1cee6b9e7f4c68
> + .quad 0xc086262ebbfe3710, 0xbe1cee684a9b21c9
> + .quad 0xc08626317a5242b8, 0xbe1cf1f8bcde9a8b
> + .quad 0xc086263437b5c6c0, 0xbe1cf1d063d36238
> + .quad 0xc0862636f42967a8, 0xbe1cf1e31a19075e
> + .quad 0xc0862639afadc950, 0xbe1cf1d8efdf7e7d
> + .quad 0xc086263c6a438ef0, 0xbe1cf1812ee72dba
> + .quad 0xc086263f23eb5b18, 0xbe1cf1449a9a2279
> + .quad 0xc0862641dca5cfb8, 0xbe1cee96edce5085
> + .quad 0xc086264494738e08, 0xbe1cf06797bd03b2
> + .quad 0xc08626474b5536b8, 0xbe1cef91b9b7ffc1
> + .quad 0xc086264a014b69c0, 0xbe1cef4b6721278f
> + .quad 0xc086264cb656c678, 0xbe1cf1942925eb4a
> + .quad 0xc086264f6a77eba8, 0xbe1cefa2c7bc2e39
> + .quad 0xc08626521daf7758, 0xbe1cf252595aceb3
> + .quad 0xc0862654cffe0718, 0xbe1cee8e9ae47ec2
> + .quad 0xc0862657816437a8, 0xbe1cf1bf913828fa
> + .quad 0xc086265a31e2a558, 0xbe1cf23475d6b366
> + .quad 0xc086265ce179ebc8, 0xbe1cef8df00a922b
> + .quad 0xc086265f902aa5f0, 0xbe1cef279bfa43e0
> + .quad 0xc08626623df56e38, 0xbe1cf080e10b8365
> + .quad 0xc0862664eadade70, 0xbe1cf1a518f9b544
> + .quad 0xc086266796db8fd0, 0xbe1cef9308fed9e9
> + .quad 0xc086266a41f81ae8, 0xbe1ceea3ae6b19c9
> + .quad 0xc086266cec3117b8, 0xbe1ceef06003d4c2
> + .quad 0xc086266f95871da8, 0xbe1cf0b8457ffb0c
> + .quad 0xc08626723dfac390, 0xbe1cf0c526745ad6
> + .quad 0xc0862674e58c9fa8, 0xbe1cf0cf91ff7b5d
> + .quad 0xc08626778c3d4798, 0xbe1cefe260819380
> + .quad 0xc086267a320d5070, 0xbe1ceebd90aa27a3
> + .quad 0xc086267cd6fd4ea8, 0xbe1cf0388121dffa
> + .quad 0xc086267f7b0dd630, 0xbe1cf1a3881435f1
> + .quad 0xc08626821e3f7a68, 0xbe1cef28e9d9ac52
> + .quad 0xc0862684c092ce08, 0xbe1cf02d300062dd
> + .quad 0xc086268762086350, 0xbe1cefaee1edfa35
> + .quad 0xc086268a02a0cbe0, 0xbe1cf0a5a052e936
> + .quad 0xc086268ca25c98d8, 0xbe1cee60a4a497ed
> + .quad 0xc086268f413c5ab0, 0xbe1cf0e4a5d0cf49
> + .quad 0xc0862691df40a170, 0xbe1cf149235a4e6e
> + .quad 0xc08626947c69fc80, 0xbe1cf215180b9fcc
> + .quad 0xc086269718b8fac8, 0xbe1cef9b156a9840
> + .quad 0xc0862699b42e2a90, 0xbe1cf054c91441be
> + .quad 0xc086269c4eca19a8, 0xbe1cf13ded26512c
> + .quad 0xc086269ee88d5550, 0xbe1cf22ea4d8ac06
> + .quad 0xc08626a181786a40, 0xbe1cf2354666ee2e
> + .quad 0xc08626a4198be4a8, 0xbe1cefef936752b3
> + .quad 0xc08626a6b0c85020, 0xbe1cf1e360a9db68
> + .quad 0xc08626a9472e37d8, 0xbe1ceed6aeb812c5
> + .quad 0xc08626abdcbe2650, 0xbe1cf227340b4986
> + .quad 0xc08626ae7178a5b0, 0xbe1cf0215a0cbe0d
> + .quad 0xc08626b1055e3f70, 0xbe1cf256adf0ae26
> + .quad 0xc08626b3986f7ca8, 0xbe1ceff3c67aed06
> + .quad 0xc08626b62aace5c8, 0xbe1cf2159fb93652
> + .quad 0xc08626b8bc1702e0, 0xbe1cf01e6dbd1c7f
> + .quad 0xc08626bb4cae5b60, 0xbe1cf009e75d1c0c
> + .quad 0xc08626bddc737648, 0xbe1ceec10a020e73
> + .quad 0xc08626c06b66da08, 0xbe1cf06d5783eee7
> + .quad 0xc08626c2f9890ca0, 0xbe1cf0cb8f169ffe
> + .quad 0xc08626c586da9388, 0xbe1cef7de2452430
> + .quad 0xc08626c8135bf3b0, 0xbe1cf05da6f783ae
> + .quad 0xc08626ca9f0db198, 0xbe1cefcc877d681d
> + .quad 0xc08626cd29f05138, 0xbe1cef0531954ab3
> + .quad 0xc08626cfb4045608, 0xbe1cf06b8565ea3d
> + .quad 0xc08626d23d4a4310, 0xbe1cefdc455d9d7e
> + .quad 0xc08626d4c5c29ad0, 0xbe1ceefc47e8fa64
> + .quad 0xc08626d74d6ddf48, 0xbe1cf1872bf033f2
> + .quad 0xc08626d9d44c9210, 0xbe1cf19d91087f9d
> + .quad 0xc08626dc5a5f3438, 0xbe1cf012d444c6ab
> + .quad 0xc08626dedfa64650, 0xbe1cf0ba528ee153
> + .quad 0xc08626e164224880, 0xbe1ceeb431709788
> + .quad 0xc08626e3e7d3ba60, 0xbe1cf0b9af31a6a5
> + .quad 0xc08626e66abb1b28, 0xbe1cf168fb2e135b
> + .quad 0xc08626e8ecd8e990, 0xbe1cef9097461c93
> + .quad 0xc08626eb6e2da3d0, 0xbe1cee7a434735d8
> + .quad 0xc08626edeeb9c7a8, 0xbe1cf235732b86f2
> + .quad 0xc08626f06e7dd280, 0xbe1cefe1510b89e6
> + .quad 0xc08626f2ed7a4120, 0xbe1cf1f64b9b80ef
> + .quad 0xc08626f56baf9000, 0xbe1cf08f320ca339
> + .quad 0xc08626f7e91e3b08, 0xbe1cf1b1de2808a1
> + .quad 0xc08626fa65c6bdc0, 0xbe1cf1976d778b28
> + .quad 0xc08626fce1a99338, 0xbe1ceef40a4f076f
> + .quad 0xc08626ff5cc73600, 0xbe1cef3e45869ce3
> + .quad 0xc0862701d7202048, 0xbe1ceef601b4c9d6
> + .quad 0xc086270450b4cbc0, 0xbe1cf1eaf0b57fd6
> + .quad 0xc0862706c985b1c0, 0xbe1cef82a44990f3
> + .quad 0xc086270941934b10, 0xbe1ceefe32981f2c
> + .quad 0xc086270bb8de1018, 0xbe1cefbf6f5a0445
> + .quad 0xc086270e2f6678d0, 0xbe1cf18dba75792c
> + .quad 0xc0862710a52cfcc8, 0xbe1cf0da64ce995f
> + .quad 0xc08627131a321318, 0xbe1cef04ac0fb802
> + .quad 0xc08627158e763268, 0xbe1cee9d4e2ad9bd
> + .quad 0xc086271801f9d0f8, 0xbe1cefa9b55407b5
> + .quad 0xc086271a74bd64a0, 0xbe1cefe6bd329570
> + .quad 0xc086271ce6c162c8, 0xbe1cef0b1205dc85
> + .quad 0xc086271f58064068, 0xbe1cef092a785e3f
> + .quad 0xc0862721c88c7210, 0xbe1cf050dcdaac30
> + .quad 0xc086272438546be8, 0xbe1cf210907ded8b
> + .quad 0xc0862726a75ea1b8, 0xbe1cee760be44f99
> + .quad 0xc086272915ab86c0, 0xbe1ceeeee07c2bcc
> + .quad 0xc086272b833b8df0, 0xbe1cf06874992df5
> + .quad 0xc086272df00f29d0, 0xbe1cef8fac5d4899
> + .quad 0xc08627305c26cc70, 0xbe1cf1103241cc99
> + .quad 0xc0862732c782e788, 0xbe1cf1d35fef83fe
> + .quad 0xc08627353223ec68, 0xbe1cef3ec8133e1d
> + .quad 0xc08627379c0a4be8, 0xbe1cef7261daccd8
> + .quad 0xc086273a05367688, 0xbe1cf18656c50806
> + .quad 0xc086273c6da8dc68, 0xbe1cf1c8736e049a
> + .quad 0xc086273ed561ed38, 0xbe1cf1f93bff4911
> + .quad 0xc08627413c621848, 0xbe1cf188a4ea680c
> + .quad 0xc0862743a2a9cc80, 0xbe1cf1d270930c80
> + .quad 0xc086274608397868, 0xbe1cf25a328c28e2
> + .quad 0xc08627486d118a28, 0xbe1cf106f90aa3b8
> + .quad 0xc086274ad1326f80, 0xbe1cee5e9d2e885a
> + .quad 0xc086274d349c95c0, 0xbe1cf1c0bac27228
> + .quad 0xc086274f975069f8, 0xbe1cf1a1500f9b1c
> + .quad 0xc0862751f94e58c0, 0xbe1cefc30663ac44
> + .quad 0xc08627545a96ce48, 0xbe1cf17123e427a2
> + .quad 0xc0862756bb2a3678, 0xbe1cefb92749fea4
> + .quad 0xc08627591b08fcc0, 0xbe1cefa40e1ea74a
> + .quad 0xc086275b7a338c40, 0xbe1cee6f4612c3e9
> + .quad 0xc086275dd8aa4fa8, 0xbe1cf1c54a053627
> + .quad 0xc0862760366db168, 0xbe1ceff5eb503d9e
> + .quad 0xc0862762937e1b70, 0xbe1cf02e47f10cee
> + .quad 0xc0862764efdbf768, 0xbe1ceeb06e1d0dad
> + .quad 0xc08627674b87ae88, 0xbe1cf10aadd6dba5
> + .quad 0xc0862769a681a9c0, 0xbe1cf24e9913d30f
> + .quad 0xc086276c00ca51a0, 0xbe1cef47b301e312
> + .quad 0xc086276e5a620e48, 0xbe1ceeb1cefc2e85
> + .quad 0xc0862770b3494788, 0xbe1cf16f1fbbe011
> + .quad 0xc08627730b8064e8, 0xbe1ceebdf75174c7
> + .quad 0xc08627756307cd70, 0xbe1cf06e3871a0da
> + .quad 0xc0862777b9dfe7f0, 0xbe1cef16799fd554
> + .quad 0xc086277a10091ac0, 0xbe1cf248dabf5377
> + .quad 0xc086277c6583cc00, 0xbe1cf0c78d92a2cd
> + .quad 0xc086277eba506158, 0xbe1cf0b911b029f0
> + .quad 0xc08627810e6f4028, 0xbe1cefdc24719766
> + .quad 0xc086278361e0cd70, 0xbe1cefbb6562b7e7
> + .quad 0xc0862785b4a56dd8, 0xbe1cf1e0afb349ec
> + .quad 0xc086278806bd85c0, 0xbe1cf008292e52fc
> + .quad 0xc086278a58297918, 0xbe1cf053073872bf
> + .quad 0xc086278ca8e9ab88, 0xbe1cf17a0a55a947
> + .quad 0xc086278ef8fe8068, 0xbe1ceeffb0b60234
> + .quad 0xc086279148685aa0, 0xbe1cf162204794a8
> + .quad 0xc086279397279ce0, 0xbe1cf24cc8cb48ac
> + .quad 0xc0862795e53ca978, 0xbe1cf0c9be68d5c3
> + .quad 0xc086279832a7e258, 0xbe1cf172cd3d7388
> + .quad 0xc086279a7f69a930, 0xbe1ceea2465fbce5
> + .quad 0xc086279ccb825f40, 0xbe1cf0a386d2500f
> + .quad 0xc086279f16f26590, 0xbe1cf1e338ddc18a
> + .quad 0xc08627a161ba1cd0, 0xbe1cef1f5049867f
> + .quad 0xc08627a3abd9e548, 0xbe1cef96c1ea8b1f
> + .quad 0xc08627a5f5521f00, 0xbe1cf138f6fd3c26
> + .quad 0xc08627a83e2329b0, 0xbe1cf0d4fcbfdf3a
> + .quad 0xc08627aa864d64b0, 0xbe1cf24870c12c81
> + .quad 0xc08627accdd12f18, 0xbe1cf0ae2a56348d
> + .quad 0xc08627af14aee7a0, 0xbe1cee8ca1a9b893
> + .quad 0xc08627b15ae6eca8, 0xbe1cf20414d637b0
> + .quad 0xc08627b3a0799c60, 0xbe1cf0fc6b7b12d8
> + .quad 0xc08627b5e5675488, 0xbe1cf152d93c4a00
> + .quad 0xc08627b829b072a0, 0xbe1cf1073f9b77c2
> + .quad 0xc08627ba6d5553d8, 0xbe1cee694f97d5a4
> + .quad 0xc08627bcb0565500, 0xbe1cf0456b8239d7
> + .quad 0xc08627bef2b3d2b0, 0xbe1cf211497127e3
> + .quad 0xc08627c1346e2930, 0xbe1cf01856c0384d
> + .quad 0xc08627c37585b468, 0xbe1cefa7dd05479e
> + .quad 0xc08627c5b5fad000, 0xbe1cef3ae8e50b93
> + .quad 0xc08627c7f5cdd750, 0xbe1ceea5f32fdd3a
> + .quad 0xc08627ca34ff2560, 0xbe1cef424caeb8d9
> + .quad 0xc08627cc738f14f0, 0xbe1cf0194d07a81f
> + .quad 0xc08627ceb17e0070, 0xbe1cf20f452000c1
> + .quad 0xc08627d0eecc4210, 0xbe1cf00e356218e4
> + .quad 0xc08627d32b7a33a0, 0xbe1cef30484b4bcb
> + .quad 0xc08627d567882eb0, 0xbe1ceeea11a6641b
> + .quad 0xc08627d7a2f68c80, 0xbe1cf13492d5bd7b
> + .quad 0xc08627d9ddc5a618, 0xbe1ceeb7048fad96
> + .quad 0xc08627dc17f5d418, 0xbe1ceef0666f0477
> + .quad 0xc08627de51876ee8, 0xbe1cf060d4b8b5c2
> + .quad 0xc08627e08a7acea8, 0xbe1cf0b2a4b6ff8c
> + .quad 0xc08627e2c2d04b28, 0xbe1cf0e34809a875
> + .quad 0xc08627e4fa883bf0, 0xbe1cf16bf74a3522
> + .quad 0xc08627e731a2f848, 0xbe1cee6a24623d57
> + .quad 0xc08627e96820d718, 0xbe1cefc7b4f1528e
> + .quad 0xc08627eb9e022f18, 0xbe1cf163051f3548
> + .quad 0xc08627edd34756b8, 0xbe1cef36b3366305
> + .quad 0xc08627f007f0a408, 0xbe1cf18134625550
> + .quad 0xc08627f23bfe6cf0, 0xbe1cf0ec32ec1a11
> + .quad 0xc08627f46f710700, 0xbe1ceeb3b64f3edc
> + .quad 0xc08627f6a248c778, 0xbe1cf0cd15805bc8
> + .quad 0xc08627f8d4860368, 0xbe1cf20db3bddebe
> + .quad 0xc08627fb06290f90, 0xbe1cf25188430e25
> + .quad 0xc08627fd37324070, 0xbe1ceea1713490f9
> + .quad 0xc08627ff67a1ea28, 0xbe1cf159521d234c
> + .quad 0xc0862801977860b8, 0xbe1cf24dfe50783b
> + .quad 0xc0862803c6b5f7d0, 0xbe1ceef2ef89a60b
> + .quad 0xc0862805f55b02c8, 0xbe1cee7fc919d62c
> + .quad 0xc08628082367d4c0, 0xbe1cf215a7fb513a
> + .quad 0xc086280a50dcc0a8, 0xbe1cf0e4401c5ed4
> + .quad 0xc086280c7dba1910, 0xbe1cf04ec734d256
> + .quad 0xc086280eaa003050, 0xbe1cf010ad787fea
> + .quad 0xc0862810d5af5880, 0xbe1cee622478393d
> + .quad 0xc086281300c7e368, 0xbe1cf01c7482564f
> + .quad 0xc08628152b4a22a0, 0xbe1cf0de20d33536
> + .quad 0xc086281755366778, 0xbe1cef2edae5837d
> + .quad 0xc08628197e8d02f0, 0xbe1cf0a345318cc9
> + .quad 0xc086281ba74e45d8, 0xbe1cf20085aa34b8
> + .quad 0xc086281dcf7a80c0, 0xbe1cef5fa845ad83
> + .quad 0xc086281ff71203e0, 0xbe1cf050d1df69c4
> + .quad 0xc08628221e151f48, 0xbe1ceffe43c035b9
> + .quad 0xc0862824448422b8, 0xbe1cf14f3018d3c2
> + .quad 0xc08628266a5f5dc0, 0xbe1cef0a5fbae83d
> + .quad 0xc08628288fa71f98, 0xbe1ceff8a95b72a1
> + .quad 0xc086282ab45bb750, 0xbe1cef073aa9849b
> + .quad 0xc086282cd87d73a8, 0xbe1cef69b3835c02
> + .quad 0xc086282efc0ca328, 0xbe1cf0bc139379a9
> + .quad 0xc08628311f099420, 0xbe1cef247a9ec596
> + .quad 0xc086283341749490, 0xbe1cef74bbcc488a
> + .quad 0xc0862835634df248, 0xbe1cef4bc42e7b8e
> + .quad 0xc08628378495fad0, 0xbe1cf136d4d5a810
> + .quad 0xc0862839a54cfb80, 0xbe1cf0d290b24dd8
> + .quad 0xc086283bc5734168, 0xbe1ceeebde8e0065
> + .quad 0xc086283de5091950, 0xbe1cf1a09f60aa1e
> + .quad 0xc0862840040ecfe0, 0xbe1cf0803947a234
> + .quad 0xc08628422284b168, 0xbe1cf0abf7638127
> + .quad 0xc0862844406b0a08, 0xbe1cf0f73ee12058
> + .quad 0xc08628465dc225a0, 0xbe1cf2079971b26c
> + .quad 0xc08628487a8a4fe0, 0xbe1cee74957564b1
> + .quad 0xc086284a96c3d420, 0xbe1ceee77c1b7d43
> + .quad 0xc086284cb26efd90, 0xbe1cf23addba6e09
> + .quad 0xc086284ecd8c1730, 0xbe1cf199f4a1da60
> + .quad 0xc0862850e81b6bb0, 0xbe1cf09fdea81393
> + .quad 0xc0862853021d4588, 0xbe1cf176adb417f7
> + .quad 0xc08628551b91ef00, 0xbe1cf0f64f84a8da
> + .quad 0xc08628573479b220, 0xbe1ceec34cf49523
> + .quad 0xc08628594cd4d8a8, 0xbe1cf16d60fbe0bb
> + .quad 0xc086285b64a3ac40, 0xbe1cee8de7acfc7b
> + .quad 0xc086285d7be67630, 0xbe1ceee6256cce8d
> + .quad 0xc086285f929d7fa0, 0xbe1cee7d66a3d8a5
> + .quad 0xc0862861a8c91170, 0xbe1cf0bef8265792
> + .quad 0xc0862863be697458, 0xbe1cf097f890c6f8
> + .quad 0xc0862865d37ef0c8, 0xbe1cf09502d5c3fc
> + .quad 0xc0862867e809cf00, 0xbe1ceeffb239dac7
> + .quad 0xc0862869fc0a56f8, 0xbe1cf1fbfff95c98
> + .quad 0xc086286c0f80d090, 0xbe1cefa57ad3eef7
> + .quad 0xc086286e226d8348, 0xbe1cf22c58b9183d
> + .quad 0xc086287034d0b690, 0xbe1ceff262d0a248
> + .quad 0xc086287246aab180, 0xbe1cefa7bc194186
> + .quad 0xc086287457fbbb08, 0xbe1cf06782d784d9
> + .quad 0xc086287668c419e0, 0xbe1cf1d44d0eaa07
> + .quad 0xc086287879041490, 0xbe1cf034803c8a48
> + .quad 0xc086287a88bbf158, 0xbe1cf08e84916b6f
> + .quad 0xc086287c97ebf650, 0xbe1cf0c4d3dc1bc7
> + .quad 0xc086287ea6946958, 0xbe1cefb1e4625943
> + .quad 0xc0862880b4b59010, 0xbe1cf143efdd1fd0
> + .quad 0xc0862882c24faff8, 0xbe1cee9896d016da
> + .quad 0xc0862884cf630e38, 0xbe1cf2186072f2cc
> + .quad 0xc0862886dbefeff0, 0xbe1cef9217633d34
> + .quad 0xc0862888e7f699e0, 0xbe1cf05603549486
> + .quad 0xc086288af37750b0, 0xbe1cef50fff513d3
> + .quad 0xc086288cfe7258c0, 0xbe1cf127713b32d0
> + .quad 0xc086288f08e7f650, 0xbe1cf05015520f3d
> + .quad 0xc086289112d86d58, 0xbe1cf12eb458b26f
> + .quad 0xc08628931c4401a8, 0xbe1cf22eae2887ed
> + .quad 0xc0862895252af6e0, 0xbe1cefdd6656dd2d
> + .quad 0xc08628972d8d9058, 0xbe1cf1048ea4e646
> + .quad 0xc0862899356c1150, 0xbe1ceec4501167e9
> + .quad 0xc086289b3cc6bcb8, 0xbe1cf0ad52becc3f
> + .quad 0xc086289d439dd568, 0xbe1cf0daa4e00e35
> + .quad 0xc086289f49f19df8, 0xbe1cf00b80de8d6a
> + .quad 0xc08628a14fc258c8, 0xbe1cf1bcf2ea8464
> + .quad 0xc08628a355104818, 0xbe1cf0435e2782b0
> + .quad 0xc08628a559dbade0, 0xbe1cf0e3e1a5f56c
> + .quad 0xc08628a75e24cbf8, 0xbe1cefed9d5a721d
> + .quad 0xc08628a961ebe3f8, 0xbe1cf0d2d74321e2
> + .quad 0xc08628ab65313750, 0xbe1cf24200eb55e9
> + .quad 0xc08628ad67f50740, 0xbe1cf23e9d7cf979
> + .quad 0xc08628af6a3794d0, 0xbe1cf23a088f421c
> + .quad 0xc08628b16bf920e0, 0xbe1cef2c1de1ab32
> + .quad 0xc08628b36d39ec08, 0xbe1cf1abc231f7b2
> + .quad 0xc08628b56dfa36d0, 0xbe1cf2074d5ba303
> + .quad 0xc08628b76e3a4180, 0xbe1cf05cd5eed880
> + /*== Log_LA_table ==*/
> + .align 16
> + .quad 0x8000000000000000
> + .quad 0xbf5ff802a9ab10e6
> + .quad 0xbf6ff00aa2b10bc0
> + .quad 0xbf77ee11ebd82e94
> + .quad 0xbf7fe02a6b106789
> + .quad 0xbf83e7295d25a7d9
> + .quad 0xbf87dc475f810a77
> + .quad 0xbf8bcf712c74384c
> + .quad 0xbf8fc0a8b0fc03e4
> + .quad 0xbf91d7f7eb9eebe7
> + .quad 0xbf93cea44346a575
> + .quad 0xbf95c45a51b8d389
> + .quad 0xbf97b91b07d5b11b
> + .quad 0xbf99ace7551cc514
> + .quad 0xbf9b9fc027af9198
> + .quad 0xbf9d91a66c543cc4
> + .quad 0xbf9f829b0e783300
> + .quad 0xbfa0b94f7c196176
> + .quad 0xbfa1b0d98923d980
> + .quad 0xbfa2a7ec2214e873
> + .quad 0xbfa39e87b9febd60
> + .quad 0xbfa494acc34d911c
> + .quad 0xbfa58a5bafc8e4d5
> + .quad 0xbfa67f94f094bd98
> + .quad 0xbfa77458f632dcfc
> + .quad 0xbfa868a83083f6cf
> + .quad 0xbfa95c830ec8e3eb
> + .quad 0xbfaa4fe9ffa3d235
> + .quad 0xbfab42dd711971bf
> + .quad 0xbfac355dd0921f2d
> + .quad 0xbfad276b8adb0b52
> + .quad 0xbfae19070c276016
> + .quad 0xbfaf0a30c01162a6
> + .quad 0xbfaffae9119b9303
> + .quad 0xbfb075983598e471
> + .quad 0xbfb0ed839b5526fe
> + .quad 0xbfb16536eea37ae1
> + .quad 0xbfb1dcb263db1944
> + .quad 0xbfb253f62f0a1417
> + .quad 0xbfb2cb0283f5de1f
> + .quad 0xbfb341d7961bd1d1
> + .quad 0xbfb3b87598b1b6ee
> + .quad 0xbfb42edcbea646f0
> + .quad 0xbfb4a50d3aa1b040
> + .quad 0xbfb51b073f06183f
> + .quad 0xbfb590cafdf01c28
> + .quad 0xbfb60658a93750c4
> + .quad 0xbfb67bb0726ec0fc
> + .quad 0xbfb6f0d28ae56b4c
> + .quad 0xbfb765bf23a6be13
> + .quad 0xbfb7da766d7b12cd
> + .quad 0xbfb84ef898e8282a
> + .quad 0xbfb8c345d6319b21
> + .quad 0xbfb9375e55595ede
> + .quad 0xbfb9ab42462033ad
> + .quad 0xbfba1ef1d8061cd4
> + .quad 0xbfba926d3a4ad563
> + .quad 0xbfbb05b49bee43fe
> + .quad 0xbfbb78c82bb0eda1
> + .quad 0xbfbbeba818146765
> + .quad 0xbfbc5e548f5bc743
> + .quad 0xbfbcd0cdbf8c13e1
> + .quad 0xbfbd4313d66cb35d
> + .quad 0xbfbdb5270187d927
> + .quad 0xbfbe27076e2af2e6
> + .quad 0xbfbe98b549671467
> + .quad 0xbfbf0a30c01162a6
> + .quad 0xbfbf7b79fec37ddf
> + .quad 0xbfbfec9131dbeabb
> + .quad 0xbfc02ebb42bf3d4b
> + .quad 0xbfc0671512ca596e
> + .quad 0xbfc09f561ee719c3
> + .quad 0xbfc0d77e7cd08e59
> + .quad 0xbfc10f8e422539b1
> + .quad 0xbfc14785846742ac
> + .quad 0xbfc17f6458fca611
> + .quad 0xbfc1b72ad52f67a0
> + .quad 0xbfc1eed90e2dc2c3
> + .quad 0xbfc2266f190a5acb
> + .quad 0xbfc25ded0abc6ad2
> + .quad 0xbfc29552f81ff523
> + .quad 0xbfc2cca0f5f5f251
> + .quad 0xbfc303d718e47fd3
> + .quad 0xbfc33af575770e4f
> + .quad 0xbfc371fc201e8f74
> + .quad 0xbfc3a8eb2d31a376
> + .quad 0xbfc3dfc2b0ecc62a
> + .quad 0xbfc41682bf727bc0
> + .quad 0xbfc44d2b6ccb7d1e
> + .quad 0xbfc483bccce6e3dd
> + .quad 0xbfc4ba36f39a55e5
> + .quad 0xbfc4f099f4a230b2
> + .quad 0xbfc526e5e3a1b438
> + .quad 0xbfc55d1ad4232d6f
> + .quad 0xbfc59338d9982086
> + .quad 0xbfc5c940075972b9
> + .quad 0xbfc5ff3070a793d4
> + .quad 0xbfc6350a28aaa758
> + .quad 0xbfc66acd4272ad51
> + .quad 0xbfc6a079d0f7aad2
> + .quad 0xbfc6d60fe719d21d
> + .quad 0xbfc70b8f97a1aa75
> + .quad 0xbfc740f8f54037a5
> + .quad 0xbfc7764c128f2127
> + .quad 0xbfc7ab890210d909
> + .quad 0xbfc7e0afd630c274
> + .quad 0xbfc815c0a14357eb
> + .quad 0xbfc84abb75865139
> + .quad 0xbfc87fa06520c911
> + .quad 0xbfc8b46f8223625b
> + .quad 0xbfc8e928de886d41
> + .quad 0xbfc91dcc8c340bde
> + .quad 0xbfc9525a9cf456b4
> + .quad 0xbfc986d3228180ca
> + .quad 0xbfc9bb362e7dfb83
> + .quad 0xbfc9ef83d2769a34
> + .quad 0xbfca23bc1fe2b563
> + .quad 0xbfca57df28244dcd
> + .quad 0xbfca8becfc882f19
> + .quad 0xbfcabfe5ae46124c
> + .quad 0xbfcaf3c94e80bff3
> + .quad 0xbfcb2797ee46320c
> + .quad 0xbfcb5b519e8fb5a4
> + .quad 0xbfcb8ef670420c3b
> + .quad 0xbfcbc286742d8cd6
> + .quad 0xbfcbf601bb0e44e2
> + .quad 0xbfcc2968558c18c1
> + .quad 0xbfcc5cba543ae425
> + .quad 0xbfcc8ff7c79a9a22
> + .quad 0xbfccc320c0176502
> + .quad 0xbfccf6354e09c5dc
> + .quad 0xbfcd293581b6b3e7
> + .quad 0xbfcd5c216b4fbb91
> + .quad 0xbfcd8ef91af31d5e
> + .quad 0xbfcdc1bca0abec7d
> + .quad 0xbfcdf46c0c722d2f
> + .quad 0xbfce27076e2af2e6
> + .quad 0xbfce598ed5a87e2f
> + .quad 0xbfce8c0252aa5a60
> + .quad 0xbfcebe61f4dd7b0b
> + .quad 0xbfcef0adcbdc5936
> + .quad 0xbfcf22e5e72f105d
> + .quad 0xbfcf550a564b7b37
> + .quad 0xbfcf871b28955045
> + .quad 0xbfcfb9186d5e3e2b
> + .quad 0xbfcfeb0233e607cc
> + .quad 0xbfd00e6c45ad501d
> + .quad 0xbfd0274dc16c232f
> + .quad 0xbfd0402594b4d041
> + .quad 0xbfd058f3c703ebc6
> + .quad 0xbfd071b85fcd590d
> + .quad 0xbfd08a73667c57af
> + .quad 0xbfd0a324e27390e3
> + .quad 0xbfd0bbccdb0d24bd
> + .quad 0xbfd0d46b579ab74b
> + .quad 0xbfd0ed005f657da4
> + .quad 0xbfd1058bf9ae4ad5
> + .quad 0xbfd11e0e2dad9cb7
> + .quad 0xbfd136870293a8b0
> + .quad 0xbfd14ef67f88685a
> + .quad 0xbfd1675cababa60e
> + .quad 0xbfd17fb98e15095d
> + .quad 0xbfd1980d2dd4236f
> + .quad 0xbfd1b05791f07b49
> + .quad 0xbfd1c898c16999fb
> + .quad 0xbfd1e0d0c33716be
> + .quad 0xbfd1f8ff9e48a2f3
> + .quad 0xbfd211255986160c
> + .quad 0xbfd22941fbcf7966
> + .quad 0xbfd241558bfd1404
> + .quad 0xbfd2596010df763a
> + .quad 0xbfd27161913f853d
> + .quad 0xbfd2895a13de86a3
> + .quad 0xbfd2a1499f762bc9
> + .quad 0xbfd2b9303ab89d25
> + .quad 0xbfd2d10dec508583
> + .quad 0xbfd2e8e2bae11d31
> + .quad 0xbfd300aead06350c
> + .quad 0xbfd31871c9544185
> + .quad 0xbfd3302c16586588
> + .quad 0xbfd347dd9a987d55
> + .quad 0xbfd35f865c93293e
> + .quad 0xbfd3772662bfd85b
> + .quad 0xbfd38ebdb38ed321
> + .quad 0xbfd3a64c556945ea
> + .quad 0xbfd3bdd24eb14b6a
> + .quad 0xbfd3d54fa5c1f710
> + .quad 0xbfd3ecc460ef5f50
> + .quad 0xbfd404308686a7e4
> + .quad 0xbfd41b941cce0bee
> + .quad 0xbfd432ef2a04e814
> + .quad 0xbfd44a41b463c47c
> + .quad 0xbfd4618bc21c5ec2
> + .quad 0xbfd478cd5959b3d9
> + .quad 0xbfd49006804009d1
> + .quad 0xbfd4a7373cecf997
> + .quad 0xbfd4be5f957778a1
> + .quad 0xbfd4d57f8fefe27f
> + .quad 0xbfd4ec973260026a
> + .quad 0xbfd503a682cb1cb3
> + .quad 0xbfd51aad872df82d
> + .quad 0xbfd531ac457ee77e
> + .quad 0xbfd548a2c3add263
> + .quad 0xbfd55f9107a43ee2
> + .quad 0xbfd5767717455a6c
> + .quad 0xbfd58d54f86e02f2
> + .quad 0xbfd5a42ab0f4cfe2
> + .quad 0xbfd5baf846aa1b19
> + .quad 0xbfd5d1bdbf5809ca
> + .quad 0xbfd5e87b20c2954a
> + .quad 0xbfd5ff3070a793d4
> + .quad 0xbfd615ddb4bec13c
> + .quad 0xbfd62c82f2b9c795
> + .quad 0x3fd61965cdb02c1f
> + .quad 0x3fd602d08af091ec
> + .quad 0x3fd5ec433d5c35ae
> + .quad 0x3fd5d5bddf595f30
> + .quad 0x3fd5bf406b543db2
> + .quad 0x3fd5a8cadbbedfa1
> + .quad 0x3fd5925d2b112a59
> + .quad 0x3fd57bf753c8d1fb
> + .quad 0x3fd565995069514c
> + .quad 0x3fd54f431b7be1a9
> + .quad 0x3fd538f4af8f72fe
> + .quad 0x3fd522ae0738a3d8
> + .quad 0x3fd50c6f1d11b97c
> + .quad 0x3fd4f637ebba9810
> + .quad 0x3fd4e0086dd8baca
> + .quad 0x3fd4c9e09e172c3c
> + .quad 0x3fd4b3c077267e9a
> + .quad 0x3fd49da7f3bcc41f
> + .quad 0x3fd487970e958770
> + .quad 0x3fd4718dc271c41b
> + .quad 0x3fd45b8c0a17df13
> + .quad 0x3fd44591e0539f49
> + .quad 0x3fd42f9f3ff62642
> + .quad 0x3fd419b423d5e8c7
> + .quad 0x3fd403d086cea79c
> + .quad 0x3fd3edf463c1683e
> + .quad 0x3fd3d81fb5946dba
> + .quad 0x3fd3c25277333184
> + .quad 0x3fd3ac8ca38e5c5f
> + .quad 0x3fd396ce359bbf54
> + .quad 0x3fd3811728564cb2
> + .quad 0x3fd36b6776be1117
> + .quad 0x3fd355bf1bd82c8b
> + .quad 0x3fd3401e12aecba1
> + .quad 0x3fd32a84565120a8
> + .quad 0x3fd314f1e1d35ce4
> + .quad 0x3fd2ff66b04ea9d4
> + .quad 0x3fd2e9e2bce12286
> + .quad 0x3fd2d46602adccee
> + .quad 0x3fd2bef07cdc9354
> + .quad 0x3fd2a982269a3dbf
> + .quad 0x3fd2941afb186b7c
> + .quad 0x3fd27ebaf58d8c9d
> + .quad 0x3fd269621134db92
> + .quad 0x3fd25410494e56c7
> + .quad 0x3fd23ec5991eba49
> + .quad 0x3fd22981fbef797b
> + .quad 0x3fd214456d0eb8d4
> + .quad 0x3fd1ff0fe7cf47a7
> + .quad 0x3fd1e9e1678899f4
> + .quad 0x3fd1d4b9e796c245
> + .quad 0x3fd1bf99635a6b95
> + .quad 0x3fd1aa7fd638d33f
> + .quad 0x3fd1956d3b9bc2fa
> + .quad 0x3fd180618ef18adf
> + .quad 0x3fd16b5ccbacfb73
> + .quad 0x3fd1565eed455fc3
> + .quad 0x3fd14167ef367783
> + .quad 0x3fd12c77cd00713b
> + .quad 0x3fd1178e8227e47c
> + .quad 0x3fd102ac0a35cc1c
> + .quad 0x3fd0edd060b78081
> + .quad 0x3fd0d8fb813eb1ef
> + .quad 0x3fd0c42d676162e3
> + .quad 0x3fd0af660eb9e279
> + .quad 0x3fd09aa572e6c6d4
> + .quad 0x3fd085eb8f8ae797
> + .quad 0x3fd07138604d5862
> + .quad 0x3fd05c8be0d9635a
> + .quad 0x3fd047e60cde83b8
> + .quad 0x3fd03346e0106062
> + .quad 0x3fd01eae5626c691
> + .quad 0x3fd00a1c6adda473
> + .quad 0x3fcfeb2233ea07cd
> + .quad 0x3fcfc218be620a5e
> + .quad 0x3fcf991c6cb3b379
> + .quad 0x3fcf702d36777df0
> + .quad 0x3fcf474b134df229
> + .quad 0x3fcf1e75fadf9bde
> + .quad 0x3fcef5ade4dcffe6
> + .quad 0x3fceccf2c8fe920a
> + .quad 0x3fcea4449f04aaf5
> + .quad 0x3fce7ba35eb77e2a
> + .quad 0x3fce530effe71012
> + .quad 0x3fce2a877a6b2c12
> + .quad 0x3fce020cc6235ab5
> + .quad 0x3fcdd99edaf6d7e9
> + .quad 0x3fcdb13db0d48940
> + .quad 0x3fcd88e93fb2f450
> + .quad 0x3fcd60a17f903515
> + .quad 0x3fcd38666871f465
> + .quad 0x3fcd1037f2655e7b
> + .quad 0x3fcce816157f1988
> + .quad 0x3fccc000c9db3c52
> + .quad 0x3fcc97f8079d44ec
> + .quad 0x3fcc6ffbc6f00f71
> + .quad 0x3fcc480c0005ccd1
> + .quad 0x3fcc2028ab17f9b4
> + .quad 0x3fcbf851c067555f
> + .quad 0x3fcbd087383bd8ad
> + .quad 0x3fcba8c90ae4ad19
> + .quad 0x3fcb811730b823d2
> + .quad 0x3fcb5971a213acdb
> + .quad 0x3fcb31d8575bce3d
> + .quad 0x3fcb0a4b48fc1b46
> + .quad 0x3fcae2ca6f672bd4
> + .quad 0x3fcabb55c31693ad
> + .quad 0x3fca93ed3c8ad9e3
> + .quad 0x3fca6c90d44b704e
> + .quad 0x3fca454082e6ab05
> + .quad 0x3fca1dfc40f1b7f1
> + .quad 0x3fc9f6c407089664
> + .quad 0x3fc9cf97cdce0ec3
> + .quad 0x3fc9a8778debaa38
> + .quad 0x3fc981634011aa75
> + .quad 0x3fc95a5adcf7017f
> + .quad 0x3fc9335e5d594989
> + .quad 0x3fc90c6db9fcbcd9
> + .quad 0x3fc8e588ebac2dbf
> + .quad 0x3fc8beafeb38fe8c
> + .quad 0x3fc897e2b17b19a5
> + .quad 0x3fc871213750e994
> + .quad 0x3fc84a6b759f512f
> + .quad 0x3fc823c16551a3c2
> + .quad 0x3fc7fd22ff599d4f
> + .quad 0x3fc7d6903caf5ad0
> + .quad 0x3fc7b0091651528c
> + .quad 0x3fc7898d85444c73
> + .quad 0x3fc7631d82935a86
> + .quad 0x3fc73cb9074fd14d
> + .quad 0x3fc716600c914054
> + .quad 0x3fc6f0128b756abc
> + .quad 0x3fc6c9d07d203fc7
> + .quad 0x3fc6a399dabbd383
> + .quad 0x3fc67d6e9d785771
> + .quad 0x3fc6574ebe8c133a
> + .quad 0x3fc6313a37335d76
> + .quad 0x3fc60b3100b09476
> + .quad 0x3fc5e533144c1719
> + .quad 0x3fc5bf406b543db2
> + .quad 0x3fc59958ff1d52f1
> + .quad 0x3fc5737cc9018cdd
> + .quad 0x3fc54dabc26105d2
> + .quad 0x3fc527e5e4a1b58d
> + .quad 0x3fc5022b292f6a45
> + .quad 0x3fc4dc7b897bc1c8
> + .quad 0x3fc4b6d6fefe22a4
> + .quad 0x3fc4913d8333b561
> + .quad 0x3fc46baf0f9f5db7
> + .quad 0x3fc4462b9dc9b3dc
> + .quad 0x3fc420b32740fdd4
> + .quad 0x3fc3fb45a59928cc
> + .quad 0x3fc3d5e3126bc27f
> + .quad 0x3fc3b08b6757f2a9
> + .quad 0x3fc38b3e9e027479
> + .quad 0x3fc365fcb0159016
> + .quad 0x3fc340c59741142e
> + .quad 0x3fc31b994d3a4f85
> + .quad 0x3fc2f677cbbc0a96
> + .quad 0x3fc2d1610c86813a
> + .quad 0x3fc2ac55095f5c59
> + .quad 0x3fc28753bc11aba5
> + .quad 0x3fc2625d1e6ddf57
> + .quad 0x3fc23d712a49c202
> + .quad 0x3fc2188fd9807263
> + .quad 0x3fc1f3b925f25d41
> + .quad 0x3fc1ceed09853752
> + .quad 0x3fc1aa2b7e23f72a
> + .quad 0x3fc185747dbecf34
> + .quad 0x3fc160c8024b27b1
> + .quad 0x3fc13c2605c398c3
> + .quad 0x3fc1178e8227e47c
> + .quad 0x3fc0f301717cf0fb
> + .quad 0x3fc0ce7ecdccc28d
> + .quad 0x3fc0aa06912675d5
> + .quad 0x3fc08598b59e3a07
> + .quad 0x3fc06135354d4b18
> + .quad 0x3fc03cdc0a51ec0d
> + .quad 0x3fc0188d2ecf6140
> + .quad 0x3fbfe89139dbd566
> + .quad 0x3fbfa01c9db57ce2
> + .quad 0x3fbf57bc7d9005db
> + .quad 0x3fbf0f70cdd992e3
> + .quad 0x3fbec739830a1120
> + .quad 0x3fbe7f1691a32d3e
> + .quad 0x3fbe3707ee30487b
> + .quad 0x3fbdef0d8d466db9
> + .quad 0x3fbda727638446a2
> + .quad 0x3fbd5f55659210e2
> + .quad 0x3fbd179788219364
> + .quad 0x3fbccfedbfee13a8
> + .quad 0x3fbc885801bc4b23
> + .quad 0x3fbc40d6425a5cb1
> + .quad 0x3fbbf968769fca11
> + .quad 0x3fbbb20e936d6974
> + .quad 0x3fbb6ac88dad5b1c
> + .quad 0x3fbb23965a52ff00
> + .quad 0x3fbadc77ee5aea8c
> + .quad 0x3fba956d3ecade63
> + .quad 0x3fba4e7640b1bc38
> + .quad 0x3fba0792e9277cac
> + .quad 0x3fb9c0c32d4d2548
> + .quad 0x3fb97a07024cbe74
> + .quad 0x3fb9335e5d594989
> + .quad 0x3fb8ecc933aeb6e8
> + .quad 0x3fb8a6477a91dc29
> + .quad 0x3fb85fd927506a48
> + .quad 0x3fb8197e2f40e3f0
> + .quad 0x3fb7d33687c293c9
> + .quad 0x3fb78d02263d82d3
> + .quad 0x3fb746e100226ed9
> + .quad 0x3fb700d30aeac0e1
> + .quad 0x3fb6bad83c1883b6
> + .quad 0x3fb674f089365a7a
> + .quad 0x3fb62f1be7d77743
> + .quad 0x3fb5e95a4d9791cb
> + .quad 0x3fb5a3abb01ade25
> + .quad 0x3fb55e10050e0384
> + .quad 0x3fb518874226130a
> + .quad 0x3fb4d3115d207eac
> + .quad 0x3fb48dae4bc31018
> + .quad 0x3fb4485e03dbdfad
> + .quad 0x3fb403207b414b7f
> + .quad 0x3fb3bdf5a7d1ee64
> + .quad 0x3fb378dd7f749714
> + .quad 0x3fb333d7f8183f4b
> + .quad 0x3fb2eee507b40301
> + .quad 0x3fb2aa04a44717a5
> + .quad 0x3fb26536c3d8c369
> + .quad 0x3fb2207b5c78549e
> + .quad 0x3fb1dbd2643d190b
> + .quad 0x3fb1973bd1465567
> + .quad 0x3fb152b799bb3cc9
> + .quad 0x3fb10e45b3cae831
> + .quad 0x3fb0c9e615ac4e17
> + .quad 0x3fb08598b59e3a07
> + .quad 0x3fb0415d89e74444
> + .quad 0x3faffa6911ab9301
> + .quad 0x3faf723b517fc523
> + .quad 0x3faeea31c006b87c
> + .quad 0x3fae624c4a0b5e1b
> + .quad 0x3fadda8adc67ee4e
> + .quad 0x3fad52ed6405d86f
> + .quad 0x3faccb73cdddb2cc
> + .quad 0x3fac441e06f72a9e
> + .quad 0x3fabbcebfc68f420
> + .quad 0x3fab35dd9b58baad
> + .quad 0x3faaaef2d0fb10fc
> + .quad 0x3faa282b8a936171
> + .quad 0x3fa9a187b573de7c
> + .quad 0x3fa91b073efd7314
> + .quad 0x3fa894aa149fb343
> + .quad 0x3fa80e7023d8ccc4
> + .quad 0x3fa788595a3577ba
> + .quad 0x3fa70265a550e777
> + .quad 0x3fa67c94f2d4bb58
> + .quad 0x3fa5f6e73078efb8
> + .quad 0x3fa5715c4c03ceef
> + .quad 0x3fa4ebf43349e26f
> + .quad 0x3fa466aed42de3ea
> + .quad 0x3fa3e18c1ca0ae92
> + .quad 0x3fa35c8bfaa1306b
> + .quad 0x3fa2d7ae5c3c5bae
> + .quad 0x3fa252f32f8d183f
> + .quad 0x3fa1ce5a62bc353a
> + .quad 0x3fa149e3e4005a8d
> + .quad 0x3fa0c58fa19dfaaa
> + .quad 0x3fa0415d89e74444
> + .quad 0x3f9f7a9b16782856
> + .quad 0x3f9e72bf2813ce51
> + .quad 0x3f9d6b2725979802
> + .quad 0x3f9c63d2ec14aaf2
> + .quad 0x3f9b5cc258b718e6
> + .quad 0x3f9a55f548c5c43f
> + .quad 0x3f994f6b99a24475
> + .quad 0x3f98492528c8cabf
> + .quad 0x3f974321d3d006d3
> + .quad 0x3f963d6178690bd6
> + .quad 0x3f9537e3f45f3565
> + .quad 0x3f9432a925980cc1
> + .quad 0x3f932db0ea132e22
> + .quad 0x3f9228fb1fea2e28
> + .quad 0x3f912487a5507f70
> + .quad 0x3f90205658935847
> + .quad 0x3f8e38ce3033310c
> + .quad 0x3f8c317384c75f06
> + .quad 0x3f8a2a9c6c170462
> + .quad 0x3f882448a388a2aa
> + .quad 0x3f861e77e8b53fc6
> + .quad 0x3f841929f96832f0
> + .quad 0x3f82145e939ef1e9
> + .quad 0x3f8010157588de71
> + .quad 0x3f7c189cbb0e27fb
> + .quad 0x3f78121214586b54
> + .quad 0x3f740c8a747878e2
> + .quad 0x3f70080559588b35
> + .quad 0x3f680904828985c0
> + .quad 0x3f60040155d5889e
> + .quad 0x3f50020055655889
> + .quad 0x0000000000000000
> + /*== poly_coeff[4] ==*/
> + .align 16
> + .quad 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A /* coeff4 */
> + .quad 0xbfd0000148058EE1, 0xbfd0000148058EE1 /* coeff3 */
> + .quad 0x3fd55555555543C5, 0x3fd55555555543C5 /* coeff2 */
> + .quad 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F /* coeff1 */
> + /*== ExpMask ==*/
> + .align 16
> + .quad 0x000fffffffffffff, 0x000fffffffffffff
> + /*== Two10 ==*/
> + .align 16
> + .quad 0x3f50000000000000, 0x3f50000000000000
> + /*== MinLog1p = -1+2^(-53) ==*/
> + .align 16
> + .quad 0xbfefffffffffffff, 0xbfefffffffffffff
> + /*== MaxLog1p ==*/
> + .align 16
> + .quad 0x7f3ffffffffff000, 0x7f3ffffffffff000
> + /*== One ==*/
> + .align 16
> + .quad 0x3ff0000000000000, 0x3ff0000000000000
> + /*== SgnMask ==*/
> + .align 16
> + .quad 0x7fffffffffffffff, 0x7fffffffffffffff
> + /*== XThreshold ==*/
> + .align 16
> + .quad 0x3e00000000000000, 0x3e00000000000000
> + /*== XhMask ==*/
> + .align 16
> + .quad 0xfffffffffffffc00, 0xfffffffffffffc00
> + /*== Threshold ==*/
> + .align 16
> + .quad 0x4086a00000000000, 0x4086a00000000000
> + /*== Bias ==*/
> + .align 16
> + .quad 0x408ff80000000000, 0x408ff80000000000
> + /*== Bias1 ==*/
> + .align 16
> + .quad 0x408ff00000000000, 0x408ff00000000000
> + /*== ExpMask0 ==*/
> + .align 16
> + .quad 0x7ff0000000000000, 0x7ff0000000000000
> + /*== ExpMask2 ==*/
> + .align 16
> + .quad 0x7f40000000000000, 0x7f40000000000000
> + /*== L2L ==*/
> + .align 16
> + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF
> + /*== dHalf ==*/
> + .align 16
> + .quad 0x3FE0000000000000, 0x3FE0000000000000
> + /*== dSign ==*/
> + .align 16
> + .quad 0x8000000000000000, 0x8000000000000000
> + /*== dTopMask12 ==*/
> + .align 16
> + .quad 0xFFFFFE0000000000, 0xFFFFFE0000000000
> + /*== dTopMask41 ==*/
> + .align 16
> + .quad 0xFFFFFFFFFFFFF000, 0xFFFFFFFFFFFFF000
> + /*== dTinyRange ==*/
> + .align 16
> + .quad 0x0350000000000000, 0x0350000000000000
> + .align 16
> + .type __svml_datanh_data_internal,@object
> + .size __svml_datanh_data_internal,.-__svml_datanh_data_internal
> + .align 16
> +
> +.FLT_21:
> + .long 0x00000000,0x43380000,0x00000000,0x43380000
> + .type .FLT_21,@object
> + .size .FLT_21,16
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core-sse.S
> new file mode 100644
> index 0000000000..a39cbb7595
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core-sse.S
> @@ -0,0 +1,20 @@
> +/* SSE version of vectorized atanh, vector length is 4.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define _ZGVdN4v_atanh _ZGVdN4v_atanh_sse_wrapper /* Every use of _ZGVdN4v_atanh in the file included below is rewritten to the _sse_wrapper name.  */
> +#include "../svml_d_atanh4_core.S" /* NOTE(review): presumably the generic definition of _ZGVdN4v_atanh, emitted here under the renamed symbol -- confirm.  */
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core.c
> new file mode 100644
> index 0000000000..e8ef343ae7
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core.c
> @@ -0,0 +1,27 @@
> +/* Multiple versions of vectorized atanh, vector length is 4.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define SYMBOL_NAME _ZGVdN4v_atanh /* Symbol handed to libc_ifunc_redirected below.  */
> +#include "ifunc-mathvec-avx2.h" /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR, which are not defined in this file -- confirm.  */
> +
> +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); /* NOTE(review): presumably makes SYMBOL_NAME an ifunc resolved through IFUNC_SELECTOR -- confirm against the macro definition.  */
> +
> +#ifdef SHARED /* The hidden-visibility declaration below is only emitted when SHARED is defined.  */
> +__hidden_ver1 (_ZGVdN4v_atanh, __GI__ZGVdN4v_atanh, __redirect__ZGVdN4v_atanh)
> + __attribute__ ((visibility ("hidden")));
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core_avx2.S
> new file mode 100644
> index 0000000000..1230029da2
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh4_core_avx2.S
> @@ -0,0 +1,1479 @@
> +/* Function atanh vectorized with AVX2.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + https://www.gnu.org/licenses/. */
> +
> +/*
> + * ALGORITHM DESCRIPTION:
> + *
> + * Compute atanh(x) as 0.5 * log((1 + x)/(1 - x))
> + *
> + * Special cases:
> + *
> + * atanh(0) = 0
> + * atanh(+1) = +INF
> + * atanh(-1) = -INF
> + * atanh(x) = NaN if |x| > 1, or if x is a NaN or INF
> + *
> + */
> +
> +/* Offsets for data table __svml_datanh_data_internal
> + * These are byte offsets from the table label.  The names appear in the
> + * same order as the fields of the struct typedef that is written (under
> + * #ifdef) just before the table, whose fields are all align(32).
> + */
> +#define Log_HA_table 0
> +#define Log_LA_table 8224
> +#define poly_coeff 12352
> +#define ExpMask 12480
> +#define Two10 12512
> +#define MinLog1p 12544
> +#define MaxLog1p 12576
> +#define One 12608
> +#define SgnMask 12640
> +#define XThreshold 12672
> +#define XhMask 12704
> +#define Threshold 12736
> +#define Bias 12768
> +#define Bias1 12800
> +#define ExpMask0 12832
> +#define ExpMask2 12864
> +#define L2 12896
> +#define dHalf 12928
> +#define dSign 12960
> +#define dTopMask12 12992
> +#define dTopMask41 13024
> +#define TinyRange 13056
> +
> +/* Lookup bias for data table __svml_datanh_data_internal.
> + * -0x405fe0 == Log_LA_table - 0x408000 (Log_LA_table is 8224 == 0x2020), so
> + * table + bias + index reaches Log_LA_table once index is 0x408000 or more.
> + * NOTE(review): 0x408000 is the top 24 bits of the double 512.0, which looks
> + * like the smallest index the `vpsrlq $40' of the rounded reciprocal can
> + * produce -- confirm against the reduction code.  */
> +#define Table_Lookup_Bias -0x405fe0
> +
> +#include <sysdep.h>
> +
> + .text
> + .section .text.avx2,"ax",@progbits
> +ENTRY(_ZGVdN4v_atanh_avx2) /* In: four doubles X in %ymm0.  Out: atanh of
> + * each lane in %ymm0.  Lanes for which |X| < 1 does not hold (this includes
> + * NaN) are recomputed one at a time with the scalar atanh on the
> + * special-values path at the end of the function.  */
> + pushq %rbp
> + cfi_def_cfa_offset(16)
> + movq %rsp, %rbp
> + cfi_def_cfa(6, 16)
> + cfi_offset(6, -16)
> + andq $-32, %rsp /* align the frame to 32 bytes */
> + subq $96, %rsp /* scratch area; only accessed on the special-values path */
> + lea Table_Lookup_Bias+__svml_datanh_data_internal(%rip), %r8 /* biased table base used by the four lookups below */
> + vmovupd SgnMask+__svml_datanh_data_internal(%rip), %ymm7
> +
> +/* Load the constant 1 and a sign mask */
> + vmovupd One+__svml_datanh_data_internal(%rip), %ymm11
> + vmovapd %ymm0, %ymm12 /* ymm12 keeps the original X */
> +
> +/* Strip off the sign, so treat X as positive until right at the end */
> + vandpd %ymm7, %ymm12, %ymm0
> + vsubpd %ymm0, %ymm11, %ymm6 /* ymm6 = 1 - |X| */
> +
> +/*
> + * Check whether |X| < 1, in which case we use the main function.
> + * Otherwise set the rangemask so that the callout will get used.
> + * Note that this will also use the callout for NaNs since not(NaN < 1).
> + */
> + vcmpnlt_uqpd %ymm11, %ymm0, %ymm13 /* rangemask: not (|X| < 1) */
> + vcmplt_oqpd TinyRange+__svml_datanh_data_internal(%rip), %ymm0, %ymm10 /* tiny mask: |X| < TinyRange */
> + vsubpd %ymm6, %ymm11, %ymm15
> +
> +/*
> + * Compute V = 2 * X trivially, and UHi + U_lo = 1 - X in two pieces,
> + * the upper part UHi being <= 41 bits long. Then we have
> + * atanh(X) = 1/2 * log((1 + X) / (1 - X)) = 1/2 * log1p(V / (UHi + ULo)).
> + */
> + vaddpd %ymm0, %ymm0, %ymm3
> + vcvtpd2ps %ymm6, %xmm5
> + vsubpd %ymm0, %ymm15, %ymm1
> + vrcpps %xmm5, %xmm4
> + vmovapd %ymm12, %ymm14
> + vfmadd213pd %ymm12, %ymm12, %ymm14 /* ymm14 = X + X*X, the value used for tiny lanes */
> + vcvtps2pd %xmm4, %ymm2
> +
> +/* Record the sign for eventual reincorporation. */
> + vandpd dSign+__svml_datanh_data_internal(%rip), %ymm12, %ymm9
> +
> +/* Or the sign bit in with the tiny result to handle atanh(-0) correctly */
> + vorpd %ymm9, %ymm14, %ymm8
> + vandpd dTopMask12+__svml_datanh_data_internal(%rip), %ymm2, %ymm14
> +
> +/* No need to split dU when FMA is available */
> + vfnmadd213pd %ymm11, %ymm14, %ymm6
> + vfnmadd231pd %ymm14, %ymm1, %ymm6
> +
> +/*
> + * Compute D = E + E^2 + E^3 + E^4 + E^5
> + * = E + (E + E^2) (E + E * E^2)
> + * Only saves when FMA is available
> + */
> + vmovapd %ymm11, %ymm0
> + vmovapd %ymm6, %ymm5
> + vfmadd231pd %ymm6, %ymm6, %ymm0
> + vfmadd213pd %ymm6, %ymm6, %ymm5
> + vfmadd213pd %ymm11, %ymm0, %ymm5
> + vmovmskpd %ymm13, %eax /* eax = one bit per lane that needs the scalar callout */
> +
> +/*
> + * Split V as well into upper 41 bits and lower part, so that we can get
> + * a preliminary quotient estimate without rounding error.
> + */
> + vandpd dTopMask41+__svml_datanh_data_internal(%rip), %ymm3, %ymm13
> + vsubpd %ymm13, %ymm3, %ymm15
> +
> +/* Hence get initial quotient estimate QHi + QLo = R * VHi + R * VLo */
> + vmulpd %ymm13, %ymm14, %ymm2
> + vmulpd %ymm5, %ymm6, %ymm0
> + vmulpd %ymm15, %ymm14, %ymm4
> +
> +/* 2^ (-10-exp(X) ) */
> + vmovupd ExpMask2+__svml_datanh_data_internal(%rip), %ymm15
> +
> +/*
> + * Compute R * (VHi + VLo) * (1 + E + E^2 + E^3 + E^4 + E^5)
> + * = R * (VHi + VLo) * (1 + D)
> + * = QHi + (QHi * D + QLo + QLo * D)
> + */
> + vmulpd %ymm0, %ymm2, %ymm6
> + vfmadd213pd %ymm4, %ymm4, %ymm0
> + vaddpd %ymm0, %ymm6, %ymm5
> +
> +/*
> + * Now finally accumulate the high and low parts of the
> + * argument to log1p, H + L, with a final compensated summation.
> + */
> + vaddpd %ymm5, %ymm2, %ymm4
> +
> +/*
> + * Now we feed into the log1p code, using H in place of _VARG1 and
> + * later incorporating L into the reduced argument.
> + * compute 1+x as high, low parts
> + */
> + vmaxpd %ymm4, %ymm11, %ymm1
> + vminpd %ymm4, %ymm11, %ymm3
> + vandpd %ymm7, %ymm4, %ymm7
> + vcmplt_oqpd XThreshold+__svml_datanh_data_internal(%rip), %ymm7, %ymm0
> + vaddpd %ymm3, %ymm1, %ymm5
> + vorpd XhMask+__svml_datanh_data_internal(%rip), %ymm0, %ymm4
> + vandpd %ymm4, %ymm5, %ymm5
> +
> +/* preserve mantissa, set input exponent to 2^(-10) */
> + vandpd ExpMask+__svml_datanh_data_internal(%rip), %ymm5, %ymm6
> + vorpd Two10+__svml_datanh_data_internal(%rip), %ymm6, %ymm7
> +
> +/* reciprocal approximation good to at least 11 bits */
> + vcvtpd2ps %ymm7, %xmm13
> + vsubpd %ymm5, %ymm1, %ymm2
> + vrcpps %xmm13, %xmm14
> + vaddpd %ymm2, %ymm3, %ymm4
> + vcvtps2pd %xmm14, %ymm3
> +
> +/* exponent bits */
> + vpsrlq $20, %ymm5, %ymm2
> +
> +/* round reciprocal to nearest integer, will have 1+9 mantissa bits */
> + vroundpd $0, %ymm3, %ymm3
> +
> +/*
> + * prepare table index
> + * table lookup
> + */
> + vpsrlq $40, %ymm3, %ymm13 /* per-lane byte index, added to the biased base in %r8 */
> +
> +/* exponent of X needed to scale Xl */
> + vandps ExpMask0+__svml_datanh_data_internal(%rip), %ymm5, %ymm0
> + vpsubq %ymm0, %ymm15, %ymm6
> +
> +/* Finally, halve the result and reincorporate the sign */
> + vxorpd dHalf+__svml_datanh_data_internal(%rip), %ymm9, %ymm9 /* ymm9 = 0.5 carrying the sign bit of X */
> + vmovd %xmm13, %edx /* index for lane 0 */
> + vextractf128 $1, %ymm13, %xmm0
> + movslq %edx, %rdx
> + vpextrd $2, %xmm13, %ecx /* index for lane 1 */
> + movslq %ecx, %rcx
> + vmovd %xmm0, %esi /* index for lane 2 */
> + vmovsd (%r8,%rdx), %xmm14 /* table entry for lane 0 */
> + vmovhpd (%r8,%rcx), %xmm14, %xmm15 /* xmm15 = table entries for lanes 0 and 1 */
> +
> +/* exponent*log(2.0) */
> + vmovupd Threshold+__svml_datanh_data_internal(%rip), %ymm14
> + movslq %esi, %rsi
> + vpextrd $2, %xmm0, %edi /* index for lane 3 */
> + movslq %edi, %rdi
> + vextractf128 $1, %ymm2, %xmm1
> + vshufps $221, %xmm1, %xmm2, %xmm7
> +
> +/* scale DblRcp */
> + vmulpd %ymm6, %ymm3, %ymm2
> + vmovsd (%r8,%rsi), %xmm6 /* table entry for lane 2 */
> +
> +/* biased exponent in DP format */
> + vcvtdq2pd %xmm7, %ymm1
> + vmovhpd (%r8,%rdi), %xmm6, %xmm7 /* xmm7 = table entries for lanes 2 and 3 */
> + vcmplt_oqpd %ymm3, %ymm14, %ymm3
> +
> +/* argument reduction */
> + vfmsub213pd %ymm11, %ymm2, %ymm5
> + vmulpd %ymm2, %ymm4, %ymm11
> + vmovupd poly_coeff+64+__svml_datanh_data_internal(%rip), %ymm2
> + vaddpd %ymm11, %ymm5, %ymm5
> + vandpd Bias+__svml_datanh_data_internal(%rip), %ymm3, %ymm3
> + vorpd Bias1+__svml_datanh_data_internal(%rip), %ymm3, %ymm6
> + vsubpd %ymm6, %ymm1, %ymm1
> + vfmadd213pd poly_coeff+96+__svml_datanh_data_internal(%rip), %ymm5, %ymm2
> + vmulpd %ymm5, %ymm5, %ymm4
> + vmulpd L2+__svml_datanh_data_internal(%rip), %ymm1, %ymm3
> +
> +/* polynomial */
> + vmovupd poly_coeff+__svml_datanh_data_internal(%rip), %ymm1
> + vfmadd213pd poly_coeff+32+__svml_datanh_data_internal(%rip), %ymm5, %ymm1
> + vfmadd213pd %ymm2, %ymm4, %ymm1
> +
> +/* reconstruction */
> + vfmadd213pd %ymm5, %ymm4, %ymm1
> + vinsertf128 $1, %xmm7, %ymm15, %ymm0 /* ymm0 = the four table entries, lanes 0..3 */
> + vaddpd %ymm1, %ymm0, %ymm0
> + vaddpd %ymm0, %ymm3, %ymm6
> + vmulpd %ymm6, %ymm9, %ymm0 /* multiply by the signed 0.5 held in ymm9 */
> + vblendvpd %ymm10, %ymm8, %ymm0, %ymm0 /* lanes in the tiny mask take ymm8 (signed X + X*X) instead */
> + testl %eax, %eax /* any lane flagged by the rangemask? */
> +
> +/* Go to special inputs processing branch */
> + jne L(SPECIAL_VALUES_BRANCH)
> + # LOE rbx r12 r13 r14 r15 eax ymm0 ymm12
> +
> +/* Restore registers
> + * and exit the function
> + */
> +
> +L(EXIT):
> + movq %rbp, %rsp
> + popq %rbp
> + cfi_def_cfa(7, 8)
> + cfi_restore(6)
> + ret
> + cfi_def_cfa(6, 16)
> + cfi_offset(6, -16)
> +
> +/* Branch to process
> + * special inputs
> + *
> + * Scratch layout relative to %rsp on this path:
> + * 0 saved %r14, 8 saved %r13, 16 saved %r12,
> + * 32..63 copy of the input vector, 64..95 copy of the result vector.
> + */
> +
> +L(SPECIAL_VALUES_BRANCH):
> + vmovupd %ymm12, 32(%rsp) /* spill the original inputs */
> + vmovupd %ymm0, 64(%rsp) /* spill the vector-path results */
> + # LOE rbx r12 r13 r14 r15 eax ymm0
> +
> + xorl %edx, %edx
> + # LOE rbx r12 r13 r14 r15 eax edx
> +
> + vzeroupper
> + movq %r12, 16(%rsp) /* save %r12; it becomes the lane counter */
> + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
> + movl %edx, %r12d /* lane counter starts at 0 */
> + movq %r13, 8(%rsp) /* save %r13; it holds the rangemask bits */
> + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
> + movl %eax, %r13d
> + movq %r14, (%rsp) /* save %r14; it is used as the index register around the scalar call */
> + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
> + # LOE rbx r15 r12d r13d
> +
> +/* Range mask
> + * bits check
> + */
> +
> +L(RANGEMASK_CHECK):
> + btl %r12d, %r13d /* carry = rangemask bit of the current lane */
> +
> +/* Call scalar math function */
> + jc L(SCALAR_MATH_CALL)
> + # LOE rbx r15 r12d r13d
> +
> +/* Special inputs
> + * processing loop
> + */
> +
> +L(SPECIAL_VALUES_LOOP):
> + incl %r12d
> + cmpl $4, %r12d /* four double lanes in a ymm register */
> +
> +/* Check bits in range mask */
> + jl L(RANGEMASK_CHECK)
> + # LOE rbx r15 r12d r13d
> +
> + movq 16(%rsp), %r12
> + cfi_restore(12)
> + movq 8(%rsp), %r13
> + cfi_restore(13)
> + movq (%rsp), %r14
> + cfi_restore(14)
> + vmovupd 64(%rsp), %ymm0 /* reload the results, including the lanes patched by the scalar calls */
> +
> +/* Go to exit */
> + jmp L(EXIT)
> + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
> + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
> + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
> + # LOE rbx r12 r13 r14 r15 ymm0
> +
> +/* Scalar math function call
> + * to process special input
> + */
> +
> +L(SCALAR_MATH_CALL):
> + movl %r12d, %r14d /* the 32-bit move zero-extends the lane index into %r14 */
> + movsd 32(%rsp,%r14,8), %xmm0 /* original input of this lane */
> + call atanh@PLT
> + # LOE rbx r14 r15 r12d r13d xmm0
> +
> + movsd %xmm0, 64(%rsp,%r14,8) /* overwrite this lane of the spilled result */
> +
> +/* Process special inputs in loop */
> + jmp L(SPECIAL_VALUES_LOOP)
> + # LOE rbx r15 r12d r13d
> +END(_ZGVdN4v_atanh_avx2)
> +
> + .section .rodata, "a"
> + .align 32
> +
> +#ifdef __svml_datanh_data_internal_typedef /* C description of the
> + * layout of the table labelled right after the #endif.  It is only seen
> + * when this macro is defined.  The field order matches the offset #defines
> + * at the top of this file.  */
> +typedef unsigned int VUINT32;
> +typedef struct {
> + __declspec(align(32)) VUINT32 Log_HA_table[(1<<10)+2][2]; /* (1<<10)+2 entries, two VUINT32 each */
> + __declspec(align(32)) VUINT32 Log_LA_table[(1<<9)+1][2]; /* (1<<9)+1 entries, two VUINT32 each */
> + __declspec(align(32)) VUINT32 poly_coeff[4][4][2]; /* four coefficients, each replicated across four lanes */
> + __declspec(align(32)) VUINT32 ExpMask[4][2]; /* this and every field below: one value replicated across four lanes */
> + __declspec(align(32)) VUINT32 Two10[4][2];
> + __declspec(align(32)) VUINT32 MinLog1p[4][2];
> + __declspec(align(32)) VUINT32 MaxLog1p[4][2];
> + __declspec(align(32)) VUINT32 One[4][2];
> + __declspec(align(32)) VUINT32 SgnMask[4][2];
> + __declspec(align(32)) VUINT32 XThreshold[4][2];
> + __declspec(align(32)) VUINT32 XhMask[4][2];
> + __declspec(align(32)) VUINT32 Threshold[4][2];
> + __declspec(align(32)) VUINT32 Bias[4][2];
> + __declspec(align(32)) VUINT32 Bias1[4][2];
> + __declspec(align(32)) VUINT32 ExpMask0[4][2];
> + __declspec(align(32)) VUINT32 ExpMask2[4][2];
> + __declspec(align(32)) VUINT32 L2[4][2];
> + __declspec(align(32)) VUINT32 dHalf[4][2];
> + __declspec(align(32)) VUINT32 dSign[4][2];
> + __declspec(align(32)) VUINT32 dTopMask12[4][2];
> + __declspec(align(32)) VUINT32 dTopMask41[4][2];
> + __declspec(align(32)) VUINT32 TinyRange[4][2];
> +} __svml_datanh_data_internal;
> +#endif
> +__svml_datanh_data_internal:
> + /* Log_HA_table */
> + .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100
> + .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a
> + .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff
> + .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a
> + .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb
> + .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e
> + .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b
> + .quad 0xc0862347acebaf68, 0xbe1cef3b152048af
> + .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e
> + .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4
> + .quad 0xc08623537ac30980, 0xbe1cefc4642ee597
> + .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16
> + .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6
> + .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362
> + .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557
> + .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b
> + .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed
> + .quad 0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed
> + .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f
> + .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce
> + .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7
> + .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1
> + .quad 0xc086238206e94218, 0xbe1ceee898588610
> + .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea
> + .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6
> + .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6
> + .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165
> + .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1
> + .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b
> + .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670
> + .quad 0xc08623a07b28ae60, 0xbe1cef359363787c
> + .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c
> + .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84
> + .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7
> + .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b
> + .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf
> + .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62
> + .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b
> + .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98
> + .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87
> + .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff
> + .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798
> + .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e
> + .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde
> + .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b
> + .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c
> + .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98
> + .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f
> + .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358
> + .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380
> + .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4
> + .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b
> + .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2
> + .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4
> + .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400
> + .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7
> + .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a
> + .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d
> + .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b
> + .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575
> + .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951
> + .quad 0xc086241263e87f50, 0xbe1cf16e74768529
> + .quad 0xc0862415f6193658, 0xbe1cefec64b8becb
> + .quad 0xc086241986b28f30, 0xbe1cf0838d210baa
> + .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11
> + .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805
> + .quad 0xc08624242f008380, 0xbe1ceea988c5a417
> + .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5
> + .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38
> + .quad 0xc086242ec92eaee8, 0xbe1cef0946455411
> + .quad 0xc08624324ecbaf98, 0xbe1cefea60907739
> + .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42
> + .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d
> + .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e
> + .quad 0xc086244055d2c968, 0xbe1cef345284c119
> + .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219
> + .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114
> + .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189
> + .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f
> + .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f
> + .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5
> + .quad 0xc0862458a789e250, 0xbe1cf0b173796a31
> + .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d
> + .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb
> + .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7
> + .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f
> + .quad 0xc0862469d9a591c0, 0xbe1cef503d947663
> + .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2
> + .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc
> + .quad 0xc086247419475160, 0xbe1cf03dd9922331
> + .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129
> + .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6
> + .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100
> + .quad 0xc0862481af27c528, 0xbe1cee8a6593278a
> + .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7
> + .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8
> + .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002
> + .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4
> + .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c
> + .quad 0xc0862495e5179270, 0xbe1cee757f20c326
> + .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4
> + .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97
> + .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb
> + .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e
> + .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b
> + .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80
> + .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71
> + .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9
> + .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139
> + .quad 0xc08624b72472a528, 0xbe1cf031c931c11f
> + .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7
> + .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d
> + .quad 0xc08624c103245238, 0xbe1cefd492f1ba75
> + .quad 0xc08624c44aacab08, 0xbe1cf1253e154466
> + .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55
> + .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe
> + .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f
> + .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968
> + .quad 0xc08624d49c4a4b78, 0xbe1cee97b556ed78
> + .quad 0xc08624d7dbd56750, 0xbe1cf1b14b6acb75
> + .quad 0xc08624db1a0f6b00, 0xbe1cef1e860623f2
> + .quad 0xc08624de56f96758, 0xbe1ceeaf4d156f3d
> + .quad 0xc08624e192946bf0, 0xbe1ceecc12b400ed
> + .quad 0xc08624e4cce18710, 0xbe1cf180c40c794f
> + .quad 0xc08624e805e1c5c8, 0xbe1cf185a08f7f65
> + .quad 0xc08624eb3d9633d8, 0xbe1cef45fc924078
> + .quad 0xc08624ee73ffdbb0, 0xbe1cf1e4f457f32a
> + .quad 0xc08624f1a91fc6a0, 0xbe1cf040147b8a5a
> + .quad 0xc08624f4dcf6fc98, 0xbe1cf1effca0dfb2
> + .quad 0xc08624f80f868468, 0xbe1cf0470146e5bc
> + .quad 0xc08624fb40cf6390, 0xbe1cef4dd186e501
> + .quad 0xc08624fe70d29e60, 0xbe1ceebe257f66c7
> + .quad 0xc08625019f9137f0, 0xbe1ceefb7a1c395c
> + .quad 0xc0862504cd0c3220, 0xbe1cf209dedfed8c
> + .quad 0xc0862507f9448db0, 0xbe1cf082da464994
> + .quad 0xc086250b243b4a18, 0xbe1cee88694a73cf
> + .quad 0xc086250e4df165a0, 0xbe1cf0b61e8f0531
> + .quad 0xc08625117667dd78, 0xbe1cf1106599c962
> + .quad 0xc08625149d9fad98, 0xbe1ceff1ee88af1f
> + .quad 0xc0862517c399d0c8, 0xbe1cf0f746994ef6
> + .quad 0xc086251ae85740b8, 0xbe1cefe8a1d077e4
> + .quad 0xc086251e0bd8f5e0, 0xbe1cf1a1da036092
> + .quad 0xc08625212e1fe7a8, 0xbe1cf0f8a7786fcd
> + .quad 0xc08625244f2d0c48, 0xbe1cefa1174a07a7
> + .quad 0xc08625276f0158d8, 0xbe1cef1043aa5b25
> + .quad 0xc086252a8d9dc150, 0xbe1cf15d521c169d
> + .quad 0xc086252dab033898, 0xbe1cf220bba8861f
> + .quad 0xc0862530c732b078, 0xbe1cef51e310eae2
> + .quad 0xc0862533e22d1988, 0xbe1cf222fcedd8ae
> + .quad 0xc0862536fbf36370, 0xbe1cefdb4da4bda8
> + .quad 0xc086253a14867ca0, 0xbe1ceeafc1112171
> + .quad 0xc086253d2be75280, 0xbe1cee99dfb4b408
> + .quad 0xc08625404216d160, 0xbe1cf22d2536f06b
> + .quad 0xc08625435715e498, 0xbe1cef6abbf2e268
> + .quad 0xc08625466ae57648, 0xbe1cf093a14789f5
> + .quad 0xc08625497d866fa0, 0xbe1cf0f93655603c
> + .quad 0xc086254c8ef9b8b8, 0xbe1cf1cc40c9aafc
> + .quad 0xc086254f9f4038a8, 0xbe1ceeea5f4e9157
> + .quad 0xc0862552ae5ad568, 0xbe1cefa9f52d4997
> + .quad 0xc0862555bc4a7400, 0xbe1cefa490a638ff
> + .quad 0xc0862558c90ff868, 0xbe1cef7fcf797d6f
> + .quad 0xc086255bd4ac4590, 0xbe1cf1b4c51113c9
> + .quad 0xc086255edf203d78, 0xbe1cef55e5b4a55d
> + .quad 0xc0862561e86cc100, 0xbe1cf0d37a25f9dc
> + .quad 0xc0862564f092b028, 0xbe1ceebe9efc19d9
> + .quad 0xc0862567f792e9d8, 0xbe1cee8ad30a57b5
> + .quad 0xc086256afd6e4c08, 0xbe1cef4e1817b90b
> + .quad 0xc086256e0225b3b8, 0xbe1cee7fa9229996
> + .quad 0xc086257105b9fce0, 0xbe1cf0b54963d945
> + .quad 0xc0862574082c0298, 0xbe1cee5f2f3c7995
> + .quad 0xc0862577097c9ee0, 0xbe1cf0828e303a2c
> + .quad 0xc086257a09acaae0, 0xbe1cf172c3078947
> + .quad 0xc086257d08bcfec0, 0xbe1cf189252afa22
> + .quad 0xc086258006ae71b8, 0xbe1cefdb80426923
> + .quad 0xc08625830381da08, 0xbe1ceef1391a0372
> + .quad 0xc0862585ff380d00, 0xbe1cf17720c78d13
> + .quad 0xc0862588f9d1df18, 0xbe1ceef1f9027d83
> + .quad 0xc086258bf35023b8, 0xbe1cf06fac99dec9
> + .quad 0xc086258eebb3ad78, 0xbe1cf1373eeb45c0
> + .quad 0xc0862591e2fd4e00, 0xbe1cef777536bb81
> + .quad 0xc0862594d92dd600, 0xbe1cf0f43ca40766
> + .quad 0xc0862597ce461558, 0xbe1cefb2cfc6766b
> + .quad 0xc086259ac246daf0, 0xbe1ceea49e64ffa2
> + .quad 0xc086259db530f4c8, 0xbe1cf250fa457dec
> + .quad 0xc08625a0a7053018, 0xbe1cf17d8bb2a44e
> + .quad 0xc08625a397c45918, 0xbe1cf1d5906d54b7
> + .quad 0xc08625a6876f3b30, 0xbe1cf08fe7b31780
> + .quad 0xc08625a97606a0e0, 0xbe1cef13edfc9d11
> + .quad 0xc08625ac638b53c8, 0xbe1cef9d2b107219
> + .quad 0xc08625af4ffe1cb0, 0xbe1cf1ddd4ff6160
> + .quad 0xc08625b23b5fc390, 0xbe1cefa02a996495
> + .quad 0xc08625b525b10f68, 0xbe1cf166a7e37ee5
> + .quad 0xc08625b80ef2c680, 0xbe1cef0b171068a5
> + .quad 0xc08625baf725ae28, 0xbe1cf05c80779283
> + .quad 0xc08625bdde4a8af0, 0xbe1cf1bbfbffb889
> + .quad 0xc08625c0c4622090, 0xbe1cf0b8666c0124
> + .quad 0xc08625c3a96d31e0, 0xbe1cf0a8fcf47a86
> + .quad 0xc08625c68d6c80f0, 0xbe1cef46e18cb092
> + .quad 0xc08625c97060cef0, 0xbe1cf1458a350efb
> + .quad 0xc08625cc524adc58, 0xbe1ceeea1dadce12
> + .quad 0xc08625cf332b68b0, 0xbe1cf0a1bfdc44c7
> + .quad 0xc08625d2130332d0, 0xbe1cef96d02da73e
> + .quad 0xc08625d4f1d2f8a8, 0xbe1cf2451c3c7701
> + .quad 0xc08625d7cf9b7778, 0xbe1cf10d08f83812
> + .quad 0xc08625daac5d6ba0, 0xbe1ceec5b4895c5e
> + .quad 0xc08625dd881990b0, 0xbe1cf14e1325c5e4
> + .quad 0xc08625e062d0a188, 0xbe1cf21d0904be12
> + .quad 0xc08625e33c835838, 0xbe1ceed0839bcf21
> + .quad 0xc08625e615326df0, 0xbe1cf1bb944889d2
> + .quad 0xc08625e8ecde9b48, 0xbe1cee738e85eece
> + .quad 0xc08625ebc38897e0, 0xbe1cf25c2bc6ef12
> + .quad 0xc08625ee99311ac8, 0xbe1cf132b70a41ad
> + .quad 0xc08625f16dd8da28, 0xbe1cf1984236a6e3
> + .quad 0xc08625f441808b78, 0xbe1cf19ae74998f9
> + .quad 0xc08625f71428e370, 0xbe1cef3e175d61a1
> + .quad 0xc08625f9e5d295f8, 0xbe1cf101f9868fd9
> + .quad 0xc08625fcb67e5658, 0xbe1cee69db83dcd2
> + .quad 0xc08625ff862cd6f8, 0xbe1cf081b636af51
> + .quad 0xc086260254dec9a8, 0xbe1cee62c7d59b3e
> + .quad 0xc08626052294df58, 0xbe1cf1b745c57716
> + .quad 0xc0862607ef4fc868, 0xbe1cef3d2800ea23
> + .quad 0xc086260abb103458, 0xbe1cef480ff1acd2
> + .quad 0xc086260d85d6d200, 0xbe1cf2424c9a17ef
> + .quad 0xc08626104fa44f90, 0xbe1cf12cfde90fd5
> + .quad 0xc086261318795a68, 0xbe1cf21f590dd5b6
> + .quad 0xc0862615e0569f48, 0xbe1cf0c50f9cd28a
> + .quad 0xc0862618a73cca30, 0xbe1ceedbdb520545
> + .quad 0xc086261b6d2c8668, 0xbe1cf0b030396011
> + .quad 0xc086261e32267e98, 0xbe1cf19917010e96
> + .quad 0xc0862620f62b5cb0, 0xbe1cf07331355985
> + .quad 0xc0862623b93bc9e8, 0xbe1cf01ae921a1c3
> + .quad 0xc08626267b586ed0, 0xbe1cefe5cf0dbf0c
> + .quad 0xc08626293c81f348, 0xbe1cf01b258aeb50
> + .quad 0xc086262bfcb8fe88, 0xbe1cee6b9e7f4c68
> + .quad 0xc086262ebbfe3710, 0xbe1cee684a9b21c9
> + .quad 0xc08626317a5242b8, 0xbe1cf1f8bcde9a8b
> + .quad 0xc086263437b5c6c0, 0xbe1cf1d063d36238
> + .quad 0xc0862636f42967a8, 0xbe1cf1e31a19075e
> + .quad 0xc0862639afadc950, 0xbe1cf1d8efdf7e7d
> + .quad 0xc086263c6a438ef0, 0xbe1cf1812ee72dba
> + .quad 0xc086263f23eb5b18, 0xbe1cf1449a9a2279
> + .quad 0xc0862641dca5cfb8, 0xbe1cee96edce5085
> + .quad 0xc086264494738e08, 0xbe1cf06797bd03b2
> + .quad 0xc08626474b5536b8, 0xbe1cef91b9b7ffc1
> + .quad 0xc086264a014b69c0, 0xbe1cef4b6721278f
> + .quad 0xc086264cb656c678, 0xbe1cf1942925eb4a
> + .quad 0xc086264f6a77eba8, 0xbe1cefa2c7bc2e39
> + .quad 0xc08626521daf7758, 0xbe1cf252595aceb3
> + .quad 0xc0862654cffe0718, 0xbe1cee8e9ae47ec2
> + .quad 0xc0862657816437a8, 0xbe1cf1bf913828fa
> + .quad 0xc086265a31e2a558, 0xbe1cf23475d6b366
> + .quad 0xc086265ce179ebc8, 0xbe1cef8df00a922b
> + .quad 0xc086265f902aa5f0, 0xbe1cef279bfa43e0
> + .quad 0xc08626623df56e38, 0xbe1cf080e10b8365
> + .quad 0xc0862664eadade70, 0xbe1cf1a518f9b544
> + .quad 0xc086266796db8fd0, 0xbe1cef9308fed9e9
> + .quad 0xc086266a41f81ae8, 0xbe1ceea3ae6b19c9
> + .quad 0xc086266cec3117b8, 0xbe1ceef06003d4c2
> + .quad 0xc086266f95871da8, 0xbe1cf0b8457ffb0c
> + .quad 0xc08626723dfac390, 0xbe1cf0c526745ad6
> + .quad 0xc0862674e58c9fa8, 0xbe1cf0cf91ff7b5d
> + .quad 0xc08626778c3d4798, 0xbe1cefe260819380
> + .quad 0xc086267a320d5070, 0xbe1ceebd90aa27a3
> + .quad 0xc086267cd6fd4ea8, 0xbe1cf0388121dffa
> + .quad 0xc086267f7b0dd630, 0xbe1cf1a3881435f1
> + .quad 0xc08626821e3f7a68, 0xbe1cef28e9d9ac52
> + .quad 0xc0862684c092ce08, 0xbe1cf02d300062dd
> + .quad 0xc086268762086350, 0xbe1cefaee1edfa35
> + .quad 0xc086268a02a0cbe0, 0xbe1cf0a5a052e936
> + .quad 0xc086268ca25c98d8, 0xbe1cee60a4a497ed
> + .quad 0xc086268f413c5ab0, 0xbe1cf0e4a5d0cf49
> + .quad 0xc0862691df40a170, 0xbe1cf149235a4e6e
> + .quad 0xc08626947c69fc80, 0xbe1cf215180b9fcc
> + .quad 0xc086269718b8fac8, 0xbe1cef9b156a9840
> + .quad 0xc0862699b42e2a90, 0xbe1cf054c91441be
> + .quad 0xc086269c4eca19a8, 0xbe1cf13ded26512c
> + .quad 0xc086269ee88d5550, 0xbe1cf22ea4d8ac06
> + .quad 0xc08626a181786a40, 0xbe1cf2354666ee2e
> + .quad 0xc08626a4198be4a8, 0xbe1cefef936752b3
> + .quad 0xc08626a6b0c85020, 0xbe1cf1e360a9db68
> + .quad 0xc08626a9472e37d8, 0xbe1ceed6aeb812c5
> + .quad 0xc08626abdcbe2650, 0xbe1cf227340b4986
> + .quad 0xc08626ae7178a5b0, 0xbe1cf0215a0cbe0d
> + .quad 0xc08626b1055e3f70, 0xbe1cf256adf0ae26
> + .quad 0xc08626b3986f7ca8, 0xbe1ceff3c67aed06
> + .quad 0xc08626b62aace5c8, 0xbe1cf2159fb93652
> + .quad 0xc08626b8bc1702e0, 0xbe1cf01e6dbd1c7f
> + .quad 0xc08626bb4cae5b60, 0xbe1cf009e75d1c0c
> + .quad 0xc08626bddc737648, 0xbe1ceec10a020e73
> + .quad 0xc08626c06b66da08, 0xbe1cf06d5783eee7
> + .quad 0xc08626c2f9890ca0, 0xbe1cf0cb8f169ffe
> + .quad 0xc08626c586da9388, 0xbe1cef7de2452430
> + .quad 0xc08626c8135bf3b0, 0xbe1cf05da6f783ae
> + .quad 0xc08626ca9f0db198, 0xbe1cefcc877d681d
> + .quad 0xc08626cd29f05138, 0xbe1cef0531954ab3
> + .quad 0xc08626cfb4045608, 0xbe1cf06b8565ea3d
> + .quad 0xc08626d23d4a4310, 0xbe1cefdc455d9d7e
> + .quad 0xc08626d4c5c29ad0, 0xbe1ceefc47e8fa64
> + .quad 0xc08626d74d6ddf48, 0xbe1cf1872bf033f2
> + .quad 0xc08626d9d44c9210, 0xbe1cf19d91087f9d
> + .quad 0xc08626dc5a5f3438, 0xbe1cf012d444c6ab
> + .quad 0xc08626dedfa64650, 0xbe1cf0ba528ee153
> + .quad 0xc08626e164224880, 0xbe1ceeb431709788
> + .quad 0xc08626e3e7d3ba60, 0xbe1cf0b9af31a6a5
> + .quad 0xc08626e66abb1b28, 0xbe1cf168fb2e135b
> + .quad 0xc08626e8ecd8e990, 0xbe1cef9097461c93
> + .quad 0xc08626eb6e2da3d0, 0xbe1cee7a434735d8
> + .quad 0xc08626edeeb9c7a8, 0xbe1cf235732b86f2
> + .quad 0xc08626f06e7dd280, 0xbe1cefe1510b89e6
> + .quad 0xc08626f2ed7a4120, 0xbe1cf1f64b9b80ef
> + .quad 0xc08626f56baf9000, 0xbe1cf08f320ca339
> + .quad 0xc08626f7e91e3b08, 0xbe1cf1b1de2808a1
> + .quad 0xc08626fa65c6bdc0, 0xbe1cf1976d778b28
> + .quad 0xc08626fce1a99338, 0xbe1ceef40a4f076f
> + .quad 0xc08626ff5cc73600, 0xbe1cef3e45869ce3
> + .quad 0xc0862701d7202048, 0xbe1ceef601b4c9d6
> + .quad 0xc086270450b4cbc0, 0xbe1cf1eaf0b57fd6
> + .quad 0xc0862706c985b1c0, 0xbe1cef82a44990f3
> + .quad 0xc086270941934b10, 0xbe1ceefe32981f2c
> + .quad 0xc086270bb8de1018, 0xbe1cefbf6f5a0445
> + .quad 0xc086270e2f6678d0, 0xbe1cf18dba75792c
> + .quad 0xc0862710a52cfcc8, 0xbe1cf0da64ce995f
> + .quad 0xc08627131a321318, 0xbe1cef04ac0fb802
> + .quad 0xc08627158e763268, 0xbe1cee9d4e2ad9bd
> + .quad 0xc086271801f9d0f8, 0xbe1cefa9b55407b5
> + .quad 0xc086271a74bd64a0, 0xbe1cefe6bd329570
> + .quad 0xc086271ce6c162c8, 0xbe1cef0b1205dc85
> + .quad 0xc086271f58064068, 0xbe1cef092a785e3f
> + .quad 0xc0862721c88c7210, 0xbe1cf050dcdaac30
> + .quad 0xc086272438546be8, 0xbe1cf210907ded8b
> + .quad 0xc0862726a75ea1b8, 0xbe1cee760be44f99
> + .quad 0xc086272915ab86c0, 0xbe1ceeeee07c2bcc
> + .quad 0xc086272b833b8df0, 0xbe1cf06874992df5
> + .quad 0xc086272df00f29d0, 0xbe1cef8fac5d4899
> + .quad 0xc08627305c26cc70, 0xbe1cf1103241cc99
> + .quad 0xc0862732c782e788, 0xbe1cf1d35fef83fe
> + .quad 0xc08627353223ec68, 0xbe1cef3ec8133e1d
> + .quad 0xc08627379c0a4be8, 0xbe1cef7261daccd8
> + .quad 0xc086273a05367688, 0xbe1cf18656c50806
> + .quad 0xc086273c6da8dc68, 0xbe1cf1c8736e049a
> + .quad 0xc086273ed561ed38, 0xbe1cf1f93bff4911
> + .quad 0xc08627413c621848, 0xbe1cf188a4ea680c
> + .quad 0xc0862743a2a9cc80, 0xbe1cf1d270930c80
> + .quad 0xc086274608397868, 0xbe1cf25a328c28e2
> + .quad 0xc08627486d118a28, 0xbe1cf106f90aa3b8
> + .quad 0xc086274ad1326f80, 0xbe1cee5e9d2e885a
> + .quad 0xc086274d349c95c0, 0xbe1cf1c0bac27228
> + .quad 0xc086274f975069f8, 0xbe1cf1a1500f9b1c
> + .quad 0xc0862751f94e58c0, 0xbe1cefc30663ac44
> + .quad 0xc08627545a96ce48, 0xbe1cf17123e427a2
> + .quad 0xc0862756bb2a3678, 0xbe1cefb92749fea4
> + .quad 0xc08627591b08fcc0, 0xbe1cefa40e1ea74a
> + .quad 0xc086275b7a338c40, 0xbe1cee6f4612c3e9
> + .quad 0xc086275dd8aa4fa8, 0xbe1cf1c54a053627
> + .quad 0xc0862760366db168, 0xbe1ceff5eb503d9e
> + .quad 0xc0862762937e1b70, 0xbe1cf02e47f10cee
> + .quad 0xc0862764efdbf768, 0xbe1ceeb06e1d0dad
> + .quad 0xc08627674b87ae88, 0xbe1cf10aadd6dba5
> + .quad 0xc0862769a681a9c0, 0xbe1cf24e9913d30f
> + .quad 0xc086276c00ca51a0, 0xbe1cef47b301e312
> + .quad 0xc086276e5a620e48, 0xbe1ceeb1cefc2e85
> + .quad 0xc0862770b3494788, 0xbe1cf16f1fbbe011
> + .quad 0xc08627730b8064e8, 0xbe1ceebdf75174c7
> + .quad 0xc08627756307cd70, 0xbe1cf06e3871a0da
> + .quad 0xc0862777b9dfe7f0, 0xbe1cef16799fd554
> + .quad 0xc086277a10091ac0, 0xbe1cf248dabf5377
> + .quad 0xc086277c6583cc00, 0xbe1cf0c78d92a2cd
> + .quad 0xc086277eba506158, 0xbe1cf0b911b029f0
> + .quad 0xc08627810e6f4028, 0xbe1cefdc24719766
> + .quad 0xc086278361e0cd70, 0xbe1cefbb6562b7e7
> + .quad 0xc0862785b4a56dd8, 0xbe1cf1e0afb349ec
> + .quad 0xc086278806bd85c0, 0xbe1cf008292e52fc
> + .quad 0xc086278a58297918, 0xbe1cf053073872bf
> + .quad 0xc086278ca8e9ab88, 0xbe1cf17a0a55a947
> + .quad 0xc086278ef8fe8068, 0xbe1ceeffb0b60234
> + .quad 0xc086279148685aa0, 0xbe1cf162204794a8
> + .quad 0xc086279397279ce0, 0xbe1cf24cc8cb48ac
> + .quad 0xc0862795e53ca978, 0xbe1cf0c9be68d5c3
> + .quad 0xc086279832a7e258, 0xbe1cf172cd3d7388
> + .quad 0xc086279a7f69a930, 0xbe1ceea2465fbce5
> + .quad 0xc086279ccb825f40, 0xbe1cf0a386d2500f
> + .quad 0xc086279f16f26590, 0xbe1cf1e338ddc18a
> + .quad 0xc08627a161ba1cd0, 0xbe1cef1f5049867f
> + .quad 0xc08627a3abd9e548, 0xbe1cef96c1ea8b1f
> + .quad 0xc08627a5f5521f00, 0xbe1cf138f6fd3c26
> + .quad 0xc08627a83e2329b0, 0xbe1cf0d4fcbfdf3a
> + .quad 0xc08627aa864d64b0, 0xbe1cf24870c12c81
> + .quad 0xc08627accdd12f18, 0xbe1cf0ae2a56348d
> + .quad 0xc08627af14aee7a0, 0xbe1cee8ca1a9b893
> + .quad 0xc08627b15ae6eca8, 0xbe1cf20414d637b0
> + .quad 0xc08627b3a0799c60, 0xbe1cf0fc6b7b12d8
> + .quad 0xc08627b5e5675488, 0xbe1cf152d93c4a00
> + .quad 0xc08627b829b072a0, 0xbe1cf1073f9b77c2
> + .quad 0xc08627ba6d5553d8, 0xbe1cee694f97d5a4
> + .quad 0xc08627bcb0565500, 0xbe1cf0456b8239d7
> + .quad 0xc08627bef2b3d2b0, 0xbe1cf211497127e3
> + .quad 0xc08627c1346e2930, 0xbe1cf01856c0384d
> + .quad 0xc08627c37585b468, 0xbe1cefa7dd05479e
> + .quad 0xc08627c5b5fad000, 0xbe1cef3ae8e50b93
> + .quad 0xc08627c7f5cdd750, 0xbe1ceea5f32fdd3a
> + .quad 0xc08627ca34ff2560, 0xbe1cef424caeb8d9
> + .quad 0xc08627cc738f14f0, 0xbe1cf0194d07a81f
> + .quad 0xc08627ceb17e0070, 0xbe1cf20f452000c1
> + .quad 0xc08627d0eecc4210, 0xbe1cf00e356218e4
> + .quad 0xc08627d32b7a33a0, 0xbe1cef30484b4bcb
> + .quad 0xc08627d567882eb0, 0xbe1ceeea11a6641b
> + .quad 0xc08627d7a2f68c80, 0xbe1cf13492d5bd7b
> + .quad 0xc08627d9ddc5a618, 0xbe1ceeb7048fad96
> + .quad 0xc08627dc17f5d418, 0xbe1ceef0666f0477
> + .quad 0xc08627de51876ee8, 0xbe1cf060d4b8b5c2
> + .quad 0xc08627e08a7acea8, 0xbe1cf0b2a4b6ff8c
> + .quad 0xc08627e2c2d04b28, 0xbe1cf0e34809a875
> + .quad 0xc08627e4fa883bf0, 0xbe1cf16bf74a3522
> + .quad 0xc08627e731a2f848, 0xbe1cee6a24623d57
> + .quad 0xc08627e96820d718, 0xbe1cefc7b4f1528e
> + .quad 0xc08627eb9e022f18, 0xbe1cf163051f3548
> + .quad 0xc08627edd34756b8, 0xbe1cef36b3366305
> + .quad 0xc08627f007f0a408, 0xbe1cf18134625550
> + .quad 0xc08627f23bfe6cf0, 0xbe1cf0ec32ec1a11
> + .quad 0xc08627f46f710700, 0xbe1ceeb3b64f3edc
> + .quad 0xc08627f6a248c778, 0xbe1cf0cd15805bc8
> + .quad 0xc08627f8d4860368, 0xbe1cf20db3bddebe
> + .quad 0xc08627fb06290f90, 0xbe1cf25188430e25
> + .quad 0xc08627fd37324070, 0xbe1ceea1713490f9
> + .quad 0xc08627ff67a1ea28, 0xbe1cf159521d234c
> + .quad 0xc0862801977860b8, 0xbe1cf24dfe50783b
> + .quad 0xc0862803c6b5f7d0, 0xbe1ceef2ef89a60b
> + .quad 0xc0862805f55b02c8, 0xbe1cee7fc919d62c
> + .quad 0xc08628082367d4c0, 0xbe1cf215a7fb513a
> + .quad 0xc086280a50dcc0a8, 0xbe1cf0e4401c5ed4
> + .quad 0xc086280c7dba1910, 0xbe1cf04ec734d256
> + .quad 0xc086280eaa003050, 0xbe1cf010ad787fea
> + .quad 0xc0862810d5af5880, 0xbe1cee622478393d
> + .quad 0xc086281300c7e368, 0xbe1cf01c7482564f
> + .quad 0xc08628152b4a22a0, 0xbe1cf0de20d33536
> + .quad 0xc086281755366778, 0xbe1cef2edae5837d
> + .quad 0xc08628197e8d02f0, 0xbe1cf0a345318cc9
> + .quad 0xc086281ba74e45d8, 0xbe1cf20085aa34b8
> + .quad 0xc086281dcf7a80c0, 0xbe1cef5fa845ad83
> + .quad 0xc086281ff71203e0, 0xbe1cf050d1df69c4
> + .quad 0xc08628221e151f48, 0xbe1ceffe43c035b9
> + .quad 0xc0862824448422b8, 0xbe1cf14f3018d3c2
> + .quad 0xc08628266a5f5dc0, 0xbe1cef0a5fbae83d
> + .quad 0xc08628288fa71f98, 0xbe1ceff8a95b72a1
> + .quad 0xc086282ab45bb750, 0xbe1cef073aa9849b
> + .quad 0xc086282cd87d73a8, 0xbe1cef69b3835c02
> + .quad 0xc086282efc0ca328, 0xbe1cf0bc139379a9
> + .quad 0xc08628311f099420, 0xbe1cef247a9ec596
> + .quad 0xc086283341749490, 0xbe1cef74bbcc488a
> + .quad 0xc0862835634df248, 0xbe1cef4bc42e7b8e
> + .quad 0xc08628378495fad0, 0xbe1cf136d4d5a810
> + .quad 0xc0862839a54cfb80, 0xbe1cf0d290b24dd8
> + .quad 0xc086283bc5734168, 0xbe1ceeebde8e0065
> + .quad 0xc086283de5091950, 0xbe1cf1a09f60aa1e
> + .quad 0xc0862840040ecfe0, 0xbe1cf0803947a234
> + .quad 0xc08628422284b168, 0xbe1cf0abf7638127
> + .quad 0xc0862844406b0a08, 0xbe1cf0f73ee12058
> + .quad 0xc08628465dc225a0, 0xbe1cf2079971b26c
> + .quad 0xc08628487a8a4fe0, 0xbe1cee74957564b1
> + .quad 0xc086284a96c3d420, 0xbe1ceee77c1b7d43
> + .quad 0xc086284cb26efd90, 0xbe1cf23addba6e09
> + .quad 0xc086284ecd8c1730, 0xbe1cf199f4a1da60
> + .quad 0xc0862850e81b6bb0, 0xbe1cf09fdea81393
> + .quad 0xc0862853021d4588, 0xbe1cf176adb417f7
> + .quad 0xc08628551b91ef00, 0xbe1cf0f64f84a8da
> + .quad 0xc08628573479b220, 0xbe1ceec34cf49523
> + .quad 0xc08628594cd4d8a8, 0xbe1cf16d60fbe0bb
> + .quad 0xc086285b64a3ac40, 0xbe1cee8de7acfc7b
> + .quad 0xc086285d7be67630, 0xbe1ceee6256cce8d
> + .quad 0xc086285f929d7fa0, 0xbe1cee7d66a3d8a5
> + .quad 0xc0862861a8c91170, 0xbe1cf0bef8265792
> + .quad 0xc0862863be697458, 0xbe1cf097f890c6f8
> + .quad 0xc0862865d37ef0c8, 0xbe1cf09502d5c3fc
> + .quad 0xc0862867e809cf00, 0xbe1ceeffb239dac7
> + .quad 0xc0862869fc0a56f8, 0xbe1cf1fbfff95c98
> + .quad 0xc086286c0f80d090, 0xbe1cefa57ad3eef7
> + .quad 0xc086286e226d8348, 0xbe1cf22c58b9183d
> + .quad 0xc086287034d0b690, 0xbe1ceff262d0a248
> + .quad 0xc086287246aab180, 0xbe1cefa7bc194186
> + .quad 0xc086287457fbbb08, 0xbe1cf06782d784d9
> + .quad 0xc086287668c419e0, 0xbe1cf1d44d0eaa07
> + .quad 0xc086287879041490, 0xbe1cf034803c8a48
> + .quad 0xc086287a88bbf158, 0xbe1cf08e84916b6f
> + .quad 0xc086287c97ebf650, 0xbe1cf0c4d3dc1bc7
> + .quad 0xc086287ea6946958, 0xbe1cefb1e4625943
> + .quad 0xc0862880b4b59010, 0xbe1cf143efdd1fd0
> + .quad 0xc0862882c24faff8, 0xbe1cee9896d016da
> + .quad 0xc0862884cf630e38, 0xbe1cf2186072f2cc
> + .quad 0xc0862886dbefeff0, 0xbe1cef9217633d34
> + .quad 0xc0862888e7f699e0, 0xbe1cf05603549486
> + .quad 0xc086288af37750b0, 0xbe1cef50fff513d3
> + .quad 0xc086288cfe7258c0, 0xbe1cf127713b32d0
> + .quad 0xc086288f08e7f650, 0xbe1cf05015520f3d
> + .quad 0xc086289112d86d58, 0xbe1cf12eb458b26f
> + .quad 0xc08628931c4401a8, 0xbe1cf22eae2887ed
> + .quad 0xc0862895252af6e0, 0xbe1cefdd6656dd2d
> + .quad 0xc08628972d8d9058, 0xbe1cf1048ea4e646
> + .quad 0xc0862899356c1150, 0xbe1ceec4501167e9
> + .quad 0xc086289b3cc6bcb8, 0xbe1cf0ad52becc3f
> + .quad 0xc086289d439dd568, 0xbe1cf0daa4e00e35
> + .quad 0xc086289f49f19df8, 0xbe1cf00b80de8d6a
> + .quad 0xc08628a14fc258c8, 0xbe1cf1bcf2ea8464
> + .quad 0xc08628a355104818, 0xbe1cf0435e2782b0
> + .quad 0xc08628a559dbade0, 0xbe1cf0e3e1a5f56c
> + .quad 0xc08628a75e24cbf8, 0xbe1cefed9d5a721d
> + .quad 0xc08628a961ebe3f8, 0xbe1cf0d2d74321e2
> + .quad 0xc08628ab65313750, 0xbe1cf24200eb55e9
> + .quad 0xc08628ad67f50740, 0xbe1cf23e9d7cf979
> + .quad 0xc08628af6a3794d0, 0xbe1cf23a088f421c
> + .quad 0xc08628b16bf920e0, 0xbe1cef2c1de1ab32
> + .quad 0xc08628b36d39ec08, 0xbe1cf1abc231f7b2
> + .quad 0xc08628b56dfa36d0, 0xbe1cf2074d5ba303
> + .quad 0xc08628b76e3a4180, 0xbe1cf05cd5eed880
> + /*== Log_LA_table ==*/
> + .align 32
> + .quad 0x8000000000000000
> + .quad 0xbf5ff802a9ab10e6
> + .quad 0xbf6ff00aa2b10bc0
> + .quad 0xbf77ee11ebd82e94
> + .quad 0xbf7fe02a6b106789
> + .quad 0xbf83e7295d25a7d9
> + .quad 0xbf87dc475f810a77
> + .quad 0xbf8bcf712c74384c
> + .quad 0xbf8fc0a8b0fc03e4
> + .quad 0xbf91d7f7eb9eebe7
> + .quad 0xbf93cea44346a575
> + .quad 0xbf95c45a51b8d389
> + .quad 0xbf97b91b07d5b11b
> + .quad 0xbf99ace7551cc514
> + .quad 0xbf9b9fc027af9198
> + .quad 0xbf9d91a66c543cc4
> + .quad 0xbf9f829b0e783300
> + .quad 0xbfa0b94f7c196176
> + .quad 0xbfa1b0d98923d980
> + .quad 0xbfa2a7ec2214e873
> + .quad 0xbfa39e87b9febd60
> + .quad 0xbfa494acc34d911c
> + .quad 0xbfa58a5bafc8e4d5
> + .quad 0xbfa67f94f094bd98
> + .quad 0xbfa77458f632dcfc
> + .quad 0xbfa868a83083f6cf
> + .quad 0xbfa95c830ec8e3eb
> + .quad 0xbfaa4fe9ffa3d235
> + .quad 0xbfab42dd711971bf
> + .quad 0xbfac355dd0921f2d
> + .quad 0xbfad276b8adb0b52
> + .quad 0xbfae19070c276016
> + .quad 0xbfaf0a30c01162a6
> + .quad 0xbfaffae9119b9303
> + .quad 0xbfb075983598e471
> + .quad 0xbfb0ed839b5526fe
> + .quad 0xbfb16536eea37ae1
> + .quad 0xbfb1dcb263db1944
> + .quad 0xbfb253f62f0a1417
> + .quad 0xbfb2cb0283f5de1f
> + .quad 0xbfb341d7961bd1d1
> + .quad 0xbfb3b87598b1b6ee
> + .quad 0xbfb42edcbea646f0
> + .quad 0xbfb4a50d3aa1b040
> + .quad 0xbfb51b073f06183f
> + .quad 0xbfb590cafdf01c28
> + .quad 0xbfb60658a93750c4
> + .quad 0xbfb67bb0726ec0fc
> + .quad 0xbfb6f0d28ae56b4c
> + .quad 0xbfb765bf23a6be13
> + .quad 0xbfb7da766d7b12cd
> + .quad 0xbfb84ef898e8282a
> + .quad 0xbfb8c345d6319b21
> + .quad 0xbfb9375e55595ede
> + .quad 0xbfb9ab42462033ad
> + .quad 0xbfba1ef1d8061cd4
> + .quad 0xbfba926d3a4ad563
> + .quad 0xbfbb05b49bee43fe
> + .quad 0xbfbb78c82bb0eda1
> + .quad 0xbfbbeba818146765
> + .quad 0xbfbc5e548f5bc743
> + .quad 0xbfbcd0cdbf8c13e1
> + .quad 0xbfbd4313d66cb35d
> + .quad 0xbfbdb5270187d927
> + .quad 0xbfbe27076e2af2e6
> + .quad 0xbfbe98b549671467
> + .quad 0xbfbf0a30c01162a6
> + .quad 0xbfbf7b79fec37ddf
> + .quad 0xbfbfec9131dbeabb
> + .quad 0xbfc02ebb42bf3d4b
> + .quad 0xbfc0671512ca596e
> + .quad 0xbfc09f561ee719c3
> + .quad 0xbfc0d77e7cd08e59
> + .quad 0xbfc10f8e422539b1
> + .quad 0xbfc14785846742ac
> + .quad 0xbfc17f6458fca611
> + .quad 0xbfc1b72ad52f67a0
> + .quad 0xbfc1eed90e2dc2c3
> + .quad 0xbfc2266f190a5acb
> + .quad 0xbfc25ded0abc6ad2
> + .quad 0xbfc29552f81ff523
> + .quad 0xbfc2cca0f5f5f251
> + .quad 0xbfc303d718e47fd3
> + .quad 0xbfc33af575770e4f
> + .quad 0xbfc371fc201e8f74
> + .quad 0xbfc3a8eb2d31a376
> + .quad 0xbfc3dfc2b0ecc62a
> + .quad 0xbfc41682bf727bc0
> + .quad 0xbfc44d2b6ccb7d1e
> + .quad 0xbfc483bccce6e3dd
> + .quad 0xbfc4ba36f39a55e5
> + .quad 0xbfc4f099f4a230b2
> + .quad 0xbfc526e5e3a1b438
> + .quad 0xbfc55d1ad4232d6f
> + .quad 0xbfc59338d9982086
> + .quad 0xbfc5c940075972b9
> + .quad 0xbfc5ff3070a793d4
> + .quad 0xbfc6350a28aaa758
> + .quad 0xbfc66acd4272ad51
> + .quad 0xbfc6a079d0f7aad2
> + .quad 0xbfc6d60fe719d21d
> + .quad 0xbfc70b8f97a1aa75
> + .quad 0xbfc740f8f54037a5
> + .quad 0xbfc7764c128f2127
> + .quad 0xbfc7ab890210d909
> + .quad 0xbfc7e0afd630c274
> + .quad 0xbfc815c0a14357eb
> + .quad 0xbfc84abb75865139
> + .quad 0xbfc87fa06520c911
> + .quad 0xbfc8b46f8223625b
> + .quad 0xbfc8e928de886d41
> + .quad 0xbfc91dcc8c340bde
> + .quad 0xbfc9525a9cf456b4
> + .quad 0xbfc986d3228180ca
> + .quad 0xbfc9bb362e7dfb83
> + .quad 0xbfc9ef83d2769a34
> + .quad 0xbfca23bc1fe2b563
> + .quad 0xbfca57df28244dcd
> + .quad 0xbfca8becfc882f19
> + .quad 0xbfcabfe5ae46124c
> + .quad 0xbfcaf3c94e80bff3
> + .quad 0xbfcb2797ee46320c
> + .quad 0xbfcb5b519e8fb5a4
> + .quad 0xbfcb8ef670420c3b
> + .quad 0xbfcbc286742d8cd6
> + .quad 0xbfcbf601bb0e44e2
> + .quad 0xbfcc2968558c18c1
> + .quad 0xbfcc5cba543ae425
> + .quad 0xbfcc8ff7c79a9a22
> + .quad 0xbfccc320c0176502
> + .quad 0xbfccf6354e09c5dc
> + .quad 0xbfcd293581b6b3e7
> + .quad 0xbfcd5c216b4fbb91
> + .quad 0xbfcd8ef91af31d5e
> + .quad 0xbfcdc1bca0abec7d
> + .quad 0xbfcdf46c0c722d2f
> + .quad 0xbfce27076e2af2e6
> + .quad 0xbfce598ed5a87e2f
> + .quad 0xbfce8c0252aa5a60
> + .quad 0xbfcebe61f4dd7b0b
> + .quad 0xbfcef0adcbdc5936
> + .quad 0xbfcf22e5e72f105d
> + .quad 0xbfcf550a564b7b37
> + .quad 0xbfcf871b28955045
> + .quad 0xbfcfb9186d5e3e2b
> + .quad 0xbfcfeb0233e607cc
> + .quad 0xbfd00e6c45ad501d
> + .quad 0xbfd0274dc16c232f
> + .quad 0xbfd0402594b4d041
> + .quad 0xbfd058f3c703ebc6
> + .quad 0xbfd071b85fcd590d
> + .quad 0xbfd08a73667c57af
> + .quad 0xbfd0a324e27390e3
> + .quad 0xbfd0bbccdb0d24bd
> + .quad 0xbfd0d46b579ab74b
> + .quad 0xbfd0ed005f657da4
> + .quad 0xbfd1058bf9ae4ad5
> + .quad 0xbfd11e0e2dad9cb7
> + .quad 0xbfd136870293a8b0
> + .quad 0xbfd14ef67f88685a
> + .quad 0xbfd1675cababa60e
> + .quad 0xbfd17fb98e15095d
> + .quad 0xbfd1980d2dd4236f
> + .quad 0xbfd1b05791f07b49
> + .quad 0xbfd1c898c16999fb
> + .quad 0xbfd1e0d0c33716be
> + .quad 0xbfd1f8ff9e48a2f3
> + .quad 0xbfd211255986160c
> + .quad 0xbfd22941fbcf7966
> + .quad 0xbfd241558bfd1404
> + .quad 0xbfd2596010df763a
> + .quad 0xbfd27161913f853d
> + .quad 0xbfd2895a13de86a3
> + .quad 0xbfd2a1499f762bc9
> + .quad 0xbfd2b9303ab89d25
> + .quad 0xbfd2d10dec508583
> + .quad 0xbfd2e8e2bae11d31
> + .quad 0xbfd300aead06350c
> + .quad 0xbfd31871c9544185
> + .quad 0xbfd3302c16586588
> + .quad 0xbfd347dd9a987d55
> + .quad 0xbfd35f865c93293e
> + .quad 0xbfd3772662bfd85b
> + .quad 0xbfd38ebdb38ed321
> + .quad 0xbfd3a64c556945ea
> + .quad 0xbfd3bdd24eb14b6a
> + .quad 0xbfd3d54fa5c1f710
> + .quad 0xbfd3ecc460ef5f50
> + .quad 0xbfd404308686a7e4
> + .quad 0xbfd41b941cce0bee
> + .quad 0xbfd432ef2a04e814
> + .quad 0xbfd44a41b463c47c
> + .quad 0xbfd4618bc21c5ec2
> + .quad 0xbfd478cd5959b3d9
> + .quad 0xbfd49006804009d1
> + .quad 0xbfd4a7373cecf997
> + .quad 0xbfd4be5f957778a1
> + .quad 0xbfd4d57f8fefe27f
> + .quad 0xbfd4ec973260026a
> + .quad 0xbfd503a682cb1cb3
> + .quad 0xbfd51aad872df82d
> + .quad 0xbfd531ac457ee77e
> + .quad 0xbfd548a2c3add263
> + .quad 0xbfd55f9107a43ee2
> + .quad 0xbfd5767717455a6c
> + .quad 0xbfd58d54f86e02f2
> + .quad 0xbfd5a42ab0f4cfe2
> + .quad 0xbfd5baf846aa1b19
> + .quad 0xbfd5d1bdbf5809ca
> + .quad 0xbfd5e87b20c2954a
> + .quad 0xbfd5ff3070a793d4
> + .quad 0xbfd615ddb4bec13c
> + .quad 0xbfd62c82f2b9c795
> + .quad 0x3fd61965cdb02c1f
> + .quad 0x3fd602d08af091ec
> + .quad 0x3fd5ec433d5c35ae
> + .quad 0x3fd5d5bddf595f30
> + .quad 0x3fd5bf406b543db2
> + .quad 0x3fd5a8cadbbedfa1
> + .quad 0x3fd5925d2b112a59
> + .quad 0x3fd57bf753c8d1fb
> + .quad 0x3fd565995069514c
> + .quad 0x3fd54f431b7be1a9
> + .quad 0x3fd538f4af8f72fe
> + .quad 0x3fd522ae0738a3d8
> + .quad 0x3fd50c6f1d11b97c
> + .quad 0x3fd4f637ebba9810
> + .quad 0x3fd4e0086dd8baca
> + .quad 0x3fd4c9e09e172c3c
> + .quad 0x3fd4b3c077267e9a
> + .quad 0x3fd49da7f3bcc41f
> + .quad 0x3fd487970e958770
> + .quad 0x3fd4718dc271c41b
> + .quad 0x3fd45b8c0a17df13
> + .quad 0x3fd44591e0539f49
> + .quad 0x3fd42f9f3ff62642
> + .quad 0x3fd419b423d5e8c7
> + .quad 0x3fd403d086cea79c
> + .quad 0x3fd3edf463c1683e
> + .quad 0x3fd3d81fb5946dba
> + .quad 0x3fd3c25277333184
> + .quad 0x3fd3ac8ca38e5c5f
> + .quad 0x3fd396ce359bbf54
> + .quad 0x3fd3811728564cb2
> + .quad 0x3fd36b6776be1117
> + .quad 0x3fd355bf1bd82c8b
> + .quad 0x3fd3401e12aecba1
> + .quad 0x3fd32a84565120a8
> + .quad 0x3fd314f1e1d35ce4
> + .quad 0x3fd2ff66b04ea9d4
> + .quad 0x3fd2e9e2bce12286
> + .quad 0x3fd2d46602adccee
> + .quad 0x3fd2bef07cdc9354
> + .quad 0x3fd2a982269a3dbf
> + .quad 0x3fd2941afb186b7c
> + .quad 0x3fd27ebaf58d8c9d
> + .quad 0x3fd269621134db92
> + .quad 0x3fd25410494e56c7
> + .quad 0x3fd23ec5991eba49
> + .quad 0x3fd22981fbef797b
> + .quad 0x3fd214456d0eb8d4
> + .quad 0x3fd1ff0fe7cf47a7
> + .quad 0x3fd1e9e1678899f4
> + .quad 0x3fd1d4b9e796c245
> + .quad 0x3fd1bf99635a6b95
> + .quad 0x3fd1aa7fd638d33f
> + .quad 0x3fd1956d3b9bc2fa
> + .quad 0x3fd180618ef18adf
> + .quad 0x3fd16b5ccbacfb73
> + .quad 0x3fd1565eed455fc3
> + .quad 0x3fd14167ef367783
> + .quad 0x3fd12c77cd00713b
> + .quad 0x3fd1178e8227e47c
> + .quad 0x3fd102ac0a35cc1c
> + .quad 0x3fd0edd060b78081
> + .quad 0x3fd0d8fb813eb1ef
> + .quad 0x3fd0c42d676162e3
> + .quad 0x3fd0af660eb9e279
> + .quad 0x3fd09aa572e6c6d4
> + .quad 0x3fd085eb8f8ae797
> + .quad 0x3fd07138604d5862
> + .quad 0x3fd05c8be0d9635a
> + .quad 0x3fd047e60cde83b8
> + .quad 0x3fd03346e0106062
> + .quad 0x3fd01eae5626c691
> + .quad 0x3fd00a1c6adda473
> + .quad 0x3fcfeb2233ea07cd
> + .quad 0x3fcfc218be620a5e
> + .quad 0x3fcf991c6cb3b379
> + .quad 0x3fcf702d36777df0
> + .quad 0x3fcf474b134df229
> + .quad 0x3fcf1e75fadf9bde
> + .quad 0x3fcef5ade4dcffe6
> + .quad 0x3fceccf2c8fe920a
> + .quad 0x3fcea4449f04aaf5
> + .quad 0x3fce7ba35eb77e2a
> + .quad 0x3fce530effe71012
> + .quad 0x3fce2a877a6b2c12
> + .quad 0x3fce020cc6235ab5
> + .quad 0x3fcdd99edaf6d7e9
> + .quad 0x3fcdb13db0d48940
> + .quad 0x3fcd88e93fb2f450
> + .quad 0x3fcd60a17f903515
> + .quad 0x3fcd38666871f465
> + .quad 0x3fcd1037f2655e7b
> + .quad 0x3fcce816157f1988
> + .quad 0x3fccc000c9db3c52
> + .quad 0x3fcc97f8079d44ec
> + .quad 0x3fcc6ffbc6f00f71
> + .quad 0x3fcc480c0005ccd1
> + .quad 0x3fcc2028ab17f9b4
> + .quad 0x3fcbf851c067555f
> + .quad 0x3fcbd087383bd8ad
> + .quad 0x3fcba8c90ae4ad19
> + .quad 0x3fcb811730b823d2
> + .quad 0x3fcb5971a213acdb
> + .quad 0x3fcb31d8575bce3d
> + .quad 0x3fcb0a4b48fc1b46
> + .quad 0x3fcae2ca6f672bd4
> + .quad 0x3fcabb55c31693ad
> + .quad 0x3fca93ed3c8ad9e3
> + .quad 0x3fca6c90d44b704e
> + .quad 0x3fca454082e6ab05
> + .quad 0x3fca1dfc40f1b7f1
> + .quad 0x3fc9f6c407089664
> + .quad 0x3fc9cf97cdce0ec3
> + .quad 0x3fc9a8778debaa38
> + .quad 0x3fc981634011aa75
> + .quad 0x3fc95a5adcf7017f
> + .quad 0x3fc9335e5d594989
> + .quad 0x3fc90c6db9fcbcd9
> + .quad 0x3fc8e588ebac2dbf
> + .quad 0x3fc8beafeb38fe8c
> + .quad 0x3fc897e2b17b19a5
> + .quad 0x3fc871213750e994
> + .quad 0x3fc84a6b759f512f
> + .quad 0x3fc823c16551a3c2
> + .quad 0x3fc7fd22ff599d4f
> + .quad 0x3fc7d6903caf5ad0
> + .quad 0x3fc7b0091651528c
> + .quad 0x3fc7898d85444c73
> + .quad 0x3fc7631d82935a86
> + .quad 0x3fc73cb9074fd14d
> + .quad 0x3fc716600c914054
> + .quad 0x3fc6f0128b756abc
> + .quad 0x3fc6c9d07d203fc7
> + .quad 0x3fc6a399dabbd383
> + .quad 0x3fc67d6e9d785771
> + .quad 0x3fc6574ebe8c133a
> + .quad 0x3fc6313a37335d76
> + .quad 0x3fc60b3100b09476
> + .quad 0x3fc5e533144c1719
> + .quad 0x3fc5bf406b543db2
> + .quad 0x3fc59958ff1d52f1
> + .quad 0x3fc5737cc9018cdd
> + .quad 0x3fc54dabc26105d2
> + .quad 0x3fc527e5e4a1b58d
> + .quad 0x3fc5022b292f6a45
> + .quad 0x3fc4dc7b897bc1c8
> + .quad 0x3fc4b6d6fefe22a4
> + .quad 0x3fc4913d8333b561
> + .quad 0x3fc46baf0f9f5db7
> + .quad 0x3fc4462b9dc9b3dc
> + .quad 0x3fc420b32740fdd4
> + .quad 0x3fc3fb45a59928cc
> + .quad 0x3fc3d5e3126bc27f
> + .quad 0x3fc3b08b6757f2a9
> + .quad 0x3fc38b3e9e027479
> + .quad 0x3fc365fcb0159016
> + .quad 0x3fc340c59741142e
> + .quad 0x3fc31b994d3a4f85
> + .quad 0x3fc2f677cbbc0a96
> + .quad 0x3fc2d1610c86813a
> + .quad 0x3fc2ac55095f5c59
> + .quad 0x3fc28753bc11aba5
> + .quad 0x3fc2625d1e6ddf57
> + .quad 0x3fc23d712a49c202
> + .quad 0x3fc2188fd9807263
> + .quad 0x3fc1f3b925f25d41
> + .quad 0x3fc1ceed09853752
> + .quad 0x3fc1aa2b7e23f72a
> + .quad 0x3fc185747dbecf34
> + .quad 0x3fc160c8024b27b1
> + .quad 0x3fc13c2605c398c3
> + .quad 0x3fc1178e8227e47c
> + .quad 0x3fc0f301717cf0fb
> + .quad 0x3fc0ce7ecdccc28d
> + .quad 0x3fc0aa06912675d5
> + .quad 0x3fc08598b59e3a07
> + .quad 0x3fc06135354d4b18
> + .quad 0x3fc03cdc0a51ec0d
> + .quad 0x3fc0188d2ecf6140
> + .quad 0x3fbfe89139dbd566
> + .quad 0x3fbfa01c9db57ce2
> + .quad 0x3fbf57bc7d9005db
> + .quad 0x3fbf0f70cdd992e3
> + .quad 0x3fbec739830a1120
> + .quad 0x3fbe7f1691a32d3e
> + .quad 0x3fbe3707ee30487b
> + .quad 0x3fbdef0d8d466db9
> + .quad 0x3fbda727638446a2
> + .quad 0x3fbd5f55659210e2
> + .quad 0x3fbd179788219364
> + .quad 0x3fbccfedbfee13a8
> + .quad 0x3fbc885801bc4b23
> + .quad 0x3fbc40d6425a5cb1
> + .quad 0x3fbbf968769fca11
> + .quad 0x3fbbb20e936d6974
> + .quad 0x3fbb6ac88dad5b1c
> + .quad 0x3fbb23965a52ff00
> + .quad 0x3fbadc77ee5aea8c
> + .quad 0x3fba956d3ecade63
> + .quad 0x3fba4e7640b1bc38
> + .quad 0x3fba0792e9277cac
> + .quad 0x3fb9c0c32d4d2548
> + .quad 0x3fb97a07024cbe74
> + .quad 0x3fb9335e5d594989
> + .quad 0x3fb8ecc933aeb6e8
> + .quad 0x3fb8a6477a91dc29
> + .quad 0x3fb85fd927506a48
> + .quad 0x3fb8197e2f40e3f0
> + .quad 0x3fb7d33687c293c9
> + .quad 0x3fb78d02263d82d3
> + .quad 0x3fb746e100226ed9
> + .quad 0x3fb700d30aeac0e1
> + .quad 0x3fb6bad83c1883b6
> + .quad 0x3fb674f089365a7a
> + .quad 0x3fb62f1be7d77743
> + .quad 0x3fb5e95a4d9791cb
> + .quad 0x3fb5a3abb01ade25
> + .quad 0x3fb55e10050e0384
> + .quad 0x3fb518874226130a
> + .quad 0x3fb4d3115d207eac
> + .quad 0x3fb48dae4bc31018
> + .quad 0x3fb4485e03dbdfad
> + .quad 0x3fb403207b414b7f
> + .quad 0x3fb3bdf5a7d1ee64
> + .quad 0x3fb378dd7f749714
> + .quad 0x3fb333d7f8183f4b
> + .quad 0x3fb2eee507b40301
> + .quad 0x3fb2aa04a44717a5
> + .quad 0x3fb26536c3d8c369
> + .quad 0x3fb2207b5c78549e
> + .quad 0x3fb1dbd2643d190b
> + .quad 0x3fb1973bd1465567
> + .quad 0x3fb152b799bb3cc9
> + .quad 0x3fb10e45b3cae831
> + .quad 0x3fb0c9e615ac4e17
> + .quad 0x3fb08598b59e3a07
> + .quad 0x3fb0415d89e74444
> + .quad 0x3faffa6911ab9301
> + .quad 0x3faf723b517fc523
> + .quad 0x3faeea31c006b87c
> + .quad 0x3fae624c4a0b5e1b
> + .quad 0x3fadda8adc67ee4e
> + .quad 0x3fad52ed6405d86f
> + .quad 0x3faccb73cdddb2cc
> + .quad 0x3fac441e06f72a9e
> + .quad 0x3fabbcebfc68f420
> + .quad 0x3fab35dd9b58baad
> + .quad 0x3faaaef2d0fb10fc
> + .quad 0x3faa282b8a936171
> + .quad 0x3fa9a187b573de7c
> + .quad 0x3fa91b073efd7314
> + .quad 0x3fa894aa149fb343
> + .quad 0x3fa80e7023d8ccc4
> + .quad 0x3fa788595a3577ba
> + .quad 0x3fa70265a550e777
> + .quad 0x3fa67c94f2d4bb58
> + .quad 0x3fa5f6e73078efb8
> + .quad 0x3fa5715c4c03ceef
> + .quad 0x3fa4ebf43349e26f
> + .quad 0x3fa466aed42de3ea
> + .quad 0x3fa3e18c1ca0ae92
> + .quad 0x3fa35c8bfaa1306b
> + .quad 0x3fa2d7ae5c3c5bae
> + .quad 0x3fa252f32f8d183f
> + .quad 0x3fa1ce5a62bc353a
> + .quad 0x3fa149e3e4005a8d
> + .quad 0x3fa0c58fa19dfaaa
> + .quad 0x3fa0415d89e74444
> + .quad 0x3f9f7a9b16782856
> + .quad 0x3f9e72bf2813ce51
> + .quad 0x3f9d6b2725979802
> + .quad 0x3f9c63d2ec14aaf2
> + .quad 0x3f9b5cc258b718e6
> + .quad 0x3f9a55f548c5c43f
> + .quad 0x3f994f6b99a24475
> + .quad 0x3f98492528c8cabf
> + .quad 0x3f974321d3d006d3
> + .quad 0x3f963d6178690bd6
> + .quad 0x3f9537e3f45f3565
> + .quad 0x3f9432a925980cc1
> + .quad 0x3f932db0ea132e22
> + .quad 0x3f9228fb1fea2e28
> + .quad 0x3f912487a5507f70
> + .quad 0x3f90205658935847
> + .quad 0x3f8e38ce3033310c
> + .quad 0x3f8c317384c75f06
> + .quad 0x3f8a2a9c6c170462
> + .quad 0x3f882448a388a2aa
> + .quad 0x3f861e77e8b53fc6
> + .quad 0x3f841929f96832f0
> + .quad 0x3f82145e939ef1e9
> + .quad 0x3f8010157588de71
> + .quad 0x3f7c189cbb0e27fb
> + .quad 0x3f78121214586b54
> + .quad 0x3f740c8a747878e2
> + .quad 0x3f70080559588b35
> + .quad 0x3f680904828985c0
> + .quad 0x3f60040155d5889e
> + .quad 0x3f50020055655889
> + .quad 0x0000000000000000
> + /*== poly_coeff[4] ==*/
> + .align 32
> + .quad 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A /* coeff4 */
> + .quad 0xbfd0000148058EE1, 0xbfd0000148058EE1, 0xbfd0000148058EE1, 0xbfd0000148058EE1 /* coeff3 */
> + .quad 0x3fd55555555543C5, 0x3fd55555555543C5, 0x3fd55555555543C5, 0x3fd55555555543C5 /* coeff2 */
> + .quad 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F /* coeff1 */
> + /*== ExpMask ==*/
> + .align 32
> + .quad 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff
> + /*== Two10 ==*/
> + .align 32
> + .quad 0x3f50000000000000, 0x3f50000000000000, 0x3f50000000000000, 0x3f50000000000000
> + /*== MinLog1p = -1+2^(-53) ==*/
> + .align 32
> + .quad 0xbfefffffffffffff, 0xbfefffffffffffff, 0xbfefffffffffffff, 0xbfefffffffffffff
> + /*== MaxLog1p ==*/
> + .align 32
> + .quad 0x7f3ffffffffff000, 0x7f3ffffffffff000, 0x7f3ffffffffff000, 0x7f3ffffffffff000
> + /*== One ==*/
> + .align 32
> + .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000
> + /*== SgnMask ==*/
> + .align 32
> + .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff
> + /*== XThreshold ==*/
> + .align 32
> + .quad 0x3e00000000000000, 0x3e00000000000000, 0x3e00000000000000, 0x3e00000000000000
> + /*== XhMask ==*/
> + .align 32
> + .quad 0xfffffffffffffc00, 0xfffffffffffffc00, 0xfffffffffffffc00, 0xfffffffffffffc00
> + /*== Threshold ==*/
> + .align 32
> + .quad 0x4086a00000000000, 0x4086a00000000000, 0x4086a00000000000, 0x4086a00000000000
> + /*== Bias ==*/
> + .align 32
> + .quad 0x408ff80000000000, 0x408ff80000000000, 0x408ff80000000000, 0x408ff80000000000
> + /*== Bias1 ==*/
> + .align 32
> + .quad 0x408ff00000000000, 0x408ff00000000000, 0x408ff00000000000, 0x408ff00000000000
> + /*== ExpMask ==*/
> + .align 32
> + .quad 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000
> + /*== ExpMask2 ==*/
> + .align 32
> + .quad 0x7f40000000000000, 0x7f40000000000000, 0x7f40000000000000, 0x7f40000000000000
> + /*== L2L ==*/
> + .align 32
> + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF
> + /*== dHalf ==*/
> + .align 32
> + .quad 0x3FE0000000000000, 0x3FE0000000000000, 0x3FE0000000000000, 0x3FE0000000000000
> + /*== dSign ==*/
> + .align 32
> + .quad 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000
> + /*== dTopMask12 ==*/
> + .align 32
> + .quad 0xFFFFFE0000000000, 0xFFFFFE0000000000, 0xFFFFFE0000000000, 0xFFFFFE0000000000
> + /*== dTopMask41 ==*/
> + .align 32
> + .quad 0xFFFFFFFFFFFFF000, 0xFFFFFFFFFFFFF000, 0xFFFFFFFFFFFFF000, 0xFFFFFFFFFFFFF000
> + /*== dTinyRange ==*/
> + .align 32
> + .quad 0x0350000000000000, 0x0350000000000000, 0x0350000000000000, 0x0350000000000000
> + .align 32
> + .type __svml_datanh_data_internal,@object
> + .size __svml_datanh_data_internal,.-__svml_datanh_data_internal
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core-avx2.S
> new file mode 100644
> index 0000000000..675ebd2fd6
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core-avx2.S
> @@ -0,0 +1,20 @@
> +/* AVX2 version of vectorized atanh, vector length is 8.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define _ZGVeN8v_atanh _ZGVeN8v_atanh_avx2_wrapper /* rename every _ZGVeN8v_atanh token in the file included below */
> +#include "../svml_d_atanh8_core.S" /* NOTE(review): presumably the generic 8-lane wrapper; its contents are not shown here */
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core.c
> new file mode 100644
> index 0000000000..4da8e20fad
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core.c
> @@ -0,0 +1,27 @@
> +/* Multiple versions of vectorized atanh, vector length is 8.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define SYMBOL_NAME _ZGVeN8v_atanh /* symbol passed to the ifunc macro below */
> +#include "ifunc-mathvec-avx512-skx.h" /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR - confirm */
> +
> +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
> +
> +#ifdef SHARED
> +__hidden_ver1 (_ZGVeN8v_atanh, __GI__ZGVeN8v_atanh, __redirect__ZGVeN8v_atanh)
> + __attribute__ ((visibility ("hidden")));
> +#endif /* SHARED */
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core_avx512.S
> new file mode 100644
> index 0000000000..ef600c073a
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atanh8_core_avx512.S
> @@ -0,0 +1,401 @@
> +/* Function atanh vectorized with AVX-512.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +/*
> + * ALGORITHM DESCRIPTION:
> + *
> + * Compute atanh(x) as 0.5 * log((1 + x)/(1 - x))
> + * using a small lookup table that maps to AVX-512 permute instructions
> + *
> + * Special cases:
> + *
> + * atanh(0) = 0
> + * atanh(+1) = +INF
> + * atanh(-1) = -INF
> + * atanh(x) = NaN if |x| > 1, or if x is a NaN or INF
> + *
> + */
> +
> +/* Byte offsets of the fields of data table __svml_datanh_data_internal_avx512
> + */
> +#define Log_tbl_H 0 /* 16 quads = 128 bytes */
> +#define Log_tbl_L 128 /* 16 quads = 128 bytes */
> +#define One 256 /* this and every later field: 8 quads = 64 bytes */
> +#define AbsMask 320
> +#define AddB5 384
> +#define RcpBitMask 448
> +#define poly_coeff8 512
> +#define poly_coeff7 576
> +#define poly_coeff6 640
> +#define poly_coeff5 704
> +#define poly_coeff4 768
> +#define poly_coeff3 832
> +#define poly_coeff2 896
> +#define poly_coeff1 960
> +#define poly_coeff0 1024
> +#define Half 1088
> +#define L2H 1152
> +#define L2L 1216
> +
> +#include <sysdep.h>
> +
> + .text
> + .section .text.evex512,"ax",@progbits
> +ENTRY(_ZGVeN8v_atanh_skx) /* in: zmm0 = 8 doubles x; out: zmm0 = atanh(x) per lane */
> + pushq %rbp
> + cfi_def_cfa_offset(16)
> + movq %rsp, %rbp
> + cfi_def_cfa(6, 16)
> + cfi_offset(6, -16)
> + andq $-64, %rsp /* align the frame to 64 bytes */
> + subq $192, %rsp /* scratch: saved regs at 0..23, input at 64, result at 128 */
> + vmovups One+__svml_datanh_data_internal_avx512(%rip), %zmm15
> +
> +/* constants used below to round the reciprocals to 1+4b mantissas */
> + vmovups AddB5+__svml_datanh_data_internal_avx512(%rip), %zmm6
> + vmovups RcpBitMask+__svml_datanh_data_internal_avx512(%rip), %zmm9
> + vmovaps %zmm0, %zmm2 /* zmm2 = x, kept for the special-input path */
> + vandpd AbsMask+__svml_datanh_data_internal_avx512(%rip), %zmm2, %zmm13 /* zmm13 = y = |x| */
> +
> +/* Yp = 1+y */
> + vaddpd {rn-sae}, %zmm15, %zmm13, %zmm0
> +
> +/* Ym = 1-y */
> + vsubpd {rn-sae}, %zmm13, %zmm15, %zmm4
> + vxorpd %zmm13, %zmm2, %zmm1 /* zmm1 = x ^ |x| = sign bit of x */
> +
> +/* Yp_high */
> + vsubpd {rn-sae}, %zmm15, %zmm0, %zmm7
> +
> +/* -Ym_high */
> + vsubpd {rn-sae}, %zmm15, %zmm4, %zmm12
> +
> +/* RcpP ~ 1/Yp */
> + vrcp14pd %zmm0, %zmm3
> +
> +/* RcpM ~ 1/Ym */
> + vrcp14pd %zmm4, %zmm5
> +
> +/* input outside (-1, 1) ?  Predicate 21 (NLT_UQ): true when y >= 1 or y is NaN */
> + vcmppd $21, {sae}, %zmm15, %zmm13, %k0
> + vpaddq %zmm6, %zmm3, %zmm11
> + vpaddq %zmm6, %zmm5, %zmm10
> +
> +/* Yp_low */
> + vsubpd {rn-sae}, %zmm7, %zmm13, %zmm8
> + vandpd %zmm9, %zmm11, %zmm14 /* zmm14 = rounded RcpP */
> + vandpd %zmm9, %zmm10, %zmm3 /* zmm3 = rounded RcpM */
> +
> +/* Ym_low */
> + vaddpd {rn-sae}, %zmm12, %zmm13, %zmm12
> +
> +/* Reduced argument: Rp = (RcpP*Yp - 1)+RcpP*Yp_low */
> + vfmsub213pd {rn-sae}, %zmm15, %zmm14, %zmm0
> +
> +/* Reduced argument: Rm = (RcpM*Ym - 1)+RcpM*Ym_low */
> + vfmsub231pd {rn-sae}, %zmm3, %zmm4, %zmm15
> +
> +/* exponents */
> + vgetexppd {sae}, %zmm14, %zmm5
> + vgetexppd {sae}, %zmm3, %zmm4
> +
> +/* Table lookups */
> + vmovups __svml_datanh_data_internal_avx512(%rip), %zmm9
> + vmovups Log_tbl_H+64+__svml_datanh_data_internal_avx512(%rip), %zmm13
> + vmovups Log_tbl_L+__svml_datanh_data_internal_avx512(%rip), %zmm7
> + vfmadd231pd {rn-sae}, %zmm14, %zmm8, %zmm0
> + vfnmadd231pd {rn-sae}, %zmm3, %zmm12, %zmm15
> +
> +/* Prepare table index */
> + vpsrlq $48, %zmm14, %zmm11
> + vpsrlq $48, %zmm3, %zmm8
> + vmovups Log_tbl_L+64+__svml_datanh_data_internal_avx512(%rip), %zmm14
> +
> +/* polynomials */
> + vmovups poly_coeff8+__svml_datanh_data_internal_avx512(%rip), %zmm3
> +
> +/* Km-Kp */
> + vsubpd {rn-sae}, %zmm5, %zmm4, %zmm5
> + vmovups poly_coeff7+__svml_datanh_data_internal_avx512(%rip), %zmm4
> + kmovw %k0, %edx /* edx = bitmask of lanes flagged by the range compare */
> + vmovaps %zmm11, %zmm10
> + vmovaps %zmm4, %zmm6
> + vpermi2pd %zmm13, %zmm9, %zmm10
> + vpermi2pd %zmm14, %zmm7, %zmm11
> + vpermt2pd %zmm13, %zmm8, %zmm9
> + vpermt2pd %zmm14, %zmm8, %zmm7
> + vmovups poly_coeff6+__svml_datanh_data_internal_avx512(%rip), %zmm8
> + vfmadd231pd {rn-sae}, %zmm0, %zmm3, %zmm6
> + vfmadd231pd {rn-sae}, %zmm15, %zmm3, %zmm4
> + vmovups poly_coeff3+__svml_datanh_data_internal_avx512(%rip), %zmm13
> + vmovups poly_coeff2+__svml_datanh_data_internal_avx512(%rip), %zmm14
> + vfmadd213pd {rn-sae}, %zmm8, %zmm0, %zmm6
> + vfmadd213pd {rn-sae}, %zmm8, %zmm15, %zmm4
> + vmovups poly_coeff0+__svml_datanh_data_internal_avx512(%rip), %zmm8
> + vsubpd {rn-sae}, %zmm11, %zmm7, %zmm12
> +
> +/* table values */
> + vsubpd {rn-sae}, %zmm10, %zmm9, %zmm3
> + vmovups poly_coeff5+__svml_datanh_data_internal_avx512(%rip), %zmm7
> + vmovups poly_coeff4+__svml_datanh_data_internal_avx512(%rip), %zmm9
> +
> +/* K*L2H + Th */
> + vmovups L2H+__svml_datanh_data_internal_avx512(%rip), %zmm10
> +
> +/* K*L2L + Tl */
> + vmovups L2L+__svml_datanh_data_internal_avx512(%rip), %zmm11
> + vfmadd213pd {rn-sae}, %zmm7, %zmm0, %zmm6
> + vfmadd213pd {rn-sae}, %zmm7, %zmm15, %zmm4
> + vmovups poly_coeff1+__svml_datanh_data_internal_avx512(%rip), %zmm7
> + vfmadd231pd {rn-sae}, %zmm5, %zmm10, %zmm3
> + vfmadd213pd {rn-sae}, %zmm12, %zmm11, %zmm5
> + vfmadd213pd {rn-sae}, %zmm9, %zmm0, %zmm6
> + vfmadd213pd {rn-sae}, %zmm9, %zmm15, %zmm4
> + vfmadd213pd {rn-sae}, %zmm13, %zmm0, %zmm6
> + vfmadd213pd {rn-sae}, %zmm13, %zmm15, %zmm4
> + vfmadd213pd {rn-sae}, %zmm14, %zmm0, %zmm6
> + vfmadd213pd {rn-sae}, %zmm14, %zmm15, %zmm4
> + vfmadd213pd {rn-sae}, %zmm7, %zmm0, %zmm6
> + vfmadd213pd {rn-sae}, %zmm7, %zmm15, %zmm4
> + vfmadd213pd {rn-sae}, %zmm8, %zmm0, %zmm6
> + vfmadd213pd {rn-sae}, %zmm8, %zmm15, %zmm4
> +
> +/* (K*L2L + Tl) + Rp*PolyP */
> + vfmadd213pd {rn-sae}, %zmm5, %zmm0, %zmm6
> + vorpd Half+__svml_datanh_data_internal_avx512(%rip), %zmm1, %zmm0 /* zmm0 = 0.5 carrying the sign of x */
> +
> +/* (K*L2L + Tl) + Rp*PolyP -Rm*PolyM */
> + vfnmadd213pd {rn-sae}, %zmm6, %zmm15, %zmm4
> + vaddpd {rn-sae}, %zmm4, %zmm3, %zmm1 /* add the high part (K*L2H + Th) */
> + vmulpd {rn-sae}, %zmm0, %zmm1, %zmm0 /* scale by the signed 0.5 */
> + testl %edx, %edx
> +
> +/* Go to special inputs processing branch if any lane was flagged */
> + jne L(SPECIAL_VALUES_BRANCH)
> + # LOE rbx r12 r13 r14 r15 edx zmm0 zmm2
> +
> +/* Restore registers
> + * and exit the function
> + */
> +
> +L(EXIT):
> + movq %rbp, %rsp
> + popq %rbp
> + cfi_def_cfa(7, 8)
> + cfi_restore(6)
> + ret
> + cfi_def_cfa(6, 16)
> + cfi_offset(6, -16)
> +
> +/* Branch to process special inputs:
> + * spill the inputs and the vector results to the stack
> + */
> +
> +L(SPECIAL_VALUES_BRANCH):
> + vmovups %zmm2, 64(%rsp) /* original inputs */
> + vmovups %zmm0, 128(%rsp) /* vector-path results */
> + # LOE rbx r12 r13 r14 r15 edx zmm0
> +
> + xorl %eax, %eax
> + # LOE rbx r12 r13 r14 r15 eax edx
> +
> + vzeroupper
> + movq %r12, 16(%rsp)
> + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
> + movl %eax, %r12d /* r12d = lane index, starts at 0 */
> + movq %r13, 8(%rsp)
> + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
> + movl %edx, %r13d /* r13d = mask of flagged lanes */
> + movq %r14, (%rsp)
> + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
> + # LOE rbx r15 r12d r13d
> +
> +/* Range mask
> + * bit check for the current lane
> + */
> +
> +L(RANGEMASK_CHECK):
> + btl %r12d, %r13d
> +
> +/* Bit set: call scalar math function for this lane */
> + jc L(SCALAR_MATH_CALL)
> + # LOE rbx r15 r12d r13d
> +
> +/* Special inputs
> + * processing loop over the 8 lanes
> + */
> +
> +L(SPECIAL_VALUES_LOOP):
> + incl %r12d
> + cmpl $8, %r12d
> +
> +/* Check bits in range mask */
> + jl L(RANGEMASK_CHECK)
> + # LOE rbx r15 r12d r13d
> +
> + movq 16(%rsp), %r12
> + cfi_restore(12)
> + movq 8(%rsp), %r13
> + cfi_restore(13)
> + movq (%rsp), %r14
> + cfi_restore(14)
> + vmovups 128(%rsp), %zmm0 /* reload the results, including patched lanes */
> +
> +/* Go to exit */
> + jmp L(EXIT)
> + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
> + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
> + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
> + # LOE rbx r12 r13 r14 r15 zmm0
> +
> +/* Scalar math function call
> + * to process special input
> + */
> +
> +L(SCALAR_MATH_CALL):
> + movl %r12d, %r14d /* r14 = lane index, used as an 8-byte element index */
> + movsd 64(%rsp,%r14,8), %xmm0 /* load this lane's input */
> + call atanh@PLT
> + # LOE rbx r14 r15 r12d r13d xmm0
> +
> + movsd %xmm0, 128(%rsp,%r14,8) /* overwrite this lane's result */
> +
> +/* Process special inputs in loop */
> + jmp L(SPECIAL_VALUES_LOOP)
> + # LOE rbx r15 r12d r13d
> +END(_ZGVeN8v_atanh_skx)
> +
> + .section .rodata, "a"
> + .align 64
> +
> +#ifdef __svml_datanh_data_internal_avx512_typedef
> +typedef unsigned int VUINT32;
> +typedef struct {
> + __declspec(align(64)) VUINT32 Log_tbl_H[16][2];
> + __declspec(align(64)) VUINT32 Log_tbl_L[16][2];
> + __declspec(align(64)) VUINT32 One[8][2];
> + __declspec(align(64)) VUINT32 AbsMask[8][2];
> + __declspec(align(64)) VUINT32 AddB5[8][2];
> + __declspec(align(64)) VUINT32 RcpBitMask[8][2];
> + __declspec(align(64)) VUINT32 poly_coeff8[8][2];
> + __declspec(align(64)) VUINT32 poly_coeff7[8][2];
> + __declspec(align(64)) VUINT32 poly_coeff6[8][2];
> + __declspec(align(64)) VUINT32 poly_coeff5[8][2];
> + __declspec(align(64)) VUINT32 poly_coeff4[8][2];
> + __declspec(align(64)) VUINT32 poly_coeff3[8][2];
> + __declspec(align(64)) VUINT32 poly_coeff2[8][2];
> + __declspec(align(64)) VUINT32 poly_coeff1[8][2];
> + __declspec(align(64)) VUINT32 poly_coeff0[8][2];
> + __declspec(align(64)) VUINT32 Half[8][2];
> + __declspec(align(64)) VUINT32 L2H[8][2];
> + __declspec(align(64)) VUINT32 L2L[8][2];
> + } __svml_datanh_data_internal_avx512;
> +#endif
> +__svml_datanh_data_internal_avx512:
> + /*== Log_tbl_H ==*/
> + .quad 0x0000000000000000
> + .quad 0x3faf0a30c0100000
> + .quad 0x3fbe27076e2a0000
> + .quad 0x3fc5ff3070a80000
> + .quad 0x3fcc8ff7c79b0000
> + .quad 0x3fd1675cabab8000
> + .quad 0x3fd4618bc21c8000
> + .quad 0x3fd739d7f6bc0000
> + .quad 0x3fd9f323ecbf8000
> + .quad 0x3fdc8ff7c79a8000
> + .quad 0x3fdf128f5faf0000
> + .quad 0x3fe0be72e4254000
> + .quad 0x3fe1e85f5e704000
> + .quad 0x3fe307d7334f0000
> + .quad 0x3fe41d8fe8468000
> + .quad 0x3fe52a2d265bc000
> + /*== Log_tbl_L ==*/
> + .align 64
> + .quad 0x0000000000000000
> + .quad 0x3d662a6617cc9717
> + .quad 0x3d6e5cbd3d50fffc
> + .quad 0xbd6b0b0de3077d7e
> + .quad 0xbd697794f689f843
> + .quad 0x3d630701ce63eab9
> + .quad 0xbd609ec17a426426
> + .quad 0xbd67fcb18ed9d603
> + .quad 0x3d584bf2b68d766f
> + .quad 0x3d5a21ac25d81ef3
> + .quad 0x3d3bb2cd720ec44c
> + .quad 0xbd657d49676844cc
> + .quad 0x3d1a07bd8b34be7c
> + .quad 0x3d60be1fb590a1f5
> + .quad 0xbd5aa33736867a17
> + .quad 0x3d46abb9df22bc57
> + /*== One ==*/
> + .align 64
> + .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000
> + /*== AbsMask ==*/
> + .align 64
> + .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff
> + /*== AddB5 ==*/
> + .align 64
> + .quad 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000
> + /*== RcpBitMask ==*/
> + .align 64
> + .quad 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000
> + /*== poly_coeff8 ==*/
> + .align 64
> + .quad 0x3fbc81dd40d38142, 0x3fbc81dd40d38142, 0x3fbc81dd40d38142, 0x3fbc81dd40d38142, 0x3fbc81dd40d38142, 0x3fbc81dd40d38142, 0x3fbc81dd40d38142, 0x3fbc81dd40d38142
> + /*== poly_coeff7 ==*/
> + .align 64
> + .quad 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70
> + /*== poly_coeff6 ==*/
> + .align 64
> + .quad 0x3fc2492298ffdae8, 0x3fc2492298ffdae8, 0x3fc2492298ffdae8, 0x3fc2492298ffdae8, 0x3fc2492298ffdae8, 0x3fc2492298ffdae8, 0x3fc2492298ffdae8, 0x3fc2492298ffdae8
> + /*== poly_coeff5 ==*/
> + .align 64
> + .quad 0xbfc55553f871e5c5, 0xbfc55553f871e5c5, 0xbfc55553f871e5c5, 0xbfc55553f871e5c5, 0xbfc55553f871e5c5, 0xbfc55553f871e5c5, 0xbfc55553f871e5c5, 0xbfc55553f871e5c5
> + /*== poly_coeff4 ==*/
> + .align 64
> + .quad 0x3fc9999999cd394a, 0x3fc9999999cd394a, 0x3fc9999999cd394a, 0x3fc9999999cd394a, 0x3fc9999999cd394a, 0x3fc9999999cd394a, 0x3fc9999999cd394a, 0x3fc9999999cd394a
> + /*== poly_coeff3 ==*/
> + .align 64
> + .quad 0xbfd00000000c2a01, 0xbfd00000000c2a01, 0xbfd00000000c2a01, 0xbfd00000000c2a01, 0xbfd00000000c2a01, 0xbfd00000000c2a01, 0xbfd00000000c2a01, 0xbfd00000000c2a01
> + /*== poly_coeff2 ==*/
> + .align 64
> + .quad 0x3fd5555555555462, 0x3fd5555555555462, 0x3fd5555555555462, 0x3fd5555555555462, 0x3fd5555555555462, 0x3fd5555555555462, 0x3fd5555555555462, 0x3fd5555555555462
> + /*== poly_coeff1 ==*/
> + .align 64
> + .quad 0xbfdfffffffffffc5, 0xbfdfffffffffffc5, 0xbfdfffffffffffc5, 0xbfdfffffffffffc5, 0xbfdfffffffffffc5, 0xbfdfffffffffffc5, 0xbfdfffffffffffc5, 0xbfdfffffffffffc5
> + /*== poly_coeff0 ==*/
> + .align 64
> + .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000
> + /*== Half ==*/
> + .align 64
> + .quad 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000
> + /*== L2H = log(2)_high ==*/
> + .align 64
> + .quad 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000
> + /*== L2L = log(2)_low ==*/
> + .align 64
> + .quad 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000
> + .align 64
> + .type __svml_datanh_data_internal_avx512,@object
> + .size __svml_datanh_data_internal_avx512,.-__svml_datanh_data_internal_avx512
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core-avx2.S
> new file mode 100644
> index 0000000000..1af3662f65
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core-avx2.S
> @@ -0,0 +1,20 @@
> +/* AVX2 version of vectorized atanhf, vector length is 16.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define _ZGVeN16v_atanhf _ZGVeN16v_atanhf_avx2_wrapper /* rename every _ZGVeN16v_atanhf token in the file included below */
> +#include "../svml_s_atanhf16_core.S" /* NOTE(review): presumably the generic 16-lane wrapper; its contents are not shown here */
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core.c
> new file mode 100644
> index 0000000000..4b1190f0eb
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core.c
> @@ -0,0 +1,28 @@
> +/* Multiple versions of vectorized atanhf, vector length is 16.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define SYMBOL_NAME _ZGVeN16v_atanhf /* symbol passed to the ifunc macro below */
> +#include "ifunc-mathvec-avx512-skx.h" /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR - confirm */
> +
> +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
> +
> +#ifdef SHARED
> +__hidden_ver1 (_ZGVeN16v_atanhf, __GI__ZGVeN16v_atanhf,
> + __redirect__ZGVeN16v_atanhf)
> + __attribute__ ((visibility ("hidden")));
> +#endif /* SHARED */
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core_avx512.S
> new file mode 100644
> index 0000000000..6c5f6a54fa
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf16_core_avx512.S
> @@ -0,0 +1,393 @@
> +/* Function atanhf vectorized with AVX-512.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +/*
> + * ALGORITHM DESCRIPTION:
> + *
> + * Compute atanh(x) as 0.5 * log((1 + x)/(1 - x))
> + * using a small lookup table that maps to AVX-512 permute instructions
> + *
> + * Special cases:
> + *
> + * atanh(0) = 0
> + * atanh(+1) = +INF
> + * atanh(-1) = -INF
> + * atanh(x) = NaN if |x| > 1, or if x is a NaN or INF
> + *
> + */
> +
> +/* Byte offsets of the fields of data table __svml_satanh_data_internal_avx512
> + */
> +#define Log_tbl_H 0 /* read as two 64-byte vectors, at +0 and +64 */
> +#define Log_tbl_L 128 /* read as two 64-byte vectors, at +0 and +64 */
> +#define One 256 /* this and every later field are spaced 64 bytes apart */
> +#define AbsMask 320
> +#define AddB5 384
> +#define RcpBitMask 448
> +#define poly_coeff3 512
> +#define poly_coeff2 576
> +#define poly_coeff1 640
> +#define poly_coeff0 704
> +#define Half 768
> +#define L2H 832
> +#define L2L 896
> +
> +#include <sysdep.h>
> +
> + .text
> + .section .text.evex512,"ax",@progbits
> +ENTRY(_ZGVeN16v_atanhf_skx)
> + pushq %rbp
> + cfi_def_cfa_offset(16)
> + movq %rsp, %rbp
> + cfi_def_cfa(6, 16)
> + cfi_offset(6, -16)
> + andq $-64, %rsp
> + subq $192, %rsp
> + vmovups One+__svml_satanh_data_internal_avx512(%rip), %zmm4
> +
> +/* round reciprocals to 1+5b mantissas */
> + vmovups AddB5+__svml_satanh_data_internal_avx512(%rip), %zmm14
> + vmovups RcpBitMask+__svml_satanh_data_internal_avx512(%rip), %zmm1
> + vmovaps %zmm0, %zmm11
> + vandps AbsMask+__svml_satanh_data_internal_avx512(%rip), %zmm11, %zmm6
> +
> +/* 1+y */
> + vaddps {rn-sae}, %zmm4, %zmm6, %zmm9
> +
> +/* 1-y */
> + vsubps {rn-sae}, %zmm6, %zmm4, %zmm8
> + vxorps %zmm6, %zmm11, %zmm10
> +
> +/* Yp_high */
> + vsubps {rn-sae}, %zmm4, %zmm9, %zmm2
> +
> +/* -Ym_high */
> + vsubps {rn-sae}, %zmm4, %zmm8, %zmm5
> +
> +/* RcpP ~ 1/Yp */
> + vrcp14ps %zmm9, %zmm12
> +
> +/* RcpM ~ 1/Ym */
> + vrcp14ps %zmm8, %zmm13
> +
> +/* input outside (-1, 1) ? */
> + vcmpps $21, {sae}, %zmm4, %zmm6, %k0
> + vpaddd %zmm14, %zmm12, %zmm15
> + vpaddd %zmm14, %zmm13, %zmm0
> +
> +/* Yp_low */
> + vsubps {rn-sae}, %zmm2, %zmm6, %zmm3
> + vandps %zmm1, %zmm15, %zmm7
> + vandps %zmm1, %zmm0, %zmm12
> +
> +/* Ym_low */
> + vaddps {rn-sae}, %zmm5, %zmm6, %zmm5
> +
> +/* Reduced argument: Rp = (RcpP*Yp - 1)+RcpP*Yp_low */
> + vfmsub213ps {rn-sae}, %zmm4, %zmm7, %zmm9
> +
> +/* Reduced argument: Rm = (RcpM*Ym - 1)+RcpM*Ym_low */
> + vfmsub231ps {rn-sae}, %zmm12, %zmm8, %zmm4
> + vmovups Log_tbl_L+__svml_satanh_data_internal_avx512(%rip), %zmm8
> + vmovups Log_tbl_L+64+__svml_satanh_data_internal_avx512(%rip), %zmm13
> +
> +/* exponents */
> + vgetexpps {sae}, %zmm7, %zmm15
> + vfmadd231ps {rn-sae}, %zmm7, %zmm3, %zmm9
> +
> +/* Table lookups */
> + vmovups __svml_satanh_data_internal_avx512(%rip), %zmm6
> + vgetexpps {sae}, %zmm12, %zmm14
> + vfnmadd231ps {rn-sae}, %zmm12, %zmm5, %zmm4
> +
> +/* Prepare table index */
> + vpsrld $18, %zmm7, %zmm3
> + vpsrld $18, %zmm12, %zmm2
> + vmovups Log_tbl_H+64+__svml_satanh_data_internal_avx512(%rip), %zmm7
> + vmovups poly_coeff1+__svml_satanh_data_internal_avx512(%rip), %zmm12
> +
> +/* Km-Kp */
> + vsubps {rn-sae}, %zmm15, %zmm14, %zmm1
> + kmovw %k0, %edx
> + vmovaps %zmm3, %zmm0
> + vpermi2ps %zmm13, %zmm8, %zmm3
> + vpermt2ps %zmm13, %zmm2, %zmm8
> + vpermi2ps %zmm7, %zmm6, %zmm0
> + vpermt2ps %zmm7, %zmm2, %zmm6
> + vsubps {rn-sae}, %zmm3, %zmm8, %zmm5
> +
> +/* K*L2H + Th */
> + vmovups L2H+__svml_satanh_data_internal_avx512(%rip), %zmm2
> +
> +/* K*L2L + Tl */
> + vmovups L2L+__svml_satanh_data_internal_avx512(%rip), %zmm3
> +
> +/* polynomials */
> + vmovups poly_coeff3+__svml_satanh_data_internal_avx512(%rip), %zmm7
> + vmovups poly_coeff0+__svml_satanh_data_internal_avx512(%rip), %zmm13
> +
> +/* table values */
> + vsubps {rn-sae}, %zmm0, %zmm6, %zmm0
> + vfmadd231ps {rn-sae}, %zmm1, %zmm2, %zmm0
> + vfmadd213ps {rn-sae}, %zmm5, %zmm3, %zmm1
> + vmovups poly_coeff2+__svml_satanh_data_internal_avx512(%rip), %zmm3
> + vmovaps %zmm3, %zmm2
> + vfmadd231ps {rn-sae}, %zmm9, %zmm7, %zmm2
> + vfmadd231ps {rn-sae}, %zmm4, %zmm7, %zmm3
> + vfmadd213ps {rn-sae}, %zmm12, %zmm9, %zmm2
> + vfmadd213ps {rn-sae}, %zmm12, %zmm4, %zmm3
> + vfmadd213ps {rn-sae}, %zmm13, %zmm9, %zmm2
> + vfmadd213ps {rn-sae}, %zmm13, %zmm4, %zmm3
> +
> +/* (K*L2L + Tl) + Rp*PolyP */
> + vfmadd213ps {rn-sae}, %zmm1, %zmm9, %zmm2
> + vorps Half+__svml_satanh_data_internal_avx512(%rip), %zmm10, %zmm9
> +
> +/* (K*L2L + Tl) + Rp*PolyP -Rm*PolyM */
> + vfnmadd213ps {rn-sae}, %zmm2, %zmm4, %zmm3
> + vaddps {rn-sae}, %zmm3, %zmm0, %zmm4
> + vmulps {rn-sae}, %zmm9, %zmm4, %zmm0
> + testl %edx, %edx
> +
> +/* Go to special inputs processing branch */
> + jne L(SPECIAL_VALUES_BRANCH)
> + # LOE rbx r12 r13 r14 r15 edx zmm0 zmm11
> +
> +/* Restore registers
> + * and exit the function
> + */
> +
> +L(EXIT):
> + movq %rbp, %rsp
> + popq %rbp
> + cfi_def_cfa(7, 8)
> + cfi_restore(6)
> + ret
> + cfi_def_cfa(6, 16)
> + cfi_offset(6, -16)
> +
> +/* Branch to process
> + * special inputs
> + */
> +
> +L(SPECIAL_VALUES_BRANCH):
> + vmovups %zmm11, 64(%rsp)
> + vmovups %zmm0, 128(%rsp)
> + # LOE rbx r12 r13 r14 r15 edx zmm0
> +
> + xorl %eax, %eax
> + # LOE rbx r12 r13 r14 r15 eax edx
> +
> + vzeroupper
> + movq %r12, 16(%rsp)
> + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
> + movl %eax, %r12d
> + movq %r13, 8(%rsp)
> + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
> + movl %edx, %r13d
> + movq %r14, (%rsp)
> + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
> + # LOE rbx r15 r12d r13d
> +
> +/* Range mask
> + * bits check
> + */
> +
> +L(RANGEMASK_CHECK):
> + btl %r12d, %r13d
> +
> +/* Call scalar math function */
> + jc L(SCALAR_MATH_CALL)
> + # LOE rbx r15 r12d r13d
> +
> +/* Special inputs
> + * processing loop
> + */
> +
> +L(SPECIAL_VALUES_LOOP):
> + incl %r12d
> + cmpl $16, %r12d
> +
> +/* Check bits in range mask */
> + jl L(RANGEMASK_CHECK)
> + # LOE rbx r15 r12d r13d
> +
> + movq 16(%rsp), %r12
> + cfi_restore(12)
> + movq 8(%rsp), %r13
> + cfi_restore(13)
> + movq (%rsp), %r14
> + cfi_restore(14)
> + vmovups 128(%rsp), %zmm0
> +
> +/* Go to exit */
> + jmp L(EXIT)
> + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
> + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
> + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
> + # LOE rbx r12 r13 r14 r15 zmm0
> +
> +/* Scalar math function call
> + * to process special input
> + */
> +
> +L(SCALAR_MATH_CALL):
> + movl %r12d, %r14d
> + movss 64(%rsp,%r14,4), %xmm0
> + call atanhf@PLT
> + # LOE rbx r14 r15 r12d r13d xmm0
> +
> + movss %xmm0, 128(%rsp,%r14,4)
> +
> +/* Process special inputs in loop */
> + jmp L(SPECIAL_VALUES_LOOP)
> + # LOE rbx r15 r12d r13d
> +END(_ZGVeN16v_atanhf_skx)
> +
> + .section .rodata, "a"
> + .align 64
> +
> +#ifdef __svml_satanh_data_internal_avx512_typedef
> +typedef unsigned int VUINT32;
> +typedef struct {
> + __declspec(align(64)) VUINT32 Log_tbl_H[32][1];
> + __declspec(align(64)) VUINT32 Log_tbl_L[32][1];
> + __declspec(align(64)) VUINT32 One[16][1];
> + __declspec(align(64)) VUINT32 AbsMask[16][1];
> + __declspec(align(64)) VUINT32 AddB5[16][1];
> + __declspec(align(64)) VUINT32 RcpBitMask[16][1];
> + __declspec(align(64)) VUINT32 poly_coeff3[16][1];
> + __declspec(align(64)) VUINT32 poly_coeff2[16][1];
> + __declspec(align(64)) VUINT32 poly_coeff1[16][1];
> + __declspec(align(64)) VUINT32 poly_coeff0[16][1];
> + __declspec(align(64)) VUINT32 Half[16][1];
> + __declspec(align(64)) VUINT32 L2H[16][1];
> + __declspec(align(64)) VUINT32 L2L[16][1];
> + } __svml_satanh_data_internal_avx512;
> +#endif
> +__svml_satanh_data_internal_avx512:
> + /*== Log_tbl_H ==*/
> + .long 0x00000000
> + .long 0x3cfc0000
> + .long 0x3d780000
> + .long 0x3db78000
> + .long 0x3df10000
> + .long 0x3e14c000
> + .long 0x3e300000
> + .long 0x3e4a8000
> + .long 0x3e648000
> + .long 0x3e7dc000
> + .long 0x3e8b4000
> + .long 0x3e974000
> + .long 0x3ea30000
> + .long 0x3eae8000
> + .long 0x3eb9c000
> + .long 0x3ec4e000
> + .long 0x3ecfa000
> + .long 0x3eda2000
> + .long 0x3ee48000
> + .long 0x3eeea000
> + .long 0x3ef8a000
> + .long 0x3f013000
> + .long 0x3f05f000
> + .long 0x3f0aa000
> + .long 0x3f0f4000
> + .long 0x3f13d000
> + .long 0x3f184000
> + .long 0x3f1ca000
> + .long 0x3f20f000
> + .long 0x3f252000
> + .long 0x3f295000
> + .long 0x3f2d7000
> + /*== Log_tbl_L ==*/
> + .align 64
> + .long 0x00000000
> + .long 0x3726c39e
> + .long 0x38a30c01
> + .long 0x37528ae5
> + .long 0x38e0edc5
> + .long 0xb8ab41f8
> + .long 0xb7cf8f58
> + .long 0x3896a73d
> + .long 0xb5838656
> + .long 0x380c36af
> + .long 0xb8235454
> + .long 0x3862bae1
> + .long 0x38c5e10e
> + .long 0x38dedfac
> + .long 0x38ebfb5e
> + .long 0xb8e63c9f
> + .long 0xb85c1340
> + .long 0x38777bcd
> + .long 0xb6038656
> + .long 0x37d40984
> + .long 0xb8b85028
> + .long 0xb8ad5a5a
> + .long 0x3865c84a
> + .long 0x38c3d2f5
> + .long 0x383ebce1
> + .long 0xb8a1ed76
> + .long 0xb7a332c4
> + .long 0xb779654f
> + .long 0xb8602f73
> + .long 0x38f85db0
> + .long 0x37b4996f
> + .long 0xb8bfb3ca
> + /*== One ==*/
> + .align 64
> + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
> + /*== AbsMask ==*/
> + .align 64
> + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
> + /*== AddB5 ==*/
> + .align 64
> + .long 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000
> + /*== RcpBitMask ==*/
> + .align 64
> + .long 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000
> + /*== poly_coeff3 ==*/
> + .align 64
> + .long 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810
> + /*== poly_coeff2 ==*/
> + .align 64
> + .long 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e
> + /*== poly_coeff1 ==*/
> + .align 64
> + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000
> + /*== poly_coeff0 ==*/
> + .align 64
> + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
> + /*== Half ==*/
> + .align 64
> + .long 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000
> + /*== L2H = log(2)_high ==*/
> + .align 64
> + .long 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000
> + /*== L2L = log(2)_low ==*/
> + .align 64
> + .long 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4
> + .align 64
> + .type __svml_satanh_data_internal_avx512,@object
> + .size __svml_satanh_data_internal_avx512,.-__svml_satanh_data_internal_avx512
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core-sse2.S
> new file mode 100644
> index 0000000000..b750092887
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core-sse2.S
> @@ -0,0 +1,20 @@
> +/* SSE2 version of vectorized atanhf, vector length is 4.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define _ZGVbN4v_atanhf _ZGVbN4v_atanhf_sse2
> +#include "../svml_s_atanhf4_core.S"
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core.c
> new file mode 100644
> index 0000000000..46624c48cd
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core.c
> @@ -0,0 +1,28 @@
> +/* Multiple versions of vectorized atanhf, vector length is 4.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define SYMBOL_NAME _ZGVbN4v_atanhf
> +#include "ifunc-mathvec-sse4_1.h"
> +
> +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
> +
> +#ifdef SHARED
> +__hidden_ver1 (_ZGVbN4v_atanhf, __GI__ZGVbN4v_atanhf,
> + __redirect__ZGVbN4v_atanhf)
> + __attribute__ ((visibility ("hidden")));
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core_sse4.S
> new file mode 100644
> index 0000000000..77e46cb5b9
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf4_core_sse4.S
> @@ -0,0 +1,361 @@
> +/* Function atanhf vectorized with SSE4.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +/*
> + * ALGORITHM DESCRIPTION:
> + *
> + * Compute atanh(x) as 0.5 * log((1 + x)/(1 - x))
> + *
> + * Special cases:
> + *
> + * atanh(0) = 0
> + * atanh(+1) = +INF
> + * atanh(-1) = -INF
> + * atanh(x) = NaN if |x| > 1, or if x is a NaN or INF
> + *
> + */
> +
> +/* Offsets for data table __svml_satanh_data_internal
> + */
> +#define SgnMask 0
> +#define sOne 16
> +#define sPoly 32
> +#define iBrkValue 160
> +#define iOffExpoMask 176
> +#define sHalf 192
> +#define sSign 208
> +#define sTopMask12 224
> +#define TinyRange 240
> +#define sLn2 256
> +
> +#include <sysdep.h>
> +
> + .text
> + .section .text.sse4,"ax",@progbits
> +ENTRY(_ZGVbN4v_atanhf_sse4)
> + subq $72, %rsp
> + cfi_def_cfa_offset(80)
> + movaps %xmm0, %xmm5
> +
> +/* Load constants including One = 1 */
> + movups sOne+__svml_satanh_data_internal(%rip), %xmm4
> + movaps %xmm5, %xmm3
> +
> +/* Strip off the sign, so treat X as positive until right at the end */
> + movups SgnMask+__svml_satanh_data_internal(%rip), %xmm7
> + movaps %xmm4, %xmm8
> + andps %xmm5, %xmm7
> + movaps %xmm4, %xmm10
> + movups sTopMask12+__svml_satanh_data_internal(%rip), %xmm11
> + movaps %xmm4, %xmm14
> + movaps %xmm11, %xmm9
> +
> +/*
> + * Compute V = 2 * X trivially, and UHi + ULo = 1 - X in two pieces,
> + * the upper part UHi being <= 12 bits long. Then we have
> + * atanh(X) = 1/2 * log((1 + X) / (1 - X)) = 1/2 * log1p(V / (UHi + ULo)).
> + */
> + movaps %xmm7, %xmm12
> +
> +/*
> + * Check whether |X| < 1, in which case we use the main function.
> + * Otherwise set the rangemask so that the callout will get used.
> + * Note that this will also use the callout for NaNs since not(NaN < 1).
> + */
> + movaps %xmm7, %xmm6
> + movaps %xmm7, %xmm2
> + cmpnltps %xmm4, %xmm6
> + cmpltps TinyRange+__svml_satanh_data_internal(%rip), %xmm2
> + mulps %xmm5, %xmm3
> + subps %xmm7, %xmm8
> + addps %xmm7, %xmm12
> + movmskps %xmm6, %edx
> + subps %xmm8, %xmm10
> + addps %xmm5, %xmm3
> + subps %xmm7, %xmm10
> + andps %xmm8, %xmm9
> +
> +/*
> + * Now we feed into the log1p code, using H in place of _VARG1 and
> + * later incorporating L into the reduced argument.
> + * compute 1+x as high, low parts
> + */
> + movaps %xmm4, %xmm7
> +
> +/*
> + * Now compute R = 1/(UHi+ULo) * (1 - E) and the error term E
> + * The first FMR is exact (we force R to 12 bits just in case it
> + * isn't already, to make absolutely sure), and since E is ~ 2^-12,
> + * the rounding error in the other one is acceptable.
> + */
> + rcpps %xmm9, %xmm15
> + subps %xmm9, %xmm8
> + andps %xmm11, %xmm15
> +
> +/*
> + * Split V as well into upper 12 bits and lower part, so that we can get
> + * a preliminary quotient estimate without rounding error.
> + */
> + andps %xmm12, %xmm11
> + mulps %xmm15, %xmm9
> + addps %xmm8, %xmm10
> + subps %xmm11, %xmm12
> +
> +/* Hence get initial quotient estimate QHi + QLo = R * VHi + R * VLo */
> + mulps %xmm15, %xmm11
> + mulps %xmm15, %xmm10
> + subps %xmm9, %xmm14
> + mulps %xmm12, %xmm15
> + subps %xmm10, %xmm14
> +
> +/* Compute D = E + E^2 */
> + movaps %xmm14, %xmm13
> + movaps %xmm4, %xmm8
> + mulps %xmm14, %xmm13
> +
> +/* reduction: compute r,n */
> + movdqu iBrkValue+__svml_satanh_data_internal(%rip), %xmm9
> + addps %xmm13, %xmm14
> +
> +/*
> + * Compute R * (VHi + VLo) * (1 + E + E^2)
> + * = R * (VHi + VLo) * (1 + D)
> + * = QHi + (QHi * D + QLo + QLo * D)
> + */
> + movaps %xmm14, %xmm0
> + mulps %xmm15, %xmm14
> + mulps %xmm11, %xmm0
> + addps %xmm14, %xmm15
> + movdqu iOffExpoMask+__svml_satanh_data_internal(%rip), %xmm12
> + movaps %xmm4, %xmm14
> +
> +/* Record the sign for eventual reincorporation. */
> + movups sSign+__svml_satanh_data_internal(%rip), %xmm1
> + addps %xmm15, %xmm0
> +
> +/*
> + * Now finally accumulate the high and low parts of the
> + * argument to log1p, H + L, with a final compensated summation.
> + */
> + movaps %xmm0, %xmm6
> + andps %xmm5, %xmm1
> +
> +/* Or the sign bit in with the tiny result to handle atanh(-0) correctly */
> + orps %xmm1, %xmm3
> + addps %xmm11, %xmm6
> + maxps %xmm6, %xmm7
> + minps %xmm6, %xmm8
> + subps %xmm6, %xmm11
> + movaps %xmm7, %xmm10
> + andps %xmm2, %xmm3
> + addps %xmm8, %xmm10
> + addps %xmm11, %xmm0
> + subps %xmm10, %xmm7
> + psubd %xmm9, %xmm10
> + addps %xmm7, %xmm8
> + pand %xmm10, %xmm12
> + psrad $23, %xmm10
> + cvtdq2ps %xmm10, %xmm13
> + addps %xmm8, %xmm0
> +
> +/* final reconstruction */
> + mulps sLn2+__svml_satanh_data_internal(%rip), %xmm13
> + pslld $23, %xmm10
> + paddd %xmm9, %xmm12
> + psubd %xmm10, %xmm14
> +
> +/* polynomial evaluation */
> + subps %xmm4, %xmm12
> + mulps %xmm0, %xmm14
> + movups sPoly+112+__svml_satanh_data_internal(%rip), %xmm0
> + addps %xmm12, %xmm14
> + mulps %xmm14, %xmm0
> +
> +/* Finally, halve the result and reincorporate the sign */
> + movups sHalf+__svml_satanh_data_internal(%rip), %xmm4
> + pxor %xmm1, %xmm4
> + addps sPoly+96+__svml_satanh_data_internal(%rip), %xmm0
> + mulps %xmm14, %xmm0
> + addps sPoly+80+__svml_satanh_data_internal(%rip), %xmm0
> + mulps %xmm14, %xmm0
> + addps sPoly+64+__svml_satanh_data_internal(%rip), %xmm0
> + mulps %xmm14, %xmm0
> + addps sPoly+48+__svml_satanh_data_internal(%rip), %xmm0
> + mulps %xmm14, %xmm0
> + addps sPoly+32+__svml_satanh_data_internal(%rip), %xmm0
> + mulps %xmm14, %xmm0
> + addps sPoly+16+__svml_satanh_data_internal(%rip), %xmm0
> + mulps %xmm14, %xmm0
> + addps sPoly+__svml_satanh_data_internal(%rip), %xmm0
> + mulps %xmm14, %xmm0
> + mulps %xmm14, %xmm0
> + addps %xmm0, %xmm14
> + movaps %xmm2, %xmm0
> + addps %xmm13, %xmm14
> + mulps %xmm14, %xmm4
> + andnps %xmm4, %xmm0
> + orps %xmm3, %xmm0
> + testl %edx, %edx
> +
> +/* Go to special inputs processing branch */
> + jne L(SPECIAL_VALUES_BRANCH)
> + # LOE rbx rbp r12 r13 r14 r15 edx xmm0 xmm5
> +
> +/* Restore registers
> + * and exit the function
> + */
> +
> +L(EXIT):
> + addq $72, %rsp
> + cfi_def_cfa_offset(8)
> + ret
> + cfi_def_cfa_offset(80)
> +
> +/* Branch to process
> + * special inputs
> + */
> +
> +L(SPECIAL_VALUES_BRANCH):
> + movups %xmm5, 32(%rsp)
> + movups %xmm0, 48(%rsp)
> + # LOE rbx rbp r12 r13 r14 r15 edx
> +
> + xorl %eax, %eax
> + movq %r12, 16(%rsp)
> + cfi_offset(12, -64)
> + movl %eax, %r12d
> + movq %r13, 8(%rsp)
> + cfi_offset(13, -72)
> + movl %edx, %r13d
> + movq %r14, (%rsp)
> + cfi_offset(14, -80)
> + # LOE rbx rbp r15 r12d r13d
> +
> +/* Range mask
> + * bits check
> + */
> +
> +L(RANGEMASK_CHECK):
> + btl %r12d, %r13d
> +
> +/* Call scalar math function */
> + jc L(SCALAR_MATH_CALL)
> + # LOE rbx rbp r15 r12d r13d
> +
> +/* Special inputs
> + * processing loop
> + */
> +
> +L(SPECIAL_VALUES_LOOP):
> + incl %r12d
> + cmpl $4, %r12d
> +
> +/* Check bits in range mask */
> + jl L(RANGEMASK_CHECK)
> + # LOE rbx rbp r15 r12d r13d
> +
> + movq 16(%rsp), %r12
> + cfi_restore(12)
> + movq 8(%rsp), %r13
> + cfi_restore(13)
> + movq (%rsp), %r14
> + cfi_restore(14)
> + movups 48(%rsp), %xmm0
> +
> +/* Go to exit */
> + jmp L(EXIT)
> + cfi_offset(12, -64)
> + cfi_offset(13, -72)
> + cfi_offset(14, -80)
> + # LOE rbx rbp r12 r13 r14 r15 xmm0
> +
> +/* Scalar math function call
> + * to process special input
> + */
> +
> +L(SCALAR_MATH_CALL):
> + movl %r12d, %r14d
> + movss 32(%rsp,%r14,4), %xmm0
> + call atanhf@PLT
> + # LOE rbx rbp r14 r15 r12d r13d xmm0
> +
> + movss %xmm0, 48(%rsp,%r14,4)
> +
> +/* Process special inputs in loop */
> + jmp L(SPECIAL_VALUES_LOOP)
> + # LOE rbx rbp r15 r12d r13d
> +END(_ZGVbN4v_atanhf_sse4)
> +
> + .section .rodata, "a"
> + .align 16
> +
> +#ifdef __svml_satanh_data_internal_typedef
> +typedef unsigned int VUINT32;
> +typedef struct {
> + __declspec(align(16)) VUINT32 SgnMask[4][1];
> + __declspec(align(16)) VUINT32 sOne[4][1];
> + __declspec(align(16)) VUINT32 sPoly[8][4][1];
> + __declspec(align(16)) VUINT32 iBrkValue[4][1];
> + __declspec(align(16)) VUINT32 iOffExpoMask[4][1];
> + __declspec(align(16)) VUINT32 sHalf[4][1];
> + __declspec(align(16)) VUINT32 sSign[4][1];
> + __declspec(align(16)) VUINT32 sTopMask12[4][1];
> + __declspec(align(16)) VUINT32 TinyRange[4][1];
> + __declspec(align(16)) VUINT32 sLn2[4][1];
> +} __svml_satanh_data_internal;
> +#endif
> +__svml_satanh_data_internal:
> + /*== SgnMask ==*/
> + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
> + /*== sOne = SP 1.0 ==*/
> + .align 16
> + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
> + /*== sPoly[] = SP polynomial ==*/
> + .align 16
> + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */
> + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */
> + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */
> + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */
> + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */
> + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */
> + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */
> + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */
> + /*== iBrkValue = SP 2/3 ==*/
> + .align 16
> + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab
> + /*== iOffExpoMask = SP significand mask ==*/
> + .align 16
> + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff
> + /*== sHalf ==*/
> + .align 16
> + .long 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000
> + /*== sSign ==*/
> + .align 16
> + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
> + /*== sTopMask12 ==*/
> + .align 16
> + .long 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000
> + /*== TinyRange ==*/
> + .align 16
> + .long 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000
> + /*== sLn2 = SP ln(2) ==*/
> + .align 16
> + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218
> + .align 16
> + .type __svml_satanh_data_internal,@object
> + .size __svml_satanh_data_internal,.-__svml_satanh_data_internal
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core-sse.S
> new file mode 100644
> index 0000000000..b293bd5b41
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core-sse.S
> @@ -0,0 +1,20 @@
> +/* SSE version of vectorized atanhf, vector length is 8.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define _ZGVdN8v_atanhf _ZGVdN8v_atanhf_sse_wrapper
> +#include "../svml_s_atanhf8_core.S"
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core.c
> new file mode 100644
> index 0000000000..3df8d66c94
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core.c
> @@ -0,0 +1,28 @@
> +/* Multiple versions of vectorized atanhf, vector length is 8.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define SYMBOL_NAME _ZGVdN8v_atanhf
> +#include "ifunc-mathvec-avx2.h"
> +
> +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
> +
> +#ifdef SHARED
> +__hidden_ver1 (_ZGVdN8v_atanhf, __GI__ZGVdN8v_atanhf,
> + __redirect__ZGVdN8v_atanhf)
> + __attribute__ ((visibility ("hidden")));
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core_avx2.S
> new file mode 100644
> index 0000000000..00225207a8
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanhf8_core_avx2.S
> @@ -0,0 +1,335 @@
> +/* Function atanhf vectorized with AVX2.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +/*
> + * ALGORITHM DESCRIPTION:
> + *
> + * Compute atanh(x) as 0.5 * log((1 + x)/(1 - x))
> + *
> + * Special cases:
> + *
> + * atanh(0) = 0
> + * atanh(+1) = +INF
> + * atanh(-1) = -INF
> + * atanh(x) = NaN if |x| > 1, or if x is a NaN or INF
> + *
> + */
> +
> +/* Offsets for data table __svml_satanh_data_internal
> + */
> +#define SgnMask 0
> +#define sOne 32
> +#define sPoly 64
> +#define iBrkValue 320
> +#define iOffExpoMask 352
> +#define sHalf 384
> +#define sSign 416
> +#define sTopMask12 448
> +#define TinyRange 480
> +#define sLn2 512
> +
> +#include <sysdep.h>
> +
> + .text
> + .section .text.avx2,"ax",@progbits
> +ENTRY(_ZGVdN8v_atanhf_avx2)
> + pushq %rbp
> + cfi_def_cfa_offset(16)
> + movq %rsp, %rbp
> + cfi_def_cfa(6, 16)
> + cfi_offset(6, -16)
> + andq $-32, %rsp
> + subq $96, %rsp
> +
> +/* Load constants including One = 1 */
> + vmovups sOne+__svml_satanh_data_internal(%rip), %ymm5
> + vmovups sTopMask12+__svml_satanh_data_internal(%rip), %ymm13
> + vmovaps %ymm0, %ymm6
> +
> +/* Strip off the sign, so treat X as positive until right at the end */
> + vandps SgnMask+__svml_satanh_data_internal(%rip), %ymm6, %ymm10
> + vsubps %ymm10, %ymm5, %ymm1
> +
> +/*
> + * Compute V = 2 * X trivially, and UHi + ULo = 1 - X in two pieces,
> + * the upper part UHi being <= 12 bits long. Then we have
> + * atanh(X) = 1/2 * log((1 + X) / (1 - X)) = 1/2 * log1p(V / (UHi + ULo)).
> + */
> + vaddps %ymm10, %ymm10, %ymm14
> +
> +/*
> + * Check whether |X| < 1, in which case we use the main function.
> + * Otherwise set the rangemask so that the callout will get used.
> + * Note that this will also use the callout for NaNs since not(NaN < 1).
> + */
> + vcmpnlt_uqps %ymm5, %ymm10, %ymm7
> + vsubps %ymm1, %ymm5, %ymm9
> + vcmplt_oqps TinyRange+__svml_satanh_data_internal(%rip), %ymm10, %ymm4
> + vrcpps %ymm1, %ymm11
> + vsubps %ymm10, %ymm9, %ymm12
> + vandps %ymm13, %ymm11, %ymm0
> +
> +/* No need to split sU when FMA is available */
> + vfnmadd213ps %ymm5, %ymm0, %ymm1
> + vmovaps %ymm6, %ymm8
> + vfmadd213ps %ymm6, %ymm6, %ymm8
> + vfnmadd231ps %ymm0, %ymm12, %ymm1
> +
> +/*
> + * Split V as well into upper 12 bits and lower part, so that we can get
> + * a preliminary quotient estimate without rounding error.
> + */
> + vandps %ymm13, %ymm14, %ymm15
> + vmovmskps %ymm7, %edx
> + vsubps %ymm15, %ymm14, %ymm7
> +
> +/* Hence get initial quotient estimate QHi + QLo = R * VHi + R * VLo */
> + vmulps %ymm15, %ymm0, %ymm10
> +
> +/* Compute D = E + E^2 */
> + vfmadd213ps %ymm1, %ymm1, %ymm1
> +
> +/* Record the sign for eventual reincorporation. */
> + vandps sSign+__svml_satanh_data_internal(%rip), %ymm6, %ymm3
> +
> +/* Or the sign bit in with the tiny result to handle atanh(-0) correctly */
> + vorps %ymm3, %ymm8, %ymm2
> + vmulps %ymm7, %ymm0, %ymm8
> +
> +/*
> + * Compute R * (VHi + VLo) * (1 + E + E^2)
> + * = R * (VHi + VLo) * (1 + D)
> + * = QHi + (QHi * D + QLo + QLo * D)
> + */
> + vmulps %ymm1, %ymm10, %ymm9
> + vfmadd213ps %ymm8, %ymm8, %ymm1
> + vaddps %ymm1, %ymm9, %ymm1
> +
> +/* reduction: compute r,n */
> + vmovups iBrkValue+__svml_satanh_data_internal(%rip), %ymm9
> +
> +/*
> + * Now finally accumulate the high and low parts of the
> + * argument to log1p, H + L, with a final compensated summation.
> + */
> + vaddps %ymm1, %ymm10, %ymm12
> + vsubps %ymm12, %ymm10, %ymm11
> +
> +/*
> + * Now we feed into the log1p code, using H in place of _VARG1 and
> + * later incorporating L into the reduced argument.
> + * compute 1+x as high, low parts
> + */
> + vmaxps %ymm12, %ymm5, %ymm13
> + vminps %ymm12, %ymm5, %ymm14
> + vaddps %ymm11, %ymm1, %ymm0
> + vaddps %ymm14, %ymm13, %ymm1
> + vpsubd %ymm9, %ymm1, %ymm7
> + vsubps %ymm1, %ymm13, %ymm15
> + vpsrad $23, %ymm7, %ymm10
> + vpand iOffExpoMask+__svml_satanh_data_internal(%rip), %ymm7, %ymm8
> + vaddps %ymm15, %ymm14, %ymm13
> + vpslld $23, %ymm10, %ymm11
> + vpaddd %ymm9, %ymm8, %ymm15
> + vaddps %ymm13, %ymm0, %ymm14
> + vcvtdq2ps %ymm10, %ymm0
> + vpsubd %ymm11, %ymm5, %ymm12
> +
> +/* polynomial evaluation */
> + vsubps %ymm5, %ymm15, %ymm5
> + vmulps %ymm14, %ymm12, %ymm1
> + vaddps %ymm5, %ymm1, %ymm5
> + vmovups sPoly+224+__svml_satanh_data_internal(%rip), %ymm1
> + vfmadd213ps sPoly+192+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
> + vfmadd213ps sPoly+160+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
> + vfmadd213ps sPoly+128+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
> + vfmadd213ps sPoly+96+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
> + vfmadd213ps sPoly+64+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
> + vfmadd213ps sPoly+32+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
> + vfmadd213ps sPoly+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
> + vmulps %ymm1, %ymm5, %ymm7
> + vfmadd213ps %ymm5, %ymm5, %ymm7
> +
> +/* final reconstruction */
> + vfmadd132ps sLn2+__svml_satanh_data_internal(%rip), %ymm7, %ymm0
> +
> +/* Finally, halve the result and reincorporate the sign */
> + vxorps sHalf+__svml_satanh_data_internal(%rip), %ymm3, %ymm3
> + vmulps %ymm0, %ymm3, %ymm0
> + vblendvps %ymm4, %ymm2, %ymm0, %ymm0
> + testl %edx, %edx
> +
> +/* Go to special inputs processing branch */
> + jne L(SPECIAL_VALUES_BRANCH)
> + # LOE rbx r12 r13 r14 r15 edx ymm0 ymm6
> +
> +/* Restore registers
> + * and exit the function
> + */
> +
> +L(EXIT):
> + movq %rbp, %rsp
> + popq %rbp
> + cfi_def_cfa(7, 8)
> + cfi_restore(6)
> + ret
> + cfi_def_cfa(6, 16)
> + cfi_offset(6, -16)
> +
> +/* Branch to process
> + * special inputs
> + */
> +
> +L(SPECIAL_VALUES_BRANCH):
> + vmovups %ymm6, 32(%rsp)
> + vmovups %ymm0, 64(%rsp)
> + # LOE rbx r12 r13 r14 r15 edx ymm0
> +
> + xorl %eax, %eax
> + # LOE rbx r12 r13 r14 r15 eax edx
> +
> + vzeroupper
> + movq %r12, 16(%rsp)
> + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
> + movl %eax, %r12d
> + movq %r13, 8(%rsp)
> + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
> + movl %edx, %r13d
> + movq %r14, (%rsp)
> + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
> + # LOE rbx r15 r12d r13d
> +
> +/* Range mask
> + * bits check
> + */
> +
> +L(RANGEMASK_CHECK):
> + btl %r12d, %r13d
> +
> +/* Call scalar math function */
> + jc L(SCALAR_MATH_CALL)
> + # LOE rbx r15 r12d r13d
> +
> +/* Special inputs
> + * processing loop
> + */
> +
> +L(SPECIAL_VALUES_LOOP):
> + incl %r12d
> + cmpl $8, %r12d
> +
> +/* Check bits in range mask */
> + jl L(RANGEMASK_CHECK)
> + # LOE rbx r15 r12d r13d
> +
> + movq 16(%rsp), %r12
> + cfi_restore(12)
> + movq 8(%rsp), %r13
> + cfi_restore(13)
> + movq (%rsp), %r14
> + cfi_restore(14)
> + vmovups 64(%rsp), %ymm0
> +
> +/* Go to exit */
> + jmp L(EXIT)
> + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
> + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
> + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
> + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
> + # LOE rbx r12 r13 r14 r15 ymm0
> +
> +/* Scalar math function call
> + * to process special input
> + */
> +
> +L(SCALAR_MATH_CALL):
> + movl %r12d, %r14d
> + movss 32(%rsp,%r14,4), %xmm0
> + call atanhf@PLT
> + # LOE rbx r14 r15 r12d r13d xmm0
> +
> + movss %xmm0, 64(%rsp,%r14,4)
> +
> +/* Process special inputs in loop */
> + jmp L(SPECIAL_VALUES_LOOP)
> + # LOE rbx r15 r12d r13d
> +END(_ZGVdN8v_atanhf_avx2)
> +
> + .section .rodata, "a"
> + .align 32
> +
> +#ifdef __svml_satanh_data_internal_typedef
> +typedef unsigned int VUINT32;
> +typedef struct {
> + __declspec(align(32)) VUINT32 SgnMask[8][1];
> + __declspec(align(32)) VUINT32 sOne[8][1];
> + __declspec(align(32)) VUINT32 sPoly[8][8][1];
> + __declspec(align(32)) VUINT32 iBrkValue[8][1];
> + __declspec(align(32)) VUINT32 iOffExpoMask[8][1];
> + __declspec(align(32)) VUINT32 sHalf[8][1];
> + __declspec(align(32)) VUINT32 sSign[8][1];
> + __declspec(align(32)) VUINT32 sTopMask12[8][1];
> + __declspec(align(32)) VUINT32 TinyRange[8][1];
> + __declspec(align(32)) VUINT32 sLn2[8][1];
> +} __svml_satanh_data_internal;
> +#endif
> +__svml_satanh_data_internal: /* Constant table for the vector atanhf kernel: 32-byte aligned rows of 8 SP lanes.  */
> + /*== SgnMask = 0x7fffffff: every bit except the SP sign bit ==*/
> + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
> + /*== sOne = SP 1.0 ==*/
> + .align 32
> + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
> + /*== sPoly[] = SP polynomial coefficients P0..P7, one 32-byte row each; the kernel runs Horner from P7 (sPoly+224) down to P0 (sPoly+0) ==*/
> + .align 32
> + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */
> + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */
> + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */
> + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */
> + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */
> + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */
> + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */
> + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */
> + /*== iBrkValue = SP 2/3; subtracted from, then added back to, the bits of 1+x as integers during the log1p range reduction ==*/
> + .align 32
> + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab
> + /*== iOffExpoMask = SP significand mask ==*/
> + .align 32
> + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff
> + /*== sHalf = SP 0.5; XORed with the saved sign to halve and re-sign the result ==*/
> + .align 32
> + .long 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000
> + /*== sSign = 0x80000000: the SP sign bit only ==*/
> + .align 32
> + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000
> + /*== sTopMask12 = 0xFFFFF000: mask with the low 12 bits clear ==*/
> + .align 32
> + .long 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000
> + /*== TinyRange = 0x0C000000, i.e. the SP bit pattern of 2^-103 (its use is outside this excerpt) ==*/
> + .align 32
> + .long 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000
> + /*== sLn2 = SP ln(2); multiplies the extracted exponent in the final reconstruction ==*/
> + .align 32
> + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218
> + .align 32
> + .type __svml_satanh_data_internal,@object
> + .size __svml_satanh_data_internal,.-__svml_satanh_data_internal
> diff --git a/sysdeps/x86_64/fpu/svml_d_atanh2_core.S b/sysdeps/x86_64/fpu/svml_d_atanh2_core.S
> new file mode 100644
> index 0000000000..36f549ddd9
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/svml_d_atanh2_core.S
> @@ -0,0 +1,29 @@
> +/* Function atanh vectorized with SSE2, wrapper version.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +#include "svml_d_wrapper_impl.h"
> +
> + .text
> +ENTRY (_ZGVbN2v_atanh)
> +WRAPPER_IMPL_SSE2 atanh
> +END (_ZGVbN2v_atanh)
> +
> +#ifndef USE_MULTIARCH
> + libmvec_hidden_def (_ZGVbN2v_atanh)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/svml_d_atanh4_core.S b/sysdeps/x86_64/fpu/svml_d_atanh4_core.S
> new file mode 100644
> index 0000000000..6d6d11e85e
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/svml_d_atanh4_core.S
> @@ -0,0 +1,29 @@
> +/* Function atanh vectorized with AVX2, wrapper version.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +#include "svml_d_wrapper_impl.h"
> +
> + .text
> +ENTRY (_ZGVdN4v_atanh)
> +WRAPPER_IMPL_AVX _ZGVbN2v_atanh
> +END (_ZGVdN4v_atanh)
> +
> +#ifndef USE_MULTIARCH
> + libmvec_hidden_def (_ZGVdN4v_atanh)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/svml_d_atanh4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_atanh4_core_avx.S
> new file mode 100644
> index 0000000000..b4cfa275c8
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/svml_d_atanh4_core_avx.S
> @@ -0,0 +1,25 @@
> +/* Function atanh vectorized in AVX ISA as wrapper to SSE4 ISA version.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +#include "svml_d_wrapper_impl.h"
> +
> + .text
> +ENTRY (_ZGVcN4v_atanh)
> +WRAPPER_IMPL_AVX _ZGVbN2v_atanh
> +END (_ZGVcN4v_atanh)
> diff --git a/sysdeps/x86_64/fpu/svml_d_atanh8_core.S b/sysdeps/x86_64/fpu/svml_d_atanh8_core.S
> new file mode 100644
> index 0000000000..b31a6a72a1
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/svml_d_atanh8_core.S
> @@ -0,0 +1,25 @@
> +/* Function atanh vectorized with AVX-512, wrapper to AVX2.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +#include "svml_d_wrapper_impl.h"
> +
> + .text
> +ENTRY (_ZGVeN8v_atanh)
> +WRAPPER_IMPL_AVX512 _ZGVdN4v_atanh
> +END (_ZGVeN8v_atanh)
> diff --git a/sysdeps/x86_64/fpu/svml_s_atanhf16_core.S b/sysdeps/x86_64/fpu/svml_s_atanhf16_core.S
> new file mode 100644
> index 0000000000..2ea61888e7
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/svml_s_atanhf16_core.S
> @@ -0,0 +1,25 @@
> +/* Function atanhf vectorized with AVX-512. Wrapper to AVX2 version.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +#include "svml_s_wrapper_impl.h"
> +
> + .text
> +ENTRY (_ZGVeN16v_atanhf)
> +WRAPPER_IMPL_AVX512 _ZGVdN8v_atanhf
> +END (_ZGVeN16v_atanhf)
> diff --git a/sysdeps/x86_64/fpu/svml_s_atanhf4_core.S b/sysdeps/x86_64/fpu/svml_s_atanhf4_core.S
> new file mode 100644
> index 0000000000..6904cc388a
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/svml_s_atanhf4_core.S
> @@ -0,0 +1,29 @@
> +/* Function atanhf vectorized with SSE2, wrapper version.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +#include "svml_s_wrapper_impl.h"
> +
> + .text
> +ENTRY (_ZGVbN4v_atanhf)
> +WRAPPER_IMPL_SSE2 atanhf
> +END (_ZGVbN4v_atanhf)
> +
> +#ifndef USE_MULTIARCH
> + libmvec_hidden_def (_ZGVbN4v_atanhf)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/svml_s_atanhf8_core.S b/sysdeps/x86_64/fpu/svml_s_atanhf8_core.S
> new file mode 100644
> index 0000000000..31d695fb5d
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/svml_s_atanhf8_core.S
> @@ -0,0 +1,29 @@
> +/* Function atanhf vectorized with AVX2, wrapper version.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +#include "svml_s_wrapper_impl.h"
> +
> + .text
> +ENTRY (_ZGVdN8v_atanhf)
> +WRAPPER_IMPL_AVX _ZGVbN4v_atanhf
> +END (_ZGVdN8v_atanhf)
> +
> +#ifndef USE_MULTIARCH
> + libmvec_hidden_def (_ZGVdN8v_atanhf)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/svml_s_atanhf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_atanhf8_core_avx.S
> new file mode 100644
> index 0000000000..6c24eaf45c
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/svml_s_atanhf8_core_avx.S
> @@ -0,0 +1,25 @@
> +/* Function atanhf vectorized in AVX ISA as wrapper to SSE4 ISA version.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +#include "svml_s_wrapper_impl.h"
> +
> + .text
> +ENTRY (_ZGVcN8v_atanhf)
> +WRAPPER_IMPL_AVX _ZGVbN4v_atanhf
> +END (_ZGVcN8v_atanhf)
> diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx.c
> new file mode 100644
> index 0000000000..0bdeec7851
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx.c
> @@ -0,0 +1 @@
> +#include "test-double-libmvec-atanh.c"
> diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx2.c
> new file mode 100644
> index 0000000000..0bdeec7851
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx2.c
> @@ -0,0 +1 @@
> +#include "test-double-libmvec-atanh.c"
> diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx512f.c b/sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx512f.c
> new file mode 100644
> index 0000000000..0bdeec7851
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/test-double-libmvec-atanh-avx512f.c
> @@ -0,0 +1 @@
> +#include "test-double-libmvec-atanh.c"
> diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-atanh.c b/sysdeps/x86_64/fpu/test-double-libmvec-atanh.c
> new file mode 100644
> index 0000000000..41dd8e7af3
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/test-double-libmvec-atanh.c
> @@ -0,0 +1,3 @@
> +#define LIBMVEC_TYPE double
> +#define LIBMVEC_FUNC atanh
> +#include "test-vector-abi-arg1.h"
> diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
> index 38359b05e3..04a4fe654b 100644
> --- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
> +++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
> @@ -41,6 +41,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVbN2vv_atan2)
> VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVbN2v_log10)
> VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVbN2v_log2)
> VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVbN2v_log1p)
> +VECTOR_WRAPPER (WRAPPER_NAME (atanh), _ZGVbN2v_atanh)
>
> #define VEC_INT_TYPE __m128i
>
> diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
> index 17701e7731..f9ac2fad5d 100644
> --- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
> +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
> @@ -44,6 +44,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVdN4vv_atan2)
> VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVdN4v_log10)
> VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVdN4v_log2)
> VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVdN4v_log1p)
> +VECTOR_WRAPPER (WRAPPER_NAME (atanh), _ZGVdN4v_atanh)
>
> #ifndef __ILP32__
> # define VEC_INT_TYPE __m256i
> diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
> index bba62b2446..185801fa82 100644
> --- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
> +++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
> @@ -41,6 +41,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVcN4vv_atan2)
> VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVcN4v_log10)
> VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVcN4v_log2)
> VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVcN4v_log1p)
> +VECTOR_WRAPPER (WRAPPER_NAME (atanh), _ZGVcN4v_atanh)
>
> #define VEC_INT_TYPE __m128i
>
> diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
> index 8a04e13a07..1cc8aaecbf 100644
> --- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
> +++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
> @@ -41,6 +41,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVeN8vv_atan2)
> VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVeN8v_log10)
> VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVeN8v_log2)
> VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVeN8v_log1p)
> +VECTOR_WRAPPER (WRAPPER_NAME (atanh), _ZGVeN8v_atanh)
>
> #ifndef __ILP32__
> # define VEC_INT_TYPE __m512i
> diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx.c
> new file mode 100644
> index 0000000000..6f89ae70f2
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx.c
> @@ -0,0 +1 @@
> +#include "test-float-libmvec-atanhf.c"
> diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx2.c
> new file mode 100644
> index 0000000000..6f89ae70f2
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx2.c
> @@ -0,0 +1 @@
> +#include "test-float-libmvec-atanhf.c"
> diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx512f.c b/sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx512f.c
> new file mode 100644
> index 0000000000..6f89ae70f2
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/test-float-libmvec-atanhf-avx512f.c
> @@ -0,0 +1 @@
> +#include "test-float-libmvec-atanhf.c"
> diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-atanhf.c b/sysdeps/x86_64/fpu/test-float-libmvec-atanhf.c
> new file mode 100644
> index 0000000000..33a022adb8
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/test-float-libmvec-atanhf.c
> @@ -0,0 +1,3 @@
> +#define LIBMVEC_TYPE float
> +#define LIBMVEC_FUNC atanhf
> +#include "test-vector-abi-arg1.h"
> diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
> index 706f52c618..b5d76d80e0 100644
> --- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
> +++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
> @@ -41,6 +41,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVeN16vv_atan2f)
> VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVeN16v_log10f)
> VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVeN16v_log2f)
> VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVeN16v_log1pf)
> +VECTOR_WRAPPER (WRAPPER_NAME (atanhf), _ZGVeN16v_atanhf)
>
> #define VEC_INT_TYPE __m512i
>
> diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
> index ceace4c53a..c1df6a03c1 100644
> --- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
> +++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
> @@ -41,6 +41,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVbN4vv_atan2f)
> VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVbN4v_log10f)
> VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVbN4v_log2f)
> VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVbN4v_log1pf)
> +VECTOR_WRAPPER (WRAPPER_NAME (atanhf), _ZGVbN4v_atanhf)
>
> #define VEC_INT_TYPE __m128i
>
> diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
> index 06a4753409..f4c646683f 100644
> --- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
> +++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
> @@ -44,6 +44,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVdN8vv_atan2f)
> VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVdN8v_log10f)
> VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVdN8v_log2f)
> VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVdN8v_log1pf)
> +VECTOR_WRAPPER (WRAPPER_NAME (atanhf), _ZGVdN8v_atanhf)
>
> /* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. */
> #undef VECTOR_WRAPPER_fFF
> diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
> index a87e5298e0..a6acd3ffca 100644
> --- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
> +++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
> @@ -41,6 +41,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVcN8vv_atan2f)
> VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVcN8v_log10f)
> VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVcN8v_log2f)
> VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVcN8v_log1pf)
> +VECTOR_WRAPPER (WRAPPER_NAME (atanhf), _ZGVcN8v_atanhf)
>
> #define VEC_INT_TYPE __m128i
>
> --
> 2.31.1
>
LGTM.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
Thanks.
H.J.
@@ -252,4 +252,15 @@
#define __DECL_SIMD_log1pf32x
#define __DECL_SIMD_log1pf64x
#define __DECL_SIMD_log1pf128x
+
+#define __DECL_SIMD_atanh
+#define __DECL_SIMD_atanhf
+#define __DECL_SIMD_atanhl
+#define __DECL_SIMD_atanhf16
+#define __DECL_SIMD_atanhf32
+#define __DECL_SIMD_atanhf64
+#define __DECL_SIMD_atanhf128
+#define __DECL_SIMD_atanhf32x
+#define __DECL_SIMD_atanhf64x
+#define __DECL_SIMD_atanhf128x
#endif
@@ -86,7 +86,7 @@ __MATHCALL (acosh,, (_Mdouble_ __x));
/* Hyperbolic arc sine of X. */
__MATHCALL (asinh,, (_Mdouble_ __x));
/* Hyperbolic arc tangent of X. */
-__MATHCALL (atanh,, (_Mdouble_ __x));
+__MATHCALL_VEC (atanh,, (_Mdouble_ __x));
#endif
/* Exponential and logarithmic functions. */
@@ -49,6 +49,7 @@ GLIBC_2.22 _ZGVeN8vvv_sincos F
GLIBC_2.35 _ZGVbN2v_acos F
GLIBC_2.35 _ZGVbN2v_asin F
GLIBC_2.35 _ZGVbN2v_atan F
+GLIBC_2.35 _ZGVbN2v_atanh F
GLIBC_2.35 _ZGVbN2v_cbrt F
GLIBC_2.35 _ZGVbN2v_cosh F
GLIBC_2.35 _ZGVbN2v_exp10 F
@@ -63,6 +64,7 @@ GLIBC_2.35 _ZGVbN2vv_hypot F
GLIBC_2.35 _ZGVbN4v_acosf F
GLIBC_2.35 _ZGVbN4v_asinf F
GLIBC_2.35 _ZGVbN4v_atanf F
+GLIBC_2.35 _ZGVbN4v_atanhf F
GLIBC_2.35 _ZGVbN4v_cbrtf F
GLIBC_2.35 _ZGVbN4v_coshf F
GLIBC_2.35 _ZGVbN4v_exp10f F
@@ -77,6 +79,7 @@ GLIBC_2.35 _ZGVbN4vv_hypotf F
GLIBC_2.35 _ZGVcN4v_acos F
GLIBC_2.35 _ZGVcN4v_asin F
GLIBC_2.35 _ZGVcN4v_atan F
+GLIBC_2.35 _ZGVcN4v_atanh F
GLIBC_2.35 _ZGVcN4v_cbrt F
GLIBC_2.35 _ZGVcN4v_cosh F
GLIBC_2.35 _ZGVcN4v_exp10 F
@@ -91,6 +94,7 @@ GLIBC_2.35 _ZGVcN4vv_hypot F
GLIBC_2.35 _ZGVcN8v_acosf F
GLIBC_2.35 _ZGVcN8v_asinf F
GLIBC_2.35 _ZGVcN8v_atanf F
+GLIBC_2.35 _ZGVcN8v_atanhf F
GLIBC_2.35 _ZGVcN8v_cbrtf F
GLIBC_2.35 _ZGVcN8v_coshf F
GLIBC_2.35 _ZGVcN8v_exp10f F
@@ -105,6 +109,7 @@ GLIBC_2.35 _ZGVcN8vv_hypotf F
GLIBC_2.35 _ZGVdN4v_acos F
GLIBC_2.35 _ZGVdN4v_asin F
GLIBC_2.35 _ZGVdN4v_atan F
+GLIBC_2.35 _ZGVdN4v_atanh F
GLIBC_2.35 _ZGVdN4v_cbrt F
GLIBC_2.35 _ZGVdN4v_cosh F
GLIBC_2.35 _ZGVdN4v_exp10 F
@@ -119,6 +124,7 @@ GLIBC_2.35 _ZGVdN4vv_hypot F
GLIBC_2.35 _ZGVdN8v_acosf F
GLIBC_2.35 _ZGVdN8v_asinf F
GLIBC_2.35 _ZGVdN8v_atanf F
+GLIBC_2.35 _ZGVdN8v_atanhf F
GLIBC_2.35 _ZGVdN8v_cbrtf F
GLIBC_2.35 _ZGVdN8v_coshf F
GLIBC_2.35 _ZGVdN8v_exp10f F
@@ -133,6 +139,7 @@ GLIBC_2.35 _ZGVdN8vv_hypotf F
GLIBC_2.35 _ZGVeN16v_acosf F
GLIBC_2.35 _ZGVeN16v_asinf F
GLIBC_2.35 _ZGVeN16v_atanf F
+GLIBC_2.35 _ZGVeN16v_atanhf F
GLIBC_2.35 _ZGVeN16v_cbrtf F
GLIBC_2.35 _ZGVeN16v_coshf F
GLIBC_2.35 _ZGVeN16v_exp10f F
@@ -147,6 +154,7 @@ GLIBC_2.35 _ZGVeN16vv_hypotf F
GLIBC_2.35 _ZGVeN8v_acos F
GLIBC_2.35 _ZGVeN8v_asin F
GLIBC_2.35 _ZGVeN8v_atan F
+GLIBC_2.35 _ZGVeN8v_atanh F
GLIBC_2.35 _ZGVeN8v_cbrt F
GLIBC_2.35 _ZGVeN8v_cosh F
GLIBC_2.35 _ZGVeN8v_exp10 F
@@ -114,6 +114,10 @@
# define __DECL_SIMD_log1p __DECL_SIMD_x86_64
# undef __DECL_SIMD_log1pf
# define __DECL_SIMD_log1pf __DECL_SIMD_x86_64
+# undef __DECL_SIMD_atanh
+# define __DECL_SIMD_atanh __DECL_SIMD_x86_64
+# undef __DECL_SIMD_atanhf
+# define __DECL_SIMD_atanhf __DECL_SIMD_x86_64
# endif
#endif
@@ -56,6 +56,8 @@
!GCC$ builtin (log2f) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (log1p) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (log1pf) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (atanh) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (atanhf) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (cos) attributes simd (notinbranch) if('x32')
!GCC$ builtin (cosf) attributes simd (notinbranch) if('x32')
@@ -97,3 +99,5 @@
!GCC$ builtin (log2f) attributes simd (notinbranch) if('x32')
!GCC$ builtin (log1p) attributes simd (notinbranch) if('x32')
!GCC$ builtin (log1pf) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (atanh) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (atanhf) attributes simd (notinbranch) if('x32')
@@ -26,6 +26,7 @@ libmvec-funcs = \
asin \
atan \
atan2 \
+ atanh \
cbrt \
cos \
cosh \
@@ -17,6 +17,7 @@ libmvec {
_ZGVbN2v_acos; _ZGVcN4v_acos; _ZGVdN4v_acos; _ZGVeN8v_acos;
_ZGVbN2v_asin; _ZGVcN4v_asin; _ZGVdN4v_asin; _ZGVeN8v_asin;
_ZGVbN2v_atan; _ZGVcN4v_atan; _ZGVdN4v_atan; _ZGVeN8v_atan;
+ _ZGVbN2v_atanh; _ZGVcN4v_atanh; _ZGVdN4v_atanh; _ZGVeN8v_atanh;
_ZGVbN2v_cbrt; _ZGVcN4v_cbrt; _ZGVdN4v_cbrt; _ZGVeN8v_cbrt;
_ZGVbN2v_cosh; _ZGVcN4v_cosh; _ZGVdN4v_cosh; _ZGVeN8v_cosh;
_ZGVbN2v_exp10; _ZGVcN4v_exp10; _ZGVdN4v_exp10; _ZGVeN8v_exp10;
@@ -31,6 +32,7 @@ libmvec {
_ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf;
_ZGVbN4v_asinf; _ZGVcN8v_asinf; _ZGVdN8v_asinf; _ZGVeN16v_asinf;
_ZGVbN4v_atanf; _ZGVcN8v_atanf; _ZGVdN8v_atanf; _ZGVeN16v_atanf;
+ _ZGVbN4v_atanhf; _ZGVcN8v_atanhf; _ZGVdN8v_atanhf; _ZGVeN16v_atanhf;
_ZGVbN4v_cbrtf; _ZGVcN8v_cbrtf; _ZGVdN8v_cbrtf; _ZGVeN16v_cbrtf;
_ZGVbN4v_coshf; _ZGVcN8v_coshf; _ZGVdN8v_coshf; _ZGVeN16v_coshf;
_ZGVbN4v_exp10f; _ZGVcN8v_exp10f; _ZGVdN8v_exp10f; _ZGVeN16v_exp10f;
@@ -248,6 +248,26 @@ float: 3
float128: 4
ldouble: 5
+Function: "atanh_vlen16":
+float: 1
+
+Function: "atanh_vlen2":
+double: 1
+
+Function: "atanh_vlen4":
+double: 1
+float: 1
+
+Function: "atanh_vlen4_avx2":
+double: 1
+
+Function: "atanh_vlen8":
+double: 1
+float: 1
+
+Function: "atanh_vlen8_avx2":
+float: 1
+
Function: "cabs":
double: 1
float128: 1
new file mode 100644
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized atanh, vector length is 2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define _ZGVbN2v_atanh _ZGVbN2v_atanh_sse2
+#include "../svml_d_atanh2_core.S"
new file mode 100644
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized atanh, vector length is 2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN2v_atanh
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_atanh, __GI__ZGVbN2v_atanh, __redirect__ZGVbN2v_atanh)
+ __attribute__ ((visibility ("hidden")));
+#endif
new file mode 100644
@@ -0,0 +1,1519 @@
+/* Function atanh vectorized with SSE4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ * Compute atanh(x) as 0.5 * log((1 + x)/(1 - x))
+ *
+ * Special cases:
+ *
+ * atanh(0) = 0
+ * atanh(+1) = +INF
+ * atanh(-1) = -INF
+ * atanh(x) = NaN if |x| > 1, or if x is a NaN or INF
+ *
+ */
+
+/* Byte offsets of the fields of data table __svml_datanh_data_internal,
+   listed in the same order as the layout typedef that precedes the table.
+   Every field from ExpMask onwards is 16 bytes apart, i.e. one xmm-sized
+   constant.  MinLog1p and MaxLog1p are defined here but are not referenced
+   by _ZGVbN2v_atanh_sse4 below.
+ */
+#define Log_HA_table 0
+#define Log_LA_table 8208
+#define poly_coeff 12320
+#define ExpMask 12384
+#define Two10 12400
+#define MinLog1p 12416
+#define MaxLog1p 12432
+#define One 12448
+#define SgnMask 12464
+#define XThreshold 12480
+#define XhMask 12496
+#define Threshold 12512
+#define Bias 12528
+#define Bias1 12544
+#define ExpMask0 12560
+#define ExpMask2 12576
+#define L2 12592
+#define dHalf 12608
+#define dSign 12624
+#define dTopMask12 12640
+#define dTopMask41 12656
+#define TinyRange 12672
+
+/* Lookup bias for data table __svml_datanh_data_internal.  It is folded
+   into the base address held in %rsi, so the per-lane values derived from
+   the rounded reciprocal can be used directly as byte offsets.  */
+#define Table_Lookup_Bias -0x405ff0
+
+#include <sysdep.h>
+
+ .text
+ .section .text.sse4,"ax",@progbits
+/* _ZGVbN2v_atanh_sse4: atanh of the two doubles in %xmm0, result returned
+   in %xmm0.
+
+   All lanes are first computed inline as 1/2 * log1p (2|x| / (1 - |x|))
+   with the sign reapplied at the end; lanes with |x| < TinyRange take
+   x + x*x instead.  If any lane fails the |x| < 1 test (this includes
+   NaN), the special-values path recomputes each such lane with a call to
+   scalar atanh.
+
+   Long-lived registers on the main path:
+     xmm12  original argument
+     xmm11  the constant One
+     xmm9   mask of lanes with |x| < TinyRange
+     edx    bitmask of lanes that need the scalar callout
+     rsi    table base plus Table_Lookup_Bias  */
+ENTRY(_ZGVbN2v_atanh_sse4)
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+/* 64-byte, 32-byte-aligned frame; its slots are only touched on the
+   special-values path.  */
+ andq $-32, %rsp
+ subq $64, %rsp
+ movaps %xmm0, %xmm12
+ movups SgnMask+__svml_datanh_data_internal(%rip), %xmm7
+ lea Table_Lookup_Bias+__svml_datanh_data_internal(%rip), %rsi
+
+/* Load the constant 1 and a sign mask */
+ movups One+__svml_datanh_data_internal(%rip), %xmm11
+
+/* Strip off the sign, so treat X as positive until right at the end */
+ movaps %xmm7, %xmm14
+ andps %xmm12, %xmm14
+ movaps %xmm11, %xmm15
+ subpd %xmm14, %xmm15
+ movups dTopMask41+__svml_datanh_data_internal(%rip), %xmm2
+ movaps %xmm11, %xmm5
+ movaps %xmm2, %xmm0
+
+/*
+ * Compute V = 2 * X trivially, and UHi + ULo = 1 - X in two pieces,
+ * the upper part UHi being <= 41 bits long. Then we have
+ * atanh(X) = 1/2 * log((1 + X) / (1 - X)) = 1/2 * log1p(V / (UHi + ULo)).
+ */
+ movaps %xmm14, %xmm6
+ andps %xmm15, %xmm0
+
+/*
+ * Check whether |X| < 1, in which case we use the main function.
+ * Otherwise set the rangemask so that the callout will get used.
+ * Note that this will also use the callout for NaNs since not(NaN < 1).
+ */
+ movaps %xmm14, %xmm13
+
+/*
+ * Now compute R = 1/(UHi+ULo) * (1 - E) and the error term E
+ * The first FMR is exact (we force R to 12 bits just in case it
+ * isn't already, to make absolutely sure), and since E is ~ 2^-12,
+ * the rounding error in the other one is acceptable.
+ */
+ cvtpd2ps %xmm0, %xmm1
+ subpd %xmm15, %xmm5
+ addpd %xmm14, %xmm6
+ subpd %xmm0, %xmm15
+ cmpnltpd %xmm11, %xmm13
+ subpd %xmm14, %xmm5
+/* edx = one bit per lane, set when not (|X| < 1).  */
+ movmskpd %xmm13, %edx
+ movlhps %xmm1, %xmm1
+ movaps %xmm14, %xmm9
+ rcpps %xmm1, %xmm4
+ addpd %xmm15, %xmm5
+/* xmm9 = mask of lanes with |X| < TinyRange.  */
+ cmpltpd TinyRange+__svml_datanh_data_internal(%rip), %xmm9
+ cvtps2pd %xmm4, %xmm14
+ andps dTopMask12+__svml_datanh_data_internal(%rip), %xmm14
+ movaps %xmm11, %xmm13
+ mulpd %xmm14, %xmm0
+ mulpd %xmm14, %xmm5
+ subpd %xmm0, %xmm13
+
+/*
+ * Split V as well into upper 41 bits and lower part, so that we can get
+ * a preliminary quotient estimate without rounding error.
+ */
+ andps %xmm6, %xmm2
+
+/*
+ * Now we feed into the log1p code, using H in place of _VARG1 and
+ * later incorporating L into the reduced argument.
+ * compute 1+x as high, low parts
+ */
+ movaps %xmm11, %xmm0
+ subpd %xmm5, %xmm13
+ subpd %xmm2, %xmm6
+
+/* Hence get initial quotient estimate QHi + QLo = R * VHi + R * VLo */
+ mulpd %xmm14, %xmm2
+ mulpd %xmm6, %xmm14
+
+/*
+ * Compute D = E + E^2 + E^3 + E^4 + E^5
+ * = E + (E + E^2) (E + E * E^2)
+ */
+ movaps %xmm13, %xmm6
+ movaps %xmm13, %xmm3
+ mulpd %xmm13, %xmm6
+ mulpd %xmm6, %xmm3
+ addpd %xmm13, %xmm6
+ addpd %xmm13, %xmm3
+ mulpd %xmm3, %xmm6
+ addpd %xmm6, %xmm13
+
+/*
+ * Compute R * (VHi + VLo) * (1 + E + E^2 + E^3 + E^4 + E^5)
+ * = R * (VHi + VLo) * (1 + D)
+ * = QHi + (QHi * D + QLo + QLo * D)
+ */
+ movaps %xmm13, %xmm1
+ movaps %xmm11, %xmm5
+ mulpd %xmm14, %xmm13
+ mulpd %xmm2, %xmm1
+ addpd %xmm13, %xmm14
+ addpd %xmm14, %xmm1
+
+/*
+ * Now finally accumulate the high and low parts of the
+ * argument to log1p, H + L, with a final compensated summation.
+ */
+ addpd %xmm1, %xmm2
+ maxpd %xmm2, %xmm0
+ minpd %xmm2, %xmm5
+ andps %xmm7, %xmm2
+ movaps %xmm0, %xmm4
+ cmpltpd XThreshold+__svml_datanh_data_internal(%rip), %xmm2
+ addpd %xmm5, %xmm4
+ orps XhMask+__svml_datanh_data_internal(%rip), %xmm2
+ movaps %xmm12, %xmm10
+
+/* preserve mantissa, set input exponent to 2^(-10) */
+ movups ExpMask+__svml_datanh_data_internal(%rip), %xmm7
+ andps %xmm2, %xmm4
+ andps %xmm4, %xmm7
+
+/* exponent bits */
+ movaps %xmm4, %xmm6
+ orps Two10+__svml_datanh_data_internal(%rip), %xmm7
+ psrlq $20, %xmm6
+
+/* reciprocal approximation good to at least 11 bits */
+ cvtpd2ps %xmm7, %xmm1
+ subpd %xmm4, %xmm0
+/* Interleaved with the log1p code: xmm10 = x * x + x, the value later
+   selected for lanes with |X| < TinyRange.  */
+ mulpd %xmm12, %xmm10
+ addpd %xmm0, %xmm5
+ addpd %xmm12, %xmm10
+ movlhps %xmm1, %xmm1
+ rcpps %xmm1, %xmm15
+ cvtps2pd %xmm15, %xmm3
+
+/* round reciprocal to nearest integer, will have 1+9 mantissa bits */
+/* NOTE(review): .FLT_21 is defined outside the part of the file visible
+   here; its value could not be checked.  */
+ movups .FLT_21(%rip), %xmm1
+ addpd %xmm1, %xmm3
+ subpd %xmm1, %xmm3
+
+/* exponent of X needed to scale Xl */
+ movdqu ExpMask0+__svml_datanh_data_internal(%rip), %xmm0
+
+/*
+ * prepare table index
+ * table lookup
+ */
+ movaps %xmm3, %xmm13
+
+/* 2^ (-10-exp(X) ) */
+ movdqu ExpMask2+__svml_datanh_data_internal(%rip), %xmm2
+ pand %xmm4, %xmm0
+ psubq %xmm0, %xmm2
+
+/* scale DblRcp */
+ mulpd %xmm3, %xmm2
+
+/* argument reduction */
+ mulpd %xmm2, %xmm4
+ mulpd %xmm2, %xmm5
+ subpd %xmm11, %xmm4
+ addpd %xmm5, %xmm4
+
+/* polynomial */
+ movups poly_coeff+__svml_datanh_data_internal(%rip), %xmm11
+ psrlq $40, %xmm13
+ mulpd %xmm4, %xmm11
+/* eax = byte offset from %rsi of the table entry for lane 0.  */
+ movd %xmm13, %eax
+ pshufd $221, %xmm6, %xmm7
+
+/* exponent*log(2.0) */
+ movups Threshold+__svml_datanh_data_internal(%rip), %xmm6
+ cmpltpd %xmm3, %xmm6
+ addpd poly_coeff+16+__svml_datanh_data_internal(%rip), %xmm11
+
+/* biased exponent in DP format */
+ cvtdq2pd %xmm7, %xmm1
+ movaps %xmm4, %xmm3
+ mulpd %xmm4, %xmm3
+ movups poly_coeff+32+__svml_datanh_data_internal(%rip), %xmm2
+ mulpd %xmm4, %xmm2
+ mulpd %xmm3, %xmm11
+ addpd poly_coeff+48+__svml_datanh_data_internal(%rip), %xmm2
+ addpd %xmm11, %xmm2
+
+/* reconstruction */
+ mulpd %xmm2, %xmm3
+ andps Bias+__svml_datanh_data_internal(%rip), %xmm6
+ orps Bias1+__svml_datanh_data_internal(%rip), %xmm6
+ pshufd $2, %xmm13, %xmm14
+ subpd %xmm6, %xmm1
+ addpd %xmm3, %xmm4
+/* ecx = byte offset from %rsi of the table entry for lane 1.  */
+ movd %xmm14, %ecx
+ mulpd L2+__svml_datanh_data_internal(%rip), %xmm1
+ movslq %eax, %rax
+ movslq %ecx, %rcx
+
+/* Record the sign for eventual reincorporation. */
+ movups dSign+__svml_datanh_data_internal(%rip), %xmm8
+ andps %xmm12, %xmm8
+ movsd (%rsi,%rax), %xmm0
+
+/* Or the sign bit in with the tiny result to handle atanh(-0) correctly */
+ orps %xmm8, %xmm10
+ movhpd (%rsi,%rcx), %xmm0
+ andps %xmm9, %xmm10
+ addpd %xmm4, %xmm0
+ addpd %xmm0, %xmm1
+
+/* Finally, halve the result and reincorporate the sign */
+ movups dHalf+__svml_datanh_data_internal(%rip), %xmm4
+ movaps %xmm9, %xmm0
+ pxor %xmm8, %xmm4
+ mulpd %xmm1, %xmm4
+/* Per-lane select: tiny lanes take xmm10, all other lanes take the
+   signed, halved log1p result in xmm4.  */
+ andnps %xmm4, %xmm0
+ orps %xmm10, %xmm0
+ testl %edx, %edx
+
+/* Go to special inputs processing branch */
+ jne L(SPECIAL_VALUES_BRANCH)
+ # LOE rbx r12 r13 r14 r15 edx xmm0 xmm12
+
+/* Restore registers
+ * and exit the function
+ */
+
+L(EXIT):
+ movq %rbp, %rsp
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+
+/* Branch to process
+ * special inputs
+ */
+
+L(SPECIAL_VALUES_BRANCH):
+/* Spill the original argument to 32(%rsp) and the vector result to
+   48(%rsp) so that single lanes can be read and overwritten.  */
+ movups %xmm12, 32(%rsp)
+ movups %xmm0, 48(%rsp)
+ # LOE rbx r12 r13 r14 r15 edx
+
+/* Save r12, r13 and r14 in the frame, then set r12d = lane index
+   (starting at 0) and r13d = range mask.  */
+ xorl %eax, %eax
+ movq %r12, 16(%rsp)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22
+ movl %eax, %r12d
+ movq %r13, 8(%rsp)
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22
+ movl %edx, %r13d
+ movq %r14, (%rsp)
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r15 r12d r13d
+
+/* Range mask
+ * bits check
+ */
+
+L(RANGEMASK_CHECK):
+/* CF = bit r12d of the range mask, i.e. whether this lane needs the
+   scalar call.  */
+ btl %r12d, %r13d
+
+/* Call scalar math function */
+ jc L(SCALAR_MATH_CALL)
+ # LOE rbx r15 r12d r13d
+
+/* Special inputs
+ * processing loop
+ */
+
+L(SPECIAL_VALUES_LOOP):
+/* Advance to the next lane; the vector has 2 lanes.  */
+ incl %r12d
+ cmpl $2, %r12d
+
+/* Check bits in range mask */
+ jl L(RANGEMASK_CHECK)
+ # LOE rbx r15 r12d r13d
+
+/* All lanes done: restore the saved registers and reload the patched
+   result vector.  */
+ movq 16(%rsp), %r12
+ cfi_restore(12)
+ movq 8(%rsp), %r13
+ cfi_restore(13)
+ movq (%rsp), %r14
+ cfi_restore(14)
+ movups 48(%rsp), %xmm0
+
+/* Go to exit */
+ jmp L(EXIT)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r12 r13 r14 r15 xmm0
+
+/* Scalar math function call
+ * to process special input
+ */
+
+L(SCALAR_MATH_CALL):
+/* r14 = lane index; load that lane of the saved argument, call scalar
+   atanh, and store the result over the same lane of the saved result.  */
+ movl %r12d, %r14d
+ movsd 32(%rsp,%r14,8), %xmm0
+ call atanh@PLT
+ # LOE rbx r14 r15 r12d r13d xmm0
+
+ movsd %xmm0, 48(%rsp,%r14,8)
+
+/* Process special inputs in loop */
+ jmp L(SPECIAL_VALUES_LOOP)
+ # LOE rbx r15 r12d r13d
+END(_ZGVbN2v_atanh_sse4)
+
+ .section .rodata, "a"
+ .align 16
+
+/* Layout of the __svml_datanh_data_internal table written as a C struct,
+   for reference.  It is only passed through the preprocessor when
+   __svml_datanh_data_internal_typedef is defined.  The fields appear in
+   the same order as the offset macros at the top of the file, and each is
+   declared with 16-byte alignment.
+   NOTE(review): nothing in the visible source defines that macro;
+   presumably this block is never assembled -- confirm.  */
+#ifdef __svml_datanh_data_internal_typedef
+typedef unsigned int VUINT32;
+typedef struct {
+ __declspec(align(16)) VUINT32 Log_HA_table[(1<<10)+2][2];
+ __declspec(align(16)) VUINT32 Log_LA_table[(1<<9)+1][2];
+ __declspec(align(16)) VUINT32 poly_coeff[4][2][2];
+ __declspec(align(16)) VUINT32 ExpMask[2][2];
+ __declspec(align(16)) VUINT32 Two10[2][2];
+ __declspec(align(16)) VUINT32 MinLog1p[2][2];
+ __declspec(align(16)) VUINT32 MaxLog1p[2][2];
+ __declspec(align(16)) VUINT32 One[2][2];
+ __declspec(align(16)) VUINT32 SgnMask[2][2];
+ __declspec(align(16)) VUINT32 XThreshold[2][2];
+ __declspec(align(16)) VUINT32 XhMask[2][2];
+ __declspec(align(16)) VUINT32 Threshold[2][2];
+ __declspec(align(16)) VUINT32 Bias[2][2];
+ __declspec(align(16)) VUINT32 Bias1[2][2];
+ __declspec(align(16)) VUINT32 ExpMask0[2][2];
+ __declspec(align(16)) VUINT32 ExpMask2[2][2];
+ __declspec(align(16)) VUINT32 L2[2][2];
+ __declspec(align(16)) VUINT32 dHalf[2][2];
+ __declspec(align(16)) VUINT32 dSign[2][2];
+ __declspec(align(16)) VUINT32 dTopMask12[2][2];
+ __declspec(align(16)) VUINT32 dTopMask41[2][2];
+ __declspec(align(16)) VUINT32 TinyRange[2][2];
+} __svml_datanh_data_internal;
+#endif
+__svml_datanh_data_internal:
+ /* Log_HA_table */
+ .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100
+ .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a
+ .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff
+ .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a
+ .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb
+ .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e
+ .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b
+ .quad 0xc0862347acebaf68, 0xbe1cef3b152048af
+ .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e
+ .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4
+ .quad 0xc08623537ac30980, 0xbe1cefc4642ee597
+ .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16
+ .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6
+ .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362
+ .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557
+ .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b
+ .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed
+ .quad 0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed
+ .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f
+ .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce
+ .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7
+ .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1
+ .quad 0xc086238206e94218, 0xbe1ceee898588610
+ .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea
+ .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6
+ .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6
+ .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165
+ .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1
+ .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b
+ .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670
+ .quad 0xc08623a07b28ae60, 0xbe1cef359363787c
+ .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c
+ .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84
+ .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7
+ .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b
+ .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf
+ .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62
+ .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b
+ .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98
+ .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87
+ .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff
+ .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798
+ .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e
+ .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde
+ .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b
+ .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c
+ .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98
+ .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f
+ .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358
+ .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380
+ .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4
+ .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b
+ .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2
+ .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4
+ .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400
+ .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7
+ .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a
+ .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d
+ .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b
+ .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575
+ .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951
+ .quad 0xc086241263e87f50, 0xbe1cf16e74768529
+ .quad 0xc0862415f6193658, 0xbe1cefec64b8becb
+ .quad 0xc086241986b28f30, 0xbe1cf0838d210baa
+ .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11
+ .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805
+ .quad 0xc08624242f008380, 0xbe1ceea988c5a417
+ .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5
+ .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38
+ .quad 0xc086242ec92eaee8, 0xbe1cef0946455411
+ .quad 0xc08624324ecbaf98, 0xbe1cefea60907739
+ .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42
+ .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d
+ .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e
+ .quad 0xc086244055d2c968, 0xbe1cef345284c119
+ .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219
+ .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114
+ .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189
+ .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f
+ .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f
+ .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5
+ .quad 0xc0862458a789e250, 0xbe1cf0b173796a31
+ .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d
+ .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb
+ .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7
+ .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f
+ .quad 0xc0862469d9a591c0, 0xbe1cef503d947663
+ .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2
+ .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc
+ .quad 0xc086247419475160, 0xbe1cf03dd9922331
+ .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129
+ .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6
+ .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100
+ .quad 0xc0862481af27c528, 0xbe1cee8a6593278a
+ .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7
+ .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8
+ .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002
+ .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4
+ .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c
+ .quad 0xc0862495e5179270, 0xbe1cee757f20c326
+ .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4
+ .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97
+ .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb
+ .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e
+ .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b
+ .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80
+ .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71
+ .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9
+ .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139
+ .quad 0xc08624b72472a528, 0xbe1cf031c931c11f
+ .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7
+ .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d
+ .quad 0xc08624c103245238, 0xbe1cefd492f1ba75
+ .quad 0xc08624c44aacab08, 0xbe1cf1253e154466
+ .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55
+ .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe
+ .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f
+ .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968
+ .quad 0xc08624d49c4a4b78, 0xbe1cee97b556ed78
+ .quad 0xc08624d7dbd56750, 0xbe1cf1b14b6acb75
+ .quad 0xc08624db1a0f6b00, 0xbe1cef1e860623f2
+ .quad 0xc08624de56f96758, 0xbe1ceeaf4d156f3d
+ .quad 0xc08624e192946bf0, 0xbe1ceecc12b400ed
+ .quad 0xc08624e4cce18710, 0xbe1cf180c40c794f
+ .quad 0xc08624e805e1c5c8, 0xbe1cf185a08f7f65
+ .quad 0xc08624eb3d9633d8, 0xbe1cef45fc924078
+ .quad 0xc08624ee73ffdbb0, 0xbe1cf1e4f457f32a
+ .quad 0xc08624f1a91fc6a0, 0xbe1cf040147b8a5a
+ .quad 0xc08624f4dcf6fc98, 0xbe1cf1effca0dfb2
+ .quad 0xc08624f80f868468, 0xbe1cf0470146e5bc
+ .quad 0xc08624fb40cf6390, 0xbe1cef4dd186e501
+ .quad 0xc08624fe70d29e60, 0xbe1ceebe257f66c7
+ .quad 0xc08625019f9137f0, 0xbe1ceefb7a1c395c
+ .quad 0xc0862504cd0c3220, 0xbe1cf209dedfed8c
+ .quad 0xc0862507f9448db0, 0xbe1cf082da464994
+ .quad 0xc086250b243b4a18, 0xbe1cee88694a73cf
+ .quad 0xc086250e4df165a0, 0xbe1cf0b61e8f0531
+ .quad 0xc08625117667dd78, 0xbe1cf1106599c962
+ .quad 0xc08625149d9fad98, 0xbe1ceff1ee88af1f
+ .quad 0xc0862517c399d0c8, 0xbe1cf0f746994ef6
+ .quad 0xc086251ae85740b8, 0xbe1cefe8a1d077e4
+ .quad 0xc086251e0bd8f5e0, 0xbe1cf1a1da036092
+ .quad 0xc08625212e1fe7a8, 0xbe1cf0f8a7786fcd
+ .quad 0xc08625244f2d0c48, 0xbe1cefa1174a07a7
+ .quad 0xc08625276f0158d8, 0xbe1cef1043aa5b25
+ .quad 0xc086252a8d9dc150, 0xbe1cf15d521c169d
+ .quad 0xc086252dab033898, 0xbe1cf220bba8861f
+ .quad 0xc0862530c732b078, 0xbe1cef51e310eae2
+ .quad 0xc0862533e22d1988, 0xbe1cf222fcedd8ae
+ .quad 0xc0862536fbf36370, 0xbe1cefdb4da4bda8
+ .quad 0xc086253a14867ca0, 0xbe1ceeafc1112171
+ .quad 0xc086253d2be75280, 0xbe1cee99dfb4b408
+ .quad 0xc08625404216d160, 0xbe1cf22d2536f06b
+ .quad 0xc08625435715e498, 0xbe1cef6abbf2e268
+ .quad 0xc08625466ae57648, 0xbe1cf093a14789f5
+ .quad 0xc08625497d866fa0, 0xbe1cf0f93655603c
+ .quad 0xc086254c8ef9b8b8, 0xbe1cf1cc40c9aafc
+ .quad 0xc086254f9f4038a8, 0xbe1ceeea5f4e9157
+ .quad 0xc0862552ae5ad568, 0xbe1cefa9f52d4997
+ .quad 0xc0862555bc4a7400, 0xbe1cefa490a638ff
+ .quad 0xc0862558c90ff868, 0xbe1cef7fcf797d6f
+ .quad 0xc086255bd4ac4590, 0xbe1cf1b4c51113c9
+ .quad 0xc086255edf203d78, 0xbe1cef55e5b4a55d
+ .quad 0xc0862561e86cc100, 0xbe1cf0d37a25f9dc
+ .quad 0xc0862564f092b028, 0xbe1ceebe9efc19d9
+ .quad 0xc0862567f792e9d8, 0xbe1cee8ad30a57b5
+ .quad 0xc086256afd6e4c08, 0xbe1cef4e1817b90b
+ .quad 0xc086256e0225b3b8, 0xbe1cee7fa9229996
+ .quad 0xc086257105b9fce0, 0xbe1cf0b54963d945
+ .quad 0xc0862574082c0298, 0xbe1cee5f2f3c7995
+ .quad 0xc0862577097c9ee0, 0xbe1cf0828e303a2c
+ .quad 0xc086257a09acaae0, 0xbe1cf172c3078947
+ .quad 0xc086257d08bcfec0, 0xbe1cf189252afa22
+ .quad 0xc086258006ae71b8, 0xbe1cefdb80426923
+ .quad 0xc08625830381da08, 0xbe1ceef1391a0372
+ .quad 0xc0862585ff380d00, 0xbe1cf17720c78d13
+ .quad 0xc0862588f9d1df18, 0xbe1ceef1f9027d83
+ .quad 0xc086258bf35023b8, 0xbe1cf06fac99dec9
+ .quad 0xc086258eebb3ad78, 0xbe1cf1373eeb45c0
+ .quad 0xc0862591e2fd4e00, 0xbe1cef777536bb81
+ .quad 0xc0862594d92dd600, 0xbe1cf0f43ca40766
+ .quad 0xc0862597ce461558, 0xbe1cefb2cfc6766b
+ .quad 0xc086259ac246daf0, 0xbe1ceea49e64ffa2
+ .quad 0xc086259db530f4c8, 0xbe1cf250fa457dec
+ .quad 0xc08625a0a7053018, 0xbe1cf17d8bb2a44e
+ .quad 0xc08625a397c45918, 0xbe1cf1d5906d54b7
+ .quad 0xc08625a6876f3b30, 0xbe1cf08fe7b31780
+ .quad 0xc08625a97606a0e0, 0xbe1cef13edfc9d11
+ .quad 0xc08625ac638b53c8, 0xbe1cef9d2b107219
+ .quad 0xc08625af4ffe1cb0, 0xbe1cf1ddd4ff6160
+ .quad 0xc08625b23b5fc390, 0xbe1cefa02a996495
+ .quad 0xc08625b525b10f68, 0xbe1cf166a7e37ee5
+ .quad 0xc08625b80ef2c680, 0xbe1cef0b171068a5
+ .quad 0xc08625baf725ae28, 0xbe1cf05c80779283
+ .quad 0xc08625bdde4a8af0, 0xbe1cf1bbfbffb889
+ .quad 0xc08625c0c4622090, 0xbe1cf0b8666c0124
+ .quad 0xc08625c3a96d31e0, 0xbe1cf0a8fcf47a86
+ .quad 0xc08625c68d6c80f0, 0xbe1cef46e18cb092
+ .quad 0xc08625c97060cef0, 0xbe1cf1458a350efb
+ .quad 0xc08625cc524adc58, 0xbe1ceeea1dadce12
+ .quad 0xc08625cf332b68b0, 0xbe1cf0a1bfdc44c7
+ .quad 0xc08625d2130332d0, 0xbe1cef96d02da73e
+ .quad 0xc08625d4f1d2f8a8, 0xbe1cf2451c3c7701
+ .quad 0xc08625d7cf9b7778, 0xbe1cf10d08f83812
+ .quad 0xc08625daac5d6ba0, 0xbe1ceec5b4895c5e
+ .quad 0xc08625dd881990b0, 0xbe1cf14e1325c5e4
+ .quad 0xc08625e062d0a188, 0xbe1cf21d0904be12
+ .quad 0xc08625e33c835838, 0xbe1ceed0839bcf21
+ .quad 0xc08625e615326df0, 0xbe1cf1bb944889d2
+ .quad 0xc08625e8ecde9b48, 0xbe1cee738e85eece
+ .quad 0xc08625ebc38897e0, 0xbe1cf25c2bc6ef12
+ .quad 0xc08625ee99311ac8, 0xbe1cf132b70a41ad
+ .quad 0xc08625f16dd8da28, 0xbe1cf1984236a6e3
+ .quad 0xc08625f441808b78, 0xbe1cf19ae74998f9
+ .quad 0xc08625f71428e370, 0xbe1cef3e175d61a1
+ .quad 0xc08625f9e5d295f8, 0xbe1cf101f9868fd9
+ .quad 0xc08625fcb67e5658, 0xbe1cee69db83dcd2
+ .quad 0xc08625ff862cd6f8, 0xbe1cf081b636af51
+ .quad 0xc086260254dec9a8, 0xbe1cee62c7d59b3e
+ .quad 0xc08626052294df58, 0xbe1cf1b745c57716
+ .quad 0xc0862607ef4fc868, 0xbe1cef3d2800ea23
+ .quad 0xc086260abb103458, 0xbe1cef480ff1acd2
+ .quad 0xc086260d85d6d200, 0xbe1cf2424c9a17ef
+ .quad 0xc08626104fa44f90, 0xbe1cf12cfde90fd5
+ .quad 0xc086261318795a68, 0xbe1cf21f590dd5b6
+ .quad 0xc0862615e0569f48, 0xbe1cf0c50f9cd28a
+ .quad 0xc0862618a73cca30, 0xbe1ceedbdb520545
+ .quad 0xc086261b6d2c8668, 0xbe1cf0b030396011
+ .quad 0xc086261e32267e98, 0xbe1cf19917010e96
+ .quad 0xc0862620f62b5cb0, 0xbe1cf07331355985
+ .quad 0xc0862623b93bc9e8, 0xbe1cf01ae921a1c3
+ .quad 0xc08626267b586ed0, 0xbe1cefe5cf0dbf0c
+ .quad 0xc08626293c81f348, 0xbe1cf01b258aeb50
+ .quad 0xc086262bfcb8fe88, 0xbe1cee6b9e7f4c68
+ .quad 0xc086262ebbfe3710, 0xbe1cee684a9b21c9
+ .quad 0xc08626317a5242b8, 0xbe1cf1f8bcde9a8b
+ .quad 0xc086263437b5c6c0, 0xbe1cf1d063d36238
+ .quad 0xc0862636f42967a8, 0xbe1cf1e31a19075e
+ .quad 0xc0862639afadc950, 0xbe1cf1d8efdf7e7d
+ .quad 0xc086263c6a438ef0, 0xbe1cf1812ee72dba
+ .quad 0xc086263f23eb5b18, 0xbe1cf1449a9a2279
+ .quad 0xc0862641dca5cfb8, 0xbe1cee96edce5085
+ .quad 0xc086264494738e08, 0xbe1cf06797bd03b2
+ .quad 0xc08626474b5536b8, 0xbe1cef91b9b7ffc1
+ .quad 0xc086264a014b69c0, 0xbe1cef4b6721278f
+ .quad 0xc086264cb656c678, 0xbe1cf1942925eb4a
+ .quad 0xc086264f6a77eba8, 0xbe1cefa2c7bc2e39
+ .quad 0xc08626521daf7758, 0xbe1cf252595aceb3
+ .quad 0xc0862654cffe0718, 0xbe1cee8e9ae47ec2
+ .quad 0xc0862657816437a8, 0xbe1cf1bf913828fa
+ .quad 0xc086265a31e2a558, 0xbe1cf23475d6b366
+ .quad 0xc086265ce179ebc8, 0xbe1cef8df00a922b
+ .quad 0xc086265f902aa5f0, 0xbe1cef279bfa43e0
+ .quad 0xc08626623df56e38, 0xbe1cf080e10b8365
+ .quad 0xc0862664eadade70, 0xbe1cf1a518f9b544
+ .quad 0xc086266796db8fd0, 0xbe1cef9308fed9e9
+ .quad 0xc086266a41f81ae8, 0xbe1ceea3ae6b19c9
+ .quad 0xc086266cec3117b8, 0xbe1ceef06003d4c2
+ .quad 0xc086266f95871da8, 0xbe1cf0b8457ffb0c
+ .quad 0xc08626723dfac390, 0xbe1cf0c526745ad6
+ .quad 0xc0862674e58c9fa8, 0xbe1cf0cf91ff7b5d
+ .quad 0xc08626778c3d4798, 0xbe1cefe260819380
+ .quad 0xc086267a320d5070, 0xbe1ceebd90aa27a3
+ .quad 0xc086267cd6fd4ea8, 0xbe1cf0388121dffa
+ .quad 0xc086267f7b0dd630, 0xbe1cf1a3881435f1
+ .quad 0xc08626821e3f7a68, 0xbe1cef28e9d9ac52
+ .quad 0xc0862684c092ce08, 0xbe1cf02d300062dd
+ .quad 0xc086268762086350, 0xbe1cefaee1edfa35
+ .quad 0xc086268a02a0cbe0, 0xbe1cf0a5a052e936
+ .quad 0xc086268ca25c98d8, 0xbe1cee60a4a497ed
+ .quad 0xc086268f413c5ab0, 0xbe1cf0e4a5d0cf49
+ .quad 0xc0862691df40a170, 0xbe1cf149235a4e6e
+ .quad 0xc08626947c69fc80, 0xbe1cf215180b9fcc
+ .quad 0xc086269718b8fac8, 0xbe1cef9b156a9840
+ .quad 0xc0862699b42e2a90, 0xbe1cf054c91441be
+ .quad 0xc086269c4eca19a8, 0xbe1cf13ded26512c
+ .quad 0xc086269ee88d5550, 0xbe1cf22ea4d8ac06
+ .quad 0xc08626a181786a40, 0xbe1cf2354666ee2e
+ .quad 0xc08626a4198be4a8, 0xbe1cefef936752b3
+ .quad 0xc08626a6b0c85020, 0xbe1cf1e360a9db68
+ .quad 0xc08626a9472e37d8, 0xbe1ceed6aeb812c5
+ .quad 0xc08626abdcbe2650, 0xbe1cf227340b4986
+ .quad 0xc08626ae7178a5b0, 0xbe1cf0215a0cbe0d
+ .quad 0xc08626b1055e3f70, 0xbe1cf256adf0ae26
+ .quad 0xc08626b3986f7ca8, 0xbe1ceff3c67aed06
+ .quad 0xc08626b62aace5c8, 0xbe1cf2159fb93652
+ .quad 0xc08626b8bc1702e0, 0xbe1cf01e6dbd1c7f
+ .quad 0xc08626bb4cae5b60, 0xbe1cf009e75d1c0c
+ .quad 0xc08626bddc737648, 0xbe1ceec10a020e73
+ .quad 0xc08626c06b66da08, 0xbe1cf06d5783eee7
+ .quad 0xc08626c2f9890ca0, 0xbe1cf0cb8f169ffe
+ .quad 0xc08626c586da9388, 0xbe1cef7de2452430
+ .quad 0xc08626c8135bf3b0, 0xbe1cf05da6f783ae
+ .quad 0xc08626ca9f0db198, 0xbe1cefcc877d681d
+ .quad 0xc08626cd29f05138, 0xbe1cef0531954ab3
+ .quad 0xc08626cfb4045608, 0xbe1cf06b8565ea3d
+ .quad 0xc08626d23d4a4310, 0xbe1cefdc455d9d7e
+ .quad 0xc08626d4c5c29ad0, 0xbe1ceefc47e8fa64
+ .quad 0xc08626d74d6ddf48, 0xbe1cf1872bf033f2
+ .quad 0xc08626d9d44c9210, 0xbe1cf19d91087f9d
+ .quad 0xc08626dc5a5f3438, 0xbe1cf012d444c6ab
+ .quad 0xc08626dedfa64650, 0xbe1cf0ba528ee153
+ .quad 0xc08626e164224880, 0xbe1ceeb431709788
+ .quad 0xc08626e3e7d3ba60, 0xbe1cf0b9af31a6a5
+ .quad 0xc08626e66abb1b28, 0xbe1cf168fb2e135b
+ .quad 0xc08626e8ecd8e990, 0xbe1cef9097461c93
+ .quad 0xc08626eb6e2da3d0, 0xbe1cee7a434735d8
+ .quad 0xc08626edeeb9c7a8, 0xbe1cf235732b86f2
+ .quad 0xc08626f06e7dd280, 0xbe1cefe1510b89e6
+ .quad 0xc08626f2ed7a4120, 0xbe1cf1f64b9b80ef
+ .quad 0xc08626f56baf9000, 0xbe1cf08f320ca339
+ .quad 0xc08626f7e91e3b08, 0xbe1cf1b1de2808a1
+ .quad 0xc08626fa65c6bdc0, 0xbe1cf1976d778b28
+ .quad 0xc08626fce1a99338, 0xbe1ceef40a4f076f
+ .quad 0xc08626ff5cc73600, 0xbe1cef3e45869ce3
+ .quad 0xc0862701d7202048, 0xbe1ceef601b4c9d6
+ .quad 0xc086270450b4cbc0, 0xbe1cf1eaf0b57fd6
+ .quad 0xc0862706c985b1c0, 0xbe1cef82a44990f3
+ .quad 0xc086270941934b10, 0xbe1ceefe32981f2c
+ .quad 0xc086270bb8de1018, 0xbe1cefbf6f5a0445
+ .quad 0xc086270e2f6678d0, 0xbe1cf18dba75792c
+ .quad 0xc0862710a52cfcc8, 0xbe1cf0da64ce995f
+ .quad 0xc08627131a321318, 0xbe1cef04ac0fb802
+ .quad 0xc08627158e763268, 0xbe1cee9d4e2ad9bd
+ .quad 0xc086271801f9d0f8, 0xbe1cefa9b55407b5
+ .quad 0xc086271a74bd64a0, 0xbe1cefe6bd329570
+ .quad 0xc086271ce6c162c8, 0xbe1cef0b1205dc85
+ .quad 0xc086271f58064068, 0xbe1cef092a785e3f
+ .quad 0xc0862721c88c7210, 0xbe1cf050dcdaac30
+ .quad 0xc086272438546be8, 0xbe1cf210907ded8b
+ .quad 0xc0862726a75ea1b8, 0xbe1cee760be44f99
+ .quad 0xc086272915ab86c0, 0xbe1ceeeee07c2bcc
+ .quad 0xc086272b833b8df0, 0xbe1cf06874992df5
+ .quad 0xc086272df00f29d0, 0xbe1cef8fac5d4899
+ .quad 0xc08627305c26cc70, 0xbe1cf1103241cc99
+ .quad 0xc0862732c782e788, 0xbe1cf1d35fef83fe
+ .quad 0xc08627353223ec68, 0xbe1cef3ec8133e1d
+ .quad 0xc08627379c0a4be8, 0xbe1cef7261daccd8
+ .quad 0xc086273a05367688, 0xbe1cf18656c50806
+ .quad 0xc086273c6da8dc68, 0xbe1cf1c8736e049a
+ .quad 0xc086273ed561ed38, 0xbe1cf1f93bff4911
+ .quad 0xc08627413c621848, 0xbe1cf188a4ea680c
+ .quad 0xc0862743a2a9cc80, 0xbe1cf1d270930c80
+ .quad 0xc086274608397868, 0xbe1cf25a328c28e2
+ .quad 0xc08627486d118a28, 0xbe1cf106f90aa3b8
+ .quad 0xc086274ad1326f80, 0xbe1cee5e9d2e885a
+ .quad 0xc086274d349c95c0, 0xbe1cf1c0bac27228
+ .quad 0xc086274f975069f8, 0xbe1cf1a1500f9b1c
+ .quad 0xc0862751f94e58c0, 0xbe1cefc30663ac44
+ .quad 0xc08627545a96ce48, 0xbe1cf17123e427a2
+ .quad 0xc0862756bb2a3678, 0xbe1cefb92749fea4
+ .quad 0xc08627591b08fcc0, 0xbe1cefa40e1ea74a
+ .quad 0xc086275b7a338c40, 0xbe1cee6f4612c3e9
+ .quad 0xc086275dd8aa4fa8, 0xbe1cf1c54a053627
+ .quad 0xc0862760366db168, 0xbe1ceff5eb503d9e
+ .quad 0xc0862762937e1b70, 0xbe1cf02e47f10cee
+ .quad 0xc0862764efdbf768, 0xbe1ceeb06e1d0dad
+ .quad 0xc08627674b87ae88, 0xbe1cf10aadd6dba5
+ .quad 0xc0862769a681a9c0, 0xbe1cf24e9913d30f
+ .quad 0xc086276c00ca51a0, 0xbe1cef47b301e312
+ .quad 0xc086276e5a620e48, 0xbe1ceeb1cefc2e85
+ .quad 0xc0862770b3494788, 0xbe1cf16f1fbbe011
+ .quad 0xc08627730b8064e8, 0xbe1ceebdf75174c7
+ .quad 0xc08627756307cd70, 0xbe1cf06e3871a0da
+ .quad 0xc0862777b9dfe7f0, 0xbe1cef16799fd554
+ .quad 0xc086277a10091ac0, 0xbe1cf248dabf5377
+ .quad 0xc086277c6583cc00, 0xbe1cf0c78d92a2cd
+ .quad 0xc086277eba506158, 0xbe1cf0b911b029f0
+ .quad 0xc08627810e6f4028, 0xbe1cefdc24719766
+ .quad 0xc086278361e0cd70, 0xbe1cefbb6562b7e7
+ .quad 0xc0862785b4a56dd8, 0xbe1cf1e0afb349ec
+ .quad 0xc086278806bd85c0, 0xbe1cf008292e52fc
+ .quad 0xc086278a58297918, 0xbe1cf053073872bf
+ .quad 0xc086278ca8e9ab88, 0xbe1cf17a0a55a947
+ .quad 0xc086278ef8fe8068, 0xbe1ceeffb0b60234
+ .quad 0xc086279148685aa0, 0xbe1cf162204794a8
+ .quad 0xc086279397279ce0, 0xbe1cf24cc8cb48ac
+ .quad 0xc0862795e53ca978, 0xbe1cf0c9be68d5c3
+ .quad 0xc086279832a7e258, 0xbe1cf172cd3d7388
+ .quad 0xc086279a7f69a930, 0xbe1ceea2465fbce5
+ .quad 0xc086279ccb825f40, 0xbe1cf0a386d2500f
+ .quad 0xc086279f16f26590, 0xbe1cf1e338ddc18a
+ .quad 0xc08627a161ba1cd0, 0xbe1cef1f5049867f
+ .quad 0xc08627a3abd9e548, 0xbe1cef96c1ea8b1f
+ .quad 0xc08627a5f5521f00, 0xbe1cf138f6fd3c26
+ .quad 0xc08627a83e2329b0, 0xbe1cf0d4fcbfdf3a
+ .quad 0xc08627aa864d64b0, 0xbe1cf24870c12c81
+ .quad 0xc08627accdd12f18, 0xbe1cf0ae2a56348d
+ .quad 0xc08627af14aee7a0, 0xbe1cee8ca1a9b893
+ .quad 0xc08627b15ae6eca8, 0xbe1cf20414d637b0
+ .quad 0xc08627b3a0799c60, 0xbe1cf0fc6b7b12d8
+ .quad 0xc08627b5e5675488, 0xbe1cf152d93c4a00
+ .quad 0xc08627b829b072a0, 0xbe1cf1073f9b77c2
+ .quad 0xc08627ba6d5553d8, 0xbe1cee694f97d5a4
+ .quad 0xc08627bcb0565500, 0xbe1cf0456b8239d7
+ .quad 0xc08627bef2b3d2b0, 0xbe1cf211497127e3
+ .quad 0xc08627c1346e2930, 0xbe1cf01856c0384d
+ .quad 0xc08627c37585b468, 0xbe1cefa7dd05479e
+ .quad 0xc08627c5b5fad000, 0xbe1cef3ae8e50b93
+ .quad 0xc08627c7f5cdd750, 0xbe1ceea5f32fdd3a
+ .quad 0xc08627ca34ff2560, 0xbe1cef424caeb8d9
+ .quad 0xc08627cc738f14f0, 0xbe1cf0194d07a81f
+ .quad 0xc08627ceb17e0070, 0xbe1cf20f452000c1
+ .quad 0xc08627d0eecc4210, 0xbe1cf00e356218e4
+ .quad 0xc08627d32b7a33a0, 0xbe1cef30484b4bcb
+ .quad 0xc08627d567882eb0, 0xbe1ceeea11a6641b
+ .quad 0xc08627d7a2f68c80, 0xbe1cf13492d5bd7b
+ .quad 0xc08627d9ddc5a618, 0xbe1ceeb7048fad96
+ .quad 0xc08627dc17f5d418, 0xbe1ceef0666f0477
+ .quad 0xc08627de51876ee8, 0xbe1cf060d4b8b5c2
+ .quad 0xc08627e08a7acea8, 0xbe1cf0b2a4b6ff8c
+ .quad 0xc08627e2c2d04b28, 0xbe1cf0e34809a875
+ .quad 0xc08627e4fa883bf0, 0xbe1cf16bf74a3522
+ .quad 0xc08627e731a2f848, 0xbe1cee6a24623d57
+ .quad 0xc08627e96820d718, 0xbe1cefc7b4f1528e
+ .quad 0xc08627eb9e022f18, 0xbe1cf163051f3548
+ .quad 0xc08627edd34756b8, 0xbe1cef36b3366305
+ .quad 0xc08627f007f0a408, 0xbe1cf18134625550
+ .quad 0xc08627f23bfe6cf0, 0xbe1cf0ec32ec1a11
+ .quad 0xc08627f46f710700, 0xbe1ceeb3b64f3edc
+ .quad 0xc08627f6a248c778, 0xbe1cf0cd15805bc8
+ .quad 0xc08627f8d4860368, 0xbe1cf20db3bddebe
+ .quad 0xc08627fb06290f90, 0xbe1cf25188430e25
+ .quad 0xc08627fd37324070, 0xbe1ceea1713490f9
+ .quad 0xc08627ff67a1ea28, 0xbe1cf159521d234c
+ .quad 0xc0862801977860b8, 0xbe1cf24dfe50783b
+ .quad 0xc0862803c6b5f7d0, 0xbe1ceef2ef89a60b
+ .quad 0xc0862805f55b02c8, 0xbe1cee7fc919d62c
+ .quad 0xc08628082367d4c0, 0xbe1cf215a7fb513a
+ .quad 0xc086280a50dcc0a8, 0xbe1cf0e4401c5ed4
+ .quad 0xc086280c7dba1910, 0xbe1cf04ec734d256
+ .quad 0xc086280eaa003050, 0xbe1cf010ad787fea
+ .quad 0xc0862810d5af5880, 0xbe1cee622478393d
+ .quad 0xc086281300c7e368, 0xbe1cf01c7482564f
+ .quad 0xc08628152b4a22a0, 0xbe1cf0de20d33536
+ .quad 0xc086281755366778, 0xbe1cef2edae5837d
+ .quad 0xc08628197e8d02f0, 0xbe1cf0a345318cc9
+ .quad 0xc086281ba74e45d8, 0xbe1cf20085aa34b8
+ .quad 0xc086281dcf7a80c0, 0xbe1cef5fa845ad83
+ .quad 0xc086281ff71203e0, 0xbe1cf050d1df69c4
+ .quad 0xc08628221e151f48, 0xbe1ceffe43c035b9
+ .quad 0xc0862824448422b8, 0xbe1cf14f3018d3c2
+ .quad 0xc08628266a5f5dc0, 0xbe1cef0a5fbae83d
+ .quad 0xc08628288fa71f98, 0xbe1ceff8a95b72a1
+ .quad 0xc086282ab45bb750, 0xbe1cef073aa9849b
+ .quad 0xc086282cd87d73a8, 0xbe1cef69b3835c02
+ .quad 0xc086282efc0ca328, 0xbe1cf0bc139379a9
+ .quad 0xc08628311f099420, 0xbe1cef247a9ec596
+ .quad 0xc086283341749490, 0xbe1cef74bbcc488a
+ .quad 0xc0862835634df248, 0xbe1cef4bc42e7b8e
+ .quad 0xc08628378495fad0, 0xbe1cf136d4d5a810
+ .quad 0xc0862839a54cfb80, 0xbe1cf0d290b24dd8
+ .quad 0xc086283bc5734168, 0xbe1ceeebde8e0065
+ .quad 0xc086283de5091950, 0xbe1cf1a09f60aa1e
+ .quad 0xc0862840040ecfe0, 0xbe1cf0803947a234
+ .quad 0xc08628422284b168, 0xbe1cf0abf7638127
+ .quad 0xc0862844406b0a08, 0xbe1cf0f73ee12058
+ .quad 0xc08628465dc225a0, 0xbe1cf2079971b26c
+ .quad 0xc08628487a8a4fe0, 0xbe1cee74957564b1
+ .quad 0xc086284a96c3d420, 0xbe1ceee77c1b7d43
+ .quad 0xc086284cb26efd90, 0xbe1cf23addba6e09
+ .quad 0xc086284ecd8c1730, 0xbe1cf199f4a1da60
+ .quad 0xc0862850e81b6bb0, 0xbe1cf09fdea81393
+ .quad 0xc0862853021d4588, 0xbe1cf176adb417f7
+ .quad 0xc08628551b91ef00, 0xbe1cf0f64f84a8da
+ .quad 0xc08628573479b220, 0xbe1ceec34cf49523
+ .quad 0xc08628594cd4d8a8, 0xbe1cf16d60fbe0bb
+ .quad 0xc086285b64a3ac40, 0xbe1cee8de7acfc7b
+ .quad 0xc086285d7be67630, 0xbe1ceee6256cce8d
+ .quad 0xc086285f929d7fa0, 0xbe1cee7d66a3d8a5
+ .quad 0xc0862861a8c91170, 0xbe1cf0bef8265792
+ .quad 0xc0862863be697458, 0xbe1cf097f890c6f8
+ .quad 0xc0862865d37ef0c8, 0xbe1cf09502d5c3fc
+ .quad 0xc0862867e809cf00, 0xbe1ceeffb239dac7
+ .quad 0xc0862869fc0a56f8, 0xbe1cf1fbfff95c98
+ .quad 0xc086286c0f80d090, 0xbe1cefa57ad3eef7
+ .quad 0xc086286e226d8348, 0xbe1cf22c58b9183d
+ .quad 0xc086287034d0b690, 0xbe1ceff262d0a248
+ .quad 0xc086287246aab180, 0xbe1cefa7bc194186
+ .quad 0xc086287457fbbb08, 0xbe1cf06782d784d9
+ .quad 0xc086287668c419e0, 0xbe1cf1d44d0eaa07
+ .quad 0xc086287879041490, 0xbe1cf034803c8a48
+ .quad 0xc086287a88bbf158, 0xbe1cf08e84916b6f
+ .quad 0xc086287c97ebf650, 0xbe1cf0c4d3dc1bc7
+ .quad 0xc086287ea6946958, 0xbe1cefb1e4625943
+ .quad 0xc0862880b4b59010, 0xbe1cf143efdd1fd0
+ .quad 0xc0862882c24faff8, 0xbe1cee9896d016da
+ .quad 0xc0862884cf630e38, 0xbe1cf2186072f2cc
+ .quad 0xc0862886dbefeff0, 0xbe1cef9217633d34
+ .quad 0xc0862888e7f699e0, 0xbe1cf05603549486
+ .quad 0xc086288af37750b0, 0xbe1cef50fff513d3
+ .quad 0xc086288cfe7258c0, 0xbe1cf127713b32d0
+ .quad 0xc086288f08e7f650, 0xbe1cf05015520f3d
+ .quad 0xc086289112d86d58, 0xbe1cf12eb458b26f
+ .quad 0xc08628931c4401a8, 0xbe1cf22eae2887ed
+ .quad 0xc0862895252af6e0, 0xbe1cefdd6656dd2d
+ .quad 0xc08628972d8d9058, 0xbe1cf1048ea4e646
+ .quad 0xc0862899356c1150, 0xbe1ceec4501167e9
+ .quad 0xc086289b3cc6bcb8, 0xbe1cf0ad52becc3f
+ .quad 0xc086289d439dd568, 0xbe1cf0daa4e00e35
+ .quad 0xc086289f49f19df8, 0xbe1cf00b80de8d6a
+ .quad 0xc08628a14fc258c8, 0xbe1cf1bcf2ea8464
+ .quad 0xc08628a355104818, 0xbe1cf0435e2782b0
+ .quad 0xc08628a559dbade0, 0xbe1cf0e3e1a5f56c
+ .quad 0xc08628a75e24cbf8, 0xbe1cefed9d5a721d
+ .quad 0xc08628a961ebe3f8, 0xbe1cf0d2d74321e2
+ .quad 0xc08628ab65313750, 0xbe1cf24200eb55e9
+ .quad 0xc08628ad67f50740, 0xbe1cf23e9d7cf979
+ .quad 0xc08628af6a3794d0, 0xbe1cf23a088f421c
+ .quad 0xc08628b16bf920e0, 0xbe1cef2c1de1ab32
+ .quad 0xc08628b36d39ec08, 0xbe1cf1abc231f7b2
+ .quad 0xc08628b56dfa36d0, 0xbe1cf2074d5ba303
+ .quad 0xc08628b76e3a4180, 0xbe1cf05cd5eed880
+ /*== Log_LA_table ==*/
+ .align 16
+ .quad 0x8000000000000000
+ .quad 0xbf5ff802a9ab10e6
+ .quad 0xbf6ff00aa2b10bc0
+ .quad 0xbf77ee11ebd82e94
+ .quad 0xbf7fe02a6b106789
+ .quad 0xbf83e7295d25a7d9
+ .quad 0xbf87dc475f810a77
+ .quad 0xbf8bcf712c74384c
+ .quad 0xbf8fc0a8b0fc03e4
+ .quad 0xbf91d7f7eb9eebe7
+ .quad 0xbf93cea44346a575
+ .quad 0xbf95c45a51b8d389
+ .quad 0xbf97b91b07d5b11b
+ .quad 0xbf99ace7551cc514
+ .quad 0xbf9b9fc027af9198
+ .quad 0xbf9d91a66c543cc4
+ .quad 0xbf9f829b0e783300
+ .quad 0xbfa0b94f7c196176
+ .quad 0xbfa1b0d98923d980
+ .quad 0xbfa2a7ec2214e873
+ .quad 0xbfa39e87b9febd60
+ .quad 0xbfa494acc34d911c
+ .quad 0xbfa58a5bafc8e4d5
+ .quad 0xbfa67f94f094bd98
+ .quad 0xbfa77458f632dcfc
+ .quad 0xbfa868a83083f6cf
+ .quad 0xbfa95c830ec8e3eb
+ .quad 0xbfaa4fe9ffa3d235
+ .quad 0xbfab42dd711971bf
+ .quad 0xbfac355dd0921f2d
+ .quad 0xbfad276b8adb0b52
+ .quad 0xbfae19070c276016
+ .quad 0xbfaf0a30c01162a6
+ .quad 0xbfaffae9119b9303
+ .quad 0xbfb075983598e471
+ .quad 0xbfb0ed839b5526fe
+ .quad 0xbfb16536eea37ae1
+ .quad 0xbfb1dcb263db1944
+ .quad 0xbfb253f62f0a1417
+ .quad 0xbfb2cb0283f5de1f
+ .quad 0xbfb341d7961bd1d1
+ .quad 0xbfb3b87598b1b6ee
+ .quad 0xbfb42edcbea646f0
+ .quad 0xbfb4a50d3aa1b040
+ .quad 0xbfb51b073f06183f
+ .quad 0xbfb590cafdf01c28
+ .quad 0xbfb60658a93750c4
+ .quad 0xbfb67bb0726ec0fc
+ .quad 0xbfb6f0d28ae56b4c
+ .quad 0xbfb765bf23a6be13
+ .quad 0xbfb7da766d7b12cd
+ .quad 0xbfb84ef898e8282a
+ .quad 0xbfb8c345d6319b21
+ .quad 0xbfb9375e55595ede
+ .quad 0xbfb9ab42462033ad
+ .quad 0xbfba1ef1d8061cd4
+ .quad 0xbfba926d3a4ad563
+ .quad 0xbfbb05b49bee43fe
+ .quad 0xbfbb78c82bb0eda1
+ .quad 0xbfbbeba818146765
+ .quad 0xbfbc5e548f5bc743
+ .quad 0xbfbcd0cdbf8c13e1
+ .quad 0xbfbd4313d66cb35d
+ .quad 0xbfbdb5270187d927
+ .quad 0xbfbe27076e2af2e6
+ .quad 0xbfbe98b549671467
+ .quad 0xbfbf0a30c01162a6
+ .quad 0xbfbf7b79fec37ddf
+ .quad 0xbfbfec9131dbeabb
+ .quad 0xbfc02ebb42bf3d4b
+ .quad 0xbfc0671512ca596e
+ .quad 0xbfc09f561ee719c3
+ .quad 0xbfc0d77e7cd08e59
+ .quad 0xbfc10f8e422539b1
+ .quad 0xbfc14785846742ac
+ .quad 0xbfc17f6458fca611
+ .quad 0xbfc1b72ad52f67a0
+ .quad 0xbfc1eed90e2dc2c3
+ .quad 0xbfc2266f190a5acb
+ .quad 0xbfc25ded0abc6ad2
+ .quad 0xbfc29552f81ff523
+ .quad 0xbfc2cca0f5f5f251
+ .quad 0xbfc303d718e47fd3
+ .quad 0xbfc33af575770e4f
+ .quad 0xbfc371fc201e8f74
+ .quad 0xbfc3a8eb2d31a376
+ .quad 0xbfc3dfc2b0ecc62a
+ .quad 0xbfc41682bf727bc0
+ .quad 0xbfc44d2b6ccb7d1e
+ .quad 0xbfc483bccce6e3dd
+ .quad 0xbfc4ba36f39a55e5
+ .quad 0xbfc4f099f4a230b2
+ .quad 0xbfc526e5e3a1b438
+ .quad 0xbfc55d1ad4232d6f
+ .quad 0xbfc59338d9982086
+ .quad 0xbfc5c940075972b9
+ .quad 0xbfc5ff3070a793d4
+ .quad 0xbfc6350a28aaa758
+ .quad 0xbfc66acd4272ad51
+ .quad 0xbfc6a079d0f7aad2
+ .quad 0xbfc6d60fe719d21d
+ .quad 0xbfc70b8f97a1aa75
+ .quad 0xbfc740f8f54037a5
+ .quad 0xbfc7764c128f2127
+ .quad 0xbfc7ab890210d909
+ .quad 0xbfc7e0afd630c274
+ .quad 0xbfc815c0a14357eb
+ .quad 0xbfc84abb75865139
+ .quad 0xbfc87fa06520c911
+ .quad 0xbfc8b46f8223625b
+ .quad 0xbfc8e928de886d41
+ .quad 0xbfc91dcc8c340bde
+ .quad 0xbfc9525a9cf456b4
+ .quad 0xbfc986d3228180ca
+ .quad 0xbfc9bb362e7dfb83
+ .quad 0xbfc9ef83d2769a34
+ .quad 0xbfca23bc1fe2b563
+ .quad 0xbfca57df28244dcd
+ .quad 0xbfca8becfc882f19
+ .quad 0xbfcabfe5ae46124c
+ .quad 0xbfcaf3c94e80bff3
+ .quad 0xbfcb2797ee46320c
+ .quad 0xbfcb5b519e8fb5a4
+ .quad 0xbfcb8ef670420c3b
+ .quad 0xbfcbc286742d8cd6
+ .quad 0xbfcbf601bb0e44e2
+ .quad 0xbfcc2968558c18c1
+ .quad 0xbfcc5cba543ae425
+ .quad 0xbfcc8ff7c79a9a22
+ .quad 0xbfccc320c0176502
+ .quad 0xbfccf6354e09c5dc
+ .quad 0xbfcd293581b6b3e7
+ .quad 0xbfcd5c216b4fbb91
+ .quad 0xbfcd8ef91af31d5e
+ .quad 0xbfcdc1bca0abec7d
+ .quad 0xbfcdf46c0c722d2f
+ .quad 0xbfce27076e2af2e6
+ .quad 0xbfce598ed5a87e2f
+ .quad 0xbfce8c0252aa5a60
+ .quad 0xbfcebe61f4dd7b0b
+ .quad 0xbfcef0adcbdc5936
+ .quad 0xbfcf22e5e72f105d
+ .quad 0xbfcf550a564b7b37
+ .quad 0xbfcf871b28955045
+ .quad 0xbfcfb9186d5e3e2b
+ .quad 0xbfcfeb0233e607cc
+ .quad 0xbfd00e6c45ad501d
+ .quad 0xbfd0274dc16c232f
+ .quad 0xbfd0402594b4d041
+ .quad 0xbfd058f3c703ebc6
+ .quad 0xbfd071b85fcd590d
+ .quad 0xbfd08a73667c57af
+ .quad 0xbfd0a324e27390e3
+ .quad 0xbfd0bbccdb0d24bd
+ .quad 0xbfd0d46b579ab74b
+ .quad 0xbfd0ed005f657da4
+ .quad 0xbfd1058bf9ae4ad5
+ .quad 0xbfd11e0e2dad9cb7
+ .quad 0xbfd136870293a8b0
+ .quad 0xbfd14ef67f88685a
+ .quad 0xbfd1675cababa60e
+ .quad 0xbfd17fb98e15095d
+ .quad 0xbfd1980d2dd4236f
+ .quad 0xbfd1b05791f07b49
+ .quad 0xbfd1c898c16999fb
+ .quad 0xbfd1e0d0c33716be
+ .quad 0xbfd1f8ff9e48a2f3
+ .quad 0xbfd211255986160c
+ .quad 0xbfd22941fbcf7966
+ .quad 0xbfd241558bfd1404
+ .quad 0xbfd2596010df763a
+ .quad 0xbfd27161913f853d
+ .quad 0xbfd2895a13de86a3
+ .quad 0xbfd2a1499f762bc9
+ .quad 0xbfd2b9303ab89d25
+ .quad 0xbfd2d10dec508583
+ .quad 0xbfd2e8e2bae11d31
+ .quad 0xbfd300aead06350c
+ .quad 0xbfd31871c9544185
+ .quad 0xbfd3302c16586588
+ .quad 0xbfd347dd9a987d55
+ .quad 0xbfd35f865c93293e
+ .quad 0xbfd3772662bfd85b
+ .quad 0xbfd38ebdb38ed321
+ .quad 0xbfd3a64c556945ea
+ .quad 0xbfd3bdd24eb14b6a
+ .quad 0xbfd3d54fa5c1f710
+ .quad 0xbfd3ecc460ef5f50
+ .quad 0xbfd404308686a7e4
+ .quad 0xbfd41b941cce0bee
+ .quad 0xbfd432ef2a04e814
+ .quad 0xbfd44a41b463c47c
+ .quad 0xbfd4618bc21c5ec2
+ .quad 0xbfd478cd5959b3d9
+ .quad 0xbfd49006804009d1
+ .quad 0xbfd4a7373cecf997
+ .quad 0xbfd4be5f957778a1
+ .quad 0xbfd4d57f8fefe27f
+ .quad 0xbfd4ec973260026a
+ .quad 0xbfd503a682cb1cb3
+ .quad 0xbfd51aad872df82d
+ .quad 0xbfd531ac457ee77e
+ .quad 0xbfd548a2c3add263
+ .quad 0xbfd55f9107a43ee2
+ .quad 0xbfd5767717455a6c
+ .quad 0xbfd58d54f86e02f2
+ .quad 0xbfd5a42ab0f4cfe2
+ .quad 0xbfd5baf846aa1b19
+ .quad 0xbfd5d1bdbf5809ca
+ .quad 0xbfd5e87b20c2954a
+ .quad 0xbfd5ff3070a793d4
+ .quad 0xbfd615ddb4bec13c
+ .quad 0xbfd62c82f2b9c795
+ .quad 0x3fd61965cdb02c1f
+ .quad 0x3fd602d08af091ec
+ .quad 0x3fd5ec433d5c35ae
+ .quad 0x3fd5d5bddf595f30
+ .quad 0x3fd5bf406b543db2
+ .quad 0x3fd5a8cadbbedfa1
+ .quad 0x3fd5925d2b112a59
+ .quad 0x3fd57bf753c8d1fb
+ .quad 0x3fd565995069514c
+ .quad 0x3fd54f431b7be1a9
+ .quad 0x3fd538f4af8f72fe
+ .quad 0x3fd522ae0738a3d8
+ .quad 0x3fd50c6f1d11b97c
+ .quad 0x3fd4f637ebba9810
+ .quad 0x3fd4e0086dd8baca
+ .quad 0x3fd4c9e09e172c3c
+ .quad 0x3fd4b3c077267e9a
+ .quad 0x3fd49da7f3bcc41f
+ .quad 0x3fd487970e958770
+ .quad 0x3fd4718dc271c41b
+ .quad 0x3fd45b8c0a17df13
+ .quad 0x3fd44591e0539f49
+ .quad 0x3fd42f9f3ff62642
+ .quad 0x3fd419b423d5e8c7
+ .quad 0x3fd403d086cea79c
+ .quad 0x3fd3edf463c1683e
+ .quad 0x3fd3d81fb5946dba
+ .quad 0x3fd3c25277333184
+ .quad 0x3fd3ac8ca38e5c5f
+ .quad 0x3fd396ce359bbf54
+ .quad 0x3fd3811728564cb2
+ .quad 0x3fd36b6776be1117
+ .quad 0x3fd355bf1bd82c8b
+ .quad 0x3fd3401e12aecba1
+ .quad 0x3fd32a84565120a8
+ .quad 0x3fd314f1e1d35ce4
+ .quad 0x3fd2ff66b04ea9d4
+ .quad 0x3fd2e9e2bce12286
+ .quad 0x3fd2d46602adccee
+ .quad 0x3fd2bef07cdc9354
+ .quad 0x3fd2a982269a3dbf
+ .quad 0x3fd2941afb186b7c
+ .quad 0x3fd27ebaf58d8c9d
+ .quad 0x3fd269621134db92
+ .quad 0x3fd25410494e56c7
+ .quad 0x3fd23ec5991eba49
+ .quad 0x3fd22981fbef797b
+ .quad 0x3fd214456d0eb8d4
+ .quad 0x3fd1ff0fe7cf47a7
+ .quad 0x3fd1e9e1678899f4
+ .quad 0x3fd1d4b9e796c245
+ .quad 0x3fd1bf99635a6b95
+ .quad 0x3fd1aa7fd638d33f
+ .quad 0x3fd1956d3b9bc2fa
+ .quad 0x3fd180618ef18adf
+ .quad 0x3fd16b5ccbacfb73
+ .quad 0x3fd1565eed455fc3
+ .quad 0x3fd14167ef367783
+ .quad 0x3fd12c77cd00713b
+ .quad 0x3fd1178e8227e47c
+ .quad 0x3fd102ac0a35cc1c
+ .quad 0x3fd0edd060b78081
+ .quad 0x3fd0d8fb813eb1ef
+ .quad 0x3fd0c42d676162e3
+ .quad 0x3fd0af660eb9e279
+ .quad 0x3fd09aa572e6c6d4
+ .quad 0x3fd085eb8f8ae797
+ .quad 0x3fd07138604d5862
+ .quad 0x3fd05c8be0d9635a
+ .quad 0x3fd047e60cde83b8
+ .quad 0x3fd03346e0106062
+ .quad 0x3fd01eae5626c691
+ .quad 0x3fd00a1c6adda473
+ .quad 0x3fcfeb2233ea07cd
+ .quad 0x3fcfc218be620a5e
+ .quad 0x3fcf991c6cb3b379
+ .quad 0x3fcf702d36777df0
+ .quad 0x3fcf474b134df229
+ .quad 0x3fcf1e75fadf9bde
+ .quad 0x3fcef5ade4dcffe6
+ .quad 0x3fceccf2c8fe920a
+ .quad 0x3fcea4449f04aaf5
+ .quad 0x3fce7ba35eb77e2a
+ .quad 0x3fce530effe71012
+ .quad 0x3fce2a877a6b2c12
+ .quad 0x3fce020cc6235ab5
+ .quad 0x3fcdd99edaf6d7e9
+ .quad 0x3fcdb13db0d48940
+ .quad 0x3fcd88e93fb2f450
+ .quad 0x3fcd60a17f903515
+ .quad 0x3fcd38666871f465
+ .quad 0x3fcd1037f2655e7b
+ .quad 0x3fcce816157f1988
+ .quad 0x3fccc000c9db3c52
+ .quad 0x3fcc97f8079d44ec
+ .quad 0x3fcc6ffbc6f00f71
+ .quad 0x3fcc480c0005ccd1
+ .quad 0x3fcc2028ab17f9b4
+ .quad 0x3fcbf851c067555f
+ .quad 0x3fcbd087383bd8ad
+ .quad 0x3fcba8c90ae4ad19
+ .quad 0x3fcb811730b823d2
+ .quad 0x3fcb5971a213acdb
+ .quad 0x3fcb31d8575bce3d
+ .quad 0x3fcb0a4b48fc1b46
+ .quad 0x3fcae2ca6f672bd4
+ .quad 0x3fcabb55c31693ad
+ .quad 0x3fca93ed3c8ad9e3
+ .quad 0x3fca6c90d44b704e
+ .quad 0x3fca454082e6ab05
+ .quad 0x3fca1dfc40f1b7f1
+ .quad 0x3fc9f6c407089664
+ .quad 0x3fc9cf97cdce0ec3
+ .quad 0x3fc9a8778debaa38
+ .quad 0x3fc981634011aa75
+ .quad 0x3fc95a5adcf7017f
+ .quad 0x3fc9335e5d594989
+ .quad 0x3fc90c6db9fcbcd9
+ .quad 0x3fc8e588ebac2dbf
+ .quad 0x3fc8beafeb38fe8c
+ .quad 0x3fc897e2b17b19a5
+ .quad 0x3fc871213750e994
+ .quad 0x3fc84a6b759f512f
+ .quad 0x3fc823c16551a3c2
+ .quad 0x3fc7fd22ff599d4f
+ .quad 0x3fc7d6903caf5ad0
+ .quad 0x3fc7b0091651528c
+ .quad 0x3fc7898d85444c73
+ .quad 0x3fc7631d82935a86
+ .quad 0x3fc73cb9074fd14d
+ .quad 0x3fc716600c914054
+ .quad 0x3fc6f0128b756abc
+ .quad 0x3fc6c9d07d203fc7
+ .quad 0x3fc6a399dabbd383
+ .quad 0x3fc67d6e9d785771
+ .quad 0x3fc6574ebe8c133a
+ .quad 0x3fc6313a37335d76
+ .quad 0x3fc60b3100b09476
+ .quad 0x3fc5e533144c1719
+ .quad 0x3fc5bf406b543db2
+ .quad 0x3fc59958ff1d52f1
+ .quad 0x3fc5737cc9018cdd
+ .quad 0x3fc54dabc26105d2
+ .quad 0x3fc527e5e4a1b58d
+ .quad 0x3fc5022b292f6a45
+ .quad 0x3fc4dc7b897bc1c8
+ .quad 0x3fc4b6d6fefe22a4
+ .quad 0x3fc4913d8333b561
+ .quad 0x3fc46baf0f9f5db7
+ .quad 0x3fc4462b9dc9b3dc
+ .quad 0x3fc420b32740fdd4
+ .quad 0x3fc3fb45a59928cc
+ .quad 0x3fc3d5e3126bc27f
+ .quad 0x3fc3b08b6757f2a9
+ .quad 0x3fc38b3e9e027479
+ .quad 0x3fc365fcb0159016
+ .quad 0x3fc340c59741142e
+ .quad 0x3fc31b994d3a4f85
+ .quad 0x3fc2f677cbbc0a96
+ .quad 0x3fc2d1610c86813a
+ .quad 0x3fc2ac55095f5c59
+ .quad 0x3fc28753bc11aba5
+ .quad 0x3fc2625d1e6ddf57
+ .quad 0x3fc23d712a49c202
+ .quad 0x3fc2188fd9807263
+ .quad 0x3fc1f3b925f25d41
+ .quad 0x3fc1ceed09853752
+ .quad 0x3fc1aa2b7e23f72a
+ .quad 0x3fc185747dbecf34
+ .quad 0x3fc160c8024b27b1
+ .quad 0x3fc13c2605c398c3
+ .quad 0x3fc1178e8227e47c
+ .quad 0x3fc0f301717cf0fb
+ .quad 0x3fc0ce7ecdccc28d
+ .quad 0x3fc0aa06912675d5
+ .quad 0x3fc08598b59e3a07
+ .quad 0x3fc06135354d4b18
+ .quad 0x3fc03cdc0a51ec0d
+ .quad 0x3fc0188d2ecf6140
+ .quad 0x3fbfe89139dbd566
+ .quad 0x3fbfa01c9db57ce2
+ .quad 0x3fbf57bc7d9005db
+ .quad 0x3fbf0f70cdd992e3
+ .quad 0x3fbec739830a1120
+ .quad 0x3fbe7f1691a32d3e
+ .quad 0x3fbe3707ee30487b
+ .quad 0x3fbdef0d8d466db9
+ .quad 0x3fbda727638446a2
+ .quad 0x3fbd5f55659210e2
+ .quad 0x3fbd179788219364
+ .quad 0x3fbccfedbfee13a8
+ .quad 0x3fbc885801bc4b23
+ .quad 0x3fbc40d6425a5cb1
+ .quad 0x3fbbf968769fca11
+ .quad 0x3fbbb20e936d6974
+ .quad 0x3fbb6ac88dad5b1c
+ .quad 0x3fbb23965a52ff00
+ .quad 0x3fbadc77ee5aea8c
+ .quad 0x3fba956d3ecade63
+ .quad 0x3fba4e7640b1bc38
+ .quad 0x3fba0792e9277cac
+ .quad 0x3fb9c0c32d4d2548
+ .quad 0x3fb97a07024cbe74
+ .quad 0x3fb9335e5d594989
+ .quad 0x3fb8ecc933aeb6e8
+ .quad 0x3fb8a6477a91dc29
+ .quad 0x3fb85fd927506a48
+ .quad 0x3fb8197e2f40e3f0
+ .quad 0x3fb7d33687c293c9
+ .quad 0x3fb78d02263d82d3
+ .quad 0x3fb746e100226ed9
+ .quad 0x3fb700d30aeac0e1
+ .quad 0x3fb6bad83c1883b6
+ .quad 0x3fb674f089365a7a
+ .quad 0x3fb62f1be7d77743
+ .quad 0x3fb5e95a4d9791cb
+ .quad 0x3fb5a3abb01ade25
+ .quad 0x3fb55e10050e0384
+ .quad 0x3fb518874226130a
+ .quad 0x3fb4d3115d207eac
+ .quad 0x3fb48dae4bc31018
+ .quad 0x3fb4485e03dbdfad
+ .quad 0x3fb403207b414b7f
+ .quad 0x3fb3bdf5a7d1ee64
+ .quad 0x3fb378dd7f749714
+ .quad 0x3fb333d7f8183f4b
+ .quad 0x3fb2eee507b40301
+ .quad 0x3fb2aa04a44717a5
+ .quad 0x3fb26536c3d8c369
+ .quad 0x3fb2207b5c78549e
+ .quad 0x3fb1dbd2643d190b
+ .quad 0x3fb1973bd1465567
+ .quad 0x3fb152b799bb3cc9
+ .quad 0x3fb10e45b3cae831
+ .quad 0x3fb0c9e615ac4e17
+ .quad 0x3fb08598b59e3a07
+ .quad 0x3fb0415d89e74444
+ .quad 0x3faffa6911ab9301
+ .quad 0x3faf723b517fc523
+ .quad 0x3faeea31c006b87c
+ .quad 0x3fae624c4a0b5e1b
+ .quad 0x3fadda8adc67ee4e
+ .quad 0x3fad52ed6405d86f
+ .quad 0x3faccb73cdddb2cc
+ .quad 0x3fac441e06f72a9e
+ .quad 0x3fabbcebfc68f420
+ .quad 0x3fab35dd9b58baad
+ .quad 0x3faaaef2d0fb10fc
+ .quad 0x3faa282b8a936171
+ .quad 0x3fa9a187b573de7c
+ .quad 0x3fa91b073efd7314
+ .quad 0x3fa894aa149fb343
+ .quad 0x3fa80e7023d8ccc4
+ .quad 0x3fa788595a3577ba
+ .quad 0x3fa70265a550e777
+ .quad 0x3fa67c94f2d4bb58
+ .quad 0x3fa5f6e73078efb8
+ .quad 0x3fa5715c4c03ceef
+ .quad 0x3fa4ebf43349e26f
+ .quad 0x3fa466aed42de3ea
+ .quad 0x3fa3e18c1ca0ae92
+ .quad 0x3fa35c8bfaa1306b
+ .quad 0x3fa2d7ae5c3c5bae
+ .quad 0x3fa252f32f8d183f
+ .quad 0x3fa1ce5a62bc353a
+ .quad 0x3fa149e3e4005a8d
+ .quad 0x3fa0c58fa19dfaaa
+ .quad 0x3fa0415d89e74444
+ .quad 0x3f9f7a9b16782856
+ .quad 0x3f9e72bf2813ce51
+ .quad 0x3f9d6b2725979802
+ .quad 0x3f9c63d2ec14aaf2
+ .quad 0x3f9b5cc258b718e6
+ .quad 0x3f9a55f548c5c43f
+ .quad 0x3f994f6b99a24475
+ .quad 0x3f98492528c8cabf
+ .quad 0x3f974321d3d006d3
+ .quad 0x3f963d6178690bd6
+ .quad 0x3f9537e3f45f3565
+ .quad 0x3f9432a925980cc1
+ .quad 0x3f932db0ea132e22
+ .quad 0x3f9228fb1fea2e28
+ .quad 0x3f912487a5507f70
+ .quad 0x3f90205658935847
+ .quad 0x3f8e38ce3033310c
+ .quad 0x3f8c317384c75f06
+ .quad 0x3f8a2a9c6c170462
+ .quad 0x3f882448a388a2aa
+ .quad 0x3f861e77e8b53fc6
+ .quad 0x3f841929f96832f0
+ .quad 0x3f82145e939ef1e9
+ .quad 0x3f8010157588de71
+ .quad 0x3f7c189cbb0e27fb
+ .quad 0x3f78121214586b54
+ .quad 0x3f740c8a747878e2
+ .quad 0x3f70080559588b35
+ .quad 0x3f680904828985c0
+ .quad 0x3f60040155d5889e
+ .quad 0x3f50020055655889
+ .quad 0x0000000000000000
+ /*== poly_coeff[4] ==*/
+ .align 16
+ .quad 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A /* coeff4 */
+ .quad 0xbfd0000148058EE1, 0xbfd0000148058EE1 /* coeff3 */
+ .quad 0x3fd55555555543C5, 0x3fd55555555543C5 /* coeff2 */
+ .quad 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F /* coeff1 */
+ /*== ExpMask ==*/
+ .align 16
+ .quad 0x000fffffffffffff, 0x000fffffffffffff
+ /*== Two10 ==*/
+ .align 16
+ .quad 0x3f50000000000000, 0x3f50000000000000
+ /*== MinLog1p = -1+2^(-53) ==*/
+ .align 16
+ .quad 0xbfefffffffffffff, 0xbfefffffffffffff
+ /*== MaxLog1p ==*/
+ .align 16
+ .quad 0x7f3ffffffffff000, 0x7f3ffffffffff000
+ /*== One ==*/
+ .align 16
+ .quad 0x3ff0000000000000, 0x3ff0000000000000
+ /*== SgnMask ==*/
+ .align 16
+ .quad 0x7fffffffffffffff, 0x7fffffffffffffff
+ /*== XThreshold ==*/
+ .align 16
+ .quad 0x3e00000000000000, 0x3e00000000000000
+ /*== XhMask ==*/
+ .align 16
+ .quad 0xfffffffffffffc00, 0xfffffffffffffc00
+ /*== Threshold ==*/
+ .align 16
+ .quad 0x4086a00000000000, 0x4086a00000000000
+ /*== Bias ==*/
+ .align 16
+ .quad 0x408ff80000000000, 0x408ff80000000000
+ /*== Bias1 ==*/
+ .align 16
+ .quad 0x408ff00000000000, 0x408ff00000000000
+ /*== ExpMask0 ==*/
+ .align 16
+ .quad 0x7ff0000000000000, 0x7ff0000000000000
+ /*== ExpMask2 ==*/
+ .align 16
+ .quad 0x7f40000000000000, 0x7f40000000000000
+ /*== L2L ==*/
+ .align 16
+ .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF
+ /*== dHalf ==*/
+ .align 16
+ .quad 0x3FE0000000000000, 0x3FE0000000000000
+ /*== dSign ==*/
+ .align 16
+ .quad 0x8000000000000000, 0x8000000000000000
+ /*== dTopMask12 ==*/
+ .align 16
+ .quad 0xFFFFFE0000000000, 0xFFFFFE0000000000
+ /*== dTopMask41 ==*/
+ .align 16
+ .quad 0xFFFFFFFFFFFFF000, 0xFFFFFFFFFFFFF000
+ /*== dTinyRange ==*/
+ .align 16
+ .quad 0x0350000000000000, 0x0350000000000000
+ .align 16
+ .type __svml_datanh_data_internal,@object
+ .size __svml_datanh_data_internal,.-__svml_datanh_data_internal
+ .align 16
+
+.FLT_21: /* 16-byte local constant: two 64-bit lanes, each stored as low dword, high dword */
+ .long 0x00000000,0x43380000,0x00000000,0x43380000 /* each lane is 0x4338000000000000, i.e. the double 1.5 * 2^52 */
+ .type .FLT_21,@object
+ .size .FLT_21,16
new file mode 100644
@@ -0,0 +1,20 @@
+/* SSE version of vectorized atanh, vector length is 4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define _ZGVdN4v_atanh _ZGVdN4v_atanh_sse_wrapper /* rename the symbol for the code included below */
+#include "../svml_d_atanh4_core.S" /* NOTE(review): presumably defines _ZGVdN4v_atanh, which is emitted here under the wrapper name; confirm in that file */
new file mode 100644
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized atanh, vector length is 4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN4v_atanh /* symbol name handed to the ifunc macro below */
+#include "ifunc-mathvec-avx2.h" /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR; confirm in that header */
+/* Define SYMBOL_NAME through libc_ifunc_redirected, using the value of IFUNC_SELECTOR (). */
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+/* Shared builds only: declare the __GI_ alias with hidden visibility. */
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN4v_atanh, __GI__ZGVdN4v_atanh, __redirect__ZGVdN4v_atanh)
+ __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
new file mode 100644
@@ -0,0 +1,1479 @@
+/* Function atanh vectorized with AVX2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ * Compute atanh(x) as 0.5 * log((1 + x)/(1 - x))
+ *
+ * Special cases:
+ *
+ * atanh(0) = 0
+ * atanh(+1) = +INF
+ * atanh(-1) = -INF
+ * atanh(x) = NaN if |x| > 1, or if x is a NaN or INF
+ *
+ */
+
+/* Byte offsets of the fields of __svml_datanh_data_internal. Every offset
+ is a multiple of 32; from ExpMask on, consecutive fields are 32 bytes apart. */
+#define Log_HA_table 0
+#define Log_LA_table 8224
+#define poly_coeff 12352
+#define ExpMask 12480
+#define Two10 12512
+#define MinLog1p 12544
+#define MaxLog1p 12576
+#define One 12608
+#define SgnMask 12640
+#define XThreshold 12672
+#define XhMask 12704
+#define Threshold 12736
+#define Bias 12768
+#define Bias1 12800
+#define ExpMask0 12832
+#define ExpMask2 12864
+#define L2 12896
+#define dHalf 12928
+#define dSign 12960
+#define dTopMask12 12992
+#define dTopMask41 13024
+#define TinyRange 13056
+
+/* Bias added to the address of __svml_datanh_data_internal when forming the base register (%r8) used by the indexed table loads. */
+#define Table_Lookup_Bias -0x405fe0
+
+#include <sysdep.h>
+
+ .text
+ .section .text.avx2,"ax",@progbits
+ENTRY(_ZGVdN4v_atanh_avx2) /* in: ymm0 = four doubles X; out: ymm0 = per-lane result */
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-32, %rsp /* round the stack pointer down to a 32-byte boundary */
+ subq $96, %rsp /* scratch area, used only by the special-values path */
+ lea Table_Lookup_Bias+__svml_datanh_data_internal(%rip), %r8 /* r8 = biased table base for the indexed loads below */
+ vmovupd SgnMask+__svml_datanh_data_internal(%rip), %ymm7
+
+/* Load the constant 1 and a sign mask */
+ vmovupd One+__svml_datanh_data_internal(%rip), %ymm11
+ vmovapd %ymm0, %ymm12 /* ymm12 = X, never overwritten; the special-values path spills it */
+
+/* Strip off the sign, so treat X as positive until right at the end */
+ vandpd %ymm7, %ymm12, %ymm0 /* ymm0 = |X| */
+ vsubpd %ymm0, %ymm11, %ymm6 /* ymm6 = 1 - |X| */
+
+/*
+ * Check whether |X| < 1, in which case we use the main function.
+ * Otherwise set the rangemask so that the callout will get used.
+ * Note that this will also use the callout for NaNs since not(NaN < 1).
+ */
+ vcmpnlt_uqpd %ymm11, %ymm0, %ymm13 /* ymm13 = rangemask: lanes with not(|X| < 1) */
+ vcmplt_oqpd TinyRange+__svml_datanh_data_internal(%rip), %ymm0, %ymm10 /* ymm10 = lanes with |X| < TinyRange */
+ vsubpd %ymm6, %ymm11, %ymm15 /* ymm15 = 1 - ymm6 */
+
+/*
+ * Compute V = 2 * X trivially, and UHi + ULo = 1 - X in two pieces,
+ * the upper part UHi being <= 41 bits long. Then we have
+ * atanh(X) = 1/2 * log((1 + X) / (1 - X)) = 1/2 * log1p(V / (UHi + ULo)).
+ */
+ vaddpd %ymm0, %ymm0, %ymm3 /* ymm3 = V = 2 * |X| */
+ vcvtpd2ps %ymm6, %xmm5
+ vsubpd %ymm0, %ymm15, %ymm1 /* ymm1 = (1 - ymm6) - |X| */
+ vrcpps %xmm5, %xmm4 /* single-precision reciprocal estimate of 1 - |X| */
+ vmovapd %ymm12, %ymm14
+ vfmadd213pd %ymm12, %ymm12, %ymm14 /* ymm14 = X * X + X, the result used for tiny lanes */
+ vcvtps2pd %xmm4, %ymm2
+
+/* Record the sign for eventual reincorporation. */
+ vandpd dSign+__svml_datanh_data_internal(%rip), %ymm12, %ymm9
+
+/* Or the sign bit in with the tiny result to handle atanh(-0) correctly */
+ vorpd %ymm9, %ymm14, %ymm8 /* ymm8 = tiny-lane result, blended in at the end */
+ vandpd dTopMask12+__svml_datanh_data_internal(%rip), %ymm2, %ymm14 /* ymm14 = R: reciprocal estimate masked by dTopMask12 */
+
+/* No need to split dU when FMA is available */
+ vfnmadd213pd %ymm11, %ymm14, %ymm6 /* ymm6 = 1 - R * (1 - |X|) */
+ vfnmadd231pd %ymm14, %ymm1, %ymm6 /* ymm6 = E = ymm6 - R * ymm1 */
+
+/*
+ * Compute D = E + E^2 + E^3 + E^4 + E^5
+ * = E + (E + E^2) (E + E * E^2)
+ * Only saves when FMA is available
+ */
+ vmovapd %ymm11, %ymm0
+ vmovapd %ymm6, %ymm5
+ vfmadd231pd %ymm6, %ymm6, %ymm0 /* ymm0 = 1 + E^2 */
+ vfmadd213pd %ymm6, %ymm6, %ymm5 /* ymm5 = E + E^2 */
+ vfmadd213pd %ymm11, %ymm0, %ymm5 /* ymm5 = 1 + (1 + E^2) * (E + E^2) */
+ vmovmskpd %ymm13, %eax /* eax = one bit per lane of the rangemask */
+
+/*
+ * Split V as well into upper 41 bits and lower part, so that we can get
+ * a preliminary quotient estimate without rounding error.
+ */
+ vandpd dTopMask41+__svml_datanh_data_internal(%rip), %ymm3, %ymm13 /* ymm13 = VHi */
+ vsubpd %ymm13, %ymm3, %ymm15 /* ymm15 = VLo = V - VHi */
+
+/* Hence get initial quotient estimate QHi + QLo = R * VHi + R * VLo */
+ vmulpd %ymm13, %ymm14, %ymm2 /* ymm2 = QHi */
+ vmulpd %ymm5, %ymm6, %ymm0 /* ymm0 = E * ymm5 = D */
+ vmulpd %ymm15, %ymm14, %ymm4 /* ymm4 = QLo */
+
+/* 2^ (-10-exp(X) ) */
+ vmovupd ExpMask2+__svml_datanh_data_internal(%rip), %ymm15
+
+/*
+ * Compute R * (VHi + VLo) * (1 + E + E^2 + E^3 + E^4 + E^5)
+ * = R * (VHi + VLo) * (1 + D)
+ * = QHi + (QHi * D + QLo + QLo * D)
+ */
+ vmulpd %ymm0, %ymm2, %ymm6 /* ymm6 = QHi * D */
+ vfmadd213pd %ymm4, %ymm4, %ymm0 /* ymm0 = QLo * D + QLo */
+ vaddpd %ymm0, %ymm6, %ymm5
+
+/*
+ * Now finally accumulate the high and low parts of the
+ * argument to log1p, H + L, with a final compensated summation.
+ */
+ vaddpd %ymm5, %ymm2, %ymm4 /* ymm4 = QHi + ymm5 */
+
+/*
+ * Now we feed into the log1p code, using H in place of _VARG1 and
+ * later incorporating L into the reduced argument.
+ * compute 1+x as high, low parts
+ */
+ vmaxpd %ymm4, %ymm11, %ymm1
+ vminpd %ymm4, %ymm11, %ymm3
+ vandpd %ymm7, %ymm4, %ymm7
+ vcmplt_oqpd XThreshold+__svml_datanh_data_internal(%rip), %ymm7, %ymm0
+ vaddpd %ymm3, %ymm1, %ymm5
+ vorpd XhMask+__svml_datanh_data_internal(%rip), %ymm0, %ymm4
+ vandpd %ymm4, %ymm5, %ymm5
+
+/* preserve mantissa, set input exponent to 2^(-10) */
+ vandpd ExpMask+__svml_datanh_data_internal(%rip), %ymm5, %ymm6
+ vorpd Two10+__svml_datanh_data_internal(%rip), %ymm6, %ymm7
+
+/* reciprocal approximation good to at least 11 bits */
+ vcvtpd2ps %ymm7, %xmm13
+ vsubpd %ymm5, %ymm1, %ymm2
+ vrcpps %xmm13, %xmm14
+ vaddpd %ymm2, %ymm3, %ymm4
+ vcvtps2pd %xmm14, %ymm3
+
+/* exponent bits */
+ vpsrlq $20, %ymm5, %ymm2
+
+/* round reciprocal to nearest integer, will have 1+9 mantissa bits */
+ vroundpd $0, %ymm3, %ymm3
+
+/*
+ * prepare table index
+ * table lookup
+ */
+ vpsrlq $40, %ymm3, %ymm13 /* each 64-bit lane of ymm13 holds one table index */
+
+/* exponent of X needed to scale Xl */
+ vandps ExpMask0+__svml_datanh_data_internal(%rip), %ymm5, %ymm0
+ vpsubq %ymm0, %ymm15, %ymm6
+
+/* Finally, halve the result and reincorporate the sign */
+ vxorpd dHalf+__svml_datanh_data_internal(%rip), %ymm9, %ymm9 /* ymm9 = dHalf ^ sign bit of X, the final multiplier */
+ vmovd %xmm13, %edx /* edx = index for lane 0 */
+ vextractf128 $1, %ymm13, %xmm0
+ movslq %edx, %rdx
+ vpextrd $2, %xmm13, %ecx /* ecx = index for lane 1 */
+ movslq %ecx, %rcx
+ vmovd %xmm0, %esi /* esi = index for lane 2 */
+ vmovsd (%r8,%rdx), %xmm14
+ vmovhpd (%r8,%rcx), %xmm14, %xmm15 /* xmm15 = table entries for lanes 0 and 1 */
+
+/* exponent*log(2.0) */
+ vmovupd Threshold+__svml_datanh_data_internal(%rip), %ymm14
+ movslq %esi, %rsi
+ vpextrd $2, %xmm0, %edi /* edi = index for lane 3 */
+ movslq %edi, %rdi
+ vextractf128 $1, %ymm2, %xmm1
+ vshufps $221, %xmm1, %xmm2, %xmm7
+
+/* scale DblRcp */
+ vmulpd %ymm6, %ymm3, %ymm2
+ vmovsd (%r8,%rsi), %xmm6
+
+/* biased exponent in DP format */
+ vcvtdq2pd %xmm7, %ymm1
+ vmovhpd (%r8,%rdi), %xmm6, %xmm7 /* xmm7 = table entries for lanes 2 and 3 */
+ vcmplt_oqpd %ymm3, %ymm14, %ymm3
+
+/* argument reduction */
+ vfmsub213pd %ymm11, %ymm2, %ymm5
+ vmulpd %ymm2, %ymm4, %ymm11
+ vmovupd poly_coeff+64+__svml_datanh_data_internal(%rip), %ymm2
+ vaddpd %ymm11, %ymm5, %ymm5
+ vandpd Bias+__svml_datanh_data_internal(%rip), %ymm3, %ymm3
+ vorpd Bias1+__svml_datanh_data_internal(%rip), %ymm3, %ymm6
+ vsubpd %ymm6, %ymm1, %ymm1
+ vfmadd213pd poly_coeff+96+__svml_datanh_data_internal(%rip), %ymm5, %ymm2
+ vmulpd %ymm5, %ymm5, %ymm4
+ vmulpd L2+__svml_datanh_data_internal(%rip), %ymm1, %ymm3
+
+/* polynomial */
+ vmovupd poly_coeff+__svml_datanh_data_internal(%rip), %ymm1
+ vfmadd213pd poly_coeff+32+__svml_datanh_data_internal(%rip), %ymm5, %ymm1
+ vfmadd213pd %ymm2, %ymm4, %ymm1
+
+/* reconstruction */
+ vfmadd213pd %ymm5, %ymm4, %ymm1
+ vinsertf128 $1, %xmm7, %ymm15, %ymm0 /* ymm0 = the four loaded table entries */
+ vaddpd %ymm1, %ymm0, %ymm0
+ vaddpd %ymm0, %ymm3, %ymm6
+ vmulpd %ymm6, %ymm9, %ymm0 /* scale by the signed dHalf multiplier in ymm9 */
+ vblendvpd %ymm10, %ymm8, %ymm0, %ymm0 /* lanes flagged in ymm10 take the tiny result from ymm8 */
+ testl %eax, %eax /* any rangemask bit set? */
+
+/* Go to special inputs processing branch */
+ jne L(SPECIAL_VALUES_BRANCH)
+ # LOE rbx r12 r13 r14 r15 eax ymm0 ymm12
+
+/* Common epilogue: restore %rsp and %rbp from the frame set up at entry
+ * and return with the result in ymm0.
+ */
+
+L(EXIT):
+ movq %rbp, %rsp /* drops the aligned scratch area */
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16) /* re-state the in-frame CFA rule for the code that follows the ret */
+ cfi_offset(6, -16)
+
+/* Special-inputs path, entered with the rangemask bits in eax: spill the
+ * input and the vector result, save r12-r14, and set up the lane loop.
+ */
+
+L(SPECIAL_VALUES_BRANCH):
+ vmovupd %ymm12, 32(%rsp) /* original inputs at 32(%rsp) */
+ vmovupd %ymm0, 64(%rsp) /* vector-path results at 64(%rsp) */
+ # LOE rbx r12 r13 r14 r15 eax ymm0
+
+ xorl %edx, %edx
+ # LOE rbx r12 r13 r14 r15 eax edx
+
+ vzeroupper
+ movq %r12, 16(%rsp)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
+ movl %edx, %r12d /* r12d = lane counter, starting at 0 */
+ movq %r13, 8(%rsp)
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
+ movl %eax, %r13d /* r13d = rangemask bits */
+ movq %r14, (%rsp)
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r15 r12d r13d
+
+/* Test the rangemask bit (r13d) of the current lane (r12d); a set bit
+ * sends that lane to the scalar call, otherwise fall into the loop step.
+ */
+
+L(RANGEMASK_CHECK):
+ btl %r12d, %r13d /* CF = bit number r12d of r13d */
+
+/* Call scalar math function */
+ jc L(SCALAR_MATH_CALL)
+ # LOE rbx r15 r12d r13d
+
+/* Loop step: advance to the next lane; once all 4 lanes are checked,
+ * restore r12-r14, reload the patched result vector and exit.
+ */
+
+L(SPECIAL_VALUES_LOOP):
+ incl %r12d
+ cmpl $4, %r12d /* 4 lanes per vector */
+
+/* Check bits in range mask */
+ jl L(RANGEMASK_CHECK)
+ # LOE rbx r15 r12d r13d
+
+ movq 16(%rsp), %r12
+ cfi_restore(12)
+ movq 8(%rsp), %r13
+ cfi_restore(13)
+ movq (%rsp), %r14
+ cfi_restore(14)
+ vmovupd 64(%rsp), %ymm0 /* result vector, including any lanes rewritten by the scalar call */
+
+/* Go to exit */
+ jmp L(EXIT)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r12 r13 r14 r15 ymm0
+
+/* Scalar math function call
+ * to process special input
+ */
+
+L(SCALAR_MATH_CALL):
+ movl %r12d, %r14d
+ movsd 32(%rsp,%r14,8), %xmm0
+ call atanh@PLT
+ # LOE rbx r14 r15 r12d r13d xmm0
+
+ movsd %xmm0, 64(%rsp,%r14,8)
+
+/* Process special inputs in loop */
+ jmp L(SPECIAL_VALUES_LOOP)
+ # LOE rbx r15 r12d r13d
+END(_ZGVdN4v_atanh_avx2)
+
+ .section .rodata, "a"
+ .align 32
+
+#ifdef __svml_datanh_data_internal_typedef
+typedef unsigned int VUINT32; /* Element type; every member below is declared as pairs ([2]) of these.  */
+typedef struct { /* C-style description of the member order of the __svml_datanh_data_internal table that follows; only seen by the compiler when the guard macro above is defined.  */
+ __declspec(align(32)) VUINT32 Log_HA_table[(1<<10)+2][2]; /* (1<<10)+2 = 1026 pairs; the data after the "Log_HA_table" marker is 1026 .quad values.  */
+ __declspec(align(32)) VUINT32 Log_LA_table[(1<<9)+1][2]; /* (1<<9)+1 = 513 pairs; its data starts after the "Log_LA_table" marker and a .align 32.  */
+ __declspec(align(32)) VUINT32 poly_coeff[4][4][2]; /* 4 groups of 4 pairs.  */
+ __declspec(align(32)) VUINT32 ExpMask[4][2]; /* This member and each one after it holds 4 pairs, declared with 32-byte alignment.  */
+ __declspec(align(32)) VUINT32 Two10[4][2];
+ __declspec(align(32)) VUINT32 MinLog1p[4][2];
+ __declspec(align(32)) VUINT32 MaxLog1p[4][2];
+ __declspec(align(32)) VUINT32 One[4][2];
+ __declspec(align(32)) VUINT32 SgnMask[4][2];
+ __declspec(align(32)) VUINT32 XThreshold[4][2];
+ __declspec(align(32)) VUINT32 XhMask[4][2];
+ __declspec(align(32)) VUINT32 Threshold[4][2];
+ __declspec(align(32)) VUINT32 Bias[4][2];
+ __declspec(align(32)) VUINT32 Bias1[4][2];
+ __declspec(align(32)) VUINT32 ExpMask0[4][2];
+ __declspec(align(32)) VUINT32 ExpMask2[4][2];
+ __declspec(align(32)) VUINT32 L2[4][2];
+ __declspec(align(32)) VUINT32 dHalf[4][2];
+ __declspec(align(32)) VUINT32 dSign[4][2];
+ __declspec(align(32)) VUINT32 dTopMask12[4][2];
+ __declspec(align(32)) VUINT32 dTopMask41[4][2];
+ __declspec(align(32)) VUINT32 TinyRange[4][2];
+} __svml_datanh_data_internal; /* Same identifier as the assembly label that introduces the table data.  */
+#endif
+__svml_datanh_data_internal:
+ /* Log_HA_table */
+ .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100
+ .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a
+ .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff
+ .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a
+ .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb
+ .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e
+ .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b
+ .quad 0xc0862347acebaf68, 0xbe1cef3b152048af
+ .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e
+ .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4
+ .quad 0xc08623537ac30980, 0xbe1cefc4642ee597
+ .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16
+ .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6
+ .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362
+ .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557
+ .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b
+ .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed
+ .quad 0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed
+ .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f
+ .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce
+ .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7
+ .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1
+ .quad 0xc086238206e94218, 0xbe1ceee898588610
+ .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea
+ .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6
+ .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6
+ .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165
+ .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1
+ .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b
+ .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670
+ .quad 0xc08623a07b28ae60, 0xbe1cef359363787c
+ .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c
+ .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84
+ .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7
+ .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b
+ .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf
+ .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62
+ .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b
+ .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98
+ .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87
+ .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff
+ .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798
+ .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e
+ .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde
+ .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b
+ .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c
+ .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98
+ .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f
+ .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358
+ .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380
+ .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4
+ .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b
+ .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2
+ .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4
+ .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400
+ .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7
+ .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a
+ .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d
+ .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b
+ .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575
+ .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951
+ .quad 0xc086241263e87f50, 0xbe1cf16e74768529
+ .quad 0xc0862415f6193658, 0xbe1cefec64b8becb
+ .quad 0xc086241986b28f30, 0xbe1cf0838d210baa
+ .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11
+ .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805
+ .quad 0xc08624242f008380, 0xbe1ceea988c5a417
+ .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5
+ .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38
+ .quad 0xc086242ec92eaee8, 0xbe1cef0946455411
+ .quad 0xc08624324ecbaf98, 0xbe1cefea60907739
+ .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42
+ .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d
+ .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e
+ .quad 0xc086244055d2c968, 0xbe1cef345284c119
+ .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219
+ .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114
+ .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189
+ .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f
+ .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f
+ .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5
+ .quad 0xc0862458a789e250, 0xbe1cf0b173796a31
+ .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d
+ .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb
+ .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7
+ .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f
+ .quad 0xc0862469d9a591c0, 0xbe1cef503d947663
+ .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2
+ .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc
+ .quad 0xc086247419475160, 0xbe1cf03dd9922331
+ .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129
+ .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6
+ .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100
+ .quad 0xc0862481af27c528, 0xbe1cee8a6593278a
+ .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7
+ .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8
+ .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002
+ .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4
+ .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c
+ .quad 0xc0862495e5179270, 0xbe1cee757f20c326
+ .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4
+ .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97
+ .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb
+ .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e
+ .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b
+ .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80
+ .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71
+ .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9
+ .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139
+ .quad 0xc08624b72472a528, 0xbe1cf031c931c11f
+ .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7
+ .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d
+ .quad 0xc08624c103245238, 0xbe1cefd492f1ba75
+ .quad 0xc08624c44aacab08, 0xbe1cf1253e154466
+ .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55
+ .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe
+ .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f
+ .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968
+ .quad 0xc08624d49c4a4b78, 0xbe1cee97b556ed78
+ .quad 0xc08624d7dbd56750, 0xbe1cf1b14b6acb75
+ .quad 0xc08624db1a0f6b00, 0xbe1cef1e860623f2
+ .quad 0xc08624de56f96758, 0xbe1ceeaf4d156f3d
+ .quad 0xc08624e192946bf0, 0xbe1ceecc12b400ed
+ .quad 0xc08624e4cce18710, 0xbe1cf180c40c794f
+ .quad 0xc08624e805e1c5c8, 0xbe1cf185a08f7f65
+ .quad 0xc08624eb3d9633d8, 0xbe1cef45fc924078
+ .quad 0xc08624ee73ffdbb0, 0xbe1cf1e4f457f32a
+ .quad 0xc08624f1a91fc6a0, 0xbe1cf040147b8a5a
+ .quad 0xc08624f4dcf6fc98, 0xbe1cf1effca0dfb2
+ .quad 0xc08624f80f868468, 0xbe1cf0470146e5bc
+ .quad 0xc08624fb40cf6390, 0xbe1cef4dd186e501
+ .quad 0xc08624fe70d29e60, 0xbe1ceebe257f66c7
+ .quad 0xc08625019f9137f0, 0xbe1ceefb7a1c395c
+ .quad 0xc0862504cd0c3220, 0xbe1cf209dedfed8c
+ .quad 0xc0862507f9448db0, 0xbe1cf082da464994
+ .quad 0xc086250b243b4a18, 0xbe1cee88694a73cf
+ .quad 0xc086250e4df165a0, 0xbe1cf0b61e8f0531
+ .quad 0xc08625117667dd78, 0xbe1cf1106599c962
+ .quad 0xc08625149d9fad98, 0xbe1ceff1ee88af1f
+ .quad 0xc0862517c399d0c8, 0xbe1cf0f746994ef6
+ .quad 0xc086251ae85740b8, 0xbe1cefe8a1d077e4
+ .quad 0xc086251e0bd8f5e0, 0xbe1cf1a1da036092
+ .quad 0xc08625212e1fe7a8, 0xbe1cf0f8a7786fcd
+ .quad 0xc08625244f2d0c48, 0xbe1cefa1174a07a7
+ .quad 0xc08625276f0158d8, 0xbe1cef1043aa5b25
+ .quad 0xc086252a8d9dc150, 0xbe1cf15d521c169d
+ .quad 0xc086252dab033898, 0xbe1cf220bba8861f
+ .quad 0xc0862530c732b078, 0xbe1cef51e310eae2
+ .quad 0xc0862533e22d1988, 0xbe1cf222fcedd8ae
+ .quad 0xc0862536fbf36370, 0xbe1cefdb4da4bda8
+ .quad 0xc086253a14867ca0, 0xbe1ceeafc1112171
+ .quad 0xc086253d2be75280, 0xbe1cee99dfb4b408
+ .quad 0xc08625404216d160, 0xbe1cf22d2536f06b
+ .quad 0xc08625435715e498, 0xbe1cef6abbf2e268
+ .quad 0xc08625466ae57648, 0xbe1cf093a14789f5
+ .quad 0xc08625497d866fa0, 0xbe1cf0f93655603c
+ .quad 0xc086254c8ef9b8b8, 0xbe1cf1cc40c9aafc
+ .quad 0xc086254f9f4038a8, 0xbe1ceeea5f4e9157
+ .quad 0xc0862552ae5ad568, 0xbe1cefa9f52d4997
+ .quad 0xc0862555bc4a7400, 0xbe1cefa490a638ff
+ .quad 0xc0862558c90ff868, 0xbe1cef7fcf797d6f
+ .quad 0xc086255bd4ac4590, 0xbe1cf1b4c51113c9
+ .quad 0xc086255edf203d78, 0xbe1cef55e5b4a55d
+ .quad 0xc0862561e86cc100, 0xbe1cf0d37a25f9dc
+ .quad 0xc0862564f092b028, 0xbe1ceebe9efc19d9
+ .quad 0xc0862567f792e9d8, 0xbe1cee8ad30a57b5
+ .quad 0xc086256afd6e4c08, 0xbe1cef4e1817b90b
+ .quad 0xc086256e0225b3b8, 0xbe1cee7fa9229996
+ .quad 0xc086257105b9fce0, 0xbe1cf0b54963d945
+ .quad 0xc0862574082c0298, 0xbe1cee5f2f3c7995
+ .quad 0xc0862577097c9ee0, 0xbe1cf0828e303a2c
+ .quad 0xc086257a09acaae0, 0xbe1cf172c3078947
+ .quad 0xc086257d08bcfec0, 0xbe1cf189252afa22
+ .quad 0xc086258006ae71b8, 0xbe1cefdb80426923
+ .quad 0xc08625830381da08, 0xbe1ceef1391a0372
+ .quad 0xc0862585ff380d00, 0xbe1cf17720c78d13
+ .quad 0xc0862588f9d1df18, 0xbe1ceef1f9027d83
+ .quad 0xc086258bf35023b8, 0xbe1cf06fac99dec9
+ .quad 0xc086258eebb3ad78, 0xbe1cf1373eeb45c0
+ .quad 0xc0862591e2fd4e00, 0xbe1cef777536bb81
+ .quad 0xc0862594d92dd600, 0xbe1cf0f43ca40766
+ .quad 0xc0862597ce461558, 0xbe1cefb2cfc6766b
+ .quad 0xc086259ac246daf0, 0xbe1ceea49e64ffa2
+ .quad 0xc086259db530f4c8, 0xbe1cf250fa457dec
+ .quad 0xc08625a0a7053018, 0xbe1cf17d8bb2a44e
+ .quad 0xc08625a397c45918, 0xbe1cf1d5906d54b7
+ .quad 0xc08625a6876f3b30, 0xbe1cf08fe7b31780
+ .quad 0xc08625a97606a0e0, 0xbe1cef13edfc9d11
+ .quad 0xc08625ac638b53c8, 0xbe1cef9d2b107219
+ .quad 0xc08625af4ffe1cb0, 0xbe1cf1ddd4ff6160
+ .quad 0xc08625b23b5fc390, 0xbe1cefa02a996495
+ .quad 0xc08625b525b10f68, 0xbe1cf166a7e37ee5
+ .quad 0xc08625b80ef2c680, 0xbe1cef0b171068a5
+ .quad 0xc08625baf725ae28, 0xbe1cf05c80779283
+ .quad 0xc08625bdde4a8af0, 0xbe1cf1bbfbffb889
+ .quad 0xc08625c0c4622090, 0xbe1cf0b8666c0124
+ .quad 0xc08625c3a96d31e0, 0xbe1cf0a8fcf47a86
+ .quad 0xc08625c68d6c80f0, 0xbe1cef46e18cb092
+ .quad 0xc08625c97060cef0, 0xbe1cf1458a350efb
+ .quad 0xc08625cc524adc58, 0xbe1ceeea1dadce12
+ .quad 0xc08625cf332b68b0, 0xbe1cf0a1bfdc44c7
+ .quad 0xc08625d2130332d0, 0xbe1cef96d02da73e
+ .quad 0xc08625d4f1d2f8a8, 0xbe1cf2451c3c7701
+ .quad 0xc08625d7cf9b7778, 0xbe1cf10d08f83812
+ .quad 0xc08625daac5d6ba0, 0xbe1ceec5b4895c5e
+ .quad 0xc08625dd881990b0, 0xbe1cf14e1325c5e4
+ .quad 0xc08625e062d0a188, 0xbe1cf21d0904be12
+ .quad 0xc08625e33c835838, 0xbe1ceed0839bcf21
+ .quad 0xc08625e615326df0, 0xbe1cf1bb944889d2
+ .quad 0xc08625e8ecde9b48, 0xbe1cee738e85eece
+ .quad 0xc08625ebc38897e0, 0xbe1cf25c2bc6ef12
+ .quad 0xc08625ee99311ac8, 0xbe1cf132b70a41ad
+ .quad 0xc08625f16dd8da28, 0xbe1cf1984236a6e3
+ .quad 0xc08625f441808b78, 0xbe1cf19ae74998f9
+ .quad 0xc08625f71428e370, 0xbe1cef3e175d61a1
+ .quad 0xc08625f9e5d295f8, 0xbe1cf101f9868fd9
+ .quad 0xc08625fcb67e5658, 0xbe1cee69db83dcd2
+ .quad 0xc08625ff862cd6f8, 0xbe1cf081b636af51
+ .quad 0xc086260254dec9a8, 0xbe1cee62c7d59b3e
+ .quad 0xc08626052294df58, 0xbe1cf1b745c57716
+ .quad 0xc0862607ef4fc868, 0xbe1cef3d2800ea23
+ .quad 0xc086260abb103458, 0xbe1cef480ff1acd2
+ .quad 0xc086260d85d6d200, 0xbe1cf2424c9a17ef
+ .quad 0xc08626104fa44f90, 0xbe1cf12cfde90fd5
+ .quad 0xc086261318795a68, 0xbe1cf21f590dd5b6
+ .quad 0xc0862615e0569f48, 0xbe1cf0c50f9cd28a
+ .quad 0xc0862618a73cca30, 0xbe1ceedbdb520545
+ .quad 0xc086261b6d2c8668, 0xbe1cf0b030396011
+ .quad 0xc086261e32267e98, 0xbe1cf19917010e96
+ .quad 0xc0862620f62b5cb0, 0xbe1cf07331355985
+ .quad 0xc0862623b93bc9e8, 0xbe1cf01ae921a1c3
+ .quad 0xc08626267b586ed0, 0xbe1cefe5cf0dbf0c
+ .quad 0xc08626293c81f348, 0xbe1cf01b258aeb50
+ .quad 0xc086262bfcb8fe88, 0xbe1cee6b9e7f4c68
+ .quad 0xc086262ebbfe3710, 0xbe1cee684a9b21c9
+ .quad 0xc08626317a5242b8, 0xbe1cf1f8bcde9a8b
+ .quad 0xc086263437b5c6c0, 0xbe1cf1d063d36238
+ .quad 0xc0862636f42967a8, 0xbe1cf1e31a19075e
+ .quad 0xc0862639afadc950, 0xbe1cf1d8efdf7e7d
+ .quad 0xc086263c6a438ef0, 0xbe1cf1812ee72dba
+ .quad 0xc086263f23eb5b18, 0xbe1cf1449a9a2279
+ .quad 0xc0862641dca5cfb8, 0xbe1cee96edce5085
+ .quad 0xc086264494738e08, 0xbe1cf06797bd03b2
+ .quad 0xc08626474b5536b8, 0xbe1cef91b9b7ffc1
+ .quad 0xc086264a014b69c0, 0xbe1cef4b6721278f
+ .quad 0xc086264cb656c678, 0xbe1cf1942925eb4a
+ .quad 0xc086264f6a77eba8, 0xbe1cefa2c7bc2e39
+ .quad 0xc08626521daf7758, 0xbe1cf252595aceb3
+ .quad 0xc0862654cffe0718, 0xbe1cee8e9ae47ec2
+ .quad 0xc0862657816437a8, 0xbe1cf1bf913828fa
+ .quad 0xc086265a31e2a558, 0xbe1cf23475d6b366
+ .quad 0xc086265ce179ebc8, 0xbe1cef8df00a922b
+ .quad 0xc086265f902aa5f0, 0xbe1cef279bfa43e0
+ .quad 0xc08626623df56e38, 0xbe1cf080e10b8365
+ .quad 0xc0862664eadade70, 0xbe1cf1a518f9b544
+ .quad 0xc086266796db8fd0, 0xbe1cef9308fed9e9
+ .quad 0xc086266a41f81ae8, 0xbe1ceea3ae6b19c9
+ .quad 0xc086266cec3117b8, 0xbe1ceef06003d4c2
+ .quad 0xc086266f95871da8, 0xbe1cf0b8457ffb0c
+ .quad 0xc08626723dfac390, 0xbe1cf0c526745ad6
+ .quad 0xc0862674e58c9fa8, 0xbe1cf0cf91ff7b5d
+ .quad 0xc08626778c3d4798, 0xbe1cefe260819380
+ .quad 0xc086267a320d5070, 0xbe1ceebd90aa27a3
+ .quad 0xc086267cd6fd4ea8, 0xbe1cf0388121dffa
+ .quad 0xc086267f7b0dd630, 0xbe1cf1a3881435f1
+ .quad 0xc08626821e3f7a68, 0xbe1cef28e9d9ac52
+ .quad 0xc0862684c092ce08, 0xbe1cf02d300062dd
+ .quad 0xc086268762086350, 0xbe1cefaee1edfa35
+ .quad 0xc086268a02a0cbe0, 0xbe1cf0a5a052e936
+ .quad 0xc086268ca25c98d8, 0xbe1cee60a4a497ed
+ .quad 0xc086268f413c5ab0, 0xbe1cf0e4a5d0cf49
+ .quad 0xc0862691df40a170, 0xbe1cf149235a4e6e
+ .quad 0xc08626947c69fc80, 0xbe1cf215180b9fcc
+ .quad 0xc086269718b8fac8, 0xbe1cef9b156a9840
+ .quad 0xc0862699b42e2a90, 0xbe1cf054c91441be
+ .quad 0xc086269c4eca19a8, 0xbe1cf13ded26512c
+ .quad 0xc086269ee88d5550, 0xbe1cf22ea4d8ac06
+ .quad 0xc08626a181786a40, 0xbe1cf2354666ee2e
+ .quad 0xc08626a4198be4a8, 0xbe1cefef936752b3
+ .quad 0xc08626a6b0c85020, 0xbe1cf1e360a9db68
+ .quad 0xc08626a9472e37d8, 0xbe1ceed6aeb812c5
+ .quad 0xc08626abdcbe2650, 0xbe1cf227340b4986
+ .quad 0xc08626ae7178a5b0, 0xbe1cf0215a0cbe0d
+ .quad 0xc08626b1055e3f70, 0xbe1cf256adf0ae26
+ .quad 0xc08626b3986f7ca8, 0xbe1ceff3c67aed06
+ .quad 0xc08626b62aace5c8, 0xbe1cf2159fb93652
+ .quad 0xc08626b8bc1702e0, 0xbe1cf01e6dbd1c7f
+ .quad 0xc08626bb4cae5b60, 0xbe1cf009e75d1c0c
+ .quad 0xc08626bddc737648, 0xbe1ceec10a020e73
+ .quad 0xc08626c06b66da08, 0xbe1cf06d5783eee7
+ .quad 0xc08626c2f9890ca0, 0xbe1cf0cb8f169ffe
+ .quad 0xc08626c586da9388, 0xbe1cef7de2452430
+ .quad 0xc08626c8135bf3b0, 0xbe1cf05da6f783ae
+ .quad 0xc08626ca9f0db198, 0xbe1cefcc877d681d
+ .quad 0xc08626cd29f05138, 0xbe1cef0531954ab3
+ .quad 0xc08626cfb4045608, 0xbe1cf06b8565ea3d
+ .quad 0xc08626d23d4a4310, 0xbe1cefdc455d9d7e
+ .quad 0xc08626d4c5c29ad0, 0xbe1ceefc47e8fa64
+ .quad 0xc08626d74d6ddf48, 0xbe1cf1872bf033f2
+ .quad 0xc08626d9d44c9210, 0xbe1cf19d91087f9d
+ .quad 0xc08626dc5a5f3438, 0xbe1cf012d444c6ab
+ .quad 0xc08626dedfa64650, 0xbe1cf0ba528ee153
+ .quad 0xc08626e164224880, 0xbe1ceeb431709788
+ .quad 0xc08626e3e7d3ba60, 0xbe1cf0b9af31a6a5
+ .quad 0xc08626e66abb1b28, 0xbe1cf168fb2e135b
+ .quad 0xc08626e8ecd8e990, 0xbe1cef9097461c93
+ .quad 0xc08626eb6e2da3d0, 0xbe1cee7a434735d8
+ .quad 0xc08626edeeb9c7a8, 0xbe1cf235732b86f2
+ .quad 0xc08626f06e7dd280, 0xbe1cefe1510b89e6
+ .quad 0xc08626f2ed7a4120, 0xbe1cf1f64b9b80ef
+ .quad 0xc08626f56baf9000, 0xbe1cf08f320ca339
+ .quad 0xc08626f7e91e3b08, 0xbe1cf1b1de2808a1
+ .quad 0xc08626fa65c6bdc0, 0xbe1cf1976d778b28
+ .quad 0xc08626fce1a99338, 0xbe1ceef40a4f076f
+ .quad 0xc08626ff5cc73600, 0xbe1cef3e45869ce3
+ .quad 0xc0862701d7202048, 0xbe1ceef601b4c9d6
+ .quad 0xc086270450b4cbc0, 0xbe1cf1eaf0b57fd6
+ .quad 0xc0862706c985b1c0, 0xbe1cef82a44990f3
+ .quad 0xc086270941934b10, 0xbe1ceefe32981f2c
+ .quad 0xc086270bb8de1018, 0xbe1cefbf6f5a0445
+ .quad 0xc086270e2f6678d0, 0xbe1cf18dba75792c
+ .quad 0xc0862710a52cfcc8, 0xbe1cf0da64ce995f
+ .quad 0xc08627131a321318, 0xbe1cef04ac0fb802
+ .quad 0xc08627158e763268, 0xbe1cee9d4e2ad9bd
+ .quad 0xc086271801f9d0f8, 0xbe1cefa9b55407b5
+ .quad 0xc086271a74bd64a0, 0xbe1cefe6bd329570
+ .quad 0xc086271ce6c162c8, 0xbe1cef0b1205dc85
+ .quad 0xc086271f58064068, 0xbe1cef092a785e3f
+ .quad 0xc0862721c88c7210, 0xbe1cf050dcdaac30
+ .quad 0xc086272438546be8, 0xbe1cf210907ded8b
+ .quad 0xc0862726a75ea1b8, 0xbe1cee760be44f99
+ .quad 0xc086272915ab86c0, 0xbe1ceeeee07c2bcc
+ .quad 0xc086272b833b8df0, 0xbe1cf06874992df5
+ .quad 0xc086272df00f29d0, 0xbe1cef8fac5d4899
+ .quad 0xc08627305c26cc70, 0xbe1cf1103241cc99
+ .quad 0xc0862732c782e788, 0xbe1cf1d35fef83fe
+ .quad 0xc08627353223ec68, 0xbe1cef3ec8133e1d
+ .quad 0xc08627379c0a4be8, 0xbe1cef7261daccd8
+ .quad 0xc086273a05367688, 0xbe1cf18656c50806
+ .quad 0xc086273c6da8dc68, 0xbe1cf1c8736e049a
+ .quad 0xc086273ed561ed38, 0xbe1cf1f93bff4911
+ .quad 0xc08627413c621848, 0xbe1cf188a4ea680c
+ .quad 0xc0862743a2a9cc80, 0xbe1cf1d270930c80
+ .quad 0xc086274608397868, 0xbe1cf25a328c28e2
+ .quad 0xc08627486d118a28, 0xbe1cf106f90aa3b8
+ .quad 0xc086274ad1326f80, 0xbe1cee5e9d2e885a
+ .quad 0xc086274d349c95c0, 0xbe1cf1c0bac27228
+ .quad 0xc086274f975069f8, 0xbe1cf1a1500f9b1c
+ .quad 0xc0862751f94e58c0, 0xbe1cefc30663ac44
+ .quad 0xc08627545a96ce48, 0xbe1cf17123e427a2
+ .quad 0xc0862756bb2a3678, 0xbe1cefb92749fea4
+ .quad 0xc08627591b08fcc0, 0xbe1cefa40e1ea74a
+ .quad 0xc086275b7a338c40, 0xbe1cee6f4612c3e9
+ .quad 0xc086275dd8aa4fa8, 0xbe1cf1c54a053627
+ .quad 0xc0862760366db168, 0xbe1ceff5eb503d9e
+ .quad 0xc0862762937e1b70, 0xbe1cf02e47f10cee
+ .quad 0xc0862764efdbf768, 0xbe1ceeb06e1d0dad
+ .quad 0xc08627674b87ae88, 0xbe1cf10aadd6dba5
+ .quad 0xc0862769a681a9c0, 0xbe1cf24e9913d30f
+ .quad 0xc086276c00ca51a0, 0xbe1cef47b301e312
+ .quad 0xc086276e5a620e48, 0xbe1ceeb1cefc2e85
+ .quad 0xc0862770b3494788, 0xbe1cf16f1fbbe011
+ .quad 0xc08627730b8064e8, 0xbe1ceebdf75174c7
+ .quad 0xc08627756307cd70, 0xbe1cf06e3871a0da
+ .quad 0xc0862777b9dfe7f0, 0xbe1cef16799fd554
+ .quad 0xc086277a10091ac0, 0xbe1cf248dabf5377
+ .quad 0xc086277c6583cc00, 0xbe1cf0c78d92a2cd
+ .quad 0xc086277eba506158, 0xbe1cf0b911b029f0
+ .quad 0xc08627810e6f4028, 0xbe1cefdc24719766
+ .quad 0xc086278361e0cd70, 0xbe1cefbb6562b7e7
+ .quad 0xc0862785b4a56dd8, 0xbe1cf1e0afb349ec
+ .quad 0xc086278806bd85c0, 0xbe1cf008292e52fc
+ .quad 0xc086278a58297918, 0xbe1cf053073872bf
+ .quad 0xc086278ca8e9ab88, 0xbe1cf17a0a55a947
+ .quad 0xc086278ef8fe8068, 0xbe1ceeffb0b60234
+ .quad 0xc086279148685aa0, 0xbe1cf162204794a8
+ .quad 0xc086279397279ce0, 0xbe1cf24cc8cb48ac
+ .quad 0xc0862795e53ca978, 0xbe1cf0c9be68d5c3
+ .quad 0xc086279832a7e258, 0xbe1cf172cd3d7388
+ .quad 0xc086279a7f69a930, 0xbe1ceea2465fbce5
+ .quad 0xc086279ccb825f40, 0xbe1cf0a386d2500f
+ .quad 0xc086279f16f26590, 0xbe1cf1e338ddc18a
+ .quad 0xc08627a161ba1cd0, 0xbe1cef1f5049867f
+ .quad 0xc08627a3abd9e548, 0xbe1cef96c1ea8b1f
+ .quad 0xc08627a5f5521f00, 0xbe1cf138f6fd3c26
+ .quad 0xc08627a83e2329b0, 0xbe1cf0d4fcbfdf3a
+ .quad 0xc08627aa864d64b0, 0xbe1cf24870c12c81
+ .quad 0xc08627accdd12f18, 0xbe1cf0ae2a56348d
+ .quad 0xc08627af14aee7a0, 0xbe1cee8ca1a9b893
+ .quad 0xc08627b15ae6eca8, 0xbe1cf20414d637b0
+ .quad 0xc08627b3a0799c60, 0xbe1cf0fc6b7b12d8
+ .quad 0xc08627b5e5675488, 0xbe1cf152d93c4a00
+ .quad 0xc08627b829b072a0, 0xbe1cf1073f9b77c2
+ .quad 0xc08627ba6d5553d8, 0xbe1cee694f97d5a4
+ .quad 0xc08627bcb0565500, 0xbe1cf0456b8239d7
+ .quad 0xc08627bef2b3d2b0, 0xbe1cf211497127e3
+ .quad 0xc08627c1346e2930, 0xbe1cf01856c0384d
+ .quad 0xc08627c37585b468, 0xbe1cefa7dd05479e
+ .quad 0xc08627c5b5fad000, 0xbe1cef3ae8e50b93
+ .quad 0xc08627c7f5cdd750, 0xbe1ceea5f32fdd3a
+ .quad 0xc08627ca34ff2560, 0xbe1cef424caeb8d9
+ .quad 0xc08627cc738f14f0, 0xbe1cf0194d07a81f
+ .quad 0xc08627ceb17e0070, 0xbe1cf20f452000c1
+ .quad 0xc08627d0eecc4210, 0xbe1cf00e356218e4
+ .quad 0xc08627d32b7a33a0, 0xbe1cef30484b4bcb
+ .quad 0xc08627d567882eb0, 0xbe1ceeea11a6641b
+ .quad 0xc08627d7a2f68c80, 0xbe1cf13492d5bd7b
+ .quad 0xc08627d9ddc5a618, 0xbe1ceeb7048fad96
+ .quad 0xc08627dc17f5d418, 0xbe1ceef0666f0477
+ .quad 0xc08627de51876ee8, 0xbe1cf060d4b8b5c2
+ .quad 0xc08627e08a7acea8, 0xbe1cf0b2a4b6ff8c
+ .quad 0xc08627e2c2d04b28, 0xbe1cf0e34809a875
+ .quad 0xc08627e4fa883bf0, 0xbe1cf16bf74a3522
+ .quad 0xc08627e731a2f848, 0xbe1cee6a24623d57
+ .quad 0xc08627e96820d718, 0xbe1cefc7b4f1528e
+ .quad 0xc08627eb9e022f18, 0xbe1cf163051f3548
+ .quad 0xc08627edd34756b8, 0xbe1cef36b3366305
+ .quad 0xc08627f007f0a408, 0xbe1cf18134625550
+ .quad 0xc08627f23bfe6cf0, 0xbe1cf0ec32ec1a11
+ .quad 0xc08627f46f710700, 0xbe1ceeb3b64f3edc
+ .quad 0xc08627f6a248c778, 0xbe1cf0cd15805bc8
+ .quad 0xc08627f8d4860368, 0xbe1cf20db3bddebe
+ .quad 0xc08627fb06290f90, 0xbe1cf25188430e25
+ .quad 0xc08627fd37324070, 0xbe1ceea1713490f9
+ .quad 0xc08627ff67a1ea28, 0xbe1cf159521d234c
+ .quad 0xc0862801977860b8, 0xbe1cf24dfe50783b
+ .quad 0xc0862803c6b5f7d0, 0xbe1ceef2ef89a60b
+ .quad 0xc0862805f55b02c8, 0xbe1cee7fc919d62c
+ .quad 0xc08628082367d4c0, 0xbe1cf215a7fb513a
+ .quad 0xc086280a50dcc0a8, 0xbe1cf0e4401c5ed4
+ .quad 0xc086280c7dba1910, 0xbe1cf04ec734d256
+ .quad 0xc086280eaa003050, 0xbe1cf010ad787fea
+ .quad 0xc0862810d5af5880, 0xbe1cee622478393d
+ .quad 0xc086281300c7e368, 0xbe1cf01c7482564f
+ .quad 0xc08628152b4a22a0, 0xbe1cf0de20d33536
+ .quad 0xc086281755366778, 0xbe1cef2edae5837d
+ .quad 0xc08628197e8d02f0, 0xbe1cf0a345318cc9
+ .quad 0xc086281ba74e45d8, 0xbe1cf20085aa34b8
+ .quad 0xc086281dcf7a80c0, 0xbe1cef5fa845ad83
+ .quad 0xc086281ff71203e0, 0xbe1cf050d1df69c4
+ .quad 0xc08628221e151f48, 0xbe1ceffe43c035b9
+ .quad 0xc0862824448422b8, 0xbe1cf14f3018d3c2
+ .quad 0xc08628266a5f5dc0, 0xbe1cef0a5fbae83d
+ .quad 0xc08628288fa71f98, 0xbe1ceff8a95b72a1
+ .quad 0xc086282ab45bb750, 0xbe1cef073aa9849b
+ .quad 0xc086282cd87d73a8, 0xbe1cef69b3835c02
+ .quad 0xc086282efc0ca328, 0xbe1cf0bc139379a9
+ .quad 0xc08628311f099420, 0xbe1cef247a9ec596
+ .quad 0xc086283341749490, 0xbe1cef74bbcc488a
+ .quad 0xc0862835634df248, 0xbe1cef4bc42e7b8e
+ .quad 0xc08628378495fad0, 0xbe1cf136d4d5a810
+ .quad 0xc0862839a54cfb80, 0xbe1cf0d290b24dd8
+ .quad 0xc086283bc5734168, 0xbe1ceeebde8e0065
+ .quad 0xc086283de5091950, 0xbe1cf1a09f60aa1e
+ .quad 0xc0862840040ecfe0, 0xbe1cf0803947a234
+ .quad 0xc08628422284b168, 0xbe1cf0abf7638127
+ .quad 0xc0862844406b0a08, 0xbe1cf0f73ee12058
+ .quad 0xc08628465dc225a0, 0xbe1cf2079971b26c
+ .quad 0xc08628487a8a4fe0, 0xbe1cee74957564b1
+ .quad 0xc086284a96c3d420, 0xbe1ceee77c1b7d43
+ .quad 0xc086284cb26efd90, 0xbe1cf23addba6e09
+ .quad 0xc086284ecd8c1730, 0xbe1cf199f4a1da60
+ .quad 0xc0862850e81b6bb0, 0xbe1cf09fdea81393
+ .quad 0xc0862853021d4588, 0xbe1cf176adb417f7
+ .quad 0xc08628551b91ef00, 0xbe1cf0f64f84a8da
+ .quad 0xc08628573479b220, 0xbe1ceec34cf49523
+ .quad 0xc08628594cd4d8a8, 0xbe1cf16d60fbe0bb
+ .quad 0xc086285b64a3ac40, 0xbe1cee8de7acfc7b
+ .quad 0xc086285d7be67630, 0xbe1ceee6256cce8d
+ .quad 0xc086285f929d7fa0, 0xbe1cee7d66a3d8a5
+ .quad 0xc0862861a8c91170, 0xbe1cf0bef8265792
+ .quad 0xc0862863be697458, 0xbe1cf097f890c6f8
+ .quad 0xc0862865d37ef0c8, 0xbe1cf09502d5c3fc
+ .quad 0xc0862867e809cf00, 0xbe1ceeffb239dac7
+ .quad 0xc0862869fc0a56f8, 0xbe1cf1fbfff95c98
+ .quad 0xc086286c0f80d090, 0xbe1cefa57ad3eef7
+ .quad 0xc086286e226d8348, 0xbe1cf22c58b9183d
+ .quad 0xc086287034d0b690, 0xbe1ceff262d0a248
+ .quad 0xc086287246aab180, 0xbe1cefa7bc194186
+ .quad 0xc086287457fbbb08, 0xbe1cf06782d784d9
+ .quad 0xc086287668c419e0, 0xbe1cf1d44d0eaa07
+ .quad 0xc086287879041490, 0xbe1cf034803c8a48
+ .quad 0xc086287a88bbf158, 0xbe1cf08e84916b6f
+ .quad 0xc086287c97ebf650, 0xbe1cf0c4d3dc1bc7
+ .quad 0xc086287ea6946958, 0xbe1cefb1e4625943
+ .quad 0xc0862880b4b59010, 0xbe1cf143efdd1fd0
+ .quad 0xc0862882c24faff8, 0xbe1cee9896d016da
+ .quad 0xc0862884cf630e38, 0xbe1cf2186072f2cc
+ .quad 0xc0862886dbefeff0, 0xbe1cef9217633d34
+ .quad 0xc0862888e7f699e0, 0xbe1cf05603549486
+ .quad 0xc086288af37750b0, 0xbe1cef50fff513d3
+ .quad 0xc086288cfe7258c0, 0xbe1cf127713b32d0
+ .quad 0xc086288f08e7f650, 0xbe1cf05015520f3d
+ .quad 0xc086289112d86d58, 0xbe1cf12eb458b26f
+ .quad 0xc08628931c4401a8, 0xbe1cf22eae2887ed
+ .quad 0xc0862895252af6e0, 0xbe1cefdd6656dd2d
+ .quad 0xc08628972d8d9058, 0xbe1cf1048ea4e646
+ .quad 0xc0862899356c1150, 0xbe1ceec4501167e9
+ .quad 0xc086289b3cc6bcb8, 0xbe1cf0ad52becc3f
+ .quad 0xc086289d439dd568, 0xbe1cf0daa4e00e35
+ .quad 0xc086289f49f19df8, 0xbe1cf00b80de8d6a
+ .quad 0xc08628a14fc258c8, 0xbe1cf1bcf2ea8464
+ .quad 0xc08628a355104818, 0xbe1cf0435e2782b0
+ .quad 0xc08628a559dbade0, 0xbe1cf0e3e1a5f56c
+ .quad 0xc08628a75e24cbf8, 0xbe1cefed9d5a721d
+ .quad 0xc08628a961ebe3f8, 0xbe1cf0d2d74321e2
+ .quad 0xc08628ab65313750, 0xbe1cf24200eb55e9
+ .quad 0xc08628ad67f50740, 0xbe1cf23e9d7cf979
+ .quad 0xc08628af6a3794d0, 0xbe1cf23a088f421c
+ .quad 0xc08628b16bf920e0, 0xbe1cef2c1de1ab32
+ .quad 0xc08628b36d39ec08, 0xbe1cf1abc231f7b2
+ .quad 0xc08628b56dfa36d0, 0xbe1cf2074d5ba303
+ .quad 0xc08628b76e3a4180, 0xbe1cf05cd5eed880
+ /*== Log_LA_table ==*/
+ .align 32
+ .quad 0x8000000000000000
+ .quad 0xbf5ff802a9ab10e6
+ .quad 0xbf6ff00aa2b10bc0
+ .quad 0xbf77ee11ebd82e94
+ .quad 0xbf7fe02a6b106789
+ .quad 0xbf83e7295d25a7d9
+ .quad 0xbf87dc475f810a77
+ .quad 0xbf8bcf712c74384c
+ .quad 0xbf8fc0a8b0fc03e4
+ .quad 0xbf91d7f7eb9eebe7
+ .quad 0xbf93cea44346a575
+ .quad 0xbf95c45a51b8d389
+ .quad 0xbf97b91b07d5b11b
+ .quad 0xbf99ace7551cc514
+ .quad 0xbf9b9fc027af9198
+ .quad 0xbf9d91a66c543cc4
+ .quad 0xbf9f829b0e783300
+ .quad 0xbfa0b94f7c196176
+ .quad 0xbfa1b0d98923d980
+ .quad 0xbfa2a7ec2214e873
+ .quad 0xbfa39e87b9febd60
+ .quad 0xbfa494acc34d911c
+ .quad 0xbfa58a5bafc8e4d5
+ .quad 0xbfa67f94f094bd98
+ .quad 0xbfa77458f632dcfc
+ .quad 0xbfa868a83083f6cf
+ .quad 0xbfa95c830ec8e3eb
+ .quad 0xbfaa4fe9ffa3d235
+ .quad 0xbfab42dd711971bf
+ .quad 0xbfac355dd0921f2d
+ .quad 0xbfad276b8adb0b52
+ .quad 0xbfae19070c276016
+ .quad 0xbfaf0a30c01162a6
+ .quad 0xbfaffae9119b9303
+ .quad 0xbfb075983598e471
+ .quad 0xbfb0ed839b5526fe
+ .quad 0xbfb16536eea37ae1
+ .quad 0xbfb1dcb263db1944
+ .quad 0xbfb253f62f0a1417
+ .quad 0xbfb2cb0283f5de1f
+ .quad 0xbfb341d7961bd1d1
+ .quad 0xbfb3b87598b1b6ee
+ .quad 0xbfb42edcbea646f0
+ .quad 0xbfb4a50d3aa1b040
+ .quad 0xbfb51b073f06183f
+ .quad 0xbfb590cafdf01c28
+ .quad 0xbfb60658a93750c4
+ .quad 0xbfb67bb0726ec0fc
+ .quad 0xbfb6f0d28ae56b4c
+ .quad 0xbfb765bf23a6be13
+ .quad 0xbfb7da766d7b12cd
+ .quad 0xbfb84ef898e8282a
+ .quad 0xbfb8c345d6319b21
+ .quad 0xbfb9375e55595ede
+ .quad 0xbfb9ab42462033ad
+ .quad 0xbfba1ef1d8061cd4
+ .quad 0xbfba926d3a4ad563
+ .quad 0xbfbb05b49bee43fe
+ .quad 0xbfbb78c82bb0eda1
+ .quad 0xbfbbeba818146765
+ .quad 0xbfbc5e548f5bc743
+ .quad 0xbfbcd0cdbf8c13e1
+ .quad 0xbfbd4313d66cb35d
+ .quad 0xbfbdb5270187d927
+ .quad 0xbfbe27076e2af2e6
+ .quad 0xbfbe98b549671467
+ .quad 0xbfbf0a30c01162a6
+ .quad 0xbfbf7b79fec37ddf
+ .quad 0xbfbfec9131dbeabb
+ .quad 0xbfc02ebb42bf3d4b
+ .quad 0xbfc0671512ca596e
+ .quad 0xbfc09f561ee719c3
+ .quad 0xbfc0d77e7cd08e59
+ .quad 0xbfc10f8e422539b1
+ .quad 0xbfc14785846742ac
+ .quad 0xbfc17f6458fca611
+ .quad 0xbfc1b72ad52f67a0
+ .quad 0xbfc1eed90e2dc2c3
+ .quad 0xbfc2266f190a5acb
+ .quad 0xbfc25ded0abc6ad2
+ .quad 0xbfc29552f81ff523
+ .quad 0xbfc2cca0f5f5f251
+ .quad 0xbfc303d718e47fd3
+ .quad 0xbfc33af575770e4f
+ .quad 0xbfc371fc201e8f74
+ .quad 0xbfc3a8eb2d31a376
+ .quad 0xbfc3dfc2b0ecc62a
+ .quad 0xbfc41682bf727bc0
+ .quad 0xbfc44d2b6ccb7d1e
+ .quad 0xbfc483bccce6e3dd
+ .quad 0xbfc4ba36f39a55e5
+ .quad 0xbfc4f099f4a230b2
+ .quad 0xbfc526e5e3a1b438
+ .quad 0xbfc55d1ad4232d6f
+ .quad 0xbfc59338d9982086
+ .quad 0xbfc5c940075972b9
+ .quad 0xbfc5ff3070a793d4
+ .quad 0xbfc6350a28aaa758
+ .quad 0xbfc66acd4272ad51
+ .quad 0xbfc6a079d0f7aad2
+ .quad 0xbfc6d60fe719d21d
+ .quad 0xbfc70b8f97a1aa75
+ .quad 0xbfc740f8f54037a5
+ .quad 0xbfc7764c128f2127
+ .quad 0xbfc7ab890210d909
+ .quad 0xbfc7e0afd630c274
+ .quad 0xbfc815c0a14357eb
+ .quad 0xbfc84abb75865139
+ .quad 0xbfc87fa06520c911
+ .quad 0xbfc8b46f8223625b
+ .quad 0xbfc8e928de886d41
+ .quad 0xbfc91dcc8c340bde
+ .quad 0xbfc9525a9cf456b4
+ .quad 0xbfc986d3228180ca
+ .quad 0xbfc9bb362e7dfb83
+ .quad 0xbfc9ef83d2769a34
+ .quad 0xbfca23bc1fe2b563
+ .quad 0xbfca57df28244dcd
+ .quad 0xbfca8becfc882f19
+ .quad 0xbfcabfe5ae46124c
+ .quad 0xbfcaf3c94e80bff3
+ .quad 0xbfcb2797ee46320c
+ .quad 0xbfcb5b519e8fb5a4
+ .quad 0xbfcb8ef670420c3b
+ .quad 0xbfcbc286742d8cd6
+ .quad 0xbfcbf601bb0e44e2
+ .quad 0xbfcc2968558c18c1
+ .quad 0xbfcc5cba543ae425
+ .quad 0xbfcc8ff7c79a9a22
+ .quad 0xbfccc320c0176502
+ .quad 0xbfccf6354e09c5dc
+ .quad 0xbfcd293581b6b3e7
+ .quad 0xbfcd5c216b4fbb91
+ .quad 0xbfcd8ef91af31d5e
+ .quad 0xbfcdc1bca0abec7d
+ .quad 0xbfcdf46c0c722d2f
+ .quad 0xbfce27076e2af2e6
+ .quad 0xbfce598ed5a87e2f
+ .quad 0xbfce8c0252aa5a60
+ .quad 0xbfcebe61f4dd7b0b
+ .quad 0xbfcef0adcbdc5936
+ .quad 0xbfcf22e5e72f105d
+ .quad 0xbfcf550a564b7b37
+ .quad 0xbfcf871b28955045
+ .quad 0xbfcfb9186d5e3e2b
+ .quad 0xbfcfeb0233e607cc
+ .quad 0xbfd00e6c45ad501d
+ .quad 0xbfd0274dc16c232f
+ .quad 0xbfd0402594b4d041
+ .quad 0xbfd058f3c703ebc6
+ .quad 0xbfd071b85fcd590d
+ .quad 0xbfd08a73667c57af
+ .quad 0xbfd0a324e27390e3
+ .quad 0xbfd0bbccdb0d24bd
+ .quad 0xbfd0d46b579ab74b
+ .quad 0xbfd0ed005f657da4
+ .quad 0xbfd1058bf9ae4ad5
+ .quad 0xbfd11e0e2dad9cb7
+ .quad 0xbfd136870293a8b0
+ .quad 0xbfd14ef67f88685a
+ .quad 0xbfd1675cababa60e
+ .quad 0xbfd17fb98e15095d
+ .quad 0xbfd1980d2dd4236f
+ .quad 0xbfd1b05791f07b49
+ .quad 0xbfd1c898c16999fb
+ .quad 0xbfd1e0d0c33716be
+ .quad 0xbfd1f8ff9e48a2f3
+ .quad 0xbfd211255986160c
+ .quad 0xbfd22941fbcf7966
+ .quad 0xbfd241558bfd1404
+ .quad 0xbfd2596010df763a
+ .quad 0xbfd27161913f853d
+ .quad 0xbfd2895a13de86a3
+ .quad 0xbfd2a1499f762bc9
+ .quad 0xbfd2b9303ab89d25
+ .quad 0xbfd2d10dec508583
+ .quad 0xbfd2e8e2bae11d31
+ .quad 0xbfd300aead06350c
+ .quad 0xbfd31871c9544185
+ .quad 0xbfd3302c16586588
+ .quad 0xbfd347dd9a987d55
+ .quad 0xbfd35f865c93293e
+ .quad 0xbfd3772662bfd85b
+ .quad 0xbfd38ebdb38ed321
+ .quad 0xbfd3a64c556945ea
+ .quad 0xbfd3bdd24eb14b6a
+ .quad 0xbfd3d54fa5c1f710
+ .quad 0xbfd3ecc460ef5f50
+ .quad 0xbfd404308686a7e4
+ .quad 0xbfd41b941cce0bee
+ .quad 0xbfd432ef2a04e814
+ .quad 0xbfd44a41b463c47c
+ .quad 0xbfd4618bc21c5ec2
+ .quad 0xbfd478cd5959b3d9
+ .quad 0xbfd49006804009d1
+ .quad 0xbfd4a7373cecf997
+ .quad 0xbfd4be5f957778a1
+ .quad 0xbfd4d57f8fefe27f
+ .quad 0xbfd4ec973260026a
+ .quad 0xbfd503a682cb1cb3
+ .quad 0xbfd51aad872df82d
+ .quad 0xbfd531ac457ee77e
+ .quad 0xbfd548a2c3add263
+ .quad 0xbfd55f9107a43ee2
+ .quad 0xbfd5767717455a6c
+ .quad 0xbfd58d54f86e02f2
+ .quad 0xbfd5a42ab0f4cfe2
+ .quad 0xbfd5baf846aa1b19
+ .quad 0xbfd5d1bdbf5809ca
+ .quad 0xbfd5e87b20c2954a
+ .quad 0xbfd5ff3070a793d4
+ .quad 0xbfd615ddb4bec13c
+ .quad 0xbfd62c82f2b9c795
+ .quad 0x3fd61965cdb02c1f
+ .quad 0x3fd602d08af091ec
+ .quad 0x3fd5ec433d5c35ae
+ .quad 0x3fd5d5bddf595f30
+ .quad 0x3fd5bf406b543db2
+ .quad 0x3fd5a8cadbbedfa1
+ .quad 0x3fd5925d2b112a59
+ .quad 0x3fd57bf753c8d1fb
+ .quad 0x3fd565995069514c
+ .quad 0x3fd54f431b7be1a9
+ .quad 0x3fd538f4af8f72fe
+ .quad 0x3fd522ae0738a3d8
+ .quad 0x3fd50c6f1d11b97c
+ .quad 0x3fd4f637ebba9810
+ .quad 0x3fd4e0086dd8baca
+ .quad 0x3fd4c9e09e172c3c
+ .quad 0x3fd4b3c077267e9a
+ .quad 0x3fd49da7f3bcc41f
+ .quad 0x3fd487970e958770
+ .quad 0x3fd4718dc271c41b
+ .quad 0x3fd45b8c0a17df13
+ .quad 0x3fd44591e0539f49
+ .quad 0x3fd42f9f3ff62642
+ .quad 0x3fd419b423d5e8c7
+ .quad 0x3fd403d086cea79c
+ .quad 0x3fd3edf463c1683e
+ .quad 0x3fd3d81fb5946dba
+ .quad 0x3fd3c25277333184
+ .quad 0x3fd3ac8ca38e5c5f
+ .quad 0x3fd396ce359bbf54
+ .quad 0x3fd3811728564cb2
+ .quad 0x3fd36b6776be1117
+ .quad 0x3fd355bf1bd82c8b
+ .quad 0x3fd3401e12aecba1
+ .quad 0x3fd32a84565120a8
+ .quad 0x3fd314f1e1d35ce4
+ .quad 0x3fd2ff66b04ea9d4
+ .quad 0x3fd2e9e2bce12286
+ .quad 0x3fd2d46602adccee
+ .quad 0x3fd2bef07cdc9354
+ .quad 0x3fd2a982269a3dbf
+ .quad 0x3fd2941afb186b7c
+ .quad 0x3fd27ebaf58d8c9d
+ .quad 0x3fd269621134db92
+ .quad 0x3fd25410494e56c7
+ .quad 0x3fd23ec5991eba49
+ .quad 0x3fd22981fbef797b
+ .quad 0x3fd214456d0eb8d4
+ .quad 0x3fd1ff0fe7cf47a7
+ .quad 0x3fd1e9e1678899f4
+ .quad 0x3fd1d4b9e796c245
+ .quad 0x3fd1bf99635a6b95
+ .quad 0x3fd1aa7fd638d33f
+ .quad 0x3fd1956d3b9bc2fa
+ .quad 0x3fd180618ef18adf
+ .quad 0x3fd16b5ccbacfb73
+ .quad 0x3fd1565eed455fc3
+ .quad 0x3fd14167ef367783
+ .quad 0x3fd12c77cd00713b
+ .quad 0x3fd1178e8227e47c
+ .quad 0x3fd102ac0a35cc1c
+ .quad 0x3fd0edd060b78081
+ .quad 0x3fd0d8fb813eb1ef
+ .quad 0x3fd0c42d676162e3
+ .quad 0x3fd0af660eb9e279
+ .quad 0x3fd09aa572e6c6d4
+ .quad 0x3fd085eb8f8ae797
+ .quad 0x3fd07138604d5862
+ .quad 0x3fd05c8be0d9635a
+ .quad 0x3fd047e60cde83b8
+ .quad 0x3fd03346e0106062
+ .quad 0x3fd01eae5626c691
+ .quad 0x3fd00a1c6adda473
+ .quad 0x3fcfeb2233ea07cd
+ .quad 0x3fcfc218be620a5e
+ .quad 0x3fcf991c6cb3b379
+ .quad 0x3fcf702d36777df0
+ .quad 0x3fcf474b134df229
+ .quad 0x3fcf1e75fadf9bde
+ .quad 0x3fcef5ade4dcffe6
+ .quad 0x3fceccf2c8fe920a
+ .quad 0x3fcea4449f04aaf5
+ .quad 0x3fce7ba35eb77e2a
+ .quad 0x3fce530effe71012
+ .quad 0x3fce2a877a6b2c12
+ .quad 0x3fce020cc6235ab5
+ .quad 0x3fcdd99edaf6d7e9
+ .quad 0x3fcdb13db0d48940
+ .quad 0x3fcd88e93fb2f450
+ .quad 0x3fcd60a17f903515
+ .quad 0x3fcd38666871f465
+ .quad 0x3fcd1037f2655e7b
+ .quad 0x3fcce816157f1988
+ .quad 0x3fccc000c9db3c52
+ .quad 0x3fcc97f8079d44ec
+ .quad 0x3fcc6ffbc6f00f71
+ .quad 0x3fcc480c0005ccd1
+ .quad 0x3fcc2028ab17f9b4
+ .quad 0x3fcbf851c067555f
+ .quad 0x3fcbd087383bd8ad
+ .quad 0x3fcba8c90ae4ad19
+ .quad 0x3fcb811730b823d2
+ .quad 0x3fcb5971a213acdb
+ .quad 0x3fcb31d8575bce3d
+ .quad 0x3fcb0a4b48fc1b46
+ .quad 0x3fcae2ca6f672bd4
+ .quad 0x3fcabb55c31693ad
+ .quad 0x3fca93ed3c8ad9e3
+ .quad 0x3fca6c90d44b704e
+ .quad 0x3fca454082e6ab05
+ .quad 0x3fca1dfc40f1b7f1
+ .quad 0x3fc9f6c407089664
+ .quad 0x3fc9cf97cdce0ec3
+ .quad 0x3fc9a8778debaa38
+ .quad 0x3fc981634011aa75
+ .quad 0x3fc95a5adcf7017f
+ .quad 0x3fc9335e5d594989
+ .quad 0x3fc90c6db9fcbcd9
+ .quad 0x3fc8e588ebac2dbf
+ .quad 0x3fc8beafeb38fe8c
+ .quad 0x3fc897e2b17b19a5
+ .quad 0x3fc871213750e994
+ .quad 0x3fc84a6b759f512f
+ .quad 0x3fc823c16551a3c2
+ .quad 0x3fc7fd22ff599d4f
+ .quad 0x3fc7d6903caf5ad0
+ .quad 0x3fc7b0091651528c
+ .quad 0x3fc7898d85444c73
+ .quad 0x3fc7631d82935a86
+ .quad 0x3fc73cb9074fd14d
+ .quad 0x3fc716600c914054
+ .quad 0x3fc6f0128b756abc
+ .quad 0x3fc6c9d07d203fc7
+ .quad 0x3fc6a399dabbd383
+ .quad 0x3fc67d6e9d785771
+ .quad 0x3fc6574ebe8c133a
+ .quad 0x3fc6313a37335d76
+ .quad 0x3fc60b3100b09476
+ .quad 0x3fc5e533144c1719
+ .quad 0x3fc5bf406b543db2
+ .quad 0x3fc59958ff1d52f1
+ .quad 0x3fc5737cc9018cdd
+ .quad 0x3fc54dabc26105d2
+ .quad 0x3fc527e5e4a1b58d
+ .quad 0x3fc5022b292f6a45
+ .quad 0x3fc4dc7b897bc1c8
+ .quad 0x3fc4b6d6fefe22a4
+ .quad 0x3fc4913d8333b561
+ .quad 0x3fc46baf0f9f5db7
+ .quad 0x3fc4462b9dc9b3dc
+ .quad 0x3fc420b32740fdd4
+ .quad 0x3fc3fb45a59928cc
+ .quad 0x3fc3d5e3126bc27f
+ .quad 0x3fc3b08b6757f2a9
+ .quad 0x3fc38b3e9e027479
+ .quad 0x3fc365fcb0159016
+ .quad 0x3fc340c59741142e
+ .quad 0x3fc31b994d3a4f85
+ .quad 0x3fc2f677cbbc0a96
+ .quad 0x3fc2d1610c86813a
+ .quad 0x3fc2ac55095f5c59
+ .quad 0x3fc28753bc11aba5
+ .quad 0x3fc2625d1e6ddf57
+ .quad 0x3fc23d712a49c202
+ .quad 0x3fc2188fd9807263
+ .quad 0x3fc1f3b925f25d41
+ .quad 0x3fc1ceed09853752
+ .quad 0x3fc1aa2b7e23f72a
+ .quad 0x3fc185747dbecf34
+ .quad 0x3fc160c8024b27b1
+ .quad 0x3fc13c2605c398c3
+ .quad 0x3fc1178e8227e47c
+ .quad 0x3fc0f301717cf0fb
+ .quad 0x3fc0ce7ecdccc28d
+ .quad 0x3fc0aa06912675d5
+ .quad 0x3fc08598b59e3a07
+ .quad 0x3fc06135354d4b18
+ .quad 0x3fc03cdc0a51ec0d
+ .quad 0x3fc0188d2ecf6140
+ .quad 0x3fbfe89139dbd566
+ .quad 0x3fbfa01c9db57ce2
+ .quad 0x3fbf57bc7d9005db
+ .quad 0x3fbf0f70cdd992e3
+ .quad 0x3fbec739830a1120
+ .quad 0x3fbe7f1691a32d3e
+ .quad 0x3fbe3707ee30487b
+ .quad 0x3fbdef0d8d466db9
+ .quad 0x3fbda727638446a2
+ .quad 0x3fbd5f55659210e2
+ .quad 0x3fbd179788219364
+ .quad 0x3fbccfedbfee13a8
+ .quad 0x3fbc885801bc4b23
+ .quad 0x3fbc40d6425a5cb1
+ .quad 0x3fbbf968769fca11
+ .quad 0x3fbbb20e936d6974
+ .quad 0x3fbb6ac88dad5b1c
+ .quad 0x3fbb23965a52ff00
+ .quad 0x3fbadc77ee5aea8c
+ .quad 0x3fba956d3ecade63
+ .quad 0x3fba4e7640b1bc38
+ .quad 0x3fba0792e9277cac
+ .quad 0x3fb9c0c32d4d2548
+ .quad 0x3fb97a07024cbe74
+ .quad 0x3fb9335e5d594989
+ .quad 0x3fb8ecc933aeb6e8
+ .quad 0x3fb8a6477a91dc29
+ .quad 0x3fb85fd927506a48
+ .quad 0x3fb8197e2f40e3f0
+ .quad 0x3fb7d33687c293c9
+ .quad 0x3fb78d02263d82d3
+ .quad 0x3fb746e100226ed9
+ .quad 0x3fb700d30aeac0e1
+ .quad 0x3fb6bad83c1883b6
+ .quad 0x3fb674f089365a7a
+ .quad 0x3fb62f1be7d77743
+ .quad 0x3fb5e95a4d9791cb
+ .quad 0x3fb5a3abb01ade25
+ .quad 0x3fb55e10050e0384
+ .quad 0x3fb518874226130a
+ .quad 0x3fb4d3115d207eac
+ .quad 0x3fb48dae4bc31018
+ .quad 0x3fb4485e03dbdfad
+ .quad 0x3fb403207b414b7f
+ .quad 0x3fb3bdf5a7d1ee64
+ .quad 0x3fb378dd7f749714
+ .quad 0x3fb333d7f8183f4b
+ .quad 0x3fb2eee507b40301
+ .quad 0x3fb2aa04a44717a5
+ .quad 0x3fb26536c3d8c369
+ .quad 0x3fb2207b5c78549e
+ .quad 0x3fb1dbd2643d190b
+ .quad 0x3fb1973bd1465567
+ .quad 0x3fb152b799bb3cc9
+ .quad 0x3fb10e45b3cae831
+ .quad 0x3fb0c9e615ac4e17
+ .quad 0x3fb08598b59e3a07
+ .quad 0x3fb0415d89e74444
+ .quad 0x3faffa6911ab9301
+ .quad 0x3faf723b517fc523
+ .quad 0x3faeea31c006b87c
+ .quad 0x3fae624c4a0b5e1b
+ .quad 0x3fadda8adc67ee4e
+ .quad 0x3fad52ed6405d86f
+ .quad 0x3faccb73cdddb2cc
+ .quad 0x3fac441e06f72a9e
+ .quad 0x3fabbcebfc68f420
+ .quad 0x3fab35dd9b58baad
+ .quad 0x3faaaef2d0fb10fc
+ .quad 0x3faa282b8a936171
+ .quad 0x3fa9a187b573de7c
+ .quad 0x3fa91b073efd7314
+ .quad 0x3fa894aa149fb343
+ .quad 0x3fa80e7023d8ccc4
+ .quad 0x3fa788595a3577ba
+ .quad 0x3fa70265a550e777
+ .quad 0x3fa67c94f2d4bb58
+ .quad 0x3fa5f6e73078efb8
+ .quad 0x3fa5715c4c03ceef
+ .quad 0x3fa4ebf43349e26f
+ .quad 0x3fa466aed42de3ea
+ .quad 0x3fa3e18c1ca0ae92
+ .quad 0x3fa35c8bfaa1306b
+ .quad 0x3fa2d7ae5c3c5bae
+ .quad 0x3fa252f32f8d183f
+ .quad 0x3fa1ce5a62bc353a
+ .quad 0x3fa149e3e4005a8d
+ .quad 0x3fa0c58fa19dfaaa
+ .quad 0x3fa0415d89e74444
+ .quad 0x3f9f7a9b16782856
+ .quad 0x3f9e72bf2813ce51
+ .quad 0x3f9d6b2725979802
+ .quad 0x3f9c63d2ec14aaf2
+ .quad 0x3f9b5cc258b718e6
+ .quad 0x3f9a55f548c5c43f
+ .quad 0x3f994f6b99a24475
+ .quad 0x3f98492528c8cabf
+ .quad 0x3f974321d3d006d3
+ .quad 0x3f963d6178690bd6
+ .quad 0x3f9537e3f45f3565
+ .quad 0x3f9432a925980cc1
+ .quad 0x3f932db0ea132e22
+ .quad 0x3f9228fb1fea2e28
+ .quad 0x3f912487a5507f70
+ .quad 0x3f90205658935847
+ .quad 0x3f8e38ce3033310c
+ .quad 0x3f8c317384c75f06
+ .quad 0x3f8a2a9c6c170462
+ .quad 0x3f882448a388a2aa
+ .quad 0x3f861e77e8b53fc6
+ .quad 0x3f841929f96832f0
+ .quad 0x3f82145e939ef1e9
+ .quad 0x3f8010157588de71
+ .quad 0x3f7c189cbb0e27fb
+ .quad 0x3f78121214586b54
+ .quad 0x3f740c8a747878e2
+ .quad 0x3f70080559588b35
+ .quad 0x3f680904828985c0
+ .quad 0x3f60040155d5889e
+ .quad 0x3f50020055655889
+ .quad 0x0000000000000000
+ /*== poly_coeff[4] ==*/
+ .align 32
+ .quad 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A /* coeff4 */
+ .quad 0xbfd0000148058EE1, 0xbfd0000148058EE1, 0xbfd0000148058EE1, 0xbfd0000148058EE1 /* coeff3 */
+ .quad 0x3fd55555555543C5, 0x3fd55555555543C5, 0x3fd55555555543C5, 0x3fd55555555543C5 /* coeff2 */
+ .quad 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F /* coeff1 */
+ /*== ExpMask ==*/
+ .align 32
+ .quad 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff
+ /*== Two10 ==*/
+ .align 32
+ .quad 0x3f50000000000000, 0x3f50000000000000, 0x3f50000000000000, 0x3f50000000000000
+ /*== MinLog1p = -1+2^(-53) ==*/
+ .align 32
+ .quad 0xbfefffffffffffff, 0xbfefffffffffffff, 0xbfefffffffffffff, 0xbfefffffffffffff
+ /*== MaxLog1p ==*/
+ .align 32
+ .quad 0x7f3ffffffffff000, 0x7f3ffffffffff000, 0x7f3ffffffffff000, 0x7f3ffffffffff000
+ /*== One ==*/
+ .align 32
+ .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000
+ /*== SgnMask ==*/
+ .align 32
+ .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff
+ /*== XThreshold ==*/
+ .align 32
+ .quad 0x3e00000000000000, 0x3e00000000000000, 0x3e00000000000000, 0x3e00000000000000
+ /*== XhMask ==*/
+ .align 32
+ .quad 0xfffffffffffffc00, 0xfffffffffffffc00, 0xfffffffffffffc00, 0xfffffffffffffc00
+ /*== Threshold ==*/
+ .align 32
+ .quad 0x4086a00000000000, 0x4086a00000000000, 0x4086a00000000000, 0x4086a00000000000
+ /*== Bias ==*/
+ .align 32
+ .quad 0x408ff80000000000, 0x408ff80000000000, 0x408ff80000000000, 0x408ff80000000000
+ /*== Bias1 ==*/
+ .align 32
+ .quad 0x408ff00000000000, 0x408ff00000000000, 0x408ff00000000000, 0x408ff00000000000
+ /*== ExpMask ==*/
+ .align 32
+ .quad 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000
+ /*== ExpMask2 ==*/
+ .align 32
+ .quad 0x7f40000000000000, 0x7f40000000000000, 0x7f40000000000000, 0x7f40000000000000
+ /*== L2L ==*/
+ .align 32
+ .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF
+ /*== dHalf ==*/
+ .align 32
+ .quad 0x3FE0000000000000, 0x3FE0000000000000, 0x3FE0000000000000, 0x3FE0000000000000
+ /*== dSign ==*/
+ .align 32
+ .quad 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000
+ /*== dTopMask12 ==*/
+ .align 32
+ .quad 0xFFFFFE0000000000, 0xFFFFFE0000000000, 0xFFFFFE0000000000, 0xFFFFFE0000000000
+ /*== dTopMask41 ==*/
+ .align 32
+ .quad 0xFFFFFFFFFFFFF000, 0xFFFFFFFFFFFFF000, 0xFFFFFFFFFFFFF000, 0xFFFFFFFFFFFFF000
+ /*== dTinyRange ==*/
+ .align 32
+ .quad 0x0350000000000000, 0x0350000000000000, 0x0350000000000000, 0x0350000000000000
+ .align 32
+ .type __svml_datanh_data_internal,@object
+ .size __svml_datanh_data_internal,.-__svml_datanh_data_internal
new file mode 100644
@@ -0,0 +1,20 @@
+/* AVX2 wrapper version of vectorized atanh, vector length is 8.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define _ZGVeN8v_atanh _ZGVeN8v_atanh_avx2_wrapper
+#include "../svml_d_atanh8_core.S"
new file mode 100644
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized atanh, vector length is 8.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN8v_atanh
+#include "ifunc-mathvec-avx512-skx.h" /* NOTE(review): assumed to define REDIRECT_NAME and IFUNC_SELECTOR -- confirm */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN8v_atanh, __GI__ZGVeN8v_atanh, __redirect__ZGVeN8v_atanh)
+ __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
new file mode 100644
@@ -0,0 +1,401 @@
+/* Function atanh vectorized with AVX-512.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ * Compute atanh(x) as 0.5 * log((1 + x)/(1 - x))
+ * using a small lookup table that maps to AVX-512 permute instructions
+ *
+ * Special cases:
+ *
+ * atanh(0) = 0
+ * atanh(+1) = +INF
+ * atanh(-1) = -INF
+ * atanh(x) = NaN if |x| > 1, or if x is a NaN or INF
+ *
+ */
+
+/* Byte offsets of the fields of __svml_datanh_data_internal_avx512:
+ Log_tbl_H and Log_tbl_L span 128 bytes each, every later field 64. */
+#define Log_tbl_H 0
+#define Log_tbl_L 128
+#define One 256
+#define AbsMask 320
+#define AddB5 384
+#define RcpBitMask 448
+#define poly_coeff8 512
+#define poly_coeff7 576
+#define poly_coeff6 640
+#define poly_coeff5 704
+#define poly_coeff4 768
+#define poly_coeff3 832
+#define poly_coeff2 896
+#define poly_coeff1 960
+#define poly_coeff0 1024
+#define Half 1088
+#define L2H 1152
+#define L2L 1216
+
+#include <sysdep.h>
+
+ .text
+ .section .text.evex512,"ax",@progbits
+ENTRY(_ZGVeN8v_atanh_skx) /* 8 x double atanh: argument and result in %zmm0 */
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-64, %rsp
+ subq $192, %rsp /* 0/8/16(%rsp): saved r14/r13/r12, 64: input, 128: result */
+ vmovups One+__svml_datanh_data_internal_avx512(%rip), %zmm15
+
+/* round reciprocals to 1+4b mantissas */
+ vmovups AddB5+__svml_datanh_data_internal_avx512(%rip), %zmm6
+ vmovups RcpBitMask+__svml_datanh_data_internal_avx512(%rip), %zmm9
+ vmovaps %zmm0, %zmm2 /* zmm2 = x, kept for the special-input path */
+ vandpd AbsMask+__svml_datanh_data_internal_avx512(%rip), %zmm2, %zmm13 /* zmm13 = |x| */
+
+/* 1+y */
+ vaddpd {rn-sae}, %zmm15, %zmm13, %zmm0
+
+/* 1-y */
+ vsubpd {rn-sae}, %zmm13, %zmm15, %zmm4
+ vxorpd %zmm13, %zmm2, %zmm1 /* zmm1 = sign bit of x */
+
+/* Yp_high */
+ vsubpd {rn-sae}, %zmm15, %zmm0, %zmm7
+
+/* -Ym_high */
+ vsubpd {rn-sae}, %zmm15, %zmm4, %zmm12
+
+/* RcpP ~ 1/Yp */
+ vrcp14pd %zmm0, %zmm3
+
+/* RcpM ~ 1/Ym */
+ vrcp14pd %zmm4, %zmm5
+
+/* |x| not less than 1, or x is NaN (predicate 21 = NLT_UQ)?  k0 marks lanes for the scalar fallback */
+ vcmppd $21, {sae}, %zmm15, %zmm13, %k0
+ vpaddq %zmm6, %zmm3, %zmm11
+ vpaddq %zmm6, %zmm5, %zmm10
+
+/* Yp_low */
+ vsubpd {rn-sae}, %zmm7, %zmm13, %zmm8
+ vandpd %zmm9, %zmm11, %zmm14
+ vandpd %zmm9, %zmm10, %zmm3
+
+/* Ym_low */
+ vaddpd {rn-sae}, %zmm12, %zmm13, %zmm12
+
+/* Reduced argument: Rp = (RcpP*Yp - 1)+RcpP*Yp_low */
+ vfmsub213pd {rn-sae}, %zmm15, %zmm14, %zmm0
+
+/* Reduced argument: Rm = (RcpM*Ym - 1)+RcpM*Ym_low */
+ vfmsub231pd {rn-sae}, %zmm3, %zmm4, %zmm15
+
+/* exponents */
+ vgetexppd {sae}, %zmm14, %zmm5
+ vgetexppd {sae}, %zmm3, %zmm4
+
+/* Table lookups */
+ vmovups __svml_datanh_data_internal_avx512(%rip), %zmm9
+ vmovups Log_tbl_H+64+__svml_datanh_data_internal_avx512(%rip), %zmm13
+ vmovups Log_tbl_L+__svml_datanh_data_internal_avx512(%rip), %zmm7
+ vfmadd231pd {rn-sae}, %zmm14, %zmm8, %zmm0
+ vfnmadd231pd {rn-sae}, %zmm3, %zmm12, %zmm15
+
+/* Prepare table index */
+ vpsrlq $48, %zmm14, %zmm11
+ vpsrlq $48, %zmm3, %zmm8
+ vmovups Log_tbl_L+64+__svml_datanh_data_internal_avx512(%rip), %zmm14
+
+/* polynomials */
+ vmovups poly_coeff8+__svml_datanh_data_internal_avx512(%rip), %zmm3
+
+/* Km-Kp */
+ vsubpd {rn-sae}, %zmm5, %zmm4, %zmm5
+ vmovups poly_coeff7+__svml_datanh_data_internal_avx512(%rip), %zmm4
+ kmovw %k0, %edx /* edx = bitmask of special lanes */
+ vmovaps %zmm11, %zmm10
+ vmovaps %zmm4, %zmm6
+ vpermi2pd %zmm13, %zmm9, %zmm10
+ vpermi2pd %zmm14, %zmm7, %zmm11
+ vpermt2pd %zmm13, %zmm8, %zmm9
+ vpermt2pd %zmm14, %zmm8, %zmm7
+ vmovups poly_coeff6+__svml_datanh_data_internal_avx512(%rip), %zmm8
+ vfmadd231pd {rn-sae}, %zmm0, %zmm3, %zmm6
+ vfmadd231pd {rn-sae}, %zmm15, %zmm3, %zmm4
+ vmovups poly_coeff3+__svml_datanh_data_internal_avx512(%rip), %zmm13
+ vmovups poly_coeff2+__svml_datanh_data_internal_avx512(%rip), %zmm14
+ vfmadd213pd {rn-sae}, %zmm8, %zmm0, %zmm6
+ vfmadd213pd {rn-sae}, %zmm8, %zmm15, %zmm4
+ vmovups poly_coeff0+__svml_datanh_data_internal_avx512(%rip), %zmm8
+ vsubpd {rn-sae}, %zmm11, %zmm7, %zmm12
+
+/* table values */
+ vsubpd {rn-sae}, %zmm10, %zmm9, %zmm3
+ vmovups poly_coeff5+__svml_datanh_data_internal_avx512(%rip), %zmm7
+ vmovups poly_coeff4+__svml_datanh_data_internal_avx512(%rip), %zmm9
+
+/* K*L2H + Th */
+ vmovups L2H+__svml_datanh_data_internal_avx512(%rip), %zmm10
+
+/* K*L2L + Tl */
+ vmovups L2L+__svml_datanh_data_internal_avx512(%rip), %zmm11
+ vfmadd213pd {rn-sae}, %zmm7, %zmm0, %zmm6
+ vfmadd213pd {rn-sae}, %zmm7, %zmm15, %zmm4
+ vmovups poly_coeff1+__svml_datanh_data_internal_avx512(%rip), %zmm7
+ vfmadd231pd {rn-sae}, %zmm5, %zmm10, %zmm3
+ vfmadd213pd {rn-sae}, %zmm12, %zmm11, %zmm5
+ vfmadd213pd {rn-sae}, %zmm9, %zmm0, %zmm6
+ vfmadd213pd {rn-sae}, %zmm9, %zmm15, %zmm4
+ vfmadd213pd {rn-sae}, %zmm13, %zmm0, %zmm6
+ vfmadd213pd {rn-sae}, %zmm13, %zmm15, %zmm4
+ vfmadd213pd {rn-sae}, %zmm14, %zmm0, %zmm6
+ vfmadd213pd {rn-sae}, %zmm14, %zmm15, %zmm4
+ vfmadd213pd {rn-sae}, %zmm7, %zmm0, %zmm6
+ vfmadd213pd {rn-sae}, %zmm7, %zmm15, %zmm4
+ vfmadd213pd {rn-sae}, %zmm8, %zmm0, %zmm6
+ vfmadd213pd {rn-sae}, %zmm8, %zmm15, %zmm4
+
+/* (K*L2L + Tl) + Rp*PolyP */
+ vfmadd213pd {rn-sae}, %zmm5, %zmm0, %zmm6
+ vorpd Half+__svml_datanh_data_internal_avx512(%rip), %zmm1, %zmm0 /* zmm0 = 0.5 with the sign of x */
+
+/* (K*L2L + Tl) + Rp*PolyP -Rm*PolyM */
+ vfnmadd213pd {rn-sae}, %zmm6, %zmm15, %zmm4
+ vaddpd {rn-sae}, %zmm4, %zmm3, %zmm1
+ vmulpd {rn-sae}, %zmm0, %zmm1, %zmm0 /* scale by the signed 0.5 */
+ testl %edx, %edx
+
+/* Go to special inputs processing branch */
+ jne L(SPECIAL_VALUES_BRANCH)
+ # LOE rbx r12 r13 r14 r15 edx zmm0 zmm2
+
+/* Restore registers
+ * and exit the function
+ */
+
+L(EXIT):
+ movq %rbp, %rsp
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+
+/* Branch to process
+ * special inputs
+ */
+
+L(SPECIAL_VALUES_BRANCH):
+ vmovups %zmm2, 64(%rsp) /* spill the original input vector */
+ vmovups %zmm0, 128(%rsp) /* spill the vector result */
+ # LOE rbx r12 r13 r14 r15 edx zmm0
+
+ xorl %eax, %eax
+ # LOE rbx r12 r13 r14 r15 eax edx
+
+ vzeroupper
+ movq %r12, 16(%rsp)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+ movl %eax, %r12d /* r12d = lane index, starting at 0 */
+ movq %r13, 8(%rsp)
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ movl %edx, %r13d /* r13d = bitmask of special lanes */
+ movq %r14, (%rsp)
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r15 r12d r13d
+
+/* Range mask
+ * bits check
+ */
+
+L(RANGEMASK_CHECK):
+ btl %r12d, %r13d /* CF = mask bit of the current lane */
+
+/* Call scalar math function */
+ jc L(SCALAR_MATH_CALL)
+ # LOE rbx r15 r12d r13d
+
+/* Special inputs
+ * processing loop
+ */
+
+L(SPECIAL_VALUES_LOOP):
+ incl %r12d
+ cmpl $8, %r12d
+
+/* Check bits in range mask */
+ jl L(RANGEMASK_CHECK)
+ # LOE rbx r15 r12d r13d
+
+ movq 16(%rsp), %r12
+ cfi_restore(12)
+ movq 8(%rsp), %r13
+ cfi_restore(13)
+ movq (%rsp), %r14
+ cfi_restore(14)
+ vmovups 128(%rsp), %zmm0 /* reload the result, including patched lanes */
+
+/* Go to exit */
+ jmp L(EXIT)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r12 r13 r14 r15 zmm0
+
+/* Scalar math function call
+ * to process special input
+ */
+
+L(SCALAR_MATH_CALL):
+ movl %r12d, %r14d
+ movsd 64(%rsp,%r14,8), %xmm0 /* load the input element of this lane */
+ call atanh@PLT
+ # LOE rbx r14 r15 r12d r13d xmm0
+
+ movsd %xmm0, 128(%rsp,%r14,8) /* overwrite this lane of the spilled result */
+
+/* Process special inputs in loop */
+ jmp L(SPECIAL_VALUES_LOOP)
+ # LOE rbx r15 r12d r13d
+END(_ZGVeN8v_atanh_skx)
+
+ .section .rodata, "a"
+ .align 64
+
+#ifdef __svml_datanh_data_internal_avx512_typedef
+typedef unsigned int VUINT32;
+typedef struct {
+ __declspec(align(64)) VUINT32 Log_tbl_H[16][2];
+ __declspec(align(64)) VUINT32 Log_tbl_L[16][2];
+ __declspec(align(64)) VUINT32 One[8][2];
+ __declspec(align(64)) VUINT32 AbsMask[8][2];
+ __declspec(align(64)) VUINT32 AddB5[8][2];
+ __declspec(align(64)) VUINT32 RcpBitMask[8][2];
+ __declspec(align(64)) VUINT32 poly_coeff8[8][2];
+ __declspec(align(64)) VUINT32 poly_coeff7[8][2];
+ __declspec(align(64)) VUINT32 poly_coeff6[8][2];
+ __declspec(align(64)) VUINT32 poly_coeff5[8][2];
+ __declspec(align(64)) VUINT32 poly_coeff4[8][2];
+ __declspec(align(64)) VUINT32 poly_coeff3[8][2];
+ __declspec(align(64)) VUINT32 poly_coeff2[8][2];
+ __declspec(align(64)) VUINT32 poly_coeff1[8][2];
+ __declspec(align(64)) VUINT32 poly_coeff0[8][2];
+ __declspec(align(64)) VUINT32 Half[8][2];
+ __declspec(align(64)) VUINT32 L2H[8][2];
+ __declspec(align(64)) VUINT32 L2L[8][2];
+ } __svml_datanh_data_internal_avx512;
+#endif
+__svml_datanh_data_internal_avx512:
+ /*== Log_tbl_H: 16 entries, looked up with vpermi2pd/vpermt2pd ==*/
+ .quad 0x0000000000000000
+ .quad 0x3faf0a30c0100000
+ .quad 0x3fbe27076e2a0000
+ .quad 0x3fc5ff3070a80000
+ .quad 0x3fcc8ff7c79b0000
+ .quad 0x3fd1675cabab8000
+ .quad 0x3fd4618bc21c8000
+ .quad 0x3fd739d7f6bc0000
+ .quad 0x3fd9f323ecbf8000
+ .quad 0x3fdc8ff7c79a8000
+ .quad 0x3fdf128f5faf0000
+ .quad 0x3fe0be72e4254000
+ .quad 0x3fe1e85f5e704000
+ .quad 0x3fe307d7334f0000
+ .quad 0x3fe41d8fe8468000
+ .quad 0x3fe52a2d265bc000
+ /*== Log_tbl_L: 16 entries, looked up with the same indices ==*/
+ .align 64
+ .quad 0x0000000000000000
+ .quad 0x3d662a6617cc9717
+ .quad 0x3d6e5cbd3d50fffc
+ .quad 0xbd6b0b0de3077d7e
+ .quad 0xbd697794f689f843
+ .quad 0x3d630701ce63eab9
+ .quad 0xbd609ec17a426426
+ .quad 0xbd67fcb18ed9d603
+ .quad 0x3d584bf2b68d766f
+ .quad 0x3d5a21ac25d81ef3
+ .quad 0x3d3bb2cd720ec44c
+ .quad 0xbd657d49676844cc
+ .quad 0x3d1a07bd8b34be7c
+ .quad 0x3d60be1fb590a1f5
+ .quad 0xbd5aa33736867a17
+ .quad 0x3d46abb9df22bc57
+ /*== One = 1.0 ==*/
+ .align 64
+ .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000
+ /*== AbsMask: clears the sign bit ==*/
+ .align 64
+ .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff
+ /*== AddB5: added to the reciprocals before RcpBitMask is applied ==*/
+ .align 64
+ .quad 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000
+ /*== RcpBitMask: keeps the upper 16 bits of each reciprocal ==*/
+ .align 64
+ .quad 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000
+ /*== poly_coeff8 ==*/
+ .align 64
+ .quad 0x3fbc81dd40d38142, 0x3fbc81dd40d38142, 0x3fbc81dd40d38142, 0x3fbc81dd40d38142, 0x3fbc81dd40d38142, 0x3fbc81dd40d38142, 0x3fbc81dd40d38142, 0x3fbc81dd40d38142
+ /*== poly_coeff7 ==*/
+ .align 64
+ .quad 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70, 0xbfc0073cb82e8b70
+ /*== poly_coeff6 ==*/
+ .align 64
+ .quad 0x3fc2492298ffdae8, 0x3fc2492298ffdae8, 0x3fc2492298ffdae8, 0x3fc2492298ffdae8, 0x3fc2492298ffdae8, 0x3fc2492298ffdae8, 0x3fc2492298ffdae8, 0x3fc2492298ffdae8
+ /*== poly_coeff5 ==*/
+ .align 64
+ .quad 0xbfc55553f871e5c5, 0xbfc55553f871e5c5, 0xbfc55553f871e5c5, 0xbfc55553f871e5c5, 0xbfc55553f871e5c5, 0xbfc55553f871e5c5, 0xbfc55553f871e5c5, 0xbfc55553f871e5c5
+ /*== poly_coeff4 ==*/
+ .align 64
+ .quad 0x3fc9999999cd394a, 0x3fc9999999cd394a, 0x3fc9999999cd394a, 0x3fc9999999cd394a, 0x3fc9999999cd394a, 0x3fc9999999cd394a, 0x3fc9999999cd394a, 0x3fc9999999cd394a
+ /*== poly_coeff3 ==*/
+ .align 64
+ .quad 0xbfd00000000c2a01, 0xbfd00000000c2a01, 0xbfd00000000c2a01, 0xbfd00000000c2a01, 0xbfd00000000c2a01, 0xbfd00000000c2a01, 0xbfd00000000c2a01, 0xbfd00000000c2a01
+ /*== poly_coeff2 ==*/
+ .align 64
+ .quad 0x3fd5555555555462, 0x3fd5555555555462, 0x3fd5555555555462, 0x3fd5555555555462, 0x3fd5555555555462, 0x3fd5555555555462, 0x3fd5555555555462, 0x3fd5555555555462
+ /*== poly_coeff1 ==*/
+ .align 64
+ .quad 0xbfdfffffffffffc5, 0xbfdfffffffffffc5, 0xbfdfffffffffffc5, 0xbfdfffffffffffc5, 0xbfdfffffffffffc5, 0xbfdfffffffffffc5, 0xbfdfffffffffffc5, 0xbfdfffffffffffc5
+ /*== poly_coeff0 = 1.0 ==*/
+ .align 64
+ .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000
+ /*== Half = 0.5; OR-ed with the sign of x for the final scaling ==*/
+ .align 64
+ .quad 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000
+ /*== L2H = log(2)_high ==*/
+ .align 64
+ .quad 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000
+ /*== L2L = log(2)_low ==*/
+ .align 64
+ .quad 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000
+ .align 64
+ .type __svml_datanh_data_internal_avx512,@object
+ .size __svml_datanh_data_internal_avx512,.-__svml_datanh_data_internal_avx512
new file mode 100644
@@ -0,0 +1,20 @@
+/* AVX2 wrapper version of vectorized atanhf, vector length is 16.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define _ZGVeN16v_atanhf _ZGVeN16v_atanhf_avx2_wrapper
+#include "../svml_s_atanhf16_core.S"
new file mode 100644
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized atanhf, vector length is 16.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN16v_atanhf
+#include "ifunc-mathvec-avx512-skx.h" /* NOTE(review): assumed to define REDIRECT_NAME and IFUNC_SELECTOR -- confirm */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16v_atanhf, __GI__ZGVeN16v_atanhf,
+ __redirect__ZGVeN16v_atanhf)
+ __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
new file mode 100644
@@ -0,0 +1,393 @@
+/* Function atanhf vectorized with AVX-512.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ * Compute atanh(x) as 0.5 * log((1 + x)/(1 - x))
+ * using a small lookup table that maps to AVX-512 permute instructions
+ *
+ * Special cases:
+ *
+ * atanh(0) = 0
+ * atanh(+1) = +INF
+ * atanh(-1) = -INF
+ * atanh(x) = NaN if |x| > 1, or if x is a NaN or INF
+ *
+ */
+
+/* Byte offsets into the data table __svml_satanh_data_internal_avx512;
+   the two log tables take 128 bytes each, every other field 64 bytes.  */
+#define Log_tbl_H 0
+#define Log_tbl_L 128
+#define One 256
+#define AbsMask 320
+#define AddB5 384
+#define RcpBitMask 448
+#define poly_coeff3 512
+#define poly_coeff2 576
+#define poly_coeff1 640
+#define poly_coeff0 704
+#define Half 768
+#define L2H 832
+#define L2L 896
+
+#include <sysdep.h>
+
+ .text
+ .section .text.evex512,"ax",@progbits
+ENTRY(_ZGVeN16v_atanhf_skx)
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-64, %rsp
+ subq $192, %rsp
+ vmovups One+__svml_satanh_data_internal_avx512(%rip), %zmm4
+
+/* round reciprocals to 1+5b mantissas */
+ vmovups AddB5+__svml_satanh_data_internal_avx512(%rip), %zmm14
+ vmovups RcpBitMask+__svml_satanh_data_internal_avx512(%rip), %zmm1
+ vmovaps %zmm0, %zmm11
+ vandps AbsMask+__svml_satanh_data_internal_avx512(%rip), %zmm11, %zmm6
+
+/* 1+y */
+ vaddps {rn-sae}, %zmm4, %zmm6, %zmm9
+
+/* 1-y */
+ vsubps {rn-sae}, %zmm6, %zmm4, %zmm8
+ vxorps %zmm6, %zmm11, %zmm10
+
+/* Yp_high */
+ vsubps {rn-sae}, %zmm4, %zmm9, %zmm2
+
+/* -Ym_high */
+ vsubps {rn-sae}, %zmm4, %zmm8, %zmm5
+
+/* RcpP ~ 1/Yp */
+ vrcp14ps %zmm9, %zmm12
+
+/* RcpM ~ 1/Ym */
+ vrcp14ps %zmm8, %zmm13
+
+/* input outside (-1, 1) ? (predicate 21 is NLT_UQ per the Intel SDM, so NaN lanes are flagged too) */
+ vcmpps $21, {sae}, %zmm4, %zmm6, %k0
+ vpaddd %zmm14, %zmm12, %zmm15
+ vpaddd %zmm14, %zmm13, %zmm0
+
+/* Yp_low */
+ vsubps {rn-sae}, %zmm2, %zmm6, %zmm3
+ vandps %zmm1, %zmm15, %zmm7
+ vandps %zmm1, %zmm0, %zmm12
+
+/* Ym_low */
+ vaddps {rn-sae}, %zmm5, %zmm6, %zmm5
+
+/* Reduced argument: Rp = (RcpP*Yp - 1)+RcpP*Yp_low */
+ vfmsub213ps {rn-sae}, %zmm4, %zmm7, %zmm9
+
+/* Reduced argument: Rm = (RcpM*Ym - 1)+RcpM*Ym_low */
+ vfmsub231ps {rn-sae}, %zmm12, %zmm8, %zmm4
+ vmovups Log_tbl_L+__svml_satanh_data_internal_avx512(%rip), %zmm8
+ vmovups Log_tbl_L+64+__svml_satanh_data_internal_avx512(%rip), %zmm13
+
+/* exponents */
+ vgetexpps {sae}, %zmm7, %zmm15
+ vfmadd231ps {rn-sae}, %zmm7, %zmm3, %zmm9
+
+/* Table lookups */
+ vmovups __svml_satanh_data_internal_avx512(%rip), %zmm6
+ vgetexpps {sae}, %zmm12, %zmm14
+ vfnmadd231ps {rn-sae}, %zmm12, %zmm5, %zmm4
+
+/* Prepare table index */
+ vpsrld $18, %zmm7, %zmm3
+ vpsrld $18, %zmm12, %zmm2
+ vmovups Log_tbl_H+64+__svml_satanh_data_internal_avx512(%rip), %zmm7
+ vmovups poly_coeff1+__svml_satanh_data_internal_avx512(%rip), %zmm12
+
+/* Km-Kp */
+ vsubps {rn-sae}, %zmm15, %zmm14, %zmm1
+ kmovw %k0, %edx
+ vmovaps %zmm3, %zmm0
+ vpermi2ps %zmm13, %zmm8, %zmm3
+ vpermt2ps %zmm13, %zmm2, %zmm8
+ vpermi2ps %zmm7, %zmm6, %zmm0
+ vpermt2ps %zmm7, %zmm2, %zmm6
+ vsubps {rn-sae}, %zmm3, %zmm8, %zmm5
+
+/* K*L2H + Th */
+ vmovups L2H+__svml_satanh_data_internal_avx512(%rip), %zmm2
+
+/* K*L2L + Tl */
+ vmovups L2L+__svml_satanh_data_internal_avx512(%rip), %zmm3
+
+/* polynomials */
+ vmovups poly_coeff3+__svml_satanh_data_internal_avx512(%rip), %zmm7
+ vmovups poly_coeff0+__svml_satanh_data_internal_avx512(%rip), %zmm13
+
+/* table values */
+ vsubps {rn-sae}, %zmm0, %zmm6, %zmm0
+ vfmadd231ps {rn-sae}, %zmm1, %zmm2, %zmm0
+ vfmadd213ps {rn-sae}, %zmm5, %zmm3, %zmm1
+ vmovups poly_coeff2+__svml_satanh_data_internal_avx512(%rip), %zmm3
+ vmovaps %zmm3, %zmm2
+ vfmadd231ps {rn-sae}, %zmm9, %zmm7, %zmm2
+ vfmadd231ps {rn-sae}, %zmm4, %zmm7, %zmm3
+ vfmadd213ps {rn-sae}, %zmm12, %zmm9, %zmm2
+ vfmadd213ps {rn-sae}, %zmm12, %zmm4, %zmm3
+ vfmadd213ps {rn-sae}, %zmm13, %zmm9, %zmm2
+ vfmadd213ps {rn-sae}, %zmm13, %zmm4, %zmm3
+
+/* (K*L2L + Tl) + Rp*PolyP */
+ vfmadd213ps {rn-sae}, %zmm1, %zmm9, %zmm2
+ vorps Half+__svml_satanh_data_internal_avx512(%rip), %zmm10, %zmm9
+
+/* (K*L2L + Tl) + Rp*PolyP -Rm*PolyM */
+ vfnmadd213ps {rn-sae}, %zmm2, %zmm4, %zmm3
+ vaddps {rn-sae}, %zmm3, %zmm0, %zmm4
+ vmulps {rn-sae}, %zmm9, %zmm4, %zmm0
+ testl %edx, %edx
+
+/* Go to special inputs processing branch */
+ jne L(SPECIAL_VALUES_BRANCH)
+ # LOE rbx r12 r13 r14 r15 edx zmm0 zmm11
+
+/* Restore registers
+ * and exit the function
+ */
+
+L(EXIT):
+ movq %rbp, %rsp
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+
+/* Special-input path: spill the input (zmm11) to 64(%rsp) and the
+ * vector result (zmm0) to 128(%rsp) so flagged lanes can be redone
+ */
+
+L(SPECIAL_VALUES_BRANCH):
+ vmovups %zmm11, 64(%rsp)
+ vmovups %zmm0, 128(%rsp)
+ # LOE rbx r12 r13 r14 r15 edx zmm0
+
+ xorl %eax, %eax
+ # LOE rbx r12 r13 r14 r15 eax edx
+
+ vzeroupper
+ movq %r12, 16(%rsp)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+ movl %eax, %r12d
+ movq %r13, 8(%rsp)
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ movl %edx, %r13d
+ movq %r14, (%rsp)
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r15 r12d r13d
+
+/* Test bit r12d (the lane index) of the
+ * lane mask held in r13d
+ */
+
+L(RANGEMASK_CHECK):
+ btl %r12d, %r13d
+
+/* Bit set: this lane needs the scalar math function */
+ jc L(SCALAR_MATH_CALL)
+ # LOE rbx r15 r12d r13d
+
+/* Advance to the next lane; once all 16 are checked,
+ * restore r12-r14 and reload the patched result
+ */
+
+L(SPECIAL_VALUES_LOOP):
+ incl %r12d
+ cmpl $16, %r12d
+
+/* Check bits in range mask */
+ jl L(RANGEMASK_CHECK)
+ # LOE rbx r15 r12d r13d
+
+ movq 16(%rsp), %r12
+ cfi_restore(12)
+ movq 8(%rsp), %r13
+ cfi_restore(13)
+ movq (%rsp), %r14
+ cfi_restore(14)
+ vmovups 128(%rsp), %zmm0
+
+/* Go to exit */
+ jmp L(EXIT)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r12 r13 r14 r15 zmm0
+
+/* Scalar math function call to process one special input: the saved
+ * input lane r14 goes to atanhf, whose result overwrites that lane
+ */
+
+L(SCALAR_MATH_CALL):
+ movl %r12d, %r14d
+ movss 64(%rsp,%r14,4), %xmm0
+ call atanhf@PLT
+ # LOE rbx r14 r15 r12d r13d xmm0
+
+ movss %xmm0, 128(%rsp,%r14,4)
+
+/* Process special inputs in loop */
+ jmp L(SPECIAL_VALUES_LOOP)
+ # LOE rbx r15 r12d r13d
+END(_ZGVeN16v_atanhf_skx)
+
+ .section .rodata, "a"
+ .align 64
+
+#ifdef __svml_satanh_data_internal_avx512_typedef
+typedef unsigned int VUINT32;
+typedef struct {
+ __declspec(align(64)) VUINT32 Log_tbl_H[32][1];
+ __declspec(align(64)) VUINT32 Log_tbl_L[32][1];
+ __declspec(align(64)) VUINT32 One[16][1];
+ __declspec(align(64)) VUINT32 AbsMask[16][1];
+ __declspec(align(64)) VUINT32 AddB5[16][1];
+ __declspec(align(64)) VUINT32 RcpBitMask[16][1];
+ __declspec(align(64)) VUINT32 poly_coeff3[16][1];
+ __declspec(align(64)) VUINT32 poly_coeff2[16][1];
+ __declspec(align(64)) VUINT32 poly_coeff1[16][1];
+ __declspec(align(64)) VUINT32 poly_coeff0[16][1];
+ __declspec(align(64)) VUINT32 Half[16][1];
+ __declspec(align(64)) VUINT32 L2H[16][1];
+ __declspec(align(64)) VUINT32 L2L[16][1];
+ } __svml_satanh_data_internal_avx512;
+#endif
+__svml_satanh_data_internal_avx512:
+ /*== Log_tbl_H ==*/
+ .long 0x00000000
+ .long 0x3cfc0000
+ .long 0x3d780000
+ .long 0x3db78000
+ .long 0x3df10000
+ .long 0x3e14c000
+ .long 0x3e300000
+ .long 0x3e4a8000
+ .long 0x3e648000
+ .long 0x3e7dc000
+ .long 0x3e8b4000
+ .long 0x3e974000
+ .long 0x3ea30000
+ .long 0x3eae8000
+ .long 0x3eb9c000
+ .long 0x3ec4e000
+ .long 0x3ecfa000
+ .long 0x3eda2000
+ .long 0x3ee48000
+ .long 0x3eeea000
+ .long 0x3ef8a000
+ .long 0x3f013000
+ .long 0x3f05f000
+ .long 0x3f0aa000
+ .long 0x3f0f4000
+ .long 0x3f13d000
+ .long 0x3f184000
+ .long 0x3f1ca000
+ .long 0x3f20f000
+ .long 0x3f252000
+ .long 0x3f295000
+ .long 0x3f2d7000
+ /*== Log_tbl_L ==*/
+ .align 64
+ .long 0x00000000
+ .long 0x3726c39e
+ .long 0x38a30c01
+ .long 0x37528ae5
+ .long 0x38e0edc5
+ .long 0xb8ab41f8
+ .long 0xb7cf8f58
+ .long 0x3896a73d
+ .long 0xb5838656
+ .long 0x380c36af
+ .long 0xb8235454
+ .long 0x3862bae1
+ .long 0x38c5e10e
+ .long 0x38dedfac
+ .long 0x38ebfb5e
+ .long 0xb8e63c9f
+ .long 0xb85c1340
+ .long 0x38777bcd
+ .long 0xb6038656
+ .long 0x37d40984
+ .long 0xb8b85028
+ .long 0xb8ad5a5a
+ .long 0x3865c84a
+ .long 0x38c3d2f5
+ .long 0x383ebce1
+ .long 0xb8a1ed76
+ .long 0xb7a332c4
+ .long 0xb779654f
+ .long 0xb8602f73
+ .long 0x38f85db0
+ .long 0x37b4996f
+ .long 0xb8bfb3ca
+ /*== One ==*/
+ .align 64
+ .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
+ /*== AbsMask ==*/
+ .align 64
+ .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
+ /*== AddB5 ==*/
+ .align 64
+ .long 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000
+ /*== RcpBitMask ==*/
+ .align 64
+ .long 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000
+ /*== poly_coeff3 ==*/
+ .align 64
+ .long 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810
+ /*== poly_coeff2 ==*/
+ .align 64
+ .long 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e
+ /*== poly_coeff1 ==*/
+ .align 64
+ .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000
+ /*== poly_coeff0 ==*/
+ .align 64
+ .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
+ /*== Half ==*/
+ .align 64
+ .long 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000
+ /*== L2H = log(2)_high ==*/
+ .align 64
+ .long 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000
+ /*== L2L = log(2)_low ==*/
+ .align 64
+ .long 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4
+ .align 64
+ .type __svml_satanh_data_internal_avx512,@object
+ .size __svml_satanh_data_internal_avx512,.-__svml_satanh_data_internal_avx512
new file mode 100644
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized atanhf, vector length is 4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+/* Build the implementation included below under the _sse2 name.  */
+#define _ZGVbN4v_atanhf _ZGVbN4v_atanhf_sse2
+#include "../svml_s_atanhf4_core.S"
new file mode 100644
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized atanhf, vector length is 4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN4v_atanhf
+#include "ifunc-mathvec-sse4_1.h"
+/* Resolve SYMBOL_NAME through an ifunc whose selector is IFUNC_SELECTOR (presumably provided by the header above -- confirm).  */
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+/* Shared builds also get a hidden-visibility __GI_ version of the symbol.  */
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4v_atanhf, __GI__ZGVbN4v_atanhf,
+ __redirect__ZGVbN4v_atanhf)
+ __attribute__ ((visibility ("hidden")));
+#endif
new file mode 100644
@@ -0,0 +1,361 @@
+/* Function atanhf vectorized with SSE4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ * Compute atanh(x) as 0.5 * log((1 + x)/(1 - x))
+ *
+ * Special cases:
+ *
+ * atanh(0) = 0
+ * atanh(+1) = +INF
+ * atanh(-1) = -INF
+ * atanh(x) = NaN if |x| > 1, or if x is a NaN or INF
+ *
+ */
+
+/* Byte offsets into the data table __svml_satanh_data_internal; each
+   field is one 16-byte vector except sPoly, which holds eight (P0..P7).  */
+#define SgnMask 0
+#define sOne 16
+#define sPoly 32
+#define iBrkValue 160
+#define iOffExpoMask 176
+#define sHalf 192
+#define sSign 208
+#define sTopMask12 224
+#define TinyRange 240
+#define sLn2 256
+
+#include <sysdep.h>
+
+ .text
+ .section .text.sse4,"ax",@progbits
+ENTRY(_ZGVbN4v_atanhf_sse4)
+ subq $72, %rsp
+ cfi_def_cfa_offset(80)
+ movaps %xmm0, %xmm5
+
+/* Load constants including One = 1 */
+ movups sOne+__svml_satanh_data_internal(%rip), %xmm4
+ movaps %xmm5, %xmm3
+
+/* Strip off the sign, so treat X as positive until right at the end */
+ movups SgnMask+__svml_satanh_data_internal(%rip), %xmm7
+ movaps %xmm4, %xmm8
+ andps %xmm5, %xmm7
+ movaps %xmm4, %xmm10
+ movups sTopMask12+__svml_satanh_data_internal(%rip), %xmm11
+ movaps %xmm4, %xmm14
+ movaps %xmm11, %xmm9
+
+/*
+ * Compute V = 2 * X trivially, and UHi + ULo = 1 - X in two pieces,
+ * the upper part UHi being <= 12 bits long. Then we have
+ * atanh(X) = 1/2 * log((1 + X) / (1 - X)) = 1/2 * log1p(V / (UHi + ULo)).
+ */
+ movaps %xmm7, %xmm12
+
+/*
+ * Check whether |X| < 1, in which case we use the main function.
+ * Otherwise set the rangemask so that the callout will get used.
+ * Note that this will also use the callout for NaNs since not(NaN < 1).
+ */
+ movaps %xmm7, %xmm6
+ movaps %xmm7, %xmm2
+ cmpnltps %xmm4, %xmm6
+ cmpltps TinyRange+__svml_satanh_data_internal(%rip), %xmm2
+ mulps %xmm5, %xmm3
+ subps %xmm7, %xmm8
+ addps %xmm7, %xmm12
+ movmskps %xmm6, %edx
+ subps %xmm8, %xmm10
+ addps %xmm5, %xmm3
+ subps %xmm7, %xmm10
+ andps %xmm8, %xmm9
+
+/*
+ * Now we feed into the log1p code, using H in place of _VARG1 and
+ * later incorporating L into the reduced argument.
+ * compute 1+x as high, low parts
+ */
+ movaps %xmm4, %xmm7
+
+/*
+ * Now compute R = 1/(UHi+ULo) * (1 - E) and the error term E
+ * The first FMR is exact (we force R to 12 bits just in case it
+ * isn't already, to make absolutely sure), and since E is ~ 2^-12,
+ * the rounding error in the other one is acceptable.
+ */
+ rcpps %xmm9, %xmm15
+ subps %xmm9, %xmm8
+ andps %xmm11, %xmm15
+
+/*
+ * Split V as well into upper 12 bits and lower part, so that we can get
+ * a preliminary quotient estimate without rounding error.
+ */
+ andps %xmm12, %xmm11
+ mulps %xmm15, %xmm9
+ addps %xmm8, %xmm10
+ subps %xmm11, %xmm12
+
+/* Hence get initial quotient estimate QHi + QLo = R * VHi + R * VLo */
+ mulps %xmm15, %xmm11
+ mulps %xmm15, %xmm10
+ subps %xmm9, %xmm14
+ mulps %xmm12, %xmm15
+ subps %xmm10, %xmm14
+
+/* Compute D = E + E^2 */
+ movaps %xmm14, %xmm13
+ movaps %xmm4, %xmm8
+ mulps %xmm14, %xmm13
+
+/* reduction: compute r,n */
+ movdqu iBrkValue+__svml_satanh_data_internal(%rip), %xmm9
+ addps %xmm13, %xmm14
+
+/*
+ * Compute R * (VHi + VLo) * (1 + E + E^2)
+ * = R * (VHi + VLo) * (1 + D)
+ * = QHi + (QHi * D + QLo + QLo * D)
+ */
+ movaps %xmm14, %xmm0
+ mulps %xmm15, %xmm14
+ mulps %xmm11, %xmm0
+ addps %xmm14, %xmm15
+ movdqu iOffExpoMask+__svml_satanh_data_internal(%rip), %xmm12
+ movaps %xmm4, %xmm14
+
+/* Record the sign for eventual reincorporation. */
+ movups sSign+__svml_satanh_data_internal(%rip), %xmm1
+ addps %xmm15, %xmm0
+
+/*
+ * Now finally accumulate the high and low parts of the
+ * argument to log1p, H + L, with a final compensated summation.
+ */
+ movaps %xmm0, %xmm6
+ andps %xmm5, %xmm1
+
+/* Or the sign bit in with the tiny result to handle atanh(-0) correctly */
+ orps %xmm1, %xmm3
+ addps %xmm11, %xmm6
+ maxps %xmm6, %xmm7
+ minps %xmm6, %xmm8
+ subps %xmm6, %xmm11
+ movaps %xmm7, %xmm10
+ andps %xmm2, %xmm3
+ addps %xmm8, %xmm10
+ addps %xmm11, %xmm0
+ subps %xmm10, %xmm7
+ psubd %xmm9, %xmm10
+ addps %xmm7, %xmm8
+ pand %xmm10, %xmm12
+ psrad $23, %xmm10
+ cvtdq2ps %xmm10, %xmm13
+ addps %xmm8, %xmm0
+
+/* final reconstruction */
+ mulps sLn2+__svml_satanh_data_internal(%rip), %xmm13
+ pslld $23, %xmm10
+ paddd %xmm9, %xmm12
+ psubd %xmm10, %xmm14
+
+/* polynomial evaluation */
+ subps %xmm4, %xmm12
+ mulps %xmm0, %xmm14
+ movups sPoly+112+__svml_satanh_data_internal(%rip), %xmm0
+ addps %xmm12, %xmm14
+ mulps %xmm14, %xmm0
+
+/* Finally, halve the result and reincorporate the sign */
+ movups sHalf+__svml_satanh_data_internal(%rip), %xmm4
+ pxor %xmm1, %xmm4
+ addps sPoly+96+__svml_satanh_data_internal(%rip), %xmm0
+ mulps %xmm14, %xmm0
+ addps sPoly+80+__svml_satanh_data_internal(%rip), %xmm0
+ mulps %xmm14, %xmm0
+ addps sPoly+64+__svml_satanh_data_internal(%rip), %xmm0
+ mulps %xmm14, %xmm0
+ addps sPoly+48+__svml_satanh_data_internal(%rip), %xmm0
+ mulps %xmm14, %xmm0
+ addps sPoly+32+__svml_satanh_data_internal(%rip), %xmm0
+ mulps %xmm14, %xmm0
+ addps sPoly+16+__svml_satanh_data_internal(%rip), %xmm0
+ mulps %xmm14, %xmm0
+ addps sPoly+__svml_satanh_data_internal(%rip), %xmm0
+ mulps %xmm14, %xmm0
+ mulps %xmm14, %xmm0
+ addps %xmm0, %xmm14
+ movaps %xmm2, %xmm0
+ addps %xmm13, %xmm14
+ mulps %xmm14, %xmm4
+ andnps %xmm4, %xmm0
+ orps %xmm3, %xmm0
+ testl %edx, %edx
+
+/* Go to special inputs processing branch */
+ jne L(SPECIAL_VALUES_BRANCH)
+ # LOE rbx rbp r12 r13 r14 r15 edx xmm0 xmm5
+
+/* Restore registers
+ * and exit the function
+ */
+
+L(EXIT):
+ addq $72, %rsp
+ cfi_def_cfa_offset(8)
+ ret
+ cfi_def_cfa_offset(80)
+
+/* Special-input path: spill the input (xmm5) to 32(%rsp) and the
+ * vector result (xmm0) to 48(%rsp) so flagged lanes can be redone
+ */
+
+L(SPECIAL_VALUES_BRANCH):
+ movups %xmm5, 32(%rsp)
+ movups %xmm0, 48(%rsp)
+ # LOE rbx rbp r12 r13 r14 r15 edx
+
+ xorl %eax, %eax
+ movq %r12, 16(%rsp)
+ cfi_offset(12, -64)
+ movl %eax, %r12d
+ movq %r13, 8(%rsp)
+ cfi_offset(13, -72)
+ movl %edx, %r13d
+ movq %r14, (%rsp)
+ cfi_offset(14, -80)
+ # LOE rbx rbp r15 r12d r13d
+
+/* Test bit r12d (the lane index) of the
+ * lane mask held in r13d
+ */
+
+L(RANGEMASK_CHECK):
+ btl %r12d, %r13d
+
+/* Bit set: this lane needs the scalar math function */
+ jc L(SCALAR_MATH_CALL)
+ # LOE rbx rbp r15 r12d r13d
+
+/* Advance to the next lane; once all 4 are checked,
+ * restore r12-r14 and reload the patched result
+ */
+
+L(SPECIAL_VALUES_LOOP):
+ incl %r12d
+ cmpl $4, %r12d
+
+/* Check bits in range mask */
+ jl L(RANGEMASK_CHECK)
+ # LOE rbx rbp r15 r12d r13d
+
+ movq 16(%rsp), %r12
+ cfi_restore(12)
+ movq 8(%rsp), %r13
+ cfi_restore(13)
+ movq (%rsp), %r14
+ cfi_restore(14)
+ movups 48(%rsp), %xmm0
+
+/* Go to exit */
+ jmp L(EXIT)
+ cfi_offset(12, -64)
+ cfi_offset(13, -72)
+ cfi_offset(14, -80)
+ # LOE rbx rbp r12 r13 r14 r15 xmm0
+
+/* Scalar math function call to process one special input: the saved
+ * input lane r14 goes to atanhf, whose result overwrites that lane
+ */
+
+L(SCALAR_MATH_CALL):
+ movl %r12d, %r14d
+ movss 32(%rsp,%r14,4), %xmm0
+ call atanhf@PLT
+ # LOE rbx rbp r14 r15 r12d r13d xmm0
+
+ movss %xmm0, 48(%rsp,%r14,4)
+
+/* Process special inputs in loop */
+ jmp L(SPECIAL_VALUES_LOOP)
+ # LOE rbx rbp r15 r12d r13d
+END(_ZGVbN4v_atanhf_sse4)
+
+ .section .rodata, "a"
+ .align 16
+
+#ifdef __svml_satanh_data_internal_typedef
+typedef unsigned int VUINT32;
+typedef struct {
+ __declspec(align(16)) VUINT32 SgnMask[4][1];
+ __declspec(align(16)) VUINT32 sOne[4][1];
+ __declspec(align(16)) VUINT32 sPoly[8][4][1];
+ __declspec(align(16)) VUINT32 iBrkValue[4][1];
+ __declspec(align(16)) VUINT32 iOffExpoMask[4][1];
+ __declspec(align(16)) VUINT32 sHalf[4][1];
+ __declspec(align(16)) VUINT32 sSign[4][1];
+ __declspec(align(16)) VUINT32 sTopMask12[4][1];
+ __declspec(align(16)) VUINT32 TinyRange[4][1];
+ __declspec(align(16)) VUINT32 sLn2[4][1];
+} __svml_satanh_data_internal;
+#endif
+__svml_satanh_data_internal:
+ /*== SgnMask ==*/
+ .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
+ /*== sOne = SP 1.0 ==*/
+ .align 16
+ .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
+ /*== sPoly[] = SP polynomial ==*/
+ .align 16
+ .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */
+ .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */
+ .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */
+ .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */
+ .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */
+ .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */
+ .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */
+ .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */
+ /*== iBrkValue = SP 2/3 ==*/
+ .align 16
+ .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab
+ /*== iOffExpoMask = SP significand mask ==*/
+ .align 16
+ .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff
+ /*== sHalf ==*/
+ .align 16
+ .long 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000
+ /*== sSign ==*/
+ .align 16
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+ /*== sTopMask12 ==*/
+ .align 16
+ .long 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000
+ /*== TinyRange ==*/
+ .align 16
+ .long 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000
+ /*== sLn2 = SP ln(2) ==*/
+ .align 16
+ .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218
+ .align 16
+ .type __svml_satanh_data_internal,@object
+ .size __svml_satanh_data_internal,.-__svml_satanh_data_internal
new file mode 100644
@@ -0,0 +1,20 @@
+/* SSE version of vectorized atanhf, vector length is 8.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+/* Build the implementation included below under the _sse_wrapper name.  */
+#define _ZGVdN8v_atanhf _ZGVdN8v_atanhf_sse_wrapper
+#include "../svml_s_atanhf8_core.S"
new file mode 100644
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized atanhf, vector length is 8.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN8v_atanhf
+#include "ifunc-mathvec-avx2.h"
+/* Resolve SYMBOL_NAME through an ifunc whose selector is IFUNC_SELECTOR (presumably provided by the header above -- confirm).  */
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+/* Shared builds also get a hidden-visibility __GI_ version of the symbol.  */
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8v_atanhf, __GI__ZGVdN8v_atanhf,
+ __redirect__ZGVdN8v_atanhf)
+ __attribute__ ((visibility ("hidden")));
+#endif
new file mode 100644
@@ -0,0 +1,335 @@
+/* Function atanhf vectorized with AVX2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ * Compute atanh(x) as 0.5 * log((1 + x)/(1 - x))
+ *
+ * Special cases:
+ *
+ * atanh(0) = 0
+ * atanh(+1) = +INF
+ * atanh(-1) = -INF
+ * atanh(x) = NaN if |x| > 1, or if x is a NaN or INF
+ *
+ */
+
+/* Byte offsets of the fields in data table __svml_satanh_data_internal;
+ each field is a 32-byte row, sPoly spans eight of them. */
+#define SgnMask 0
+#define sOne 32
+#define sPoly 64
+#define iBrkValue 320
+#define iOffExpoMask 352
+#define sHalf 384
+#define sSign 416
+#define sTopMask12 448
+#define TinyRange 480
+#define sLn2 512
+
+#include <sysdep.h>
+
+ .text
+ .section .text.avx2,"ax",@progbits
+ENTRY(_ZGVdN8v_atanhf_avx2)
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-32, %rsp
+ subq $96, %rsp
+
+/* Load constants including One = 1 */
+ vmovups sOne+__svml_satanh_data_internal(%rip), %ymm5
+ vmovups sTopMask12+__svml_satanh_data_internal(%rip), %ymm13
+ vmovaps %ymm0, %ymm6
+
+/* Strip off the sign, so treat X as positive until right at the end */
+ vandps SgnMask+__svml_satanh_data_internal(%rip), %ymm6, %ymm10
+ vsubps %ymm10, %ymm5, %ymm1
+
+/*
+ * Compute V = 2 * X trivially, and UHi + ULo = 1 - X in two pieces:
+ * UHi is the rounded difference, ULo the part that rounding dropped.
+ * atanh(X) = 1/2 * log((1 + X) / (1 - X)) = 1/2 * log1p(V / (UHi + ULo)).
+ */
+ vaddps %ymm10, %ymm10, %ymm14
+
+/*
+ * Check whether |X| < 1, in which case we use the main function.
+ * Otherwise set the rangemask so that the callout will get used.
+ * Note that this will also use the callout for NaNs since not(NaN < 1).
+ */
+ vcmpnlt_uqps %ymm5, %ymm10, %ymm7
+ vsubps %ymm1, %ymm5, %ymm9
+ vcmplt_oqps TinyRange+__svml_satanh_data_internal(%rip), %ymm10, %ymm4
+ vrcpps %ymm1, %ymm11
+ vsubps %ymm10, %ymm9, %ymm12
+ vandps %ymm13, %ymm11, %ymm0
+
+/* E = 1 - R * (UHi + ULo), R = masked vrcpps estimate. No need to split sU when FMA is available */
+ vfnmadd213ps %ymm5, %ymm0, %ymm1
+ vmovaps %ymm6, %ymm8
+ vfmadd213ps %ymm6, %ymm6, %ymm8
+ vfnmadd231ps %ymm0, %ymm12, %ymm1
+
+/*
+ * Split V as well into upper 12 bits and lower part, so that we can get
+ * a preliminary quotient estimate without rounding error.
+ */
+ vandps %ymm13, %ymm14, %ymm15
+ vmovmskps %ymm7, %edx
+ vsubps %ymm15, %ymm14, %ymm7
+
+/* Hence get initial quotient estimate QHi + QLo = R * VHi + R * VLo */
+ vmulps %ymm15, %ymm0, %ymm10
+
+/* Compute D = E + E^2 */
+ vfmadd213ps %ymm1, %ymm1, %ymm1
+
+/* Record the sign for eventual reincorporation. */
+ vandps sSign+__svml_satanh_data_internal(%rip), %ymm6, %ymm3
+
+/* Tiny result is X + X * X (computed above); or in the sign bit to handle atanh(-0) correctly */
+ vorps %ymm3, %ymm8, %ymm2
+ vmulps %ymm7, %ymm0, %ymm8
+
+/*
+ * Compute R * (VHi + VLo) * (1 + E + E^2)
+ * = R * (VHi + VLo) * (1 + D)
+ * = QHi + (QHi * D + QLo + QLo * D)
+ */
+ vmulps %ymm1, %ymm10, %ymm9
+ vfmadd213ps %ymm8, %ymm8, %ymm1
+ vaddps %ymm1, %ymm9, %ymm1
+
+/* reduction: compute r,n */
+ vmovups iBrkValue+__svml_satanh_data_internal(%rip), %ymm9
+
+/*
+ * Now finally accumulate the high and low parts of the
+ * argument to log1p, H + L, with a final compensated summation.
+ */
+ vaddps %ymm1, %ymm10, %ymm12
+ vsubps %ymm12, %ymm10, %ymm11
+
+/*
+ * Now we feed into the log1p code, using H in place of _VARG1 and
+ * later incorporating L into the reduced argument.
+ * compute 1+x as high, low parts
+ */
+ vmaxps %ymm12, %ymm5, %ymm13
+ vminps %ymm12, %ymm5, %ymm14
+ vaddps %ymm11, %ymm1, %ymm0
+ vaddps %ymm14, %ymm13, %ymm1
+ vpsubd %ymm9, %ymm1, %ymm7
+ vsubps %ymm1, %ymm13, %ymm15
+ vpsrad $23, %ymm7, %ymm10
+ vpand iOffExpoMask+__svml_satanh_data_internal(%rip), %ymm7, %ymm8
+ vaddps %ymm15, %ymm14, %ymm13
+ vpslld $23, %ymm10, %ymm11
+ vpaddd %ymm9, %ymm8, %ymm15
+ vaddps %ymm13, %ymm0, %ymm14
+ vcvtdq2ps %ymm10, %ymm0
+ vpsubd %ymm11, %ymm5, %ymm12
+
+/* polynomial evaluation */
+ vsubps %ymm5, %ymm15, %ymm5
+ vmulps %ymm14, %ymm12, %ymm1
+ vaddps %ymm5, %ymm1, %ymm5
+ vmovups sPoly+224+__svml_satanh_data_internal(%rip), %ymm1
+ vfmadd213ps sPoly+192+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
+ vfmadd213ps sPoly+160+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
+ vfmadd213ps sPoly+128+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
+ vfmadd213ps sPoly+96+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
+ vfmadd213ps sPoly+64+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
+ vfmadd213ps sPoly+32+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
+ vfmadd213ps sPoly+__svml_satanh_data_internal(%rip), %ymm5, %ymm1
+ vmulps %ymm1, %ymm5, %ymm7
+ vfmadd213ps %ymm5, %ymm5, %ymm7
+
+/* final reconstruction */
+ vfmadd132ps sLn2+__svml_satanh_data_internal(%rip), %ymm7, %ymm0
+
+/* Finally, halve the result and reincorporate the sign; lanes below TinyRange take the tiny result */
+ vxorps sHalf+__svml_satanh_data_internal(%rip), %ymm3, %ymm3
+ vmulps %ymm0, %ymm3, %ymm0
+ vblendvps %ymm4, %ymm2, %ymm0, %ymm0
+ testl %edx, %edx
+
+/* Go to special inputs processing branch */
+ jne L(SPECIAL_VALUES_BRANCH)
+ # LOE rbx r12 r13 r14 r15 edx ymm0 ymm6
+
+/* Restore registers
+ * and exit the function
+ */
+
+L(EXIT):
+ movq %rbp, %rsp
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+
+/* Special inputs: spill the input to 32(%rsp) and the vector result
+ * to 64(%rsp) so flagged lanes can be redone one at a time
+ */
+
+L(SPECIAL_VALUES_BRANCH):
+ vmovups %ymm6, 32(%rsp)
+ vmovups %ymm0, 64(%rsp)
+ # LOE rbx r12 r13 r14 r15 edx ymm0
+
+ xorl %eax, %eax
+ # LOE rbx r12 r13 r14 r15 eax edx
+
+ vzeroupper
+ movq %r12, 16(%rsp)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
+ movl %eax, %r12d
+ movq %r13, 8(%rsp)
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
+ movl %edx, %r13d
+ movq %r14, (%rsp)
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r15 r12d r13d
+
+/* Range mask bits check:
+ * is bit r12d (lane index) set in r13d (range mask)?
+ */
+
+L(RANGEMASK_CHECK):
+ btl %r12d, %r13d
+
+/* Call scalar math function */
+ jc L(SCALAR_MATH_CALL)
+ # LOE rbx r15 r12d r13d
+
+/* Special inputs
+ * processing loop: advance to the next of the 8 lanes
+ */
+
+L(SPECIAL_VALUES_LOOP):
+ incl %r12d
+ cmpl $8, %r12d
+
+/* Check bits in range mask */
+ jl L(RANGEMASK_CHECK)
+ # LOE rbx r15 r12d r13d
+
+ movq 16(%rsp), %r12
+ cfi_restore(12)
+ movq 8(%rsp), %r13
+ cfi_restore(13)
+ movq (%rsp), %r14
+ cfi_restore(14)
+ vmovups 64(%rsp), %ymm0
+
+/* Go to exit */
+ jmp L(EXIT)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r12 r13 r14 r15 ymm0
+
+/* Scalar math function call
+ * to process special input
+ */
+
+L(SCALAR_MATH_CALL):
+ movl %r12d, %r14d
+ movss 32(%rsp,%r14,4), %xmm0
+ call atanhf@PLT
+ # LOE rbx r14 r15 r12d r13d xmm0
+
+ movss %xmm0, 64(%rsp,%r14,4)
+
+/* Process special inputs in loop */
+ jmp L(SPECIAL_VALUES_LOOP)
+ # LOE rbx r15 r12d r13d
+END(_ZGVdN8v_atanhf_avx2)
+
+ .section .rodata, "a"
+ .align 32
+
+#ifdef __svml_satanh_data_internal_typedef
+typedef unsigned int VUINT32;
+typedef struct {
+ __declspec(align(32)) VUINT32 SgnMask[8][1];
+ __declspec(align(32)) VUINT32 sOne[8][1];
+ __declspec(align(32)) VUINT32 sPoly[8][8][1];
+ __declspec(align(32)) VUINT32 iBrkValue[8][1];
+ __declspec(align(32)) VUINT32 iOffExpoMask[8][1];
+ __declspec(align(32)) VUINT32 sHalf[8][1];
+ __declspec(align(32)) VUINT32 sSign[8][1];
+ __declspec(align(32)) VUINT32 sTopMask12[8][1];
+ __declspec(align(32)) VUINT32 TinyRange[8][1];
+ __declspec(align(32)) VUINT32 sLn2[8][1];
+} __svml_satanh_data_internal;
+#endif
+__svml_satanh_data_internal:
+ /*== SgnMask = mask clearing the sign bit ==*/
+ .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
+ /*== sOne = SP 1.0 ==*/
+ .align 32
+ .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
+ /*== sPoly[] = SP polynomial ==*/
+ .align 32
+ .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */
+ .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */
+ .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */
+ .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */
+ .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */
+ .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */
+ .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */
+ .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */
+ /*== iBrkValue = SP 2/3 ==*/
+ .align 32
+ .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab
+ /*== iOffExpoMask = SP significand mask ==*/
+ .align 32
+ .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff
+ /*== sHalf = SP 0.5 ==*/
+ .align 32
+ .long 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000
+ /*== sSign = sign bit mask ==*/
+ .align 32
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000
+ /*== sTopMask12 = mask clearing the low 12 bits ==*/
+ .align 32
+ .long 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000
+ /*== TinyRange = SP 2^-103 ==*/
+ .align 32
+ .long 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000
+ /*== sLn2 = SP ln(2) ==*/
+ .align 32
+ .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218
+ .align 32
+ .type __svml_satanh_data_internal,@object
+ .size __svml_satanh_data_internal,.-__svml_satanh_data_internal
new file mode 100644
@@ -0,0 +1,29 @@
+/* Function atanh vectorized with SSE2, wrapper version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVbN2v_atanh)
+WRAPPER_IMPL_SSE2 atanh /* macro defined outside this file; presumably runs scalar atanh on each lane -- confirm */
+END (_ZGVbN2v_atanh)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVbN2v_atanh)
+#endif
new file mode 100644
@@ -0,0 +1,29 @@
+/* Function atanh vectorized with AVX2, wrapper version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVdN4v_atanh)
+WRAPPER_IMPL_AVX _ZGVbN2v_atanh /* wrapped routine is the SSE entry point; macro defined outside this file -- presumably svml_d_wrapper_impl.h */
+END (_ZGVdN4v_atanh)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVdN4v_atanh)
+#endif
new file mode 100644
@@ -0,0 +1,25 @@
+/* Function atanh vectorized in AVX ISA as wrapper to SSE4 ISA version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVcN4v_atanh)
+WRAPPER_IMPL_AVX _ZGVbN2v_atanh /* wrapped routine is the SSE entry point; macro defined outside this file -- presumably svml_d_wrapper_impl.h */
+END (_ZGVcN4v_atanh)
new file mode 100644
@@ -0,0 +1,25 @@
+/* Function atanh vectorized with AVX-512, wrapper to AVX2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVeN8v_atanh)
+WRAPPER_IMPL_AVX512 _ZGVdN4v_atanh /* wrapped routine is the AVX2 entry point; macro defined outside this file -- presumably svml_d_wrapper_impl.h */
+END (_ZGVeN8v_atanh)
new file mode 100644
@@ -0,0 +1,25 @@
+/* Function atanhf vectorized with AVX-512. Wrapper to AVX2 version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVeN16v_atanhf)
+WRAPPER_IMPL_AVX512 _ZGVdN8v_atanhf /* wrapped routine is the AVX2 entry point; macro defined outside this file -- presumably svml_s_wrapper_impl.h */
+END (_ZGVeN16v_atanhf)
new file mode 100644
@@ -0,0 +1,29 @@
+/* Function atanhf vectorized with SSE2, wrapper version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVbN4v_atanhf)
+WRAPPER_IMPL_SSE2 atanhf /* macro defined outside this file; presumably runs scalar atanhf on each lane -- confirm */
+END (_ZGVbN4v_atanhf)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVbN4v_atanhf)
+#endif
new file mode 100644
@@ -0,0 +1,29 @@
+/* Function atanhf vectorized with AVX2, wrapper version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVdN8v_atanhf)
+WRAPPER_IMPL_AVX _ZGVbN4v_atanhf /* wrapped routine is the SSE entry point; macro defined outside this file -- presumably svml_s_wrapper_impl.h */
+END (_ZGVdN8v_atanhf)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVdN8v_atanhf)
+#endif
new file mode 100644
@@ -0,0 +1,25 @@
+/* Function atanhf vectorized in AVX ISA as wrapper to SSE4 ISA version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVcN8v_atanhf)
+WRAPPER_IMPL_AVX _ZGVbN4v_atanhf /* wrapped routine is the SSE entry point; macro defined outside this file -- presumably svml_s_wrapper_impl.h */
+END (_ZGVcN8v_atanhf)
new file mode 100644
@@ -0,0 +1 @@
+#include "test-double-libmvec-atanh.c"
new file mode 100644
@@ -0,0 +1 @@
+#include "test-double-libmvec-atanh.c"
new file mode 100644
@@ -0,0 +1 @@
+#include "test-double-libmvec-atanh.c"
new file mode 100644
@@ -0,0 +1,3 @@
+#define LIBMVEC_TYPE double
+#define LIBMVEC_FUNC atanh
+#include "test-vector-abi-arg1.h"
@@ -41,6 +41,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVbN2vv_atan2)
VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVbN2v_log10)
VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVbN2v_log2)
VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVbN2v_log1p)
+VECTOR_WRAPPER (WRAPPER_NAME (atanh), _ZGVbN2v_atanh)
#define VEC_INT_TYPE __m128i
@@ -44,6 +44,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVdN4vv_atan2)
VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVdN4v_log10)
VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVdN4v_log2)
VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVdN4v_log1p)
+VECTOR_WRAPPER (WRAPPER_NAME (atanh), _ZGVdN4v_atanh)
#ifndef __ILP32__
# define VEC_INT_TYPE __m256i
@@ -41,6 +41,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVcN4vv_atan2)
VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVcN4v_log10)
VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVcN4v_log2)
VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVcN4v_log1p)
+VECTOR_WRAPPER (WRAPPER_NAME (atanh), _ZGVcN4v_atanh)
#define VEC_INT_TYPE __m128i
@@ -41,6 +41,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVeN8vv_atan2)
VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVeN8v_log10)
VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVeN8v_log2)
VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVeN8v_log1p)
+VECTOR_WRAPPER (WRAPPER_NAME (atanh), _ZGVeN8v_atanh)
#ifndef __ILP32__
# define VEC_INT_TYPE __m512i
new file mode 100644
@@ -0,0 +1 @@
+#include "test-float-libmvec-atanhf.c"
new file mode 100644
@@ -0,0 +1 @@
+#include "test-float-libmvec-atanhf.c"
new file mode 100644
@@ -0,0 +1 @@
+#include "test-float-libmvec-atanhf.c"
new file mode 100644
@@ -0,0 +1,3 @@
+#define LIBMVEC_TYPE float
+#define LIBMVEC_FUNC atanhf
+#include "test-vector-abi-arg1.h"
@@ -41,6 +41,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVeN16vv_atan2f)
VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVeN16v_log10f)
VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVeN16v_log2f)
VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVeN16v_log1pf)
+VECTOR_WRAPPER (WRAPPER_NAME (atanhf), _ZGVeN16v_atanhf)
#define VEC_INT_TYPE __m512i
@@ -41,6 +41,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVbN4vv_atan2f)
VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVbN4v_log10f)
VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVbN4v_log2f)
VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVbN4v_log1pf)
+VECTOR_WRAPPER (WRAPPER_NAME (atanhf), _ZGVbN4v_atanhf)
#define VEC_INT_TYPE __m128i
@@ -44,6 +44,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVdN8vv_atan2f)
VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVdN8v_log10f)
VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVdN8v_log2f)
VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVdN8v_log1pf)
+VECTOR_WRAPPER (WRAPPER_NAME (atanhf), _ZGVdN8v_atanhf)
/* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. */
#undef VECTOR_WRAPPER_fFF
@@ -41,6 +41,7 @@ VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVcN8vv_atan2f)
VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVcN8v_log10f)
VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVcN8v_log2f)
VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVcN8v_log1pf)
+VECTOR_WRAPPER (WRAPPER_NAME (atanhf), _ZGVcN8v_atanhf)
#define VEC_INT_TYPE __m128i