From patchwork Tue Feb 1 20:58:38 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Noah Goldstein X-Patchwork-Id: 50647 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 080EF3858437 for ; Tue, 1 Feb 2022 20:59:14 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 080EF3858437 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1643749154; bh=QEmzu+6skffhJupg7vc/YnB8Fj9JWkR6GhELyAsTIlg=; h=To:Subject:Date:In-Reply-To:References:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=nQ71pptiESAHM/nYh9Kmw+/yf1tZmHqSZsOI5FK/a0gtTKlk6hcU+rcImVdXrK9qm t4lwWMa8++FiXr7Ifn9tQ+AtLCkkiNl/6SBJ2vZ23Y16UqCwi5oKL3ULWtDDBENu8t peY1iP5gH9xhm06578kr7e9MKAE7do6vFdjXRdoA= X-Original-To: libc-alpha@sourceware.org Delivered-To: libc-alpha@sourceware.org Received: from mail-io1-xd2f.google.com (mail-io1-xd2f.google.com [IPv6:2607:f8b0:4864:20::d2f]) by sourceware.org (Postfix) with ESMTPS id 666EC3858437 for ; Tue, 1 Feb 2022 20:58:51 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 666EC3858437 Received: by mail-io1-xd2f.google.com with SMTP id z199so22830057iof.10 for ; Tue, 01 Feb 2022 12:58:51 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=QEmzu+6skffhJupg7vc/YnB8Fj9JWkR6GhELyAsTIlg=; b=0XNtgFWapovzHjhknXoObyIBChzGF28EhRuqONZb+vsD5UurdLY7DTAjzjD1MTgoed 2vgkSNZ3nWFqeACBbudJujHJ8n3gLHXukTp7rpRs4xJdupLroCKtwyW6ZE3HvfzXclqR 8oqDRkroCLT+22Nd0OPqKiL4V6LwzFo7m3RGs4iGHNVW1e7x94g5xJ4m0aE0V0Ek27MK zMoJoS3A53njdKpzLmlEx7j/SNJX0BY3nWyfk2inEWU8mj8LFjMlkLK9JUQEidFkQSV4 
Z9SZcX+LnIckuf3nUQB4t40lbg8uKYuo1vzt3VGzmay/e6wTWgbyA5zFYo9De7ZEpcEJ oSMA== X-Gm-Message-State: AOAM531ryQSegRkfp/oZDt6N4DOESQYt9eFH2q6TlMTkHwXmYJaLpymK RcMC2SfcH/4DjojNQOs40J49i7RxXTs= X-Google-Smtp-Source: ABdhPJzCgTh9QJA032gfWR7ExSvjtHbqorDf7itzhHkSoHh666wDhMzXIOzbjiCmqO7T/7qLkmnb5Q== X-Received: by 2002:a05:6638:258d:: with SMTP id s13mr5739381jat.221.1643749130152; Tue, 01 Feb 2022 12:58:50 -0800 (PST) Received: from localhost.localdomain (node-17-161.flex.volo.net. [76.191.17.161]) by smtp.googlemail.com with ESMTPSA id u17sm9144308ilk.49.2022.02.01.12.58.49 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 01 Feb 2022 12:58:49 -0800 (PST) To: libc-alpha@sourceware.org Subject: [PATCH v2 1/3] x86: Optimize svml_s_tanhf_core_avx512.S Date: Tue, 1 Feb 2022 14:58:38 -0600 Message-Id: <20220201205840.2587777-1-goldstein.w.n@gmail.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: References: MIME-Version: 1.0 X-Spam-Status: No, score=-2.7 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE, UNWANTED_LANGUAGE_BODY autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: Noah Goldstein via Libc-alpha From: Noah Goldstein Reply-To: Noah Goldstein Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org Sender: "Libc-alpha" No bug. Optimizations are: 1. Reduce code size (-56 bytes). 2. Reduce rodata size (-448 bytes). 3. Remove register save/restores and stack adjustment from the fast path. 4. Slightly better instruction selection where possible. 5. Remove redundant registers moves. 
This results in roughly a 14% performance improvement. Results from geomean of 40 benchtest runs: Function, New Time, Old Time, New / Old _ZGVeN16v_tanhf, 0.658, 0.762, 0.864 All math and mathvec tests are passing. --- .../multiarch/svml_s_tanhf16_core_avx512.S | 585 +++++++++--------- 1 file changed, 298 insertions(+), 287 deletions(-) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf16_core_avx512.S index 8954a5f658..6a2f0c1392 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf16_core_avx512.S @@ -70,312 +70,323 @@ * */ -/* Offsets for data table __svml_stanh_data_internal - */ -#define _sC 0 -#define _sP0 128 -#define _sP2 256 -#define _sP3 384 -#define _sP4 512 -#define _sP5 640 -#define _sP6 768 -#define _sP7 896 -#define _iExpMantMask_UISA 1024 -#define _iMinIdxOfsMask_UISA 1088 -#define _iMaxIdxMask_UISA 1152 -#define _sSignMask 1216 -#define _sAbsMask 1280 -#define _iExpMantMask 1344 -#define _iExpMask 1408 -#define _iMinIdxOfsMask 1472 -#define _iMaxIdxMask 1536 - #include +#define TANHF_DATA(offset) ((offset) + __svml_stanh_data_internal) + +/* Offsets for data table __svml_stanh_data_internal. 
*/ +#define _iExpMantMask_UISA 0 +#define _iMinIdxOfsMask_UISA 4 +#define _iMaxIdxMask_UISA 8 +#define _iExpMask 12 +#define _sSignMask 64 +#define _sC_lo 128 +#define _sC_hi 192 +#define _sP7_lo 256 +#define _sP7_hi 320 +#define _sP6_lo 384 +#define _sP6_hi 448 +#define _sP5_lo 512 +#define _sP5_hi 576 +#define _sP4_lo 640 +#define _sP4_hi 704 +#define _sP3_lo 768 +#define _sP3_hi 832 +#define _sP2_lo 896 +#define _sP2_hi 960 +#define _sP0_lo 1024 +#define _sP0_hi 1088 + .text .section .text.exex512,"ax",@progbits ENTRY(_ZGVeN16v_tanhf_skx) - pushq %rbp - cfi_def_cfa_offset(16) - movq %rsp, %rbp - cfi_def_cfa(6, 16) - cfi_offset(6, -16) - andq $-64, %rsp - subq $192, %rsp - vmovaps %zmm0, %zmm1 - vmovups __svml_stanh_data_internal(%rip), %zmm9 - vmovups _sP6+__svml_stanh_data_internal(%rip), %zmm11 - vmovups _sP5+__svml_stanh_data_internal(%rip), %zmm12 - vmovups _sP4+__svml_stanh_data_internal(%rip), %zmm13 - vmovups _sP3+__svml_stanh_data_internal(%rip), %zmm14 - vmovups _sP2+__svml_stanh_data_internal(%rip), %zmm15 - vpternlogd $255, %zmm2, %zmm2, %zmm2 - vandps _sAbsMask+__svml_stanh_data_internal(%rip), %zmm1, %zmm8 - vandps _sSignMask+__svml_stanh_data_internal(%rip), %zmm1, %zmm0 - -/* Here huge arguments, INF and NaNs are filtered out to callout. */ - vpandd _iExpMantMask_UISA+__svml_stanh_data_internal(%rip), %zmm1, %zmm3 - vpsubd _iMinIdxOfsMask_UISA+__svml_stanh_data_internal(%rip), %zmm3, %zmm4 - vpcmpd $2, _iExpMask+__svml_stanh_data_internal(%rip), %zmm3, %k1 + /* Here huge arguments, INF and NaNs are filtered out to callout. 
*/ + vpandd TANHF_DATA(_iExpMantMask_UISA)(%rip) {1to16}, %zmm0, %zmm1 + vpsubd TANHF_DATA(_iMinIdxOfsMask_UISA)(%rip) {1to16}, %zmm1, %zmm2 -/* - * small table specific variables * - * Constant loading - */ - vpxord %zmm5, %zmm5, %zmm5 - -/* if VMIN, VMAX is defined for I type */ - vpmaxsd %zmm5, %zmm4, %zmm6 - vpminsd _iMaxIdxMask_UISA+__svml_stanh_data_internal(%rip), %zmm6, %zmm7 - vpsrld $21, %zmm7, %zmm10 - vmovups _sP7+__svml_stanh_data_internal(%rip), %zmm4 - vpermt2ps _sC+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm9 - vpermt2ps _sP6+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm11 - vpermt2ps _sP7+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm4 - vpermt2ps _sP5+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm12 - vpermt2ps _sP4+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm13 - vpermt2ps _sP3+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm14 - vpermt2ps _sP2+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm15 - vpandnd %zmm3, %zmm3, %zmm2{%k1} - vptestmd %zmm2, %zmm2, %k0 - vmovups _sP0+__svml_stanh_data_internal(%rip), %zmm3 - vsubps {rn-sae}, %zmm9, %zmm8, %zmm2 - kmovw %k0, %edx - vfmadd213ps {rn-sae}, %zmm11, %zmm2, %zmm4 - vpermt2ps _sP0+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm3 - vfmadd213ps {rn-sae}, %zmm12, %zmm2, %zmm4 - vfmadd213ps {rn-sae}, %zmm13, %zmm2, %zmm4 - vfmadd213ps {rn-sae}, %zmm14, %zmm2, %zmm4 - vfmadd213ps {rn-sae}, %zmm15, %zmm2, %zmm4 - vfmadd213ps {rn-sae}, %zmm3, %zmm2, %zmm4 - vorps %zmm0, %zmm4, %zmm0 - testl %edx, %edx - -/* Go to special inputs processing branch */ - jne L(SPECIAL_VALUES_BRANCH) - # LOE rbx r12 r13 r14 r15 edx zmm0 zmm1 - -/* Restore registers - * and exit the function - */ + /* Selection arguments between [0, 0x03e00000] into zmm3. 
*/ + vpxord %zmm3, %zmm3, %zmm3 + vpmaxsd %zmm3, %zmm2, %zmm3 + vpminsd TANHF_DATA(_iMaxIdxMask_UISA)(%rip) {1to16}, %zmm3, %zmm3 -L(EXIT): - movq %rbp, %rsp - popq %rbp - cfi_def_cfa(7, 8) - cfi_restore(6) - ret - cfi_def_cfa(6, 16) - cfi_offset(6, -16) - -/* Branch to process - * special inputs - */ + /* Setup permute indices in zmm3. */ + vpsrld $21, %zmm3, %zmm3 + + /* Store if there are any special cases in k1. */ + vpcmpd $6, TANHF_DATA(_iExpMask)(%rip) {1to16}, %zmm1, %k1 + + + /* Store absolute values of inputs in zmm1. */ + vmovaps TANHF_DATA(_sSignMask)(%rip), %zmm4 + vandnps %zmm0, %zmm4, %zmm1 + + vmovaps TANHF_DATA(_sC_lo)(%rip), %zmm5 + vpermt2ps TANHF_DATA(_sC_hi)(%rip), %zmm3, %zmm5 + vsubps {rn-sae}, %zmm5, %zmm1, %zmm1 + + vmovaps TANHF_DATA(_sP7_lo)(%rip), %zmm2 + vpermt2ps TANHF_DATA(_sP7_hi)(%rip), %zmm3, %zmm2 + vmovaps TANHF_DATA(_sP6_lo)(%rip), %zmm5 + vpermt2ps TANHF_DATA(_sP6_hi)(%rip), %zmm3, %zmm5 + + vmovaps TANHF_DATA(_sP5_lo)(%rip), %zmm6 + vpermt2ps TANHF_DATA(_sP5_hi)(%rip), %zmm3, %zmm6 + + vfmadd213ps {rn-sae}, %zmm5, %zmm1, %zmm2 + vfmadd213ps {rn-sae}, %zmm6, %zmm1, %zmm2 + + vmovaps TANHF_DATA(_sP4_lo)(%rip), %zmm7 + vpermt2ps TANHF_DATA(_sP4_hi)(%rip), %zmm3, %zmm7 + + vmovaps TANHF_DATA(_sP3_lo)(%rip), %zmm8 + vpermt2ps TANHF_DATA(_sP3_hi)(%rip), %zmm3, %zmm8 + + vfmadd213ps {rn-sae}, %zmm7, %zmm1, %zmm2 + vfmadd213ps {rn-sae}, %zmm8, %zmm1, %zmm2 + + vmovaps TANHF_DATA(_sP2_lo)(%rip), %zmm9 + vpermt2ps TANHF_DATA(_sP2_hi)(%rip), %zmm3, %zmm9 + + vmovaps TANHF_DATA(_sP0_lo)(%rip), %zmm10 + vpermt2ps TANHF_DATA(_sP0_hi)(%rip), %zmm3, %zmm10 + + vfmadd213ps {rn-sae}, %zmm9, %zmm1, %zmm2 + vfmadd213ps {rn-sae}, %zmm10, %zmm1, %zmm2 + + kmovw %k1, %edx + testl %edx, %edx + + /* Go to special inputs processing branch. */ + jne L(SPECIAL_VALUES_BRANCH) + /* Wait until after branch to write over zmm0. */ + vpternlogd $0xec, %zmm4, %zmm2, %zmm0 + + /* No stack restoration on the fastpath. 
*/ + ret + + /* Branch to process special inputs. */ L(SPECIAL_VALUES_BRANCH): - vmovups %zmm1, 64(%rsp) - vmovups %zmm0, 128(%rsp) - # LOE rbx r12 r13 r14 r15 edx zmm0 - - xorl %eax, %eax - # LOE rbx r12 r13 r14 r15 eax edx - - vzeroupper - movq %r12, 16(%rsp) - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 - movl %eax, %r12d - movq %r13, 8(%rsp) - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 - movl %edx, %r13d - movq %r14, (%rsp) - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r15 r12d r13d - -/* Range mask - * bits check - */ + pushq %rbp + /* Need to callee save registers to preserve state across tanhf calls. + */ + pushq %r13 + pushq %r12 + movq %rsp, %rbp -L(RANGEMASK_CHECK): - btl %r12d, %r13d + /* Align stack and make room for 2x zmm vectors. */ + andq $-64, %rsp + addq $-128, %rsp -/* Call scalar math function */ - jc L(SCALAR_MATH_CALL) - # LOE rbx r15 r12d r13d + /* Save original input first; the sign merge below overwrites zmm0. */ + vmovaps %zmm0, 64(%rsp) + /* Save computed results with input sign merged: zmm0 = zmm2 | (zmm0 & zmm4). */ + vpternlogd $0xec, %zmm4, %zmm2, %zmm0 + vmovaps %zmm0, (%rsp) -/* Special inputs - * processing loop - */ + vzeroupper + /* edx has 1s where there was a special value that needs to be handled + by a tanhf call. 
*/ + movl %edx, %r13d L(SPECIAL_VALUES_LOOP): - incl %r12d - cmpl $16, %r12d - -/* Check bits in range mask */ - jl L(RANGEMASK_CHECK) - # LOE rbx r15 r12d r13d - - movq 16(%rsp), %r12 - cfi_restore(12) - movq 8(%rsp), %r13 - cfi_restore(13) - movq (%rsp), %r14 - cfi_restore(14) - vmovups 128(%rsp), %zmm0 - -/* Go to exit */ - jmp L(EXIT) - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r12 r13 r14 r15 zmm0 - -/* Scalar math fucntion call - * to process special input - */ + /* Use r12 as index for special value that is saved across calls to + tanhf. We technically don't need a callee save register here as offset + to rsp is always [0, 56] so we can restore rsp by realigning to 64. + Essentially the tradeoff is 1 extra save/restore vs 2 extra instructions + in the loop. */ + xorl %r12d, %r12d + tzcntl %r13d, %r12d -L(SCALAR_MATH_CALL): - movl %r12d, %r14d - movss 64(%rsp,%r14,4), %xmm0 - call tanhf@PLT - # LOE rbx r14 r15 r12d r13d xmm0 + /* Scalar math function call to process special input. */ + movss 64(%rsp, %r12, 4), %xmm0 + call tanhf@PLT - movss %xmm0, 128(%rsp,%r14,4) + /* No good way to avoid the store-forwarding fault this will cause on + return. `lfence` avoids the SF fault but at greater cost as it + serializes stack/callee save restoration. 
*/ + movss %xmm0, (%rsp, %r12, 4) -/* Process special inputs in loop */ - jmp L(SPECIAL_VALUES_LOOP) - # LOE rbx r15 r12d r13d -END(_ZGVeN16v_tanhf_skx) + blsr %r13d, %r13d + jnz L(SPECIAL_VALUES_LOOP) - .section .rodata, "a" - .align 64 + /* All results have been written to (%rsp). */ + vmovaps (%rsp), %zmm0 + /* Restore rsp. */ + movq %rbp, %rsp + /* Restore callee save registers. */ + popq %r12 + popq %r13 + popq %rbp + ret +END(_ZGVeN16v_tanhf_skx) + .section .rodata, "a" + .align 64 #ifdef __svml_stanh_data_internal_typedef -typedef unsigned int VUINT32; -typedef struct -{ - __declspec(align(64)) VUINT32 _sC[32][1]; - __declspec(align(64)) VUINT32 _sP0[32][1]; - __declspec(align(64)) VUINT32 _sP2[32][1]; - __declspec(align(64)) VUINT32 _sP3[32][1]; - __declspec(align(64)) VUINT32 _sP4[32][1]; - __declspec(align(64)) VUINT32 _sP5[32][1]; - __declspec(align(64)) VUINT32 _sP6[32][1]; - __declspec(align(64)) VUINT32 _sP7[32][1]; - __declspec(align(64)) VUINT32 _iExpMantMask_UISA[16][1]; - __declspec(align(64)) VUINT32 _iMinIdxOfsMask_UISA[16][1]; - __declspec(align(64)) VUINT32 _iMaxIdxMask_UISA[16][1]; - __declspec(align(64)) VUINT32 _sSignMask[16][1]; - __declspec(align(64)) VUINT32 _sAbsMask[16][1]; - __declspec(align(64)) VUINT32 _iExpMantMask[16][1]; - __declspec(align(64)) VUINT32 _iExpMask[16][1]; - __declspec(align(64)) VUINT32 _iMinIdxOfsMask[16][1]; - __declspec(align(64)) VUINT32 _iMaxIdxMask[16][1]; -} __svml_stanh_data_internal; + typedef unsigned int VUINT32; + typedef struct + { + __declspec (align(4))VUINT32 _iExpMantMask_UISA[1][1]; + __declspec (align(4))VUINT32 _iMinIdxOfsMask_UISA[1][1]; + __declspec (align(4))VUINT32 _iMaxIdxMask_UISA[1][1]; + __declspec (align(4))VUINT32 _iExpMask[1][1]; + __declspec (align(64))VUINT32 _sSignMask[16][1]; + __declspec (align(64))VUINT32 _sC_lo[16][1]; + __declspec (align(64))VUINT32 _sC_hi[16][1]; + __declspec (align(64))VUINT32 _sP7_lo[16][1]; + __declspec (align(64))VUINT32 _sP7_hi[16][1]; + __declspec 
(align(64))VUINT32 _sP6_lo[16][1]; + __declspec (align(64))VUINT32 _sP6_hi[16][1]; + __declspec (align(64))VUINT32 _sP5_lo[16][1]; + __declspec (align(64))VUINT32 _sP5_hi[16][1]; + __declspec (align(64))VUINT32 _sP4_lo[16][1]; + __declspec (align(64))VUINT32 _sP4_hi[16][1]; + __declspec (align(64))VUINT32 _sP3_lo[16][1]; + __declspec (align(64))VUINT32 _sP3_hi[16][1]; + __declspec (align(64))VUINT32 _sP2_lo[16][1]; + __declspec (align(64))VUINT32 _sP2_hi[16][1]; + __declspec (align(64))VUINT32 _sP0_lo[16][1]; + __declspec (align(64))VUINT32 _sP0_hi[16][1]; + }__svml_stanh_data_internal; #endif + __svml_stanh_data_internal: - /*== _sC ==*/ - .long 0x00000000, 0x3d700000, 0x3d900000, 0x3db00000 - .long 0x3dd00000, 0x3df00000, 0x3e100000, 0x3e300000 - .long 0x3e500000, 0x3e700000, 0x3e900000, 0x3eb00000 - .long 0x3ed00000, 0x3ef00000, 0x3f100000, 0x3f300000 - .long 0x3f500000, 0x3f700000, 0x3f900000, 0x3fb00000 - .long 0x3fd00000, 0x3ff00000, 0x40100000, 0x40300000 - .long 0x40500000, 0x40700000, 0x40900000, 0x40b00000 - .long 0x40d00000, 0x40f00000, 0x41100000, 0x00000000 - /*== p0 ==*/ - .align 64 - .long 0x00000000, 0x3d6fb9c9, 0x3d8fc35f, 0x3daf9169 - .long 0x3dcf49ab, 0x3deee849, 0x3e0f0ee8, 0x3e2e4984 - .long 0x3e4d2f8e, 0x3e6bb32e, 0x3e8c51cd, 0x3ea96163 - .long 0x3ec543f1, 0x3edfd735, 0x3f028438, 0x3f18abf0 - .long 0x3f2bc480, 0x3f3bec1c, 0x3f4f2e5b, 0x3f613c53 - .long 0x3f6ce37d, 0x3f743c4f, 0x3f7a5feb, 0x3f7dea85 - .long 0x3f7f3b3d, 0x3f7fb78c, 0x3f7fefd4, 0x3f7ffdd0 - .long 0x3f7fffb4, 0x3f7ffff6, 0x3f7fffff, 0x3f800000 - /*== p2 ==*/ - .align 64 - .long 0x3f800000, 0x3f7f1f84, 0x3f7ebd11, 0x3f7e1e5f - .long 0x3f7d609f, 0x3f7c842d, 0x3f7b00e5, 0x3f789580 - .long 0x3f75b8ad, 0x3f726fd9, 0x3f6cc59b, 0x3f63fb92 - .long 0x3f59ff97, 0x3f4f11d7, 0x3f3d7573, 0x3f24f360 - .long 0x3f0cbfe7, 0x3eec1a69, 0x3eb0a801, 0x3e6753a2 - .long 0x3e132f1a, 0x3db7e7d3, 0x3d320845, 0x3c84d3d4 - .long 0x3bc477b7, 0x3b10d3da, 0x3a01601e, 0x388c1a3b - .long 0x3717b0da, 0x35a43bce, 
0x338306c6, 0x00000000 - /*== p3 ==*/ - .align 64 - .long 0xb0343c7b, 0xbd6ee69d, 0xbd8f0da7, 0xbdae477d - .long 0xbdcd2a1f, 0xbdeba80d, 0xbe0c443b, 0xbe293cf3 - .long 0xbe44f282, 0xbe5f3651, 0xbe81c7c0, 0xbe96d7ca - .long 0xbea7fb8e, 0xbeb50e9e, 0xbec12efe, 0xbec4be92 - .long 0xbebce070, 0xbead510e, 0xbe8ef7d6, 0xbe4b8704 - .long 0xbe083237, 0xbdaf7449, 0xbd2e1ec4, 0xbc83bf06 - .long 0xbbc3e0b5, 0xbb10aadc, 0xba0157db, 0xb88c18f2 - .long 0xb717b096, 0xb5a43bae, 0xb383012c, 0x00000000 - /*== p4 ==*/ - .align 64 - .long 0xbeaaaaa5, 0xbeab0612, 0xbea7f01f, 0xbea4e120 - .long 0xbea387b7, 0xbea15962, 0xbe9d57f7, 0xbe976b5a - .long 0xbe90230d, 0xbe880dff, 0xbe7479b3, 0xbe4c3d88 - .long 0xbe212482, 0xbdeb8cba, 0xbd5e78ad, 0x3c6b5e6e - .long 0x3d839143, 0x3dc21ee1, 0x3de347af, 0x3dcbec96 - .long 0x3d99ef2d, 0x3d542ea1, 0x3cdde701, 0x3c2cca67 - .long 0x3b81cb27, 0x3ac073a1, 0x39ac3032, 0x383a94d9 - .long 0x36ca081d, 0x355abd4c, 0x332b3cb6, 0x00000000 - /*== p5 ==*/ - .align 64 - .long 0xb76dd6b9, 0xbe1c276d, 0x3c1dcf2f, 0x3dc1a78d - .long 0x3d96f985, 0x3da2b61b, 0x3dc13397, 0x3dd2f670 - .long 0x3df48a0a, 0x3e06c5a8, 0x3e1a3aba, 0x3e27c405 - .long 0x3e2e78d0, 0x3e2c3e44, 0x3e1d3097, 0x3df4a8f4 - .long 0x3da38508, 0x3d31416a, 0x3b562657, 0xbcaeeac9 - .long 0xbcce9419, 0xbcaaeac4, 0xbc49e7d0, 0xbba71ddd - .long 0xbb003b0e, 0xba3f9a05, 0xb92c08a7, 0xb7ba9232 - .long 0xb64a0b0f, 0xb4dac169, 0xb2ab78ac, 0x00000000 - /*== p6 ==*/ - .align 64 - .long 0x3e0910e9, 0x43761143, 0x4165ecdc, 0xc190f756 - .long 0xc08c097d, 0xc02ba813, 0xbf7f6bda, 0x3f2b1dc0 - .long 0x3ece105d, 0x3f426a94, 0xbadb0dc4, 0x3da43b17 - .long 0xbd51ab88, 0xbcaea23d, 0xbd3b6d8d, 0xbd6caaad - .long 0xbd795bed, 0xbd5fddda, 0xbd038f3b, 0xbc1cad63 - .long 0x3abb4766, 0x3b95f10b, 0x3b825873, 0x3afaea66 - .long 0x3a49f878, 0x39996bf3, 0x388f3e6c, 0x371bb0e3 - .long 0x35a8a5e6, 0x34369b17, 0x322487b0, 0x00000000 - /*== p7 ==*/ - .align 64 - .long 0xbc0e2f66, 0x460bda12, 0x43d638ef, 0xc3e11c3e - .long 0xc2baa4e9, 
0xc249da2d, 0xc1859b82, 0x40dd5b57 - .long 0x40494640, 0x40c730a8, 0xbf0f160e, 0x3e30e76f - .long 0xbea81387, 0xbdb26a1c, 0xbd351e57, 0xbb4c01a0 - .long 0x3c1d7bfb, 0x3c722cd1, 0x3c973f1c, 0x3c33a31b - .long 0x3b862ef4, 0x3a27b3d0, 0xba3b5907, 0xba0efc22 - .long 0xb97f9f0f, 0xb8c8af50, 0xb7bdddfb, 0xb64f2950 - .long 0xb4e085b1, 0xb3731dfa, 0xb15a1f04, 0x00000000 - .align 64 - .long 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000 /* _iExpMantMask_UISA */ - .align 64 - .long 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000 /* _iMinIdxOfsMask_UISA */ - .align 64 - .long 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000 /* _iMaxIdxMask_UISA */ - .align 64 - .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000 /* _sSignMask */ - .align 64 - .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff /* _sAbsMask */ - .align 64 - .long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 /* _iExpMantMask */ - .align 64 - .long 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000 /* _iExpMask */ - .align 64 
- .long 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000 /* _iMinIdxOfsMask */ - .align 64 - .long 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000 /* _iMaxIdxMask */ - .align 64 - .type __svml_stanh_data_internal,@object - .size __svml_stanh_data_internal,.-__svml_stanh_data_internal + .align 4 + /* _iExpMantMask_UISA. */ + .long 0x7fe00000 + + .align 4 + /* _iMinIdxOfsMask_UISA. */ + .long 0x3d400000 + + .align 4 + /* _iMaxIdxMask_UISA. */ + .long 0x03e00000 + + .align 4 + /* _iExpMask. */ + .long 0x7f000000 + + .align 64 + /* _sSignMask. */ + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 + + .align 64 + /* _sC_lo. */ + .long 0x00000000, 0x3d700000, 0x3d900000, 0x3db00000 + .long 0x3dd00000, 0x3df00000, 0x3e100000, 0x3e300000 + .long 0x3e500000, 0x3e700000, 0x3e900000, 0x3eb00000 + .long 0x3ed00000, 0x3ef00000, 0x3f100000, 0x3f300000 + + .align 64 + /* _sC_hi. */ + .long 0x3f500000, 0x3f700000, 0x3f900000, 0x3fb00000 + .long 0x3fd00000, 0x3ff00000, 0x40100000, 0x40300000 + .long 0x40500000, 0x40700000, 0x40900000, 0x40b00000 + .long 0x40d00000, 0x40f00000, 0x41100000, 0x00000000 + + .align 64 + /* _sP7_lo. */ + .long 0xbc0e2f66, 0x460bda12, 0x43d638ef, 0xc3e11c3e + .long 0xc2baa4e9, 0xc249da2d, 0xc1859b82, 0x40dd5b57 + .long 0x40494640, 0x40c730a8, 0xbf0f160e, 0x3e30e76f + .long 0xbea81387, 0xbdb26a1c, 0xbd351e57, 0xbb4c01a0 + + .align 64 + /* _sP7_hi. 
*/ + .long 0x3c1d7bfb, 0x3c722cd1, 0x3c973f1c, 0x3c33a31b + .long 0x3b862ef4, 0x3a27b3d0, 0xba3b5907, 0xba0efc22 + .long 0xb97f9f0f, 0xb8c8af50, 0xb7bdddfb, 0xb64f2950 + .long 0xb4e085b1, 0xb3731dfa, 0xb15a1f04, 0x00000000 + + .align 64 + /* _sP6_lo. */ + .long 0x3e0910e9, 0x43761143, 0x4165ecdc, 0xc190f756 + .long 0xc08c097d, 0xc02ba813, 0xbf7f6bda, 0x3f2b1dc0 + .long 0x3ece105d, 0x3f426a94, 0xbadb0dc4, 0x3da43b17 + .long 0xbd51ab88, 0xbcaea23d, 0xbd3b6d8d, 0xbd6caaad + + .align 64 + /* _sP6_hi. */ + .long 0xbd795bed, 0xbd5fddda, 0xbd038f3b, 0xbc1cad63 + .long 0x3abb4766, 0x3b95f10b, 0x3b825873, 0x3afaea66 + .long 0x3a49f878, 0x39996bf3, 0x388f3e6c, 0x371bb0e3 + .long 0x35a8a5e6, 0x34369b17, 0x322487b0, 0x00000000 + + .align 64 + /* _sP5_lo. */ + .long 0xb76dd6b9, 0xbe1c276d, 0x3c1dcf2f, 0x3dc1a78d + .long 0x3d96f985, 0x3da2b61b, 0x3dc13397, 0x3dd2f670 + .long 0x3df48a0a, 0x3e06c5a8, 0x3e1a3aba, 0x3e27c405 + .long 0x3e2e78d0, 0x3e2c3e44, 0x3e1d3097, 0x3df4a8f4 + + .align 64 + /* _sP5_hi. */ + .long 0x3da38508, 0x3d31416a, 0x3b562657, 0xbcaeeac9 + .long 0xbcce9419, 0xbcaaeac4, 0xbc49e7d0, 0xbba71ddd + .long 0xbb003b0e, 0xba3f9a05, 0xb92c08a7, 0xb7ba9232 + .long 0xb64a0b0f, 0xb4dac169, 0xb2ab78ac, 0x00000000 + + .align 64 + /* _sP4_lo. */ + .long 0xbeaaaaa5, 0xbeab0612, 0xbea7f01f, 0xbea4e120 + .long 0xbea387b7, 0xbea15962, 0xbe9d57f7, 0xbe976b5a + .long 0xbe90230d, 0xbe880dff, 0xbe7479b3, 0xbe4c3d88 + .long 0xbe212482, 0xbdeb8cba, 0xbd5e78ad, 0x3c6b5e6e + + .align 64 + /* _sP4_hi. */ + .long 0x3d839143, 0x3dc21ee1, 0x3de347af, 0x3dcbec96 + .long 0x3d99ef2d, 0x3d542ea1, 0x3cdde701, 0x3c2cca67 + .long 0x3b81cb27, 0x3ac073a1, 0x39ac3032, 0x383a94d9 + .long 0x36ca081d, 0x355abd4c, 0x332b3cb6, 0x00000000 + + .align 64 + /* _sP3_lo. 
*/ + .long 0xb0343c7b, 0xbd6ee69d, 0xbd8f0da7, 0xbdae477d + .long 0xbdcd2a1f, 0xbdeba80d, 0xbe0c443b, 0xbe293cf3 + .long 0xbe44f282, 0xbe5f3651, 0xbe81c7c0, 0xbe96d7ca + .long 0xbea7fb8e, 0xbeb50e9e, 0xbec12efe, 0xbec4be92 + + .align 64 + /* _sP3_hi. */ + .long 0xbebce070, 0xbead510e, 0xbe8ef7d6, 0xbe4b8704 + .long 0xbe083237, 0xbdaf7449, 0xbd2e1ec4, 0xbc83bf06 + .long 0xbbc3e0b5, 0xbb10aadc, 0xba0157db, 0xb88c18f2 + .long 0xb717b096, 0xb5a43bae, 0xb383012c, 0x00000000 + + .align 64 + /* _sP2_lo. */ + .long 0x3f800000, 0x3f7f1f84, 0x3f7ebd11, 0x3f7e1e5f + .long 0x3f7d609f, 0x3f7c842d, 0x3f7b00e5, 0x3f789580 + .long 0x3f75b8ad, 0x3f726fd9, 0x3f6cc59b, 0x3f63fb92 + .long 0x3f59ff97, 0x3f4f11d7, 0x3f3d7573, 0x3f24f360 + + .align 64 + /* _sP2_hi. */ + .long 0x3f0cbfe7, 0x3eec1a69, 0x3eb0a801, 0x3e6753a2 + .long 0x3e132f1a, 0x3db7e7d3, 0x3d320845, 0x3c84d3d4 + .long 0x3bc477b7, 0x3b10d3da, 0x3a01601e, 0x388c1a3b + .long 0x3717b0da, 0x35a43bce, 0x338306c6, 0x00000000 + + .align 64 + /* _sP0_lo. */ + .long 0x00000000, 0x3d6fb9c9, 0x3d8fc35f, 0x3daf9169 + .long 0x3dcf49ab, 0x3deee849, 0x3e0f0ee8, 0x3e2e4984 + .long 0x3e4d2f8e, 0x3e6bb32e, 0x3e8c51cd, 0x3ea96163 + .long 0x3ec543f1, 0x3edfd735, 0x3f028438, 0x3f18abf0 + + .align 64 + /* _sP0_hi. 
*/ + .long 0x3f2bc480, 0x3f3bec1c, 0x3f4f2e5b, 0x3f613c53 + .long 0x3f6ce37d, 0x3f743c4f, 0x3f7a5feb, 0x3f7dea85 + .long 0x3f7f3b3d, 0x3f7fb78c, 0x3f7fefd4, 0x3f7ffdd0 + .long 0x3f7fffb4, 0x3f7ffff6, 0x3f7fffff, 0x3f800000 + + .align 64 + .type __svml_stanh_data_internal, @object + .size __svml_stanh_data_internal, .-__svml_stanh_data_internal From patchwork Tue Feb 1 20:58:39 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Noah Goldstein X-Patchwork-Id: 50649 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 3A176385B80B for ; Tue, 1 Feb 2022 21:00:50 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 3A176385B80B DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1643749250; bh=BgTx1bnORpUHE64UnaPCXfcMa/V5Tjh+F57QuQP5cks=; h=To:Subject:Date:In-Reply-To:References:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=Fw6bemLritFsVEBwgWYd9diPE7sl5LA1/chkOkALjQ9T6DXsxBG+4kwxELBLmttaC g0d7t3R4Q3Cp0FesF5FVDb1JWqOXe69vZW4BLLV0TkUrg31GmvG9hYAU8Nd53ilDQO oZtBJFDG9MXhxkIbcCnZzi4O17pnqdmwghfFNhoE= X-Original-To: libc-alpha@sourceware.org Delivered-To: libc-alpha@sourceware.org Received: from mail-io1-xd29.google.com (mail-io1-xd29.google.com [IPv6:2607:f8b0:4864:20::d29]) by sourceware.org (Postfix) with ESMTPS id 9613D3858437 for ; Tue, 1 Feb 2022 20:58:54 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 9613D3858437 Received: by mail-io1-xd29.google.com with SMTP id n17so22860665iod.4 for ; Tue, 01 Feb 2022 12:58:54 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; 
bh=BgTx1bnORpUHE64UnaPCXfcMa/V5Tjh+F57QuQP5cks=; b=e/L06nwLl/DxooK22I2oLxta3vnS7ur5ygZe0BYRG4pcgJuYxAmepzL8kfWeNIDM74 UuFZHxEaq54buCIDVS59fJYeeWA+Ha1G3NMpW9fYySDcxwbK+1jQh9+c424xodwdU9Xi iNxlV+w3/ZnKpSI+PwNcCrLXxP1r7ExFM3F+maKJwcH/rB9Xe4EV1Eyq1owaD32BAIyM OsczbCv3XTLjidWs8cAOdCw92cjjZYhFxxEYJx7pVweVmsRa/1XUDueYNPqzNax+2ZkV y9Qe7LlJ1Z0rY9uhagfVRJ3tQvoWHgF+pKVBaUt0iBJgDW2sxI2b8mzlneF9dKTvWE0M Cknw== X-Gm-Message-State: AOAM533v3fqMXGp1VLXq1Sk1ssW3F8fykaKOKqPBZj1UpVbIq8Yg2vUj xlEPic8FoG+Fx+uZCP8AVissfq1fxbI= X-Google-Smtp-Source: ABdhPJxGP0Id+VJYsrqzYrNz5+Gf2qwOb1rZvP62AsTNx80XhWb+d7scHpbC4ZW4aCqF1f2J0Vy+pw== X-Received: by 2002:a05:6602:2dd5:: with SMTP id l21mr14868099iow.132.1643749132054; Tue, 01 Feb 2022 12:58:52 -0800 (PST) Received: from localhost.localdomain (node-17-161.flex.volo.net. [76.191.17.161]) by smtp.googlemail.com with ESMTPSA id u17sm9144308ilk.49.2022.02.01.12.58.51 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 01 Feb 2022 12:58:51 -0800 (PST) To: libc-alpha@sourceware.org Subject: [PATCH v2 2/3] x86: Optimize svml_s_tanhf_core_avx2.S Date: Tue, 1 Feb 2022 14:58:39 -0600 Message-Id: <20220201205840.2587777-2-goldstein.w.n@gmail.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20220201205840.2587777-1-goldstein.w.n@gmail.com> References: <20220201205840.2587777-1-goldstein.w.n@gmail.com> MIME-Version: 1.0 X-Spam-Status: No, score=-12.6 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, KAM_LOTSOFHASH, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: Noah Goldstein via Libc-alpha From: Noah Goldstein 
Reply-To: Noah Goldstein Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org Sender: "Libc-alpha" No bug. Optimizations are: 1. Reduce code size (-70 bytes). 2. Reduce rodata size (-32 bytes). 3. Remove register save/restores and stack adjustment from the fast path. 4. Slightly better instruction selection where possible. 5. Remove redundant registers moves. 6. Prefer registers that get smaller instruction encodings. This results in roughly a 15% performance improvement. Results from geomean of 40 benchtest runs: Function, New Time, Old Time, New / Old _ZGVcN8v_tanhf, 3.556, 4.192, 0.848 _ZGVdN8v_tanhf, 2.13, 2.486, 0.857 All math and mathvec tests are passing. --- .../fpu/multiarch/svml_s_tanhf8_core_avx2.S | 908 +++--------------- .../fpu/multiarch/svml_s_tanhf_rodata.S | 600 ++++++++++++ 2 files changed, 752 insertions(+), 756 deletions(-) create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_tanhf_rodata.S diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S index 3745db5aa4..90c3ea4cc6 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S @@ -70,775 +70,171 @@ * */ -/* Offsets for data table __svml_stanh_data_internal - */ -#define _dbP 0 -#define _sSignMask 4288 -#define _sAbsMask 4320 -#define _iExpMantMask 4352 -#define _iExpMask 4384 -#define _iMinIdxOfsMask 4416 -#define _iMaxIdxMask 4448 - #include +#include "svml_s_tanhf_rodata.S" .text .section .text.avx2,"ax",@progbits ENTRY(_ZGVdN8v_tanhf_avx2) - pushq %rbp - cfi_def_cfa_offset(16) - movq %rsp, %rbp - cfi_def_cfa(6, 16) - cfi_offset(6, -16) - andq $-32, %rsp - pushq %r12 - subq $120, %rsp - lea _dbP+16+__svml_stanh_data_internal(%rip), %r10 - vmovaps %ymm0, %ymm12 - -/* Here huge arguments, INF and NaNs are filtered out to callout. 
*/ - vpand _iExpMantMask+__svml_stanh_data_internal(%rip), %ymm12, %ymm14 + /* Here huge arguments, INF and NaNs are filtered out to callout. */ + vpand TANHF_DATA(_iExpMantMask)(%rip), %ymm0, %ymm4 + vpsubd TANHF_DATA(_iMinIdxOfsMask)(%rip), %ymm4, %ymm2 -/* - * small table specific variables * - * Constant loading - */ - vmovups _iMaxIdxMask+__svml_stanh_data_internal(%rip), %ymm8 - vpsubd _iMinIdxOfsMask+__svml_stanh_data_internal(%rip), %ymm14, %ymm9 - -/* if VMIN, VMAX is defined for I type */ - vxorps %ymm15, %ymm15, %ymm15 - vpcmpgtd %ymm15, %ymm9, %ymm0 - vpand %ymm0, %ymm9, %ymm7 - vpcmpgtd %ymm8, %ymm9, %ymm6 - vblendvps %ymm6, %ymm8, %ymm7, %ymm3 - vpsrld $14, %ymm3, %ymm1 - vpcmpgtd _iExpMask+__svml_stanh_data_internal(%rip), %ymm14, %ymm13 - vmovmskps %ymm13, %r11d - vandps _sAbsMask+__svml_stanh_data_internal(%rip), %ymm12, %ymm10 - vandps _sSignMask+__svml_stanh_data_internal(%rip), %ymm12, %ymm11 - vextractf128 $1, %ymm1, %xmm2 - vmovd %xmm1, %r9d - vmovd %xmm2, %ecx - vpextrd $1, %xmm2, %edx - vpextrd $1, %xmm1, %r8d - movslq %r9d, %r9 - movslq %edx, %rdx - movslq %r8d, %r8 - vpextrd $2, %xmm1, %edi - movslq %ecx, %rcx - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -8; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22 - vpextrd $3, %xmm2, %r12d - vpextrd $3, %xmm1, %esi - vpextrd $2, %xmm2, %eax - movslq %edi, %rdi - movslq %r12d, %r12 - movslq %esi, %rsi - movslq %eax, %rax - vmovupd -16(%r9,%r10), %xmm5 - vmovupd -16(%rdx,%r10), %xmm14 - vmovupd -16(%rcx,%r10), %xmm13 - vmovupd (%r9,%r10), %xmm1 - vmovupd (%r8,%r10), %xmm2 - vmovupd -16(%r8,%r10), %xmm4 - vinsertf128 $1, -16(%rdi,%r10), %ymm5, %ymm15 - vinsertf128 $1, -16(%r12,%r10), %ymm14, %ymm3 - vinsertf128 $1, -16(%rax,%r10), %ymm13, %ymm6 - vinsertf128 $1, (%rdi,%r10), %ymm1, %ymm5 - vinsertf128 $1, (%rsi,%r10), %ymm2, %ymm14 - vunpcklpd %ymm3, %ymm6, 
%ymm8 - vunpckhpd %ymm3, %ymm6, %ymm6 - vunpcklpd %ymm14, %ymm5, %ymm3 - vunpckhpd %ymm14, %ymm5, %ymm2 - vmovupd (%rcx,%r10), %xmm13 - vcvtps2pd %xmm10, %ymm5 - vextractf128 $1, %ymm10, %xmm10 - vfmadd213pd %ymm3, %ymm5, %ymm2 - vinsertf128 $1, -16(%rsi,%r10), %ymm4, %ymm0 - vmovupd (%rdx,%r10), %xmm4 - vunpcklpd %ymm0, %ymm15, %ymm9 - vunpckhpd %ymm0, %ymm15, %ymm7 - vfmadd213pd %ymm7, %ymm5, %ymm2 - vfmadd213pd %ymm9, %ymm5, %ymm2 - vinsertf128 $1, (%r12,%r10), %ymm4, %ymm0 - vcvtps2pd %xmm10, %ymm4 - vinsertf128 $1, (%rax,%r10), %ymm13, %ymm15 - vunpcklpd %ymm0, %ymm15, %ymm1 - vunpckhpd %ymm0, %ymm15, %ymm0 - vfmadd213pd %ymm1, %ymm4, %ymm0 - vcvtpd2ps %ymm2, %xmm1 - vfmadd213pd %ymm6, %ymm4, %ymm0 - vfmadd213pd %ymm8, %ymm4, %ymm0 - vcvtpd2ps %ymm0, %xmm0 - vinsertf128 $1, %xmm0, %ymm1, %ymm2 - vorps %ymm11, %ymm2, %ymm0 - testl %r11d, %r11d - -/* Go to special inputs processing branch */ - jne L(SPECIAL_VALUES_BRANCH) - # LOE rbx r13 r14 r15 r11d ymm0 ymm12 - -/* Restore registers - * and exit the function - */ + /* Selection of arguments between [0, 0x04280000] into ymm2. 
*/ + vpxor %ymm3, %ymm3, %ymm3 + vpmaxsd %ymm3, %ymm2, %ymm2 + vpminsd TANHF_DATA(_iMaxIdxMask)(%rip), %ymm2, %ymm2 -L(EXIT): - addq $120, %rsp - cfi_restore(12) - popq %r12 - movq %rbp, %rsp - popq %rbp - cfi_def_cfa(7, 8) - cfi_restore(6) - ret - cfi_def_cfa(6, 16) - cfi_offset(6, -16) - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -8; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22 - -/* Branch to process - * special inputs - */ + vpsrld $14, %ymm2, %ymm1 -L(SPECIAL_VALUES_BRANCH): - vmovups %ymm12, 32(%rsp) - vmovups %ymm0, 64(%rsp) - # LOE rbx r13 r14 r15 r11d ymm0 - - xorl %r12d, %r12d - # LOE rbx r13 r14 r15 r11d r12d - - vzeroupper - movq %r13, 8(%rsp) - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -120; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xff, 0xff, 0xff, 0x22 - movl %r11d, %r13d - movq %r14, (%rsp) - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -128; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r15 r12d r13d - -/* Range mask - * bits check - */ + /* Store special cases in ymm15. */ + vpcmpgtd TANHF_DATA(_iExpMask)(%rip), %ymm4, %ymm15 -L(RANGEMASK_CHECK): - btl %r12d, %r13d -/* Call scalar math function */ - jc L(SCALAR_MATH_CALL) - # LOE rbx r15 r12d r13d + /* Store base of lookup table in rax. */ + leaq TANHF_DATA(_lookupTable)(%rip), %rax -/* Special inputs - * processing loop - */ + /* We are splitting ymm1 into 8 GPRs. This may be faster to do with + store/load as we can take advantage of store-forwarding. */ + vmovq %xmm1, %r8 + /* We have eliminated all negative values for ymm1 so no need to sign + extend. 
*/ + movl %r8d, %r9d + shrq $32, %r8 -L(SPECIAL_VALUES_LOOP): - incl %r12d - cmpl $8, %r12d - -/* Check bits in range mask */ - jl L(RANGEMASK_CHECK) - # LOE rbx r15 r12d r13d - - movq 8(%rsp), %r13 - cfi_restore(13) - movq (%rsp), %r14 - cfi_restore(14) - vmovups 64(%rsp), %ymm0 - -/* Go to exit */ - jmp L(EXIT) - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -120; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xff, 0xff, 0xff, 0x22 - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -128; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r13 r14 r15 ymm0 - -/* Scalar math fucntion call - * to process special input - */ + /* Instead of using cross-lane permutes on ymm vectors, use vinsertf128 + with memory operand. This helps alleviate bottleneck on p5. 
*/ + vmovdqu 16(%r9, %rax), %xmm5 -L(SCALAR_MATH_CALL): - movl %r12d, %r14d - movss 32(%rsp,%r14,4), %xmm0 - call tanhf@PLT - # LOE rbx r14 r15 r12d r13d xmm0 + vpextrq $1, %xmm1, %rsi + movl %esi, %edi + shrq $32, %rsi - movss %xmm0, 64(%rsp,%r14,4) + vinsertf128 $1, 16(%rdi, %rax), %ymm5, %ymm5 -/* Process special inputs in loop */ - jmp L(SPECIAL_VALUES_LOOP) - # LOE rbx r15 r12d r13d -END(_ZGVdN8v_tanhf_avx2) + vextracti128 $1, %ymm1, %xmm2 + vmovq %xmm2, %rdx + movl %edx, %ecx + shrq $32, %rdx + + vmovdqu (%rcx, %rax), %xmm6 + + vpextrq $1, %xmm2, %r10 + movl %r10d, %r11d + shrq $32, %r10 + + vinsertf128 $1, (%r11, %rax), %ymm6, %ymm6 + + vmovupd 16(%r8, %rax), %xmm1 + vinsertf128 $1, 16(%rsi, %rax), %ymm1, %ymm1 + vmovupd (%rdx, %rax), %xmm3 + vinsertf128 $1, (%r10, %rax), %ymm3, %ymm3 + + vunpcklpd %ymm3, %ymm6, %ymm7 + vunpckhpd %ymm3, %ymm6, %ymm6 + + vunpcklpd %ymm1, %ymm5, %ymm3 + vunpckhpd %ymm1, %ymm5, %ymm1 + + vmovaps TANHF_DATA(_sAbsMask)(%rip), %ymm11 + vandps %ymm11, %ymm0, %ymm4 - .section .rodata, "a" - .align 32 - -#ifdef __svml_stanh_data_internal_typedef -typedef unsigned int VUINT32; -typedef struct -{ - __declspec(align(32)) VUINT32 _dbP[(134*4)][2]; - __declspec(align(32)) VUINT32 _sSignMask[8][1]; - __declspec(align(32)) VUINT32 _sAbsMask[8][1]; - __declspec(align(32)) VUINT32 _iExpMantMask[8][1]; - __declspec(align(32)) VUINT32 _iExpMask[8][1]; - __declspec(align(32)) VUINT32 _iMinIdxOfsMask[8][1]; - __declspec(align(32)) VUINT32 _iMaxIdxMask[8][1]; -} __svml_stanh_data_internal; -#endif -__svml_stanh_data_internal: - /* Pol_000: err=7.93e-09, x in [0.0000000; 0.0312500]. 
*/ - .quad 0x0000000000000000 /* A00 = +0.000000000000000000000e-01 */ - .quad 0x3FF00000022C70EB /* A01 = +1.000000008097283510367e+00 */ - .quad 0xBED00E878CFFA194 /* A02 = -3.828228912518614443549e-06 */ - .quad 0xBFD551766D0607A9 /* A03 = -3.330970825846813476723e-01 */ - .quad 0xBE53D60CE3E4C297 /* A00 = -1.847383956330407336230e-08 */ - .quad 0x3FF000024177CF5C /* A01 = +1.000002151235967140508e+00 */ - .quad 0xBF1758BC94A51A25 /* A02 = -8.906031613262943753568e-05 */ - .quad 0xBFD53EAE67E0D4F0 /* A03 = -3.319507612644221339337e-01 */ - .quad 0xBE5A9E47EF32D6FE /* A00 = -2.479020984039698285657e-08 */ - .quad 0x3FF00002DA983057 /* A01 = +1.000002721676556793895e+00 */ - .quad 0xBF1BD953509E94AA /* A02 = -1.062352277175377670507e-04 */ - .quad 0xBFD53BDB562EEDD5 /* A03 = -3.317783681520414806876e-01 */ - .quad 0xBE6191BBE496D294 /* A00 = -3.272532162914017685901e-08 */ - .quad 0x3FF0000390492017 /* A01 = +1.000003398528866105366e+00 */ - .quad 0xBF20727E814A57CE /* A02 = -1.254825043772153972919e-04 */ - .quad 0xBFD538DE060A6F22 /* A03 = -3.315959033004550748913e-01 */ - .quad 0xBE66DAFA2A893A25 /* A00 = -4.257146219278012568149e-08 */ - .quad 0x3FF0000465E08CD1 /* A01 = +1.000004194219219266770e+00 */ - .quad 0xBF2341C765EF91B6 /* A02 = -1.469188600530365522261e-04 */ - .quad 0xBFD535B6841FAF9E /* A03 = -3.314033785124993469751e-01 */ - .quad 0xBE6D5794E361E964 /* A00 = -5.465394929765249413434e-08 */ - .quad 0x3FF000055EE2A0CB /* A01 = +1.000005121846742950353e+00 */ - .quad 0xBF265E6C77E66C8B /* A02 = -1.706607253709506650304e-04 */ - .quad 0xBFD53264DDCCEDA6 /* A03 = -3.312008062382240103361e-01 */ - .quad 0xBE729C844D374A6E /* A00 = -6.933284462462096107184e-08 */ - .quad 0x3FF000067F019093 /* A01 = +1.000006195180536350264e+00 */ - .quad 0xBF29CC5348D6DCE5 /* A02 = -1.968242326435338705130e-04 */ - .quad 0xBFD52EE92121ED35 /* A03 = -3.309881995734998416658e-01 */ - .quad 0xBE775AEA17EAA872 /* A00 = -8.700465590574974405858e-08 */ - .quad 
0x3FF00007CA1D66B8 /* A01 = +1.000007428656699559610e+00 */ - .quad 0xBF2D8F5EB98A2637 /* A02 = -2.255252009216044881395e-04 */ - .quad 0xBFD52B435CDF9128 /* A03 = -3.307655722585587376727e-01 */ - .quad 0xBE7D04DA28C343F0 /* A00 = -1.081040272327705484794e-07 */ - .quad 0x3FF000094443CCF5 /* A01 = +1.000008837375216730337e+00 */ - .quad 0xBF30D5B76C947AE5 /* A02 = -2.568791210978817814332e-04 */ - .quad 0xBFD52773A0776FAD /* A03 = -3.305329386764651045105e-01 */ - .quad 0xBE81DD77A12C51C7 /* A00 = -1.331054169875768625701e-07 */ - .quad 0x3FF0000AF1AFD2DA /* A01 = +1.000010437096696680470e+00 */ - .quad 0xBF331230624C1680 /* A02 = -2.910011410651516805537e-04 */ - .quad 0xBFD52379FC0B61DF /* A03 = -3.302903138515186909352e-01 */ - .quad 0xBE85D04EEEB3C435 /* A00 = -1.625247628488202841012e-07 */ - .quad 0x3FF0000CD6C9B1F2 /* A01 = +1.000012244238970726684e+00 */ - .quad 0xBF357F0742FADDD4 /* A02 = -3.280060509313874068243e-04 */ - .quad 0xBFD51F56806D0E81 /* A03 = -3.300377134475880880338e-01 */ - .quad 0xBE8A6E289B59681B /* A00 = -1.969211333326924655065e-07 */ - .quad 0x3FF0000EF8268F72 /* A01 = +1.000014275873550406715e+00 */ - .quad 0xBF381E277A1B747A /* A02 = -3.680082682942575423093e-04 */ - .quad 0xBFD51B093F1D6FD4 /* A03 = -3.297751537663746734808e-01 */ - .quad 0xBE8FCBC40EE9ABD5 /* A00 = -2.368983653301529373887e-07 */ - .quad 0x3FF000115A883B6C /* A01 = +1.000016549721943981410e+00 */ - .quad 0xBF3AF17AC974B3D9 /* A02 = -4.111218235774406434303e-04 */ - .quad 0xBFD516924A4C549C /* A03 = -3.295026517456081105450e-01 */ - .quad 0xBE92FFBC60A3F956 /* A00 = -2.831066871072026054144e-07 */ - .quad 0x3FF0001402DCED8A /* A01 = +1.000019084151832604590e+00 */ - .quad 0xBF3DFAE9390C4801 /* A02 = -4.574603454311488280083e-04 */ - .quad 0xBFD511F1B4D7DC3A /* A03 = -3.292202249571719585575e-01 */ - .quad 0xBE9690A22F96D5AD /* A00 = -3.362443262393081632612e-07 */ - .quad 0x3FF00016F63EFF5D /* A01 = +1.000021898173108825247e+00 */ - .quad 0xBF409E2C839605BB /* A02 = 
-5.071370461992499986334e-04 */ - .quad 0xBFD50D27924BEE00 /* A03 = -3.289278916051614487515e-01 */ - .quad 0xBE9AA56C65E72A73 /* A00 = -3.970591019557469835586e-07 */ - .quad 0x3FF0001A39F4A43E /* A01 = +1.000025011433776978009e+00 */ - .quad 0xBF425BD74C3D6667 /* A02 = -5.602647074553602319844e-04 */ - .quad 0xBFD50833F6E1ABA2 /* A03 = -3.286256705238718156536e-01 */ - .quad 0xBE9F4BD4FF1A83B0 /* A00 = -4.663500013744687071912e-07 */ - .quad 0x3FF0001DD36F9EC2 /* A01 = +1.000028444215715683896e+00 */ - .quad 0xBF44376634149405 /* A02 = -6.169556656102642569831e-04 */ - .quad 0xBFD50316F77EDEE5 /* A03 = -3.283135811757190158922e-01 */ - .quad 0xBEA3B625387BB079 /* A00 = -5.874486399249461304297e-07 */ - .quad 0x3FF00023E14CFBA9 /* A01 = +1.000034217911642153709e+00 */ - .quad 0xBF47392F923218D2 /* A02 = -7.087213783883111826306e-04 */ - .quad 0xBFD4FB1FACDEB938 /* A03 = -3.278273761924483942209e-01 */ - .quad 0xBEAA6E24F543500A /* A00 = -7.876828740601738750574e-07 */ - .quad 0x3FF0002D5C6E8412 /* A01 = +1.000043259679163742959e+00 */ - .quad 0xBF4BAF02BD7FDD70 /* A02 = -8.448375110664940040861e-04 */ - .quad 0xBFD4EFEE6527A7DE /* A03 = -3.271442401734229177279e-01 */ - .quad 0xBEB16E3EBE2157D0 /* A00 = -1.038947396133402500647e-06 */ - .quad 0x3FF00038990FEE2F /* A01 = +1.000053975962952312884e+00 */ - .quad 0xBF50569481C574CB /* A02 = -9.972048056490652716971e-04 */ - .quad 0xBFD4E419278DA2B4 /* A03 = -3.264220129263251113372e-01 */ - .quad 0xBEB6A7B6723165D4 /* A00 = -1.350350836279403750524e-06 */ - .quad 0x3FF00045CAB4158E /* A01 = +1.000066558657042303793e+00 */ - .quad 0xBF531D7C9C849108 /* A02 = -1.166698160951775212202e-03 */ - .quad 0xBFD4D7A0BB33B152 /* A03 = -3.256608799117844954552e-01 */ - .quad 0xBEBD0EE2A8654AFD /* A00 = -1.732000471561702711532e-06 */ - .quad 0x3FF00055276F18D6 /* A01 = +1.000081209219890521211e+00 */ - .quad 0xBF562FDBA3FB6C6C /* A02 = -1.354183666925102939860e-03 */ - .quad 0xBFD4CA85F1B93DB2 /* A03 = 
-3.248610363561638125773e-01 */ - .quad 0xBEC269D4036A207E /* A00 = -2.195047297096822741730e-06 */ - .quad 0x3FF00066E7DA6E4E /* A01 = +1.000098138500919997540e+00 */ - .quad 0xBF5991499FC36B3A /* A02 = -1.560518167983372759405e-03 */ - .quad 0xBFD4BCC9A72283D6 /* A03 = -3.240226871658341556426e-01 */ - .quad 0xBEC7154B6C09CFE1 /* A00 = -2.751729738565190291276e-06 */ - .quad 0x3FF0007B47086B80 /* A01 = +1.000117566559055148900e+00 */ - .quad 0xBF5D455433B4F8F4 /* A02 = -1.786548832412968197680e-03 */ - .quad 0xBFD4AE6CC1BFE145 /* A03 = -3.231460468373550942722e-01 */ - .quad 0xBECCA68CC64A0F8A /* A00 = -3.415415948561670285790e-06 */ - .quad 0x3FF00092827742F7 /* A01 = +1.000139722473418535387e+00 */ - .quad 0xBF60A7BF15A527AF /* A02 = -2.033112728132522705610e-03 */ - .quad 0xBFD49F703214084C /* A03 = -3.222313393636155876010e-01 */ - .quad 0xBED19E68676B241B /* A00 = -4.200644630977303616698e-06 */ - .quad 0x3FF000ACDA037B26 /* A01 = +1.000164844146362863597e+00 */ - .quad 0xBF62D99F836A02F8 /* A02 = -2.301036405072284102280e-03 */ - .quad 0xBFD48FD4F2B91B28 /* A03 = -3.212787981359945810311e-01 */ - .quad 0xBED57CF4B0C7AA54 /* A00 = -5.123164339408145209103e-06 */ - .quad 0x3FF000CA8FD9E1A1 /* A01 = +1.000193178099017865534e+00 */ - .quad 0xBF653A014548E686 /* A02 = -2.591135484433962181405e-03 */ - .quad 0xBFD47F9C0844B38F /* A03 = -3.202886658426046806447e-01 */ - .quad 0xBEDA012B1B1A41E2 /* A00 = -6.199971197454598722328e-06 */ - .quad 0x3FF000EBE868FDF4 /* A01 = +1.000224979259539459520e+00 */ - .quad 0xBF67CA9427E0A544 /* A02 = -2.904214255086275467410e-03 */ - .quad 0xBFD46EC6812ADB37 /* A03 = -3.192611943626845749655e-01 */ - .quad 0xBEDF3EAC5BF12194 /* A00 = -7.449344990702664567927e-06 */ - .quad 0x3FF001112A520784 /* A01 = +1.000260510744255704196e+00 */ - .quad 0xBF6A8D01ABDA4DC4 /* A02 = -3.241065277345108255891e-03 */ - .quad 0xBFD45D55759FFA4A /* A03 = -3.181966446572103146551e-01 */ - .quad 0xBEE2A541BC274267 /* A00 = 
-8.890883582164319970972e-06 */ - .quad 0x3FF0013A9E5961F2 /* A01 = +1.000300043631906721231e+00 */ - .quad 0xBF6D82ECD080C540 /* A02 = -3.602468994380686462264e-03 */ - .quad 0xBFD44B4A0779C0AD /* A03 = -3.170952866557950611259e-01 */ - .quad 0xBEE61D97609A27F4 /* A00 = -1.054553560499505625520e-05 */ - .quad 0x3FF001688F56A3AF /* A01 = +1.000343856731187974773e+00 */ - .quad 0xBF7056F8EFB683EC /* A02 = -3.989193351487490407647e-03 */ - .quad 0xBFD438A5620F0F74 /* A03 = -3.159573991399533543500e-01 */ - .quad 0xBEEA145429EDD370 /* A00 = -1.243563138839952927732e-05 */ - .quad 0x3FF0019B4A242A67 /* A01 = +1.000392236341804297339e+00 */ - .quad 0xBF7207D31CA78D9B /* A02 = -4.401993423445739288258e-03 */ - .quad 0xBFD42568BA16E7CD /* A03 = -3.147832696228050619602e-01 */ - .quad 0xBEEE96370D52680F /* A00 = -1.458491207477835326165e-05 */ - .quad 0x3FF001D31D8E4115 /* A01 = +1.000445476009251821736e+00 */ - .quad 0xBF73D4CC11EDC094 /* A02 = -4.841611050196221316400e-03 */ - .quad 0xBFD411954D8664E7 /* A03 = -3.135731942252974469021e-01 */ - .quad 0xBEF338C046215EF8 /* A00 = -1.833122622260562810219e-05 */ - .quad 0x3FF00230C32C2EC1 /* A01 = +1.000534784691737621998e+00 */ - .quad 0xBF76BD019BCC5DAF /* A02 = -5.551344188254799492943e-03 */ - .quad 0xBFD3F2C7156DC21E /* A03 = -3.116929730668135389848e-01 */ - .quad 0xBEF9B15EAE411EAE /* A00 = -2.450261207822986676092e-05 */ - .quad 0x3FF002C2DF057A4D /* A01 = +1.000674124886830940184e+00 */ - .quad 0xBF7B08CCD9AC1E30 /* A02 = -6.600189396301511801646e-03 */ - .quad 0xBFD3C7A7A114FED8 /* A03 = -3.090609620157755976777e-01 */ - .quad 0xBF00E36483C373B3 /* A00 = -3.221178528332122595812e-05 */ - .quad 0x3FF0036F419480D7 /* A01 = +1.000838524028997644777e+00 */ - .quad 0xBF7FD255D1777007 /* A02 = -7.768950679260206403087e-03 */ - .quad 0xBFD39A453911D6CE /* A03 = -3.062909180947429588215e-01 */ - .quad 0xBF05DFA04DD12059 /* A00 = -4.172046622180685472624e-05 */ - .quad 0x3FF00438B2A03D8D /* A01 = 
+1.001030633695197069599e+00 */ - .quad 0xBF828F8DBB4A9D10 /* A02 = -9.062869337255224921890e-03 */ - .quad 0xBFD36AAB704697D9 /* A03 = -3.033856007044711255993e-01 */ - .quad 0xBF0BF3E0C647DEFB /* A00 = -5.331544597092331081714e-05 */ - .quad 0x3FF005221063D36D /* A01 = +1.001253189109060359741e+00 */ - .quad 0xBF857A2CB3C96102 /* A02 = -1.048693584122917590862e-02 */ - .quad 0xBFD338E65BBB4FEC /* A03 = -3.003478904549854444639e-01 */ - .quad 0xBF11A506ED7C9D31 /* A00 = -6.730894835681591541979e-05 */ - .quad 0x3FF0062E4D0EA92A /* A01 = +1.001508999829250345925e+00 */ - .quad 0xBF88AB82C2761AF3 /* A02 = -1.204588085125866091241e-02 */ - .quad 0xBFD305028D6BD206 /* A03 = -2.971807843271395688234e-01 */ - .quad 0xBF1607C0922D9BF1 /* A00 = -8.403885708006799337092e-05 */ - .quad 0x3FF007606C341961 /* A01 = +1.001800940198869449560e+00 */ - .quad 0xBF8C25E6DA487BCF /* A02 = -1.374416688582682892494e-02 */ - .quad 0xBFD2CF0D0EE8F7B5 /* A03 = -2.938873906713255768075e-01 */ - .quad 0xBF1B3A8480A0A16D /* A00 = -1.038688061788578038307e-04 */ - .quad 0x3FF008BB802D02D6 /* A01 = +1.002131939589323561535e+00 */ - .quad 0xBF8FEB8AE99FD100 /* A02 = -1.558598065819483124983e-02 */ - .quad 0xBFD297135BD0911B /* A03 = -2.904709240558688843059e-01 */ - .quad 0xBF20ABB9BDB75C65 /* A00 = -1.271881327357976163798e-04 */ - .quad 0x3FF00A42A76D8CD1 /* A01 = +1.002504972472525901495e+00 */ - .quad 0xBF91FF3D752BB9E6 /* A02 = -1.757522609380570560722e-02 */ - .quad 0xBFD25D235C1F88B4 /* A03 = -2.869346999779154305799e-01 */ - .quad 0xBF243D3254425461 /* A00 = -1.544116913733432829448e-04 */ - .quad 0x3FF00BF909D1795E /* A01 = +1.002923048355647051011e+00 */ - .quad 0xBF94304E04D44942 /* A02 = -1.971551804042204897316e-02 */ - .quad 0xBFD2214B5E61CFA6 /* A03 = -2.832821294498394371075e-01 */ - .quad 0xBF286070011B61CE /* A00 = -1.859795307186510085994e-04 */ - .quad 0x3FF00DE1D5E1627E /* A01 = +1.003389201612804537689e+00 */ - .quad 0xBF9689D5F4163F59 /* A02 = 
-2.201017668045266231780e-02 */ - .quad 0xBFD1E39A11C3B42C /* A03 = -2.795167134743816728104e-01 */ - .quad 0xBF2D250B366A79E8 /* A00 = -2.223564326486314902259e-04 */ - .quad 0x3FF010003E134001 /* A01 = +1.003906481248123094829e+00 */ - .quad 0xBF990C9FF91F6F81 /* A02 = -2.446222265267250853271e-02 */ - .quad 0xBFD1A41E80084CDC /* A03 = -2.756420374218586655246e-01 */ - .quad 0xBF314DB5DDC2A30E /* A00 = -2.640313157465248123865e-04 */ - .quad 0x3FF012577608921B /* A01 = +1.004477940624503018441e+00 */ - .quad 0xBF9BB9626875B0C9 /* A02 = -2.707437288829409385849e-02 */ - .quad 0xBFD162E80768A9D0 /* A03 = -2.716617653228725615122e-01 */ - .quad 0xBF346A6133808864 /* A00 = -3.115165050094957730625e-04 */ - .quad 0x3FF014EAAFCC88A3 /* A01 = +1.005106627192198898157e+00 */ - .quad 0xBF9E90BEF9BF7419 /* A02 = -2.984903716411588595059e-02 */ - .quad 0xBFD12006545F7FAD /* A03 = -2.675796340899932457269e-01 */ - .quad 0xBF37F180DC3848EA /* A00 = -3.653468704395550778821e-04 */ - .quad 0x3FF017BD19147861 /* A01 = +1.005795572250939295955e+00 */ - .quad 0xBFA0C9A14C702E07 /* A02 = -3.278831537326359207851e-02 */ - .quad 0xBFD0DB895B650092 /* A03 = -2.633994476818851682154e-01 */ - .quad 0xBF3BEC6AAC6D7635 /* A00 = -4.260788377246944457107e-04 */ - .quad 0x3FF01AD1D884E719 /* A01 = +1.006547780778822565040e+00 */ - .quad 0xBFA260B2A1B1434A /* A02 = -3.589399551186163439542e-02 */ - .quad 0xBFD09581529E93D6 /* A03 = -2.591250712233067465817e-01 */ - .quad 0xBF4164E26167882B /* A00 = -5.308251737086202562063e-04 */ - .quad 0x3FF01FEF14B62B81 /* A01 = +1.007796364693348545316e+00 */ - .quad 0xBFA4EB014538AA42 /* A02 = -4.085544557559163403315e-02 */ - .quad 0xBFD029D36FEAF41F /* A03 = -2.525528519580024222613e-01 */ - .quad 0xBF46F6FFF4E53DC8 /* A00 = -7.008313930700277652464e-04 */ - .quad 0x3FF027CBB51CBBA0 /* A01 = +1.009715754956893363214e+00 */ - .quad 0xBFA89DEC9FEC112E /* A02 = -4.807986690687680864098e-02 */ - .quad 0xBFCF2A99464D0DB4 /* A03 = 
-2.434875100390009317053e-01 */ - .quad 0xBF4DCC9C4F66A4D9 /* A00 = -9.094012482836712945103e-04 */ - .quad 0x3FF030E7CFCCD583 /* A01 = +1.011939822882909068014e+00 */ - .quad 0xBFACAA3B95814081 /* A02 = -5.598627281199331645611e-02 */ - .quad 0xBFCDF78F156BE7CF /* A03 = -2.341173987004467604844e-01 */ - .quad 0xBF5308ED74E5C7A6 /* A00 = -1.161796466103906435435e-03 */ - .quad 0x3FF03B5986412ECB /* A01 = +1.014489674026594512313e+00 */ - .quad 0xBFB087EBA88DCC3F /* A02 = -6.457398285947223148806e-02 */ - .quad 0xBFCCBB9BD134862F /* A03 = -2.244753619680052991736e-01 */ - .quad 0xBF57FA23C00DF4B5 /* A00 = -1.463446533505758208674e-03 */ - .quad 0x3FF0473558A1BCC0 /* A01 = +1.017384859292903342975e+00 */ - .quad 0xBFB2E702BC6360EF /* A02 = -7.383744334527241048871e-02 */ - .quad 0xBFCB77D546379288 /* A03 = -2.145945160729250122955e-01 */ - .quad 0xBF5DD12971557F71 /* A00 = -1.819887610814388068450e-03 */ - .quad 0x3FF0548DDF5000A8 /* A01 = +1.020643112482540360020e+00 */ - .quad 0xBFB571B63DA186E1 /* A02 = -8.376635555898871710045e-02 */ - .quad 0xBFCA2D5202605148 /* A03 = -2.045080672838912594358e-01 */ - .quad 0xBF6252B1AD5D4F17 /* A00 = -2.236697221556737096709e-03 */ - .quad 0x3FF063738A910BF7 /* A01 = +1.024280110622155737232e+00 */ - .quad 0xBFB8270C8E6B601B /* A02 = -9.434584118878357184013e-02 */ - .quad 0xBFC8DD27D950A07E /* A03 = -1.942491351230763441116e-01 */ - .quad 0xBF66470C91730CFC /* A00 = -2.719425723258004842786e-03 */ - .quad 0x3FF073F468FCF331 /* A01 = +1.028309259519300633556e+00 */ - .quad 0xBFBB05C2952191E4 /* A02 = -1.055566419686964629854e-01 */ - .quad 0xBFC7886A770DE2BD /* A03 = -1.838505822486435070662e-01 */ - .quad 0xBF6AD114AC8E98EC /* A00 = -3.273525599485007861467e-03 */ - .quad 0x3FF0861BF53E5226 /* A01 = +1.032741506559554434119e+00 */ - .quad 0xBFBE0C4F9B461507 /* A02 = -1.173753503881763554650e-01 */ - .quad 0xBFC6302A037CDE3A /* A03 = -1.733448521642786954722e-01 */ - .quad 0xBF6FFBDE2A6C2AF8 /* A00 = 
-3.904279630096648551207e-03 */ - .quad 0x3FF099F2EB8E7DA3 /* A01 = +1.037585182326304034106e+00 */ - .quad 0xBFC09C74D192DDF0 /* A02 = -1.297746680554463516444e-01 */ - .quad 0xBFC4D571D8E3079F /* A03 = -1.627638157861470424859e-01 */ - .quad 0xBF72E8FDC0B952AA /* A00 = -4.616728994353872309042e-03 */ - .quad 0x3FF0AF7F273C9533 /* A01 = +1.042845872181101141152e+00 */ - .quad 0xBFC244C512736F10 /* A02 = -1.427236881344176033792e-01 */ - .quad 0xBFC379474F58B902 /* A03 = -1.521386277613104298645e-01 */ - .quad 0xBF762EABAF17395B /* A00 = -5.415602341101023557701e-03 */ - .quad 0x3FF0C6C3886F63FB /* A01 = +1.048526318502125631582e+00 */ - .quad 0xBFC3FDF9918EA12A /* A02 = -1.561881981590514389957e-01 */ - .quad 0xBFC21CA89ECAB895 /* A03 = -1.414995932913753196036e-01 */ - .quad 0xBF79D387CE5B2BAE /* A00 = -6.305246822828998107258e-03 */ - .quad 0x3FF0DFBFE2346376 /* A01 = +1.054626353847394337748e+00 */ - .quad 0xBFC5C6DA43602620 /* A02 = -1.701309994680721970894e-01 */ - .quad 0xBFC0C08BD8DB6631 /* A03 = -1.308760460731704100557e-01 */ - .quad 0xBF7DDBA8E8DA9060 /* A00 = -7.289562037531366334164e-03 */ - .quad 0x3FF0FA70F0D1B464 /* A01 = +1.061142864894713433443e+00 */ - .quad 0xBFC79E18D92BAA7C /* A02 = -1.845122394946264732241e-01 */ - .quad 0xBFBECBBBF74C2669 /* A03 = -1.202962378266875381749e-01 */ - .quad 0xBF81254E76EA25DA /* A00 = -8.371937755572145950511e-03 */ - .quad 0x3FF116D05835EBD0 /* A01 = +1.068069786618014660462e+00 */ - .quad 0xBFC982539E2ED224 /* A02 = -1.992897531869327609755e-01 */ - .quad 0xBFBC1B043C350159 /* A03 = -1.097872397413132278254e-01 */ - .quad 0xBF8391ACBA863403 /* A00 = -9.555196230190082448686e-03 */ - .quad 0x3FF134D4AA477FE2 /* A01 = +1.075398125794884141015e+00 */ - .quad 0xBFCB7218609FEAFB /* A02 = -2.144194099235717521079e-01 */ - .quad 0xBFB970A16CB88329 /* A03 = -9.937485603633135211599e-02 */ - .quad 0xBF87935088E48E8B /* A00 = -1.151144902957603431692e-02 */ - .quad 0x3FF1649892AD7DD3 /* A01 = 
+1.087059567413110938716e+00 */ - .quad 0xBFCE6971DDE75409 /* A02 = -2.375929196847723912089e-01 */ - .quad 0xBFB58291E88CB251 /* A03 = -8.402358939628952472223e-02 */ - .quad 0xBF8DB3A62C325325 /* A00 = -1.450280973794233242702e-02 */ - .quad 0x3FF1A9C900C6DEEA /* A01 = +1.103951457056548068891e+00 */ - .quad 0xBFD13DBC65B0E08E /* A02 = -2.693930619311765140012e-01 */ - .quad 0xBFB06696F62696D1 /* A03 = -6.406539449252625362252e-02 */ - .quad 0xBF92583699F2E27A /* A00 = -1.791463198307716858659e-02 */ - .quad 0x3FF1F451B85AA9F0 /* A01 = +1.122148246892376022288e+00 */ - .quad 0xBFD34FD5F8288180 /* A02 = -3.017477916164565954205e-01 */ - .quad 0xBFA6FB692825B683 /* A03 = -4.488686194495718900788e-02 */ - .quad 0xBF9641C26E673D6F /* A00 = -2.173522757385398448959e-02 */ - .quad 0x3FF24364DA5E2B07 /* A01 = +1.141453602790251542487e+00 */ - .quad 0xBFD564A5A5EF5890 /* A02 = -3.342680092295120530821e-01 */ - .quad 0xBF9B43712011A982 /* A03 = -2.662445791467283467968e-02 */ - .quad 0xBF9A901038EC2F39 /* A00 = -2.594018313816024226548e-02 */ - .quad 0x3FF2961356DFFEBA /* A01 = +1.161639537196534011088e+00 */ - .quad 0xBFD775EBB17198C7 /* A02 = -3.665723069046972759644e-01 */ - .quad 0xBF833B1A926CD462 /* A03 = -9.390075295963199591975e-03 */ - .quad 0xBF9F396A6A461B91 /* A00 = -3.049246095317987084727e-02 */ - .quad 0x3FF2EB53BAEF534B /* A01 = +1.182452898229899629357e+00 */ - .quad 0xBFD97DABF8AD8BBD /* A02 = -3.982953957076310058660e-01 */ - .quad 0x3F7B8F6A3E0F8837 /* A03 = +6.728568086119371925713e-03 */ - .quad 0xBFA21878590F8BAA /* A00 = -3.534294211546946951064e-02 */ - .quad 0x3FF34209790236E1 /* A01 = +1.203622315111197105253e+00 */ - .quad 0xBFDB764C0E71BECB /* A02 = -4.290952817018306997277e-01 */ - .quad 0x3F962FE0C03F84C0 /* A03 = +2.166701482190513949888e-02 */ - .quad 0xBFA4B36B9AD27ECC /* A00 = -4.043136849327097492868e-02 */ - .quad 0x3FF3990C5B12FC16 /* A01 = +1.224865298994477935679e+00 */ - .quad 0xBFDD5AABB0D01390 /* A02 = 
-4.586590983092770912322e-01 */ - .quad 0x3FA21DAF5CA162DB /* A03 = +3.538272863142363083844e-02 */ - .quad 0xBFA7645E4D7BF28B /* A00 = -4.568762489177399105378e-02 */ - .quad 0x3FF3EF2FD51C0D9F /* A01 = +1.245895225962932562069e+00 */ - .quad 0xBFDF26377E1B686E /* A02 = -4.867075664057044503963e-01 */ - .quad 0x3FA8803E756EE812 /* A03 = +4.785342391501513914509e-02 */ - .quad 0xBFAA210925C64413 /* A00 = -5.103329263796054643398e-02 */ - .quad 0x3FF44349F897D8E7 /* A01 = +1.266427966181760345066e+00 */ - .quad 0xBFE06A7B02C6D8E2 /* A02 = -5.129981092675530707226e-01 */ - .quad 0x3FAE3F194734F5D0 /* A03 = +5.907515520309980505687e-02 */ - .quad 0xBFACDE48F8A19BBB /* A00 = -5.638340029764018351832e-02 */ - .quad 0x3FF49439D5466582 /* A01 = +1.286187966447272845727e+00 */ - .quad 0xBFE131C7C1063DDC /* A02 = -5.373266954429101183166e-01 */ - .quad 0x3FB1ADEEC36AD805 /* A03 = +6.906025191241844940482e-02 */ - .quad 0xBFAF905D8F585680 /* A00 = -6.164829611604449866036e-02 */ - .quad 0x3FF4E0ED1FD27F99 /* A01 = +1.304913639360142818546e+00 */ - .quad 0xBFE1E7A859DC1D3D /* A02 = -5.595285182070380836095e-01 */ - .quad 0x3FB3ED018E4642A1 /* A03 = +7.783517573831001679086e-02 */ - .quad 0xBFB11595104160BA /* A00 = -6.673556944713512906198e-02 */ - .quad 0x3FF528650340490B /* A01 = +1.322361958217302513319e+00 */ - .quad 0xBFE28B14B40BC974 /* A02 = -5.794776455425521000109e-01 */ - .quad 0x3FB5DF49F5BAF6D7 /* A03 = +8.543836831355676453281e-02 */ - .quad 0xBFB2513A97344BA4 /* A00 = -7.155195418844911836587e-02 */ - .quad 0x3FF569BA0DB5EE14 /* A01 = +1.338312200124055273420e+00 */ - .quad 0xBFE31B53A8B67B20 /* A02 = -5.970857901737396389308e-01 */ - .quad 0x3FB787F297BB0544 /* A03 = +9.191814617499455275507e-02 */ - .quad 0xBFB37512E848FAFA /* A00 = -7.600515528700305112331e-02 */ - .quad 0x3FF5A41F33B403C8 /* A01 = +1.352568819013173495591e+00 */ - .quad 0xBFE397F6EA9A58A5 /* A02 = -6.123003561103997904880e-01 */ - .quad 0x3FB8EAA9FF25CA06 /* A03 = 
+9.733068923177520814782e-02 */ - .quad 0xBFB47B3E603AFC5D /* A00 = -8.000554894805263217439e-02 */ - .quad 0x3FF5D6E3EDE40487 /* A01 = +1.364963464031718975988e+00 */ - .quad 0xBFE400D5BCA6D631 /* A02 = -6.251019177058819709103e-01 */ - .quad 0x3FBA0B830ED567FE /* A03 = +1.017381583418739132707e-01 */ - .quad 0xBFB5BBFE8AC90496 /* A00 = -8.489981544791400103200e-02 */ - .quad 0x3FF612BA70107E95 /* A01 = +1.379572332145390989311e+00 */ - .quad 0xBFE477EAF1FA7693 /* A02 = -6.396383978023599814478e-01 */ - .quad 0x3FBB4784B7C08A95 /* A03 = +1.065600346196709652391e-01 */ - .quad 0xBFB6D5D940743939 /* A00 = -8.920057128509463473254e-02 */ - .quad 0x3FF644A8748F70CE /* A01 = +1.391762214006166953340e+00 */ - .quad 0xBFE4D646AB07EA37 /* A02 = -6.511567440459832267763e-01 */ - .quad 0x3FBC354F4E1D5292 /* A03 = +1.101884427747086558913e-01 */ - .quad 0xBFB7223D19E4F3D1 /* A00 = -9.036619074045339206069e-02 */ - .quad 0x3FF6518FEB42B7FA /* A01 = +1.394912642466350494175e+00 */ - .quad 0xBFE4ED86CB87498C /* A02 = -6.539949393430091184598e-01 */ - .quad 0x3FBC6D29F28CCA9B /* A03 = +1.110407082713131127205e-01 */ - .quad 0xBFB6878652FF6312 /* A00 = -8.800544287022329936754e-02 */ - .quad 0x3FF63948C302D040 /* A01 = +1.388985406648330922508e+00 */ - .quad 0xBFE4C4E2E7904E17 /* A02 = -6.490339777687407218920e-01 */ - .quad 0x3FBC127356CA1ABE /* A03 = +1.096565329445224612481e-01 */ - .quad 0xBFB4F5D18B0C91D6 /* A00 = -8.187589306596207427980e-02 */ - .quad 0x3FF5FD27EB7DD0B8 /* A01 = +1.374305648697413673176e+00 */ - .quad 0xBFE464E01A2B2FC6 /* A02 = -6.373138915164353601739e-01 */ - .quad 0x3FBB460547674A30 /* A03 = +1.065371798825160976065e-01 */ - .quad 0xBFB26642FA16A685 /* A00 = -7.187288861919156890412e-02 */ - .quad 0x3FF59F9BEDE1C95A /* A01 = +1.351467065073470141812e+00 */ - .quad 0xBFE3D67920C8FBEA /* A02 = -6.199308052381387046381e-01 */ - .quad 0x3FBA24F6A8D3CBC1 /* A03 = +1.021265184570401413078e-01 */ - .quad 0xBFADB5294794F097 /* A00 = 
-5.802277563859197656582e-02 */ - .quad 0x3FF523EA7B9CF453 /* A01 = +1.321268542159732772845e+00 */ - .quad 0xBFE322A8B55E35DB /* A02 = -5.979808370918208160205e-01 */ - .quad 0x3FB8C8673B1B3E37 /* A03 = +9.680791085269722928697e-02 */ - .quad 0xBFA4B7D661965C6A /* A00 = -4.046506825687219699450e-02 */ - .quad 0x3FF48DE3E2CE3122 /* A01 = +1.284641157110919085227e+00 */ - .quad 0xBFE251FED1A7F445 /* A02 = -5.725092024655472622285e-01 */ - .quad 0x3FB745699FCABDB9 /* A03 = +9.090290213747821701507e-02 */ - .quad 0xBF93E60456E4EE1D /* A00 = -1.943213253365004902773e-02 */ - .quad 0x3FF3E1A14E628A59 /* A01 = +1.242585474196536532432e+00 */ - .quad 0xBFE16C5AB660E876 /* A02 = -5.444768488007543094653e-01 */ - .quad 0x3FB5AD33AA8C188F /* A03 = +8.467410005332197397987e-02 */ - .quad 0x3F738C17C47C7961 /* A00 = +4.772274820224659853951e-03 */ - .quad 0x3FF3234DDE3BD146 /* A01 = +1.196119182682268355933e+00 */ - .quad 0xBFE078C0D77A9D3B /* A02 = -5.147403915952176722826e-01 */ - .quad 0x3FB40D74B3E276B8 /* A03 = +7.833032027925923568290e-02 */ - .quad 0x3FA0474BECC689C7 /* A00 = +3.179394975019849550746e-02 */ - .quad 0x3FF256FB4FA7D18A /* A01 = +1.146235762743432307076e+00 */ - .quad 0xBFDEFA8E3FB285E2 /* A02 = -4.840427038235174395098e-01 */ - .quad 0x3FB270C007493D59 /* A03 = +7.203293016322244446403e-02 */ - .quad 0x3FAF5BD51E479BDC /* A00 = +6.124750132203590768931e-02 */ - .quad 0x3FF18081D0B53BC5 /* A01 = +1.093873801484492647162e+00 */ - .quad 0xBFDCFE2439BD0C03 /* A02 = -4.530115665294831006626e-01 */ - .quad 0x3FB0DEFE5A45AFDD /* A03 = +6.590261176978580437424e-02 */ - .quad 0x3FB7BD5D2806EA26 /* A00 = +9.273321368429118805032e-02 */ - .quad 0x3FF0A369E35B4440 /* A01 = +1.039895904647224256223e+00 */ - .quad 0xBFDB04BC5C9951E7 /* A02 = -4.221640495573226181669e-01 */ - .quad 0x3FAEBBBAA9D6DEEF /* A03 = +6.002600978120919278380e-02 */ - .quad 0x3FC01BE411098DBC /* A00 = +1.258511622610124502941e-01 */ - .quad 0x3FEF85BDABC031C1 /* A01 = 
+9.850757936961188621083e-01 */ - .quad 0xBFD91521375097C2 /* A02 = -3.919146576102968682065e-01 */ - .quad 0x3FABE26F0086D982 /* A03 = +5.446192628317005068883e-02 */ - .quad 0x3FC481D7FF5776B9 /* A00 = +1.602125164781023347604e-01 */ - .quad 0x3FEDC3506C1E7218 /* A01 = +9.300920592973538347792e-01 */ - .quad 0xBFD7349A88DA7D4F /* A02 = -3.625856720409119104964e-01 */ - .quad 0x3FA936E2DFF8E2AE /* A03 = +4.924687370334389358018e-02 */ - .quad 0x3FC90471F96FA27A /* A00 = +1.954481571149420671141e-01 */ - .quad 0x3FEC0451601987A2 /* A01 = +8.755270840595026360376e-01 */ - .quad 0xBFD5671CD4B898DC /* A02 = -3.344184949259110251063e-01 */ - .quad 0x3FA6BB9594603B67 /* A03 = +4.439990459660841243261e-02 */ - .quad 0x3FCFD8ADB9ED944C /* A00 = +2.488000066615846384011e-01 */ - .quad 0x3FE978C073F6809A /* A01 = +7.959902062321078108909e-01 */ - .quad 0xBFD2DF7E00BCD5A9 /* A02 = -2.948908812716931060471e-01 */ - .quad 0x3FA3614033D490B2 /* A03 = +3.785133965200894456959e-02 */ - .quad 0x3FD4846A12AFE5A0 /* A00 = +3.205819303981005674586e-01 */ - .quad 0x3FE63A1147D40472 /* A01 = +6.945883181471244061100e-01 */ - .quad 0xBFCFA2268AD34450 /* A02 = -2.471359422548027318101e-01 */ - .quad 0x3F9F150201D9FFE0 /* A03 = +3.035357605267552383310e-02 */ - .quad 0x3FD9018641F82BEB /* A00 = +3.907180446846598154131e-01 */ - .quad 0x3FE33B7C220FFBDC /* A01 = +6.010113396913498995389e-01 */ - .quad 0xBFCA4E4187E29C86 /* A02 = -2.055131829740483584423e-01 */ - .quad 0x3F98C30CED19F8F4 /* A03 = +2.418155858185229434287e-02 */ - .quad 0x3FDD4B8255BEB078 /* A00 = +4.577337109901757905561e-01 */ - .quad 0x3FE0858B19D3A49B /* A01 = +5.163016800335243905451e-01 */ - .quad 0xBFC5BC929EACE564 /* A02 = -1.698172831327539045176e-01 */ - .quad 0x3F93A083CE57DE2B /* A03 = +1.916700312537337677621e-02 */ - .quad 0x3FE0A8E5E039295C /* A00 = +5.206174258576470315063e-01 */ - .quad 0x3FDC35E1234583FE /* A01 = +4.407885403107342225937e-01 */ - .quad 0xBFC1DE034E31AEB9 /* A02 = 
-1.395877963835710222629e-01 */ - .quad 0x3F8EFDEBB3471BDC /* A03 = +1.513275280821162888101e-02 */ - .quad 0x3FE2851B603CB2A5 /* A00 = +5.787484054213406503564e-01 */ - .quad 0x3FD7F4A44ABBB286 /* A01 = +3.743067483726821853551e-01 */ - .quad 0xBFBD3EEB67087DE7 /* A02 = -1.142413260026767657385e-01 */ - .quad 0x3F8864F38329E8BD /* A03 = +1.191129917173260922836e-02 */ - .quad 0x3FE437DBE3C34AC1 /* A00 = +6.318187187665317283702e-01 */ - .quad 0x3FD43F6F789441B5 /* A01 = +3.163717916040938438194e-01 */ - .quad 0xBFB7D92E7901B9A4 /* A02 = -9.315767721429907277653e-02 */ - .quad 0x3F8327ED342308E1 /* A03 = +9.353497651663324544136e-03 */ - .quad 0x3FE5C0977766D55C /* A00 = +6.797597248138731451661e-01 */ - .quad 0x3FD10B42A764D8F9 /* A01 = +2.663122782427219115142e-01 */ - .quad 0xBFB3633351D3D70F /* A02 = -7.573242900602060456716e-02 */ - .quad 0x3F7E079E30FF899C /* A03 = +7.331483779099558922843e-03 */ - .quad 0x3FE7202CE08A88C4 /* A00 = +7.226776490754436288455e-01 */ - .quad 0x3FCC973EB5662B01 /* A01 = +2.233656297433626314319e-01 */ - .quad 0xBFAF70A455F9920B /* A02 = -6.140626477716545211782e-02 */ - .quad 0x3F77812411CE99B6 /* A03 = +5.738392731393584730859e-03 */ - .quad 0x3FE85879424095B1 /* A00 = +7.608000082006382003286e-01 */ - .quad 0x3FC7E73BD1674D84 /* A01 = +1.867441914060742336190e-01 */ - .quad 0xBFA96F84E4BF333B /* A02 = -4.967894832916504993525e-02 */ - .quad 0x3F72606DDCA6E117 /* A03 = +4.486493251924870105662e-03 */ - .quad 0x3FE96BFE4957F4DD /* A00 = +7.944327766887472330737e-01 */ - .quad 0x3FC3ED4780D25478 /* A01 = +1.556786898624158421711e-01 */ - .quad 0xBFA489C5F9A56B58 /* A02 = -4.011362717093075458408e-02 */ - .quad 0x3F6CB5DC17E9AD2A /* A03 = +3.504686231556104931972e-03 */ - .quad 0x3FEA5D9CB2F41234 /* A00 = +8.239272589858672724006e-01 */ - .quad 0x3FC091A758374DCF /* A01 = +1.294449978582705440555e-01 */ - .quad 0xBFA08E436D4B5CE0 /* A02 = -3.233538350257858517978e-02 */ - .quad 0x3F666997AD53E6B7 /* A03 = 
+2.735897297154145629133e-03 */ - .quad 0x3FEB3060342CB850 /* A00 = +8.496552485501158713532e-01 */ - .quad 0x3FBB7D30BBC7DC1B /* A01 = +1.073790033768634993860e-01 */ - .quad 0xBF9AA6BA3443D9E3 /* A02 = -2.602663940430173170060e-02 */ - .quad 0x3F617CA764B7850B /* A03 = +2.134634914668814050648e-03 */ - .quad 0x3FEBE759A6A0C7B8 /* A00 = +8.719909910635044170135e-01 */ - .quad 0x3FB6C10DE6A703FF /* A01 = +8.888327485239243264115e-02 */ - .quad 0xBF956C566D8BE1F6 /* A02 = -2.092108768099084498138e-02 */ - .quad 0x3F5B46D1A4A59CF8 /* A03 = +1.664833764687232917079e-03 */ - .quad 0x3FEC858494887A04 /* A00 = +8.912985707318630268503e-01 */ - .quad 0x3FB2CC31F543394D /* A01 = +7.342827070099140762682e-02 */ - .quad 0xBF9133477FF69137 /* A02 = -1.679717749142747504343e-02 */ - .quad 0x3F5544482FBB4DA5 /* A03 = +1.298017973501022466823e-03 */ - .quad 0x3FED0DB59D0E32E9 /* A00 = +9.079235141267335551518e-01 */ - .quad 0x3FAF006BAFFC6EF4 /* A01 = +6.055008433597022787787e-02 */ - .quad 0xBF8B97146FA2B97A /* A02 = -1.347175565419144252499e-02 */ - .quad 0x3F5093B01F4CDC69 /* A03 = +1.011774057770665211434e-03 */ - .quad 0x3FEDB487C3EC457C /* A00 = +9.282873942012623835751e-01 */ - .quad 0x3FA7390C09D0BD1D /* A01 = +4.535710925881118044112e-02 */ - .quad 0xBF83D9F7C3181106 /* A02 = -9.693084374710735778846e-03 */ - .quad 0x3F46E34A0A3C0E64 /* A03 = +6.984817050299072134500e-04 */ - .quad 0x3FEE5FFCB4E6EB00 /* A00 = +9.492171796076434020506e-01 */ - .quad 0x3F9F4913ED00AADF /* A01 = +3.055220731782070861526e-02 */ - .quad 0xBF79670BD0E59B5C /* A02 = -6.201788097633133961528e-03 */ - .quad 0x3F3BC998EBCAF96D /* A03 = +4.240034429975534616304e-04 */ - .quad 0x3FEEDBA41E9542FE /* A00 = +9.643116566968215064293e-01 */ - .quad 0x3F94F5DD18D9C24D /* A01 = +2.046914543319848858727e-02 */ - .quad 0xBF7034896AA122B9 /* A02 = -3.956352980886528904192e-03 */ - .quad 0x3F30DCCB47810B39 /* A03 = +2.573009765038273091199e-04 */ - .quad 0x3FEF33F2882520ED /* A00 = 
+9.750912341196716903724e-01 */ - .quad 0x3F8BF37F2CF553FF /* A01 = +1.364802699996836392315e-02 */ - .quad 0xBF649F6F05A69619 /* A02 = -2.517430152880317534986e-03 */ - .quad 0x3F247623C950AAC9 /* A03 = +1.561087307505231250044e-04 */ - .quad 0x3FEF727757751741 /* A00 = +9.827229221489021115943e-01 */ - .quad 0x3F828E67912C4400 /* A01 = +9.060677640748693306705e-03 */ - .quad 0xBF5A2F51A806CC2C /* A02 = -1.598195784123355826789e-03 */ - .quad 0x3F18D35D7687E613 /* A03 = +9.470231965016282719549e-05 */ - .quad 0x3FEF9E6325C5942A /* A00 = +9.880843866091073568469e-01 */ - .quad 0x3F788AB117618F76 /* A01 = +5.991641772286606867914e-03 */ - .quad 0xBF5096EAB0B1EA89 /* A02 = -1.012543859160305046233e-03 */ - .quad 0x3F0E1E50EC4435AB /* A03 = +5.744633156910412119652e-05 */ - .quad 0x3FEFBD0784049369 /* A00 = +9.918248728250605994461e-01 */ - .quad 0x3F702BBD8294035F /* A01 = +3.947963975634432264028e-03 */ - .quad 0xBF44FB55E0F00593 /* A02 = -6.403130845457509273330e-04 */ - .quad 0x3F0244DCD723230A /* A03 = +3.484534217219031730379e-05 */ - .quad 0x3FEFD245E2366A43 /* A00 = +9.944180887426415926811e-01 */ - .quad 0x3F653D82EC088433 /* A01 = +2.592807490387838333795e-03 */ - .quad 0xBF3A7DF75E013CB8 /* A02 = -4.042366908878036561859e-04 */ - .quad 0x3EF6298E69F991CD /* A03 = +2.113564425911141559972e-05 */ - .quad 0x3FEFE0EAA508BC69 /* A00 = +9.962056372950317539861e-01 */ - .quad 0x3F5BD0771AF3FDDA /* A01 = +1.697651208644282514598e-03 */ - .quad 0xBF30B2E1254DE571 /* A02 = -2.548026725928887099328e-04 */ - .quad 0x3EEAE28B70EC0256 /* A03 = +1.281973848454955042307e-05 */ - .quad 0x3FEFEAF5303D7F96 /* A00 = +9.974313680831865536192e-01 */ - .quad 0x3F5229111365657E /* A01 = +1.108423877289460134782e-03 */ - .quad 0xBF250572D04DFE66 /* A02 = -1.603796628408704519168e-04 */ - .quad 0x3EE04E89BB57C981 /* A03 = +7.775682983689149966743e-06 */ - .quad 0x3FEFF1CF52F1CF44 /* A00 = +9.982678051005469122003e-01 */ - .quad 0x3F47A71316147CEB /* A01 = 
+7.218211359577819110842e-04 */ - .quad 0xBF1A6D7604055719 /* A02 = -1.008132248946049582547e-04 */ - .quad 0x3ED3C8047586A85C /* A03 = +4.716233739913014633626e-06 */ - .quad 0x3FEFF6770369EF69 /* A00 = +9.988360468555416149528e-01 */ - .quad 0x3F3EBB261180FBF0 /* A01 = +4.689186039321105101130e-04 */ - .quad 0xBF1097754FE19D7F /* A02 = -6.329206004950480057066e-05 */ - .quad 0x3EC7FEFF83BCA0A7 /* A03 = +2.860556404988488738366e-06 */ - .quad 0x3FEFF99D42371AC4 /* A00 = +9.992204945818561334647e-01 */ - .quad 0x3F33EB2AEC271F59 /* A01 = +3.039340773764907474054e-04 */ - .quad 0xBF04CF18E0FC0D79 /* A02 = -3.968996690952969588805e-05 */ - .quad 0x3EBD1BDBD6019BE9 /* A03 = +1.735021065507727833886e-06 */ - .quad 0x3FEFFBBCA32B0D91 /* A00 = +9.994795977476532700123e-01 */ - .quad 0x3F29C41E1615110A /* A01 = +1.965796209707565346710e-04 */ - .quad 0xBEFA11F93D9DCB5A /* A02 = -2.486248909101414873235e-05 */ - .quad 0x3EB1A7CA4546F7A7 /* A03 = +1.052345642723709228769e-06 */ - .quad 0x3FEFFD298B8E8DE2 /* A00 = +9.996535993308806045121e-01 */ - .quad 0x3F20A1C42D523C5B /* A01 = +1.268913244172078754520e-04 */ - .quad 0xBEF0507A364AFAE4 /* A02 = -1.555859070622834605755e-05 */ - .quad 0x3EA56ACA17E7CDF4 /* A03 = +6.382806956848098872313e-07 */ - .quad 0x3FEFFE1DC82BA5A3 /* A00 = +9.997700604991915929176e-01 */ - .quad 0x3F156E73B90F1769 /* A01 = +8.175450626798714452801e-05 */ - .quad 0xBEE4663579D0A09F /* A02 = -9.727122057226747625365e-06 */ - .quad 0x3E99FAF6FEC5D4C1 /* A03 = +3.871371052824002996020e-07 */ - .quad 0x3FEFFEF8D0BB5E81 /* A00 = +9.998745037837154514548e-01 */ - .quad 0x3F06686DA18D39C3 /* A01 = +4.273972098777251447726e-05 */ - .quad 0xBED46BC298073E90 /* A02 = -4.868731025855742842491e-06 */ - .quad 0x3E88E42286B9D0FD /* A03 = +1.854535328530838170114e-07 */ - .quad 0x3FEFFF8DBC68DDC7 /* A00 = +9.999455146670975791423e-01 */ - .quad 0x3EF26B2953A80AF0 /* A01 = +1.756534514108903368909e-05 */ - .quad 0xBEBFC4472D580F83 /* A02 = 
-1.893443529411295465239e-06 */ - .quad 0x3E72505B4553D19F /* A03 = +6.822456673547912277047e-08 */ - .quad 0x3FEFFFCED1276609 /* A00 = +9.999765477215883935358e-01 */ - .quad 0x3EDE1A94C7CC58F5 /* A01 = +7.177313020153979672606e-06 */ - .quad 0xBEA8A2C988744E57 /* A02 = -7.342066660497443762363e-07 */ - .quad 0x3E5AF30036BBBAF4 /* A03 = +2.509841882843541084885e-08 */ - .quad 0x3FEFFFEAFE70FCFC /* A00 = +9.999899835164849370983e-01 */ - .quad 0x3EC879175E3549F5 /* A01 = +2.917410471128503564412e-06 */ - .quad 0xBE930E36677D1813 /* A02 = -2.839493400307523115929e-07 */ - .quad 0x3E43D4005B42D48F /* A03 = +9.233192745401904898013e-09 */ - .quad 0x3ff0000000000000 - .quad 0x0000000000000000 - .quad 0x0000000000000000 - .quad 0x0000000000000000 - .align 32 - .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000 /* _sSignMask */ - .align 32 - .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff /* _sAbsMask */ - .align 32 - .long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 /* _iExpMantMask */ - .align 32 - .long 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000 /* _iExpMask */ - .align 32 - .long 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000 /* _iMinIdxOfsMask */ - .align 32 - .long 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000 /* _iMaxIdxMask */ - .align 32 - .type __svml_stanh_data_internal,@object - .size __svml_stanh_data_internal,.-__svml_stanh_data_internal + vcvtps2pd %xmm4, %ymm5 + + vextractf128 $1, %ymm4, %xmm4 + vcvtps2pd %xmm4, %ymm4 + + vmovdqu 16(%rcx, %rax), %xmm2 + vinsertf128 $1, 16(%r11, %rax), %ymm2, %ymm2 + + vfmadd213pd %ymm3, %ymm5, %ymm1 + + vmovupd 16(%rdx, %rax), %xmm3 + vinsertf128 $1, 16(%r10, %rax), %ymm3, %ymm3 + + vunpcklpd %ymm3, %ymm2, %ymm10 + vunpckhpd 
%ymm3, %ymm2, %ymm2 + + vfmadd213pd %ymm10, %ymm4, %ymm2 + vfmadd213pd %ymm6, %ymm4, %ymm2 + vfmadd213pd %ymm7, %ymm4, %ymm2 + vcvtpd2ps %ymm2, %xmm2 + + vmovdqu (%r9, %rax), %xmm7 + vinsertf128 $1, (%rdi, %rax), %ymm7, %ymm7 + + vmovupd (%r8, %rax), %xmm3 + vinsertf128 $1, (%rsi, %rax), %ymm3, %ymm3 + + vunpckhpd %ymm3, %ymm7, %ymm4 + vunpcklpd %ymm3, %ymm7, %ymm7 + + vfmadd213pd %ymm4, %ymm5, %ymm1 + vfmadd213pd %ymm7, %ymm5, %ymm1 + + + vcvtpd2ps %ymm1, %xmm1 + vinsertf128 $1, %xmm2, %ymm1, %ymm1 + + vmovmskps %ymm15, %edx + vandnps %ymm0, %ymm11, %ymm2 + testl %edx, %edx + /* Go to special inputs processing branch. */ + jne L(SPECIAL_VALUES_BRANCH) + /* Wait until after the branch to write over ymm0. */ + vorps %ymm2, %ymm1, %ymm0 + /* No stack restoration on the fastpath. */ + ret + + +L(SPECIAL_VALUES_BRANCH): + pushq %rbp + /* Need callee-saved registers to preserve state across tanhf calls. + */ + pushq %r12 + pushq %r13 + movq %rsp, %rbp + + /* Align stack and make room for 2x ymm vectors. */ + andq $-32, %rsp + addq $-64, %rsp + + /* Save all already computed results. */ + vorps %ymm2, %ymm1, %ymm1 + vmovups %ymm1, (%rsp) + /* Save original input (ymm0 unchanged up to this point). */ + vmovups %ymm0, 32(%rsp) + + vzeroupper + + /* edx has 1s where there was a special value that needs to be handled + by a tanhf call. */ + movl %edx, %r13d +L(SPECIAL_VALUES_LOOP): + /* Use r12 as index for special value that is saved across calls to + tanhf. We technically don't need a callee save register here as offset + to rsp is always [0, 28] so we can restore rsp by realigning to 64. + Essentially the tradeoff is 1 extra save/restore vs 2 extra instructions + in the loop. */ + xorl %r12d, %r12d + tzcntl %r13d, %r12d + + /* Scalar math function call to process special input. */ + movss 32(%rsp, %r12, 4), %xmm0 + call tanhf@PLT + /* No good way to avoid the store-forwarding fault this will cause on + return.
`lfence` avoids the SF fault but at greater cost as it + serializes stack/callee save restoration. */ + movss %xmm0, (%rsp, %r12, 4) + + blsr %r13d, %r13d + jnz L(SPECIAL_VALUES_LOOP) + + /* All results have been written to (%rsp). */ + vmovups (%rsp), %ymm0 + movq %rbp, %rsp + popq %r13 + popq %r12 + popq %rbp + ret +END(_ZGVdN8v_tanhf_avx2) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf_rodata.S b/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf_rodata.S new file mode 100644 index 0000000000..f3413db550 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf_rodata.S @@ -0,0 +1,600 @@ +#define _iExpMantMask 0 +#define _iMinIdxOfsMask 32 +#define _iMaxIdxMask 64 +#define _iExpMask 96 +#define _sAbsMask 128 +#define _lookupTable 160 + +#define TANHF_DATA(offset) ((offset) + __svml_stanh_data_internal_avx2) +#ifndef ONLY_DECL_OFFSET + .section .rodata, "a" + .align 32 + +# ifdef __svml_stanh_data_internal_typedef + typedef unsigned int VUINT32; + typedef struct + { + __declspec (align(32))VUINT32 _iExpMantMask[8][1]; + __declspec (align(32))VUINT32 _iMinIdxOfsMask[8][1]; + __declspec (align(32))VUINT32 _iMaxIdxMask[8][1]; + __declspec (align(32))VUINT32 _iExpMask[8][1]; + __declspec (align(32))VUINT32 _sAbsMask[8][1]; + __declspec (align(32))VUINT32 _lookupTable[(134 * 4)][2]; + }__svml_stanh_data_internal; +# endif + + +__svml_stanh_data_internal: + .globl __svml_stanh_data_internal_avx2 +__svml_stanh_data_internal_avx2: + .align 32 + /* _iExpMantMask. */ + .long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 + .long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 + + .align 32 + /* _iMinIdxOfsMask. */ + .long 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000 + .long 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000 + + .align 32 + /* _iMaxIdxMask. */ + .long 0x04280000, 0x04280000, 0x04280000, 0x04280000 + .long 0x04280000, 0x04280000, 0x04280000, 0x04280000 + + .align 32 + /* _iExpMask. 
*/ + .long 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000 + .long 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000 + + .align 32 + /* _sAbsMask. */ + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff + + .align 32 + /* _lookupTable. */ + /* Pol_000: err=7.93e-09, x in [0.0000000; 0.0312500]. */ + .quad 0x0000000000000000 /* A00 = +0.000000000000000000000e-01. */ + .quad 0x3FF00000022C70EB /* A01 = +1.000000008097283510367e+00. */ + .quad 0xBED00E878CFFA194 /* A02 = -3.828228912518614443549e-06. */ + .quad 0xBFD551766D0607A9 /* A03 = -3.330970825846813476723e-01. */ + .quad 0xBE53D60CE3E4C297 /* A00 = -1.847383956330407336230e-08. */ + .quad 0x3FF000024177CF5C /* A01 = +1.000002151235967140508e+00. */ + .quad 0xBF1758BC94A51A25 /* A02 = -8.906031613262943753568e-05. */ + .quad 0xBFD53EAE67E0D4F0 /* A03 = -3.319507612644221339337e-01. */ + .quad 0xBE5A9E47EF32D6FE /* A00 = -2.479020984039698285657e-08. */ + .quad 0x3FF00002DA983057 /* A01 = +1.000002721676556793895e+00. */ + .quad 0xBF1BD953509E94AA /* A02 = -1.062352277175377670507e-04. */ + .quad 0xBFD53BDB562EEDD5 /* A03 = -3.317783681520414806876e-01. */ + .quad 0xBE6191BBE496D294 /* A00 = -3.272532162914017685901e-08. */ + .quad 0x3FF0000390492017 /* A01 = +1.000003398528866105366e+00. */ + .quad 0xBF20727E814A57CE /* A02 = -1.254825043772153972919e-04. */ + .quad 0xBFD538DE060A6F22 /* A03 = -3.315959033004550748913e-01. */ + .quad 0xBE66DAFA2A893A25 /* A00 = -4.257146219278012568149e-08. */ + .quad 0x3FF0000465E08CD1 /* A01 = +1.000004194219219266770e+00. */ + .quad 0xBF2341C765EF91B6 /* A02 = -1.469188600530365522261e-04. */ + .quad 0xBFD535B6841FAF9E /* A03 = -3.314033785124993469751e-01. */ + .quad 0xBE6D5794E361E964 /* A00 = -5.465394929765249413434e-08. */ + .quad 0x3FF000055EE2A0CB /* A01 = +1.000005121846742950353e+00. */ + .quad 0xBF265E6C77E66C8B /* A02 = -1.706607253709506650304e-04. 
*/ + .quad 0xBFD53264DDCCEDA6 /* A03 = -3.312008062382240103361e-01. */ + .quad 0xBE729C844D374A6E /* A00 = -6.933284462462096107184e-08. */ + .quad 0x3FF000067F019093 /* A01 = +1.000006195180536350264e+00. */ + .quad 0xBF29CC5348D6DCE5 /* A02 = -1.968242326435338705130e-04. */ + .quad 0xBFD52EE92121ED35 /* A03 = -3.309881995734998416658e-01. */ + .quad 0xBE775AEA17EAA872 /* A00 = -8.700465590574974405858e-08. */ + .quad 0x3FF00007CA1D66B8 /* A01 = +1.000007428656699559610e+00. */ + .quad 0xBF2D8F5EB98A2637 /* A02 = -2.255252009216044881395e-04. */ + .quad 0xBFD52B435CDF9128 /* A03 = -3.307655722585587376727e-01. */ + .quad 0xBE7D04DA28C343F0 /* A00 = -1.081040272327705484794e-07. */ + .quad 0x3FF000094443CCF5 /* A01 = +1.000008837375216730337e+00. */ + .quad 0xBF30D5B76C947AE5 /* A02 = -2.568791210978817814332e-04. */ + .quad 0xBFD52773A0776FAD /* A03 = -3.305329386764651045105e-01. */ + .quad 0xBE81DD77A12C51C7 /* A00 = -1.331054169875768625701e-07. */ + .quad 0x3FF0000AF1AFD2DA /* A01 = +1.000010437096696680470e+00. */ + .quad 0xBF331230624C1680 /* A02 = -2.910011410651516805537e-04. */ + .quad 0xBFD52379FC0B61DF /* A03 = -3.302903138515186909352e-01. */ + .quad 0xBE85D04EEEB3C435 /* A00 = -1.625247628488202841012e-07. */ + .quad 0x3FF0000CD6C9B1F2 /* A01 = +1.000012244238970726684e+00. */ + .quad 0xBF357F0742FADDD4 /* A02 = -3.280060509313874068243e-04. */ + .quad 0xBFD51F56806D0E81 /* A03 = -3.300377134475880880338e-01. */ + .quad 0xBE8A6E289B59681B /* A00 = -1.969211333326924655065e-07. */ + .quad 0x3FF0000EF8268F72 /* A01 = +1.000014275873550406715e+00. */ + .quad 0xBF381E277A1B747A /* A02 = -3.680082682942575423093e-04. */ + .quad 0xBFD51B093F1D6FD4 /* A03 = -3.297751537663746734808e-01. */ + .quad 0xBE8FCBC40EE9ABD5 /* A00 = -2.368983653301529373887e-07. */ + .quad 0x3FF000115A883B6C /* A01 = +1.000016549721943981410e+00. */ + .quad 0xBF3AF17AC974B3D9 /* A02 = -4.111218235774406434303e-04. 
*/ + .quad 0xBFD516924A4C549C /* A03 = -3.295026517456081105450e-01. */ + .quad 0xBE92FFBC60A3F956 /* A00 = -2.831066871072026054144e-07. */ + .quad 0x3FF0001402DCED8A /* A01 = +1.000019084151832604590e+00. */ + .quad 0xBF3DFAE9390C4801 /* A02 = -4.574603454311488280083e-04. */ + .quad 0xBFD511F1B4D7DC3A /* A03 = -3.292202249571719585575e-01. */ + .quad 0xBE9690A22F96D5AD /* A00 = -3.362443262393081632612e-07. */ + .quad 0x3FF00016F63EFF5D /* A01 = +1.000021898173108825247e+00. */ + .quad 0xBF409E2C839605BB /* A02 = -5.071370461992499986334e-04. */ + .quad 0xBFD50D27924BEE00 /* A03 = -3.289278916051614487515e-01. */ + .quad 0xBE9AA56C65E72A73 /* A00 = -3.970591019557469835586e-07. */ + .quad 0x3FF0001A39F4A43E /* A01 = +1.000025011433776978009e+00. */ + .quad 0xBF425BD74C3D6667 /* A02 = -5.602647074553602319844e-04. */ + .quad 0xBFD50833F6E1ABA2 /* A03 = -3.286256705238718156536e-01. */ + .quad 0xBE9F4BD4FF1A83B0 /* A00 = -4.663500013744687071912e-07. */ + .quad 0x3FF0001DD36F9EC2 /* A01 = +1.000028444215715683896e+00. */ + .quad 0xBF44376634149405 /* A02 = -6.169556656102642569831e-04. */ + .quad 0xBFD50316F77EDEE5 /* A03 = -3.283135811757190158922e-01. */ + .quad 0xBEA3B625387BB079 /* A00 = -5.874486399249461304297e-07. */ + .quad 0x3FF00023E14CFBA9 /* A01 = +1.000034217911642153709e+00. */ + .quad 0xBF47392F923218D2 /* A02 = -7.087213783883111826306e-04. */ + .quad 0xBFD4FB1FACDEB938 /* A03 = -3.278273761924483942209e-01. */ + .quad 0xBEAA6E24F543500A /* A00 = -7.876828740601738750574e-07. */ + .quad 0x3FF0002D5C6E8412 /* A01 = +1.000043259679163742959e+00. */ + .quad 0xBF4BAF02BD7FDD70 /* A02 = -8.448375110664940040861e-04. */ + .quad 0xBFD4EFEE6527A7DE /* A03 = -3.271442401734229177279e-01. */ + .quad 0xBEB16E3EBE2157D0 /* A00 = -1.038947396133402500647e-06. */ + .quad 0x3FF00038990FEE2F /* A01 = +1.000053975962952312884e+00. */ + .quad 0xBF50569481C574CB /* A02 = -9.972048056490652716971e-04. 
*/ + .quad 0xBFD4E419278DA2B4 /* A03 = -3.264220129263251113372e-01. */ + .quad 0xBEB6A7B6723165D4 /* A00 = -1.350350836279403750524e-06. */ + .quad 0x3FF00045CAB4158E /* A01 = +1.000066558657042303793e+00. */ + .quad 0xBF531D7C9C849108 /* A02 = -1.166698160951775212202e-03. */ + .quad 0xBFD4D7A0BB33B152 /* A03 = -3.256608799117844954552e-01. */ + .quad 0xBEBD0EE2A8654AFD /* A00 = -1.732000471561702711532e-06. */ + .quad 0x3FF00055276F18D6 /* A01 = +1.000081209219890521211e+00. */ + .quad 0xBF562FDBA3FB6C6C /* A02 = -1.354183666925102939860e-03. */ + .quad 0xBFD4CA85F1B93DB2 /* A03 = -3.248610363561638125773e-01. */ + .quad 0xBEC269D4036A207E /* A00 = -2.195047297096822741730e-06. */ + .quad 0x3FF00066E7DA6E4E /* A01 = +1.000098138500919997540e+00. */ + .quad 0xBF5991499FC36B3A /* A02 = -1.560518167983372759405e-03. */ + .quad 0xBFD4BCC9A72283D6 /* A03 = -3.240226871658341556426e-01. */ + .quad 0xBEC7154B6C09CFE1 /* A00 = -2.751729738565190291276e-06. */ + .quad 0x3FF0007B47086B80 /* A01 = +1.000117566559055148900e+00. */ + .quad 0xBF5D455433B4F8F4 /* A02 = -1.786548832412968197680e-03. */ + .quad 0xBFD4AE6CC1BFE145 /* A03 = -3.231460468373550942722e-01. */ + .quad 0xBECCA68CC64A0F8A /* A00 = -3.415415948561670285790e-06. */ + .quad 0x3FF00092827742F7 /* A01 = +1.000139722473418535387e+00. */ + .quad 0xBF60A7BF15A527AF /* A02 = -2.033112728132522705610e-03. */ + .quad 0xBFD49F703214084C /* A03 = -3.222313393636155876010e-01. */ + .quad 0xBED19E68676B241B /* A00 = -4.200644630977303616698e-06. */ + .quad 0x3FF000ACDA037B26 /* A01 = +1.000164844146362863597e+00. */ + .quad 0xBF62D99F836A02F8 /* A02 = -2.301036405072284102280e-03. */ + .quad 0xBFD48FD4F2B91B28 /* A03 = -3.212787981359945810311e-01. */ + .quad 0xBED57CF4B0C7AA54 /* A00 = -5.123164339408145209103e-06. */ + .quad 0x3FF000CA8FD9E1A1 /* A01 = +1.000193178099017865534e+00. */ + .quad 0xBF653A014548E686 /* A02 = -2.591135484433962181405e-03. 
*/ + .quad 0xBFD47F9C0844B38F /* A03 = -3.202886658426046806447e-01. */ + .quad 0xBEDA012B1B1A41E2 /* A00 = -6.199971197454598722328e-06. */ + .quad 0x3FF000EBE868FDF4 /* A01 = +1.000224979259539459520e+00. */ + .quad 0xBF67CA9427E0A544 /* A02 = -2.904214255086275467410e-03. */ + .quad 0xBFD46EC6812ADB37 /* A03 = -3.192611943626845749655e-01. */ + .quad 0xBEDF3EAC5BF12194 /* A00 = -7.449344990702664567927e-06. */ + .quad 0x3FF001112A520784 /* A01 = +1.000260510744255704196e+00. */ + .quad 0xBF6A8D01ABDA4DC4 /* A02 = -3.241065277345108255891e-03. */ + .quad 0xBFD45D55759FFA4A /* A03 = -3.181966446572103146551e-01. */ + .quad 0xBEE2A541BC274267 /* A00 = -8.890883582164319970972e-06. */ + .quad 0x3FF0013A9E5961F2 /* A01 = +1.000300043631906721231e+00. */ + .quad 0xBF6D82ECD080C540 /* A02 = -3.602468994380686462264e-03. */ + .quad 0xBFD44B4A0779C0AD /* A03 = -3.170952866557950611259e-01. */ + .quad 0xBEE61D97609A27F4 /* A00 = -1.054553560499505625520e-05. */ + .quad 0x3FF001688F56A3AF /* A01 = +1.000343856731187974773e+00. */ + .quad 0xBF7056F8EFB683EC /* A02 = -3.989193351487490407647e-03. */ + .quad 0xBFD438A5620F0F74 /* A03 = -3.159573991399533543500e-01. */ + .quad 0xBEEA145429EDD370 /* A00 = -1.243563138839952927732e-05. */ + .quad 0x3FF0019B4A242A67 /* A01 = +1.000392236341804297339e+00. */ + .quad 0xBF7207D31CA78D9B /* A02 = -4.401993423445739288258e-03. */ + .quad 0xBFD42568BA16E7CD /* A03 = -3.147832696228050619602e-01. */ + .quad 0xBEEE96370D52680F /* A00 = -1.458491207477835326165e-05. */ + .quad 0x3FF001D31D8E4115 /* A01 = +1.000445476009251821736e+00. */ + .quad 0xBF73D4CC11EDC094 /* A02 = -4.841611050196221316400e-03. */ + .quad 0xBFD411954D8664E7 /* A03 = -3.135731942252974469021e-01. */ + .quad 0xBEF338C046215EF8 /* A00 = -1.833122622260562810219e-05. */ + .quad 0x3FF00230C32C2EC1 /* A01 = +1.000534784691737621998e+00. */ + .quad 0xBF76BD019BCC5DAF /* A02 = -5.551344188254799492943e-03. 
*/ + .quad 0xBFD3F2C7156DC21E /* A03 = -3.116929730668135389848e-01. */ + .quad 0xBEF9B15EAE411EAE /* A00 = -2.450261207822986676092e-05. */ + .quad 0x3FF002C2DF057A4D /* A01 = +1.000674124886830940184e+00. */ + .quad 0xBF7B08CCD9AC1E30 /* A02 = -6.600189396301511801646e-03. */ + .quad 0xBFD3C7A7A114FED8 /* A03 = -3.090609620157755976777e-01. */ + .quad 0xBF00E36483C373B3 /* A00 = -3.221178528332122595812e-05. */ + .quad 0x3FF0036F419480D7 /* A01 = +1.000838524028997644777e+00. */ + .quad 0xBF7FD255D1777007 /* A02 = -7.768950679260206403087e-03. */ + .quad 0xBFD39A453911D6CE /* A03 = -3.062909180947429588215e-01. */ + .quad 0xBF05DFA04DD12059 /* A00 = -4.172046622180685472624e-05. */ + .quad 0x3FF00438B2A03D8D /* A01 = +1.001030633695197069599e+00. */ + .quad 0xBF828F8DBB4A9D10 /* A02 = -9.062869337255224921890e-03. */ + .quad 0xBFD36AAB704697D9 /* A03 = -3.033856007044711255993e-01. */ + .quad 0xBF0BF3E0C647DEFB /* A00 = -5.331544597092331081714e-05. */ + .quad 0x3FF005221063D36D /* A01 = +1.001253189109060359741e+00. */ + .quad 0xBF857A2CB3C96102 /* A02 = -1.048693584122917590862e-02. */ + .quad 0xBFD338E65BBB4FEC /* A03 = -3.003478904549854444639e-01. */ + .quad 0xBF11A506ED7C9D31 /* A00 = -6.730894835681591541979e-05. */ + .quad 0x3FF0062E4D0EA92A /* A01 = +1.001508999829250345925e+00. */ + .quad 0xBF88AB82C2761AF3 /* A02 = -1.204588085125866091241e-02. */ + .quad 0xBFD305028D6BD206 /* A03 = -2.971807843271395688234e-01. */ + .quad 0xBF1607C0922D9BF1 /* A00 = -8.403885708006799337092e-05. */ + .quad 0x3FF007606C341961 /* A01 = +1.001800940198869449560e+00. */ + .quad 0xBF8C25E6DA487BCF /* A02 = -1.374416688582682892494e-02. */ + .quad 0xBFD2CF0D0EE8F7B5 /* A03 = -2.938873906713255768075e-01. */ + .quad 0xBF1B3A8480A0A16D /* A00 = -1.038688061788578038307e-04. */ + .quad 0x3FF008BB802D02D6 /* A01 = +1.002131939589323561535e+00. */ + .quad 0xBF8FEB8AE99FD100 /* A02 = -1.558598065819483124983e-02. 
*/ + .quad 0xBFD297135BD0911B /* A03 = -2.904709240558688843059e-01. */ + .quad 0xBF20ABB9BDB75C65 /* A00 = -1.271881327357976163798e-04. */ + .quad 0x3FF00A42A76D8CD1 /* A01 = +1.002504972472525901495e+00. */ + .quad 0xBF91FF3D752BB9E6 /* A02 = -1.757522609380570560722e-02. */ + .quad 0xBFD25D235C1F88B4 /* A03 = -2.869346999779154305799e-01. */ + .quad 0xBF243D3254425461 /* A00 = -1.544116913733432829448e-04. */ + .quad 0x3FF00BF909D1795E /* A01 = +1.002923048355647051011e+00. */ + .quad 0xBF94304E04D44942 /* A02 = -1.971551804042204897316e-02. */ + .quad 0xBFD2214B5E61CFA6 /* A03 = -2.832821294498394371075e-01. */ + .quad 0xBF286070011B61CE /* A00 = -1.859795307186510085994e-04. */ + .quad 0x3FF00DE1D5E1627E /* A01 = +1.003389201612804537689e+00. */ + .quad 0xBF9689D5F4163F59 /* A02 = -2.201017668045266231780e-02. */ + .quad 0xBFD1E39A11C3B42C /* A03 = -2.795167134743816728104e-01. */ + .quad 0xBF2D250B366A79E8 /* A00 = -2.223564326486314902259e-04. */ + .quad 0x3FF010003E134001 /* A01 = +1.003906481248123094829e+00. */ + .quad 0xBF990C9FF91F6F81 /* A02 = -2.446222265267250853271e-02. */ + .quad 0xBFD1A41E80084CDC /* A03 = -2.756420374218586655246e-01. */ + .quad 0xBF314DB5DDC2A30E /* A00 = -2.640313157465248123865e-04. */ + .quad 0x3FF012577608921B /* A01 = +1.004477940624503018441e+00. */ + .quad 0xBF9BB9626875B0C9 /* A02 = -2.707437288829409385849e-02. */ + .quad 0xBFD162E80768A9D0 /* A03 = -2.716617653228725615122e-01. */ + .quad 0xBF346A6133808864 /* A00 = -3.115165050094957730625e-04. */ + .quad 0x3FF014EAAFCC88A3 /* A01 = +1.005106627192198898157e+00. */ + .quad 0xBF9E90BEF9BF7419 /* A02 = -2.984903716411588595059e-02. */ + .quad 0xBFD12006545F7FAD /* A03 = -2.675796340899932457269e-01. */ + .quad 0xBF37F180DC3848EA /* A00 = -3.653468704395550778821e-04. */ + .quad 0x3FF017BD19147861 /* A01 = +1.005795572250939295955e+00. */ + .quad 0xBFA0C9A14C702E07 /* A02 = -3.278831537326359207851e-02. 
*/ + .quad 0xBFD0DB895B650092 /* A03 = -2.633994476818851682154e-01. */ + .quad 0xBF3BEC6AAC6D7635 /* A00 = -4.260788377246944457107e-04. */ + .quad 0x3FF01AD1D884E719 /* A01 = +1.006547780778822565040e+00. */ + .quad 0xBFA260B2A1B1434A /* A02 = -3.589399551186163439542e-02. */ + .quad 0xBFD09581529E93D6 /* A03 = -2.591250712233067465817e-01. */ + .quad 0xBF4164E26167882B /* A00 = -5.308251737086202562063e-04. */ + .quad 0x3FF01FEF14B62B81 /* A01 = +1.007796364693348545316e+00. */ + .quad 0xBFA4EB014538AA42 /* A02 = -4.085544557559163403315e-02. */ + .quad 0xBFD029D36FEAF41F /* A03 = -2.525528519580024222613e-01. */ + .quad 0xBF46F6FFF4E53DC8 /* A00 = -7.008313930700277652464e-04. */ + .quad 0x3FF027CBB51CBBA0 /* A01 = +1.009715754956893363214e+00. */ + .quad 0xBFA89DEC9FEC112E /* A02 = -4.807986690687680864098e-02. */ + .quad 0xBFCF2A99464D0DB4 /* A03 = -2.434875100390009317053e-01. */ + .quad 0xBF4DCC9C4F66A4D9 /* A00 = -9.094012482836712945103e-04. */ + .quad 0x3FF030E7CFCCD583 /* A01 = +1.011939822882909068014e+00. */ + .quad 0xBFACAA3B95814081 /* A02 = -5.598627281199331645611e-02. */ + .quad 0xBFCDF78F156BE7CF /* A03 = -2.341173987004467604844e-01. */ + .quad 0xBF5308ED74E5C7A6 /* A00 = -1.161796466103906435435e-03. */ + .quad 0x3FF03B5986412ECB /* A01 = +1.014489674026594512313e+00. */ + .quad 0xBFB087EBA88DCC3F /* A02 = -6.457398285947223148806e-02. */ + .quad 0xBFCCBB9BD134862F /* A03 = -2.244753619680052991736e-01. */ + .quad 0xBF57FA23C00DF4B5 /* A00 = -1.463446533505758208674e-03. */ + .quad 0x3FF0473558A1BCC0 /* A01 = +1.017384859292903342975e+00. */ + .quad 0xBFB2E702BC6360EF /* A02 = -7.383744334527241048871e-02. */ + .quad 0xBFCB77D546379288 /* A03 = -2.145945160729250122955e-01. */ + .quad 0xBF5DD12971557F71 /* A00 = -1.819887610814388068450e-03. */ + .quad 0x3FF0548DDF5000A8 /* A01 = +1.020643112482540360020e+00. */ + .quad 0xBFB571B63DA186E1 /* A02 = -8.376635555898871710045e-02. 
*/ + .quad 0xBFCA2D5202605148 /* A03 = -2.045080672838912594358e-01. */ + .quad 0xBF6252B1AD5D4F17 /* A00 = -2.236697221556737096709e-03. */ + .quad 0x3FF063738A910BF7 /* A01 = +1.024280110622155737232e+00. */ + .quad 0xBFB8270C8E6B601B /* A02 = -9.434584118878357184013e-02. */ + .quad 0xBFC8DD27D950A07E /* A03 = -1.942491351230763441116e-01. */ + .quad 0xBF66470C91730CFC /* A00 = -2.719425723258004842786e-03. */ + .quad 0x3FF073F468FCF331 /* A01 = +1.028309259519300633556e+00. */ + .quad 0xBFBB05C2952191E4 /* A02 = -1.055566419686964629854e-01. */ + .quad 0xBFC7886A770DE2BD /* A03 = -1.838505822486435070662e-01. */ + .quad 0xBF6AD114AC8E98EC /* A00 = -3.273525599485007861467e-03. */ + .quad 0x3FF0861BF53E5226 /* A01 = +1.032741506559554434119e+00. */ + .quad 0xBFBE0C4F9B461507 /* A02 = -1.173753503881763554650e-01. */ + .quad 0xBFC6302A037CDE3A /* A03 = -1.733448521642786954722e-01. */ + .quad 0xBF6FFBDE2A6C2AF8 /* A00 = -3.904279630096648551207e-03. */ + .quad 0x3FF099F2EB8E7DA3 /* A01 = +1.037585182326304034106e+00. */ + .quad 0xBFC09C74D192DDF0 /* A02 = -1.297746680554463516444e-01. */ + .quad 0xBFC4D571D8E3079F /* A03 = -1.627638157861470424859e-01. */ + .quad 0xBF72E8FDC0B952AA /* A00 = -4.616728994353872309042e-03. */ + .quad 0x3FF0AF7F273C9533 /* A01 = +1.042845872181101141152e+00. */ + .quad 0xBFC244C512736F10 /* A02 = -1.427236881344176033792e-01. */ + .quad 0xBFC379474F58B902 /* A03 = -1.521386277613104298645e-01. */ + .quad 0xBF762EABAF17395B /* A00 = -5.415602341101023557701e-03. */ + .quad 0x3FF0C6C3886F63FB /* A01 = +1.048526318502125631582e+00. */ + .quad 0xBFC3FDF9918EA12A /* A02 = -1.561881981590514389957e-01. */ + .quad 0xBFC21CA89ECAB895 /* A03 = -1.414995932913753196036e-01. */ + .quad 0xBF79D387CE5B2BAE /* A00 = -6.305246822828998107258e-03. */ + .quad 0x3FF0DFBFE2346376 /* A01 = +1.054626353847394337748e+00. */ + .quad 0xBFC5C6DA43602620 /* A02 = -1.701309994680721970894e-01. 
*/ + .quad 0xBFC0C08BD8DB6631 /* A03 = -1.308760460731704100557e-01. */ + .quad 0xBF7DDBA8E8DA9060 /* A00 = -7.289562037531366334164e-03. */ + .quad 0x3FF0FA70F0D1B464 /* A01 = +1.061142864894713433443e+00. */ + .quad 0xBFC79E18D92BAA7C /* A02 = -1.845122394946264732241e-01. */ + .quad 0xBFBECBBBF74C2669 /* A03 = -1.202962378266875381749e-01. */ + .quad 0xBF81254E76EA25DA /* A00 = -8.371937755572145950511e-03. */ + .quad 0x3FF116D05835EBD0 /* A01 = +1.068069786618014660462e+00. */ + .quad 0xBFC982539E2ED224 /* A02 = -1.992897531869327609755e-01. */ + .quad 0xBFBC1B043C350159 /* A03 = -1.097872397413132278254e-01. */ + .quad 0xBF8391ACBA863403 /* A00 = -9.555196230190082448686e-03. */ + .quad 0x3FF134D4AA477FE2 /* A01 = +1.075398125794884141015e+00. */ + .quad 0xBFCB7218609FEAFB /* A02 = -2.144194099235717521079e-01. */ + .quad 0xBFB970A16CB88329 /* A03 = -9.937485603633135211599e-02. */ + .quad 0xBF87935088E48E8B /* A00 = -1.151144902957603431692e-02. */ + .quad 0x3FF1649892AD7DD3 /* A01 = +1.087059567413110938716e+00. */ + .quad 0xBFCE6971DDE75409 /* A02 = -2.375929196847723912089e-01. */ + .quad 0xBFB58291E88CB251 /* A03 = -8.402358939628952472223e-02. */ + .quad 0xBF8DB3A62C325325 /* A00 = -1.450280973794233242702e-02. */ + .quad 0x3FF1A9C900C6DEEA /* A01 = +1.103951457056548068891e+00. */ + .quad 0xBFD13DBC65B0E08E /* A02 = -2.693930619311765140012e-01. */ + .quad 0xBFB06696F62696D1 /* A03 = -6.406539449252625362252e-02. */ + .quad 0xBF92583699F2E27A /* A00 = -1.791463198307716858659e-02. */ + .quad 0x3FF1F451B85AA9F0 /* A01 = +1.122148246892376022288e+00. */ + .quad 0xBFD34FD5F8288180 /* A02 = -3.017477916164565954205e-01. */ + .quad 0xBFA6FB692825B683 /* A03 = -4.488686194495718900788e-02. */ + .quad 0xBF9641C26E673D6F /* A00 = -2.173522757385398448959e-02. */ + .quad 0x3FF24364DA5E2B07 /* A01 = +1.141453602790251542487e+00. */ + .quad 0xBFD564A5A5EF5890 /* A02 = -3.342680092295120530821e-01. 
*/ + .quad 0xBF9B43712011A982 /* A03 = -2.662445791467283467968e-02. */ + .quad 0xBF9A901038EC2F39 /* A00 = -2.594018313816024226548e-02. */ + .quad 0x3FF2961356DFFEBA /* A01 = +1.161639537196534011088e+00. */ + .quad 0xBFD775EBB17198C7 /* A02 = -3.665723069046972759644e-01. */ + .quad 0xBF833B1A926CD462 /* A03 = -9.390075295963199591975e-03. */ + .quad 0xBF9F396A6A461B91 /* A00 = -3.049246095317987084727e-02. */ + .quad 0x3FF2EB53BAEF534B /* A01 = +1.182452898229899629357e+00. */ + .quad 0xBFD97DABF8AD8BBD /* A02 = -3.982953957076310058660e-01. */ + .quad 0x3F7B8F6A3E0F8837 /* A03 = +6.728568086119371925713e-03. */ + .quad 0xBFA21878590F8BAA /* A00 = -3.534294211546946951064e-02. */ + .quad 0x3FF34209790236E1 /* A01 = +1.203622315111197105253e+00. */ + .quad 0xBFDB764C0E71BECB /* A02 = -4.290952817018306997277e-01. */ + .quad 0x3F962FE0C03F84C0 /* A03 = +2.166701482190513949888e-02. */ + .quad 0xBFA4B36B9AD27ECC /* A00 = -4.043136849327097492868e-02. */ + .quad 0x3FF3990C5B12FC16 /* A01 = +1.224865298994477935679e+00. */ + .quad 0xBFDD5AABB0D01390 /* A02 = -4.586590983092770912322e-01. */ + .quad 0x3FA21DAF5CA162DB /* A03 = +3.538272863142363083844e-02. */ + .quad 0xBFA7645E4D7BF28B /* A00 = -4.568762489177399105378e-02. */ + .quad 0x3FF3EF2FD51C0D9F /* A01 = +1.245895225962932562069e+00. */ + .quad 0xBFDF26377E1B686E /* A02 = -4.867075664057044503963e-01. */ + .quad 0x3FA8803E756EE812 /* A03 = +4.785342391501513914509e-02. */ + .quad 0xBFAA210925C64413 /* A00 = -5.103329263796054643398e-02. */ + .quad 0x3FF44349F897D8E7 /* A01 = +1.266427966181760345066e+00. */ + .quad 0xBFE06A7B02C6D8E2 /* A02 = -5.129981092675530707226e-01. */ + .quad 0x3FAE3F194734F5D0 /* A03 = +5.907515520309980505687e-02. */ + .quad 0xBFACDE48F8A19BBB /* A00 = -5.638340029764018351832e-02. */ + .quad 0x3FF49439D5466582 /* A01 = +1.286187966447272845727e+00. */ + .quad 0xBFE131C7C1063DDC /* A02 = -5.373266954429101183166e-01. 
*/ + .quad 0x3FB1ADEEC36AD805 /* A03 = +6.906025191241844940482e-02. */ + .quad 0xBFAF905D8F585680 /* A00 = -6.164829611604449866036e-02. */ + .quad 0x3FF4E0ED1FD27F99 /* A01 = +1.304913639360142818546e+00. */ + .quad 0xBFE1E7A859DC1D3D /* A02 = -5.595285182070380836095e-01. */ + .quad 0x3FB3ED018E4642A1 /* A03 = +7.783517573831001679086e-02. */ + .quad 0xBFB11595104160BA /* A00 = -6.673556944713512906198e-02. */ + .quad 0x3FF528650340490B /* A01 = +1.322361958217302513319e+00. */ + .quad 0xBFE28B14B40BC974 /* A02 = -5.794776455425521000109e-01. */ + .quad 0x3FB5DF49F5BAF6D7 /* A03 = +8.543836831355676453281e-02. */ + .quad 0xBFB2513A97344BA4 /* A00 = -7.155195418844911836587e-02. */ + .quad 0x3FF569BA0DB5EE14 /* A01 = +1.338312200124055273420e+00. */ + .quad 0xBFE31B53A8B67B20 /* A02 = -5.970857901737396389308e-01. */ + .quad 0x3FB787F297BB0544 /* A03 = +9.191814617499455275507e-02. */ + .quad 0xBFB37512E848FAFA /* A00 = -7.600515528700305112331e-02. */ + .quad 0x3FF5A41F33B403C8 /* A01 = +1.352568819013173495591e+00. */ + .quad 0xBFE397F6EA9A58A5 /* A02 = -6.123003561103997904880e-01. */ + .quad 0x3FB8EAA9FF25CA06 /* A03 = +9.733068923177520814782e-02. */ + .quad 0xBFB47B3E603AFC5D /* A00 = -8.000554894805263217439e-02. */ + .quad 0x3FF5D6E3EDE40487 /* A01 = +1.364963464031718975988e+00. */ + .quad 0xBFE400D5BCA6D631 /* A02 = -6.251019177058819709103e-01. */ + .quad 0x3FBA0B830ED567FE /* A03 = +1.017381583418739132707e-01. */ + .quad 0xBFB5BBFE8AC90496 /* A00 = -8.489981544791400103200e-02. */ + .quad 0x3FF612BA70107E95 /* A01 = +1.379572332145390989311e+00. */ + .quad 0xBFE477EAF1FA7693 /* A02 = -6.396383978023599814478e-01. */ + .quad 0x3FBB4784B7C08A95 /* A03 = +1.065600346196709652391e-01. */ + .quad 0xBFB6D5D940743939 /* A00 = -8.920057128509463473254e-02. */ + .quad 0x3FF644A8748F70CE /* A01 = +1.391762214006166953340e+00. */ + .quad 0xBFE4D646AB07EA37 /* A02 = -6.511567440459832267763e-01. 
*/ + .quad 0x3FBC354F4E1D5292 /* A03 = +1.101884427747086558913e-01. */ + .quad 0xBFB7223D19E4F3D1 /* A00 = -9.036619074045339206069e-02. */ + .quad 0x3FF6518FEB42B7FA /* A01 = +1.394912642466350494175e+00. */ + .quad 0xBFE4ED86CB87498C /* A02 = -6.539949393430091184598e-01. */ + .quad 0x3FBC6D29F28CCA9B /* A03 = +1.110407082713131127205e-01. */ + .quad 0xBFB6878652FF6312 /* A00 = -8.800544287022329936754e-02. */ + .quad 0x3FF63948C302D040 /* A01 = +1.388985406648330922508e+00. */ + .quad 0xBFE4C4E2E7904E17 /* A02 = -6.490339777687407218920e-01. */ + .quad 0x3FBC127356CA1ABE /* A03 = +1.096565329445224612481e-01. */ + .quad 0xBFB4F5D18B0C91D6 /* A00 = -8.187589306596207427980e-02. */ + .quad 0x3FF5FD27EB7DD0B8 /* A01 = +1.374305648697413673176e+00. */ + .quad 0xBFE464E01A2B2FC6 /* A02 = -6.373138915164353601739e-01. */ + .quad 0x3FBB460547674A30 /* A03 = +1.065371798825160976065e-01. */ + .quad 0xBFB26642FA16A685 /* A00 = -7.187288861919156890412e-02. */ + .quad 0x3FF59F9BEDE1C95A /* A01 = +1.351467065073470141812e+00. */ + .quad 0xBFE3D67920C8FBEA /* A02 = -6.199308052381387046381e-01. */ + .quad 0x3FBA24F6A8D3CBC1 /* A03 = +1.021265184570401413078e-01. */ + .quad 0xBFADB5294794F097 /* A00 = -5.802277563859197656582e-02. */ + .quad 0x3FF523EA7B9CF453 /* A01 = +1.321268542159732772845e+00. */ + .quad 0xBFE322A8B55E35DB /* A02 = -5.979808370918208160205e-01. */ + .quad 0x3FB8C8673B1B3E37 /* A03 = +9.680791085269722928697e-02. */ + .quad 0xBFA4B7D661965C6A /* A00 = -4.046506825687219699450e-02. */ + .quad 0x3FF48DE3E2CE3122 /* A01 = +1.284641157110919085227e+00. */ + .quad 0xBFE251FED1A7F445 /* A02 = -5.725092024655472622285e-01. */ + .quad 0x3FB745699FCABDB9 /* A03 = +9.090290213747821701507e-02. */ + .quad 0xBF93E60456E4EE1D /* A00 = -1.943213253365004902773e-02. */ + .quad 0x3FF3E1A14E628A59 /* A01 = +1.242585474196536532432e+00. */ + .quad 0xBFE16C5AB660E876 /* A02 = -5.444768488007543094653e-01. 
*/ + .quad 0x3FB5AD33AA8C188F /* A03 = +8.467410005332197397987e-02. */ + .quad 0x3F738C17C47C7961 /* A00 = +4.772274820224659853951e-03. */ + .quad 0x3FF3234DDE3BD146 /* A01 = +1.196119182682268355933e+00. */ + .quad 0xBFE078C0D77A9D3B /* A02 = -5.147403915952176722826e-01. */ + .quad 0x3FB40D74B3E276B8 /* A03 = +7.833032027925923568290e-02. */ + .quad 0x3FA0474BECC689C7 /* A00 = +3.179394975019849550746e-02. */ + .quad 0x3FF256FB4FA7D18A /* A01 = +1.146235762743432307076e+00. */ + .quad 0xBFDEFA8E3FB285E2 /* A02 = -4.840427038235174395098e-01. */ + .quad 0x3FB270C007493D59 /* A03 = +7.203293016322244446403e-02. */ + .quad 0x3FAF5BD51E479BDC /* A00 = +6.124750132203590768931e-02. */ + .quad 0x3FF18081D0B53BC5 /* A01 = +1.093873801484492647162e+00. */ + .quad 0xBFDCFE2439BD0C03 /* A02 = -4.530115665294831006626e-01. */ + .quad 0x3FB0DEFE5A45AFDD /* A03 = +6.590261176978580437424e-02. */ + .quad 0x3FB7BD5D2806EA26 /* A00 = +9.273321368429118805032e-02. */ + .quad 0x3FF0A369E35B4440 /* A01 = +1.039895904647224256223e+00. */ + .quad 0xBFDB04BC5C9951E7 /* A02 = -4.221640495573226181669e-01. */ + .quad 0x3FAEBBBAA9D6DEEF /* A03 = +6.002600978120919278380e-02. */ + .quad 0x3FC01BE411098DBC /* A00 = +1.258511622610124502941e-01. */ + .quad 0x3FEF85BDABC031C1 /* A01 = +9.850757936961188621083e-01. */ + .quad 0xBFD91521375097C2 /* A02 = -3.919146576102968682065e-01. */ + .quad 0x3FABE26F0086D982 /* A03 = +5.446192628317005068883e-02. */ + .quad 0x3FC481D7FF5776B9 /* A00 = +1.602125164781023347604e-01. */ + .quad 0x3FEDC3506C1E7218 /* A01 = +9.300920592973538347792e-01. */ + .quad 0xBFD7349A88DA7D4F /* A02 = -3.625856720409119104964e-01. */ + .quad 0x3FA936E2DFF8E2AE /* A03 = +4.924687370334389358018e-02. */ + .quad 0x3FC90471F96FA27A /* A00 = +1.954481571149420671141e-01. */ + .quad 0x3FEC0451601987A2 /* A01 = +8.755270840595026360376e-01. */ + .quad 0xBFD5671CD4B898DC /* A02 = -3.344184949259110251063e-01. 
*/ + .quad 0x3FA6BB9594603B67 /* A03 = +4.439990459660841243261e-02. */ + .quad 0x3FCFD8ADB9ED944C /* A00 = +2.488000066615846384011e-01. */ + .quad 0x3FE978C073F6809A /* A01 = +7.959902062321078108909e-01. */ + .quad 0xBFD2DF7E00BCD5A9 /* A02 = -2.948908812716931060471e-01. */ + .quad 0x3FA3614033D490B2 /* A03 = +3.785133965200894456959e-02. */ + .quad 0x3FD4846A12AFE5A0 /* A00 = +3.205819303981005674586e-01. */ + .quad 0x3FE63A1147D40472 /* A01 = +6.945883181471244061100e-01. */ + .quad 0xBFCFA2268AD34450 /* A02 = -2.471359422548027318101e-01. */ + .quad 0x3F9F150201D9FFE0 /* A03 = +3.035357605267552383310e-02. */ + .quad 0x3FD9018641F82BEB /* A00 = +3.907180446846598154131e-01. */ + .quad 0x3FE33B7C220FFBDC /* A01 = +6.010113396913498995389e-01. */ + .quad 0xBFCA4E4187E29C86 /* A02 = -2.055131829740483584423e-01. */ + .quad 0x3F98C30CED19F8F4 /* A03 = +2.418155858185229434287e-02. */ + .quad 0x3FDD4B8255BEB078 /* A00 = +4.577337109901757905561e-01. */ + .quad 0x3FE0858B19D3A49B /* A01 = +5.163016800335243905451e-01. */ + .quad 0xBFC5BC929EACE564 /* A02 = -1.698172831327539045176e-01. */ + .quad 0x3F93A083CE57DE2B /* A03 = +1.916700312537337677621e-02. */ + .quad 0x3FE0A8E5E039295C /* A00 = +5.206174258576470315063e-01. */ + .quad 0x3FDC35E1234583FE /* A01 = +4.407885403107342225937e-01. */ + .quad 0xBFC1DE034E31AEB9 /* A02 = -1.395877963835710222629e-01. */ + .quad 0x3F8EFDEBB3471BDC /* A03 = +1.513275280821162888101e-02. */ + .quad 0x3FE2851B603CB2A5 /* A00 = +5.787484054213406503564e-01. */ + .quad 0x3FD7F4A44ABBB286 /* A01 = +3.743067483726821853551e-01. */ + .quad 0xBFBD3EEB67087DE7 /* A02 = -1.142413260026767657385e-01. */ + .quad 0x3F8864F38329E8BD /* A03 = +1.191129917173260922836e-02. */ + .quad 0x3FE437DBE3C34AC1 /* A00 = +6.318187187665317283702e-01. */ + .quad 0x3FD43F6F789441B5 /* A01 = +3.163717916040938438194e-01. */ + .quad 0xBFB7D92E7901B9A4 /* A02 = -9.315767721429907277653e-02. 
*/ + .quad 0x3F8327ED342308E1 /* A03 = +9.353497651663324544136e-03. */ + .quad 0x3FE5C0977766D55C /* A00 = +6.797597248138731451661e-01. */ + .quad 0x3FD10B42A764D8F9 /* A01 = +2.663122782427219115142e-01. */ + .quad 0xBFB3633351D3D70F /* A02 = -7.573242900602060456716e-02. */ + .quad 0x3F7E079E30FF899C /* A03 = +7.331483779099558922843e-03. */ + .quad 0x3FE7202CE08A88C4 /* A00 = +7.226776490754436288455e-01. */ + .quad 0x3FCC973EB5662B01 /* A01 = +2.233656297433626314319e-01. */ + .quad 0xBFAF70A455F9920B /* A02 = -6.140626477716545211782e-02. */ + .quad 0x3F77812411CE99B6 /* A03 = +5.738392731393584730859e-03. */ + .quad 0x3FE85879424095B1 /* A00 = +7.608000082006382003286e-01. */ + .quad 0x3FC7E73BD1674D84 /* A01 = +1.867441914060742336190e-01. */ + .quad 0xBFA96F84E4BF333B /* A02 = -4.967894832916504993525e-02. */ + .quad 0x3F72606DDCA6E117 /* A03 = +4.486493251924870105662e-03. */ + .quad 0x3FE96BFE4957F4DD /* A00 = +7.944327766887472330737e-01. */ + .quad 0x3FC3ED4780D25478 /* A01 = +1.556786898624158421711e-01. */ + .quad 0xBFA489C5F9A56B58 /* A02 = -4.011362717093075458408e-02. */ + .quad 0x3F6CB5DC17E9AD2A /* A03 = +3.504686231556104931972e-03. */ + .quad 0x3FEA5D9CB2F41234 /* A00 = +8.239272589858672724006e-01. */ + .quad 0x3FC091A758374DCF /* A01 = +1.294449978582705440555e-01. */ + .quad 0xBFA08E436D4B5CE0 /* A02 = -3.233538350257858517978e-02. */ + .quad 0x3F666997AD53E6B7 /* A03 = +2.735897297154145629133e-03. */ + .quad 0x3FEB3060342CB850 /* A00 = +8.496552485501158713532e-01. */ + .quad 0x3FBB7D30BBC7DC1B /* A01 = +1.073790033768634993860e-01. */ + .quad 0xBF9AA6BA3443D9E3 /* A02 = -2.602663940430173170060e-02. */ + .quad 0x3F617CA764B7850B /* A03 = +2.134634914668814050648e-03. */ + .quad 0x3FEBE759A6A0C7B8 /* A00 = +8.719909910635044170135e-01. */ + .quad 0x3FB6C10DE6A703FF /* A01 = +8.888327485239243264115e-02. */ + .quad 0xBF956C566D8BE1F6 /* A02 = -2.092108768099084498138e-02. 
*/ + .quad 0x3F5B46D1A4A59CF8 /* A03 = +1.664833764687232917079e-03. */ + .quad 0x3FEC858494887A04 /* A00 = +8.912985707318630268503e-01. */ + .quad 0x3FB2CC31F543394D /* A01 = +7.342827070099140762682e-02. */ + .quad 0xBF9133477FF69137 /* A02 = -1.679717749142747504343e-02. */ + .quad 0x3F5544482FBB4DA5 /* A03 = +1.298017973501022466823e-03. */ + .quad 0x3FED0DB59D0E32E9 /* A00 = +9.079235141267335551518e-01. */ + .quad 0x3FAF006BAFFC6EF4 /* A01 = +6.055008433597022787787e-02. */ + .quad 0xBF8B97146FA2B97A /* A02 = -1.347175565419144252499e-02. */ + .quad 0x3F5093B01F4CDC69 /* A03 = +1.011774057770665211434e-03. */ + .quad 0x3FEDB487C3EC457C /* A00 = +9.282873942012623835751e-01. */ + .quad 0x3FA7390C09D0BD1D /* A01 = +4.535710925881118044112e-02. */ + .quad 0xBF83D9F7C3181106 /* A02 = -9.693084374710735778846e-03. */ + .quad 0x3F46E34A0A3C0E64 /* A03 = +6.984817050299072134500e-04. */ + .quad 0x3FEE5FFCB4E6EB00 /* A00 = +9.492171796076434020506e-01. */ + .quad 0x3F9F4913ED00AADF /* A01 = +3.055220731782070861526e-02. */ + .quad 0xBF79670BD0E59B5C /* A02 = -6.201788097633133961528e-03. */ + .quad 0x3F3BC998EBCAF96D /* A03 = +4.240034429975534616304e-04. */ + .quad 0x3FEEDBA41E9542FE /* A00 = +9.643116566968215064293e-01. */ + .quad 0x3F94F5DD18D9C24D /* A01 = +2.046914543319848858727e-02. */ + .quad 0xBF7034896AA122B9 /* A02 = -3.956352980886528904192e-03. */ + .quad 0x3F30DCCB47810B39 /* A03 = +2.573009765038273091199e-04. */ + .quad 0x3FEF33F2882520ED /* A00 = +9.750912341196716903724e-01. */ + .quad 0x3F8BF37F2CF553FF /* A01 = +1.364802699996836392315e-02. */ + .quad 0xBF649F6F05A69619 /* A02 = -2.517430152880317534986e-03. */ + .quad 0x3F247623C950AAC9 /* A03 = +1.561087307505231250044e-04. */ + .quad 0x3FEF727757751741 /* A00 = +9.827229221489021115943e-01. */ + .quad 0x3F828E67912C4400 /* A01 = +9.060677640748693306705e-03. */ + .quad 0xBF5A2F51A806CC2C /* A02 = -1.598195784123355826789e-03. 
*/ + .quad 0x3F18D35D7687E613 /* A03 = +9.470231965016282719549e-05. */ + .quad 0x3FEF9E6325C5942A /* A00 = +9.880843866091073568469e-01. */ + .quad 0x3F788AB117618F76 /* A01 = +5.991641772286606867914e-03. */ + .quad 0xBF5096EAB0B1EA89 /* A02 = -1.012543859160305046233e-03. */ + .quad 0x3F0E1E50EC4435AB /* A03 = +5.744633156910412119652e-05. */ + .quad 0x3FEFBD0784049369 /* A00 = +9.918248728250605994461e-01. */ + .quad 0x3F702BBD8294035F /* A01 = +3.947963975634432264028e-03. */ + .quad 0xBF44FB55E0F00593 /* A02 = -6.403130845457509273330e-04. */ + .quad 0x3F0244DCD723230A /* A03 = +3.484534217219031730379e-05. */ + .quad 0x3FEFD245E2366A43 /* A00 = +9.944180887426415926811e-01. */ + .quad 0x3F653D82EC088433 /* A01 = +2.592807490387838333795e-03. */ + .quad 0xBF3A7DF75E013CB8 /* A02 = -4.042366908878036561859e-04. */ + .quad 0x3EF6298E69F991CD /* A03 = +2.113564425911141559972e-05. */ + .quad 0x3FEFE0EAA508BC69 /* A00 = +9.962056372950317539861e-01. */ + .quad 0x3F5BD0771AF3FDDA /* A01 = +1.697651208644282514598e-03. */ + .quad 0xBF30B2E1254DE571 /* A02 = -2.548026725928887099328e-04. */ + .quad 0x3EEAE28B70EC0256 /* A03 = +1.281973848454955042307e-05. */ + .quad 0x3FEFEAF5303D7F96 /* A00 = +9.974313680831865536192e-01. */ + .quad 0x3F5229111365657E /* A01 = +1.108423877289460134782e-03. */ + .quad 0xBF250572D04DFE66 /* A02 = -1.603796628408704519168e-04. */ + .quad 0x3EE04E89BB57C981 /* A03 = +7.775682983689149966743e-06. */ + .quad 0x3FEFF1CF52F1CF44 /* A00 = +9.982678051005469122003e-01. */ + .quad 0x3F47A71316147CEB /* A01 = +7.218211359577819110842e-04. */ + .quad 0xBF1A6D7604055719 /* A02 = -1.008132248946049582547e-04. */ + .quad 0x3ED3C8047586A85C /* A03 = +4.716233739913014633626e-06. */ + .quad 0x3FEFF6770369EF69 /* A00 = +9.988360468555416149528e-01. */ + .quad 0x3F3EBB261180FBF0 /* A01 = +4.689186039321105101130e-04. */ + .quad 0xBF1097754FE19D7F /* A02 = -6.329206004950480057066e-05. 
*/ + .quad 0x3EC7FEFF83BCA0A7 /* A03 = +2.860556404988488738366e-06. */ + .quad 0x3FEFF99D42371AC4 /* A00 = +9.992204945818561334647e-01. */ + .quad 0x3F33EB2AEC271F59 /* A01 = +3.039340773764907474054e-04. */ + .quad 0xBF04CF18E0FC0D79 /* A02 = -3.968996690952969588805e-05. */ + .quad 0x3EBD1BDBD6019BE9 /* A03 = +1.735021065507727833886e-06. */ + .quad 0x3FEFFBBCA32B0D91 /* A00 = +9.994795977476532700123e-01. */ + .quad 0x3F29C41E1615110A /* A01 = +1.965796209707565346710e-04. */ + .quad 0xBEFA11F93D9DCB5A /* A02 = -2.486248909101414873235e-05. */ + .quad 0x3EB1A7CA4546F7A7 /* A03 = +1.052345642723709228769e-06. */ + .quad 0x3FEFFD298B8E8DE2 /* A00 = +9.996535993308806045121e-01. */ + .quad 0x3F20A1C42D523C5B /* A01 = +1.268913244172078754520e-04. */ + .quad 0xBEF0507A364AFAE4 /* A02 = -1.555859070622834605755e-05. */ + .quad 0x3EA56ACA17E7CDF4 /* A03 = +6.382806956848098872313e-07. */ + .quad 0x3FEFFE1DC82BA5A3 /* A00 = +9.997700604991915929176e-01. */ + .quad 0x3F156E73B90F1769 /* A01 = +8.175450626798714452801e-05. */ + .quad 0xBEE4663579D0A09F /* A02 = -9.727122057226747625365e-06. */ + .quad 0x3E99FAF6FEC5D4C1 /* A03 = +3.871371052824002996020e-07. */ + .quad 0x3FEFFEF8D0BB5E81 /* A00 = +9.998745037837154514548e-01. */ + .quad 0x3F06686DA18D39C3 /* A01 = +4.273972098777251447726e-05. */ + .quad 0xBED46BC298073E90 /* A02 = -4.868731025855742842491e-06. */ + .quad 0x3E88E42286B9D0FD /* A03 = +1.854535328530838170114e-07. */ + .quad 0x3FEFFF8DBC68DDC7 /* A00 = +9.999455146670975791423e-01. */ + .quad 0x3EF26B2953A80AF0 /* A01 = +1.756534514108903368909e-05. */ + .quad 0xBEBFC4472D580F83 /* A02 = -1.893443529411295465239e-06. */ + .quad 0x3E72505B4553D19F /* A03 = +6.822456673547912277047e-08. */ + .quad 0x3FEFFFCED1276609 /* A00 = +9.999765477215883935358e-01. */ + .quad 0x3EDE1A94C7CC58F5 /* A01 = +7.177313020153979672606e-06. */ + .quad 0xBEA8A2C988744E57 /* A02 = -7.342066660497443762363e-07. 
*/ + .quad 0x3E5AF30036BBBAF4 /* A03 = +2.509841882843541084885e-08. */ + .quad 0x3FEFFFEAFE70FCFC /* A00 = +9.999899835164849370983e-01. */ + .quad 0x3EC879175E3549F5 /* A01 = +2.917410471128503564412e-06. */ + .quad 0xBE930E36677D1813 /* A02 = -2.839493400307523115929e-07. */ + .quad 0x3E43D4005B42D48F /* A03 = +9.233192745401904898013e-09. */ + .quad 0x3ff0000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + + .align 32 + .type __svml_stanh_data_internal_avx2, @object + .size __svml_stanh_data_internal_avx2, .-__svml_stanh_data_internal_avx2 + .type __svml_stanh_data_internal, @object + .size __svml_stanh_data_internal, .-__svml_stanh_data_internal +#endif From patchwork Tue Feb 1 20:58:40 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Noah Goldstein X-Patchwork-Id: 50648 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id D23013857C4D for ; Tue, 1 Feb 2022 21:00:01 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org D23013857C4D DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1643749201; bh=bdzDP25B0qR+rxcATIqemgRY/C+5LcG4/+3zxmFHhtk=; h=To:Subject:Date:In-Reply-To:References:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=PCK0ZkV+/5L6xTz4gEuxCuiZKPdO3sjyyT+RK2kNhl+Amu0czb/7HE0zTy7I/ytNl XJ+OfReVBbDtlYej2RaPXRsScAl4I0/C2XEo883/h+nfXnVgCbpJ3dHqEOxjPJNIQa 7U9wuuXhlnTf3oR+yhDx2lCU2jEpN+z8wyvZ676Q= X-Original-To: libc-alpha@sourceware.org Delivered-To: libc-alpha@sourceware.org Received: from mail-io1-xd36.google.com (mail-io1-xd36.google.com [IPv6:2607:f8b0:4864:20::d36]) by sourceware.org (Postfix) with ESMTPS id 7F9743858C83 for ; Tue, 1 Feb 2022 20:58:54 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 
sourceware.org 7F9743858C83 Received: by mail-io1-xd36.google.com with SMTP id n17so22860647iod.4 for ; Tue, 01 Feb 2022 12:58:54 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=bdzDP25B0qR+rxcATIqemgRY/C+5LcG4/+3zxmFHhtk=; b=fndmccNU/+ukPdGLx+bCqThxLSwODT8gCfh+Rd188aHbFwVpaP4ZIsB7glaO99VOzS asEN/uDAB9cDG0/ALTNH1fdFcGsb485A7J0I02xtUmJjpxDNHyPPlr/WiGmYzV0iNZS6 ESKKAKf5w3bczCNzTQDQMwj+oYHqJH3Ilx6IOtsRuxWSLlEOUiYvrnvf89HnaejZ5GqM Db81FTBtk47aeDUgzZ6QXpX5cHoZ4T39PhV00LqCTfIoo3tXkp3qmhcbOmODT4q7y9iZ y+yllomq+BlYXN8StlrYqr7/1K8f+6mAha+HOOyYLh9JzlCcytUTj9gptX83wdjFRiHr kr4w== X-Gm-Message-State: AOAM531Agq4WjW0Vu331fVaaOYyblWgtVUVXxCF04M4STb8ZyKvISYVy 1yf/KgmGGKDQFgLtDWghf3rkVQUOWO0= X-Google-Smtp-Source: ABdhPJxNfjpspgGrnAE7rwnso6chzTRE0zl6gdQugwDAMGEfSbY+KgtZuhaXSObxkpjoKYq4PNvPRg== X-Received: by 2002:a05:6638:2584:: with SMTP id s4mr4764780jat.289.1643749133115; Tue, 01 Feb 2022 12:58:53 -0800 (PST) Received: from localhost.localdomain (node-17-161.flex.volo.net. 
[76.191.17.161]) by smtp.googlemail.com with ESMTPSA id u17sm9144308ilk.49.2022.02.01.12.58.52 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 01 Feb 2022 12:58:52 -0800 (PST) To: libc-alpha@sourceware.org Subject: [PATCH v2 3/3] x86: Optimize svml_s_tanhf_core_sse2.S Date: Tue, 1 Feb 2022 14:58:40 -0600 Message-Id: <20220201205840.2587777-3-goldstein.w.n@gmail.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20220201205840.2587777-1-goldstein.w.n@gmail.com> References: <20220201205840.2587777-1-goldstein.w.n@gmail.com> MIME-Version: 1.0 X-Spam-Status: No, score=-12.6 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, KAM_LOTSOFHASH, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: Noah Goldstein via Libc-alpha From: Noah Goldstein Reply-To: Noah Goldstein Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org Sender: "Libc-alpha" No bug. Optimizations are: 1. Reduce code size (-106 bytes). 2. Remove all sse2 rodata and reuse avx2 rodata (-4k+ bytes). 3. Remove register save/restores and stack adjustment from the fast path. 4. Slightly better instruction selection where possible. 5. Remove redundant register moves. 6. Prefer registers that get smaller instruction encodings. This results in roughly a 15% performance improvement. Results from geomean of 40 benchtest runs: Function, New Time, Old Time, New / Old _ZGVbN4v_tanhf, 3.28, 3.852, 0.852 All math and mathvec tests are passing. 
--- .../fpu/multiarch/svml_s_tanhf4_core_sse4.S | 871 +++--------------- 1 file changed, 131 insertions(+), 740 deletions(-) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core_sse4.S index 50f753ffb3..716b06d640 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core_sse4.S @@ -70,763 +70,154 @@ * */ -/* Offsets for data table __svml_stanh_data_internal - */ -#define _dbP 0 -#define _sSignMask 4288 -#define _sAbsMask 4304 -#define _iExpMantMask 4320 -#define _iExpMask 4336 -#define _iMinIdxOfsMask 4352 -#define _iMaxIdxMask 4368 #include +#define ONLY_DECL_OFFSET +#include "svml_s_tanhf_rodata.S" + .text .section .text.sse4,"ax",@progbits ENTRY(_ZGVbN4v_tanhf_sse4) - subq $72, %rsp - cfi_def_cfa_offset(80) - movaps %xmm0, %xmm5 + /* Save copy of input in xmm12. */ + movaps %xmm0, %xmm12 -/* Here huge arguments, INF and NaNs are filtered out to callout. */ - movdqu _iExpMantMask+__svml_stanh_data_internal(%rip), %xmm9 - lea _dbP+16+__svml_stanh_data_internal(%rip), %r8 - pand %xmm5, %xmm9 + /* Here huge arguments, INF and NaNs are filtered out to callout. 
*/ + movdqu TANHF_DATA(_iExpMantMask)(%rip), %xmm3 + pand %xmm0, %xmm3 -/* if VMIN, VMAX is defined for I type */ - pxor %xmm7, %xmm7 - movdqa %xmm9, %xmm6 - psubd _iMinIdxOfsMask+__svml_stanh_data_internal(%rip), %xmm9 -/* - * small table specific variables * - * Constant loading - */ - movdqu _iMaxIdxMask+__svml_stanh_data_internal(%rip), %xmm10 - movdqa %xmm9, %xmm11 - movdqa %xmm9, %xmm8 - pcmpgtd %xmm10, %xmm11 - pcmpgtd %xmm7, %xmm8 - movdqa %xmm11, %xmm14 - pand %xmm8, %xmm9 - andps %xmm11, %xmm10 - andnps %xmm9, %xmm14 - orps %xmm10, %xmm14 - psrld $14, %xmm14 - movd %xmm14, %edx - pshufd $1, %xmm14, %xmm12 - pshufd $2, %xmm14, %xmm13 - movd %xmm12, %ecx - pshufd $3, %xmm14, %xmm15 - movups _sAbsMask+__svml_stanh_data_internal(%rip), %xmm3 - movslq %edx, %rdx - andps %xmm5, %xmm3 - movslq %ecx, %rcx - pcmpgtd _iExpMask+__svml_stanh_data_internal(%rip), %xmm6 - movd %xmm13, %esi - movups -16(%rdx,%r8), %xmm2 - movaps %xmm2, %xmm0 - movd %xmm15, %edi - movmskps %xmm6, %eax - movups -16(%rcx,%r8), %xmm6 - unpcklpd %xmm6, %xmm0 - unpckhpd %xmm6, %xmm2 - cvtps2pd %xmm3, %xmm6 - movhlps %xmm3, %xmm3 - cvtps2pd %xmm3, %xmm3 - movslq %esi, %rsi - movslq %edi, %rdi - movups (%rcx,%r8), %xmm8 - movups (%rdx,%r8), %xmm12 - movups (%rsi,%r8), %xmm13 - movaps %xmm12, %xmm10 - movups (%rdi,%r8), %xmm9 - movaps %xmm13, %xmm11 - unpckhpd %xmm8, %xmm12 - unpckhpd %xmm9, %xmm13 - mulpd %xmm6, %xmm12 - mulpd %xmm3, %xmm13 - unpcklpd %xmm8, %xmm10 - unpcklpd %xmm9, %xmm11 - addpd %xmm10, %xmm12 - addpd %xmm11, %xmm13 - mulpd %xmm6, %xmm12 - mulpd %xmm3, %xmm13 - addpd %xmm2, %xmm12 - movups -16(%rsi,%r8), %xmm1 - movups -16(%rdi,%r8), %xmm7 - movaps %xmm1, %xmm14 - unpckhpd %xmm7, %xmm1 - addpd %xmm1, %xmm13 - mulpd %xmm12, %xmm6 - mulpd %xmm13, %xmm3 - addpd %xmm0, %xmm6 - unpcklpd %xmm7, %xmm14 - addpd %xmm14, %xmm3 - cvtpd2ps %xmm6, %xmm0 - cvtpd2ps %xmm3, %xmm1 - movups _sSignMask+__svml_stanh_data_internal(%rip), %xmm4 - movlhps %xmm1, %xmm0 - andps %xmm5, %xmm4 - orps 
%xmm4, %xmm0 - testl %eax, %eax - -/* Go to special inputs processing branch */ - jne L(SPECIAL_VALUES_BRANCH) - # LOE rbx rbp r12 r13 r14 r15 eax xmm0 xmm5 - -/* Restore registers - * and exit the function - */ + /* Selection of arguments between [0, 0x04280000] into xmm3. */ + pxor %xmm7, %xmm7 + /* Save xmm3 for special values check at end. */ + movdqa %xmm3, %xmm8 + psubd TANHF_DATA(_iMinIdxOfsMask)(%rip), %xmm3 + pmaxsd %xmm7, %xmm3 + pminsd TANHF_DATA(_iMaxIdxMask)(%rip), %xmm3 + psrld $14, %xmm3 -L(EXIT): - addq $72, %rsp - cfi_def_cfa_offset(8) - ret - cfi_def_cfa_offset(80) + movq %xmm3, %rcx + movl %ecx, %edx + shrq $32, %rcx -/* Branch to process - * special inputs - */ + /* xmm8 contains mask of special values. */ + pcmpgtd TANHF_DATA(_iExpMask)(%rip), %xmm8 -L(SPECIAL_VALUES_BRANCH): - movups %xmm5, 32(%rsp) - movups %xmm0, 48(%rsp) - # LOE rbx rbp r12 r13 r14 r15 eax - - xorl %edx, %edx - movq %r12, 16(%rsp) - cfi_offset(12, -64) - movl %edx, %r12d - movq %r13, 8(%rsp) - cfi_offset(13, -72) - movl %eax, %r13d - movq %r14, (%rsp) - cfi_offset(14, -80) - # LOE rbx rbp r15 r12d r13d - -/* Range mask - * bits check - */ + pshufd $0x0e, %xmm3, %xmm3 + movq %xmm3, %rdi + movl %edi, %esi + shrq $32, %rdi -L(RANGEMASK_CHECK): - btl %r12d, %r13d + movaps TANHF_DATA(_sAbsMask)(%rip), %xmm1 + andps %xmm1, %xmm0 -/* Call scalar math function */ - jc L(SCALAR_MATH_CALL) - # LOE rbx rbp r15 r12d r13d + leaq TANHF_DATA(_lookupTable)(%rip), %rax + movups (%rdx, %rax), %xmm2 + movups (%rcx, %rax), %xmm6 -/* Special inputs - * processing loop - */ + movaps %xmm2, %xmm4 + movlhps %xmm6, %xmm4 + unpckhpd %xmm6, %xmm2 -L(SPECIAL_VALUES_LOOP): - incl %r12d - cmpl $4, %r12d - -/* Check bits in range mask */ - jl L(RANGEMASK_CHECK) - # LOE rbx rbp r15 r12d r13d - - movq 16(%rsp), %r12 - cfi_restore(12) - movq 8(%rsp), %r13 - cfi_restore(13) - movq (%rsp), %r14 - cfi_restore(14) - movups 48(%rsp), %xmm0 - -/* Go to exit */ - jmp L(EXIT) - cfi_offset(12, -64) - cfi_offset(13, 
-72) - cfi_offset(14, -80) - # LOE rbx rbp r12 r13 r14 r15 xmm0 - -/* Scalar math fucntion call - * to process special input - */ + cvtps2pd %xmm0, %xmm6 + movhlps %xmm0, %xmm0 + cvtps2pd %xmm0, %xmm0 -L(SCALAR_MATH_CALL): - movl %r12d, %r14d - movss 32(%rsp,%r14,4), %xmm0 - call tanhf@PLT - # LOE rbx rbp r14 r15 r12d r13d xmm0 + movups 16(%rdx, %rax), %xmm5 + movups 16(%rsi, %rax), %xmm13 - movss %xmm0, 48(%rsp,%r14,4) + movaps %xmm5, %xmm10 + movaps %xmm13, %xmm11 -/* Process special inputs in loop */ - jmp L(SPECIAL_VALUES_LOOP) - # LOE rbx rbp r15 r12d r13d -END(_ZGVbN4v_tanhf_sse4) + movups 16(%rcx, %rax), %xmm7 + movups 16(%rdi, %rax), %xmm3 + + unpckhpd %xmm7, %xmm5 + unpckhpd %xmm3, %xmm13 + + mulpd %xmm6, %xmm5 + mulpd %xmm0, %xmm13 + + movlhps %xmm7, %xmm10 + movlhps %xmm3, %xmm11 + + addpd %xmm10, %xmm5 + addpd %xmm11, %xmm13 + + mulpd %xmm6, %xmm5 + mulpd %xmm0, %xmm13 + + addpd %xmm2, %xmm5 - .section .rodata, "a" - .align 16 - -#ifdef __svml_stanh_data_internal_typedef -typedef unsigned int VUINT32; -typedef struct -{ - __declspec(align(16)) VUINT32 _dbP[(134*4)][2]; - __declspec(align(16)) VUINT32 _sSignMask[4][1]; - __declspec(align(16)) VUINT32 _sAbsMask[4][1]; - __declspec(align(16)) VUINT32 _iExpMantMask[4][1]; - __declspec(align(16)) VUINT32 _iExpMask[4][1]; - __declspec(align(16)) VUINT32 _iMinIdxOfsMask[4][1]; - __declspec(align(16)) VUINT32 _iMaxIdxMask[4][1]; -} __svml_stanh_data_internal; -#endif -__svml_stanh_data_internal: - /* Pol_000: err=7.93e-09, x in [0.0000000; 0.0312500]. 
*/ - .quad 0x0000000000000000 /* A00 = +0.000000000000000000000e-01 */ - .quad 0x3FF00000022C70EB /* A01 = +1.000000008097283510367e+00 */ - .quad 0xBED00E878CFFA194 /* A02 = -3.828228912518614443549e-06 */ - .quad 0xBFD551766D0607A9 /* A03 = -3.330970825846813476723e-01 */ - .quad 0xBE53D60CE3E4C297 /* A00 = -1.847383956330407336230e-08 */ - .quad 0x3FF000024177CF5C /* A01 = +1.000002151235967140508e+00 */ - .quad 0xBF1758BC94A51A25 /* A02 = -8.906031613262943753568e-05 */ - .quad 0xBFD53EAE67E0D4F0 /* A03 = -3.319507612644221339337e-01 */ - .quad 0xBE5A9E47EF32D6FE /* A00 = -2.479020984039698285657e-08 */ - .quad 0x3FF00002DA983057 /* A01 = +1.000002721676556793895e+00 */ - .quad 0xBF1BD953509E94AA /* A02 = -1.062352277175377670507e-04 */ - .quad 0xBFD53BDB562EEDD5 /* A03 = -3.317783681520414806876e-01 */ - .quad 0xBE6191BBE496D294 /* A00 = -3.272532162914017685901e-08 */ - .quad 0x3FF0000390492017 /* A01 = +1.000003398528866105366e+00 */ - .quad 0xBF20727E814A57CE /* A02 = -1.254825043772153972919e-04 */ - .quad 0xBFD538DE060A6F22 /* A03 = -3.315959033004550748913e-01 */ - .quad 0xBE66DAFA2A893A25 /* A00 = -4.257146219278012568149e-08 */ - .quad 0x3FF0000465E08CD1 /* A01 = +1.000004194219219266770e+00 */ - .quad 0xBF2341C765EF91B6 /* A02 = -1.469188600530365522261e-04 */ - .quad 0xBFD535B6841FAF9E /* A03 = -3.314033785124993469751e-01 */ - .quad 0xBE6D5794E361E964 /* A00 = -5.465394929765249413434e-08 */ - .quad 0x3FF000055EE2A0CB /* A01 = +1.000005121846742950353e+00 */ - .quad 0xBF265E6C77E66C8B /* A02 = -1.706607253709506650304e-04 */ - .quad 0xBFD53264DDCCEDA6 /* A03 = -3.312008062382240103361e-01 */ - .quad 0xBE729C844D374A6E /* A00 = -6.933284462462096107184e-08 */ - .quad 0x3FF000067F019093 /* A01 = +1.000006195180536350264e+00 */ - .quad 0xBF29CC5348D6DCE5 /* A02 = -1.968242326435338705130e-04 */ - .quad 0xBFD52EE92121ED35 /* A03 = -3.309881995734998416658e-01 */ - .quad 0xBE775AEA17EAA872 /* A00 = -8.700465590574974405858e-08 */ - .quad 
0x3FF00007CA1D66B8 /* A01 = +1.000007428656699559610e+00 */ - .quad 0xBF2D8F5EB98A2637 /* A02 = -2.255252009216044881395e-04 */ - .quad 0xBFD52B435CDF9128 /* A03 = -3.307655722585587376727e-01 */ - .quad 0xBE7D04DA28C343F0 /* A00 = -1.081040272327705484794e-07 */ - .quad 0x3FF000094443CCF5 /* A01 = +1.000008837375216730337e+00 */ - .quad 0xBF30D5B76C947AE5 /* A02 = -2.568791210978817814332e-04 */ - .quad 0xBFD52773A0776FAD /* A03 = -3.305329386764651045105e-01 */ - .quad 0xBE81DD77A12C51C7 /* A00 = -1.331054169875768625701e-07 */ - .quad 0x3FF0000AF1AFD2DA /* A01 = +1.000010437096696680470e+00 */ - .quad 0xBF331230624C1680 /* A02 = -2.910011410651516805537e-04 */ - .quad 0xBFD52379FC0B61DF /* A03 = -3.302903138515186909352e-01 */ - .quad 0xBE85D04EEEB3C435 /* A00 = -1.625247628488202841012e-07 */ - .quad 0x3FF0000CD6C9B1F2 /* A01 = +1.000012244238970726684e+00 */ - .quad 0xBF357F0742FADDD4 /* A02 = -3.280060509313874068243e-04 */ - .quad 0xBFD51F56806D0E81 /* A03 = -3.300377134475880880338e-01 */ - .quad 0xBE8A6E289B59681B /* A00 = -1.969211333326924655065e-07 */ - .quad 0x3FF0000EF8268F72 /* A01 = +1.000014275873550406715e+00 */ - .quad 0xBF381E277A1B747A /* A02 = -3.680082682942575423093e-04 */ - .quad 0xBFD51B093F1D6FD4 /* A03 = -3.297751537663746734808e-01 */ - .quad 0xBE8FCBC40EE9ABD5 /* A00 = -2.368983653301529373887e-07 */ - .quad 0x3FF000115A883B6C /* A01 = +1.000016549721943981410e+00 */ - .quad 0xBF3AF17AC974B3D9 /* A02 = -4.111218235774406434303e-04 */ - .quad 0xBFD516924A4C549C /* A03 = -3.295026517456081105450e-01 */ - .quad 0xBE92FFBC60A3F956 /* A00 = -2.831066871072026054144e-07 */ - .quad 0x3FF0001402DCED8A /* A01 = +1.000019084151832604590e+00 */ - .quad 0xBF3DFAE9390C4801 /* A02 = -4.574603454311488280083e-04 */ - .quad 0xBFD511F1B4D7DC3A /* A03 = -3.292202249571719585575e-01 */ - .quad 0xBE9690A22F96D5AD /* A00 = -3.362443262393081632612e-07 */ - .quad 0x3FF00016F63EFF5D /* A01 = +1.000021898173108825247e+00 */ - .quad 0xBF409E2C839605BB /* A02 = 
-5.071370461992499986334e-04 */ - .quad 0xBFD50D27924BEE00 /* A03 = -3.289278916051614487515e-01 */ - .quad 0xBE9AA56C65E72A73 /* A00 = -3.970591019557469835586e-07 */ - .quad 0x3FF0001A39F4A43E /* A01 = +1.000025011433776978009e+00 */ - .quad 0xBF425BD74C3D6667 /* A02 = -5.602647074553602319844e-04 */ - .quad 0xBFD50833F6E1ABA2 /* A03 = -3.286256705238718156536e-01 */ - .quad 0xBE9F4BD4FF1A83B0 /* A00 = -4.663500013744687071912e-07 */ - .quad 0x3FF0001DD36F9EC2 /* A01 = +1.000028444215715683896e+00 */ - .quad 0xBF44376634149405 /* A02 = -6.169556656102642569831e-04 */ - .quad 0xBFD50316F77EDEE5 /* A03 = -3.283135811757190158922e-01 */ - .quad 0xBEA3B625387BB079 /* A00 = -5.874486399249461304297e-07 */ - .quad 0x3FF00023E14CFBA9 /* A01 = +1.000034217911642153709e+00 */ - .quad 0xBF47392F923218D2 /* A02 = -7.087213783883111826306e-04 */ - .quad 0xBFD4FB1FACDEB938 /* A03 = -3.278273761924483942209e-01 */ - .quad 0xBEAA6E24F543500A /* A00 = -7.876828740601738750574e-07 */ - .quad 0x3FF0002D5C6E8412 /* A01 = +1.000043259679163742959e+00 */ - .quad 0xBF4BAF02BD7FDD70 /* A02 = -8.448375110664940040861e-04 */ - .quad 0xBFD4EFEE6527A7DE /* A03 = -3.271442401734229177279e-01 */ - .quad 0xBEB16E3EBE2157D0 /* A00 = -1.038947396133402500647e-06 */ - .quad 0x3FF00038990FEE2F /* A01 = +1.000053975962952312884e+00 */ - .quad 0xBF50569481C574CB /* A02 = -9.972048056490652716971e-04 */ - .quad 0xBFD4E419278DA2B4 /* A03 = -3.264220129263251113372e-01 */ - .quad 0xBEB6A7B6723165D4 /* A00 = -1.350350836279403750524e-06 */ - .quad 0x3FF00045CAB4158E /* A01 = +1.000066558657042303793e+00 */ - .quad 0xBF531D7C9C849108 /* A02 = -1.166698160951775212202e-03 */ - .quad 0xBFD4D7A0BB33B152 /* A03 = -3.256608799117844954552e-01 */ - .quad 0xBEBD0EE2A8654AFD /* A00 = -1.732000471561702711532e-06 */ - .quad 0x3FF00055276F18D6 /* A01 = +1.000081209219890521211e+00 */ - .quad 0xBF562FDBA3FB6C6C /* A02 = -1.354183666925102939860e-03 */ - .quad 0xBFD4CA85F1B93DB2 /* A03 = 
-3.248610363561638125773e-01 */ - .quad 0xBEC269D4036A207E /* A00 = -2.195047297096822741730e-06 */ - .quad 0x3FF00066E7DA6E4E /* A01 = +1.000098138500919997540e+00 */ - .quad 0xBF5991499FC36B3A /* A02 = -1.560518167983372759405e-03 */ - .quad 0xBFD4BCC9A72283D6 /* A03 = -3.240226871658341556426e-01 */ - .quad 0xBEC7154B6C09CFE1 /* A00 = -2.751729738565190291276e-06 */ - .quad 0x3FF0007B47086B80 /* A01 = +1.000117566559055148900e+00 */ - .quad 0xBF5D455433B4F8F4 /* A02 = -1.786548832412968197680e-03 */ - .quad 0xBFD4AE6CC1BFE145 /* A03 = -3.231460468373550942722e-01 */ - .quad 0xBECCA68CC64A0F8A /* A00 = -3.415415948561670285790e-06 */ - .quad 0x3FF00092827742F7 /* A01 = +1.000139722473418535387e+00 */ - .quad 0xBF60A7BF15A527AF /* A02 = -2.033112728132522705610e-03 */ - .quad 0xBFD49F703214084C /* A03 = -3.222313393636155876010e-01 */ - .quad 0xBED19E68676B241B /* A00 = -4.200644630977303616698e-06 */ - .quad 0x3FF000ACDA037B26 /* A01 = +1.000164844146362863597e+00 */ - .quad 0xBF62D99F836A02F8 /* A02 = -2.301036405072284102280e-03 */ - .quad 0xBFD48FD4F2B91B28 /* A03 = -3.212787981359945810311e-01 */ - .quad 0xBED57CF4B0C7AA54 /* A00 = -5.123164339408145209103e-06 */ - .quad 0x3FF000CA8FD9E1A1 /* A01 = +1.000193178099017865534e+00 */ - .quad 0xBF653A014548E686 /* A02 = -2.591135484433962181405e-03 */ - .quad 0xBFD47F9C0844B38F /* A03 = -3.202886658426046806447e-01 */ - .quad 0xBEDA012B1B1A41E2 /* A00 = -6.199971197454598722328e-06 */ - .quad 0x3FF000EBE868FDF4 /* A01 = +1.000224979259539459520e+00 */ - .quad 0xBF67CA9427E0A544 /* A02 = -2.904214255086275467410e-03 */ - .quad 0xBFD46EC6812ADB37 /* A03 = -3.192611943626845749655e-01 */ - .quad 0xBEDF3EAC5BF12194 /* A00 = -7.449344990702664567927e-06 */ - .quad 0x3FF001112A520784 /* A01 = +1.000260510744255704196e+00 */ - .quad 0xBF6A8D01ABDA4DC4 /* A02 = -3.241065277345108255891e-03 */ - .quad 0xBFD45D55759FFA4A /* A03 = -3.181966446572103146551e-01 */ - .quad 0xBEE2A541BC274267 /* A00 = 
-8.890883582164319970972e-06 */ - .quad 0x3FF0013A9E5961F2 /* A01 = +1.000300043631906721231e+00 */ - .quad 0xBF6D82ECD080C540 /* A02 = -3.602468994380686462264e-03 */ - .quad 0xBFD44B4A0779C0AD /* A03 = -3.170952866557950611259e-01 */ - .quad 0xBEE61D97609A27F4 /* A00 = -1.054553560499505625520e-05 */ - .quad 0x3FF001688F56A3AF /* A01 = +1.000343856731187974773e+00 */ - .quad 0xBF7056F8EFB683EC /* A02 = -3.989193351487490407647e-03 */ - .quad 0xBFD438A5620F0F74 /* A03 = -3.159573991399533543500e-01 */ - .quad 0xBEEA145429EDD370 /* A00 = -1.243563138839952927732e-05 */ - .quad 0x3FF0019B4A242A67 /* A01 = +1.000392236341804297339e+00 */ - .quad 0xBF7207D31CA78D9B /* A02 = -4.401993423445739288258e-03 */ - .quad 0xBFD42568BA16E7CD /* A03 = -3.147832696228050619602e-01 */ - .quad 0xBEEE96370D52680F /* A00 = -1.458491207477835326165e-05 */ - .quad 0x3FF001D31D8E4115 /* A01 = +1.000445476009251821736e+00 */ - .quad 0xBF73D4CC11EDC094 /* A02 = -4.841611050196221316400e-03 */ - .quad 0xBFD411954D8664E7 /* A03 = -3.135731942252974469021e-01 */ - .quad 0xBEF338C046215EF8 /* A00 = -1.833122622260562810219e-05 */ - .quad 0x3FF00230C32C2EC1 /* A01 = +1.000534784691737621998e+00 */ - .quad 0xBF76BD019BCC5DAF /* A02 = -5.551344188254799492943e-03 */ - .quad 0xBFD3F2C7156DC21E /* A03 = -3.116929730668135389848e-01 */ - .quad 0xBEF9B15EAE411EAE /* A00 = -2.450261207822986676092e-05 */ - .quad 0x3FF002C2DF057A4D /* A01 = +1.000674124886830940184e+00 */ - .quad 0xBF7B08CCD9AC1E30 /* A02 = -6.600189396301511801646e-03 */ - .quad 0xBFD3C7A7A114FED8 /* A03 = -3.090609620157755976777e-01 */ - .quad 0xBF00E36483C373B3 /* A00 = -3.221178528332122595812e-05 */ - .quad 0x3FF0036F419480D7 /* A01 = +1.000838524028997644777e+00 */ - .quad 0xBF7FD255D1777007 /* A02 = -7.768950679260206403087e-03 */ - .quad 0xBFD39A453911D6CE /* A03 = -3.062909180947429588215e-01 */ - .quad 0xBF05DFA04DD12059 /* A00 = -4.172046622180685472624e-05 */ - .quad 0x3FF00438B2A03D8D /* A01 = 
+1.001030633695197069599e+00 */ - .quad 0xBF828F8DBB4A9D10 /* A02 = -9.062869337255224921890e-03 */ - .quad 0xBFD36AAB704697D9 /* A03 = -3.033856007044711255993e-01 */ - .quad 0xBF0BF3E0C647DEFB /* A00 = -5.331544597092331081714e-05 */ - .quad 0x3FF005221063D36D /* A01 = +1.001253189109060359741e+00 */ - .quad 0xBF857A2CB3C96102 /* A02 = -1.048693584122917590862e-02 */ - .quad 0xBFD338E65BBB4FEC /* A03 = -3.003478904549854444639e-01 */ - .quad 0xBF11A506ED7C9D31 /* A00 = -6.730894835681591541979e-05 */ - .quad 0x3FF0062E4D0EA92A /* A01 = +1.001508999829250345925e+00 */ - .quad 0xBF88AB82C2761AF3 /* A02 = -1.204588085125866091241e-02 */ - .quad 0xBFD305028D6BD206 /* A03 = -2.971807843271395688234e-01 */ - .quad 0xBF1607C0922D9BF1 /* A00 = -8.403885708006799337092e-05 */ - .quad 0x3FF007606C341961 /* A01 = +1.001800940198869449560e+00 */ - .quad 0xBF8C25E6DA487BCF /* A02 = -1.374416688582682892494e-02 */ - .quad 0xBFD2CF0D0EE8F7B5 /* A03 = -2.938873906713255768075e-01 */ - .quad 0xBF1B3A8480A0A16D /* A00 = -1.038688061788578038307e-04 */ - .quad 0x3FF008BB802D02D6 /* A01 = +1.002131939589323561535e+00 */ - .quad 0xBF8FEB8AE99FD100 /* A02 = -1.558598065819483124983e-02 */ - .quad 0xBFD297135BD0911B /* A03 = -2.904709240558688843059e-01 */ - .quad 0xBF20ABB9BDB75C65 /* A00 = -1.271881327357976163798e-04 */ - .quad 0x3FF00A42A76D8CD1 /* A01 = +1.002504972472525901495e+00 */ - .quad 0xBF91FF3D752BB9E6 /* A02 = -1.757522609380570560722e-02 */ - .quad 0xBFD25D235C1F88B4 /* A03 = -2.869346999779154305799e-01 */ - .quad 0xBF243D3254425461 /* A00 = -1.544116913733432829448e-04 */ - .quad 0x3FF00BF909D1795E /* A01 = +1.002923048355647051011e+00 */ - .quad 0xBF94304E04D44942 /* A02 = -1.971551804042204897316e-02 */ - .quad 0xBFD2214B5E61CFA6 /* A03 = -2.832821294498394371075e-01 */ - .quad 0xBF286070011B61CE /* A00 = -1.859795307186510085994e-04 */ - .quad 0x3FF00DE1D5E1627E /* A01 = +1.003389201612804537689e+00 */ - .quad 0xBF9689D5F4163F59 /* A02 = 
-2.201017668045266231780e-02 */ - .quad 0xBFD1E39A11C3B42C /* A03 = -2.795167134743816728104e-01 */ - .quad 0xBF2D250B366A79E8 /* A00 = -2.223564326486314902259e-04 */ - .quad 0x3FF010003E134001 /* A01 = +1.003906481248123094829e+00 */ - .quad 0xBF990C9FF91F6F81 /* A02 = -2.446222265267250853271e-02 */ - .quad 0xBFD1A41E80084CDC /* A03 = -2.756420374218586655246e-01 */ - .quad 0xBF314DB5DDC2A30E /* A00 = -2.640313157465248123865e-04 */ - .quad 0x3FF012577608921B /* A01 = +1.004477940624503018441e+00 */ - .quad 0xBF9BB9626875B0C9 /* A02 = -2.707437288829409385849e-02 */ - .quad 0xBFD162E80768A9D0 /* A03 = -2.716617653228725615122e-01 */ - .quad 0xBF346A6133808864 /* A00 = -3.115165050094957730625e-04 */ - .quad 0x3FF014EAAFCC88A3 /* A01 = +1.005106627192198898157e+00 */ - .quad 0xBF9E90BEF9BF7419 /* A02 = -2.984903716411588595059e-02 */ - .quad 0xBFD12006545F7FAD /* A03 = -2.675796340899932457269e-01 */ - .quad 0xBF37F180DC3848EA /* A00 = -3.653468704395550778821e-04 */ - .quad 0x3FF017BD19147861 /* A01 = +1.005795572250939295955e+00 */ - .quad 0xBFA0C9A14C702E07 /* A02 = -3.278831537326359207851e-02 */ - .quad 0xBFD0DB895B650092 /* A03 = -2.633994476818851682154e-01 */ - .quad 0xBF3BEC6AAC6D7635 /* A00 = -4.260788377246944457107e-04 */ - .quad 0x3FF01AD1D884E719 /* A01 = +1.006547780778822565040e+00 */ - .quad 0xBFA260B2A1B1434A /* A02 = -3.589399551186163439542e-02 */ - .quad 0xBFD09581529E93D6 /* A03 = -2.591250712233067465817e-01 */ - .quad 0xBF4164E26167882B /* A00 = -5.308251737086202562063e-04 */ - .quad 0x3FF01FEF14B62B81 /* A01 = +1.007796364693348545316e+00 */ - .quad 0xBFA4EB014538AA42 /* A02 = -4.085544557559163403315e-02 */ - .quad 0xBFD029D36FEAF41F /* A03 = -2.525528519580024222613e-01 */ - .quad 0xBF46F6FFF4E53DC8 /* A00 = -7.008313930700277652464e-04 */ - .quad 0x3FF027CBB51CBBA0 /* A01 = +1.009715754956893363214e+00 */ - .quad 0xBFA89DEC9FEC112E /* A02 = -4.807986690687680864098e-02 */ - .quad 0xBFCF2A99464D0DB4 /* A03 = 
-2.434875100390009317053e-01 */ - .quad 0xBF4DCC9C4F66A4D9 /* A00 = -9.094012482836712945103e-04 */ - .quad 0x3FF030E7CFCCD583 /* A01 = +1.011939822882909068014e+00 */ - .quad 0xBFACAA3B95814081 /* A02 = -5.598627281199331645611e-02 */ - .quad 0xBFCDF78F156BE7CF /* A03 = -2.341173987004467604844e-01 */ - .quad 0xBF5308ED74E5C7A6 /* A00 = -1.161796466103906435435e-03 */ - .quad 0x3FF03B5986412ECB /* A01 = +1.014489674026594512313e+00 */ - .quad 0xBFB087EBA88DCC3F /* A02 = -6.457398285947223148806e-02 */ - .quad 0xBFCCBB9BD134862F /* A03 = -2.244753619680052991736e-01 */ - .quad 0xBF57FA23C00DF4B5 /* A00 = -1.463446533505758208674e-03 */ - .quad 0x3FF0473558A1BCC0 /* A01 = +1.017384859292903342975e+00 */ - .quad 0xBFB2E702BC6360EF /* A02 = -7.383744334527241048871e-02 */ - .quad 0xBFCB77D546379288 /* A03 = -2.145945160729250122955e-01 */ - .quad 0xBF5DD12971557F71 /* A00 = -1.819887610814388068450e-03 */ - .quad 0x3FF0548DDF5000A8 /* A01 = +1.020643112482540360020e+00 */ - .quad 0xBFB571B63DA186E1 /* A02 = -8.376635555898871710045e-02 */ - .quad 0xBFCA2D5202605148 /* A03 = -2.045080672838912594358e-01 */ - .quad 0xBF6252B1AD5D4F17 /* A00 = -2.236697221556737096709e-03 */ - .quad 0x3FF063738A910BF7 /* A01 = +1.024280110622155737232e+00 */ - .quad 0xBFB8270C8E6B601B /* A02 = -9.434584118878357184013e-02 */ - .quad 0xBFC8DD27D950A07E /* A03 = -1.942491351230763441116e-01 */ - .quad 0xBF66470C91730CFC /* A00 = -2.719425723258004842786e-03 */ - .quad 0x3FF073F468FCF331 /* A01 = +1.028309259519300633556e+00 */ - .quad 0xBFBB05C2952191E4 /* A02 = -1.055566419686964629854e-01 */ - .quad 0xBFC7886A770DE2BD /* A03 = -1.838505822486435070662e-01 */ - .quad 0xBF6AD114AC8E98EC /* A00 = -3.273525599485007861467e-03 */ - .quad 0x3FF0861BF53E5226 /* A01 = +1.032741506559554434119e+00 */ - .quad 0xBFBE0C4F9B461507 /* A02 = -1.173753503881763554650e-01 */ - .quad 0xBFC6302A037CDE3A /* A03 = -1.733448521642786954722e-01 */ - .quad 0xBF6FFBDE2A6C2AF8 /* A00 = 
-3.904279630096648551207e-03 */ - .quad 0x3FF099F2EB8E7DA3 /* A01 = +1.037585182326304034106e+00 */ - .quad 0xBFC09C74D192DDF0 /* A02 = -1.297746680554463516444e-01 */ - .quad 0xBFC4D571D8E3079F /* A03 = -1.627638157861470424859e-01 */ - .quad 0xBF72E8FDC0B952AA /* A00 = -4.616728994353872309042e-03 */ - .quad 0x3FF0AF7F273C9533 /* A01 = +1.042845872181101141152e+00 */ - .quad 0xBFC244C512736F10 /* A02 = -1.427236881344176033792e-01 */ - .quad 0xBFC379474F58B902 /* A03 = -1.521386277613104298645e-01 */ - .quad 0xBF762EABAF17395B /* A00 = -5.415602341101023557701e-03 */ - .quad 0x3FF0C6C3886F63FB /* A01 = +1.048526318502125631582e+00 */ - .quad 0xBFC3FDF9918EA12A /* A02 = -1.561881981590514389957e-01 */ - .quad 0xBFC21CA89ECAB895 /* A03 = -1.414995932913753196036e-01 */ - .quad 0xBF79D387CE5B2BAE /* A00 = -6.305246822828998107258e-03 */ - .quad 0x3FF0DFBFE2346376 /* A01 = +1.054626353847394337748e+00 */ - .quad 0xBFC5C6DA43602620 /* A02 = -1.701309994680721970894e-01 */ - .quad 0xBFC0C08BD8DB6631 /* A03 = -1.308760460731704100557e-01 */ - .quad 0xBF7DDBA8E8DA9060 /* A00 = -7.289562037531366334164e-03 */ - .quad 0x3FF0FA70F0D1B464 /* A01 = +1.061142864894713433443e+00 */ - .quad 0xBFC79E18D92BAA7C /* A02 = -1.845122394946264732241e-01 */ - .quad 0xBFBECBBBF74C2669 /* A03 = -1.202962378266875381749e-01 */ - .quad 0xBF81254E76EA25DA /* A00 = -8.371937755572145950511e-03 */ - .quad 0x3FF116D05835EBD0 /* A01 = +1.068069786618014660462e+00 */ - .quad 0xBFC982539E2ED224 /* A02 = -1.992897531869327609755e-01 */ - .quad 0xBFBC1B043C350159 /* A03 = -1.097872397413132278254e-01 */ - .quad 0xBF8391ACBA863403 /* A00 = -9.555196230190082448686e-03 */ - .quad 0x3FF134D4AA477FE2 /* A01 = +1.075398125794884141015e+00 */ - .quad 0xBFCB7218609FEAFB /* A02 = -2.144194099235717521079e-01 */ - .quad 0xBFB970A16CB88329 /* A03 = -9.937485603633135211599e-02 */ - .quad 0xBF87935088E48E8B /* A00 = -1.151144902957603431692e-02 */ - .quad 0x3FF1649892AD7DD3 /* A01 = 
+1.087059567413110938716e+00 */ - .quad 0xBFCE6971DDE75409 /* A02 = -2.375929196847723912089e-01 */ - .quad 0xBFB58291E88CB251 /* A03 = -8.402358939628952472223e-02 */ - .quad 0xBF8DB3A62C325325 /* A00 = -1.450280973794233242702e-02 */ - .quad 0x3FF1A9C900C6DEEA /* A01 = +1.103951457056548068891e+00 */ - .quad 0xBFD13DBC65B0E08E /* A02 = -2.693930619311765140012e-01 */ - .quad 0xBFB06696F62696D1 /* A03 = -6.406539449252625362252e-02 */ - .quad 0xBF92583699F2E27A /* A00 = -1.791463198307716858659e-02 */ - .quad 0x3FF1F451B85AA9F0 /* A01 = +1.122148246892376022288e+00 */ - .quad 0xBFD34FD5F8288180 /* A02 = -3.017477916164565954205e-01 */ - .quad 0xBFA6FB692825B683 /* A03 = -4.488686194495718900788e-02 */ - .quad 0xBF9641C26E673D6F /* A00 = -2.173522757385398448959e-02 */ - .quad 0x3FF24364DA5E2B07 /* A01 = +1.141453602790251542487e+00 */ - .quad 0xBFD564A5A5EF5890 /* A02 = -3.342680092295120530821e-01 */ - .quad 0xBF9B43712011A982 /* A03 = -2.662445791467283467968e-02 */ - .quad 0xBF9A901038EC2F39 /* A00 = -2.594018313816024226548e-02 */ - .quad 0x3FF2961356DFFEBA /* A01 = +1.161639537196534011088e+00 */ - .quad 0xBFD775EBB17198C7 /* A02 = -3.665723069046972759644e-01 */ - .quad 0xBF833B1A926CD462 /* A03 = -9.390075295963199591975e-03 */ - .quad 0xBF9F396A6A461B91 /* A00 = -3.049246095317987084727e-02 */ - .quad 0x3FF2EB53BAEF534B /* A01 = +1.182452898229899629357e+00 */ - .quad 0xBFD97DABF8AD8BBD /* A02 = -3.982953957076310058660e-01 */ - .quad 0x3F7B8F6A3E0F8837 /* A03 = +6.728568086119371925713e-03 */ - .quad 0xBFA21878590F8BAA /* A00 = -3.534294211546946951064e-02 */ - .quad 0x3FF34209790236E1 /* A01 = +1.203622315111197105253e+00 */ - .quad 0xBFDB764C0E71BECB /* A02 = -4.290952817018306997277e-01 */ - .quad 0x3F962FE0C03F84C0 /* A03 = +2.166701482190513949888e-02 */ - .quad 0xBFA4B36B9AD27ECC /* A00 = -4.043136849327097492868e-02 */ - .quad 0x3FF3990C5B12FC16 /* A01 = +1.224865298994477935679e+00 */ - .quad 0xBFDD5AABB0D01390 /* A02 = 
-4.586590983092770912322e-01 */ - .quad 0x3FA21DAF5CA162DB /* A03 = +3.538272863142363083844e-02 */ - .quad 0xBFA7645E4D7BF28B /* A00 = -4.568762489177399105378e-02 */ - .quad 0x3FF3EF2FD51C0D9F /* A01 = +1.245895225962932562069e+00 */ - .quad 0xBFDF26377E1B686E /* A02 = -4.867075664057044503963e-01 */ - .quad 0x3FA8803E756EE812 /* A03 = +4.785342391501513914509e-02 */ - .quad 0xBFAA210925C64413 /* A00 = -5.103329263796054643398e-02 */ - .quad 0x3FF44349F897D8E7 /* A01 = +1.266427966181760345066e+00 */ - .quad 0xBFE06A7B02C6D8E2 /* A02 = -5.129981092675530707226e-01 */ - .quad 0x3FAE3F194734F5D0 /* A03 = +5.907515520309980505687e-02 */ - .quad 0xBFACDE48F8A19BBB /* A00 = -5.638340029764018351832e-02 */ - .quad 0x3FF49439D5466582 /* A01 = +1.286187966447272845727e+00 */ - .quad 0xBFE131C7C1063DDC /* A02 = -5.373266954429101183166e-01 */ - .quad 0x3FB1ADEEC36AD805 /* A03 = +6.906025191241844940482e-02 */ - .quad 0xBFAF905D8F585680 /* A00 = -6.164829611604449866036e-02 */ - .quad 0x3FF4E0ED1FD27F99 /* A01 = +1.304913639360142818546e+00 */ - .quad 0xBFE1E7A859DC1D3D /* A02 = -5.595285182070380836095e-01 */ - .quad 0x3FB3ED018E4642A1 /* A03 = +7.783517573831001679086e-02 */ - .quad 0xBFB11595104160BA /* A00 = -6.673556944713512906198e-02 */ - .quad 0x3FF528650340490B /* A01 = +1.322361958217302513319e+00 */ - .quad 0xBFE28B14B40BC974 /* A02 = -5.794776455425521000109e-01 */ - .quad 0x3FB5DF49F5BAF6D7 /* A03 = +8.543836831355676453281e-02 */ - .quad 0xBFB2513A97344BA4 /* A00 = -7.155195418844911836587e-02 */ - .quad 0x3FF569BA0DB5EE14 /* A01 = +1.338312200124055273420e+00 */ - .quad 0xBFE31B53A8B67B20 /* A02 = -5.970857901737396389308e-01 */ - .quad 0x3FB787F297BB0544 /* A03 = +9.191814617499455275507e-02 */ - .quad 0xBFB37512E848FAFA /* A00 = -7.600515528700305112331e-02 */ - .quad 0x3FF5A41F33B403C8 /* A01 = +1.352568819013173495591e+00 */ - .quad 0xBFE397F6EA9A58A5 /* A02 = -6.123003561103997904880e-01 */ - .quad 0x3FB8EAA9FF25CA06 /* A03 = 
+9.733068923177520814782e-02 */ - .quad 0xBFB47B3E603AFC5D /* A00 = -8.000554894805263217439e-02 */ - .quad 0x3FF5D6E3EDE40487 /* A01 = +1.364963464031718975988e+00 */ - .quad 0xBFE400D5BCA6D631 /* A02 = -6.251019177058819709103e-01 */ - .quad 0x3FBA0B830ED567FE /* A03 = +1.017381583418739132707e-01 */ - .quad 0xBFB5BBFE8AC90496 /* A00 = -8.489981544791400103200e-02 */ - .quad 0x3FF612BA70107E95 /* A01 = +1.379572332145390989311e+00 */ - .quad 0xBFE477EAF1FA7693 /* A02 = -6.396383978023599814478e-01 */ - .quad 0x3FBB4784B7C08A95 /* A03 = +1.065600346196709652391e-01 */ - .quad 0xBFB6D5D940743939 /* A00 = -8.920057128509463473254e-02 */ - .quad 0x3FF644A8748F70CE /* A01 = +1.391762214006166953340e+00 */ - .quad 0xBFE4D646AB07EA37 /* A02 = -6.511567440459832267763e-01 */ - .quad 0x3FBC354F4E1D5292 /* A03 = +1.101884427747086558913e-01 */ - .quad 0xBFB7223D19E4F3D1 /* A00 = -9.036619074045339206069e-02 */ - .quad 0x3FF6518FEB42B7FA /* A01 = +1.394912642466350494175e+00 */ - .quad 0xBFE4ED86CB87498C /* A02 = -6.539949393430091184598e-01 */ - .quad 0x3FBC6D29F28CCA9B /* A03 = +1.110407082713131127205e-01 */ - .quad 0xBFB6878652FF6312 /* A00 = -8.800544287022329936754e-02 */ - .quad 0x3FF63948C302D040 /* A01 = +1.388985406648330922508e+00 */ - .quad 0xBFE4C4E2E7904E17 /* A02 = -6.490339777687407218920e-01 */ - .quad 0x3FBC127356CA1ABE /* A03 = +1.096565329445224612481e-01 */ - .quad 0xBFB4F5D18B0C91D6 /* A00 = -8.187589306596207427980e-02 */ - .quad 0x3FF5FD27EB7DD0B8 /* A01 = +1.374305648697413673176e+00 */ - .quad 0xBFE464E01A2B2FC6 /* A02 = -6.373138915164353601739e-01 */ - .quad 0x3FBB460547674A30 /* A03 = +1.065371798825160976065e-01 */ - .quad 0xBFB26642FA16A685 /* A00 = -7.187288861919156890412e-02 */ - .quad 0x3FF59F9BEDE1C95A /* A01 = +1.351467065073470141812e+00 */ - .quad 0xBFE3D67920C8FBEA /* A02 = -6.199308052381387046381e-01 */ - .quad 0x3FBA24F6A8D3CBC1 /* A03 = +1.021265184570401413078e-01 */ - .quad 0xBFADB5294794F097 /* A00 = 
-5.802277563859197656582e-02 */ - .quad 0x3FF523EA7B9CF453 /* A01 = +1.321268542159732772845e+00 */ - .quad 0xBFE322A8B55E35DB /* A02 = -5.979808370918208160205e-01 */ - .quad 0x3FB8C8673B1B3E37 /* A03 = +9.680791085269722928697e-02 */ - .quad 0xBFA4B7D661965C6A /* A00 = -4.046506825687219699450e-02 */ - .quad 0x3FF48DE3E2CE3122 /* A01 = +1.284641157110919085227e+00 */ - .quad 0xBFE251FED1A7F445 /* A02 = -5.725092024655472622285e-01 */ - .quad 0x3FB745699FCABDB9 /* A03 = +9.090290213747821701507e-02 */ - .quad 0xBF93E60456E4EE1D /* A00 = -1.943213253365004902773e-02 */ - .quad 0x3FF3E1A14E628A59 /* A01 = +1.242585474196536532432e+00 */ - .quad 0xBFE16C5AB660E876 /* A02 = -5.444768488007543094653e-01 */ - .quad 0x3FB5AD33AA8C188F /* A03 = +8.467410005332197397987e-02 */ - .quad 0x3F738C17C47C7961 /* A00 = +4.772274820224659853951e-03 */ - .quad 0x3FF3234DDE3BD146 /* A01 = +1.196119182682268355933e+00 */ - .quad 0xBFE078C0D77A9D3B /* A02 = -5.147403915952176722826e-01 */ - .quad 0x3FB40D74B3E276B8 /* A03 = +7.833032027925923568290e-02 */ - .quad 0x3FA0474BECC689C7 /* A00 = +3.179394975019849550746e-02 */ - .quad 0x3FF256FB4FA7D18A /* A01 = +1.146235762743432307076e+00 */ - .quad 0xBFDEFA8E3FB285E2 /* A02 = -4.840427038235174395098e-01 */ - .quad 0x3FB270C007493D59 /* A03 = +7.203293016322244446403e-02 */ - .quad 0x3FAF5BD51E479BDC /* A00 = +6.124750132203590768931e-02 */ - .quad 0x3FF18081D0B53BC5 /* A01 = +1.093873801484492647162e+00 */ - .quad 0xBFDCFE2439BD0C03 /* A02 = -4.530115665294831006626e-01 */ - .quad 0x3FB0DEFE5A45AFDD /* A03 = +6.590261176978580437424e-02 */ - .quad 0x3FB7BD5D2806EA26 /* A00 = +9.273321368429118805032e-02 */ - .quad 0x3FF0A369E35B4440 /* A01 = +1.039895904647224256223e+00 */ - .quad 0xBFDB04BC5C9951E7 /* A02 = -4.221640495573226181669e-01 */ - .quad 0x3FAEBBBAA9D6DEEF /* A03 = +6.002600978120919278380e-02 */ - .quad 0x3FC01BE411098DBC /* A00 = +1.258511622610124502941e-01 */ - .quad 0x3FEF85BDABC031C1 /* A01 = 
+9.850757936961188621083e-01 */ - .quad 0xBFD91521375097C2 /* A02 = -3.919146576102968682065e-01 */ - .quad 0x3FABE26F0086D982 /* A03 = +5.446192628317005068883e-02 */ - .quad 0x3FC481D7FF5776B9 /* A00 = +1.602125164781023347604e-01 */ - .quad 0x3FEDC3506C1E7218 /* A01 = +9.300920592973538347792e-01 */ - .quad 0xBFD7349A88DA7D4F /* A02 = -3.625856720409119104964e-01 */ - .quad 0x3FA936E2DFF8E2AE /* A03 = +4.924687370334389358018e-02 */ - .quad 0x3FC90471F96FA27A /* A00 = +1.954481571149420671141e-01 */ - .quad 0x3FEC0451601987A2 /* A01 = +8.755270840595026360376e-01 */ - .quad 0xBFD5671CD4B898DC /* A02 = -3.344184949259110251063e-01 */ - .quad 0x3FA6BB9594603B67 /* A03 = +4.439990459660841243261e-02 */ - .quad 0x3FCFD8ADB9ED944C /* A00 = +2.488000066615846384011e-01 */ - .quad 0x3FE978C073F6809A /* A01 = +7.959902062321078108909e-01 */ - .quad 0xBFD2DF7E00BCD5A9 /* A02 = -2.948908812716931060471e-01 */ - .quad 0x3FA3614033D490B2 /* A03 = +3.785133965200894456959e-02 */ - .quad 0x3FD4846A12AFE5A0 /* A00 = +3.205819303981005674586e-01 */ - .quad 0x3FE63A1147D40472 /* A01 = +6.945883181471244061100e-01 */ - .quad 0xBFCFA2268AD34450 /* A02 = -2.471359422548027318101e-01 */ - .quad 0x3F9F150201D9FFE0 /* A03 = +3.035357605267552383310e-02 */ - .quad 0x3FD9018641F82BEB /* A00 = +3.907180446846598154131e-01 */ - .quad 0x3FE33B7C220FFBDC /* A01 = +6.010113396913498995389e-01 */ - .quad 0xBFCA4E4187E29C86 /* A02 = -2.055131829740483584423e-01 */ - .quad 0x3F98C30CED19F8F4 /* A03 = +2.418155858185229434287e-02 */ - .quad 0x3FDD4B8255BEB078 /* A00 = +4.577337109901757905561e-01 */ - .quad 0x3FE0858B19D3A49B /* A01 = +5.163016800335243905451e-01 */ - .quad 0xBFC5BC929EACE564 /* A02 = -1.698172831327539045176e-01 */ - .quad 0x3F93A083CE57DE2B /* A03 = +1.916700312537337677621e-02 */ - .quad 0x3FE0A8E5E039295C /* A00 = +5.206174258576470315063e-01 */ - .quad 0x3FDC35E1234583FE /* A01 = +4.407885403107342225937e-01 */ - .quad 0xBFC1DE034E31AEB9 /* A02 = 
-1.395877963835710222629e-01 */ - .quad 0x3F8EFDEBB3471BDC /* A03 = +1.513275280821162888101e-02 */ - .quad 0x3FE2851B603CB2A5 /* A00 = +5.787484054213406503564e-01 */ - .quad 0x3FD7F4A44ABBB286 /* A01 = +3.743067483726821853551e-01 */ - .quad 0xBFBD3EEB67087DE7 /* A02 = -1.142413260026767657385e-01 */ - .quad 0x3F8864F38329E8BD /* A03 = +1.191129917173260922836e-02 */ - .quad 0x3FE437DBE3C34AC1 /* A00 = +6.318187187665317283702e-01 */ - .quad 0x3FD43F6F789441B5 /* A01 = +3.163717916040938438194e-01 */ - .quad 0xBFB7D92E7901B9A4 /* A02 = -9.315767721429907277653e-02 */ - .quad 0x3F8327ED342308E1 /* A03 = +9.353497651663324544136e-03 */ - .quad 0x3FE5C0977766D55C /* A00 = +6.797597248138731451661e-01 */ - .quad 0x3FD10B42A764D8F9 /* A01 = +2.663122782427219115142e-01 */ - .quad 0xBFB3633351D3D70F /* A02 = -7.573242900602060456716e-02 */ - .quad 0x3F7E079E30FF899C /* A03 = +7.331483779099558922843e-03 */ - .quad 0x3FE7202CE08A88C4 /* A00 = +7.226776490754436288455e-01 */ - .quad 0x3FCC973EB5662B01 /* A01 = +2.233656297433626314319e-01 */ - .quad 0xBFAF70A455F9920B /* A02 = -6.140626477716545211782e-02 */ - .quad 0x3F77812411CE99B6 /* A03 = +5.738392731393584730859e-03 */ - .quad 0x3FE85879424095B1 /* A00 = +7.608000082006382003286e-01 */ - .quad 0x3FC7E73BD1674D84 /* A01 = +1.867441914060742336190e-01 */ - .quad 0xBFA96F84E4BF333B /* A02 = -4.967894832916504993525e-02 */ - .quad 0x3F72606DDCA6E117 /* A03 = +4.486493251924870105662e-03 */ - .quad 0x3FE96BFE4957F4DD /* A00 = +7.944327766887472330737e-01 */ - .quad 0x3FC3ED4780D25478 /* A01 = +1.556786898624158421711e-01 */ - .quad 0xBFA489C5F9A56B58 /* A02 = -4.011362717093075458408e-02 */ - .quad 0x3F6CB5DC17E9AD2A /* A03 = +3.504686231556104931972e-03 */ - .quad 0x3FEA5D9CB2F41234 /* A00 = +8.239272589858672724006e-01 */ - .quad 0x3FC091A758374DCF /* A01 = +1.294449978582705440555e-01 */ - .quad 0xBFA08E436D4B5CE0 /* A02 = -3.233538350257858517978e-02 */ - .quad 0x3F666997AD53E6B7 /* A03 = 
+2.735897297154145629133e-03 */ - .quad 0x3FEB3060342CB850 /* A00 = +8.496552485501158713532e-01 */ - .quad 0x3FBB7D30BBC7DC1B /* A01 = +1.073790033768634993860e-01 */ - .quad 0xBF9AA6BA3443D9E3 /* A02 = -2.602663940430173170060e-02 */ - .quad 0x3F617CA764B7850B /* A03 = +2.134634914668814050648e-03 */ - .quad 0x3FEBE759A6A0C7B8 /* A00 = +8.719909910635044170135e-01 */ - .quad 0x3FB6C10DE6A703FF /* A01 = +8.888327485239243264115e-02 */ - .quad 0xBF956C566D8BE1F6 /* A02 = -2.092108768099084498138e-02 */ - .quad 0x3F5B46D1A4A59CF8 /* A03 = +1.664833764687232917079e-03 */ - .quad 0x3FEC858494887A04 /* A00 = +8.912985707318630268503e-01 */ - .quad 0x3FB2CC31F543394D /* A01 = +7.342827070099140762682e-02 */ - .quad 0xBF9133477FF69137 /* A02 = -1.679717749142747504343e-02 */ - .quad 0x3F5544482FBB4DA5 /* A03 = +1.298017973501022466823e-03 */ - .quad 0x3FED0DB59D0E32E9 /* A00 = +9.079235141267335551518e-01 */ - .quad 0x3FAF006BAFFC6EF4 /* A01 = +6.055008433597022787787e-02 */ - .quad 0xBF8B97146FA2B97A /* A02 = -1.347175565419144252499e-02 */ - .quad 0x3F5093B01F4CDC69 /* A03 = +1.011774057770665211434e-03 */ - .quad 0x3FEDB487C3EC457C /* A00 = +9.282873942012623835751e-01 */ - .quad 0x3FA7390C09D0BD1D /* A01 = +4.535710925881118044112e-02 */ - .quad 0xBF83D9F7C3181106 /* A02 = -9.693084374710735778846e-03 */ - .quad 0x3F46E34A0A3C0E64 /* A03 = +6.984817050299072134500e-04 */ - .quad 0x3FEE5FFCB4E6EB00 /* A00 = +9.492171796076434020506e-01 */ - .quad 0x3F9F4913ED00AADF /* A01 = +3.055220731782070861526e-02 */ - .quad 0xBF79670BD0E59B5C /* A02 = -6.201788097633133961528e-03 */ - .quad 0x3F3BC998EBCAF96D /* A03 = +4.240034429975534616304e-04 */ - .quad 0x3FEEDBA41E9542FE /* A00 = +9.643116566968215064293e-01 */ - .quad 0x3F94F5DD18D9C24D /* A01 = +2.046914543319848858727e-02 */ - .quad 0xBF7034896AA122B9 /* A02 = -3.956352980886528904192e-03 */ - .quad 0x3F30DCCB47810B39 /* A03 = +2.573009765038273091199e-04 */ - .quad 0x3FEF33F2882520ED /* A00 = 
+9.750912341196716903724e-01 */ - .quad 0x3F8BF37F2CF553FF /* A01 = +1.364802699996836392315e-02 */ - .quad 0xBF649F6F05A69619 /* A02 = -2.517430152880317534986e-03 */ - .quad 0x3F247623C950AAC9 /* A03 = +1.561087307505231250044e-04 */ - .quad 0x3FEF727757751741 /* A00 = +9.827229221489021115943e-01 */ - .quad 0x3F828E67912C4400 /* A01 = +9.060677640748693306705e-03 */ - .quad 0xBF5A2F51A806CC2C /* A02 = -1.598195784123355826789e-03 */ - .quad 0x3F18D35D7687E613 /* A03 = +9.470231965016282719549e-05 */ - .quad 0x3FEF9E6325C5942A /* A00 = +9.880843866091073568469e-01 */ - .quad 0x3F788AB117618F76 /* A01 = +5.991641772286606867914e-03 */ - .quad 0xBF5096EAB0B1EA89 /* A02 = -1.012543859160305046233e-03 */ - .quad 0x3F0E1E50EC4435AB /* A03 = +5.744633156910412119652e-05 */ - .quad 0x3FEFBD0784049369 /* A00 = +9.918248728250605994461e-01 */ - .quad 0x3F702BBD8294035F /* A01 = +3.947963975634432264028e-03 */ - .quad 0xBF44FB55E0F00593 /* A02 = -6.403130845457509273330e-04 */ - .quad 0x3F0244DCD723230A /* A03 = +3.484534217219031730379e-05 */ - .quad 0x3FEFD245E2366A43 /* A00 = +9.944180887426415926811e-01 */ - .quad 0x3F653D82EC088433 /* A01 = +2.592807490387838333795e-03 */ - .quad 0xBF3A7DF75E013CB8 /* A02 = -4.042366908878036561859e-04 */ - .quad 0x3EF6298E69F991CD /* A03 = +2.113564425911141559972e-05 */ - .quad 0x3FEFE0EAA508BC69 /* A00 = +9.962056372950317539861e-01 */ - .quad 0x3F5BD0771AF3FDDA /* A01 = +1.697651208644282514598e-03 */ - .quad 0xBF30B2E1254DE571 /* A02 = -2.548026725928887099328e-04 */ - .quad 0x3EEAE28B70EC0256 /* A03 = +1.281973848454955042307e-05 */ - .quad 0x3FEFEAF5303D7F96 /* A00 = +9.974313680831865536192e-01 */ - .quad 0x3F5229111365657E /* A01 = +1.108423877289460134782e-03 */ - .quad 0xBF250572D04DFE66 /* A02 = -1.603796628408704519168e-04 */ - .quad 0x3EE04E89BB57C981 /* A03 = +7.775682983689149966743e-06 */ - .quad 0x3FEFF1CF52F1CF44 /* A00 = +9.982678051005469122003e-01 */ - .quad 0x3F47A71316147CEB /* A01 = 
+7.218211359577819110842e-04 */ - .quad 0xBF1A6D7604055719 /* A02 = -1.008132248946049582547e-04 */ - .quad 0x3ED3C8047586A85C /* A03 = +4.716233739913014633626e-06 */ - .quad 0x3FEFF6770369EF69 /* A00 = +9.988360468555416149528e-01 */ - .quad 0x3F3EBB261180FBF0 /* A01 = +4.689186039321105101130e-04 */ - .quad 0xBF1097754FE19D7F /* A02 = -6.329206004950480057066e-05 */ - .quad 0x3EC7FEFF83BCA0A7 /* A03 = +2.860556404988488738366e-06 */ - .quad 0x3FEFF99D42371AC4 /* A00 = +9.992204945818561334647e-01 */ - .quad 0x3F33EB2AEC271F59 /* A01 = +3.039340773764907474054e-04 */ - .quad 0xBF04CF18E0FC0D79 /* A02 = -3.968996690952969588805e-05 */ - .quad 0x3EBD1BDBD6019BE9 /* A03 = +1.735021065507727833886e-06 */ - .quad 0x3FEFFBBCA32B0D91 /* A00 = +9.994795977476532700123e-01 */ - .quad 0x3F29C41E1615110A /* A01 = +1.965796209707565346710e-04 */ - .quad 0xBEFA11F93D9DCB5A /* A02 = -2.486248909101414873235e-05 */ - .quad 0x3EB1A7CA4546F7A7 /* A03 = +1.052345642723709228769e-06 */ - .quad 0x3FEFFD298B8E8DE2 /* A00 = +9.996535993308806045121e-01 */ - .quad 0x3F20A1C42D523C5B /* A01 = +1.268913244172078754520e-04 */ - .quad 0xBEF0507A364AFAE4 /* A02 = -1.555859070622834605755e-05 */ - .quad 0x3EA56ACA17E7CDF4 /* A03 = +6.382806956848098872313e-07 */ - .quad 0x3FEFFE1DC82BA5A3 /* A00 = +9.997700604991915929176e-01 */ - .quad 0x3F156E73B90F1769 /* A01 = +8.175450626798714452801e-05 */ - .quad 0xBEE4663579D0A09F /* A02 = -9.727122057226747625365e-06 */ - .quad 0x3E99FAF6FEC5D4C1 /* A03 = +3.871371052824002996020e-07 */ - .quad 0x3FEFFEF8D0BB5E81 /* A00 = +9.998745037837154514548e-01 */ - .quad 0x3F06686DA18D39C3 /* A01 = +4.273972098777251447726e-05 */ - .quad 0xBED46BC298073E90 /* A02 = -4.868731025855742842491e-06 */ - .quad 0x3E88E42286B9D0FD /* A03 = +1.854535328530838170114e-07 */ - .quad 0x3FEFFF8DBC68DDC7 /* A00 = +9.999455146670975791423e-01 */ - .quad 0x3EF26B2953A80AF0 /* A01 = +1.756534514108903368909e-05 */ - .quad 0xBEBFC4472D580F83 /* A02 = 
-1.893443529411295465239e-06 */ - .quad 0x3E72505B4553D19F /* A03 = +6.822456673547912277047e-08 */ - .quad 0x3FEFFFCED1276609 /* A00 = +9.999765477215883935358e-01 */ - .quad 0x3EDE1A94C7CC58F5 /* A01 = +7.177313020153979672606e-06 */ - .quad 0xBEA8A2C988744E57 /* A02 = -7.342066660497443762363e-07 */ - .quad 0x3E5AF30036BBBAF4 /* A03 = +2.509841882843541084885e-08 */ - .quad 0x3FEFFFEAFE70FCFC /* A00 = +9.999899835164849370983e-01 */ - .quad 0x3EC879175E3549F5 /* A01 = +2.917410471128503564412e-06 */ - .quad 0xBE930E36677D1813 /* A02 = -2.839493400307523115929e-07 */ - .quad 0x3E43D4005B42D48F /* A03 = +9.233192745401904898013e-09 */ - .quad 0x3ff0000000000000 - .quad 0x0000000000000000 - .quad 0x0000000000000000 - .quad 0x0000000000000000 - .align 16 - .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 /* _sSignMask */ - .align 16 - .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff /* _sAbsMask */ - .align 16 - .long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 /* _iExpMantMask */ - .align 16 - .long 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000 /* _iExpMask */ - .align 16 - .long 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000 /* _iMinIdxOfsMask */ - .align 16 - .long 0x04280000, 0x04280000, 0x04280000, 0x04280000 /* _iMaxIdxMask */ - .align 16 - .type __svml_stanh_data_internal,@object - .size __svml_stanh_data_internal,.-__svml_stanh_data_internal + movups (%rsi, %rax), %xmm2 + movups (%rdi, %rax), %xmm7 + + movaps %xmm2, %xmm3 + + unpckhpd %xmm7, %xmm2 + movlhps %xmm7, %xmm3 + + addpd %xmm13, %xmm2 + + mulpd %xmm5, %xmm6 + addpd %xmm4, %xmm6 + + mulpd %xmm2, %xmm0 + addpd %xmm3, %xmm0 + + cvtpd2ps %xmm0, %xmm2 + cvtpd2ps %xmm6, %xmm0 + + movlhps %xmm2, %xmm0 + andnps %xmm12, %xmm1 + orps %xmm1, %xmm0 + + movmskps %xmm8, %edx + testl %edx, %edx + + /* Go to special inputs processing branch. */ + jne L(SPECIAL_VALUES_BRANCH) + + /* No stack restoration on the fastpath. 
*/
+	ret
+
+	/* Slow path: edx has a bit set for every lane that must be
+	   recomputed with a scalar tanhf call.
+
+	   rsp here is the same as at the `ret` above, i.e. it still points
+	   at the return address, so under the SysV ABI it is 8 (mod 16).
+	   The frame size must therefore also be 8 (mod 16) for rsp to be
+	   16-byte aligned at the `call` below: 48 bytes of spills plus
+	   8 bytes of padding.
+
+	   Frame layout:
+	      0(%rsp)  result vector (flagged lanes are overwritten below)
+	     16(%rsp)  copy of xmm12, the lanes handed to tanhf
+	     32(%rsp)  saved r12
+	     40(%rsp)  saved r13
+	     48(%rsp)  padding for call-site alignment
+
+	   NOTE(review): no CFI annotations accompany the rsp adjustment
+	   or the r12/r13 spills in this hunk; confirm whether unwinding
+	   through the tanhf call needs them.  */
+L(SPECIAL_VALUES_BRANCH):
+	subq	$56, %rsp
+
+	movups	%xmm0, (%rsp)
+	movups	%xmm12, 16(%rsp)
+
+	/* r12/r13 are callee-saved, so preserve the caller's values
+	   before using them across the tanhf calls.  */
+	movq	%r12, 32(%rsp)
+	movq	%r13, 40(%rsp)
+
+	/* r13d = mask of lanes still waiting for a tanhf call.  */
+	movl	%edx, %r13d
+L(SPECIAL_VALUES_LOOP):
+	/* r12 = index of the lowest pending lane.  It has to survive the
+	   call, hence a callee-saved register: that costs one extra
+	   save/restore, the alternative being extra instructions inside
+	   the loop.  r12d is zeroed before bsf, presumably so bsf does not
+	   depend on the previous r12 value.  */
+	xorl	%r12d, %r12d
+	bsfl	%r13d, %r12d
+
+	/* Scalar math function call to process the special input.  */
+	movss	16(%rsp, %r12, 4), %xmm0
+	call	tanhf@PLT
+	/* No good way to avoid the store-forwarding fault this 4-byte
+	   store causes for the 16-byte reload on return.  `lfence` avoids
+	   the SF fault but at greater cost, as it serializes the
+	   stack/callee-save restoration.  */
+	movss	%xmm0, (%rsp, %r12, 4)
+
+	/* Clear the lowest set bit of the mask (x & (x - 1)) and loop
+	   while any lane is still pending.  */
+	leal	-1(%r13), %eax
+	andl	%eax, %r13d
+	jnz	L(SPECIAL_VALUES_LOOP)
+
+	/* All results have been written to (%rsp).  */
+	movups	(%rsp), %xmm0
+	movq	32(%rsp), %r12
+	movq	40(%rsp), %r13
+	addq	$56, %rsp
+	ret
+END(_ZGVbN4v_tanhf_sse4)