[v3] LoongArch: Add cfi instructions for _dl_tlsdesc_dynamic
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Test passed
|
Commit Message
In _dl_tlsdesc_dynamic, there are three 'addi.d sp, sp, -size'
instructions that allocate stack space for the Float/LSX/LASX registers.
Every 'addi.d sp, sp, -size' needs a cfi_adjust_cfa_offset because
sp is used to compute the CFA. However, only one of these
'addi.d sp, sp, -size' instructions is executed, depending on the HWCAP
value, while all of the cfi_adjust_cfa_offset directives are applied
during stack unwinding, which results in an incorrect CFA.
Split _dl_tlsdesc_dynamic into _dl_tlsdesc_dynamic,
_dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
The conflicting cfi directives are then distributed across the three
functions, so each cfi directive corresponds to the stack adjustment
instruction that is actually executed.
---
Changes v2 -> v3:
- SUPPORT_LSX/LASX -> RTLD_SUPPORT_LSX/LASX.
- Define macros to reduce code duplication.
- Delete the HWCAP-related content from tlsdesc.sym.
Changes v1 -> v2:
- Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic,
_dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
v2 link: https://sourceware.org/pipermail/libc-alpha/2024-June/157783.html
v1 link: https://sourceware.org/pipermail/libc-alpha/2024-June/157270.html
sysdeps/loongarch/dl-machine.h | 7 +
sysdeps/loongarch/dl-tlsdesc-dynamic.h | 225 ++++++++++++++
sysdeps/loongarch/dl-tlsdesc.S | 386 ++-----------------------
sysdeps/loongarch/dl-tlsdesc.h | 4 +
sysdeps/loongarch/tlsdesc.sym | 9 -
5 files changed, 258 insertions(+), 373 deletions(-)
create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
Comments
This patch is the third version, having gone through two previous
iterations.
It was reverted by Andreas K. Hüttel due to the code freeze for the 2.40
release.
Now that the release is out, I plan to merge this patch.
Patch-v3: https://sourceware.org/pipermail/libc-alpha/2024-July/158025.html
Changes v2 -> v3:
- SUPPORT_LSX/LASX -> RTLD_SUPPORT_LSX/LASX.
- Define macros to reduce code duplication.
- Delete the HWCAP-related content from tlsdesc.sym.
Changes v1 -> v2:
- Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic,
_dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
v2 link: https://sourceware.org/pipermail/libc-alpha/2024-June/157783.html
v1 link: https://sourceware.org/pipermail/libc-alpha/2024-June/157270.html
在 2024/7/5 上午10:40, mengqinggang 写道:
> In _dl_tlsdesc_dynamic, there are three 'addi.d sp, sp, -size'
> instructions that allocate stack space for the Float/LSX/LASX registers.
> Every 'addi.d sp, sp, -size' needs a cfi_adjust_cfa_offset because
> sp is used to compute the CFA. However, only one of these
> 'addi.d sp, sp, -size' instructions is executed, depending on the HWCAP
> value, while all of the cfi_adjust_cfa_offset directives are applied
> during stack unwinding, which results in an incorrect CFA.
>
> Split _dl_tlsdesc_dynamic into _dl_tlsdesc_dynamic,
> _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
> The conflicting cfi directives are then distributed across the three
> functions, so each cfi directive corresponds to the stack adjustment
> instruction that is actually executed.
>
> ---
>
>
> sysdeps/loongarch/dl-machine.h | 7 +
> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 225 ++++++++++++++
> sysdeps/loongarch/dl-tlsdesc.S | 386 ++-----------------------
> sysdeps/loongarch/dl-tlsdesc.h | 4 +
> sysdeps/loongarch/tlsdesc.sym | 9 -
> 5 files changed, 258 insertions(+), 373 deletions(-)
> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>
> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
> index ab6f1da7c0..a15d8e0ab6 100644
> --- a/sysdeps/loongarch/dl-machine.h
> +++ b/sysdeps/loongarch/dl-machine.h
> @@ -223,6 +223,13 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
> {
> td->arg = _dl_make_tlsdesc_dynamic (sym_map,
> sym->st_value + reloc->r_addend);
> +# ifndef __loongarch_soft_float
> + if (RTLD_SUPPORT_LASX)
> + td->entry = _dl_tlsdesc_dynamic_lasx;
> + else if (RTLD_SUPPORT_LSX)
> + td->entry = _dl_tlsdesc_dynamic_lsx;
> + else
> +# endif
> td->entry = _dl_tlsdesc_dynamic;
> }
> else
> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> new file mode 100644
> index 0000000000..d10f4a8800
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> @@ -0,0 +1,225 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> + LoongArch version.
> + Copyright (C) 2024 Free Software Foundation, Inc.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
> +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
> +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
> +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
> +
> + /* Handler for dynamic TLS symbols.
> + Prototype:
> + _dl_tlsdesc_dynamic (tlsdesc *) ;
> +
> + The second word of the descriptor points to a
> + tlsdesc_dynamic_arg structure.
> +
> + Returns the offset between the thread pointer and the
> + object referenced by the argument.
> +
> + ptrdiff_t
> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> + {
> + struct tlsdesc_dynamic_arg *td = tdp->arg;
> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
> + if (__glibc_likely (td->gen_count <= dtv[0].counter
> + && (dtv[td->tlsinfo.ti_module].pointer.val
> + != TLS_DTV_UNALLOCATED),
> + 1))
> + return dtv[td->tlsinfo.ti_module].pointer.val
> + + td->tlsinfo.ti_offset
> + - __thread_pointer;
> +
> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> + } */
> + .hidden _dl_tlsdesc_dynamic
> + .global _dl_tlsdesc_dynamic
> + .type _dl_tlsdesc_dynamic,%function
> + cfi_startproc
> + .align 2
> +_dl_tlsdesc_dynamic:
> + /* Save just enough registers to support fast path, if we fall
> + into slow path we will save additional registers. */
> + ADDI sp, sp, -32
> + cfi_adjust_cfa_offset (32)
> + REG_S t0, sp, 0
> + REG_S t1, sp, 8
> + REG_S t2, sp, 16
> + cfi_rel_offset (12, 0)
> + cfi_rel_offset (13, 8)
> + cfi_rel_offset (14, 16)
> +
> +/* Runtime Storage Layout of Thread-Local Storage
> + TP points to the start of the TLS block.
> +
> + dtv
> +Low address TCB ----------------> dtv0(counter)
> + TP --> static_block0 <----- dtv1
> + static_block1 <----- dtv2
> + static_block2 <----- dtv3
> + dynamic_block0 <----- dtv4
> +High address dynamic_block1 <----- dtv5 */
> +
> + REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
> + REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
> + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
> + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
> + /* If dtv[0].counter < td->gen_count, goto slow path. */
> + bltu t2, t1, .Lslow
> +
> + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
> + /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
> + slli.d t1, t1, 4
> + add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
> + REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
> + li.d t2, TLS_DTV_UNALLOCATED
> + /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
> + goto slow path. */
> + beq t1, t2, .Lslow
> +
> + cfi_remember_state
> + REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */
> + /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
> + add.d a0, t1, t2
> +.Lret:
> + sub.d a0, a0, tp
> + REG_L t0, sp, 0
> + REG_L t1, sp, 8
> + REG_L t2, sp, 16
> + ADDI sp, sp, 32
> + cfi_adjust_cfa_offset (-32)
> + RET
> +
> +.Lslow:
> + /* This is the slow path. We need to call __tls_get_addr() which
> + means we need to save and restore all the registers that the
> + callee will trash. */
> +
> + /* Save the remaining registers that we must treat as caller save. */
> + cfi_restore_state
> + ADDI sp, sp, -FRAME_SIZE
> + cfi_adjust_cfa_offset (FRAME_SIZE)
> + REG_S ra, sp, 0 * SZREG
> + REG_S a1, sp, 1 * SZREG
> + REG_S a2, sp, 2 * SZREG
> + REG_S a3, sp, 3 * SZREG
> + REG_S a4, sp, 4 * SZREG
> + REG_S a5, sp, 5 * SZREG
> + REG_S a6, sp, 6 * SZREG
> + REG_S a7, sp, 7 * SZREG
> + REG_S t3, sp, 8 * SZREG
> + REG_S t4, sp, 9 * SZREG
> + REG_S t5, sp, 10 * SZREG
> + REG_S t6, sp, 11 * SZREG
> + REG_S t7, sp, 12 * SZREG
> + REG_S t8, sp, 13 * SZREG
> + cfi_rel_offset (1, 0 * SZREG)
> + cfi_rel_offset (5, 1 * SZREG)
> + cfi_rel_offset (6, 2 * SZREG)
> + cfi_rel_offset (7, 3 * SZREG)
> + cfi_rel_offset (8, 4 * SZREG)
> + cfi_rel_offset (9, 5 * SZREG)
> + cfi_rel_offset (10, 6 * SZREG)
> + cfi_rel_offset (11, 7 * SZREG)
> + cfi_rel_offset (15, 8 * SZREG)
> + cfi_rel_offset (16, 9 * SZREG)
> + cfi_rel_offset (17, 10 * SZREG)
> + cfi_rel_offset (18, 11 * SZREG)
> + cfi_rel_offset (19, 12 * SZREG)
> + cfi_rel_offset (20, 13 * SZREG)
> +
> +#ifndef __loongarch_soft_float
> +
> + /* Save fcsr0 register.
> + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
> + of some fields in fcsr0. */
> + movfcsr2gr t0, fcsr0
> + st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2. */
> +
> +#ifdef USE_LASX
> + #define V_REG_S xvst
> + #define V_REG_L xvld
> + #define V_SPACE FRAME_SIZE_LASX
> + #define V_REG(n) $xr##n
> + #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \
> + 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
> + #define V_REGSZ SZXREG
> +#elif defined USE_LSX
> + #define V_REG_S vst
> + #define V_REG_L vld
> + #define V_SPACE FRAME_SIZE_LSX
> + #define V_REG(n) $vr##n
> + #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \
> + 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
> + #define V_REGSZ SZVREG
> +#else
> + #define V_REG_S fst.d
> + #define V_REG_L fld.d
> + #define V_SPACE FRAME_SIZE_FLOAT
> + #define V_REG(n) $f##n
> + #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
> + #define V_REGSZ SZFREG
> +#endif
> +
> + ADDI sp, sp, -V_SPACE
> + cfi_adjust_cfa_offset (V_SPACE)
> + .irp i,V_REGS
> + V_REG_S V_REG(\i), sp, \i * V_REGSZ
> + .endr
> +
> +#endif /* #ifndef __loongarch_soft_float */
> +
> + bl HIDDEN_JUMPTARGET(__tls_get_addr)
> + ADDI a0, a0, -TLS_DTV_OFFSET
> +
> +#ifndef __loongarch_soft_float
> +
> + .irp i,V_REGS
> + V_REG_L V_REG(\i), sp, \i * V_REGSZ
> + .endr
> + ADDI sp, sp, V_SPACE
> + cfi_adjust_cfa_offset (-V_SPACE)
> +
> + /* Restore fcsr0 register. */
> + ld.w t0, sp, FRAME_SIZE + 24
> + movgr2fcsr fcsr0, t0
> +
> +#endif /* #ifndef __loongarch_soft_float */
> +
> + REG_L ra, sp, 0 * SZREG
> + REG_L a1, sp, 1 * SZREG
> + REG_L a2, sp, 2 * SZREG
> + REG_L a3, sp, 3 * SZREG
> + REG_L a4, sp, 4 * SZREG
> + REG_L a5, sp, 5 * SZREG
> + REG_L a6, sp, 6 * SZREG
> + REG_L a7, sp, 7 * SZREG
> + REG_L t3, sp, 8 * SZREG
> + REG_L t4, sp, 9 * SZREG
> + REG_L t5, sp, 10 * SZREG
> + REG_L t6, sp, 11 * SZREG
> + REG_L t7, sp, 12 * SZREG
> + REG_L t8, sp, 13 * SZREG
> + ADDI sp, sp, FRAME_SIZE
> + cfi_adjust_cfa_offset (-FRAME_SIZE)
> +
> + b .Lret
> + cfi_endproc
> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> + .hidden HIDDEN_JUMPTARGET(__tls_get_addr)
> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
> index a6627cc754..b6cfd6121d 100644
> --- a/sysdeps/loongarch/dl-tlsdesc.S
> +++ b/sysdeps/loongarch/dl-tlsdesc.S
> @@ -59,376 +59,34 @@ _dl_tlsdesc_undefweak:
> cfi_endproc
> .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>
> -
> #ifdef SHARED
>
> -#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
> -#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
> -#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
> -#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
> -
> - /* Handler for dynamic TLS symbols.
> - Prototype:
> - _dl_tlsdesc_dynamic (tlsdesc *) ;
> -
> - The second word of the descriptor points to a
> - tlsdesc_dynamic_arg structure.
> -
> - Returns the offset between the thread pointer and the
> - object referenced by the argument.
> -
> - ptrdiff_t
> - _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> - {
> - struct tlsdesc_dynamic_arg *td = tdp->arg;
> - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
> - if (__glibc_likely (td->gen_count <= dtv[0].counter
> - && (dtv[td->tlsinfo.ti_module].pointer.val
> - != TLS_DTV_UNALLOCATED),
> - 1))
> - return dtv[td->tlsinfo.ti_module].pointer.val
> - + td->tlsinfo.ti_offset
> - - __thread_pointer;
> -
> - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> - } */
> - .hidden _dl_tlsdesc_dynamic
> - .global _dl_tlsdesc_dynamic
> - .type _dl_tlsdesc_dynamic,%function
> - cfi_startproc
> - .align 2
> -_dl_tlsdesc_dynamic:
> - /* Save just enough registers to support fast path, if we fall
> - into slow path we will save additional registers. */
> - ADDI sp, sp, -32
> - REG_S t0, sp, 0
> - REG_S t1, sp, 8
> - REG_S t2, sp, 16
> -
> -/* Runtime Storage Layout of Thread-Local Storage
> - TP point to the start of TLS block.
> -
> - dtv
> -Low address TCB ----------------> dtv0(counter)
> - TP --> static_block0 <----- dtv1
> - static_block1 <----- dtv2
> - static_block2 <----- dtv3
> - dynamic_block0 <----- dtv4
> -Hign address dynamic_block1 <----- dtv5 */
> -
> - REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
> - REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
> - REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
> - REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
> - /* If dtv[0].counter < td->gen_count, goto slow path. */
> - bltu t2, t1, .Lslow
> -
> - REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
> - /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
> - slli.d t1, t1, 4
> - add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
> - REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
> - li.d t2, TLS_DTV_UNALLOCATED
> - /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
> - goto slow path. */
> - beq t1, t2, .Lslow
> -
> - REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */
> - /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
> - add.d a0, t1, t2
> -.Lret:
> - sub.d a0, a0, tp
> - REG_L t0, sp, 0
> - REG_L t1, sp, 8
> - REG_L t2, sp, 16
> - ADDI sp, sp, 32
> - RET
> -
> -.Lslow:
> - /* This is the slow path. We need to call __tls_get_addr() which
> - means we need to save and restore all the register that the
> - callee will trash. */
> -
> - /* Save the remaining registers that we must treat as caller save. */
> - ADDI sp, sp, -FRAME_SIZE
> - REG_S ra, sp, 0 * SZREG
> - REG_S a1, sp, 1 * SZREG
> - REG_S a2, sp, 2 * SZREG
> - REG_S a3, sp, 3 * SZREG
> - REG_S a4, sp, 4 * SZREG
> - REG_S a5, sp, 5 * SZREG
> - REG_S a6, sp, 6 * SZREG
> - REG_S a7, sp, 7 * SZREG
> - REG_S t3, sp, 8 * SZREG
> - REG_S t4, sp, 9 * SZREG
> - REG_S t5, sp, 10 * SZREG
> - REG_S t6, sp, 11 * SZREG
> - REG_S t7, sp, 12 * SZREG
> - REG_S t8, sp, 13 * SZREG
> -
> #ifndef __loongarch_soft_float
>
> - /* Save fcsr0 register.
> - Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
> - of some fields in fcsr0. */
> - movfcsr2gr t0, fcsr0
> - st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */
> -
> - /* Whether support LASX. */
> - la.global t0, _rtld_global_ro
> - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
> - andi t1, t0, HWCAP_LOONGARCH_LASX
> - beqz t1, .Llsx
> -
> - /* Save 256-bit vector registers.
> - FIXME: Without vector ABI, save all vector registers. */
> - ADDI sp, sp, -FRAME_SIZE_LASX
> - xvst xr0, sp, 0*SZXREG
> - xvst xr1, sp, 1*SZXREG
> - xvst xr2, sp, 2*SZXREG
> - xvst xr3, sp, 3*SZXREG
> - xvst xr4, sp, 4*SZXREG
> - xvst xr5, sp, 5*SZXREG
> - xvst xr6, sp, 6*SZXREG
> - xvst xr7, sp, 7*SZXREG
> - xvst xr8, sp, 8*SZXREG
> - xvst xr9, sp, 9*SZXREG
> - xvst xr10, sp, 10*SZXREG
> - xvst xr11, sp, 11*SZXREG
> - xvst xr12, sp, 12*SZXREG
> - xvst xr13, sp, 13*SZXREG
> - xvst xr14, sp, 14*SZXREG
> - xvst xr15, sp, 15*SZXREG
> - xvst xr16, sp, 16*SZXREG
> - xvst xr17, sp, 17*SZXREG
> - xvst xr18, sp, 18*SZXREG
> - xvst xr19, sp, 19*SZXREG
> - xvst xr20, sp, 20*SZXREG
> - xvst xr21, sp, 21*SZXREG
> - xvst xr22, sp, 22*SZXREG
> - xvst xr23, sp, 23*SZXREG
> - xvst xr24, sp, 24*SZXREG
> - xvst xr25, sp, 25*SZXREG
> - xvst xr26, sp, 26*SZXREG
> - xvst xr27, sp, 27*SZXREG
> - xvst xr28, sp, 28*SZXREG
> - xvst xr29, sp, 29*SZXREG
> - xvst xr30, sp, 30*SZXREG
> - xvst xr31, sp, 31*SZXREG
> - b .Ltga
> -
> -.Llsx:
> - /* Whether support LSX. */
> - andi t1, t0, HWCAP_LOONGARCH_LSX
> - beqz t1, .Lfloat
> -
> - /* Save 128-bit vector registers. */
> - ADDI sp, sp, -FRAME_SIZE_LSX
> - vst vr0, sp, 0*SZVREG
> - vst vr1, sp, 1*SZVREG
> - vst vr2, sp, 2*SZVREG
> - vst vr3, sp, 3*SZVREG
> - vst vr4, sp, 4*SZVREG
> - vst vr5, sp, 5*SZVREG
> - vst vr6, sp, 6*SZVREG
> - vst vr7, sp, 7*SZVREG
> - vst vr8, sp, 8*SZVREG
> - vst vr9, sp, 9*SZVREG
> - vst vr10, sp, 10*SZVREG
> - vst vr11, sp, 11*SZVREG
> - vst vr12, sp, 12*SZVREG
> - vst vr13, sp, 13*SZVREG
> - vst vr14, sp, 14*SZVREG
> - vst vr15, sp, 15*SZVREG
> - vst vr16, sp, 16*SZVREG
> - vst vr17, sp, 17*SZVREG
> - vst vr18, sp, 18*SZVREG
> - vst vr19, sp, 19*SZVREG
> - vst vr20, sp, 20*SZVREG
> - vst vr21, sp, 21*SZVREG
> - vst vr22, sp, 22*SZVREG
> - vst vr23, sp, 23*SZVREG
> - vst vr24, sp, 24*SZVREG
> - vst vr25, sp, 25*SZVREG
> - vst vr26, sp, 26*SZVREG
> - vst vr27, sp, 27*SZVREG
> - vst vr28, sp, 28*SZVREG
> - vst vr29, sp, 29*SZVREG
> - vst vr30, sp, 30*SZVREG
> - vst vr31, sp, 31*SZVREG
> - b .Ltga
> -
> -.Lfloat:
> - /* Save float registers. */
> - ADDI sp, sp, -FRAME_SIZE_FLOAT
> - FREG_S fa0, sp, 0*SZFREG
> - FREG_S fa1, sp, 1*SZFREG
> - FREG_S fa2, sp, 2*SZFREG
> - FREG_S fa3, sp, 3*SZFREG
> - FREG_S fa4, sp, 4*SZFREG
> - FREG_S fa5, sp, 5*SZFREG
> - FREG_S fa6, sp, 6*SZFREG
> - FREG_S fa7, sp, 7*SZFREG
> - FREG_S ft0, sp, 8*SZFREG
> - FREG_S ft1, sp, 9*SZFREG
> - FREG_S ft2, sp, 10*SZFREG
> - FREG_S ft3, sp, 11*SZFREG
> - FREG_S ft4, sp, 12*SZFREG
> - FREG_S ft5, sp, 13*SZFREG
> - FREG_S ft6, sp, 14*SZFREG
> - FREG_S ft7, sp, 15*SZFREG
> - FREG_S ft8, sp, 16*SZFREG
> - FREG_S ft9, sp, 17*SZFREG
> - FREG_S ft10, sp, 18*SZFREG
> - FREG_S ft11, sp, 19*SZFREG
> - FREG_S ft12, sp, 20*SZFREG
> - FREG_S ft13, sp, 21*SZFREG
> - FREG_S ft14, sp, 22*SZFREG
> - FREG_S ft15, sp, 23*SZFREG
> -
> -#endif /* #ifndef __loongarch_soft_float */
> -
> -.Ltga:
> - bl HIDDEN_JUMPTARGET(__tls_get_addr)
> - ADDI a0, a0, -TLS_DTV_OFFSET
> -
> -#ifndef __loongarch_soft_float
> -
> - la.global t0, _rtld_global_ro
> - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
> - andi t1, t0, HWCAP_LOONGARCH_LASX
> - beqz t1, .Llsx1
> -
> - /* Restore 256-bit vector registers. */
> - xvld xr0, sp, 0*SZXREG
> - xvld xr1, sp, 1*SZXREG
> - xvld xr2, sp, 2*SZXREG
> - xvld xr3, sp, 3*SZXREG
> - xvld xr4, sp, 4*SZXREG
> - xvld xr5, sp, 5*SZXREG
> - xvld xr6, sp, 6*SZXREG
> - xvld xr7, sp, 7*SZXREG
> - xvld xr8, sp, 8*SZXREG
> - xvld xr9, sp, 9*SZXREG
> - xvld xr10, sp, 10*SZXREG
> - xvld xr11, sp, 11*SZXREG
> - xvld xr12, sp, 12*SZXREG
> - xvld xr13, sp, 13*SZXREG
> - xvld xr14, sp, 14*SZXREG
> - xvld xr15, sp, 15*SZXREG
> - xvld xr16, sp, 16*SZXREG
> - xvld xr17, sp, 17*SZXREG
> - xvld xr18, sp, 18*SZXREG
> - xvld xr19, sp, 19*SZXREG
> - xvld xr20, sp, 20*SZXREG
> - xvld xr21, sp, 21*SZXREG
> - xvld xr22, sp, 22*SZXREG
> - xvld xr23, sp, 23*SZXREG
> - xvld xr24, sp, 24*SZXREG
> - xvld xr25, sp, 25*SZXREG
> - xvld xr26, sp, 26*SZXREG
> - xvld xr27, sp, 27*SZXREG
> - xvld xr28, sp, 28*SZXREG
> - xvld xr29, sp, 29*SZXREG
> - xvld xr30, sp, 30*SZXREG
> - xvld xr31, sp, 31*SZXREG
> - ADDI sp, sp, FRAME_SIZE_LASX
> - b .Lfcsr
> -
> -.Llsx1:
> - andi t1, t0, HWCAP_LOONGARCH_LSX
> - beqz t1, .Lfloat1
> -
> - /* Restore 128-bit vector registers. */
> - vld vr0, sp, 0*SZVREG
> - vld vr1, sp, 1*SZVREG
> - vld vr2, sp, 2*SZVREG
> - vld vr3, sp, 3*SZVREG
> - vld vr4, sp, 4*SZVREG
> - vld vr5, sp, 5*SZVREG
> - vld vr6, sp, 6*SZVREG
> - vld vr7, sp, 7*SZVREG
> - vld vr8, sp, 8*SZVREG
> - vld vr9, sp, 9*SZVREG
> - vld vr10, sp, 10*SZVREG
> - vld vr11, sp, 11*SZVREG
> - vld vr12, sp, 12*SZVREG
> - vld vr13, sp, 13*SZVREG
> - vld vr14, sp, 14*SZVREG
> - vld vr15, sp, 15*SZVREG
> - vld vr16, sp, 16*SZVREG
> - vld vr17, sp, 17*SZVREG
> - vld vr18, sp, 18*SZVREG
> - vld vr19, sp, 19*SZVREG
> - vld vr20, sp, 20*SZVREG
> - vld vr21, sp, 21*SZVREG
> - vld vr22, sp, 22*SZVREG
> - vld vr23, sp, 23*SZVREG
> - vld vr24, sp, 24*SZVREG
> - vld vr25, sp, 25*SZVREG
> - vld vr26, sp, 26*SZVREG
> - vld vr27, sp, 27*SZVREG
> - vld vr28, sp, 28*SZVREG
> - vld vr29, sp, 29*SZVREG
> - vld vr30, sp, 30*SZVREG
> - vld vr31, sp, 31*SZVREG
> - ADDI sp, sp, FRAME_SIZE_LSX
> - b .Lfcsr
> -
> -.Lfloat1:
> - /* Restore float registers. */
> - FREG_L fa0, sp, 0*SZFREG
> - FREG_L fa1, sp, 1*SZFREG
> - FREG_L fa2, sp, 2*SZFREG
> - FREG_L fa3, sp, 3*SZFREG
> - FREG_L fa4, sp, 4*SZFREG
> - FREG_L fa5, sp, 5*SZFREG
> - FREG_L fa6, sp, 6*SZFREG
> - FREG_L fa7, sp, 7*SZFREG
> - FREG_L ft0, sp, 8*SZFREG
> - FREG_L ft1, sp, 9*SZFREG
> - FREG_L ft2, sp, 10*SZFREG
> - FREG_L ft3, sp, 11*SZFREG
> - FREG_L ft4, sp, 12*SZFREG
> - FREG_L ft5, sp, 13*SZFREG
> - FREG_L ft6, sp, 14*SZFREG
> - FREG_L ft7, sp, 15*SZFREG
> - FREG_L ft8, sp, 16*SZFREG
> - FREG_L ft9, sp, 17*SZFREG
> - FREG_L ft10, sp, 18*SZFREG
> - FREG_L ft11, sp, 19*SZFREG
> - FREG_L ft12, sp, 20*SZFREG
> - FREG_L ft13, sp, 21*SZFREG
> - FREG_L ft14, sp, 22*SZFREG
> - FREG_L ft15, sp, 23*SZFREG
> - ADDI sp, sp, FRAME_SIZE_FLOAT
> -
> -.Lfcsr:
> - /* Restore fcsr0 register. */
> - ld.w t0, sp, FRAME_SIZE + 24
> - movgr2fcsr fcsr0, t0
> +#define USE_LASX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
> +#define Lret Lret_lasx
> +#define Lslow Lslow_lasx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LASX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#define USE_LSX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
> +#define Lret Lret_lsx
> +#define Lslow Lslow_lsx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LSX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
>
> #endif /* #ifndef __loongarch_soft_float */
>
> - REG_L ra, sp, 0 * SZREG
> - REG_L a1, sp, 1 * SZREG
> - REG_L a2, sp, 2 * SZREG
> - REG_L a3, sp, 3 * SZREG
> - REG_L a4, sp, 4 * SZREG
> - REG_L a5, sp, 5 * SZREG
> - REG_L a6, sp, 6 * SZREG
> - REG_L a7, sp, 7 * SZREG
> - REG_L t3, sp, 8 * SZREG
> - REG_L t4, sp, 9 * SZREG
> - REG_L t5, sp, 10 * SZREG
> - REG_L t6, sp, 11 * SZREG
> - REG_L t7, sp, 12 * SZREG
> - REG_L t8, sp, 13 * SZREG
> - ADDI sp, sp, FRAME_SIZE
> -
> - b .Lret
> - cfi_endproc
> - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> - .hidden HIDDEN_JUMPTARGET(__tls_get_addr)
> +#include "dl-tlsdesc-dynamic.h"
>
> #endif /* #ifdef SHARED */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
> index ff8c69cb93..45c43a5b52 100644
> --- a/sysdeps/loongarch/dl-tlsdesc.h
> +++ b/sysdeps/loongarch/dl-tlsdesc.h
> @@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
>
> #ifdef SHARED
> extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
> +#ifndef __loongarch_soft_float
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
> +#endif
> extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
> #endif
>
> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
> index 213d0b3074..9f80fceca6 100644
> --- a/sysdeps/loongarch/tlsdesc.sym
> +++ b/sysdeps/loongarch/tlsdesc.sym
> @@ -4,12 +4,6 @@
> #include <link.h>
> #include <dl-tlsdesc.h>
>
> -#define SHARED 1
> -
> -#include <ldsodefs.h>
> -
> -#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name)
> -
> --
>
> -- Abuse tls.h macros to derive offsets relative to the thread register.
> @@ -23,6 +17,3 @@ DTV_COUNTER offsetof(dtv_t, counter)
> TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED
> TLS_DTV_OFFSET TLS_DTV_OFFSET
> SIZE_OF_TCB sizeof(tcbhead_t)
> -GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap)
> -HWCAP_LOONGARCH_LSX HWCAP_LOONGARCH_LSX
> -HWCAP_LOONGARCH_LASX HWCAP_LOONGARCH_LASX
@@ -223,6 +223,13 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
{
td->arg = _dl_make_tlsdesc_dynamic (sym_map,
sym->st_value + reloc->r_addend);
+# ifndef __loongarch_soft_float
+ if (RTLD_SUPPORT_LASX)
+ td->entry = _dl_tlsdesc_dynamic_lasx;
+ else if (RTLD_SUPPORT_LSX)
+ td->entry = _dl_tlsdesc_dynamic_lsx;
+ else
+# endif
td->entry = _dl_tlsdesc_dynamic;
}
else
new file mode 100644
@@ -0,0 +1,225 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+ LoongArch version.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
+#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
+#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
+#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
+
+ /* Handler for dynamic TLS symbols.
+ Prototype:
+ _dl_tlsdesc_dynamic (tlsdesc *) ;
+
+ The second word of the descriptor points to a
+ tlsdesc_dynamic_arg structure.
+
+ Returns the offset between the thread pointer and the
+ object referenced by the argument.
+
+ ptrdiff_t
+ _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+ {
+ struct tlsdesc_dynamic_arg *td = tdp->arg;
+ dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
+ if (__glibc_likely (td->gen_count <= dtv[0].counter
+ && (dtv[td->tlsinfo.ti_module].pointer.val
+ != TLS_DTV_UNALLOCATED),
+ 1))
+ return dtv[td->tlsinfo.ti_module].pointer.val
+ + td->tlsinfo.ti_offset
+ - __thread_pointer;
+
+ return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+ } */
+ .hidden _dl_tlsdesc_dynamic
+ .global _dl_tlsdesc_dynamic
+ .type _dl_tlsdesc_dynamic,%function
+ cfi_startproc
+ .align 2
+_dl_tlsdesc_dynamic:
+ /* Save just enough registers to support fast path, if we fall
+ into slow path we will save additional registers. */
+ ADDI sp, sp, -32
+ cfi_adjust_cfa_offset (32)
+ REG_S t0, sp, 0
+ REG_S t1, sp, 8
+ REG_S t2, sp, 16
+ cfi_rel_offset (12, 0)
+ cfi_rel_offset (13, 8)
+ cfi_rel_offset (14, 16)
+
+/* Runtime Storage Layout of Thread-Local Storage
+ TP points to the start of the TLS block.
+
+ dtv
+Low address TCB ----------------> dtv0(counter)
+ TP --> static_block0 <----- dtv1
+ static_block1 <----- dtv2
+ static_block2 <----- dtv3
+ dynamic_block0 <----- dtv4
+High address dynamic_block1 <----- dtv5 */
+
+ REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
+ REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
+ REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
+ REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
+ /* If dtv[0].counter < td->gen_count, goto slow path. */
+ bltu t2, t1, .Lslow
+
+ REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
+ /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
+ slli.d t1, t1, 4
+ add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
+ REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
+ li.d t2, TLS_DTV_UNALLOCATED
+ /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
+ goto slow path. */
+ beq t1, t2, .Lslow
+
+ cfi_remember_state
+ REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */
+ /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
+ add.d a0, t1, t2
+.Lret:
+ sub.d a0, a0, tp
+ REG_L t0, sp, 0
+ REG_L t1, sp, 8
+ REG_L t2, sp, 16
+ ADDI sp, sp, 32
+ cfi_adjust_cfa_offset (-32)
+ RET
+
+.Lslow:
+ /* This is the slow path. We need to call __tls_get_addr() which
+ means we need to save and restore all the registers that the
+ callee will trash. */
+
+ /* Save the remaining registers that we must treat as caller save. */
+ cfi_restore_state
+ ADDI sp, sp, -FRAME_SIZE
+ cfi_adjust_cfa_offset (FRAME_SIZE)
+ REG_S ra, sp, 0 * SZREG
+ REG_S a1, sp, 1 * SZREG
+ REG_S a2, sp, 2 * SZREG
+ REG_S a3, sp, 3 * SZREG
+ REG_S a4, sp, 4 * SZREG
+ REG_S a5, sp, 5 * SZREG
+ REG_S a6, sp, 6 * SZREG
+ REG_S a7, sp, 7 * SZREG
+ REG_S t3, sp, 8 * SZREG
+ REG_S t4, sp, 9 * SZREG
+ REG_S t5, sp, 10 * SZREG
+ REG_S t6, sp, 11 * SZREG
+ REG_S t7, sp, 12 * SZREG
+ REG_S t8, sp, 13 * SZREG
+ cfi_rel_offset (1, 0 * SZREG)
+ cfi_rel_offset (5, 1 * SZREG)
+ cfi_rel_offset (6, 2 * SZREG)
+ cfi_rel_offset (7, 3 * SZREG)
+ cfi_rel_offset (8, 4 * SZREG)
+ cfi_rel_offset (9, 5 * SZREG)
+ cfi_rel_offset (10, 6 * SZREG)
+ cfi_rel_offset (11, 7 * SZREG)
+ cfi_rel_offset (15, 8 * SZREG)
+ cfi_rel_offset (16, 9 * SZREG)
+ cfi_rel_offset (17, 10 * SZREG)
+ cfi_rel_offset (18, 11 * SZREG)
+ cfi_rel_offset (19, 12 * SZREG)
+ cfi_rel_offset (20, 13 * SZREG)
+
+#ifndef __loongarch_soft_float
+
+ /* Save fcsr0 register.
+ Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
+ of some fields in fcsr0. */
+ movfcsr2gr t0, fcsr0
+ st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2. */
+
+#ifdef USE_LASX
+ #define V_REG_S xvst
+ #define V_REG_L xvld
+ #define V_SPACE FRAME_SIZE_LASX
+ #define V_REG(n) $xr##n
+ #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \
+ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ #define V_REGSZ SZXREG
+#elif defined USE_LSX
+ #define V_REG_S vst
+ #define V_REG_L vld
+ #define V_SPACE FRAME_SIZE_LSX
+ #define V_REG(n) $vr##n
+ #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \
+ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ #define V_REGSZ SZVREG
+#else
+ #define V_REG_S fst.d
+ #define V_REG_L fld.d
+ #define V_SPACE FRAME_SIZE_FLOAT
+ #define V_REG(n) $f##n
+ #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
+ #define V_REGSZ SZFREG
+#endif
+
+ ADDI sp, sp, -V_SPACE
+ cfi_adjust_cfa_offset (V_SPACE)
+ .irp i,V_REGS
+ V_REG_S V_REG(\i), sp, \i * V_REGSZ
+ .endr
+
+#endif /* #ifndef __loongarch_soft_float */
+
+ bl HIDDEN_JUMPTARGET(__tls_get_addr)
+ ADDI a0, a0, -TLS_DTV_OFFSET
+
+#ifndef __loongarch_soft_float
+
+ .irp i,V_REGS
+ V_REG_L V_REG(\i), sp, \i * V_REGSZ
+ .endr
+ ADDI sp, sp, V_SPACE
+ cfi_adjust_cfa_offset (-V_SPACE)
+
+ /* Restore fcsr0 register. */
+ ld.w t0, sp, FRAME_SIZE + 24
+ movgr2fcsr fcsr0, t0
+
+#endif /* #ifndef __loongarch_soft_float */
+
+ REG_L ra, sp, 0 * SZREG
+ REG_L a1, sp, 1 * SZREG
+ REG_L a2, sp, 2 * SZREG
+ REG_L a3, sp, 3 * SZREG
+ REG_L a4, sp, 4 * SZREG
+ REG_L a5, sp, 5 * SZREG
+ REG_L a6, sp, 6 * SZREG
+ REG_L a7, sp, 7 * SZREG
+ REG_L t3, sp, 8 * SZREG
+ REG_L t4, sp, 9 * SZREG
+ REG_L t5, sp, 10 * SZREG
+ REG_L t6, sp, 11 * SZREG
+ REG_L t7, sp, 12 * SZREG
+ REG_L t8, sp, 13 * SZREG
+ ADDI sp, sp, FRAME_SIZE
+ cfi_adjust_cfa_offset (-FRAME_SIZE)
+
+ b .Lret
+ cfi_endproc
+ .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+ .hidden HIDDEN_JUMPTARGET(__tls_get_addr)
@@ -59,376 +59,34 @@ _dl_tlsdesc_undefweak:
cfi_endproc
.size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
-
#ifdef SHARED
-#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
-#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
-#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
-#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
-
- /* Handler for dynamic TLS symbols.
- Prototype:
- _dl_tlsdesc_dynamic (tlsdesc *) ;
-
- The second word of the descriptor points to a
- tlsdesc_dynamic_arg structure.
-
- Returns the offset between the thread pointer and the
- object referenced by the argument.
-
- ptrdiff_t
- _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
- {
- struct tlsdesc_dynamic_arg *td = tdp->arg;
- dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
- if (__glibc_likely (td->gen_count <= dtv[0].counter
- && (dtv[td->tlsinfo.ti_module].pointer.val
- != TLS_DTV_UNALLOCATED),
- 1))
- return dtv[td->tlsinfo.ti_module].pointer.val
- + td->tlsinfo.ti_offset
- - __thread_pointer;
-
- return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
- } */
- .hidden _dl_tlsdesc_dynamic
- .global _dl_tlsdesc_dynamic
- .type _dl_tlsdesc_dynamic,%function
- cfi_startproc
- .align 2
-_dl_tlsdesc_dynamic:
- /* Save just enough registers to support fast path, if we fall
- into slow path we will save additional registers. */
- ADDI sp, sp, -32
- REG_S t0, sp, 0
- REG_S t1, sp, 8
- REG_S t2, sp, 16
-
-/* Runtime Storage Layout of Thread-Local Storage
- TP point to the start of TLS block.
-
- dtv
-Low address TCB ----------------> dtv0(counter)
- TP --> static_block0 <----- dtv1
- static_block1 <----- dtv2
- static_block2 <----- dtv3
- dynamic_block0 <----- dtv4
-Hign address dynamic_block1 <----- dtv5 */
-
- REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
- REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
- REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
- REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
- /* If dtv[0].counter < td->gen_count, goto slow path. */
- bltu t2, t1, .Lslow
-
- REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
- /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
- slli.d t1, t1, 4
- add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
- REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
- li.d t2, TLS_DTV_UNALLOCATED
- /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
- goto slow path. */
- beq t1, t2, .Lslow
-
- REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */
- /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
- add.d a0, t1, t2
-.Lret:
- sub.d a0, a0, tp
- REG_L t0, sp, 0
- REG_L t1, sp, 8
- REG_L t2, sp, 16
- ADDI sp, sp, 32
- RET
-
-.Lslow:
- /* This is the slow path. We need to call __tls_get_addr() which
- means we need to save and restore all the register that the
- callee will trash. */
-
- /* Save the remaining registers that we must treat as caller save. */
- ADDI sp, sp, -FRAME_SIZE
- REG_S ra, sp, 0 * SZREG
- REG_S a1, sp, 1 * SZREG
- REG_S a2, sp, 2 * SZREG
- REG_S a3, sp, 3 * SZREG
- REG_S a4, sp, 4 * SZREG
- REG_S a5, sp, 5 * SZREG
- REG_S a6, sp, 6 * SZREG
- REG_S a7, sp, 7 * SZREG
- REG_S t3, sp, 8 * SZREG
- REG_S t4, sp, 9 * SZREG
- REG_S t5, sp, 10 * SZREG
- REG_S t6, sp, 11 * SZREG
- REG_S t7, sp, 12 * SZREG
- REG_S t8, sp, 13 * SZREG
-
#ifndef __loongarch_soft_float
- /* Save fcsr0 register.
- Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
- of some fields in fcsr0. */
- movfcsr2gr t0, fcsr0
- st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */
-
- /* Whether support LASX. */
- la.global t0, _rtld_global_ro
- REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
- andi t1, t0, HWCAP_LOONGARCH_LASX
- beqz t1, .Llsx
-
- /* Save 256-bit vector registers.
- FIXME: Without vector ABI, save all vector registers. */
- ADDI sp, sp, -FRAME_SIZE_LASX
- xvst xr0, sp, 0*SZXREG
- xvst xr1, sp, 1*SZXREG
- xvst xr2, sp, 2*SZXREG
- xvst xr3, sp, 3*SZXREG
- xvst xr4, sp, 4*SZXREG
- xvst xr5, sp, 5*SZXREG
- xvst xr6, sp, 6*SZXREG
- xvst xr7, sp, 7*SZXREG
- xvst xr8, sp, 8*SZXREG
- xvst xr9, sp, 9*SZXREG
- xvst xr10, sp, 10*SZXREG
- xvst xr11, sp, 11*SZXREG
- xvst xr12, sp, 12*SZXREG
- xvst xr13, sp, 13*SZXREG
- xvst xr14, sp, 14*SZXREG
- xvst xr15, sp, 15*SZXREG
- xvst xr16, sp, 16*SZXREG
- xvst xr17, sp, 17*SZXREG
- xvst xr18, sp, 18*SZXREG
- xvst xr19, sp, 19*SZXREG
- xvst xr20, sp, 20*SZXREG
- xvst xr21, sp, 21*SZXREG
- xvst xr22, sp, 22*SZXREG
- xvst xr23, sp, 23*SZXREG
- xvst xr24, sp, 24*SZXREG
- xvst xr25, sp, 25*SZXREG
- xvst xr26, sp, 26*SZXREG
- xvst xr27, sp, 27*SZXREG
- xvst xr28, sp, 28*SZXREG
- xvst xr29, sp, 29*SZXREG
- xvst xr30, sp, 30*SZXREG
- xvst xr31, sp, 31*SZXREG
- b .Ltga
-
-.Llsx:
- /* Whether support LSX. */
- andi t1, t0, HWCAP_LOONGARCH_LSX
- beqz t1, .Lfloat
-
- /* Save 128-bit vector registers. */
- ADDI sp, sp, -FRAME_SIZE_LSX
- vst vr0, sp, 0*SZVREG
- vst vr1, sp, 1*SZVREG
- vst vr2, sp, 2*SZVREG
- vst vr3, sp, 3*SZVREG
- vst vr4, sp, 4*SZVREG
- vst vr5, sp, 5*SZVREG
- vst vr6, sp, 6*SZVREG
- vst vr7, sp, 7*SZVREG
- vst vr8, sp, 8*SZVREG
- vst vr9, sp, 9*SZVREG
- vst vr10, sp, 10*SZVREG
- vst vr11, sp, 11*SZVREG
- vst vr12, sp, 12*SZVREG
- vst vr13, sp, 13*SZVREG
- vst vr14, sp, 14*SZVREG
- vst vr15, sp, 15*SZVREG
- vst vr16, sp, 16*SZVREG
- vst vr17, sp, 17*SZVREG
- vst vr18, sp, 18*SZVREG
- vst vr19, sp, 19*SZVREG
- vst vr20, sp, 20*SZVREG
- vst vr21, sp, 21*SZVREG
- vst vr22, sp, 22*SZVREG
- vst vr23, sp, 23*SZVREG
- vst vr24, sp, 24*SZVREG
- vst vr25, sp, 25*SZVREG
- vst vr26, sp, 26*SZVREG
- vst vr27, sp, 27*SZVREG
- vst vr28, sp, 28*SZVREG
- vst vr29, sp, 29*SZVREG
- vst vr30, sp, 30*SZVREG
- vst vr31, sp, 31*SZVREG
- b .Ltga
-
-.Lfloat:
- /* Save float registers. */
- ADDI sp, sp, -FRAME_SIZE_FLOAT
- FREG_S fa0, sp, 0*SZFREG
- FREG_S fa1, sp, 1*SZFREG
- FREG_S fa2, sp, 2*SZFREG
- FREG_S fa3, sp, 3*SZFREG
- FREG_S fa4, sp, 4*SZFREG
- FREG_S fa5, sp, 5*SZFREG
- FREG_S fa6, sp, 6*SZFREG
- FREG_S fa7, sp, 7*SZFREG
- FREG_S ft0, sp, 8*SZFREG
- FREG_S ft1, sp, 9*SZFREG
- FREG_S ft2, sp, 10*SZFREG
- FREG_S ft3, sp, 11*SZFREG
- FREG_S ft4, sp, 12*SZFREG
- FREG_S ft5, sp, 13*SZFREG
- FREG_S ft6, sp, 14*SZFREG
- FREG_S ft7, sp, 15*SZFREG
- FREG_S ft8, sp, 16*SZFREG
- FREG_S ft9, sp, 17*SZFREG
- FREG_S ft10, sp, 18*SZFREG
- FREG_S ft11, sp, 19*SZFREG
- FREG_S ft12, sp, 20*SZFREG
- FREG_S ft13, sp, 21*SZFREG
- FREG_S ft14, sp, 22*SZFREG
- FREG_S ft15, sp, 23*SZFREG
-
-#endif /* #ifndef __loongarch_soft_float */
-
-.Ltga:
- bl HIDDEN_JUMPTARGET(__tls_get_addr)
- ADDI a0, a0, -TLS_DTV_OFFSET
-
-#ifndef __loongarch_soft_float
-
- la.global t0, _rtld_global_ro
- REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
- andi t1, t0, HWCAP_LOONGARCH_LASX
- beqz t1, .Llsx1
-
- /* Restore 256-bit vector registers. */
- xvld xr0, sp, 0*SZXREG
- xvld xr1, sp, 1*SZXREG
- xvld xr2, sp, 2*SZXREG
- xvld xr3, sp, 3*SZXREG
- xvld xr4, sp, 4*SZXREG
- xvld xr5, sp, 5*SZXREG
- xvld xr6, sp, 6*SZXREG
- xvld xr7, sp, 7*SZXREG
- xvld xr8, sp, 8*SZXREG
- xvld xr9, sp, 9*SZXREG
- xvld xr10, sp, 10*SZXREG
- xvld xr11, sp, 11*SZXREG
- xvld xr12, sp, 12*SZXREG
- xvld xr13, sp, 13*SZXREG
- xvld xr14, sp, 14*SZXREG
- xvld xr15, sp, 15*SZXREG
- xvld xr16, sp, 16*SZXREG
- xvld xr17, sp, 17*SZXREG
- xvld xr18, sp, 18*SZXREG
- xvld xr19, sp, 19*SZXREG
- xvld xr20, sp, 20*SZXREG
- xvld xr21, sp, 21*SZXREG
- xvld xr22, sp, 22*SZXREG
- xvld xr23, sp, 23*SZXREG
- xvld xr24, sp, 24*SZXREG
- xvld xr25, sp, 25*SZXREG
- xvld xr26, sp, 26*SZXREG
- xvld xr27, sp, 27*SZXREG
- xvld xr28, sp, 28*SZXREG
- xvld xr29, sp, 29*SZXREG
- xvld xr30, sp, 30*SZXREG
- xvld xr31, sp, 31*SZXREG
- ADDI sp, sp, FRAME_SIZE_LASX
- b .Lfcsr
-
-.Llsx1:
- andi t1, t0, HWCAP_LOONGARCH_LSX
- beqz t1, .Lfloat1
-
- /* Restore 128-bit vector registers. */
- vld vr0, sp, 0*SZVREG
- vld vr1, sp, 1*SZVREG
- vld vr2, sp, 2*SZVREG
- vld vr3, sp, 3*SZVREG
- vld vr4, sp, 4*SZVREG
- vld vr5, sp, 5*SZVREG
- vld vr6, sp, 6*SZVREG
- vld vr7, sp, 7*SZVREG
- vld vr8, sp, 8*SZVREG
- vld vr9, sp, 9*SZVREG
- vld vr10, sp, 10*SZVREG
- vld vr11, sp, 11*SZVREG
- vld vr12, sp, 12*SZVREG
- vld vr13, sp, 13*SZVREG
- vld vr14, sp, 14*SZVREG
- vld vr15, sp, 15*SZVREG
- vld vr16, sp, 16*SZVREG
- vld vr17, sp, 17*SZVREG
- vld vr18, sp, 18*SZVREG
- vld vr19, sp, 19*SZVREG
- vld vr20, sp, 20*SZVREG
- vld vr21, sp, 21*SZVREG
- vld vr22, sp, 22*SZVREG
- vld vr23, sp, 23*SZVREG
- vld vr24, sp, 24*SZVREG
- vld vr25, sp, 25*SZVREG
- vld vr26, sp, 26*SZVREG
- vld vr27, sp, 27*SZVREG
- vld vr28, sp, 28*SZVREG
- vld vr29, sp, 29*SZVREG
- vld vr30, sp, 30*SZVREG
- vld vr31, sp, 31*SZVREG
- ADDI sp, sp, FRAME_SIZE_LSX
- b .Lfcsr
-
-.Lfloat1:
- /* Restore float registers. */
- FREG_L fa0, sp, 0*SZFREG
- FREG_L fa1, sp, 1*SZFREG
- FREG_L fa2, sp, 2*SZFREG
- FREG_L fa3, sp, 3*SZFREG
- FREG_L fa4, sp, 4*SZFREG
- FREG_L fa5, sp, 5*SZFREG
- FREG_L fa6, sp, 6*SZFREG
- FREG_L fa7, sp, 7*SZFREG
- FREG_L ft0, sp, 8*SZFREG
- FREG_L ft1, sp, 9*SZFREG
- FREG_L ft2, sp, 10*SZFREG
- FREG_L ft3, sp, 11*SZFREG
- FREG_L ft4, sp, 12*SZFREG
- FREG_L ft5, sp, 13*SZFREG
- FREG_L ft6, sp, 14*SZFREG
- FREG_L ft7, sp, 15*SZFREG
- FREG_L ft8, sp, 16*SZFREG
- FREG_L ft9, sp, 17*SZFREG
- FREG_L ft10, sp, 18*SZFREG
- FREG_L ft11, sp, 19*SZFREG
- FREG_L ft12, sp, 20*SZFREG
- FREG_L ft13, sp, 21*SZFREG
- FREG_L ft14, sp, 22*SZFREG
- FREG_L ft15, sp, 23*SZFREG
- ADDI sp, sp, FRAME_SIZE_FLOAT
-
-.Lfcsr:
- /* Restore fcsr0 register. */
- ld.w t0, sp, FRAME_SIZE + 24
- movgr2fcsr fcsr0, t0
+/* Build the LASX flavour of the handler.  dl-tlsdesc-dynamic.h is a
+   template: USE_LASX selects the 256-bit xvst/xvld save/restore code,
+   and the function name and its local labels are renamed so that every
+   inclusion emits distinct symbols.  */
+#define USE_LASX
+#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
+#define Lret Lret_lasx
+#define Lslow Lslow_lasx
+#include "dl-tlsdesc-dynamic.h"
+/* Drop everything the template defined, so that the next inclusion does
+   not redefine a macro with a different replacement list.  */
+#undef FRAME_SIZE
+#undef V_REG_S
+#undef V_REG_L
+#undef V_SPACE
+#undef V_REG
+#undef V_REGS
+#undef V_REGSZ
+#undef USE_LASX
+#undef _dl_tlsdesc_dynamic
+#undef Lret
+#undef Lslow
+
+/* Build the LSX flavour: same template, USE_LSX selects the 128-bit
+   vst/vld save/restore code.  */
+#define USE_LSX
+#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
+#define Lret Lret_lsx
+#define Lslow Lslow_lsx
+#include "dl-tlsdesc-dynamic.h"
+/* Clean up again: the generic inclusion that follows selects the
+   floating-point definitions of the V_* macros.  */
+#undef FRAME_SIZE
+#undef V_REG_S
+#undef V_REG_L
+#undef V_SPACE
+#undef V_REG
+#undef V_REGS
+#undef V_REGSZ
+#undef USE_LSX
+#undef _dl_tlsdesc_dynamic
+#undef Lret
+#undef Lslow
#endif /* #ifndef __loongarch_soft_float */
- REG_L ra, sp, 0 * SZREG
- REG_L a1, sp, 1 * SZREG
- REG_L a2, sp, 2 * SZREG
- REG_L a3, sp, 3 * SZREG
- REG_L a4, sp, 4 * SZREG
- REG_L a5, sp, 5 * SZREG
- REG_L a6, sp, 6 * SZREG
- REG_L a7, sp, 7 * SZREG
- REG_L t3, sp, 8 * SZREG
- REG_L t4, sp, 9 * SZREG
- REG_L t5, sp, 10 * SZREG
- REG_L t6, sp, 11 * SZREG
- REG_L t7, sp, 12 * SZREG
- REG_L t8, sp, 13 * SZREG
- ADDI sp, sp, FRAME_SIZE
-
- b .Lret
- cfi_endproc
- .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
- .hidden HIDDEN_JUMPTARGET(__tls_get_addr)
+#include "dl-tlsdesc-dynamic.h"
#endif /* #ifdef SHARED */
@@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
#ifdef SHARED
extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
+#ifndef __loongarch_soft_float /* Hard-float only: variants of
+   _dl_tlsdesc_dynamic built with USE_LASX / USE_LSX, which save and
+   restore the vector registers around the __tls_get_addr call.  */
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
+#endif
extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
#endif
@@ -4,12 +4,6 @@
#include <link.h>
#include <dl-tlsdesc.h>
-#define SHARED 1
-
-#include <ldsodefs.h>
-
-#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name)
-
--
-- Abuse tls.h macros to derive offsets relative to the thread register.
@@ -23,6 +17,3 @@ DTV_COUNTER offsetof(dtv_t, counter)
TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED
TLS_DTV_OFFSET TLS_DTV_OFFSET
SIZE_OF_TCB sizeof(tcbhead_t)
-GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap)
-HWCAP_LOONGARCH_LSX HWCAP_LOONGARCH_LSX
-HWCAP_LOONGARCH_LASX HWCAP_LOONGARCH_LASX