[v2] LoongArch: Add cfi instructions for _dl_tlsdesc_dynamic
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Test passed
|
redhat-pt-bot/TryBot-still_applies |
warning
|
Patch no longer applies to master
|
Commit Message
Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic,
_dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
Conflicting cfi instructions can be distributed to the
three functions.
---
Changes v1 -> v2:
- Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic,
_dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
v1 link: https://sourceware.org/pipermail/libc-alpha/2024-June/157270.html
sysdeps/loongarch/dl-machine.h | 7 +
sysdeps/loongarch/dl-tlsdesc-dynamic.h | 403 +++++++++++++++++++++++++
sysdeps/loongarch/dl-tlsdesc.S | 386 ++---------------------
sysdeps/loongarch/dl-tlsdesc.h | 4 +
4 files changed, 436 insertions(+), 364 deletions(-)
create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
Comments
Ping.
The reason of changing to three _dl_tlsdesc_dynamic:
In one _dl_tlsdesc_dynamic, there are three cfi_adjust_cfa_offset for
Float/LSX/LASX path.
Three cfi_adjust_cfa_offset are always executed in stack unwinding, but
only once stack down
instruction is executed. It resulting in incorrect CFA address.
With three _dl_tlsdesc_dynamic functions, three cfi_adjust_cfa_offset
can be distributed to three functions.
So cfi instructions can correspond to stack down instructions.
在 2024/6/26 下午2:34, mengqinggang 写道:
> Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic,
> _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
> Conflicting cfi instructions can be distributed to the
> three functions.
> ---
> Changes v1 -> v2:
> - Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic,
> _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
>
> v1 link: https://sourceware.org/pipermail/libc-alpha/2024-June/157270.html
>
> sysdeps/loongarch/dl-machine.h | 7 +
> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 403 +++++++++++++++++++++++++
> sysdeps/loongarch/dl-tlsdesc.S | 386 ++---------------------
> sysdeps/loongarch/dl-tlsdesc.h | 4 +
> 4 files changed, 436 insertions(+), 364 deletions(-)
> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>
> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
> index ab6f1da7c0..04fabbf598 100644
> --- a/sysdeps/loongarch/dl-machine.h
> +++ b/sysdeps/loongarch/dl-machine.h
> @@ -223,6 +223,13 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
> {
> td->arg = _dl_make_tlsdesc_dynamic (sym_map,
> sym->st_value + reloc->r_addend);
> +# ifndef __loongarch_soft_float
> + if (SUPPORT_LASX)
> + td->entry = _dl_tlsdesc_dynamic_lasx;
> + else if (SUPPORT_LSX)
> + td->entry = _dl_tlsdesc_dynamic_lsx;
> + else
> +# endif
> td->entry = _dl_tlsdesc_dynamic;
> }
> else
> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> new file mode 100644
> index 0000000000..5b1f43aaf4
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> @@ -0,0 +1,403 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> + LoongArch version.
> + Copyright (C) 2024 Free Software Foundation, Inc.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
> +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
> +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
> +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
> +
> + /* Handler for dynamic TLS symbols.
> + Prototype:
> + _dl_tlsdesc_dynamic (tlsdesc *) ;
> +
> + The second word of the descriptor points to a
> + tlsdesc_dynamic_arg structure.
> +
> + Returns the offset between the thread pointer and the
> + object referenced by the argument.
> +
> + ptrdiff_t
> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> + {
> + struct tlsdesc_dynamic_arg *td = tdp->arg;
> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
> + if (__glibc_likely (td->gen_count <= dtv[0].counter
> + && (dtv[td->tlsinfo.ti_module].pointer.val
> + != TLS_DTV_UNALLOCATED),
> + 1))
> + return dtv[td->tlsinfo.ti_module].pointer.val
> + + td->tlsinfo.ti_offset
> + - __thread_pointer;
> +
> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> + } */
> + .hidden _dl_tlsdesc_dynamic
> + .global _dl_tlsdesc_dynamic
> + .type _dl_tlsdesc_dynamic,%function
> + cfi_startproc
> + .align 2
> +_dl_tlsdesc_dynamic:
> + /* Save just enough registers to support fast path, if we fall
> + into slow path we will save additional registers. */
> + ADDI sp, sp, -32
> + cfi_adjust_cfa_offset (32)
> + REG_S t0, sp, 0
> + REG_S t1, sp, 8
> + REG_S t2, sp, 16
> + cfi_rel_offset (12, 0)
> + cfi_rel_offset (13, 8)
> + cfi_rel_offset (14, 16)
> +
> +/* Runtime Storage Layout of Thread-Local Storage
> + TP point to the start of TLS block.
> +
> + dtv
> +Low address TCB ----------------> dtv0(counter)
> + TP --> static_block0 <----- dtv1
> + static_block1 <----- dtv2
> + static_block2 <----- dtv3
> + dynamic_block0 <----- dtv4
> +Hign address dynamic_block1 <----- dtv5 */
> +
> + REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
> + REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
> + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
> + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
> + /* If dtv[0].counter < td->gen_count, goto slow path. */
> + bltu t2, t1, .Lslow
> +
> + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
> + /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
> + slli.d t1, t1, 4
> + add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
> + REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
> + li.d t2, TLS_DTV_UNALLOCATED
> + /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
> + goto slow path. */
> + beq t1, t2, .Lslow
> +
> + cfi_remember_state
> + REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */
> + /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
> + add.d a0, t1, t2
> +.Lret:
> + sub.d a0, a0, tp
> + REG_L t0, sp, 0
> + REG_L t1, sp, 8
> + REG_L t2, sp, 16
> + ADDI sp, sp, 32
> + cfi_adjust_cfa_offset (-32)
> + RET
> +
> +.Lslow:
> + /* This is the slow path. We need to call __tls_get_addr() which
> + means we need to save and restore all the register that the
> + callee will trash. */
> +
> + /* Save the remaining registers that we must treat as caller save. */
> + cfi_restore_state
> + ADDI sp, sp, -FRAME_SIZE
> + cfi_adjust_cfa_offset (FRAME_SIZE)
> + REG_S ra, sp, 0 * SZREG
> + REG_S a1, sp, 1 * SZREG
> + REG_S a2, sp, 2 * SZREG
> + REG_S a3, sp, 3 * SZREG
> + REG_S a4, sp, 4 * SZREG
> + REG_S a5, sp, 5 * SZREG
> + REG_S a6, sp, 6 * SZREG
> + REG_S a7, sp, 7 * SZREG
> + REG_S t3, sp, 8 * SZREG
> + REG_S t4, sp, 9 * SZREG
> + REG_S t5, sp, 10 * SZREG
> + REG_S t6, sp, 11 * SZREG
> + REG_S t7, sp, 12 * SZREG
> + REG_S t8, sp, 13 * SZREG
> + cfi_rel_offset (1, 0 * SZREG)
> + cfi_rel_offset (5, 1 * SZREG)
> + cfi_rel_offset (6, 2 * SZREG)
> + cfi_rel_offset (7, 3 * SZREG)
> + cfi_rel_offset (8, 4 * SZREG)
> + cfi_rel_offset (9, 5 * SZREG)
> + cfi_rel_offset (10, 6 * SZREG)
> + cfi_rel_offset (11, 7 * SZREG)
> + cfi_rel_offset (15, 8 * SZREG)
> + cfi_rel_offset (16, 9 * SZREG)
> + cfi_rel_offset (17, 10 * SZREG)
> + cfi_rel_offset (18, 11 * SZREG)
> + cfi_rel_offset (19, 12 * SZREG)
> + cfi_rel_offset (20, 13 * SZREG)
> +
> +#ifndef __loongarch_soft_float
> +
> + /* Save fcsr0 register.
> + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
> + of some fields in fcsr0. */
> + movfcsr2gr t0, fcsr0
> + st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2. */
> +
> +#ifdef USE_LASX
> +
> + /* Save 256-bit vector registers.
> + FIXME: Without vector ABI, save all vector registers. */
> + ADDI sp, sp, -FRAME_SIZE_LASX
> + cfi_adjust_cfa_offset (FRAME_SIZE_LASX)
> + xvst xr0, sp, 0*SZXREG
> + xvst xr1, sp, 1*SZXREG
> + xvst xr2, sp, 2*SZXREG
> + xvst xr3, sp, 3*SZXREG
> + xvst xr4, sp, 4*SZXREG
> + xvst xr5, sp, 5*SZXREG
> + xvst xr6, sp, 6*SZXREG
> + xvst xr7, sp, 7*SZXREG
> + xvst xr8, sp, 8*SZXREG
> + xvst xr9, sp, 9*SZXREG
> + xvst xr10, sp, 10*SZXREG
> + xvst xr11, sp, 11*SZXREG
> + xvst xr12, sp, 12*SZXREG
> + xvst xr13, sp, 13*SZXREG
> + xvst xr14, sp, 14*SZXREG
> + xvst xr15, sp, 15*SZXREG
> + xvst xr16, sp, 16*SZXREG
> + xvst xr17, sp, 17*SZXREG
> + xvst xr18, sp, 18*SZXREG
> + xvst xr19, sp, 19*SZXREG
> + xvst xr20, sp, 20*SZXREG
> + xvst xr21, sp, 21*SZXREG
> + xvst xr22, sp, 22*SZXREG
> + xvst xr23, sp, 23*SZXREG
> + xvst xr24, sp, 24*SZXREG
> + xvst xr25, sp, 25*SZXREG
> + xvst xr26, sp, 26*SZXREG
> + xvst xr27, sp, 27*SZXREG
> + xvst xr28, sp, 28*SZXREG
> + xvst xr29, sp, 29*SZXREG
> + xvst xr30, sp, 30*SZXREG
> + xvst xr31, sp, 31*SZXREG
> +
> +#elif defined USE_LSX
> +
> + /* Save 128-bit vector registers. */
> + ADDI sp, sp, -FRAME_SIZE_LSX
> + cfi_adjust_cfa_offset (FRAME_SIZE_LSX)
> + vst vr0, sp, 0*SZVREG
> + vst vr1, sp, 1*SZVREG
> + vst vr2, sp, 2*SZVREG
> + vst vr3, sp, 3*SZVREG
> + vst vr4, sp, 4*SZVREG
> + vst vr5, sp, 5*SZVREG
> + vst vr6, sp, 6*SZVREG
> + vst vr7, sp, 7*SZVREG
> + vst vr8, sp, 8*SZVREG
> + vst vr9, sp, 9*SZVREG
> + vst vr10, sp, 10*SZVREG
> + vst vr11, sp, 11*SZVREG
> + vst vr12, sp, 12*SZVREG
> + vst vr13, sp, 13*SZVREG
> + vst vr14, sp, 14*SZVREG
> + vst vr15, sp, 15*SZVREG
> + vst vr16, sp, 16*SZVREG
> + vst vr17, sp, 17*SZVREG
> + vst vr18, sp, 18*SZVREG
> + vst vr19, sp, 19*SZVREG
> + vst vr20, sp, 20*SZVREG
> + vst vr21, sp, 21*SZVREG
> + vst vr22, sp, 22*SZVREG
> + vst vr23, sp, 23*SZVREG
> + vst vr24, sp, 24*SZVREG
> + vst vr25, sp, 25*SZVREG
> + vst vr26, sp, 26*SZVREG
> + vst vr27, sp, 27*SZVREG
> + vst vr28, sp, 28*SZVREG
> + vst vr29, sp, 29*SZVREG
> + vst vr30, sp, 30*SZVREG
> + vst vr31, sp, 31*SZVREG
> +
> +# else
> +
> + /* Save float registers. */
> + ADDI sp, sp, -FRAME_SIZE_FLOAT
> + cfi_adjust_cfa_offset (FRAME_SIZE_FLOAT)
> + FREG_S fa0, sp, 0*SZFREG
> + FREG_S fa1, sp, 1*SZFREG
> + FREG_S fa2, sp, 2*SZFREG
> + FREG_S fa3, sp, 3*SZFREG
> + FREG_S fa4, sp, 4*SZFREG
> + FREG_S fa5, sp, 5*SZFREG
> + FREG_S fa6, sp, 6*SZFREG
> + FREG_S fa7, sp, 7*SZFREG
> + FREG_S ft0, sp, 8*SZFREG
> + FREG_S ft1, sp, 9*SZFREG
> + FREG_S ft2, sp, 10*SZFREG
> + FREG_S ft3, sp, 11*SZFREG
> + FREG_S ft4, sp, 12*SZFREG
> + FREG_S ft5, sp, 13*SZFREG
> + FREG_S ft6, sp, 14*SZFREG
> + FREG_S ft7, sp, 15*SZFREG
> + FREG_S ft8, sp, 16*SZFREG
> + FREG_S ft9, sp, 17*SZFREG
> + FREG_S ft10, sp, 18*SZFREG
> + FREG_S ft11, sp, 19*SZFREG
> + FREG_S ft12, sp, 20*SZFREG
> + FREG_S ft13, sp, 21*SZFREG
> + FREG_S ft14, sp, 22*SZFREG
> + FREG_S ft15, sp, 23*SZFREG
> +
> +#endif /* #ifdef USE_LASX */
> +#endif /* #ifndef __loongarch_soft_float */
> +
> + bl HIDDEN_JUMPTARGET(__tls_get_addr)
> + ADDI a0, a0, -TLS_DTV_OFFSET
> +
> +#ifndef __loongarch_soft_float
> +#ifdef USE_LASX
> +
> + /* Restore 256-bit vector registers. */
> + xvld xr0, sp, 0*SZXREG
> + xvld xr1, sp, 1*SZXREG
> + xvld xr2, sp, 2*SZXREG
> + xvld xr3, sp, 3*SZXREG
> + xvld xr4, sp, 4*SZXREG
> + xvld xr5, sp, 5*SZXREG
> + xvld xr6, sp, 6*SZXREG
> + xvld xr7, sp, 7*SZXREG
> + xvld xr8, sp, 8*SZXREG
> + xvld xr9, sp, 9*SZXREG
> + xvld xr10, sp, 10*SZXREG
> + xvld xr11, sp, 11*SZXREG
> + xvld xr12, sp, 12*SZXREG
> + xvld xr13, sp, 13*SZXREG
> + xvld xr14, sp, 14*SZXREG
> + xvld xr15, sp, 15*SZXREG
> + xvld xr16, sp, 16*SZXREG
> + xvld xr17, sp, 17*SZXREG
> + xvld xr18, sp, 18*SZXREG
> + xvld xr19, sp, 19*SZXREG
> + xvld xr20, sp, 20*SZXREG
> + xvld xr21, sp, 21*SZXREG
> + xvld xr22, sp, 22*SZXREG
> + xvld xr23, sp, 23*SZXREG
> + xvld xr24, sp, 24*SZXREG
> + xvld xr25, sp, 25*SZXREG
> + xvld xr26, sp, 26*SZXREG
> + xvld xr27, sp, 27*SZXREG
> + xvld xr28, sp, 28*SZXREG
> + xvld xr29, sp, 29*SZXREG
> + xvld xr30, sp, 30*SZXREG
> + xvld xr31, sp, 31*SZXREG
> + ADDI sp, sp, FRAME_SIZE_LASX
> + cfi_adjust_cfa_offset (-FRAME_SIZE_LASX)
> +
> +#elif defined USE_LSX
> +
> + /* Restore 128-bit vector registers. */
> + vld vr0, sp, 0*SZVREG
> + vld vr1, sp, 1*SZVREG
> + vld vr2, sp, 2*SZVREG
> + vld vr3, sp, 3*SZVREG
> + vld vr4, sp, 4*SZVREG
> + vld vr5, sp, 5*SZVREG
> + vld vr6, sp, 6*SZVREG
> + vld vr7, sp, 7*SZVREG
> + vld vr8, sp, 8*SZVREG
> + vld vr9, sp, 9*SZVREG
> + vld vr10, sp, 10*SZVREG
> + vld vr11, sp, 11*SZVREG
> + vld vr12, sp, 12*SZVREG
> + vld vr13, sp, 13*SZVREG
> + vld vr14, sp, 14*SZVREG
> + vld vr15, sp, 15*SZVREG
> + vld vr16, sp, 16*SZVREG
> + vld vr17, sp, 17*SZVREG
> + vld vr18, sp, 18*SZVREG
> + vld vr19, sp, 19*SZVREG
> + vld vr20, sp, 20*SZVREG
> + vld vr21, sp, 21*SZVREG
> + vld vr22, sp, 22*SZVREG
> + vld vr23, sp, 23*SZVREG
> + vld vr24, sp, 24*SZVREG
> + vld vr25, sp, 25*SZVREG
> + vld vr26, sp, 26*SZVREG
> + vld vr27, sp, 27*SZVREG
> + vld vr28, sp, 28*SZVREG
> + vld vr29, sp, 29*SZVREG
> + vld vr30, sp, 30*SZVREG
> + vld vr31, sp, 31*SZVREG
> + ADDI sp, sp, FRAME_SIZE_LSX
> + cfi_adjust_cfa_offset (-FRAME_SIZE_LSX)
> +
> +#else
> +
> + /* Restore float registers. */
> + FREG_L fa0, sp, 0*SZFREG
> + FREG_L fa1, sp, 1*SZFREG
> + FREG_L fa2, sp, 2*SZFREG
> + FREG_L fa3, sp, 3*SZFREG
> + FREG_L fa4, sp, 4*SZFREG
> + FREG_L fa5, sp, 5*SZFREG
> + FREG_L fa6, sp, 6*SZFREG
> + FREG_L fa7, sp, 7*SZFREG
> + FREG_L ft0, sp, 8*SZFREG
> + FREG_L ft1, sp, 9*SZFREG
> + FREG_L ft2, sp, 10*SZFREG
> + FREG_L ft3, sp, 11*SZFREG
> + FREG_L ft4, sp, 12*SZFREG
> + FREG_L ft5, sp, 13*SZFREG
> + FREG_L ft6, sp, 14*SZFREG
> + FREG_L ft7, sp, 15*SZFREG
> + FREG_L ft8, sp, 16*SZFREG
> + FREG_L ft9, sp, 17*SZFREG
> + FREG_L ft10, sp, 18*SZFREG
> + FREG_L ft11, sp, 19*SZFREG
> + FREG_L ft12, sp, 20*SZFREG
> + FREG_L ft13, sp, 21*SZFREG
> + FREG_L ft14, sp, 22*SZFREG
> + FREG_L ft15, sp, 23*SZFREG
> + ADDI sp, sp, FRAME_SIZE_FLOAT
> + cfi_adjust_cfa_offset (-FRAME_SIZE_FLOAT)
> +
> +#endif /* #ifdef USE_LASX */
> +
> + /* Restore fcsr0 register. */
> + ld.w t0, sp, FRAME_SIZE + 24
> + movgr2fcsr fcsr0, t0
> +
> +#endif /* #ifndef __loongarch_soft_float */
> +
> + REG_L ra, sp, 0 * SZREG
> + REG_L a1, sp, 1 * SZREG
> + REG_L a2, sp, 2 * SZREG
> + REG_L a3, sp, 3 * SZREG
> + REG_L a4, sp, 4 * SZREG
> + REG_L a5, sp, 5 * SZREG
> + REG_L a6, sp, 6 * SZREG
> + REG_L a7, sp, 7 * SZREG
> + REG_L t3, sp, 8 * SZREG
> + REG_L t4, sp, 9 * SZREG
> + REG_L t5, sp, 10 * SZREG
> + REG_L t6, sp, 11 * SZREG
> + REG_L t7, sp, 12 * SZREG
> + REG_L t8, sp, 13 * SZREG
> + ADDI sp, sp, FRAME_SIZE
> + cfi_adjust_cfa_offset (-FRAME_SIZE)
> +
> + b .Lret
> + cfi_endproc
> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> + .hidden HIDDEN_JUMPTARGET(__tls_get_addr)
> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
> index a6627cc754..b6cfd6121d 100644
> --- a/sysdeps/loongarch/dl-tlsdesc.S
> +++ b/sysdeps/loongarch/dl-tlsdesc.S
> @@ -59,376 +59,34 @@ _dl_tlsdesc_undefweak:
> cfi_endproc
> .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>
> -
> #ifdef SHARED
>
> -#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
> -#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
> -#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
> -#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
> -
> - /* Handler for dynamic TLS symbols.
> - Prototype:
> - _dl_tlsdesc_dynamic (tlsdesc *) ;
> -
> - The second word of the descriptor points to a
> - tlsdesc_dynamic_arg structure.
> -
> - Returns the offset between the thread pointer and the
> - object referenced by the argument.
> -
> - ptrdiff_t
> - _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> - {
> - struct tlsdesc_dynamic_arg *td = tdp->arg;
> - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
> - if (__glibc_likely (td->gen_count <= dtv[0].counter
> - && (dtv[td->tlsinfo.ti_module].pointer.val
> - != TLS_DTV_UNALLOCATED),
> - 1))
> - return dtv[td->tlsinfo.ti_module].pointer.val
> - + td->tlsinfo.ti_offset
> - - __thread_pointer;
> -
> - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> - } */
> - .hidden _dl_tlsdesc_dynamic
> - .global _dl_tlsdesc_dynamic
> - .type _dl_tlsdesc_dynamic,%function
> - cfi_startproc
> - .align 2
> -_dl_tlsdesc_dynamic:
> - /* Save just enough registers to support fast path, if we fall
> - into slow path we will save additional registers. */
> - ADDI sp, sp, -32
> - REG_S t0, sp, 0
> - REG_S t1, sp, 8
> - REG_S t2, sp, 16
> -
> -/* Runtime Storage Layout of Thread-Local Storage
> - TP point to the start of TLS block.
> -
> - dtv
> -Low address TCB ----------------> dtv0(counter)
> - TP --> static_block0 <----- dtv1
> - static_block1 <----- dtv2
> - static_block2 <----- dtv3
> - dynamic_block0 <----- dtv4
> -Hign address dynamic_block1 <----- dtv5 */
> -
> - REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
> - REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
> - REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
> - REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
> - /* If dtv[0].counter < td->gen_count, goto slow path. */
> - bltu t2, t1, .Lslow
> -
> - REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
> - /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
> - slli.d t1, t1, 4
> - add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
> - REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
> - li.d t2, TLS_DTV_UNALLOCATED
> - /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
> - goto slow path. */
> - beq t1, t2, .Lslow
> -
> - REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */
> - /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
> - add.d a0, t1, t2
> -.Lret:
> - sub.d a0, a0, tp
> - REG_L t0, sp, 0
> - REG_L t1, sp, 8
> - REG_L t2, sp, 16
> - ADDI sp, sp, 32
> - RET
> -
> -.Lslow:
> - /* This is the slow path. We need to call __tls_get_addr() which
> - means we need to save and restore all the register that the
> - callee will trash. */
> -
> - /* Save the remaining registers that we must treat as caller save. */
> - ADDI sp, sp, -FRAME_SIZE
> - REG_S ra, sp, 0 * SZREG
> - REG_S a1, sp, 1 * SZREG
> - REG_S a2, sp, 2 * SZREG
> - REG_S a3, sp, 3 * SZREG
> - REG_S a4, sp, 4 * SZREG
> - REG_S a5, sp, 5 * SZREG
> - REG_S a6, sp, 6 * SZREG
> - REG_S a7, sp, 7 * SZREG
> - REG_S t3, sp, 8 * SZREG
> - REG_S t4, sp, 9 * SZREG
> - REG_S t5, sp, 10 * SZREG
> - REG_S t6, sp, 11 * SZREG
> - REG_S t7, sp, 12 * SZREG
> - REG_S t8, sp, 13 * SZREG
> -
> #ifndef __loongarch_soft_float
>
> - /* Save fcsr0 register.
> - Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
> - of some fields in fcsr0. */
> - movfcsr2gr t0, fcsr0
> - st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */
> -
> - /* Whether support LASX. */
> - la.global t0, _rtld_global_ro
> - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
> - andi t1, t0, HWCAP_LOONGARCH_LASX
> - beqz t1, .Llsx
> -
> - /* Save 256-bit vector registers.
> - FIXME: Without vector ABI, save all vector registers. */
> - ADDI sp, sp, -FRAME_SIZE_LASX
> - xvst xr0, sp, 0*SZXREG
> - xvst xr1, sp, 1*SZXREG
> - xvst xr2, sp, 2*SZXREG
> - xvst xr3, sp, 3*SZXREG
> - xvst xr4, sp, 4*SZXREG
> - xvst xr5, sp, 5*SZXREG
> - xvst xr6, sp, 6*SZXREG
> - xvst xr7, sp, 7*SZXREG
> - xvst xr8, sp, 8*SZXREG
> - xvst xr9, sp, 9*SZXREG
> - xvst xr10, sp, 10*SZXREG
> - xvst xr11, sp, 11*SZXREG
> - xvst xr12, sp, 12*SZXREG
> - xvst xr13, sp, 13*SZXREG
> - xvst xr14, sp, 14*SZXREG
> - xvst xr15, sp, 15*SZXREG
> - xvst xr16, sp, 16*SZXREG
> - xvst xr17, sp, 17*SZXREG
> - xvst xr18, sp, 18*SZXREG
> - xvst xr19, sp, 19*SZXREG
> - xvst xr20, sp, 20*SZXREG
> - xvst xr21, sp, 21*SZXREG
> - xvst xr22, sp, 22*SZXREG
> - xvst xr23, sp, 23*SZXREG
> - xvst xr24, sp, 24*SZXREG
> - xvst xr25, sp, 25*SZXREG
> - xvst xr26, sp, 26*SZXREG
> - xvst xr27, sp, 27*SZXREG
> - xvst xr28, sp, 28*SZXREG
> - xvst xr29, sp, 29*SZXREG
> - xvst xr30, sp, 30*SZXREG
> - xvst xr31, sp, 31*SZXREG
> - b .Ltga
> -
> -.Llsx:
> - /* Whether support LSX. */
> - andi t1, t0, HWCAP_LOONGARCH_LSX
> - beqz t1, .Lfloat
> -
> - /* Save 128-bit vector registers. */
> - ADDI sp, sp, -FRAME_SIZE_LSX
> - vst vr0, sp, 0*SZVREG
> - vst vr1, sp, 1*SZVREG
> - vst vr2, sp, 2*SZVREG
> - vst vr3, sp, 3*SZVREG
> - vst vr4, sp, 4*SZVREG
> - vst vr5, sp, 5*SZVREG
> - vst vr6, sp, 6*SZVREG
> - vst vr7, sp, 7*SZVREG
> - vst vr8, sp, 8*SZVREG
> - vst vr9, sp, 9*SZVREG
> - vst vr10, sp, 10*SZVREG
> - vst vr11, sp, 11*SZVREG
> - vst vr12, sp, 12*SZVREG
> - vst vr13, sp, 13*SZVREG
> - vst vr14, sp, 14*SZVREG
> - vst vr15, sp, 15*SZVREG
> - vst vr16, sp, 16*SZVREG
> - vst vr17, sp, 17*SZVREG
> - vst vr18, sp, 18*SZVREG
> - vst vr19, sp, 19*SZVREG
> - vst vr20, sp, 20*SZVREG
> - vst vr21, sp, 21*SZVREG
> - vst vr22, sp, 22*SZVREG
> - vst vr23, sp, 23*SZVREG
> - vst vr24, sp, 24*SZVREG
> - vst vr25, sp, 25*SZVREG
> - vst vr26, sp, 26*SZVREG
> - vst vr27, sp, 27*SZVREG
> - vst vr28, sp, 28*SZVREG
> - vst vr29, sp, 29*SZVREG
> - vst vr30, sp, 30*SZVREG
> - vst vr31, sp, 31*SZVREG
> - b .Ltga
> -
> -.Lfloat:
> - /* Save float registers. */
> - ADDI sp, sp, -FRAME_SIZE_FLOAT
> - FREG_S fa0, sp, 0*SZFREG
> - FREG_S fa1, sp, 1*SZFREG
> - FREG_S fa2, sp, 2*SZFREG
> - FREG_S fa3, sp, 3*SZFREG
> - FREG_S fa4, sp, 4*SZFREG
> - FREG_S fa5, sp, 5*SZFREG
> - FREG_S fa6, sp, 6*SZFREG
> - FREG_S fa7, sp, 7*SZFREG
> - FREG_S ft0, sp, 8*SZFREG
> - FREG_S ft1, sp, 9*SZFREG
> - FREG_S ft2, sp, 10*SZFREG
> - FREG_S ft3, sp, 11*SZFREG
> - FREG_S ft4, sp, 12*SZFREG
> - FREG_S ft5, sp, 13*SZFREG
> - FREG_S ft6, sp, 14*SZFREG
> - FREG_S ft7, sp, 15*SZFREG
> - FREG_S ft8, sp, 16*SZFREG
> - FREG_S ft9, sp, 17*SZFREG
> - FREG_S ft10, sp, 18*SZFREG
> - FREG_S ft11, sp, 19*SZFREG
> - FREG_S ft12, sp, 20*SZFREG
> - FREG_S ft13, sp, 21*SZFREG
> - FREG_S ft14, sp, 22*SZFREG
> - FREG_S ft15, sp, 23*SZFREG
> -
> -#endif /* #ifndef __loongarch_soft_float */
> -
> -.Ltga:
> - bl HIDDEN_JUMPTARGET(__tls_get_addr)
> - ADDI a0, a0, -TLS_DTV_OFFSET
> -
> -#ifndef __loongarch_soft_float
> -
> - la.global t0, _rtld_global_ro
> - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
> - andi t1, t0, HWCAP_LOONGARCH_LASX
> - beqz t1, .Llsx1
> -
> - /* Restore 256-bit vector registers. */
> - xvld xr0, sp, 0*SZXREG
> - xvld xr1, sp, 1*SZXREG
> - xvld xr2, sp, 2*SZXREG
> - xvld xr3, sp, 3*SZXREG
> - xvld xr4, sp, 4*SZXREG
> - xvld xr5, sp, 5*SZXREG
> - xvld xr6, sp, 6*SZXREG
> - xvld xr7, sp, 7*SZXREG
> - xvld xr8, sp, 8*SZXREG
> - xvld xr9, sp, 9*SZXREG
> - xvld xr10, sp, 10*SZXREG
> - xvld xr11, sp, 11*SZXREG
> - xvld xr12, sp, 12*SZXREG
> - xvld xr13, sp, 13*SZXREG
> - xvld xr14, sp, 14*SZXREG
> - xvld xr15, sp, 15*SZXREG
> - xvld xr16, sp, 16*SZXREG
> - xvld xr17, sp, 17*SZXREG
> - xvld xr18, sp, 18*SZXREG
> - xvld xr19, sp, 19*SZXREG
> - xvld xr20, sp, 20*SZXREG
> - xvld xr21, sp, 21*SZXREG
> - xvld xr22, sp, 22*SZXREG
> - xvld xr23, sp, 23*SZXREG
> - xvld xr24, sp, 24*SZXREG
> - xvld xr25, sp, 25*SZXREG
> - xvld xr26, sp, 26*SZXREG
> - xvld xr27, sp, 27*SZXREG
> - xvld xr28, sp, 28*SZXREG
> - xvld xr29, sp, 29*SZXREG
> - xvld xr30, sp, 30*SZXREG
> - xvld xr31, sp, 31*SZXREG
> - ADDI sp, sp, FRAME_SIZE_LASX
> - b .Lfcsr
> -
> -.Llsx1:
> - andi t1, t0, HWCAP_LOONGARCH_LSX
> - beqz t1, .Lfloat1
> -
> - /* Restore 128-bit vector registers. */
> - vld vr0, sp, 0*SZVREG
> - vld vr1, sp, 1*SZVREG
> - vld vr2, sp, 2*SZVREG
> - vld vr3, sp, 3*SZVREG
> - vld vr4, sp, 4*SZVREG
> - vld vr5, sp, 5*SZVREG
> - vld vr6, sp, 6*SZVREG
> - vld vr7, sp, 7*SZVREG
> - vld vr8, sp, 8*SZVREG
> - vld vr9, sp, 9*SZVREG
> - vld vr10, sp, 10*SZVREG
> - vld vr11, sp, 11*SZVREG
> - vld vr12, sp, 12*SZVREG
> - vld vr13, sp, 13*SZVREG
> - vld vr14, sp, 14*SZVREG
> - vld vr15, sp, 15*SZVREG
> - vld vr16, sp, 16*SZVREG
> - vld vr17, sp, 17*SZVREG
> - vld vr18, sp, 18*SZVREG
> - vld vr19, sp, 19*SZVREG
> - vld vr20, sp, 20*SZVREG
> - vld vr21, sp, 21*SZVREG
> - vld vr22, sp, 22*SZVREG
> - vld vr23, sp, 23*SZVREG
> - vld vr24, sp, 24*SZVREG
> - vld vr25, sp, 25*SZVREG
> - vld vr26, sp, 26*SZVREG
> - vld vr27, sp, 27*SZVREG
> - vld vr28, sp, 28*SZVREG
> - vld vr29, sp, 29*SZVREG
> - vld vr30, sp, 30*SZVREG
> - vld vr31, sp, 31*SZVREG
> - ADDI sp, sp, FRAME_SIZE_LSX
> - b .Lfcsr
> -
> -.Lfloat1:
> - /* Restore float registers. */
> - FREG_L fa0, sp, 0*SZFREG
> - FREG_L fa1, sp, 1*SZFREG
> - FREG_L fa2, sp, 2*SZFREG
> - FREG_L fa3, sp, 3*SZFREG
> - FREG_L fa4, sp, 4*SZFREG
> - FREG_L fa5, sp, 5*SZFREG
> - FREG_L fa6, sp, 6*SZFREG
> - FREG_L fa7, sp, 7*SZFREG
> - FREG_L ft0, sp, 8*SZFREG
> - FREG_L ft1, sp, 9*SZFREG
> - FREG_L ft2, sp, 10*SZFREG
> - FREG_L ft3, sp, 11*SZFREG
> - FREG_L ft4, sp, 12*SZFREG
> - FREG_L ft5, sp, 13*SZFREG
> - FREG_L ft6, sp, 14*SZFREG
> - FREG_L ft7, sp, 15*SZFREG
> - FREG_L ft8, sp, 16*SZFREG
> - FREG_L ft9, sp, 17*SZFREG
> - FREG_L ft10, sp, 18*SZFREG
> - FREG_L ft11, sp, 19*SZFREG
> - FREG_L ft12, sp, 20*SZFREG
> - FREG_L ft13, sp, 21*SZFREG
> - FREG_L ft14, sp, 22*SZFREG
> - FREG_L ft15, sp, 23*SZFREG
> - ADDI sp, sp, FRAME_SIZE_FLOAT
> -
> -.Lfcsr:
> - /* Restore fcsr0 register. */
> - ld.w t0, sp, FRAME_SIZE + 24
> - movgr2fcsr fcsr0, t0
> +#define USE_LASX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
> +#define Lret Lret_lasx
> +#define Lslow Lslow_lasx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LASX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#define USE_LSX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
> +#define Lret Lret_lsx
> +#define Lslow Lslow_lsx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LSX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
>
> #endif /* #ifndef __loongarch_soft_float */
>
> - REG_L ra, sp, 0 * SZREG
> - REG_L a1, sp, 1 * SZREG
> - REG_L a2, sp, 2 * SZREG
> - REG_L a3, sp, 3 * SZREG
> - REG_L a4, sp, 4 * SZREG
> - REG_L a5, sp, 5 * SZREG
> - REG_L a6, sp, 6 * SZREG
> - REG_L a7, sp, 7 * SZREG
> - REG_L t3, sp, 8 * SZREG
> - REG_L t4, sp, 9 * SZREG
> - REG_L t5, sp, 10 * SZREG
> - REG_L t6, sp, 11 * SZREG
> - REG_L t7, sp, 12 * SZREG
> - REG_L t8, sp, 13 * SZREG
> - ADDI sp, sp, FRAME_SIZE
> -
> - b .Lret
> - cfi_endproc
> - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> - .hidden HIDDEN_JUMPTARGET(__tls_get_addr)
> +#include "dl-tlsdesc-dynamic.h"
>
> #endif /* #ifdef SHARED */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
> index ff8c69cb93..45c43a5b52 100644
> --- a/sysdeps/loongarch/dl-tlsdesc.h
> +++ b/sysdeps/loongarch/dl-tlsdesc.h
> @@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
>
> #ifdef SHARED
> extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
> +#ifndef __loongarch_soft_float
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
> +#endif
> extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
> #endif
>
On 2024-07-01 17:27, mengqinggang wrote:
> Ping.
>
>
> The reason of changing to three _dl_tlsdesc_dynamic:
> In one _dl_tlsdesc_dynamic, there are three cfi_adjust_cfa_offset for
> Float/LSX/LASX path.
> Three cfi_adjust_cfa_offset are always executed in stack unwinding,
> but only once stack down
> instruction is executed. It resulting in incorrect CFA address.
>
>
> With three _dl_tlsdesc_dynamic functions, three cfi_adjust_cfa_offset
> can be distributed to three functions.
> So cfi instructions can correspond to stack down instructions.
But in the old version, the code didn't set other cfi_adjust_cfa_offset
when do "stack down instructions", which may cause wrong. I think we can
keep it just one _dl_tlsdesc_dynamic if set enough `cfi_*`. Of course,
spliting it into three funcs is OK because we not need read HWCAP each
time when call _dl_tlsdesc_dynamic.
>
>
>
> 在 2024/6/26 下午2:34, mengqinggang 写道:
>> Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic,
>> _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
>> Conflicting cfi instructions can be distributed to the
>> three functions.
>> ---
>> Changes v1 -> v2:
>> - Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic,
>> _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
>>
>> v1 link:
>> https://sourceware.org/pipermail/libc-alpha/2024-June/157270.html
>>
>> sysdeps/loongarch/dl-machine.h | 7 +
>> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 403 +++++++++++++++++++++++++
>> sysdeps/loongarch/dl-tlsdesc.S | 386 ++---------------------
>> sysdeps/loongarch/dl-tlsdesc.h | 4 +
>> 4 files changed, 436 insertions(+), 364 deletions(-)
>> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>
>> diff --git a/sysdeps/loongarch/dl-machine.h
>> b/sysdeps/loongarch/dl-machine.h
>> index ab6f1da7c0..04fabbf598 100644
>> --- a/sysdeps/loongarch/dl-machine.h
>> +++ b/sysdeps/loongarch/dl-machine.h
>> @@ -223,6 +223,13 @@ elf_machine_rela (struct link_map *map, struct
>> r_scope_elem *scope[],
>> {
>> td->arg = _dl_make_tlsdesc_dynamic (sym_map,
>> sym->st_value + reloc->r_addend);
>> +# ifndef __loongarch_soft_float
>> + if (SUPPORT_LASX)
Why "SUPPORT_LASX" rather than "RTLD_SUPPORT_LASX"?
The old version read HWCAP at "GLRO_DL_HWCAP_OFFSET", it means
the HWCAP is at "GLRO_offsetof (dl_hwcap)".
But,
#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap &
HWCAP_LOONGARCH_LASX)
#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
>> + td->entry = _dl_tlsdesc_dynamic_lasx;
>> + else if (SUPPORT_LSX)
>> + td->entry = _dl_tlsdesc_dynamic_lsx;
>> + else
>> +# endif
>> td->entry = _dl_tlsdesc_dynamic;
>> }
>> else
>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> new file mode 100644
>> index 0000000000..5b1f43aaf4
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> @@ -0,0 +1,403 @@
>> +/* Thread-local storage handling in the ELF dynamic linker.
>> + LoongArch version.
>> + Copyright (C) 2024 Free Software Foundation, Inc.
>> +
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
>> +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
>> +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
>> +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
>> +
>> + /* Handler for dynamic TLS symbols.
>> + Prototype:
>> + _dl_tlsdesc_dynamic (tlsdesc *) ;
>> +
>> + The second word of the descriptor points to a
>> + tlsdesc_dynamic_arg structure.
>> +
>> + Returns the offset between the thread pointer and the
>> + object referenced by the argument.
>> +
>> + ptrdiff_t
>> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>> + {
>> + struct tlsdesc_dynamic_arg *td = tdp->arg;
>> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer -
>> SIZE_OF_TCB);
>> + if (__glibc_likely (td->gen_count <= dtv[0].counter
>> + && (dtv[td->tlsinfo.ti_module].pointer.val
>> + != TLS_DTV_UNALLOCATED),
>> + 1))
>> + return dtv[td->tlsinfo.ti_module].pointer.val
>> + + td->tlsinfo.ti_offset
>> + - __thread_pointer;
>> +
>> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>> + } */
>> + .hidden _dl_tlsdesc_dynamic
>> + .global _dl_tlsdesc_dynamic
>> + .type _dl_tlsdesc_dynamic,%function
>> + cfi_startproc
>> + .align 2
>> +_dl_tlsdesc_dynamic:
>> + /* Save just enough registers to support fast path, if we fall
>> + into slow path we will save additional registers. */
>> + ADDI sp, sp, -32
>> + cfi_adjust_cfa_offset (32)
>> + REG_S t0, sp, 0
>> + REG_S t1, sp, 8
>> + REG_S t2, sp, 16
>> + cfi_rel_offset (12, 0)
>> + cfi_rel_offset (13, 8)
>> + cfi_rel_offset (14, 16)
>> +
>> +/* Runtime Storage Layout of Thread-Local Storage
>> + TP point to the start of TLS block.
>> +
>> + dtv
>> +Low address TCB ----------------> dtv0(counter)
>> + TP --> static_block0 <----- dtv1
>> + static_block1 <----- dtv2
>> + static_block2 <----- dtv3
>> + dynamic_block0 <----- dtv4
>> +Hign address dynamic_block1 <----- dtv5 */
>> +
>> + REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
>> + REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
>> + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
>> + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
>> + /* If dtv[0].counter < td->gen_count, goto slow path. */
>> + bltu t2, t1, .Lslow
>> +
>> + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
>> + /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
>> + slli.d t1, t1, 4
>> + add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
>> + REG_L t1, t1, 0 /* t1 =
>> dtv[td->tlsinfo.ti_module].pointer.val */
>> + li.d t2, TLS_DTV_UNALLOCATED
>> + /* If dtv[td->tlsinfo.ti_module].pointer.val is
>> TLS_DTV_UNALLOCATED,
>> + goto slow path. */
>> + beq t1, t2, .Lslow
>> +
>> + cfi_remember_state
>> + REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */
>> + /* dtv[td->tlsinfo.ti_module].pointer.val +
>> td->tlsinfo.ti_offset */
>> + add.d a0, t1, t2
>> +.Lret:
>> + sub.d a0, a0, tp
>> + REG_L t0, sp, 0
>> + REG_L t1, sp, 8
>> + REG_L t2, sp, 16
>> + ADDI sp, sp, 32
>> + cfi_adjust_cfa_offset (-32)
>> + RET
>> +
>> +.Lslow:
>> + /* This is the slow path. We need to call __tls_get_addr() which
>> + means we need to save and restore all the register that the
>> + callee will trash. */
>> +
>> + /* Save the remaining registers that we must treat as caller
>> save. */
>> + cfi_restore_state
>> + ADDI sp, sp, -FRAME_SIZE
>> + cfi_adjust_cfa_offset (FRAME_SIZE)
>> + REG_S ra, sp, 0 * SZREG
>> + REG_S a1, sp, 1 * SZREG
>> + REG_S a2, sp, 2 * SZREG
>> + REG_S a3, sp, 3 * SZREG
>> + REG_S a4, sp, 4 * SZREG
>> + REG_S a5, sp, 5 * SZREG
>> + REG_S a6, sp, 6 * SZREG
>> + REG_S a7, sp, 7 * SZREG
>> + REG_S t3, sp, 8 * SZREG
>> + REG_S t4, sp, 9 * SZREG
>> + REG_S t5, sp, 10 * SZREG
>> + REG_S t6, sp, 11 * SZREG
>> + REG_S t7, sp, 12 * SZREG
>> + REG_S t8, sp, 13 * SZREG
>> + cfi_rel_offset (1, 0 * SZREG)
>> + cfi_rel_offset (5, 1 * SZREG)
>> + cfi_rel_offset (6, 2 * SZREG)
>> + cfi_rel_offset (7, 3 * SZREG)
>> + cfi_rel_offset (8, 4 * SZREG)
>> + cfi_rel_offset (9, 5 * SZREG)
>> + cfi_rel_offset (10, 6 * SZREG)
>> + cfi_rel_offset (11, 7 * SZREG)
>> + cfi_rel_offset (15, 8 * SZREG)
>> + cfi_rel_offset (16, 9 * SZREG)
>> + cfi_rel_offset (17, 10 * SZREG)
>> + cfi_rel_offset (18, 11 * SZREG)
>> + cfi_rel_offset (19, 12 * SZREG)
>> + cfi_rel_offset (20, 13 * SZREG)
>> +
>> +#ifndef __loongarch_soft_float
>> +
>> + /* Save fcsr0 register.
>> + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
>> + of some fields in fcsr0. */
>> + movfcsr2gr t0, fcsr0
>> + st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2. */
>> +
>> +#ifdef USE_LASX
If we define macros like follows, we can reduce some codes.
// An example.
#if defined(USE_LASX)
#define V_REG_S xvst
#define V_REG_L xvld
#define V_SPACE FRAME_SIZE_LASX
#define V_REG(n) $xr##n
#define V_REGS
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
#define V_REGSZ SZXREG
#elif defined(USE_LSX)
#define V_REG_S vst
#define V_REG_L vld
#define V_SPACE FRAME_SIZE_LSX
#define V_REG(n) $vr##n
#define V_REGS
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
#define V_REGSZ SZVREG
#else
#define V_REG_S fst.d
#define V_REG_L fld.d
#define V_SPACE FRAME_SIZE_FLOAT
#define V_REG(n) $f##n
#define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
#define V_REGSZ SZFREG
#endif
ADDI sp, sp, -V_SPACE
cfi_adjust_cfa_offset (V_SPACE)
.irp i,V_REGS
V_REG_S V_REG(\i), sp, \i * V_REGSZ
.endr
.irp i,V_REGS
V_REG_L V_REG(\i), sp, \i * V_REGSZ
.endr
ADDI sp, sp, V_SPACE
cfi_adjust_cfa_offset (-V_SPACE)
Thanks.
Jinyang
>> +
>> + /* Save 256-bit vector registers.
>> + FIXME: Without vector ABI, save all vector registers. */
>> + ADDI sp, sp, -FRAME_SIZE_LASX
>> + cfi_adjust_cfa_offset (FRAME_SIZE_LASX)
>> + xvst xr0, sp, 0*SZXREG
>> + xvst xr1, sp, 1*SZXREG
>> + xvst xr2, sp, 2*SZXREG
>> + xvst xr3, sp, 3*SZXREG
>> + xvst xr4, sp, 4*SZXREG
>> + xvst xr5, sp, 5*SZXREG
>> + xvst xr6, sp, 6*SZXREG
>> + xvst xr7, sp, 7*SZXREG
>> + xvst xr8, sp, 8*SZXREG
>> + xvst xr9, sp, 9*SZXREG
>> + xvst xr10, sp, 10*SZXREG
>> + xvst xr11, sp, 11*SZXREG
>> + xvst xr12, sp, 12*SZXREG
>> + xvst xr13, sp, 13*SZXREG
>> + xvst xr14, sp, 14*SZXREG
>> + xvst xr15, sp, 15*SZXREG
>> + xvst xr16, sp, 16*SZXREG
>> + xvst xr17, sp, 17*SZXREG
>> + xvst xr18, sp, 18*SZXREG
>> + xvst xr19, sp, 19*SZXREG
>> + xvst xr20, sp, 20*SZXREG
>> + xvst xr21, sp, 21*SZXREG
>> + xvst xr22, sp, 22*SZXREG
>> + xvst xr23, sp, 23*SZXREG
>> + xvst xr24, sp, 24*SZXREG
>> + xvst xr25, sp, 25*SZXREG
>> + xvst xr26, sp, 26*SZXREG
>> + xvst xr27, sp, 27*SZXREG
>> + xvst xr28, sp, 28*SZXREG
>> + xvst xr29, sp, 29*SZXREG
>> + xvst xr30, sp, 30*SZXREG
>> + xvst xr31, sp, 31*SZXREG
>> +
>> +#elif defined USE_LSX
>> +
>> + /* Save 128-bit vector registers. */
>> + ADDI sp, sp, -FRAME_SIZE_LSX
>> + cfi_adjust_cfa_offset (FRAME_SIZE_LSX)
>> + vst vr0, sp, 0*SZVREG
>> + vst vr1, sp, 1*SZVREG
>> + vst vr2, sp, 2*SZVREG
>> + vst vr3, sp, 3*SZVREG
>> + vst vr4, sp, 4*SZVREG
>> + vst vr5, sp, 5*SZVREG
>> + vst vr6, sp, 6*SZVREG
>> + vst vr7, sp, 7*SZVREG
>> + vst vr8, sp, 8*SZVREG
>> + vst vr9, sp, 9*SZVREG
>> + vst vr10, sp, 10*SZVREG
>> + vst vr11, sp, 11*SZVREG
>> + vst vr12, sp, 12*SZVREG
>> + vst vr13, sp, 13*SZVREG
>> + vst vr14, sp, 14*SZVREG
>> + vst vr15, sp, 15*SZVREG
>> + vst vr16, sp, 16*SZVREG
>> + vst vr17, sp, 17*SZVREG
>> + vst vr18, sp, 18*SZVREG
>> + vst vr19, sp, 19*SZVREG
>> + vst vr20, sp, 20*SZVREG
>> + vst vr21, sp, 21*SZVREG
>> + vst vr22, sp, 22*SZVREG
>> + vst vr23, sp, 23*SZVREG
>> + vst vr24, sp, 24*SZVREG
>> + vst vr25, sp, 25*SZVREG
>> + vst vr26, sp, 26*SZVREG
>> + vst vr27, sp, 27*SZVREG
>> + vst vr28, sp, 28*SZVREG
>> + vst vr29, sp, 29*SZVREG
>> + vst vr30, sp, 30*SZVREG
>> + vst vr31, sp, 31*SZVREG
>> +
>> +# else
>> +
>> + /* Save float registers. */
>> + ADDI sp, sp, -FRAME_SIZE_FLOAT
>> + cfi_adjust_cfa_offset (FRAME_SIZE_FLOAT)
>> + FREG_S fa0, sp, 0*SZFREG
>> + FREG_S fa1, sp, 1*SZFREG
>> + FREG_S fa2, sp, 2*SZFREG
>> + FREG_S fa3, sp, 3*SZFREG
>> + FREG_S fa4, sp, 4*SZFREG
>> + FREG_S fa5, sp, 5*SZFREG
>> + FREG_S fa6, sp, 6*SZFREG
>> + FREG_S fa7, sp, 7*SZFREG
>> + FREG_S ft0, sp, 8*SZFREG
>> + FREG_S ft1, sp, 9*SZFREG
>> + FREG_S ft2, sp, 10*SZFREG
>> + FREG_S ft3, sp, 11*SZFREG
>> + FREG_S ft4, sp, 12*SZFREG
>> + FREG_S ft5, sp, 13*SZFREG
>> + FREG_S ft6, sp, 14*SZFREG
>> + FREG_S ft7, sp, 15*SZFREG
>> + FREG_S ft8, sp, 16*SZFREG
>> + FREG_S ft9, sp, 17*SZFREG
>> + FREG_S ft10, sp, 18*SZFREG
>> + FREG_S ft11, sp, 19*SZFREG
>> + FREG_S ft12, sp, 20*SZFREG
>> + FREG_S ft13, sp, 21*SZFREG
>> + FREG_S ft14, sp, 22*SZFREG
>> + FREG_S ft15, sp, 23*SZFREG
>> +
>> +#endif /* #ifdef USE_LASX */
>> +#endif /* #ifndef __loongarch_soft_float */
>> +
>> + bl HIDDEN_JUMPTARGET(__tls_get_addr)
>> + ADDI a0, a0, -TLS_DTV_OFFSET
>> +
>> +#ifndef __loongarch_soft_float
>> +#ifdef USE_LASX
>> +
>> + /* Restore 256-bit vector registers. */
>> + xvld xr0, sp, 0*SZXREG
>> + xvld xr1, sp, 1*SZXREG
>> + xvld xr2, sp, 2*SZXREG
>> + xvld xr3, sp, 3*SZXREG
>> + xvld xr4, sp, 4*SZXREG
>> + xvld xr5, sp, 5*SZXREG
>> + xvld xr6, sp, 6*SZXREG
>> + xvld xr7, sp, 7*SZXREG
>> + xvld xr8, sp, 8*SZXREG
>> + xvld xr9, sp, 9*SZXREG
>> + xvld xr10, sp, 10*SZXREG
>> + xvld xr11, sp, 11*SZXREG
>> + xvld xr12, sp, 12*SZXREG
>> + xvld xr13, sp, 13*SZXREG
>> + xvld xr14, sp, 14*SZXREG
>> + xvld xr15, sp, 15*SZXREG
>> + xvld xr16, sp, 16*SZXREG
>> + xvld xr17, sp, 17*SZXREG
>> + xvld xr18, sp, 18*SZXREG
>> + xvld xr19, sp, 19*SZXREG
>> + xvld xr20, sp, 20*SZXREG
>> + xvld xr21, sp, 21*SZXREG
>> + xvld xr22, sp, 22*SZXREG
>> + xvld xr23, sp, 23*SZXREG
>> + xvld xr24, sp, 24*SZXREG
>> + xvld xr25, sp, 25*SZXREG
>> + xvld xr26, sp, 26*SZXREG
>> + xvld xr27, sp, 27*SZXREG
>> + xvld xr28, sp, 28*SZXREG
>> + xvld xr29, sp, 29*SZXREG
>> + xvld xr30, sp, 30*SZXREG
>> + xvld xr31, sp, 31*SZXREG
>> + ADDI sp, sp, FRAME_SIZE_LASX
>> + cfi_adjust_cfa_offset (-FRAME_SIZE_LASX)
>> +
>> +#elif defined USE_LSX
>> +
>> + /* Restore 128-bit vector registers. */
>> + vld vr0, sp, 0*SZVREG
>> + vld vr1, sp, 1*SZVREG
>> + vld vr2, sp, 2*SZVREG
>> + vld vr3, sp, 3*SZVREG
>> + vld vr4, sp, 4*SZVREG
>> + vld vr5, sp, 5*SZVREG
>> + vld vr6, sp, 6*SZVREG
>> + vld vr7, sp, 7*SZVREG
>> + vld vr8, sp, 8*SZVREG
>> + vld vr9, sp, 9*SZVREG
>> + vld vr10, sp, 10*SZVREG
>> + vld vr11, sp, 11*SZVREG
>> + vld vr12, sp, 12*SZVREG
>> + vld vr13, sp, 13*SZVREG
>> + vld vr14, sp, 14*SZVREG
>> + vld vr15, sp, 15*SZVREG
>> + vld vr16, sp, 16*SZVREG
>> + vld vr17, sp, 17*SZVREG
>> + vld vr18, sp, 18*SZVREG
>> + vld vr19, sp, 19*SZVREG
>> + vld vr20, sp, 20*SZVREG
>> + vld vr21, sp, 21*SZVREG
>> + vld vr22, sp, 22*SZVREG
>> + vld vr23, sp, 23*SZVREG
>> + vld vr24, sp, 24*SZVREG
>> + vld vr25, sp, 25*SZVREG
>> + vld vr26, sp, 26*SZVREG
>> + vld vr27, sp, 27*SZVREG
>> + vld vr28, sp, 28*SZVREG
>> + vld vr29, sp, 29*SZVREG
>> + vld vr30, sp, 30*SZVREG
>> + vld vr31, sp, 31*SZVREG
>> + ADDI sp, sp, FRAME_SIZE_LSX
>> + cfi_adjust_cfa_offset (-FRAME_SIZE_LSX)
>> +
>> +#else
>> +
>> + /* Restore float registers. */
>> + FREG_L fa0, sp, 0*SZFREG
>> + FREG_L fa1, sp, 1*SZFREG
>> + FREG_L fa2, sp, 2*SZFREG
>> + FREG_L fa3, sp, 3*SZFREG
>> + FREG_L fa4, sp, 4*SZFREG
>> + FREG_L fa5, sp, 5*SZFREG
>> + FREG_L fa6, sp, 6*SZFREG
>> + FREG_L fa7, sp, 7*SZFREG
>> + FREG_L ft0, sp, 8*SZFREG
>> + FREG_L ft1, sp, 9*SZFREG
>> + FREG_L ft2, sp, 10*SZFREG
>> + FREG_L ft3, sp, 11*SZFREG
>> + FREG_L ft4, sp, 12*SZFREG
>> + FREG_L ft5, sp, 13*SZFREG
>> + FREG_L ft6, sp, 14*SZFREG
>> + FREG_L ft7, sp, 15*SZFREG
>> + FREG_L ft8, sp, 16*SZFREG
>> + FREG_L ft9, sp, 17*SZFREG
>> + FREG_L ft10, sp, 18*SZFREG
>> + FREG_L ft11, sp, 19*SZFREG
>> + FREG_L ft12, sp, 20*SZFREG
>> + FREG_L ft13, sp, 21*SZFREG
>> + FREG_L ft14, sp, 22*SZFREG
>> + FREG_L ft15, sp, 23*SZFREG
>> + ADDI sp, sp, FRAME_SIZE_FLOAT
>> + cfi_adjust_cfa_offset (-FRAME_SIZE_FLOAT)
>> +
>> +#endif /* #ifdef USE_LASX */
>> +
>> + /* Restore fcsr0 register. */
>> + ld.w t0, sp, FRAME_SIZE + 24
>> + movgr2fcsr fcsr0, t0
>> +
>> +#endif /* #ifndef __loongarch_soft_float */
>> +
>> + REG_L ra, sp, 0 * SZREG
>> + REG_L a1, sp, 1 * SZREG
>> + REG_L a2, sp, 2 * SZREG
>> + REG_L a3, sp, 3 * SZREG
>> + REG_L a4, sp, 4 * SZREG
>> + REG_L a5, sp, 5 * SZREG
>> + REG_L a6, sp, 6 * SZREG
>> + REG_L a7, sp, 7 * SZREG
>> + REG_L t3, sp, 8 * SZREG
>> + REG_L t4, sp, 9 * SZREG
>> + REG_L t5, sp, 10 * SZREG
>> + REG_L t6, sp, 11 * SZREG
>> + REG_L t7, sp, 12 * SZREG
>> + REG_L t8, sp, 13 * SZREG
>> + ADDI sp, sp, FRAME_SIZE
>> + cfi_adjust_cfa_offset (-FRAME_SIZE)
>> +
>> + b .Lret
>> + cfi_endproc
>> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>> + .hidden HIDDEN_JUMPTARGET(__tls_get_addr)
>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S
>> b/sysdeps/loongarch/dl-tlsdesc.S
>> index a6627cc754..b6cfd6121d 100644
>> --- a/sysdeps/loongarch/dl-tlsdesc.S
>> +++ b/sysdeps/loongarch/dl-tlsdesc.S
>> @@ -59,376 +59,34 @@ _dl_tlsdesc_undefweak:
>> cfi_endproc
>> .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>> -
>> #ifdef SHARED
>> -#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
>> -#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
>> -#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
>> -#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
>> -
>> - /* Handler for dynamic TLS symbols.
>> - Prototype:
>> - _dl_tlsdesc_dynamic (tlsdesc *) ;
>> -
>> - The second word of the descriptor points to a
>> - tlsdesc_dynamic_arg structure.
>> -
>> - Returns the offset between the thread pointer and the
>> - object referenced by the argument.
>> -
>> - ptrdiff_t
>> - _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>> - {
>> - struct tlsdesc_dynamic_arg *td = tdp->arg;
>> - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer -
>> SIZE_OF_TCB);
>> - if (__glibc_likely (td->gen_count <= dtv[0].counter
>> - && (dtv[td->tlsinfo.ti_module].pointer.val
>> - != TLS_DTV_UNALLOCATED),
>> - 1))
>> - return dtv[td->tlsinfo.ti_module].pointer.val
>> - + td->tlsinfo.ti_offset
>> - - __thread_pointer;
>> -
>> - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>> - } */
>> - .hidden _dl_tlsdesc_dynamic
>> - .global _dl_tlsdesc_dynamic
>> - .type _dl_tlsdesc_dynamic,%function
>> - cfi_startproc
>> - .align 2
>> -_dl_tlsdesc_dynamic:
>> - /* Save just enough registers to support fast path, if we fall
>> - into slow path we will save additional registers. */
>> - ADDI sp, sp, -32
>> - REG_S t0, sp, 0
>> - REG_S t1, sp, 8
>> - REG_S t2, sp, 16
>> -
>> -/* Runtime Storage Layout of Thread-Local Storage
>> - TP point to the start of TLS block.
>> -
>> - dtv
>> -Low address TCB ----------------> dtv0(counter)
>> - TP --> static_block0 <----- dtv1
>> - static_block1 <----- dtv2
>> - static_block2 <----- dtv3
>> - dynamic_block0 <----- dtv4
>> -Hign address dynamic_block1 <----- dtv5 */
>> -
>> - REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
>> - REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
>> - REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
>> - REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
>> - /* If dtv[0].counter < td->gen_count, goto slow path. */
>> - bltu t2, t1, .Lslow
>> -
>> - REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
>> - /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
>> - slli.d t1, t1, 4
>> - add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
>> - REG_L t1, t1, 0 /* t1 =
>> dtv[td->tlsinfo.ti_module].pointer.val */
>> - li.d t2, TLS_DTV_UNALLOCATED
>> - /* If dtv[td->tlsinfo.ti_module].pointer.val is
>> TLS_DTV_UNALLOCATED,
>> - goto slow path. */
>> - beq t1, t2, .Lslow
>> -
>> - REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */
>> - /* dtv[td->tlsinfo.ti_module].pointer.val +
>> td->tlsinfo.ti_offset */
>> - add.d a0, t1, t2
>> -.Lret:
>> - sub.d a0, a0, tp
>> - REG_L t0, sp, 0
>> - REG_L t1, sp, 8
>> - REG_L t2, sp, 16
>> - ADDI sp, sp, 32
>> - RET
>> -
>> -.Lslow:
>> - /* This is the slow path. We need to call __tls_get_addr() which
>> - means we need to save and restore all the register that the
>> - callee will trash. */
>> -
>> - /* Save the remaining registers that we must treat as caller
>> save. */
>> - ADDI sp, sp, -FRAME_SIZE
>> - REG_S ra, sp, 0 * SZREG
>> - REG_S a1, sp, 1 * SZREG
>> - REG_S a2, sp, 2 * SZREG
>> - REG_S a3, sp, 3 * SZREG
>> - REG_S a4, sp, 4 * SZREG
>> - REG_S a5, sp, 5 * SZREG
>> - REG_S a6, sp, 6 * SZREG
>> - REG_S a7, sp, 7 * SZREG
>> - REG_S t3, sp, 8 * SZREG
>> - REG_S t4, sp, 9 * SZREG
>> - REG_S t5, sp, 10 * SZREG
>> - REG_S t6, sp, 11 * SZREG
>> - REG_S t7, sp, 12 * SZREG
>> - REG_S t8, sp, 13 * SZREG
>> -
>> #ifndef __loongarch_soft_float
>> - /* Save fcsr0 register.
>> - Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
>> - of some fields in fcsr0. */
>> - movfcsr2gr t0, fcsr0
>> - st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */
>> -
>> - /* Whether support LASX. */
>> - la.global t0, _rtld_global_ro
>> - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
>> - andi t1, t0, HWCAP_LOONGARCH_LASX
>> - beqz t1, .Llsx
>> -
>> - /* Save 256-bit vector registers.
>> - FIXME: Without vector ABI, save all vector registers. */
>> - ADDI sp, sp, -FRAME_SIZE_LASX
>> - xvst xr0, sp, 0*SZXREG
>> - xvst xr1, sp, 1*SZXREG
>> - xvst xr2, sp, 2*SZXREG
>> - xvst xr3, sp, 3*SZXREG
>> - xvst xr4, sp, 4*SZXREG
>> - xvst xr5, sp, 5*SZXREG
>> - xvst xr6, sp, 6*SZXREG
>> - xvst xr7, sp, 7*SZXREG
>> - xvst xr8, sp, 8*SZXREG
>> - xvst xr9, sp, 9*SZXREG
>> - xvst xr10, sp, 10*SZXREG
>> - xvst xr11, sp, 11*SZXREG
>> - xvst xr12, sp, 12*SZXREG
>> - xvst xr13, sp, 13*SZXREG
>> - xvst xr14, sp, 14*SZXREG
>> - xvst xr15, sp, 15*SZXREG
>> - xvst xr16, sp, 16*SZXREG
>> - xvst xr17, sp, 17*SZXREG
>> - xvst xr18, sp, 18*SZXREG
>> - xvst xr19, sp, 19*SZXREG
>> - xvst xr20, sp, 20*SZXREG
>> - xvst xr21, sp, 21*SZXREG
>> - xvst xr22, sp, 22*SZXREG
>> - xvst xr23, sp, 23*SZXREG
>> - xvst xr24, sp, 24*SZXREG
>> - xvst xr25, sp, 25*SZXREG
>> - xvst xr26, sp, 26*SZXREG
>> - xvst xr27, sp, 27*SZXREG
>> - xvst xr28, sp, 28*SZXREG
>> - xvst xr29, sp, 29*SZXREG
>> - xvst xr30, sp, 30*SZXREG
>> - xvst xr31, sp, 31*SZXREG
>> - b .Ltga
>> -
>> -.Llsx:
>> - /* Whether support LSX. */
>> - andi t1, t0, HWCAP_LOONGARCH_LSX
>> - beqz t1, .Lfloat
>> -
>> - /* Save 128-bit vector registers. */
>> - ADDI sp, sp, -FRAME_SIZE_LSX
>> - vst vr0, sp, 0*SZVREG
>> - vst vr1, sp, 1*SZVREG
>> - vst vr2, sp, 2*SZVREG
>> - vst vr3, sp, 3*SZVREG
>> - vst vr4, sp, 4*SZVREG
>> - vst vr5, sp, 5*SZVREG
>> - vst vr6, sp, 6*SZVREG
>> - vst vr7, sp, 7*SZVREG
>> - vst vr8, sp, 8*SZVREG
>> - vst vr9, sp, 9*SZVREG
>> - vst vr10, sp, 10*SZVREG
>> - vst vr11, sp, 11*SZVREG
>> - vst vr12, sp, 12*SZVREG
>> - vst vr13, sp, 13*SZVREG
>> - vst vr14, sp, 14*SZVREG
>> - vst vr15, sp, 15*SZVREG
>> - vst vr16, sp, 16*SZVREG
>> - vst vr17, sp, 17*SZVREG
>> - vst vr18, sp, 18*SZVREG
>> - vst vr19, sp, 19*SZVREG
>> - vst vr20, sp, 20*SZVREG
>> - vst vr21, sp, 21*SZVREG
>> - vst vr22, sp, 22*SZVREG
>> - vst vr23, sp, 23*SZVREG
>> - vst vr24, sp, 24*SZVREG
>> - vst vr25, sp, 25*SZVREG
>> - vst vr26, sp, 26*SZVREG
>> - vst vr27, sp, 27*SZVREG
>> - vst vr28, sp, 28*SZVREG
>> - vst vr29, sp, 29*SZVREG
>> - vst vr30, sp, 30*SZVREG
>> - vst vr31, sp, 31*SZVREG
>> - b .Ltga
>> -
>> -.Lfloat:
>> - /* Save float registers. */
>> - ADDI sp, sp, -FRAME_SIZE_FLOAT
>> - FREG_S fa0, sp, 0*SZFREG
>> - FREG_S fa1, sp, 1*SZFREG
>> - FREG_S fa2, sp, 2*SZFREG
>> - FREG_S fa3, sp, 3*SZFREG
>> - FREG_S fa4, sp, 4*SZFREG
>> - FREG_S fa5, sp, 5*SZFREG
>> - FREG_S fa6, sp, 6*SZFREG
>> - FREG_S fa7, sp, 7*SZFREG
>> - FREG_S ft0, sp, 8*SZFREG
>> - FREG_S ft1, sp, 9*SZFREG
>> - FREG_S ft2, sp, 10*SZFREG
>> - FREG_S ft3, sp, 11*SZFREG
>> - FREG_S ft4, sp, 12*SZFREG
>> - FREG_S ft5, sp, 13*SZFREG
>> - FREG_S ft6, sp, 14*SZFREG
>> - FREG_S ft7, sp, 15*SZFREG
>> - FREG_S ft8, sp, 16*SZFREG
>> - FREG_S ft9, sp, 17*SZFREG
>> - FREG_S ft10, sp, 18*SZFREG
>> - FREG_S ft11, sp, 19*SZFREG
>> - FREG_S ft12, sp, 20*SZFREG
>> - FREG_S ft13, sp, 21*SZFREG
>> - FREG_S ft14, sp, 22*SZFREG
>> - FREG_S ft15, sp, 23*SZFREG
>> -
>> -#endif /* #ifndef __loongarch_soft_float */
>> -
>> -.Ltga:
>> - bl HIDDEN_JUMPTARGET(__tls_get_addr)
>> - ADDI a0, a0, -TLS_DTV_OFFSET
>> -
>> -#ifndef __loongarch_soft_float
>> -
>> - la.global t0, _rtld_global_ro
>> - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
>> - andi t1, t0, HWCAP_LOONGARCH_LASX
>> - beqz t1, .Llsx1
>> -
>> - /* Restore 256-bit vector registers. */
>> - xvld xr0, sp, 0*SZXREG
>> - xvld xr1, sp, 1*SZXREG
>> - xvld xr2, sp, 2*SZXREG
>> - xvld xr3, sp, 3*SZXREG
>> - xvld xr4, sp, 4*SZXREG
>> - xvld xr5, sp, 5*SZXREG
>> - xvld xr6, sp, 6*SZXREG
>> - xvld xr7, sp, 7*SZXREG
>> - xvld xr8, sp, 8*SZXREG
>> - xvld xr9, sp, 9*SZXREG
>> - xvld xr10, sp, 10*SZXREG
>> - xvld xr11, sp, 11*SZXREG
>> - xvld xr12, sp, 12*SZXREG
>> - xvld xr13, sp, 13*SZXREG
>> - xvld xr14, sp, 14*SZXREG
>> - xvld xr15, sp, 15*SZXREG
>> - xvld xr16, sp, 16*SZXREG
>> - xvld xr17, sp, 17*SZXREG
>> - xvld xr18, sp, 18*SZXREG
>> - xvld xr19, sp, 19*SZXREG
>> - xvld xr20, sp, 20*SZXREG
>> - xvld xr21, sp, 21*SZXREG
>> - xvld xr22, sp, 22*SZXREG
>> - xvld xr23, sp, 23*SZXREG
>> - xvld xr24, sp, 24*SZXREG
>> - xvld xr25, sp, 25*SZXREG
>> - xvld xr26, sp, 26*SZXREG
>> - xvld xr27, sp, 27*SZXREG
>> - xvld xr28, sp, 28*SZXREG
>> - xvld xr29, sp, 29*SZXREG
>> - xvld xr30, sp, 30*SZXREG
>> - xvld xr31, sp, 31*SZXREG
>> - ADDI sp, sp, FRAME_SIZE_LASX
>> - b .Lfcsr
>> -
>> -.Llsx1:
>> - andi t1, t0, HWCAP_LOONGARCH_LSX
>> - beqz t1, .Lfloat1
>> -
>> - /* Restore 128-bit vector registers. */
>> - vld vr0, sp, 0*SZVREG
>> - vld vr1, sp, 1*SZVREG
>> - vld vr2, sp, 2*SZVREG
>> - vld vr3, sp, 3*SZVREG
>> - vld vr4, sp, 4*SZVREG
>> - vld vr5, sp, 5*SZVREG
>> - vld vr6, sp, 6*SZVREG
>> - vld vr7, sp, 7*SZVREG
>> - vld vr8, sp, 8*SZVREG
>> - vld vr9, sp, 9*SZVREG
>> - vld vr10, sp, 10*SZVREG
>> - vld vr11, sp, 11*SZVREG
>> - vld vr12, sp, 12*SZVREG
>> - vld vr13, sp, 13*SZVREG
>> - vld vr14, sp, 14*SZVREG
>> - vld vr15, sp, 15*SZVREG
>> - vld vr16, sp, 16*SZVREG
>> - vld vr17, sp, 17*SZVREG
>> - vld vr18, sp, 18*SZVREG
>> - vld vr19, sp, 19*SZVREG
>> - vld vr20, sp, 20*SZVREG
>> - vld vr21, sp, 21*SZVREG
>> - vld vr22, sp, 22*SZVREG
>> - vld vr23, sp, 23*SZVREG
>> - vld vr24, sp, 24*SZVREG
>> - vld vr25, sp, 25*SZVREG
>> - vld vr26, sp, 26*SZVREG
>> - vld vr27, sp, 27*SZVREG
>> - vld vr28, sp, 28*SZVREG
>> - vld vr29, sp, 29*SZVREG
>> - vld vr30, sp, 30*SZVREG
>> - vld vr31, sp, 31*SZVREG
>> - ADDI sp, sp, FRAME_SIZE_LSX
>> - b .Lfcsr
>> -
>> -.Lfloat1:
>> - /* Restore float registers. */
>> - FREG_L fa0, sp, 0*SZFREG
>> - FREG_L fa1, sp, 1*SZFREG
>> - FREG_L fa2, sp, 2*SZFREG
>> - FREG_L fa3, sp, 3*SZFREG
>> - FREG_L fa4, sp, 4*SZFREG
>> - FREG_L fa5, sp, 5*SZFREG
>> - FREG_L fa6, sp, 6*SZFREG
>> - FREG_L fa7, sp, 7*SZFREG
>> - FREG_L ft0, sp, 8*SZFREG
>> - FREG_L ft1, sp, 9*SZFREG
>> - FREG_L ft2, sp, 10*SZFREG
>> - FREG_L ft3, sp, 11*SZFREG
>> - FREG_L ft4, sp, 12*SZFREG
>> - FREG_L ft5, sp, 13*SZFREG
>> - FREG_L ft6, sp, 14*SZFREG
>> - FREG_L ft7, sp, 15*SZFREG
>> - FREG_L ft8, sp, 16*SZFREG
>> - FREG_L ft9, sp, 17*SZFREG
>> - FREG_L ft10, sp, 18*SZFREG
>> - FREG_L ft11, sp, 19*SZFREG
>> - FREG_L ft12, sp, 20*SZFREG
>> - FREG_L ft13, sp, 21*SZFREG
>> - FREG_L ft14, sp, 22*SZFREG
>> - FREG_L ft15, sp, 23*SZFREG
>> - ADDI sp, sp, FRAME_SIZE_FLOAT
>> -
>> -.Lfcsr:
>> - /* Restore fcsr0 register. */
>> - ld.w t0, sp, FRAME_SIZE + 24
>> - movgr2fcsr fcsr0, t0
>> +#define USE_LASX
>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
>> +#define Lret Lret_lasx
>> +#define Lslow Lslow_lasx
>> +#include "dl-tlsdesc-dynamic.h"
>> +#undef FRAME_SIZE
>> +#undef USE_LASX
>> +#undef _dl_tlsdesc_dynamic
>> +#undef Lret
>> +#undef Lslow
>> +
>> +#define USE_LSX
>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
>> +#define Lret Lret_lsx
>> +#define Lslow Lslow_lsx
>> +#include "dl-tlsdesc-dynamic.h"
>> +#undef FRAME_SIZE
>> +#undef USE_LSX
>> +#undef _dl_tlsdesc_dynamic
>> +#undef Lret
>> +#undef Lslow
>> #endif /* #ifndef __loongarch_soft_float */
>> - REG_L ra, sp, 0 * SZREG
>> - REG_L a1, sp, 1 * SZREG
>> - REG_L a2, sp, 2 * SZREG
>> - REG_L a3, sp, 3 * SZREG
>> - REG_L a4, sp, 4 * SZREG
>> - REG_L a5, sp, 5 * SZREG
>> - REG_L a6, sp, 6 * SZREG
>> - REG_L a7, sp, 7 * SZREG
>> - REG_L t3, sp, 8 * SZREG
>> - REG_L t4, sp, 9 * SZREG
>> - REG_L t5, sp, 10 * SZREG
>> - REG_L t6, sp, 11 * SZREG
>> - REG_L t7, sp, 12 * SZREG
>> - REG_L t8, sp, 13 * SZREG
>> - ADDI sp, sp, FRAME_SIZE
>> -
>> - b .Lret
>> - cfi_endproc
>> - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>> - .hidden HIDDEN_JUMPTARGET(__tls_get_addr)
>> +#include "dl-tlsdesc-dynamic.h"
>> #endif /* #ifdef SHARED */
>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h
>> b/sysdeps/loongarch/dl-tlsdesc.h
>> index ff8c69cb93..45c43a5b52 100644
>> --- a/sysdeps/loongarch/dl-tlsdesc.h
>> +++ b/sysdeps/loongarch/dl-tlsdesc.h
>> @@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden
>> _dl_tlsdesc_undefweak (struct tlsdesc *);
>> #ifdef SHARED
>> extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
>> +#ifndef __loongarch_soft_float
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct
>> tlsdesc *);
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct
>> tlsdesc *);
>> +#endif
>> extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct
>> tlsdesc *);
>> #endif
在 2024/7/2 下午6:44, Jinyang He 写道:
>
> On 2024-07-01 17:27, mengqinggang wrote:
>> Ping.
>>
>>
>> The reason of changing to three _dl_tlsdesc_dynamic:
>> In one _dl_tlsdesc_dynamic, there are three cfi_adjust_cfa_offset
>> for Float/LSX/LASX path.
>> Three cfi_adjust_cfa_offset are always executed in stack unwinding,
>> but only once stack down
>> instruction is executed. It resulting in incorrect CFA address.
>>
>>
>> With three _dl_tlsdesc_dynamic functions, three cfi_adjust_cfa_offset
>> can be distributed to three functions.
>> So cfi instructions can correspond to stack down instructions.
> But in the old version, the code didn't set other cfi_adjust_cfa_offset
> when do "stack down instructions", which may cause wrong. I think we can
> keep it just one _dl_tlsdesc_dynamic if set enough `cfi_*`. Of course,
I don't know how to write cfi instructions if only one _dl_tlsdesc_dynamic.
> spliting it into three funcs is OK because we not need read HWCAP each
> time when call _dl_tlsdesc_dynamic.
>>
>>
>>
>> 在 2024/6/26 下午2:34, mengqinggang 写道:
>>> Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic,
>>> _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
>>> Conflicting cfi instructions can be distributed to the
>>> three functions.
>>> ---
>>> Changes v1 -> v2:
>>> - Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic,
>>> _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
>>>
>>> v1 link:
>>> https://sourceware.org/pipermail/libc-alpha/2024-June/157270.html
>>>
>>> sysdeps/loongarch/dl-machine.h | 7 +
>>> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 403
>>> +++++++++++++++++++++++++
>>> sysdeps/loongarch/dl-tlsdesc.S | 386 ++---------------------
>>> sysdeps/loongarch/dl-tlsdesc.h | 4 +
>>> 4 files changed, 436 insertions(+), 364 deletions(-)
>>> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>>
>>> diff --git a/sysdeps/loongarch/dl-machine.h
>>> b/sysdeps/loongarch/dl-machine.h
>>> index ab6f1da7c0..04fabbf598 100644
>>> --- a/sysdeps/loongarch/dl-machine.h
>>> +++ b/sysdeps/loongarch/dl-machine.h
>>> @@ -223,6 +223,13 @@ elf_machine_rela (struct link_map *map, struct
>>> r_scope_elem *scope[],
>>> {
>>> td->arg = _dl_make_tlsdesc_dynamic (sym_map,
>>> sym->st_value + reloc->r_addend);
>>> +# ifndef __loongarch_soft_float
>>> + if (SUPPORT_LASX)
> Why "SUPPORT_LASX" rather than "RTLD_SUPPORT_LASX"?
> The old version read HWCAP at "GLRO_DL_HWCAP_OFFSET", it means
> the HWCAP is at "GLRO_offsetof (dl_hwcap)".
> But,
> #define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap &
> HWCAP_LOONGARCH_LASX)
> #define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
>
>>> + td->entry = _dl_tlsdesc_dynamic_lasx;
>>> + else if (SUPPORT_LSX)
>>> + td->entry = _dl_tlsdesc_dynamic_lsx;
>>> + else
>>> +# endif
>>> td->entry = _dl_tlsdesc_dynamic;
>>> }
>>> else
>>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>> b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>> new file mode 100644
>>> index 0000000000..5b1f43aaf4
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>> @@ -0,0 +1,403 @@
>>> +/* Thread-local storage handling in the ELF dynamic linker.
>>> + LoongArch version.
>>> + Copyright (C) 2024 Free Software Foundation, Inc.
>>> +
>>> + This file is part of the GNU C Library.
>>> +
>>> + The GNU C Library is free software; you can redistribute it and/or
>>> + modify it under the terms of the GNU Lesser General Public
>>> + License as published by the Free Software Foundation; either
>>> + version 2.1 of the License, or (at your option) any later version.
>>> +
>>> + The GNU C Library is distributed in the hope that it will be
>>> useful,
>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>> + Lesser General Public License for more details.
>>> +
>>> + You should have received a copy of the GNU Lesser General Public
>>> + License along with the GNU C Library; if not, see
>>> + <https://www.gnu.org/licenses/>. */
>>> +
>>> +#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
>>> +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
>>> +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
>>> +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
>>> +
>>> + /* Handler for dynamic TLS symbols.
>>> + Prototype:
>>> + _dl_tlsdesc_dynamic (tlsdesc *) ;
>>> +
>>> + The second word of the descriptor points to a
>>> + tlsdesc_dynamic_arg structure.
>>> +
>>> + Returns the offset between the thread pointer and the
>>> + object referenced by the argument.
>>> +
>>> + ptrdiff_t
>>> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>>> + {
>>> + struct tlsdesc_dynamic_arg *td = tdp->arg;
>>> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer -
>>> SIZE_OF_TCB);
>>> + if (__glibc_likely (td->gen_count <= dtv[0].counter
>>> + && (dtv[td->tlsinfo.ti_module].pointer.val
>>> + != TLS_DTV_UNALLOCATED),
>>> + 1))
>>> + return dtv[td->tlsinfo.ti_module].pointer.val
>>> + + td->tlsinfo.ti_offset
>>> + - __thread_pointer;
>>> +
>>> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>>> + } */
>>> + .hidden _dl_tlsdesc_dynamic
>>> + .global _dl_tlsdesc_dynamic
>>> + .type _dl_tlsdesc_dynamic,%function
>>> + cfi_startproc
>>> + .align 2
>>> +_dl_tlsdesc_dynamic:
>>> + /* Save just enough registers to support fast path, if we fall
>>> + into slow path we will save additional registers. */
>>> + ADDI sp, sp, -32
>>> + cfi_adjust_cfa_offset (32)
>>> + REG_S t0, sp, 0
>>> + REG_S t1, sp, 8
>>> + REG_S t2, sp, 16
>>> + cfi_rel_offset (12, 0)
>>> + cfi_rel_offset (13, 8)
>>> + cfi_rel_offset (14, 16)
>>> +
>>> +/* Runtime Storage Layout of Thread-Local Storage
>>> + TP point to the start of TLS block.
>>> +
>>> + dtv
>>> +Low address TCB ----------------> dtv0(counter)
>>> + TP --> static_block0 <----- dtv1
>>> + static_block1 <----- dtv2
>>> + static_block2 <----- dtv3
>>> + dynamic_block0 <----- dtv4
>>> +Hign address dynamic_block1 <----- dtv5 */
>>> +
>>> + REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
>>> + REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
>>> + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
>>> + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
>>> + /* If dtv[0].counter < td->gen_count, goto slow path. */
>>> + bltu t2, t1, .Lslow
>>> +
>>> + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
>>> + /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
>>> + slli.d t1, t1, 4
>>> + add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
>>> + REG_L t1, t1, 0 /* t1 =
>>> dtv[td->tlsinfo.ti_module].pointer.val */
>>> + li.d t2, TLS_DTV_UNALLOCATED
>>> + /* If dtv[td->tlsinfo.ti_module].pointer.val is
>>> TLS_DTV_UNALLOCATED,
>>> + goto slow path. */
>>> + beq t1, t2, .Lslow
>>> +
>>> + cfi_remember_state
>>> + REG_L t2, a0, TLSDESC_MODOFF /* t2 =
>>> td->tlsinfo.ti_offset */
>>> + /* dtv[td->tlsinfo.ti_module].pointer.val +
>>> td->tlsinfo.ti_offset */
>>> + add.d a0, t1, t2
>>> +.Lret:
>>> + sub.d a0, a0, tp
>>> + REG_L t0, sp, 0
>>> + REG_L t1, sp, 8
>>> + REG_L t2, sp, 16
>>> + ADDI sp, sp, 32
>>> + cfi_adjust_cfa_offset (-32)
>>> + RET
>>> +
>>> +.Lslow:
>>> + /* This is the slow path. We need to call __tls_get_addr() which
>>> + means we need to save and restore all the register that the
>>> + callee will trash. */
>>> +
>>> + /* Save the remaining registers that we must treat as caller
>>> save. */
>>> + cfi_restore_state
>>> + ADDI sp, sp, -FRAME_SIZE
>>> + cfi_adjust_cfa_offset (FRAME_SIZE)
>>> + REG_S ra, sp, 0 * SZREG
>>> + REG_S a1, sp, 1 * SZREG
>>> + REG_S a2, sp, 2 * SZREG
>>> + REG_S a3, sp, 3 * SZREG
>>> + REG_S a4, sp, 4 * SZREG
>>> + REG_S a5, sp, 5 * SZREG
>>> + REG_S a6, sp, 6 * SZREG
>>> + REG_S a7, sp, 7 * SZREG
>>> + REG_S t3, sp, 8 * SZREG
>>> + REG_S t4, sp, 9 * SZREG
>>> + REG_S t5, sp, 10 * SZREG
>>> + REG_S t6, sp, 11 * SZREG
>>> + REG_S t7, sp, 12 * SZREG
>>> + REG_S t8, sp, 13 * SZREG
>>> + cfi_rel_offset (1, 0 * SZREG)
>>> + cfi_rel_offset (5, 1 * SZREG)
>>> + cfi_rel_offset (6, 2 * SZREG)
>>> + cfi_rel_offset (7, 3 * SZREG)
>>> + cfi_rel_offset (8, 4 * SZREG)
>>> + cfi_rel_offset (9, 5 * SZREG)
>>> + cfi_rel_offset (10, 6 * SZREG)
>>> + cfi_rel_offset (11, 7 * SZREG)
>>> + cfi_rel_offset (15, 8 * SZREG)
>>> + cfi_rel_offset (16, 9 * SZREG)
>>> + cfi_rel_offset (17, 10 * SZREG)
>>> + cfi_rel_offset (18, 11 * SZREG)
>>> + cfi_rel_offset (19, 12 * SZREG)
>>> + cfi_rel_offset (20, 13 * SZREG)
>>> +
>>> +#ifndef __loongarch_soft_float
>>> +
>>> + /* Save fcsr0 register.
>>> + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
>>> + of some fields in fcsr0. */
>>> + movfcsr2gr t0, fcsr0
>>> + st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above
>>> t2. */
>>> +
>>> +#ifdef USE_LASX
> If we define macros like follows, we can reduce some codes.
>
> // An example.
> #if defined(USE_LASX)
> #define V_REG_S xvst
> #define V_REG_L xvld
> #define V_SPACE FRAME_SIZE_LASX
> #define V_REG(n) $xr##n
> #define V_REGS
> 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
> #define V_REGSZ SZXREG
> #elif defined(USE_LSX)
> #define V_REG_S vst
> #define V_REG_L vld
> #define V_SPACE FRAME_SIZE_LSX
> #define V_REG(n) $vr##n
> #define V_REGS
> 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
> #define V_REGSZ SZVREG
> #else
> #define V_REG_S fst.d
> #define V_REG_L fld.d
> #define V_SPACE FRAME_SIZE_FLOAT
> #define V_REG(n) $f##n
> #define V_REGS
> 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
> #define V_REGSZ SZFREG
> #endif
>
> ADDI sp, sp, -V_SPACE
>
> cfi_adjust_cfa_offset (V_SPACE)
> .irp i,V_REGS
> V_REG_S V_REG(\i), sp, \i * V_REGSZ
> .endr
>
> .irp i,V_REGS
> V_REG_L V_REG(\i), sp, \i * V_REGSZ
> .endr
> ADDI sp, sp, V_SPACE
> cfi_adjust_cfa_offset (-V_SPACE)
>
> Thanks.
>
> Jinyang
>
>>> +
>>> + /* Save 256-bit vector registers.
>>> + FIXME: Without vector ABI, save all vector registers. */
>>> + ADDI sp, sp, -FRAME_SIZE_LASX
>>> + cfi_adjust_cfa_offset (FRAME_SIZE_LASX)
>>> + xvst xr0, sp, 0*SZXREG
>>> + xvst xr1, sp, 1*SZXREG
>>> + xvst xr2, sp, 2*SZXREG
>>> + xvst xr3, sp, 3*SZXREG
>>> + xvst xr4, sp, 4*SZXREG
>>> + xvst xr5, sp, 5*SZXREG
>>> + xvst xr6, sp, 6*SZXREG
>>> + xvst xr7, sp, 7*SZXREG
>>> + xvst xr8, sp, 8*SZXREG
>>> + xvst xr9, sp, 9*SZXREG
>>> + xvst xr10, sp, 10*SZXREG
>>> + xvst xr11, sp, 11*SZXREG
>>> + xvst xr12, sp, 12*SZXREG
>>> + xvst xr13, sp, 13*SZXREG
>>> + xvst xr14, sp, 14*SZXREG
>>> + xvst xr15, sp, 15*SZXREG
>>> + xvst xr16, sp, 16*SZXREG
>>> + xvst xr17, sp, 17*SZXREG
>>> + xvst xr18, sp, 18*SZXREG
>>> + xvst xr19, sp, 19*SZXREG
>>> + xvst xr20, sp, 20*SZXREG
>>> + xvst xr21, sp, 21*SZXREG
>>> + xvst xr22, sp, 22*SZXREG
>>> + xvst xr23, sp, 23*SZXREG
>>> + xvst xr24, sp, 24*SZXREG
>>> + xvst xr25, sp, 25*SZXREG
>>> + xvst xr26, sp, 26*SZXREG
>>> + xvst xr27, sp, 27*SZXREG
>>> + xvst xr28, sp, 28*SZXREG
>>> + xvst xr29, sp, 29*SZXREG
>>> + xvst xr30, sp, 30*SZXREG
>>> + xvst xr31, sp, 31*SZXREG
>>> +
>>> +#elif defined USE_LSX
>>> +
>>> + /* Save 128-bit vector registers. */
>>> + ADDI sp, sp, -FRAME_SIZE_LSX
>>> + cfi_adjust_cfa_offset (FRAME_SIZE_LSX)
>>> + vst vr0, sp, 0*SZVREG
>>> + vst vr1, sp, 1*SZVREG
>>> + vst vr2, sp, 2*SZVREG
>>> + vst vr3, sp, 3*SZVREG
>>> + vst vr4, sp, 4*SZVREG
>>> + vst vr5, sp, 5*SZVREG
>>> + vst vr6, sp, 6*SZVREG
>>> + vst vr7, sp, 7*SZVREG
>>> + vst vr8, sp, 8*SZVREG
>>> + vst vr9, sp, 9*SZVREG
>>> + vst vr10, sp, 10*SZVREG
>>> + vst vr11, sp, 11*SZVREG
>>> + vst vr12, sp, 12*SZVREG
>>> + vst vr13, sp, 13*SZVREG
>>> + vst vr14, sp, 14*SZVREG
>>> + vst vr15, sp, 15*SZVREG
>>> + vst vr16, sp, 16*SZVREG
>>> + vst vr17, sp, 17*SZVREG
>>> + vst vr18, sp, 18*SZVREG
>>> + vst vr19, sp, 19*SZVREG
>>> + vst vr20, sp, 20*SZVREG
>>> + vst vr21, sp, 21*SZVREG
>>> + vst vr22, sp, 22*SZVREG
>>> + vst vr23, sp, 23*SZVREG
>>> + vst vr24, sp, 24*SZVREG
>>> + vst vr25, sp, 25*SZVREG
>>> + vst vr26, sp, 26*SZVREG
>>> + vst vr27, sp, 27*SZVREG
>>> + vst vr28, sp, 28*SZVREG
>>> + vst vr29, sp, 29*SZVREG
>>> + vst vr30, sp, 30*SZVREG
>>> + vst vr31, sp, 31*SZVREG
>>> +
>>> +# else
>>> +
>>> + /* Save float registers. */
>>> + ADDI sp, sp, -FRAME_SIZE_FLOAT
>>> + cfi_adjust_cfa_offset (FRAME_SIZE_FLOAT)
>>> + FREG_S fa0, sp, 0*SZFREG
>>> + FREG_S fa1, sp, 1*SZFREG
>>> + FREG_S fa2, sp, 2*SZFREG
>>> + FREG_S fa3, sp, 3*SZFREG
>>> + FREG_S fa4, sp, 4*SZFREG
>>> + FREG_S fa5, sp, 5*SZFREG
>>> + FREG_S fa6, sp, 6*SZFREG
>>> + FREG_S fa7, sp, 7*SZFREG
>>> + FREG_S ft0, sp, 8*SZFREG
>>> + FREG_S ft1, sp, 9*SZFREG
>>> + FREG_S ft2, sp, 10*SZFREG
>>> + FREG_S ft3, sp, 11*SZFREG
>>> + FREG_S ft4, sp, 12*SZFREG
>>> + FREG_S ft5, sp, 13*SZFREG
>>> + FREG_S ft6, sp, 14*SZFREG
>>> + FREG_S ft7, sp, 15*SZFREG
>>> + FREG_S ft8, sp, 16*SZFREG
>>> + FREG_S ft9, sp, 17*SZFREG
>>> + FREG_S ft10, sp, 18*SZFREG
>>> + FREG_S ft11, sp, 19*SZFREG
>>> + FREG_S ft12, sp, 20*SZFREG
>>> + FREG_S ft13, sp, 21*SZFREG
>>> + FREG_S ft14, sp, 22*SZFREG
>>> + FREG_S ft15, sp, 23*SZFREG
>>> +
>>> +#endif /* #ifdef USE_LASX */
>>> +#endif /* #ifndef __loongarch_soft_float */
>>> +
>>> + bl HIDDEN_JUMPTARGET(__tls_get_addr)
>>> + ADDI a0, a0, -TLS_DTV_OFFSET
>>> +
>>> +#ifndef __loongarch_soft_float
>>> +#ifdef USE_LASX
>>> +
>>> + /* Restore 256-bit vector registers. */
>>> + xvld xr0, sp, 0*SZXREG
>>> + xvld xr1, sp, 1*SZXREG
>>> + xvld xr2, sp, 2*SZXREG
>>> + xvld xr3, sp, 3*SZXREG
>>> + xvld xr4, sp, 4*SZXREG
>>> + xvld xr5, sp, 5*SZXREG
>>> + xvld xr6, sp, 6*SZXREG
>>> + xvld xr7, sp, 7*SZXREG
>>> + xvld xr8, sp, 8*SZXREG
>>> + xvld xr9, sp, 9*SZXREG
>>> + xvld xr10, sp, 10*SZXREG
>>> + xvld xr11, sp, 11*SZXREG
>>> + xvld xr12, sp, 12*SZXREG
>>> + xvld xr13, sp, 13*SZXREG
>>> + xvld xr14, sp, 14*SZXREG
>>> + xvld xr15, sp, 15*SZXREG
>>> + xvld xr16, sp, 16*SZXREG
>>> + xvld xr17, sp, 17*SZXREG
>>> + xvld xr18, sp, 18*SZXREG
>>> + xvld xr19, sp, 19*SZXREG
>>> + xvld xr20, sp, 20*SZXREG
>>> + xvld xr21, sp, 21*SZXREG
>>> + xvld xr22, sp, 22*SZXREG
>>> + xvld xr23, sp, 23*SZXREG
>>> + xvld xr24, sp, 24*SZXREG
>>> + xvld xr25, sp, 25*SZXREG
>>> + xvld xr26, sp, 26*SZXREG
>>> + xvld xr27, sp, 27*SZXREG
>>> + xvld xr28, sp, 28*SZXREG
>>> + xvld xr29, sp, 29*SZXREG
>>> + xvld xr30, sp, 30*SZXREG
>>> + xvld xr31, sp, 31*SZXREG
>>> + ADDI sp, sp, FRAME_SIZE_LASX
>>> + cfi_adjust_cfa_offset (-FRAME_SIZE_LASX)
>>> +
>>> +#elif defined USE_LSX
>>> +
>>> + /* Restore 128-bit vector registers. */
>>> + vld vr0, sp, 0*SZVREG
>>> + vld vr1, sp, 1*SZVREG
>>> + vld vr2, sp, 2*SZVREG
>>> + vld vr3, sp, 3*SZVREG
>>> + vld vr4, sp, 4*SZVREG
>>> + vld vr5, sp, 5*SZVREG
>>> + vld vr6, sp, 6*SZVREG
>>> + vld vr7, sp, 7*SZVREG
>>> + vld vr8, sp, 8*SZVREG
>>> + vld vr9, sp, 9*SZVREG
>>> + vld vr10, sp, 10*SZVREG
>>> + vld vr11, sp, 11*SZVREG
>>> + vld vr12, sp, 12*SZVREG
>>> + vld vr13, sp, 13*SZVREG
>>> + vld vr14, sp, 14*SZVREG
>>> + vld vr15, sp, 15*SZVREG
>>> + vld vr16, sp, 16*SZVREG
>>> + vld vr17, sp, 17*SZVREG
>>> + vld vr18, sp, 18*SZVREG
>>> + vld vr19, sp, 19*SZVREG
>>> + vld vr20, sp, 20*SZVREG
>>> + vld vr21, sp, 21*SZVREG
>>> + vld vr22, sp, 22*SZVREG
>>> + vld vr23, sp, 23*SZVREG
>>> + vld vr24, sp, 24*SZVREG
>>> + vld vr25, sp, 25*SZVREG
>>> + vld vr26, sp, 26*SZVREG
>>> + vld vr27, sp, 27*SZVREG
>>> + vld vr28, sp, 28*SZVREG
>>> + vld vr29, sp, 29*SZVREG
>>> + vld vr30, sp, 30*SZVREG
>>> + vld vr31, sp, 31*SZVREG
>>> + ADDI sp, sp, FRAME_SIZE_LSX
>>> + cfi_adjust_cfa_offset (-FRAME_SIZE_LSX)
>>> +
>>> +#else
>>> +
>>> + /* Restore float registers. */
>>> + FREG_L fa0, sp, 0*SZFREG
>>> + FREG_L fa1, sp, 1*SZFREG
>>> + FREG_L fa2, sp, 2*SZFREG
>>> + FREG_L fa3, sp, 3*SZFREG
>>> + FREG_L fa4, sp, 4*SZFREG
>>> + FREG_L fa5, sp, 5*SZFREG
>>> + FREG_L fa6, sp, 6*SZFREG
>>> + FREG_L fa7, sp, 7*SZFREG
>>> + FREG_L ft0, sp, 8*SZFREG
>>> + FREG_L ft1, sp, 9*SZFREG
>>> + FREG_L ft2, sp, 10*SZFREG
>>> + FREG_L ft3, sp, 11*SZFREG
>>> + FREG_L ft4, sp, 12*SZFREG
>>> + FREG_L ft5, sp, 13*SZFREG
>>> + FREG_L ft6, sp, 14*SZFREG
>>> + FREG_L ft7, sp, 15*SZFREG
>>> + FREG_L ft8, sp, 16*SZFREG
>>> + FREG_L ft9, sp, 17*SZFREG
>>> + FREG_L ft10, sp, 18*SZFREG
>>> + FREG_L ft11, sp, 19*SZFREG
>>> + FREG_L ft12, sp, 20*SZFREG
>>> + FREG_L ft13, sp, 21*SZFREG
>>> + FREG_L ft14, sp, 22*SZFREG
>>> + FREG_L ft15, sp, 23*SZFREG
>>> + ADDI sp, sp, FRAME_SIZE_FLOAT
>>> + cfi_adjust_cfa_offset (-FRAME_SIZE_FLOAT)
>>> +
>>> +#endif /* #ifdef USE_LASX */
>>> +
>>> + /* Restore fcsr0 register. */
>>> + ld.w t0, sp, FRAME_SIZE + 24
>>> + movgr2fcsr fcsr0, t0
>>> +
>>> +#endif /* #ifndef __loongarch_soft_float */
>>> +
>>> + REG_L ra, sp, 0 * SZREG
>>> + REG_L a1, sp, 1 * SZREG
>>> + REG_L a2, sp, 2 * SZREG
>>> + REG_L a3, sp, 3 * SZREG
>>> + REG_L a4, sp, 4 * SZREG
>>> + REG_L a5, sp, 5 * SZREG
>>> + REG_L a6, sp, 6 * SZREG
>>> + REG_L a7, sp, 7 * SZREG
>>> + REG_L t3, sp, 8 * SZREG
>>> + REG_L t4, sp, 9 * SZREG
>>> + REG_L t5, sp, 10 * SZREG
>>> + REG_L t6, sp, 11 * SZREG
>>> + REG_L t7, sp, 12 * SZREG
>>> + REG_L t8, sp, 13 * SZREG
>>> + ADDI sp, sp, FRAME_SIZE
>>> + cfi_adjust_cfa_offset (-FRAME_SIZE)
>>> +
>>> + b .Lret
>>> + cfi_endproc
>>> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>>> + .hidden HIDDEN_JUMPTARGET(__tls_get_addr)
>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S
>>> b/sysdeps/loongarch/dl-tlsdesc.S
>>> index a6627cc754..b6cfd6121d 100644
>>> --- a/sysdeps/loongarch/dl-tlsdesc.S
>>> +++ b/sysdeps/loongarch/dl-tlsdesc.S
>>> @@ -59,376 +59,34 @@ _dl_tlsdesc_undefweak:
>>> cfi_endproc
>>> .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>>> -
>>> #ifdef SHARED
>>> -#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
>>> -#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
>>> -#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
>>> -#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
>>> -
>>> - /* Handler for dynamic TLS symbols.
>>> - Prototype:
>>> - _dl_tlsdesc_dynamic (tlsdesc *) ;
>>> -
>>> - The second word of the descriptor points to a
>>> - tlsdesc_dynamic_arg structure.
>>> -
>>> - Returns the offset between the thread pointer and the
>>> - object referenced by the argument.
>>> -
>>> - ptrdiff_t
>>> - _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>>> - {
>>> - struct tlsdesc_dynamic_arg *td = tdp->arg;
>>> - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer -
>>> SIZE_OF_TCB);
>>> - if (__glibc_likely (td->gen_count <= dtv[0].counter
>>> - && (dtv[td->tlsinfo.ti_module].pointer.val
>>> - != TLS_DTV_UNALLOCATED),
>>> - 1))
>>> - return dtv[td->tlsinfo.ti_module].pointer.val
>>> - + td->tlsinfo.ti_offset
>>> - - __thread_pointer;
>>> -
>>> - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>>> - } */
>>> - .hidden _dl_tlsdesc_dynamic
>>> - .global _dl_tlsdesc_dynamic
>>> - .type _dl_tlsdesc_dynamic,%function
>>> - cfi_startproc
>>> - .align 2
>>> -_dl_tlsdesc_dynamic:
>>> - /* Save just enough registers to support fast path, if we fall
>>> - into slow path we will save additional registers. */
>>> - ADDI sp, sp, -32
>>> - REG_S t0, sp, 0
>>> - REG_S t1, sp, 8
>>> - REG_S t2, sp, 16
>>> -
>>> -/* Runtime Storage Layout of Thread-Local Storage
>>> - TP point to the start of TLS block.
>>> -
>>> - dtv
>>> -Low address TCB ----------------> dtv0(counter)
>>> - TP --> static_block0 <----- dtv1
>>> - static_block1 <----- dtv2
>>> - static_block2 <----- dtv3
>>> - dynamic_block0 <----- dtv4
>>> -Hign address dynamic_block1 <----- dtv5 */
>>> -
>>> - REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
>>> - REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
>>> - REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
>>> - REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
>>> - /* If dtv[0].counter < td->gen_count, goto slow path. */
>>> - bltu t2, t1, .Lslow
>>> -
>>> - REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
>>> - /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
>>> - slli.d t1, t1, 4
>>> - add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
>>> - REG_L t1, t1, 0 /* t1 =
>>> dtv[td->tlsinfo.ti_module].pointer.val */
>>> - li.d t2, TLS_DTV_UNALLOCATED
>>> - /* If dtv[td->tlsinfo.ti_module].pointer.val is
>>> TLS_DTV_UNALLOCATED,
>>> - goto slow path. */
>>> - beq t1, t2, .Lslow
>>> -
>>> - REG_L t2, a0, TLSDESC_MODOFF /* t2 =
>>> td->tlsinfo.ti_offset */
>>> - /* dtv[td->tlsinfo.ti_module].pointer.val +
>>> td->tlsinfo.ti_offset */
>>> - add.d a0, t1, t2
>>> -.Lret:
>>> - sub.d a0, a0, tp
>>> - REG_L t0, sp, 0
>>> - REG_L t1, sp, 8
>>> - REG_L t2, sp, 16
>>> - ADDI sp, sp, 32
>>> - RET
>>> -
>>> -.Lslow:
>>> - /* This is the slow path. We need to call __tls_get_addr() which
>>> - means we need to save and restore all the register that the
>>> - callee will trash. */
>>> -
>>> - /* Save the remaining registers that we must treat as caller
>>> save. */
>>> - ADDI sp, sp, -FRAME_SIZE
>>> - REG_S ra, sp, 0 * SZREG
>>> - REG_S a1, sp, 1 * SZREG
>>> - REG_S a2, sp, 2 * SZREG
>>> - REG_S a3, sp, 3 * SZREG
>>> - REG_S a4, sp, 4 * SZREG
>>> - REG_S a5, sp, 5 * SZREG
>>> - REG_S a6, sp, 6 * SZREG
>>> - REG_S a7, sp, 7 * SZREG
>>> - REG_S t3, sp, 8 * SZREG
>>> - REG_S t4, sp, 9 * SZREG
>>> - REG_S t5, sp, 10 * SZREG
>>> - REG_S t6, sp, 11 * SZREG
>>> - REG_S t7, sp, 12 * SZREG
>>> - REG_S t8, sp, 13 * SZREG
>>> -
>>> #ifndef __loongarch_soft_float
>>> - /* Save fcsr0 register.
>>> - Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
>>> - of some fields in fcsr0. */
>>> - movfcsr2gr t0, fcsr0
>>> - st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */
>>> -
>>> - /* Whether support LASX. */
>>> - la.global t0, _rtld_global_ro
>>> - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
>>> - andi t1, t0, HWCAP_LOONGARCH_LASX
>>> - beqz t1, .Llsx
>>> -
>>> - /* Save 256-bit vector registers.
>>> - FIXME: Without vector ABI, save all vector registers. */
>>> - ADDI sp, sp, -FRAME_SIZE_LASX
>>> - xvst xr0, sp, 0*SZXREG
>>> - xvst xr1, sp, 1*SZXREG
>>> - xvst xr2, sp, 2*SZXREG
>>> - xvst xr3, sp, 3*SZXREG
>>> - xvst xr4, sp, 4*SZXREG
>>> - xvst xr5, sp, 5*SZXREG
>>> - xvst xr6, sp, 6*SZXREG
>>> - xvst xr7, sp, 7*SZXREG
>>> - xvst xr8, sp, 8*SZXREG
>>> - xvst xr9, sp, 9*SZXREG
>>> - xvst xr10, sp, 10*SZXREG
>>> - xvst xr11, sp, 11*SZXREG
>>> - xvst xr12, sp, 12*SZXREG
>>> - xvst xr13, sp, 13*SZXREG
>>> - xvst xr14, sp, 14*SZXREG
>>> - xvst xr15, sp, 15*SZXREG
>>> - xvst xr16, sp, 16*SZXREG
>>> - xvst xr17, sp, 17*SZXREG
>>> - xvst xr18, sp, 18*SZXREG
>>> - xvst xr19, sp, 19*SZXREG
>>> - xvst xr20, sp, 20*SZXREG
>>> - xvst xr21, sp, 21*SZXREG
>>> - xvst xr22, sp, 22*SZXREG
>>> - xvst xr23, sp, 23*SZXREG
>>> - xvst xr24, sp, 24*SZXREG
>>> - xvst xr25, sp, 25*SZXREG
>>> - xvst xr26, sp, 26*SZXREG
>>> - xvst xr27, sp, 27*SZXREG
>>> - xvst xr28, sp, 28*SZXREG
>>> - xvst xr29, sp, 29*SZXREG
>>> - xvst xr30, sp, 30*SZXREG
>>> - xvst xr31, sp, 31*SZXREG
>>> - b .Ltga
>>> -
>>> -.Llsx:
>>> - /* Whether support LSX. */
>>> - andi t1, t0, HWCAP_LOONGARCH_LSX
>>> - beqz t1, .Lfloat
>>> -
>>> - /* Save 128-bit vector registers. */
>>> - ADDI sp, sp, -FRAME_SIZE_LSX
>>> - vst vr0, sp, 0*SZVREG
>>> - vst vr1, sp, 1*SZVREG
>>> - vst vr2, sp, 2*SZVREG
>>> - vst vr3, sp, 3*SZVREG
>>> - vst vr4, sp, 4*SZVREG
>>> - vst vr5, sp, 5*SZVREG
>>> - vst vr6, sp, 6*SZVREG
>>> - vst vr7, sp, 7*SZVREG
>>> - vst vr8, sp, 8*SZVREG
>>> - vst vr9, sp, 9*SZVREG
>>> - vst vr10, sp, 10*SZVREG
>>> - vst vr11, sp, 11*SZVREG
>>> - vst vr12, sp, 12*SZVREG
>>> - vst vr13, sp, 13*SZVREG
>>> - vst vr14, sp, 14*SZVREG
>>> - vst vr15, sp, 15*SZVREG
>>> - vst vr16, sp, 16*SZVREG
>>> - vst vr17, sp, 17*SZVREG
>>> - vst vr18, sp, 18*SZVREG
>>> - vst vr19, sp, 19*SZVREG
>>> - vst vr20, sp, 20*SZVREG
>>> - vst vr21, sp, 21*SZVREG
>>> - vst vr22, sp, 22*SZVREG
>>> - vst vr23, sp, 23*SZVREG
>>> - vst vr24, sp, 24*SZVREG
>>> - vst vr25, sp, 25*SZVREG
>>> - vst vr26, sp, 26*SZVREG
>>> - vst vr27, sp, 27*SZVREG
>>> - vst vr28, sp, 28*SZVREG
>>> - vst vr29, sp, 29*SZVREG
>>> - vst vr30, sp, 30*SZVREG
>>> - vst vr31, sp, 31*SZVREG
>>> - b .Ltga
>>> -
>>> -.Lfloat:
>>> - /* Save float registers. */
>>> - ADDI sp, sp, -FRAME_SIZE_FLOAT
>>> - FREG_S fa0, sp, 0*SZFREG
>>> - FREG_S fa1, sp, 1*SZFREG
>>> - FREG_S fa2, sp, 2*SZFREG
>>> - FREG_S fa3, sp, 3*SZFREG
>>> - FREG_S fa4, sp, 4*SZFREG
>>> - FREG_S fa5, sp, 5*SZFREG
>>> - FREG_S fa6, sp, 6*SZFREG
>>> - FREG_S fa7, sp, 7*SZFREG
>>> - FREG_S ft0, sp, 8*SZFREG
>>> - FREG_S ft1, sp, 9*SZFREG
>>> - FREG_S ft2, sp, 10*SZFREG
>>> - FREG_S ft3, sp, 11*SZFREG
>>> - FREG_S ft4, sp, 12*SZFREG
>>> - FREG_S ft5, sp, 13*SZFREG
>>> - FREG_S ft6, sp, 14*SZFREG
>>> - FREG_S ft7, sp, 15*SZFREG
>>> - FREG_S ft8, sp, 16*SZFREG
>>> - FREG_S ft9, sp, 17*SZFREG
>>> - FREG_S ft10, sp, 18*SZFREG
>>> - FREG_S ft11, sp, 19*SZFREG
>>> - FREG_S ft12, sp, 20*SZFREG
>>> - FREG_S ft13, sp, 21*SZFREG
>>> - FREG_S ft14, sp, 22*SZFREG
>>> - FREG_S ft15, sp, 23*SZFREG
>>> -
>>> -#endif /* #ifndef __loongarch_soft_float */
>>> -
>>> -.Ltga:
>>> - bl HIDDEN_JUMPTARGET(__tls_get_addr)
>>> - ADDI a0, a0, -TLS_DTV_OFFSET
>>> -
>>> -#ifndef __loongarch_soft_float
>>> -
>>> - la.global t0, _rtld_global_ro
>>> - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
>>> - andi t1, t0, HWCAP_LOONGARCH_LASX
>>> - beqz t1, .Llsx1
>>> -
>>> - /* Restore 256-bit vector registers. */
>>> - xvld xr0, sp, 0*SZXREG
>>> - xvld xr1, sp, 1*SZXREG
>>> - xvld xr2, sp, 2*SZXREG
>>> - xvld xr3, sp, 3*SZXREG
>>> - xvld xr4, sp, 4*SZXREG
>>> - xvld xr5, sp, 5*SZXREG
>>> - xvld xr6, sp, 6*SZXREG
>>> - xvld xr7, sp, 7*SZXREG
>>> - xvld xr8, sp, 8*SZXREG
>>> - xvld xr9, sp, 9*SZXREG
>>> - xvld xr10, sp, 10*SZXREG
>>> - xvld xr11, sp, 11*SZXREG
>>> - xvld xr12, sp, 12*SZXREG
>>> - xvld xr13, sp, 13*SZXREG
>>> - xvld xr14, sp, 14*SZXREG
>>> - xvld xr15, sp, 15*SZXREG
>>> - xvld xr16, sp, 16*SZXREG
>>> - xvld xr17, sp, 17*SZXREG
>>> - xvld xr18, sp, 18*SZXREG
>>> - xvld xr19, sp, 19*SZXREG
>>> - xvld xr20, sp, 20*SZXREG
>>> - xvld xr21, sp, 21*SZXREG
>>> - xvld xr22, sp, 22*SZXREG
>>> - xvld xr23, sp, 23*SZXREG
>>> - xvld xr24, sp, 24*SZXREG
>>> - xvld xr25, sp, 25*SZXREG
>>> - xvld xr26, sp, 26*SZXREG
>>> - xvld xr27, sp, 27*SZXREG
>>> - xvld xr28, sp, 28*SZXREG
>>> - xvld xr29, sp, 29*SZXREG
>>> - xvld xr30, sp, 30*SZXREG
>>> - xvld xr31, sp, 31*SZXREG
>>> - ADDI sp, sp, FRAME_SIZE_LASX
>>> - b .Lfcsr
>>> -
>>> -.Llsx1:
>>> - andi t1, t0, HWCAP_LOONGARCH_LSX
>>> - beqz t1, .Lfloat1
>>> -
>>> - /* Restore 128-bit vector registers. */
>>> - vld vr0, sp, 0*SZVREG
>>> - vld vr1, sp, 1*SZVREG
>>> - vld vr2, sp, 2*SZVREG
>>> - vld vr3, sp, 3*SZVREG
>>> - vld vr4, sp, 4*SZVREG
>>> - vld vr5, sp, 5*SZVREG
>>> - vld vr6, sp, 6*SZVREG
>>> - vld vr7, sp, 7*SZVREG
>>> - vld vr8, sp, 8*SZVREG
>>> - vld vr9, sp, 9*SZVREG
>>> - vld vr10, sp, 10*SZVREG
>>> - vld vr11, sp, 11*SZVREG
>>> - vld vr12, sp, 12*SZVREG
>>> - vld vr13, sp, 13*SZVREG
>>> - vld vr14, sp, 14*SZVREG
>>> - vld vr15, sp, 15*SZVREG
>>> - vld vr16, sp, 16*SZVREG
>>> - vld vr17, sp, 17*SZVREG
>>> - vld vr18, sp, 18*SZVREG
>>> - vld vr19, sp, 19*SZVREG
>>> - vld vr20, sp, 20*SZVREG
>>> - vld vr21, sp, 21*SZVREG
>>> - vld vr22, sp, 22*SZVREG
>>> - vld vr23, sp, 23*SZVREG
>>> - vld vr24, sp, 24*SZVREG
>>> - vld vr25, sp, 25*SZVREG
>>> - vld vr26, sp, 26*SZVREG
>>> - vld vr27, sp, 27*SZVREG
>>> - vld vr28, sp, 28*SZVREG
>>> - vld vr29, sp, 29*SZVREG
>>> - vld vr30, sp, 30*SZVREG
>>> - vld vr31, sp, 31*SZVREG
>>> - ADDI sp, sp, FRAME_SIZE_LSX
>>> - b .Lfcsr
>>> -
>>> -.Lfloat1:
>>> - /* Restore float registers. */
>>> - FREG_L fa0, sp, 0*SZFREG
>>> - FREG_L fa1, sp, 1*SZFREG
>>> - FREG_L fa2, sp, 2*SZFREG
>>> - FREG_L fa3, sp, 3*SZFREG
>>> - FREG_L fa4, sp, 4*SZFREG
>>> - FREG_L fa5, sp, 5*SZFREG
>>> - FREG_L fa6, sp, 6*SZFREG
>>> - FREG_L fa7, sp, 7*SZFREG
>>> - FREG_L ft0, sp, 8*SZFREG
>>> - FREG_L ft1, sp, 9*SZFREG
>>> - FREG_L ft2, sp, 10*SZFREG
>>> - FREG_L ft3, sp, 11*SZFREG
>>> - FREG_L ft4, sp, 12*SZFREG
>>> - FREG_L ft5, sp, 13*SZFREG
>>> - FREG_L ft6, sp, 14*SZFREG
>>> - FREG_L ft7, sp, 15*SZFREG
>>> - FREG_L ft8, sp, 16*SZFREG
>>> - FREG_L ft9, sp, 17*SZFREG
>>> - FREG_L ft10, sp, 18*SZFREG
>>> - FREG_L ft11, sp, 19*SZFREG
>>> - FREG_L ft12, sp, 20*SZFREG
>>> - FREG_L ft13, sp, 21*SZFREG
>>> - FREG_L ft14, sp, 22*SZFREG
>>> - FREG_L ft15, sp, 23*SZFREG
>>> - ADDI sp, sp, FRAME_SIZE_FLOAT
>>> -
>>> -.Lfcsr:
>>> - /* Restore fcsr0 register. */
>>> - ld.w t0, sp, FRAME_SIZE + 24
>>> - movgr2fcsr fcsr0, t0
>>> +#define USE_LASX
>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
>>> +#define Lret Lret_lasx
>>> +#define Lslow Lslow_lasx
>>> +#include "dl-tlsdesc-dynamic.h"
>>> +#undef FRAME_SIZE
>>> +#undef USE_LASX
>>> +#undef _dl_tlsdesc_dynamic
>>> +#undef Lret
>>> +#undef Lslow
>>> +
>>> +#define USE_LSX
>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
>>> +#define Lret Lret_lsx
>>> +#define Lslow Lslow_lsx
>>> +#include "dl-tlsdesc-dynamic.h"
>>> +#undef FRAME_SIZE
>>> +#undef USE_LSX
>>> +#undef _dl_tlsdesc_dynamic
>>> +#undef Lret
>>> +#undef Lslow
>>> #endif /* #ifndef __loongarch_soft_float */
>>> - REG_L ra, sp, 0 * SZREG
>>> - REG_L a1, sp, 1 * SZREG
>>> - REG_L a2, sp, 2 * SZREG
>>> - REG_L a3, sp, 3 * SZREG
>>> - REG_L a4, sp, 4 * SZREG
>>> - REG_L a5, sp, 5 * SZREG
>>> - REG_L a6, sp, 6 * SZREG
>>> - REG_L a7, sp, 7 * SZREG
>>> - REG_L t3, sp, 8 * SZREG
>>> - REG_L t4, sp, 9 * SZREG
>>> - REG_L t5, sp, 10 * SZREG
>>> - REG_L t6, sp, 11 * SZREG
>>> - REG_L t7, sp, 12 * SZREG
>>> - REG_L t8, sp, 13 * SZREG
>>> - ADDI sp, sp, FRAME_SIZE
>>> -
>>> - b .Lret
>>> - cfi_endproc
>>> - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>>> - .hidden HIDDEN_JUMPTARGET(__tls_get_addr)
>>> +#include "dl-tlsdesc-dynamic.h"
>>> #endif /* #ifdef SHARED */
>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h
>>> b/sysdeps/loongarch/dl-tlsdesc.h
>>> index ff8c69cb93..45c43a5b52 100644
>>> --- a/sysdeps/loongarch/dl-tlsdesc.h
>>> +++ b/sysdeps/loongarch/dl-tlsdesc.h
>>> @@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden
>>> _dl_tlsdesc_undefweak (struct tlsdesc *);
>>> #ifdef SHARED
>>> extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
>>> +#ifndef __loongarch_soft_float
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct
>>> tlsdesc *);
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct
>>> tlsdesc *);
>>> +#endif
>>> extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct
>>> tlsdesc *);
>>> #endif
@@ -223,6 +223,13 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
{
td->arg = _dl_make_tlsdesc_dynamic (sym_map,
sym->st_value + reloc->r_addend);
+# ifndef __loongarch_soft_float
+ if (SUPPORT_LASX)
+ td->entry = _dl_tlsdesc_dynamic_lasx;
+ else if (SUPPORT_LSX)
+ td->entry = _dl_tlsdesc_dynamic_lsx;
+ else
+# endif
td->entry = _dl_tlsdesc_dynamic;
}
else
new file mode 100644
@@ -0,0 +1,403 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+ LoongArch version.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
+#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
+#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
+#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
+
+ /* Handler for dynamic TLS symbols.
+ Prototype:
+ _dl_tlsdesc_dynamic (tlsdesc *) ;
+
+ The second word of the descriptor points to a
+ tlsdesc_dynamic_arg structure.
+
+ Returns the offset between the thread pointer and the
+ object referenced by the argument.
+
+ ptrdiff_t
+ _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+ {
+ struct tlsdesc_dynamic_arg *td = tdp->arg;
+ dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
+ if (__glibc_likely (td->gen_count <= dtv[0].counter
+ && (dtv[td->tlsinfo.ti_module].pointer.val
+ != TLS_DTV_UNALLOCATED),
+ 1))
+ return dtv[td->tlsinfo.ti_module].pointer.val
+ + td->tlsinfo.ti_offset
+ - __thread_pointer;
+
+ return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+ } */
+ .hidden _dl_tlsdesc_dynamic
+ .global _dl_tlsdesc_dynamic
+ .type _dl_tlsdesc_dynamic,%function
+ cfi_startproc
+ .align 2
+_dl_tlsdesc_dynamic:
+ /* Save just enough registers to support fast path, if we fall
+ into slow path we will save additional registers. */
+ ADDI sp, sp, -32
+ cfi_adjust_cfa_offset (32)
+ REG_S t0, sp, 0
+ REG_S t1, sp, 8
+ REG_S t2, sp, 16
+ cfi_rel_offset (12, 0)
+ cfi_rel_offset (13, 8)
+ cfi_rel_offset (14, 16)
+
+/* Runtime Storage Layout of Thread-Local Storage
+ TP point to the start of TLS block.
+
+ dtv
+Low address TCB ----------------> dtv0(counter)
+ TP --> static_block0 <----- dtv1
+ static_block1 <----- dtv2
+ static_block2 <----- dtv3
+ dynamic_block0 <----- dtv4
+Hign address dynamic_block1 <----- dtv5 */
+
+ REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
+ REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
+ REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
+ REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
+ /* If dtv[0].counter < td->gen_count, goto slow path. */
+ bltu t2, t1, .Lslow
+
+ REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
+ /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
+ slli.d t1, t1, 4
+ add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
+ REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
+ li.d t2, TLS_DTV_UNALLOCATED
+ /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
+ goto slow path. */
+ beq t1, t2, .Lslow
+
+ cfi_remember_state
+ REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */
+ /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
+ add.d a0, t1, t2
+.Lret:
+ sub.d a0, a0, tp
+ REG_L t0, sp, 0
+ REG_L t1, sp, 8
+ REG_L t2, sp, 16
+ ADDI sp, sp, 32
+ cfi_adjust_cfa_offset (-32)
+ RET
+
+.Lslow:
+ /* This is the slow path. We need to call __tls_get_addr() which
+ means we need to save and restore all the register that the
+ callee will trash. */
+
+ /* Save the remaining registers that we must treat as caller save. */
+ cfi_restore_state
+ ADDI sp, sp, -FRAME_SIZE
+ cfi_adjust_cfa_offset (FRAME_SIZE)
+ REG_S ra, sp, 0 * SZREG
+ REG_S a1, sp, 1 * SZREG
+ REG_S a2, sp, 2 * SZREG
+ REG_S a3, sp, 3 * SZREG
+ REG_S a4, sp, 4 * SZREG
+ REG_S a5, sp, 5 * SZREG
+ REG_S a6, sp, 6 * SZREG
+ REG_S a7, sp, 7 * SZREG
+ REG_S t3, sp, 8 * SZREG
+ REG_S t4, sp, 9 * SZREG
+ REG_S t5, sp, 10 * SZREG
+ REG_S t6, sp, 11 * SZREG
+ REG_S t7, sp, 12 * SZREG
+ REG_S t8, sp, 13 * SZREG
+ cfi_rel_offset (1, 0 * SZREG)
+ cfi_rel_offset (5, 1 * SZREG)
+ cfi_rel_offset (6, 2 * SZREG)
+ cfi_rel_offset (7, 3 * SZREG)
+ cfi_rel_offset (8, 4 * SZREG)
+ cfi_rel_offset (9, 5 * SZREG)
+ cfi_rel_offset (10, 6 * SZREG)
+ cfi_rel_offset (11, 7 * SZREG)
+ cfi_rel_offset (15, 8 * SZREG)
+ cfi_rel_offset (16, 9 * SZREG)
+ cfi_rel_offset (17, 10 * SZREG)
+ cfi_rel_offset (18, 11 * SZREG)
+ cfi_rel_offset (19, 12 * SZREG)
+ cfi_rel_offset (20, 13 * SZREG)
+
+#ifndef __loongarch_soft_float
+
+ /* Save fcsr0 register.
+ Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
+ of some fields in fcsr0. */
+ movfcsr2gr t0, fcsr0
+ st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2. */
+
+#ifdef USE_LASX
+
+ /* Save 256-bit vector registers.
+ FIXME: Without vector ABI, save all vector registers. */
+ ADDI sp, sp, -FRAME_SIZE_LASX
+ cfi_adjust_cfa_offset (FRAME_SIZE_LASX)
+ xvst xr0, sp, 0*SZXREG
+ xvst xr1, sp, 1*SZXREG
+ xvst xr2, sp, 2*SZXREG
+ xvst xr3, sp, 3*SZXREG
+ xvst xr4, sp, 4*SZXREG
+ xvst xr5, sp, 5*SZXREG
+ xvst xr6, sp, 6*SZXREG
+ xvst xr7, sp, 7*SZXREG
+ xvst xr8, sp, 8*SZXREG
+ xvst xr9, sp, 9*SZXREG
+ xvst xr10, sp, 10*SZXREG
+ xvst xr11, sp, 11*SZXREG
+ xvst xr12, sp, 12*SZXREG
+ xvst xr13, sp, 13*SZXREG
+ xvst xr14, sp, 14*SZXREG
+ xvst xr15, sp, 15*SZXREG
+ xvst xr16, sp, 16*SZXREG
+ xvst xr17, sp, 17*SZXREG
+ xvst xr18, sp, 18*SZXREG
+ xvst xr19, sp, 19*SZXREG
+ xvst xr20, sp, 20*SZXREG
+ xvst xr21, sp, 21*SZXREG
+ xvst xr22, sp, 22*SZXREG
+ xvst xr23, sp, 23*SZXREG
+ xvst xr24, sp, 24*SZXREG
+ xvst xr25, sp, 25*SZXREG
+ xvst xr26, sp, 26*SZXREG
+ xvst xr27, sp, 27*SZXREG
+ xvst xr28, sp, 28*SZXREG
+ xvst xr29, sp, 29*SZXREG
+ xvst xr30, sp, 30*SZXREG
+ xvst xr31, sp, 31*SZXREG
+
+#elif defined USE_LSX
+
+ /* Save 128-bit vector registers. */
+ ADDI sp, sp, -FRAME_SIZE_LSX
+ cfi_adjust_cfa_offset (FRAME_SIZE_LSX)
+ vst vr0, sp, 0*SZVREG
+ vst vr1, sp, 1*SZVREG
+ vst vr2, sp, 2*SZVREG
+ vst vr3, sp, 3*SZVREG
+ vst vr4, sp, 4*SZVREG
+ vst vr5, sp, 5*SZVREG
+ vst vr6, sp, 6*SZVREG
+ vst vr7, sp, 7*SZVREG
+ vst vr8, sp, 8*SZVREG
+ vst vr9, sp, 9*SZVREG
+ vst vr10, sp, 10*SZVREG
+ vst vr11, sp, 11*SZVREG
+ vst vr12, sp, 12*SZVREG
+ vst vr13, sp, 13*SZVREG
+ vst vr14, sp, 14*SZVREG
+ vst vr15, sp, 15*SZVREG
+ vst vr16, sp, 16*SZVREG
+ vst vr17, sp, 17*SZVREG
+ vst vr18, sp, 18*SZVREG
+ vst vr19, sp, 19*SZVREG
+ vst vr20, sp, 20*SZVREG
+ vst vr21, sp, 21*SZVREG
+ vst vr22, sp, 22*SZVREG
+ vst vr23, sp, 23*SZVREG
+ vst vr24, sp, 24*SZVREG
+ vst vr25, sp, 25*SZVREG
+ vst vr26, sp, 26*SZVREG
+ vst vr27, sp, 27*SZVREG
+ vst vr28, sp, 28*SZVREG
+ vst vr29, sp, 29*SZVREG
+ vst vr30, sp, 30*SZVREG
+ vst vr31, sp, 31*SZVREG
+
+# else
+
+ /* Save float registers. */
+ ADDI sp, sp, -FRAME_SIZE_FLOAT
+ cfi_adjust_cfa_offset (FRAME_SIZE_FLOAT)
+ FREG_S fa0, sp, 0*SZFREG
+ FREG_S fa1, sp, 1*SZFREG
+ FREG_S fa2, sp, 2*SZFREG
+ FREG_S fa3, sp, 3*SZFREG
+ FREG_S fa4, sp, 4*SZFREG
+ FREG_S fa5, sp, 5*SZFREG
+ FREG_S fa6, sp, 6*SZFREG
+ FREG_S fa7, sp, 7*SZFREG
+ FREG_S ft0, sp, 8*SZFREG
+ FREG_S ft1, sp, 9*SZFREG
+ FREG_S ft2, sp, 10*SZFREG
+ FREG_S ft3, sp, 11*SZFREG
+ FREG_S ft4, sp, 12*SZFREG
+ FREG_S ft5, sp, 13*SZFREG
+ FREG_S ft6, sp, 14*SZFREG
+ FREG_S ft7, sp, 15*SZFREG
+ FREG_S ft8, sp, 16*SZFREG
+ FREG_S ft9, sp, 17*SZFREG
+ FREG_S ft10, sp, 18*SZFREG
+ FREG_S ft11, sp, 19*SZFREG
+ FREG_S ft12, sp, 20*SZFREG
+ FREG_S ft13, sp, 21*SZFREG
+ FREG_S ft14, sp, 22*SZFREG
+ FREG_S ft15, sp, 23*SZFREG
+
+#endif /* #ifdef USE_LASX */
+#endif /* #ifndef __loongarch_soft_float */
+
+ bl HIDDEN_JUMPTARGET(__tls_get_addr)
+ ADDI a0, a0, -TLS_DTV_OFFSET
+
+#ifndef __loongarch_soft_float
+#ifdef USE_LASX
+
+ /* Restore 256-bit vector registers. */
+ xvld xr0, sp, 0*SZXREG
+ xvld xr1, sp, 1*SZXREG
+ xvld xr2, sp, 2*SZXREG
+ xvld xr3, sp, 3*SZXREG
+ xvld xr4, sp, 4*SZXREG
+ xvld xr5, sp, 5*SZXREG
+ xvld xr6, sp, 6*SZXREG
+ xvld xr7, sp, 7*SZXREG
+ xvld xr8, sp, 8*SZXREG
+ xvld xr9, sp, 9*SZXREG
+ xvld xr10, sp, 10*SZXREG
+ xvld xr11, sp, 11*SZXREG
+ xvld xr12, sp, 12*SZXREG
+ xvld xr13, sp, 13*SZXREG
+ xvld xr14, sp, 14*SZXREG
+ xvld xr15, sp, 15*SZXREG
+ xvld xr16, sp, 16*SZXREG
+ xvld xr17, sp, 17*SZXREG
+ xvld xr18, sp, 18*SZXREG
+ xvld xr19, sp, 19*SZXREG
+ xvld xr20, sp, 20*SZXREG
+ xvld xr21, sp, 21*SZXREG
+ xvld xr22, sp, 22*SZXREG
+ xvld xr23, sp, 23*SZXREG
+ xvld xr24, sp, 24*SZXREG
+ xvld xr25, sp, 25*SZXREG
+ xvld xr26, sp, 26*SZXREG
+ xvld xr27, sp, 27*SZXREG
+ xvld xr28, sp, 28*SZXREG
+ xvld xr29, sp, 29*SZXREG
+ xvld xr30, sp, 30*SZXREG
+ xvld xr31, sp, 31*SZXREG
+ ADDI sp, sp, FRAME_SIZE_LASX
+ cfi_adjust_cfa_offset (-FRAME_SIZE_LASX)
+
+#elif defined USE_LSX
+
+ /* Restore 128-bit vector registers. */
+ vld vr0, sp, 0*SZVREG
+ vld vr1, sp, 1*SZVREG
+ vld vr2, sp, 2*SZVREG
+ vld vr3, sp, 3*SZVREG
+ vld vr4, sp, 4*SZVREG
+ vld vr5, sp, 5*SZVREG
+ vld vr6, sp, 6*SZVREG
+ vld vr7, sp, 7*SZVREG
+ vld vr8, sp, 8*SZVREG
+ vld vr9, sp, 9*SZVREG
+ vld vr10, sp, 10*SZVREG
+ vld vr11, sp, 11*SZVREG
+ vld vr12, sp, 12*SZVREG
+ vld vr13, sp, 13*SZVREG
+ vld vr14, sp, 14*SZVREG
+ vld vr15, sp, 15*SZVREG
+ vld vr16, sp, 16*SZVREG
+ vld vr17, sp, 17*SZVREG
+ vld vr18, sp, 18*SZVREG
+ vld vr19, sp, 19*SZVREG
+ vld vr20, sp, 20*SZVREG
+ vld vr21, sp, 21*SZVREG
+ vld vr22, sp, 22*SZVREG
+ vld vr23, sp, 23*SZVREG
+ vld vr24, sp, 24*SZVREG
+ vld vr25, sp, 25*SZVREG
+ vld vr26, sp, 26*SZVREG
+ vld vr27, sp, 27*SZVREG
+ vld vr28, sp, 28*SZVREG
+ vld vr29, sp, 29*SZVREG
+ vld vr30, sp, 30*SZVREG
+ vld vr31, sp, 31*SZVREG
+ ADDI sp, sp, FRAME_SIZE_LSX
+ cfi_adjust_cfa_offset (-FRAME_SIZE_LSX)
+
+#else
+
+ /* Restore float registers. */
+ FREG_L fa0, sp, 0*SZFREG
+ FREG_L fa1, sp, 1*SZFREG
+ FREG_L fa2, sp, 2*SZFREG
+ FREG_L fa3, sp, 3*SZFREG
+ FREG_L fa4, sp, 4*SZFREG
+ FREG_L fa5, sp, 5*SZFREG
+ FREG_L fa6, sp, 6*SZFREG
+ FREG_L fa7, sp, 7*SZFREG
+ FREG_L ft0, sp, 8*SZFREG
+ FREG_L ft1, sp, 9*SZFREG
+ FREG_L ft2, sp, 10*SZFREG
+ FREG_L ft3, sp, 11*SZFREG
+ FREG_L ft4, sp, 12*SZFREG
+ FREG_L ft5, sp, 13*SZFREG
+ FREG_L ft6, sp, 14*SZFREG
+ FREG_L ft7, sp, 15*SZFREG
+ FREG_L ft8, sp, 16*SZFREG
+ FREG_L ft9, sp, 17*SZFREG
+ FREG_L ft10, sp, 18*SZFREG
+ FREG_L ft11, sp, 19*SZFREG
+ FREG_L ft12, sp, 20*SZFREG
+ FREG_L ft13, sp, 21*SZFREG
+ FREG_L ft14, sp, 22*SZFREG
+ FREG_L ft15, sp, 23*SZFREG
+ ADDI sp, sp, FRAME_SIZE_FLOAT
+ cfi_adjust_cfa_offset (-FRAME_SIZE_FLOAT)
+
+#endif /* #ifdef USE_LASX */
+
+ /* Restore fcsr0 register. */
+ ld.w t0, sp, FRAME_SIZE + 24
+ movgr2fcsr fcsr0, t0
+
+#endif /* #ifndef __loongarch_soft_float */
+
+ REG_L ra, sp, 0 * SZREG
+ REG_L a1, sp, 1 * SZREG
+ REG_L a2, sp, 2 * SZREG
+ REG_L a3, sp, 3 * SZREG
+ REG_L a4, sp, 4 * SZREG
+ REG_L a5, sp, 5 * SZREG
+ REG_L a6, sp, 6 * SZREG
+ REG_L a7, sp, 7 * SZREG
+ REG_L t3, sp, 8 * SZREG
+ REG_L t4, sp, 9 * SZREG
+ REG_L t5, sp, 10 * SZREG
+ REG_L t6, sp, 11 * SZREG
+ REG_L t7, sp, 12 * SZREG
+ REG_L t8, sp, 13 * SZREG
+ ADDI sp, sp, FRAME_SIZE
+ cfi_adjust_cfa_offset (-FRAME_SIZE)
+
+ b .Lret
+ cfi_endproc
+ .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+ .hidden HIDDEN_JUMPTARGET(__tls_get_addr)
@@ -59,376 +59,34 @@ _dl_tlsdesc_undefweak:
cfi_endproc
.size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
-
#ifdef SHARED
-#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
-#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
-#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
-#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
-
- /* Handler for dynamic TLS symbols.
- Prototype:
- _dl_tlsdesc_dynamic (tlsdesc *) ;
-
- The second word of the descriptor points to a
- tlsdesc_dynamic_arg structure.
-
- Returns the offset between the thread pointer and the
- object referenced by the argument.
-
- ptrdiff_t
- _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
- {
- struct tlsdesc_dynamic_arg *td = tdp->arg;
- dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
- if (__glibc_likely (td->gen_count <= dtv[0].counter
- && (dtv[td->tlsinfo.ti_module].pointer.val
- != TLS_DTV_UNALLOCATED),
- 1))
- return dtv[td->tlsinfo.ti_module].pointer.val
- + td->tlsinfo.ti_offset
- - __thread_pointer;
-
- return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
- } */
- .hidden _dl_tlsdesc_dynamic
- .global _dl_tlsdesc_dynamic
- .type _dl_tlsdesc_dynamic,%function
- cfi_startproc
- .align 2
-_dl_tlsdesc_dynamic:
- /* Save just enough registers to support fast path, if we fall
- into slow path we will save additional registers. */
- ADDI sp, sp, -32
- REG_S t0, sp, 0
- REG_S t1, sp, 8
- REG_S t2, sp, 16
-
-/* Runtime Storage Layout of Thread-Local Storage
- TP point to the start of TLS block.
-
- dtv
-Low address TCB ----------------> dtv0(counter)
- TP --> static_block0 <----- dtv1
- static_block1 <----- dtv2
- static_block2 <----- dtv3
- dynamic_block0 <----- dtv4
-Hign address dynamic_block1 <----- dtv5 */
-
- REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
- REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
- REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
- REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
- /* If dtv[0].counter < td->gen_count, goto slow path. */
- bltu t2, t1, .Lslow
-
- REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
- /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
- slli.d t1, t1, 4
- add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
- REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
- li.d t2, TLS_DTV_UNALLOCATED
- /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
- goto slow path. */
- beq t1, t2, .Lslow
-
- REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */
- /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
- add.d a0, t1, t2
-.Lret:
- sub.d a0, a0, tp
- REG_L t0, sp, 0
- REG_L t1, sp, 8
- REG_L t2, sp, 16
- ADDI sp, sp, 32
- RET
-
-.Lslow:
- /* This is the slow path. We need to call __tls_get_addr() which
- means we need to save and restore all the register that the
- callee will trash. */
-
- /* Save the remaining registers that we must treat as caller save. */
- ADDI sp, sp, -FRAME_SIZE
- REG_S ra, sp, 0 * SZREG
- REG_S a1, sp, 1 * SZREG
- REG_S a2, sp, 2 * SZREG
- REG_S a3, sp, 3 * SZREG
- REG_S a4, sp, 4 * SZREG
- REG_S a5, sp, 5 * SZREG
- REG_S a6, sp, 6 * SZREG
- REG_S a7, sp, 7 * SZREG
- REG_S t3, sp, 8 * SZREG
- REG_S t4, sp, 9 * SZREG
- REG_S t5, sp, 10 * SZREG
- REG_S t6, sp, 11 * SZREG
- REG_S t7, sp, 12 * SZREG
- REG_S t8, sp, 13 * SZREG
-
#ifndef __loongarch_soft_float
- /* Save fcsr0 register.
- Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
- of some fields in fcsr0. */
- movfcsr2gr t0, fcsr0
- st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */
-
- /* Whether support LASX. */
- la.global t0, _rtld_global_ro
- REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
- andi t1, t0, HWCAP_LOONGARCH_LASX
- beqz t1, .Llsx
-
- /* Save 256-bit vector registers.
- FIXME: Without vector ABI, save all vector registers. */
- ADDI sp, sp, -FRAME_SIZE_LASX
- xvst xr0, sp, 0*SZXREG
- xvst xr1, sp, 1*SZXREG
- xvst xr2, sp, 2*SZXREG
- xvst xr3, sp, 3*SZXREG
- xvst xr4, sp, 4*SZXREG
- xvst xr5, sp, 5*SZXREG
- xvst xr6, sp, 6*SZXREG
- xvst xr7, sp, 7*SZXREG
- xvst xr8, sp, 8*SZXREG
- xvst xr9, sp, 9*SZXREG
- xvst xr10, sp, 10*SZXREG
- xvst xr11, sp, 11*SZXREG
- xvst xr12, sp, 12*SZXREG
- xvst xr13, sp, 13*SZXREG
- xvst xr14, sp, 14*SZXREG
- xvst xr15, sp, 15*SZXREG
- xvst xr16, sp, 16*SZXREG
- xvst xr17, sp, 17*SZXREG
- xvst xr18, sp, 18*SZXREG
- xvst xr19, sp, 19*SZXREG
- xvst xr20, sp, 20*SZXREG
- xvst xr21, sp, 21*SZXREG
- xvst xr22, sp, 22*SZXREG
- xvst xr23, sp, 23*SZXREG
- xvst xr24, sp, 24*SZXREG
- xvst xr25, sp, 25*SZXREG
- xvst xr26, sp, 26*SZXREG
- xvst xr27, sp, 27*SZXREG
- xvst xr28, sp, 28*SZXREG
- xvst xr29, sp, 29*SZXREG
- xvst xr30, sp, 30*SZXREG
- xvst xr31, sp, 31*SZXREG
- b .Ltga
-
-.Llsx:
- /* Whether support LSX. */
- andi t1, t0, HWCAP_LOONGARCH_LSX
- beqz t1, .Lfloat
-
- /* Save 128-bit vector registers. */
- ADDI sp, sp, -FRAME_SIZE_LSX
- vst vr0, sp, 0*SZVREG
- vst vr1, sp, 1*SZVREG
- vst vr2, sp, 2*SZVREG
- vst vr3, sp, 3*SZVREG
- vst vr4, sp, 4*SZVREG
- vst vr5, sp, 5*SZVREG
- vst vr6, sp, 6*SZVREG
- vst vr7, sp, 7*SZVREG
- vst vr8, sp, 8*SZVREG
- vst vr9, sp, 9*SZVREG
- vst vr10, sp, 10*SZVREG
- vst vr11, sp, 11*SZVREG
- vst vr12, sp, 12*SZVREG
- vst vr13, sp, 13*SZVREG
- vst vr14, sp, 14*SZVREG
- vst vr15, sp, 15*SZVREG
- vst vr16, sp, 16*SZVREG
- vst vr17, sp, 17*SZVREG
- vst vr18, sp, 18*SZVREG
- vst vr19, sp, 19*SZVREG
- vst vr20, sp, 20*SZVREG
- vst vr21, sp, 21*SZVREG
- vst vr22, sp, 22*SZVREG
- vst vr23, sp, 23*SZVREG
- vst vr24, sp, 24*SZVREG
- vst vr25, sp, 25*SZVREG
- vst vr26, sp, 26*SZVREG
- vst vr27, sp, 27*SZVREG
- vst vr28, sp, 28*SZVREG
- vst vr29, sp, 29*SZVREG
- vst vr30, sp, 30*SZVREG
- vst vr31, sp, 31*SZVREG
- b .Ltga
-
-.Lfloat:
- /* Save float registers. */
- ADDI sp, sp, -FRAME_SIZE_FLOAT
- FREG_S fa0, sp, 0*SZFREG
- FREG_S fa1, sp, 1*SZFREG
- FREG_S fa2, sp, 2*SZFREG
- FREG_S fa3, sp, 3*SZFREG
- FREG_S fa4, sp, 4*SZFREG
- FREG_S fa5, sp, 5*SZFREG
- FREG_S fa6, sp, 6*SZFREG
- FREG_S fa7, sp, 7*SZFREG
- FREG_S ft0, sp, 8*SZFREG
- FREG_S ft1, sp, 9*SZFREG
- FREG_S ft2, sp, 10*SZFREG
- FREG_S ft3, sp, 11*SZFREG
- FREG_S ft4, sp, 12*SZFREG
- FREG_S ft5, sp, 13*SZFREG
- FREG_S ft6, sp, 14*SZFREG
- FREG_S ft7, sp, 15*SZFREG
- FREG_S ft8, sp, 16*SZFREG
- FREG_S ft9, sp, 17*SZFREG
- FREG_S ft10, sp, 18*SZFREG
- FREG_S ft11, sp, 19*SZFREG
- FREG_S ft12, sp, 20*SZFREG
- FREG_S ft13, sp, 21*SZFREG
- FREG_S ft14, sp, 22*SZFREG
- FREG_S ft15, sp, 23*SZFREG
-
-#endif /* #ifndef __loongarch_soft_float */
-
-.Ltga:
- bl HIDDEN_JUMPTARGET(__tls_get_addr)
- ADDI a0, a0, -TLS_DTV_OFFSET
-
-#ifndef __loongarch_soft_float
-
- la.global t0, _rtld_global_ro
- REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
- andi t1, t0, HWCAP_LOONGARCH_LASX
- beqz t1, .Llsx1
-
- /* Restore 256-bit vector registers. */
- xvld xr0, sp, 0*SZXREG
- xvld xr1, sp, 1*SZXREG
- xvld xr2, sp, 2*SZXREG
- xvld xr3, sp, 3*SZXREG
- xvld xr4, sp, 4*SZXREG
- xvld xr5, sp, 5*SZXREG
- xvld xr6, sp, 6*SZXREG
- xvld xr7, sp, 7*SZXREG
- xvld xr8, sp, 8*SZXREG
- xvld xr9, sp, 9*SZXREG
- xvld xr10, sp, 10*SZXREG
- xvld xr11, sp, 11*SZXREG
- xvld xr12, sp, 12*SZXREG
- xvld xr13, sp, 13*SZXREG
- xvld xr14, sp, 14*SZXREG
- xvld xr15, sp, 15*SZXREG
- xvld xr16, sp, 16*SZXREG
- xvld xr17, sp, 17*SZXREG
- xvld xr18, sp, 18*SZXREG
- xvld xr19, sp, 19*SZXREG
- xvld xr20, sp, 20*SZXREG
- xvld xr21, sp, 21*SZXREG
- xvld xr22, sp, 22*SZXREG
- xvld xr23, sp, 23*SZXREG
- xvld xr24, sp, 24*SZXREG
- xvld xr25, sp, 25*SZXREG
- xvld xr26, sp, 26*SZXREG
- xvld xr27, sp, 27*SZXREG
- xvld xr28, sp, 28*SZXREG
- xvld xr29, sp, 29*SZXREG
- xvld xr30, sp, 30*SZXREG
- xvld xr31, sp, 31*SZXREG
- ADDI sp, sp, FRAME_SIZE_LASX
- b .Lfcsr
-
-.Llsx1:
- andi t1, t0, HWCAP_LOONGARCH_LSX
- beqz t1, .Lfloat1
-
- /* Restore 128-bit vector registers. */
- vld vr0, sp, 0*SZVREG
- vld vr1, sp, 1*SZVREG
- vld vr2, sp, 2*SZVREG
- vld vr3, sp, 3*SZVREG
- vld vr4, sp, 4*SZVREG
- vld vr5, sp, 5*SZVREG
- vld vr6, sp, 6*SZVREG
- vld vr7, sp, 7*SZVREG
- vld vr8, sp, 8*SZVREG
- vld vr9, sp, 9*SZVREG
- vld vr10, sp, 10*SZVREG
- vld vr11, sp, 11*SZVREG
- vld vr12, sp, 12*SZVREG
- vld vr13, sp, 13*SZVREG
- vld vr14, sp, 14*SZVREG
- vld vr15, sp, 15*SZVREG
- vld vr16, sp, 16*SZVREG
- vld vr17, sp, 17*SZVREG
- vld vr18, sp, 18*SZVREG
- vld vr19, sp, 19*SZVREG
- vld vr20, sp, 20*SZVREG
- vld vr21, sp, 21*SZVREG
- vld vr22, sp, 22*SZVREG
- vld vr23, sp, 23*SZVREG
- vld vr24, sp, 24*SZVREG
- vld vr25, sp, 25*SZVREG
- vld vr26, sp, 26*SZVREG
- vld vr27, sp, 27*SZVREG
- vld vr28, sp, 28*SZVREG
- vld vr29, sp, 29*SZVREG
- vld vr30, sp, 30*SZVREG
- vld vr31, sp, 31*SZVREG
- ADDI sp, sp, FRAME_SIZE_LSX
- b .Lfcsr
-
-.Lfloat1:
- /* Restore float registers. */
- FREG_L fa0, sp, 0*SZFREG
- FREG_L fa1, sp, 1*SZFREG
- FREG_L fa2, sp, 2*SZFREG
- FREG_L fa3, sp, 3*SZFREG
- FREG_L fa4, sp, 4*SZFREG
- FREG_L fa5, sp, 5*SZFREG
- FREG_L fa6, sp, 6*SZFREG
- FREG_L fa7, sp, 7*SZFREG
- FREG_L ft0, sp, 8*SZFREG
- FREG_L ft1, sp, 9*SZFREG
- FREG_L ft2, sp, 10*SZFREG
- FREG_L ft3, sp, 11*SZFREG
- FREG_L ft4, sp, 12*SZFREG
- FREG_L ft5, sp, 13*SZFREG
- FREG_L ft6, sp, 14*SZFREG
- FREG_L ft7, sp, 15*SZFREG
- FREG_L ft8, sp, 16*SZFREG
- FREG_L ft9, sp, 17*SZFREG
- FREG_L ft10, sp, 18*SZFREG
- FREG_L ft11, sp, 19*SZFREG
- FREG_L ft12, sp, 20*SZFREG
- FREG_L ft13, sp, 21*SZFREG
- FREG_L ft14, sp, 22*SZFREG
- FREG_L ft15, sp, 23*SZFREG
- ADDI sp, sp, FRAME_SIZE_FLOAT
-
-.Lfcsr:
- /* Restore fcsr0 register. */
- ld.w t0, sp, FRAME_SIZE + 24
- movgr2fcsr fcsr0, t0
+#define USE_LASX
+#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
+#define Lret Lret_lasx
+#define Lslow Lslow_lasx
+#include "dl-tlsdesc-dynamic.h"
+#undef FRAME_SIZE
+#undef USE_LASX
+#undef _dl_tlsdesc_dynamic
+#undef Lret
+#undef Lslow
+
+#define USE_LSX
+#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
+#define Lret Lret_lsx
+#define Lslow Lslow_lsx
+#include "dl-tlsdesc-dynamic.h"
+#undef FRAME_SIZE
+#undef USE_LSX
+#undef _dl_tlsdesc_dynamic
+#undef Lret
+#undef Lslow
#endif /* #ifndef __loongarch_soft_float */
- REG_L ra, sp, 0 * SZREG
- REG_L a1, sp, 1 * SZREG
- REG_L a2, sp, 2 * SZREG
- REG_L a3, sp, 3 * SZREG
- REG_L a4, sp, 4 * SZREG
- REG_L a5, sp, 5 * SZREG
- REG_L a6, sp, 6 * SZREG
- REG_L a7, sp, 7 * SZREG
- REG_L t3, sp, 8 * SZREG
- REG_L t4, sp, 9 * SZREG
- REG_L t5, sp, 10 * SZREG
- REG_L t6, sp, 11 * SZREG
- REG_L t7, sp, 12 * SZREG
- REG_L t8, sp, 13 * SZREG
- ADDI sp, sp, FRAME_SIZE
-
- b .Lret
- cfi_endproc
- .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
- .hidden HIDDEN_JUMPTARGET(__tls_get_addr)
+#include "dl-tlsdesc-dynamic.h"
#endif /* #ifdef SHARED */
@@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
#ifdef SHARED
extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
+#ifndef __loongarch_soft_float
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
+#endif
extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
#endif