LoongArch: Add lasx/lsx support for _dl_runtime_profile.
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Testing passed
|
Commit Message
---
sysdeps/loongarch/bits/link.h | 24 ++-
sysdeps/loongarch/dl-link.sym | 8 +-
sysdeps/loongarch/dl-machine.h | 11 +-
sysdeps/loongarch/dl-trampoline.S | 175 +--------------------
sysdeps/loongarch/dl-trampoline.h | 242 ++++++++++++++++++++++++++++++
5 files changed, 283 insertions(+), 177 deletions(-)
Comments
I think this characterize as a ABI break, so it should follow what aarch64
did when we fixed BZ#26643 (ce9a68c57c260c8417afc93972849ac9ad243ec4) and
bump LAV_CURRENT (it was not done on this specific commit, but rather on
32612615c58b394c3eb09f020f31310797ad3854 to fix BZ #23734).
So add a loongarch link_lavcurrent.h with value of 3 for
!__loongarch_soft_float.
On 11/09/23 07:34, caiyinyu wrote:
> ---
> sysdeps/loongarch/bits/link.h | 24 ++-
> sysdeps/loongarch/dl-link.sym | 8 +-
> sysdeps/loongarch/dl-machine.h | 11 +-
> sysdeps/loongarch/dl-trampoline.S | 175 +--------------------
> sysdeps/loongarch/dl-trampoline.h | 242 ++++++++++++++++++++++++++++++
> 5 files changed, 283 insertions(+), 177 deletions(-)
>
> diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h
> index 7fa6131280..00f6f25f2d 100644
> --- a/sysdeps/loongarch/bits/link.h
> +++ b/sysdeps/loongarch/bits/link.h
> @@ -20,10 +20,26 @@
> #error "Never include <bits/link.h> directly; use <link.h> instead."
> #endif
>
> +#ifndef __loongarch_soft_float
> +typedef float La_loongarch_vr
> + __attribute__ ((__vector_size__ (16), __aligned__ (16)));
> +typedef float La_loongarch_xr
> + __attribute__ ((__vector_size__ (32), __aligned__ (16)));
> +
> +typedef union
> +{
> + double fpreg[4];
> + La_loongarch_vr vr[2];
> + La_loongarch_xr xr[1];
> +} La_loongarch_vector __attribute__ ((__aligned__ (16)));
> +#endif
> +
> typedef struct La_loongarch_regs
> {
> unsigned long int lr_reg[8]; /* a0 - a7 */
> - double lr_fpreg[8]; /* fa0 - fa7 */
> +#ifndef __loongarch_soft_float
> + La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/
> +#endif
> unsigned long int lr_ra;
> unsigned long int lr_sp;
> } La_loongarch_regs;
> @@ -33,8 +49,10 @@ typedef struct La_loongarch_retval
> {
> unsigned long int lrv_a0;
> unsigned long int lrv_a1;
> - double lrv_fa0;
> - double lrv_fa1;
> +#ifndef __loongarch_soft_float
> + La_loongarch_vector lrv_vec0;
> + La_loongarch_vector lrv_vec1;
> +#endif
> } La_loongarch_retval;
>
> __BEGIN_DECLS
> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
> index 868ab7c6eb..b534968e30 100644
> --- a/sysdeps/loongarch/dl-link.sym
> +++ b/sysdeps/loongarch/dl-link.sym
> @@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
> DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
>
> DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg)
> -DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg)
> +#ifndef __loongarch_soft_float
> +DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec)
> +#endif
> DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra)
> DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp)
>
> DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0)
> -DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1)
> +#ifndef __loongarch_soft_float
> +DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0)
> +#endif
> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
> index 066bb233ac..8a2db9de3c 100644
> --- a/sysdeps/loongarch/dl-machine.h
> +++ b/sysdeps/loongarch/dl-machine.h
> @@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
> #if !defined __loongarch_soft_float
> extern void _dl_runtime_resolve_lasx (void) attribute_hidden;
> extern void _dl_runtime_resolve_lsx (void) attribute_hidden;
> + extern void _dl_runtime_profile_lasx (void) attribute_hidden;
> + extern void _dl_runtime_profile_lsx (void) attribute_hidden;
> #endif
> extern void _dl_runtime_resolve (void) attribute_hidden;
> extern void _dl_runtime_profile (void) attribute_hidden;
> @@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
> end in this function. */
> if (profile != 0)
> {
> - gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
> +#if !defined __loongarch_soft_float
> + if (SUPPORT_LASX)
> + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
> + else if (SUPPORT_LSX)
> + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
> + else
> +#endif
> + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
>
> if (GLRO(dl_profile) != NULL
> && _dl_name_match_p (GLRO(dl_profile), l))
> diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S
> index 8fd9146978..dce1c2f122 100644
> --- a/sysdeps/loongarch/dl-trampoline.S
> +++ b/sysdeps/loongarch/dl-trampoline.S
> @@ -22,190 +22,23 @@
> #if !defined __loongarch_soft_float
> #define USE_LASX
> #define _dl_runtime_resolve _dl_runtime_resolve_lasx
> +#define _dl_runtime_profile _dl_runtime_profile_lasx
> #include "dl-trampoline.h"
> #undef FRAME_SIZE
> #undef USE_LASX
> #undef _dl_runtime_resolve
> +#undef _dl_runtime_profile
>
> #define USE_LSX
> #define _dl_runtime_resolve _dl_runtime_resolve_lsx
> +#define _dl_runtime_profile _dl_runtime_profile_lsx
> #include "dl-trampoline.h"
> #undef FRAME_SIZE
> #undef USE_LSX
> #undef _dl_runtime_resolve
> +#undef _dl_runtime_profile
> #endif
>
> #include "dl-trampoline.h"
>
> -#include "dl-link.h"
>
> -ENTRY (_dl_runtime_profile)
> - /* LoongArch we get called with:
> - t0 linkr_map pointer
> - t1 the scaled offset stored in t0, which can be used
> - to calculate the offset of the current symbol in .rela.plt
> - t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
> - t3 dl resolver entry point, no use in this function
> -
> - Stack frame layout:
> - [sp, #96] La_loongarch_regs
> - [sp, #48] La_loongarch_retval
> - [sp, #40] frame size return from pltenter
> - [sp, #32] dl_profile_call saved a1
> - [sp, #24] dl_profile_call saved a0
> - [sp, #16] T1
> - [sp, #0] ra, fp <- fp
> - */
> -
> -# define OFFSET_T1 16
> -# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
> -# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16
> -# define OFFSET_RV OFFSET_FS + 8
> -# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
> -
> -# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
> -
> - /* Save arguments to stack. */
> - ADDI sp, sp, -SF_SIZE
> - REG_S ra, sp, 0
> - REG_S fp, sp, 8
> -
> - or fp, sp, zero
> -
> - REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
> - REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
> - REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
> - REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
> - REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
> - REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
> - REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
> - REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
> -
> -#ifndef __loongarch_soft_float
> - FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
> - FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
> - FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
> - FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
> - FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
> - FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
> - FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
> - FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
> -#endif
> -
> - /* Update .got.plt and obtain runtime address of callee. */
> - SLLI a1, t1, 1
> - or a0, t0, zero
> - ADD a1, a1, t1
> - or a2, ra, zero /* return addr */
> - ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
> - ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
> -
> - REG_S a0, fp, OFFSET_SAVED_CALL_A0
> - REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
> -
> - la t2, _dl_profile_fixup
> - jirl ra, t2, 0
> -
> - REG_L t3, fp, OFFSET_FS
> - bge t3, zero, 1f
> -
> - /* Save the return. */
> - or t4, v0, zero
> -
> - /* Restore arguments from stack. */
> - REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
> - REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
> - REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
> - REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
> - REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
> - REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
> - REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
> - REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
> -
> -#ifndef __loongarch_soft_float
> - FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
> - FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
> - FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
> - FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
> - FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
> - FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
> - FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
> - FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
> -#endif
> -
> - REG_L ra, fp, 0
> - REG_L fp, fp, SZREG
> -
> - ADDI sp, sp, SF_SIZE
> - jirl zero, t4, 0
> -
> -1:
> - /* The new frame size is in t3. */
> - SUB sp, fp, t3
> - BSTRINS sp, zero, 3, 0
> -
> - REG_S a0, fp, OFFSET_T1
> -
> - or a0, sp, zero
> - ADDI a1, fp, SF_SIZE
> - or a2, t3, zero
> - la t5, memcpy
> - jirl ra, t5, 0
> -
> - REG_L t6, fp, OFFSET_T1
> -
> - /* Call the function. */
> - REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
> - REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
> - REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
> - REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
> - REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
> - REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
> - REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
> - REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
> -
> -#ifndef __loongarch_soft_float
> - FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
> - FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
> - FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
> - FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
> - FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
> - FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
> - FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
> - FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
> -#endif
> - jirl ra, t6, 0
> -
> - REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
> - REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
> -
> -#ifndef __loongarch_soft_float
> - FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0
> - FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG
> -#endif
> -
> - /* Setup call to pltexit. */
> - REG_L a0, fp, OFFSET_SAVED_CALL_A0
> - REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
> - ADDI a2, fp, OFFSET_RG
> - ADDI a3, fp, OFFSET_RV
> - la t7, _dl_audit_pltexit
> - jirl ra, t7, 0
> -
> - REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
> - REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
> -
> -#ifndef __loongarch_soft_float
> - FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0
> - FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG
> -#endif
> -
> - /* RA from within La_loongarch_reg. */
> - REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
> - or sp, fp, zero
> - ADDI sp, sp, SF_SIZE
> - REG_S fp, fp, SZREG
> -
> - jirl zero, ra, 0
> -
> -END (_dl_runtime_profile)
> diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h
> index 02375286f8..cb4a287c65 100644
> --- a/sysdeps/loongarch/dl-trampoline.h
> +++ b/sysdeps/loongarch/dl-trampoline.h
> @@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve)
> /* Invoke the callee. */
> jirl zero, t1, 0
> END (_dl_runtime_resolve)
> +
> +#include "dl-link.h"
> +
> +ENTRY (_dl_runtime_profile)
> + /* LoongArch we get called with:
> + t0 linkr_map pointer
> + t1 the scaled offset stored in t0, which can be used
> + to calculate the offset of the current symbol in .rela.plt
> + t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
> + t3 dl resolver entry point, no use in this function
> +
> + Stack frame layout:
> + [sp, #208] La_loongarch_regs
> + [sp, #128] La_loongarch_retval // align: 16
> + [sp, #112] frame size return from pltenter
> + [sp, #80 ] dl_profile_call saved vec1
> + [sp, #48 ] dl_profile_call saved vec0 // align: 16
> + [sp, #32 ] dl_profile_call saved a1
> + [sp, #24 ] dl_profile_call saved a0
> + [sp, #16 ] T1
> + [sp, #0 ] ra, fp <- fp
> + */
> +
> +# define OFFSET_T1 16
> +# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
> +# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64
> +# define OFFSET_RV OFFSET_FS + 8 + 8
> +# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
> +
> +# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
> +
> + /* Save arguments to stack. */
> + ADDI sp, sp, -SF_SIZE
> + REG_S ra, sp, 0
> + REG_S fp, sp, 8
> +
> + or fp, sp, zero
> +
> + REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
> + REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
> + REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
> + REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
> + REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
> + REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
> + REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
> + REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
> +
> +#ifdef USE_LASX
> + xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
> + xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
> + xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
> + xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
> + xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
> + xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
> + xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
> + xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
> +#elif defined USE_LSX
> + vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
> + vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
> + vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
> + vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
> + vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
> + vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
> + vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
> + vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
> +#elif !defined __loongarch_soft_float
> + FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
> + FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
> + FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
> + FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
> + FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
> + FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
> + FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
> + FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
> +#endif
> +
> + /* Update .got.plt and obtain runtime address of callee. */
> + SLLI a1, t1, 1
> + or a0, t0, zero
> + ADD a1, a1, t1
> + or a2, ra, zero /* return addr */
> + ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
> + ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
> +
> + REG_S a0, fp, OFFSET_SAVED_CALL_A0
> + REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
> +
> + la t2, _dl_profile_fixup
> + jirl ra, t2, 0
> +
> + REG_L t3, fp, OFFSET_FS
> + bge t3, zero, 1f
> +
> + /* Save the return. */
> + or t4, v0, zero
> +
> + /* Restore arguments from stack. */
> + REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
> + REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
> + REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
> + REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
> + REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
> + REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
> + REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
> + REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
> +
> +#ifdef USE_LASX
> + xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
> + xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
> + xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
> + xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
> + xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
> + xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
> + xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
> + xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
> +#elif defined USE_LSX
> + vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
> + vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
> + vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
> + vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
> + vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
> + vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
> + vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
> + vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
> +#elif !defined __loongarch_soft_float
> + FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
> + FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
> + FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
> + FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
> + FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
> + FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
> + FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
> + FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
> +#endif
> +
> + REG_L ra, fp, 0
> + REG_L fp, fp, SZREG
> +
> + ADDI sp, sp, SF_SIZE
> + jirl zero, t4, 0
> +
> +1:
> + /* The new frame size is in t3. */
> + SUB sp, fp, t3
> + BSTRINS sp, zero, 3, 0
> +
> + REG_S a0, fp, OFFSET_T1
> +
> + or a0, sp, zero
> + ADDI a1, fp, SF_SIZE
> + or a2, t3, zero
> + la t5, memcpy
> + jirl ra, t5, 0
> +
> + REG_L t6, fp, OFFSET_T1
> +
> + /* Call the function. */
> + REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
> + REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
> + REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
> + REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
> + REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
> + REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
> + REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
> + REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
> +
> +#ifdef USE_LASX
> + xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
> + xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
> + xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
> + xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
> + xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
> + xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
> + xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
> + xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
> +#elif defined USE_LSX
> + vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
> + vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
> + vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
> + vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
> + vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
> + vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
> + vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
> + vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
> +#elif !defined __loongarch_soft_float
> + FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
> + FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
> + FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
> + FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
> + FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
> + FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
> + FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
> + FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
> +#endif
> +
> + jirl ra, t6, 0
> +
> + REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
> + REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
> +
> +#ifdef USE_LASX
> + xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
> + xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG
> +#elif defined USE_LSX
> + vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
> + vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG
> +#elif !defined __loongarch_soft_float
> + FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
> + FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG
> +#endif
> +
> + /* Setup call to pltexit. */
> + REG_L a0, fp, OFFSET_SAVED_CALL_A0
> + REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
> + ADDI a2, fp, OFFSET_RG
> + ADDI a3, fp, OFFSET_RV
> + la t7, _dl_audit_pltexit
> + jirl ra, t7, 0
> +
> + REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
> + REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
> +
> +#ifdef USE_LASX
> + xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
> + xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG
> +#elif defined USE_LSX
> + vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
> + vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG
> +#elif !defined __loongarch_soft_float
> + FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
> + FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG
> +#endif
> +
> + /* RA from within La_loongarch_reg. */
> + REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
> + or sp, fp, zero
> + ADDI sp, sp, SF_SIZE
> + REG_S fp, fp, SZREG
> +
> + jirl zero, ra, 0
> +
> +END (_dl_runtime_profile)
@@ -20,10 +20,26 @@
#error "Never include <bits/link.h> directly; use <link.h> instead."
#endif
+#ifndef __loongarch_soft_float
+typedef float La_loongarch_vr
+ __attribute__ ((__vector_size__ (16), __aligned__ (16)));
+typedef float La_loongarch_xr
+ __attribute__ ((__vector_size__ (32), __aligned__ (16)));
+
+typedef union
+{
+ double fpreg[4];
+ La_loongarch_vr vr[2];
+ La_loongarch_xr xr[1];
+} La_loongarch_vector __attribute__ ((__aligned__ (16)));
+#endif
+
typedef struct La_loongarch_regs
{
unsigned long int lr_reg[8]; /* a0 - a7 */
- double lr_fpreg[8]; /* fa0 - fa7 */
+#ifndef __loongarch_soft_float
+ La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/
+#endif
unsigned long int lr_ra;
unsigned long int lr_sp;
} La_loongarch_regs;
@@ -33,8 +49,10 @@ typedef struct La_loongarch_retval
{
unsigned long int lrv_a0;
unsigned long int lrv_a1;
- double lrv_fa0;
- double lrv_fa1;
+#ifndef __loongarch_soft_float
+ La_loongarch_vector lrv_vec0;
+ La_loongarch_vector lrv_vec1;
+#endif
} La_loongarch_retval;
__BEGIN_DECLS
@@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg)
-DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg)
+#ifndef __loongarch_soft_float
+DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec)
+#endif
DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra)
DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp)
DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0)
-DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1)
+#ifndef __loongarch_soft_float
+DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0)
+#endif
@@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
#if !defined __loongarch_soft_float
extern void _dl_runtime_resolve_lasx (void) attribute_hidden;
extern void _dl_runtime_resolve_lsx (void) attribute_hidden;
+ extern void _dl_runtime_profile_lasx (void) attribute_hidden;
+ extern void _dl_runtime_profile_lsx (void) attribute_hidden;
#endif
extern void _dl_runtime_resolve (void) attribute_hidden;
extern void _dl_runtime_profile (void) attribute_hidden;
@@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
end in this function. */
if (profile != 0)
{
- gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
+ else if (SUPPORT_LSX)
+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
+ else
+#endif
+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
if (GLRO(dl_profile) != NULL
&& _dl_name_match_p (GLRO(dl_profile), l))
@@ -22,190 +22,23 @@
#if !defined __loongarch_soft_float
#define USE_LASX
#define _dl_runtime_resolve _dl_runtime_resolve_lasx
+#define _dl_runtime_profile _dl_runtime_profile_lasx
#include "dl-trampoline.h"
#undef FRAME_SIZE
#undef USE_LASX
#undef _dl_runtime_resolve
+#undef _dl_runtime_profile
#define USE_LSX
#define _dl_runtime_resolve _dl_runtime_resolve_lsx
+#define _dl_runtime_profile _dl_runtime_profile_lsx
#include "dl-trampoline.h"
#undef FRAME_SIZE
#undef USE_LSX
#undef _dl_runtime_resolve
+#undef _dl_runtime_profile
#endif
#include "dl-trampoline.h"
-#include "dl-link.h"
-ENTRY (_dl_runtime_profile)
- /* LoongArch we get called with:
- t0 linkr_map pointer
- t1 the scaled offset stored in t0, which can be used
- to calculate the offset of the current symbol in .rela.plt
- t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
- t3 dl resolver entry point, no use in this function
-
- Stack frame layout:
- [sp, #96] La_loongarch_regs
- [sp, #48] La_loongarch_retval
- [sp, #40] frame size return from pltenter
- [sp, #32] dl_profile_call saved a1
- [sp, #24] dl_profile_call saved a0
- [sp, #16] T1
- [sp, #0] ra, fp <- fp
- */
-
-# define OFFSET_T1 16
-# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
-# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16
-# define OFFSET_RV OFFSET_FS + 8
-# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
-
-# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
-
- /* Save arguments to stack. */
- ADDI sp, sp, -SF_SIZE
- REG_S ra, sp, 0
- REG_S fp, sp, 8
-
- or fp, sp, zero
-
- REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
- REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
- REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
- REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
- REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
- REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
- REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
- REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
-
-#ifndef __loongarch_soft_float
- FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
- FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
- FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
- FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
- FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
- FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
- FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
- FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
-#endif
-
- /* Update .got.plt and obtain runtime address of callee. */
- SLLI a1, t1, 1
- or a0, t0, zero
- ADD a1, a1, t1
- or a2, ra, zero /* return addr */
- ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
- ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
-
- REG_S a0, fp, OFFSET_SAVED_CALL_A0
- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
-
- la t2, _dl_profile_fixup
- jirl ra, t2, 0
-
- REG_L t3, fp, OFFSET_FS
- bge t3, zero, 1f
-
- /* Save the return. */
- or t4, v0, zero
-
- /* Restore arguments from stack. */
- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
-
-#ifndef __loongarch_soft_float
- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
-#endif
-
- REG_L ra, fp, 0
- REG_L fp, fp, SZREG
-
- ADDI sp, sp, SF_SIZE
- jirl zero, t4, 0
-
-1:
- /* The new frame size is in t3. */
- SUB sp, fp, t3
- BSTRINS sp, zero, 3, 0
-
- REG_S a0, fp, OFFSET_T1
-
- or a0, sp, zero
- ADDI a1, fp, SF_SIZE
- or a2, t3, zero
- la t5, memcpy
- jirl ra, t5, 0
-
- REG_L t6, fp, OFFSET_T1
-
- /* Call the function. */
- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
-
-#ifndef __loongarch_soft_float
- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
-#endif
- jirl ra, t6, 0
-
- REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
-
-#ifndef __loongarch_soft_float
- FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0
- FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG
-#endif
-
- /* Setup call to pltexit. */
- REG_L a0, fp, OFFSET_SAVED_CALL_A0
- REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
- ADDI a2, fp, OFFSET_RG
- ADDI a3, fp, OFFSET_RV
- la t7, _dl_audit_pltexit
- jirl ra, t7, 0
-
- REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
- REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
-
-#ifndef __loongarch_soft_float
- FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0
- FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG
-#endif
-
- /* RA from within La_loongarch_reg. */
- REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
- or sp, fp, zero
- ADDI sp, sp, SF_SIZE
- REG_S fp, fp, SZREG
-
- jirl zero, ra, 0
-
-END (_dl_runtime_profile)
@@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve)
/* Invoke the callee. */
jirl zero, t1, 0
END (_dl_runtime_resolve)
+
+#include "dl-link.h"
+
+ENTRY (_dl_runtime_profile)
+ /* LoongArch we get called with:
+ t0 linkr_map pointer
+ t1 the scaled offset stored in t0, which can be used
+ to calculate the offset of the current symbol in .rela.plt
+ t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
+ t3 dl resolver entry point, no use in this function
+
+ Stack frame layout:
+ [sp, #208] La_loongarch_regs
+ [sp, #128] La_loongarch_retval // align: 16
+ [sp, #112] frame size return from pltenter
+ [sp, #80 ] dl_profile_call saved vec1
+ [sp, #48 ] dl_profile_call saved vec0 // align: 16
+ [sp, #32 ] dl_profile_call saved a1
+ [sp, #24 ] dl_profile_call saved a0
+ [sp, #16 ] T1
+ [sp, #0 ] ra, fp <- fp
+ */
+
+# define OFFSET_T1 16
+# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
+# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64
+# define OFFSET_RV OFFSET_FS + 8 + 8
+# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
+
+# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
+
+ /* Save arguments to stack. */
+ ADDI sp, sp, -SF_SIZE
+ REG_S ra, sp, 0
+ REG_S fp, sp, 8
+
+ or fp, sp, zero
+
+ REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
+ REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
+ REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
+ REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
+ REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
+ REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
+ REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
+ REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
+
+#ifdef USE_LASX
+ xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
+ xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
+ xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
+ xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
+ xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
+ xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
+ xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
+ xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
+#elif defined USE_LSX
+ vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
+ vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
+ vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
+ vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
+ vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
+ vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
+ vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
+ vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
+ FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
+ FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
+ FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
+ FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
+ FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
+ FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
+ FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
+#endif
+
+ /* Update .got.plt and obtain runtime address of callee. */
+ SLLI a1, t1, 1
+ or a0, t0, zero
+ ADD a1, a1, t1
+ or a2, ra, zero /* return addr */
+ ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
+ ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
+
+ REG_S a0, fp, OFFSET_SAVED_CALL_A0
+ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
+
+ la t2, _dl_profile_fixup
+ jirl ra, t2, 0
+
+ REG_L t3, fp, OFFSET_FS
+ bge t3, zero, 1f
+
+ /* Save the return. */
+ or t4, v0, zero
+
+ /* Restore arguments from stack. */
+ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
+ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
+ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
+ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
+ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
+ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
+ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
+ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
+
+#ifdef USE_LASX
+ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
+ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
+ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
+ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
+ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
+ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
+ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
+ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
+#elif defined USE_LSX
+ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
+ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
+ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
+ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
+ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
+ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
+ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
+ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
+ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
+ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
+ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
+ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
+ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
+ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
+ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
+#endif
+
+ REG_L ra, fp, 0
+ REG_L fp, fp, SZREG
+
+ ADDI sp, sp, SF_SIZE
+ jirl zero, t4, 0
+
+1:
+ /* The new frame size is in t3. */
+ SUB sp, fp, t3
+ BSTRINS sp, zero, 3, 0
+
+ REG_S a0, fp, OFFSET_T1
+
+ or a0, sp, zero
+ ADDI a1, fp, SF_SIZE
+ or a2, t3, zero
+ la t5, memcpy
+ jirl ra, t5, 0
+
+ REG_L t6, fp, OFFSET_T1
+
+ /* Call the function. */
+ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
+ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
+ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
+ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
+ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
+ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
+ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
+ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
+
+#ifdef USE_LASX
+ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
+ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
+ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
+ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
+ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
+ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
+ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
+ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
+#elif defined USE_LSX
+ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
+ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
+ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
+ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
+ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
+ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
+ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
+ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
+ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
+ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
+ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
+ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
+ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
+ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
+ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
+#endif
+
+ jirl ra, t6, 0
+
+ REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
+ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
+
+#ifdef USE_LASX
+ xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG
+#elif defined USE_LSX
+ vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG
+#endif
+
+ /* Setup call to pltexit. */
+ REG_L a0, fp, OFFSET_SAVED_CALL_A0
+ REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
+ ADDI a2, fp, OFFSET_RG
+ ADDI a3, fp, OFFSET_RV
+ la t7, _dl_audit_pltexit
+ jirl ra, t7, 0
+
+ REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
+ REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
+
+#ifdef USE_LASX
+ xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG
+#elif defined USE_LSX
+ vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG
+#endif
+
+ /* RA from within La_loongarch_reg. */
+ REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
+ or sp, fp, zero
+ ADDI sp, sp, SF_SIZE
+ REG_S fp, fp, SZREG
+
+ jirl zero, ra, 0
+
+END (_dl_runtime_profile)