[2/2] LoongArch: Add vector implementation for _dl_runtime_resolve.

Message ID 20230710094151.3002001-2-caiyinyu@loongson.cn
State New
Headers
Series [1/2] LoongArch: config: Added HAVE_LOONGARCH_VEC_ASM. |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_build--master-arm warning Patch failed to apply
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 warning Patch failed to apply
linaro-tcwg-bot/tcwg_glibc_check--master-arm warning Patch failed to apply
redhat-pt-bot/TryBot-still_applies warning Patch no longer applies to master

Commit Message

caiyinyu July 10, 2023, 9:41 a.m. UTC
  ---
 sysdeps/loongarch/dl-machine.h                |  13 +-
 sysdeps/loongarch/dl-trampoline.S             |  84 +++--------
 sysdeps/loongarch/dl-trampoline.h             | 131 ++++++++++++++++++
 sysdeps/loongarch/ldsodefs.h                  |   1 +
 sysdeps/loongarch/sys/asm.h                   |   2 +
 sysdeps/loongarch/sys/regdef.h                |  18 +++
 .../unix/sysv/linux/loongarch/bits/hwcap.h    |  37 +++++
 .../unix/sysv/linux/loongarch/cpu-features.h  |  29 ++++
 8 files changed, 246 insertions(+), 69 deletions(-)
 create mode 100644 sysdeps/loongarch/dl-trampoline.h
 create mode 100644 sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h
 create mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.h
  

Comments

Xi Ruoyao July 10, 2023, 10:11 a.m. UTC | #1
On Mon, 2023-07-10 at 17:41 +0800, caiyinyu wrote:
> +#ifndef __loongarch_soft_float
> +       FREG_S  fa0, sp, 9*SZREG + 0*SZFREG
> +       FREG_S  fa1, sp, 9*SZREG + 1*SZFREG
> +       FREG_S  fa2, sp, 9*SZREG + 2*SZFREG
> +       FREG_S  fa3, sp, 9*SZREG + 3*SZFREG
> +       FREG_S  fa4, sp, 9*SZREG + 4*SZFREG
> +       FREG_S  fa5, sp, 9*SZREG + 5*SZFREG
> +       FREG_S  fa6, sp, 9*SZREG + 6*SZFREG
> +       FREG_S  fa7, sp, 9*SZREG + 7*SZFREG

fa[x] is aliasing with the lower 64 bits of vr[x], if LSX is available.
So we don't need to save/load fa[x] separately if vr[x] or xr[x] is
saved.

i. e.

#ifdef USE_LASX
xvst ... ...
#else if defined USE_LSX
vst ... ...
#else if !defined __loongarch_soft_float
FREG_S ... ...
#endif

> +#ifdef USE_LASX
> +       xvst    xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG
> +       xvst    xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG
> +       xvst    xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG
> +       xvst    xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG
> +       xvst    xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG
> +       xvst    xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG
> +       xvst    xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG
> +       xvst    xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG
> +#elif defined USE_LSX
> +       vst     vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG
> +       vst     vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG
> +       vst     vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG
> +       vst     vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG
> +       vst     vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG
> +       vst     vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG
> +       vst     vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG
> +       vst     vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG
> +#endif
> +#endif
  
caiyinyu July 11, 2023, 1:54 a.m. UTC | #2
在 2023/7/10 下午6:11, Xi Ruoyao 写道:
> On Mon, 2023-07-10 at 17:41 +0800, caiyinyu wrote:
>> +#ifndef __loongarch_soft_float
>> +       FREG_S  fa0, sp, 9*SZREG + 0*SZFREG
>> +       FREG_S  fa1, sp, 9*SZREG + 1*SZFREG
>> +       FREG_S  fa2, sp, 9*SZREG + 2*SZFREG
>> +       FREG_S  fa3, sp, 9*SZREG + 3*SZFREG
>> +       FREG_S  fa4, sp, 9*SZREG + 4*SZFREG
>> +       FREG_S  fa5, sp, 9*SZREG + 5*SZFREG
>> +       FREG_S  fa6, sp, 9*SZREG + 6*SZFREG
>> +       FREG_S  fa7, sp, 9*SZREG + 7*SZFREG
> fa[x] is aliasing with the lower 64 bits of vr[x], if LSX is available.
> So we don't need to save/load fa[x] separately if vr[x] or xr[x] is
> saved.
>
> i. e.
>
> #ifdef USE_LASX
> xvst ... ...
> #else if defined USE_LSX
> vst ... ...
> #else if !defined __loongarch_soft_float
> FREG_S ... ...
> #endif

OK, fixed, thanks!

>
>> +#ifdef USE_LASX
>> +       xvst    xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG
>> +       xvst    xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG
>> +       xvst    xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG
>> +       xvst    xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG
>> +       xvst    xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG
>> +       xvst    xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG
>> +       xvst    xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG
>> +       xvst    xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG
>> +#elif defined USE_LSX
>> +       vst     vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG
>> +       vst     vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG
>> +       vst     vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG
>> +       vst     vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG
>> +       vst     vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG
>> +       vst     vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG
>> +       vst     vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG
>> +       vst     vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG
>> +#endif
>> +#endif
  
caiyinyu July 11, 2023, 4:46 a.m. UTC | #3
在 2023/7/11 上午9:54, caiyinyu 写道:
>
> 在 2023/7/10 下午6:11, Xi Ruoyao 写道:
>> On Mon, 2023-07-10 at 17:41 +0800, caiyinyu wrote:
>>> +#ifndef __loongarch_soft_float
>>> +       FREG_S  fa0, sp, 9*SZREG + 0*SZFREG
>>> +       FREG_S  fa1, sp, 9*SZREG + 1*SZFREG
>>> +       FREG_S  fa2, sp, 9*SZREG + 2*SZFREG
>>> +       FREG_S  fa3, sp, 9*SZREG + 3*SZFREG
>>> +       FREG_S  fa4, sp, 9*SZREG + 4*SZFREG
>>> +       FREG_S  fa5, sp, 9*SZREG + 5*SZFREG
>>> +       FREG_S  fa6, sp, 9*SZREG + 6*SZFREG
>>> +       FREG_S  fa7, sp, 9*SZREG + 7*SZFREG
>> fa[x] is aliasing with the lower 64 bits of vr[x], if LSX is available.
>> So we don't need to save/load fa[x] separately if vr[x] or xr[x] is
>> saved.
>>
>> i. e.
>>
>> #ifdef USE_LASX
>> xvst ... ...
>> #else if defined USE_LSX
>> vst ... ...
>> #else if !defined __loongarch_soft_float
>> FREG_S ... ...
>> #endif
>
> OK, fixed, thanks!
>
I made a mistake soft-float abi. See 
https://sourceware.org/pipermail/libc-alpha/2023-July/149932.html


>>
>>> +#ifdef USE_LASX
>>> +       xvst    xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG
>>> +       xvst    xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG
>>> +       xvst    xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG
>>> +       xvst    xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG
>>> +       xvst    xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG
>>> +       xvst    xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG
>>> +       xvst    xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG
>>> +       xvst    xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG
>>> +#elif defined USE_LSX
>>> +       vst     vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG
>>> +       vst     vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG
>>> +       vst     vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG
>>> +       vst     vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG
>>> +       vst     vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG
>>> +       vst     vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG
>>> +       vst     vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG
>>> +       vst     vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG
>>> +#endif
>>> +#endif
  

Patch

diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index e217d37c4b..02ce17852c 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -270,6 +270,10 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
   /* If using PLTs, fill in the first two entries of .got.plt.  */
   if (l->l_info[DT_JMPREL])
     {
+#if HAVE_LOONGARCH_VEC_ASM
+      extern void _dl_runtime_resolve_lasx (void) attribute_hidden;
+      extern void _dl_runtime_resolve_lsx (void) attribute_hidden;
+#endif
       extern void _dl_runtime_resolve (void) attribute_hidden;
       extern void _dl_runtime_profile (void) attribute_hidden;
 
@@ -296,7 +300,14 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	  /* This function will get called to fix up the GOT entry
 	     indicated by the offset on the stack, and then jump to
 	     the resolved address.  */
-	  gotplt[0] = (ElfW (Addr)) & _dl_runtime_resolve;
+#if HAVE_LOONGARCH_VEC_ASM
+	  if (SUPPORT_LASX)
+	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
+	  else if (SUPPORT_LSX)
+	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
+	  else
+#endif
+	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve;
 	}
       gotplt[1] = (ElfW (Addr)) l;
     }
diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S
index ed9ec0901c..2a561b7136 100644
--- a/sysdeps/loongarch/dl-trampoline.S
+++ b/sysdeps/loongarch/dl-trampoline.S
@@ -19,77 +19,25 @@ 
 #include <sysdep.h>
 #include <sys/asm.h>
 
-#include "dl-link.h"
-
-/* Assembler veneer called from the PLT header code for lazy loading.
-   The PLT header passes its own args in t0-t2.  */
-#ifdef __loongarch_soft_float
-#define FRAME_SIZE (-((-10 * SZREG) & ALMASK))
-#else
-#define FRAME_SIZE (-((-10 * SZREG - 8 * SZFREG) & ALMASK))
+#if HAVE_LOONGARCH_VEC_ASM
+#define USE_LASX
+#define _dl_runtime_resolve _dl_runtime_resolve_lasx
+#include "dl-trampoline.h"
+#undef FRAME_SIZE
+#undef USE_LASX
+#undef _dl_runtime_resolve
+
+#define USE_LSX
+#define _dl_runtime_resolve _dl_runtime_resolve_lsx
+#include "dl-trampoline.h"
+#undef FRAME_SIZE
+#undef USE_LSX
+#undef _dl_runtime_resolve
 #endif
 
-ENTRY (_dl_runtime_resolve)
-
-	/* Save arguments to stack. */
-	ADDI	sp, sp, -FRAME_SIZE
-	REG_S	ra, sp, 9*SZREG
-	REG_S	a0, sp, 1*SZREG
-	REG_S	a1, sp, 2*SZREG
-	REG_S	a2, sp, 3*SZREG
-	REG_S	a3, sp, 4*SZREG
-	REG_S	a4, sp, 5*SZREG
-	REG_S	a5, sp, 6*SZREG
-	REG_S	a6, sp, 7*SZREG
-	REG_S	a7, sp, 8*SZREG
-
-#ifndef __loongarch_soft_float
-	FREG_S	fa0, sp, 10*SZREG + 0*SZFREG
-	FREG_S	fa1, sp, 10*SZREG + 1*SZFREG
-	FREG_S	fa2, sp, 10*SZREG + 2*SZFREG
-	FREG_S	fa3, sp, 10*SZREG + 3*SZFREG
-	FREG_S	fa4, sp, 10*SZREG + 4*SZFREG
-	FREG_S	fa5, sp, 10*SZREG + 5*SZFREG
-	FREG_S	fa6, sp, 10*SZREG + 6*SZFREG
-	FREG_S	fa7, sp, 10*SZREG + 7*SZFREG
-#endif
-
-	/* Update .got.plt and obtain runtime address of callee */
-	SLLI	a1, t1, 1
-	or	a0, t0, zero
-	ADD	a1, a1, t1
-	la	a2, _dl_fixup
-	jirl	ra, a2, 0
-	or	t1, v0, zero
-
-	/* Restore arguments from stack. */
-	REG_L	ra, sp, 9*SZREG
-	REG_L	a0, sp, 1*SZREG
-	REG_L	a1, sp, 2*SZREG
-	REG_L	a2, sp, 3*SZREG
-	REG_L	a3, sp, 4*SZREG
-	REG_L	a4, sp, 5*SZREG
-	REG_L	a5, sp, 6*SZREG
-	REG_L	a6, sp, 7*SZREG
-	REG_L	a7, sp, 8*SZREG
-
-#ifndef __loongarch_soft_float
-	FREG_L	fa0, sp, 10*SZREG + 0*SZFREG
-	FREG_L	fa1, sp, 10*SZREG + 1*SZFREG
-	FREG_L	fa2, sp, 10*SZREG + 2*SZFREG
-	FREG_L	fa3, sp, 10*SZREG + 3*SZFREG
-	FREG_L	fa4, sp, 10*SZREG + 4*SZFREG
-	FREG_L	fa5, sp, 10*SZREG + 5*SZFREG
-	FREG_L	fa6, sp, 10*SZREG + 6*SZFREG
-	FREG_L	fa7, sp, 10*SZREG + 7*SZFREG
-#endif
-
-	ADDI	sp, sp, FRAME_SIZE
-
-	/* Invoke the callee. */
-	jirl		zero, t1, 0
-END (_dl_runtime_resolve)
+#include "dl-trampoline.h"
 
+#include "dl-link.h"
 
 ENTRY (_dl_runtime_profile)
        /* LoongArch we get called with:
diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h
new file mode 100644
index 0000000000..d2833488df
--- /dev/null
+++ b/sysdeps/loongarch/dl-trampoline.h
@@ -0,0 +1,131 @@ 
+/* PLT trampolines.
+   Copyright (C) 2022-2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Assembler veneer called from the PLT header code for lazy loading.
+   The PLT header passes its own args in t0-t2.  */
+#ifndef __loongarch_soft_float
+# ifdef USE_LASX
+#  define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG - 8 * SZXREG) & ALMASK))
+# elif defined USE_LSX
+#  define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG - 8 * SZVREG) & ALMASK))
+# else
+#  define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG) & ALMASK))
+# endif
+#else
+# define FRAME_SIZE (-((-9 * SZREG) & ALMASK))
+#endif
+
+ENTRY (_dl_runtime_resolve)
+
+	/* Save arguments to stack. */
+	ADDI	sp, sp, -FRAME_SIZE
+
+	REG_S	ra, sp, 0*SZREG
+	REG_S	a0, sp, 1*SZREG
+	REG_S	a1, sp, 2*SZREG
+	REG_S	a2, sp, 3*SZREG
+	REG_S	a3, sp, 4*SZREG
+	REG_S	a4, sp, 5*SZREG
+	REG_S	a5, sp, 6*SZREG
+	REG_S	a6, sp, 7*SZREG
+	REG_S	a7, sp, 8*SZREG
+
+#ifndef __loongarch_soft_float
+	FREG_S	fa0, sp, 9*SZREG + 0*SZFREG
+	FREG_S	fa1, sp, 9*SZREG + 1*SZFREG
+	FREG_S	fa2, sp, 9*SZREG + 2*SZFREG
+	FREG_S	fa3, sp, 9*SZREG + 3*SZFREG
+	FREG_S	fa4, sp, 9*SZREG + 4*SZFREG
+	FREG_S	fa5, sp, 9*SZREG + 5*SZFREG
+	FREG_S	fa6, sp, 9*SZREG + 6*SZFREG
+	FREG_S	fa7, sp, 9*SZREG + 7*SZFREG
+#ifdef USE_LASX
+	xvst	xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG
+	xvst	xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG
+	xvst	xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG
+	xvst	xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG
+	xvst	xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG
+	xvst	xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG
+	xvst	xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG
+	xvst	xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG
+#elif defined USE_LSX
+	vst	vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG
+	vst	vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG
+	vst	vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG
+	vst	vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG
+	vst	vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG
+	vst	vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG
+	vst	vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG
+	vst	vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG
+#endif
+#endif
+
+	/* Update .got.plt and obtain runtime address of callee */
+	SLLI	a1, t1, 1
+	or	a0, t0, zero
+	ADD	a1, a1, t1
+	la	a2, _dl_fixup
+	jirl	ra, a2, 0
+	or	t1, v0, zero
+
+	/* Restore arguments from stack. */
+	REG_L	ra, sp, 0*SZREG
+	REG_L	a0, sp, 1*SZREG
+	REG_L	a1, sp, 2*SZREG
+	REG_L	a2, sp, 3*SZREG
+	REG_L	a3, sp, 4*SZREG
+	REG_L	a4, sp, 5*SZREG
+	REG_L	a5, sp, 6*SZREG
+	REG_L	a6, sp, 7*SZREG
+	REG_L	a7, sp, 8*SZREG
+
+#ifndef __loongarch_soft_float
+	FREG_L	fa0, sp, 9*SZREG + 0*SZFREG
+	FREG_L	fa1, sp, 9*SZREG + 1*SZFREG
+	FREG_L	fa2, sp, 9*SZREG + 2*SZFREG
+	FREG_L	fa3, sp, 9*SZREG + 3*SZFREG
+	FREG_L	fa4, sp, 9*SZREG + 4*SZFREG
+	FREG_L	fa5, sp, 9*SZREG + 5*SZFREG
+	FREG_L	fa6, sp, 9*SZREG + 6*SZFREG
+	FREG_L	fa7, sp, 9*SZREG + 7*SZFREG
+#ifdef USE_LASX
+	xvld	xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG
+	xvld	xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG
+	xvld	xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG
+	xvld	xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG
+	xvld	xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG
+	xvld	xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG
+	xvld	xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG
+	xvld	xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG
+#elif defined USE_LSX
+	vld	vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG
+	vld	vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG
+	vld	vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG
+	vld	vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG
+	vld	vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG
+	vld	vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG
+	vld	vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG
+	vld	vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG
+#endif
+#endif
+
+	ADDI	sp, sp, FRAME_SIZE
+
+	/* Invoke the callee. */
+	jirl	zero, t1, 0
+END (_dl_runtime_resolve)
diff --git a/sysdeps/loongarch/ldsodefs.h b/sysdeps/loongarch/ldsodefs.h
index a8ef803aec..3b7c4ab83d 100644
--- a/sysdeps/loongarch/ldsodefs.h
+++ b/sysdeps/loongarch/ldsodefs.h
@@ -20,6 +20,7 @@ 
 #define _LOONGARCH_LDSODEFS_H 1
 
 #include <elf.h>
+#include <cpu-features.h>
 
 struct La_loongarch_regs;
 struct La_loongarch_retval;
diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
index 0bb430bb05..d1a279b8fb 100644
--- a/sysdeps/loongarch/sys/asm.h
+++ b/sysdeps/loongarch/sys/asm.h
@@ -25,6 +25,8 @@ 
 /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
 #define SZREG 8
 #define SZFREG 8
+#define SZVREG 16
+#define SZXREG 32
 #define REG_L ld.d
 #define REG_S st.d
 #define SRLI srli.d
diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
index 91810f5e8e..5100f36d24 100644
--- a/sysdeps/loongarch/sys/regdef.h
+++ b/sysdeps/loongarch/sys/regdef.h
@@ -90,4 +90,22 @@ 
 #define fs6 $f30
 #define fs7 $f31
 
+#define vr0 $vr0
+#define vr1 $vr1
+#define vr2 $vr2
+#define vr3 $vr3
+#define vr4 $vr4
+#define vr5 $vr5
+#define vr6 $vr6
+#define vr7 $vr7
+
+#define xr0 $xr0
+#define xr1 $xr1
+#define xr2 $xr2
+#define xr3 $xr3
+#define xr4 $xr4
+#define xr5 $xr5
+#define xr6 $xr6
+#define xr7 $xr7
+
 #endif /* _SYS_REGDEF_H */
diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h
new file mode 100644
index 0000000000..5104b69cbc
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h
@@ -0,0 +1,37 @@ 
+/* Defines for bits in AT_HWCAP.  LoongArch64 Linux version.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if !defined (_SYS_AUXV_H)
+# error "Never include <bits/hwcap.h> directly; use <sys/auxv.h> instead."
+#endif
+
+/* The following must match the kernel's <asm/hwcap.h>.  */
+/* HWCAP flags */
+#define HWCAP_LOONGARCH_CPUCFG          (1 << 0)
+#define HWCAP_LOONGARCH_LAM             (1 << 1)
+#define HWCAP_LOONGARCH_UAL             (1 << 2)
+#define HWCAP_LOONGARCH_FPU             (1 << 3)
+#define HWCAP_LOONGARCH_LSX             (1 << 4)
+#define HWCAP_LOONGARCH_LASX            (1 << 5)
+#define HWCAP_LOONGARCH_CRC32           (1 << 6)
+#define HWCAP_LOONGARCH_COMPLEX         (1 << 7)
+#define HWCAP_LOONGARCH_CRYPTO          (1 << 8)
+#define HWCAP_LOONGARCH_LVZ             (1 << 9)
+#define HWCAP_LOONGARCH_LBT_X86         (1 << 10)
+#define HWCAP_LOONGARCH_LBT_ARM         (1 << 11)
+#define HWCAP_LOONGARCH_LBT_MIPS        (1 << 12)
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
new file mode 100644
index 0000000000..e371e13b15
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
@@ -0,0 +1,29 @@ 
+/* Initialize CPU feature data.  LoongArch64 version.
+   This file is part of the GNU C Library.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _CPU_FEATURES_LOONGARCH64_H
+#define _CPU_FEATURES_LOONGARCH64_H
+
+#include <sys/auxv.h>
+
+#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
+#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
+#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
+
+#endif /* _CPU_FEATURES_LOONGARCH64_H  */
+