[v6,1/6] LoongArch: Add support for LA32 in sysdeps/loongarch

Message ID 20260312014203.1916933-2-mengqinggang@loongson.cn (mailing list archive)
State Under Review
Delegated to: Adhemerval Zanella Netto
Headers
Series Add support for LoongArch32 |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Test passed

Commit Message

mengqinggang March 12, 2026, 1:41 a.m. UTC
  ---
 scripts/build-many-glibcs.py           | 13 +++-
 sysdeps/loongarch/configure            | 29 ++++----
 sysdeps/loongarch/configure.ac         | 21 +++---
 sysdeps/loongarch/dl-machine.h         | 16 +++--
 sysdeps/loongarch/dl-tlsdesc-dynamic.h | 37 +++++-----
 sysdeps/loongarch/dl-tlsdesc.S         |  6 +-
 sysdeps/loongarch/dl-trampoline.h      |  4 +-
 sysdeps/loongarch/hp-timing.h          |  9 +++
 sysdeps/loongarch/preconfigure         |  3 +-
 sysdeps/loongarch/preconfigure.ac      |  3 +-
 sysdeps/loongarch/sfp-machine.h        | 33 +++++++++
 sysdeps/loongarch/start.S              | 32 ++++++---
 sysdeps/loongarch/sys/asm.h            | 93 ++++++++++++++++++++++++--
 sysdeps/loongarch/tst-gnu2-tls2.h      | 50 ++++++--------
 14 files changed, 246 insertions(+), 103 deletions(-)
  

Patch

diff --git a/scripts/build-many-glibcs.py b/scripts/build-many-glibcs.py
index 6fabd2db96..04aff68236 100755
--- a/scripts/build-many-glibcs.py
+++ b/scripts/build-many-glibcs.py
@@ -262,12 +262,18 @@  class Context(object):
                         os_name='linux-gnu')
         self.add_config(arch='i686',
                         os_name='gnu')
+        self.add_config(arch='loongarch32',
+                        os_name='linux-gnu',
+                        gcc_cfg=['--with-arch=la32v1.0', '--disable-multilib'])
+        self.add_config(arch='loongarch32',
+                        os_name='linux-gnusf',
+                        gcc_cfg=['--with-arch=la32rv1.0', '--disable-multilib'])
         self.add_config(arch='loongarch64',
-                        os_name='linux-gnuf64',
-                        gcc_cfg=['--disable-multilib'])
+                        os_name='linux-gnu',
+                        gcc_cfg=['--with-arch=loongarch64', '--disable-multilib'])
         self.add_config(arch='loongarch64',
                         os_name='linux-gnusf',
-                        gcc_cfg=['--disable-multilib'])
+                        gcc_cfg=['--with-arch=loongarch64', '--disable-multilib'])
         self.add_config(arch='m68k',
                         os_name='linux-gnu',
                         gcc_cfg=['--disable-multilib'])
@@ -1368,6 +1374,7 @@  def install_linux_headers(policy, cmdlist):
                 'i586': 'x86',
                 'i686': 'x86',
                 'i786': 'x86',
+                'loongarch32': 'loongarch',
                 'loongarch64': 'loongarch',
                 'm68k': 'm68k',
                 'microblaze': 'microblaze',
diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure
index d3d9fec910..f32f9ae91a 100644
--- a/sysdeps/loongarch/configure
+++ b/sysdeps/loongarch/configure
@@ -113,7 +113,8 @@  if test $libc_cv_loongarch_vec_asm = no; then
   as_fn_error $? "binutils version is too old, use 2.41 or newer version" "$LINENO" 5
 fi
 
-
+# Check if the compiler supports vector instructions.
+# Add -mlsx because la32 gcc disables LSX by default.
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for vector support in compiler" >&5
 printf %s "checking for vector support in compiler... " >&6; }
 if test ${libc_cv_loongarch_vec_com+y}
@@ -121,30 +122,28 @@  then :
   printf %s "(cached) " >&6
 else case e in #(
   e)
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
+cat > conftest.c <<\EOF
 void foo (void)
 {
   asm volatile ("vldi \$vr0, 1" ::: "\$vr0");
-  asm volatile ("xvldi \$xr0, 1" ::: "\$xr0");
 }
-
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"
-then :
+EOF
+if { ac_try='${CC-cc} -c -mlsx $CFLAGS conftest.c -o conftest 1>&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
   libc_cv_loongarch_vec_com=yes
-else case e in #(
-  e) libc_cv_loongarch_vec_com=no ;;
-esac
+else
+  libc_cv_loongarch_vec_com=no
 fi
-rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;;
+rm -f conftest* ;;
 esac
 fi
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_loongarch_vec_com" >&5
 printf "%s\n" "$libc_cv_loongarch_vec_com" >&6; }
-if test "$libc_cv_loongarch_vec_com" = yes ;
-then
+if test "$libc_cv_loongarch_vec_com" = yes ; then
   printf "%s\n" "#define HAVE_LOONGARCH_VEC_COM 1" >>confdefs.h
 
 fi
diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac
index c56a203574..425416ced7 100644
--- a/sysdeps/loongarch/configure.ac
+++ b/sysdeps/loongarch/configure.ac
@@ -66,19 +66,22 @@  if test $libc_cv_loongarch_vec_asm = no; then
   AC_MSG_ERROR([binutils version is too old, use 2.41 or newer version])
 fi
 
-AC_CACHE_CHECK([for vector support in compiler],
-		libc_cv_loongarch_vec_com, [
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+# Check if the compiler supports vector instructions.
+# Add -mlsx because la32 gcc disables LSX by default.
+AC_CACHE_CHECK([for vector support in compiler], libc_cv_loongarch_vec_com, [
+cat > conftest.c <<\EOF
 void foo (void)
 {
   asm volatile ("vldi \$vr0, 1" ::: "\$vr0");
-  asm volatile ("xvldi \$xr0, 1" ::: "\$xr0");
 }
-]])],
-                [libc_cv_loongarch_vec_com=yes],
-                [libc_cv_loongarch_vec_com=no])])
-if test "$libc_cv_loongarch_vec_com" = yes ;
-then
+EOF
+if AC_TRY_COMMAND(${CC-cc} -c -mlsx $CFLAGS conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD); then
+  libc_cv_loongarch_vec_com=yes
+else
+  libc_cv_loongarch_vec_com=no
+fi
+rm -f conftest*])
+if test "$libc_cv_loongarch_vec_com" = yes ; then
   AC_DEFINE(HAVE_LOONGARCH_VEC_COM)
 fi
 LIBC_CONFIG_VAR([loongarch-vec-com], [$libc_cv_loongarch_vec_com])
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index b5fb67debb..e8ee50b810 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -98,6 +98,8 @@  static inline ElfW (Addr) elf_machine_dynamic (void)
   return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address ();
 }
 
+#define STRINGXP(X) __STRING (X)
+
 /* Initial entry point code for the dynamic linker.
    The C function `_dl_start' is the real entry point;
    its return value is the user program's entry point.  */
@@ -112,19 +114,19 @@  static inline ElfW (Addr) elf_machine_dynamic (void)
 	# Stash user entry point in s0.   \n\
 	or	$s0, $a0, $zero   \n\
 	# Load the original argument count.   \n\
-	ld.d	$a1, $sp, 0   \n\
+	" STRINGXP (REG_L) "	$a1, $sp, 0   \n\
 	# Call _dl_init (struct link_map *main_map, int argc, \
 			 char **argv, char **env)    \n\
 	la	$a0, _rtld_local   \n\
-	ld.d	$a0, $a0, 0   \n\
-	addi.d	$a2, $sp, 8   \n\
-	slli.d	$a3, $a1, 3   \n\
-	add.d	$a3, $a3, $a2   \n\
-	addi.d	$a3, $a3, 8   \n\
+	" STRINGXP (REG_L) "	$a0, $a0, 0   \n\
+	" STRINGXP (ADDI) "	$a2, $sp, " STRINGXP (SZREG) "   \n\
+	" STRINGXP (SLLI) "	$a3, $a1, " STRINGXP (PTRLOG) "   \n\
+	" STRINGXP (ADD) "	$a3, $a3, $a2   \n\
+	" STRINGXP (ADDI) "	$a3, $a3, " STRINGXP (SZREG) "   \n\
 	# Stash the stack pointer in s1.\n\
 	or	$s1, $sp, $zero	\n\
 	# Adjust $sp for 16-aligned   \n\
-	bstrins.d	$sp, $zero, 3, 0  \n\
+	" REG_ALIGN_C ($sp, 4) " \n\
 	# Call the function to run the initializers.   \n\
 	bl	_dl_init   \n\
 	# Restore the stack pointer for _start.\n\
diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
index 9edae3afc8..78e9087417 100644
--- a/sysdeps/loongarch/dl-tlsdesc-dynamic.h
+++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
@@ -53,14 +53,14 @@ 
 _dl_tlsdesc_dynamic:
 	/* Save just enough registers to support fast path, if we fall
 	   into slow path we will save additional registers.  */
-	ADDI	sp, sp, -32
-	cfi_adjust_cfa_offset (32)
+	ADDI	sp, sp, -(4 * SZREG)
+	cfi_adjust_cfa_offset (4 * SZREG)
 	REG_S	t0, sp, 0
-	REG_S	t1, sp, 8
-	REG_S	t2, sp, 16
+	REG_S	t1, sp, SZREG
+	REG_S	t2, sp, 2 * SZREG
 	cfi_rel_offset (12, 0)
-	cfi_rel_offset (13, 8)
-	cfi_rel_offset (14, 16)
+	cfi_rel_offset (13, SZREG)
+	cfi_rel_offset (14, 2 * SZREG)
 
 /* Runtime Storage Layout of Thread-Local Storage
    TP point to the start of TLS block.
@@ -81,11 +81,11 @@  Hign address	dynamic_block1 <----- dtv5  */
 	bltu	t2, t1, .Lslow
 
 	REG_L	t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
-	/* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
-	slli.d	t1, t1, 4
-	add.d	t1, t1, t0  /* t1 = dtv[td->tlsinfo.ti_module] */
+	/* t1 = t1 * sizeof(dtv_pointer) = t1 * (2 * sizeof(void*)) */
+	SLLI	t1, t1, (PTRLOG + 1)
+	ADD	t1, t1, t0  /* t1 = dtv[td->tlsinfo.ti_module] */
 	REG_L	t1, t1, 0   /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
-	li.d	t2, TLS_DTV_UNALLOCATED
+	LI	t2, TLS_DTV_UNALLOCATED
 	/* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
 	   goto slow path.  */
 	beq	t1, t2, .Lslow
@@ -93,14 +93,14 @@  Hign address	dynamic_block1 <----- dtv5  */
 	cfi_remember_state
 	REG_L	t2, a0, TLSDESC_MODOFF	/* t2 = td->tlsinfo.ti_offset */
 	/* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
-	add.d	a0, t1, t2
+	ADD	a0, t1, t2
 .Lret:
-	sub.d	a0, a0, tp
+	SUB	a0, a0, tp
 	REG_L	t0, sp, 0
-	REG_L	t1, sp, 8
-	REG_L	t2, sp, 16
-	ADDI	sp, sp, 32
-	cfi_adjust_cfa_offset (-32)
+	REG_L	t1, sp, SZREG
+	REG_L	t2, sp, 2 * SZREG
+	ADDI	sp, sp, 4 * SZREG
+	cfi_adjust_cfa_offset (-(4 * SZREG))
 	RET
 
 .Lslow:
@@ -147,7 +147,8 @@  Hign address	dynamic_block1 <----- dtv5  */
 	   Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
 	   of some fields in fcsr0.  */
 	movfcsr2gr  t0, fcsr0
-	st.w	t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2.  */
+	/* Use the spare slot above t2.  */
+	st.w	t0, sp, FRAME_SIZE + 3 * SZREG
 
 #ifdef USE_LASX
   #define V_REG_S xvst
@@ -194,7 +195,7 @@  Hign address	dynamic_block1 <----- dtv5  */
 	cfi_adjust_cfa_offset (-V_SPACE)
 
 	/* Restore fcsr0 register.  */
-	ld.w	t0, sp, FRAME_SIZE + 24
+	ld.w	t0, sp, FRAME_SIZE + 3 * SZREG
 	movgr2fcsr  fcsr0, t0
 
 #endif /* #ifndef __loongarch_soft_float */
diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
index 38a2218151..47d3766857 100644
--- a/sysdeps/loongarch/dl-tlsdesc.S
+++ b/sysdeps/loongarch/dl-tlsdesc.S
@@ -34,7 +34,7 @@ 
 	cfi_startproc
 	.align 2
 _dl_tlsdesc_return:
-	REG_L  a0, a0, 8
+	REG_L  a0, a0, SZREG
 	RET
 	cfi_endproc
 	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
@@ -53,8 +53,8 @@  _dl_tlsdesc_return:
 	cfi_startproc
 	.align  2
 _dl_tlsdesc_undefweak:
-	REG_L	a0, a0, 8
-	sub.d	a0, a0, tp
+	REG_L	a0, a0, SZREG
+	SUB	a0, a0, tp
 	RET
 	cfi_endproc
 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h
index aea6e287c8..578ed8600a 100644
--- a/sysdeps/loongarch/dl-trampoline.h
+++ b/sysdeps/loongarch/dl-trampoline.h
@@ -160,7 +160,7 @@  ENTRY (_dl_runtime_profile)
 	/* Save arguments to stack. */
 	ADDI	sp, sp, -SF_SIZE
 	REG_S	ra, sp, 0
-	REG_S	fp, sp, 8
+	REG_S	fp, sp, SZREG
 
 	or	fp, sp, zero
 
@@ -270,7 +270,7 @@  ENTRY (_dl_runtime_profile)
 1:
 	/* The new frame size is in t3.  */
 	SUB	sp, fp, t3
-	BSTRINS sp, zero, 3, 0
+	REG_ALIGN_ASM (sp, 4)
 
 	REG_S	a0, fp, OFFSET_T1
 
diff --git a/sysdeps/loongarch/hp-timing.h b/sysdeps/loongarch/hp-timing.h
index 9e68e38fa6..8aeb91e4f2 100644
--- a/sysdeps/loongarch/hp-timing.h
+++ b/sysdeps/loongarch/hp-timing.h
@@ -30,12 +30,21 @@ 
 typedef unsigned long long int hp_timing_t;
 
 /* Read the stable counter.  */
+#ifdef __loongarch64
 #define HP_TIMING_NOW(Var) \
   ({ \
     unsigned long long int _count; \
     asm volatile ("rdtime.d\t%0,$r0" : "=r" (_count)); \
     (Var) = _count; \
   })
+#else
+#define HP_TIMING_NOW(Var) \
+  ({ unsigned int _countl, _counth; \
+     asm volatile ("rdtimel.w\t%0,$r0\n\trdtimeh.w\t%1,$r0" \
+		    : "=r" (_countl), "=r" (_counth)); \
+     (Var) = (((unsigned long long) _counth << 32) | _countl); \
+  })
+#endif
 
 #include <hp-timing-common.h>
 
diff --git a/sysdeps/loongarch/preconfigure b/sysdeps/loongarch/preconfigure
index 6726ab8302..4248091f53 100644
--- a/sysdeps/loongarch/preconfigure
+++ b/sysdeps/loongarch/preconfigure
@@ -26,7 +26,8 @@  loongarch*)
 
     case "$abi" in
     ilp32)
-	as_fn_error 1 "loongarch does not yet support ilp32 ABI!!" "$LINENO" 5
+	grlen=32
+	machine=loongarch/ilp32
 	;;
     lp64)
 	grlen=64
diff --git a/sysdeps/loongarch/preconfigure.ac b/sysdeps/loongarch/preconfigure.ac
index 56402261df..2602aa17bf 100644
--- a/sysdeps/loongarch/preconfigure.ac
+++ b/sysdeps/loongarch/preconfigure.ac
@@ -26,7 +26,8 @@  loongarch*)
 
     case "$abi" in
     ilp32)
-	AC_MSG_ERROR([loongarch does not yet support ilp32 ABI!!], 1)
+	grlen=32
+	machine=loongarch/ilp32
 	;;
     lp64)
 	grlen=64
diff --git a/sysdeps/loongarch/sfp-machine.h b/sysdeps/loongarch/sfp-machine.h
index 986883b352..b0ce04add9 100644
--- a/sysdeps/loongarch/sfp-machine.h
+++ b/sysdeps/loongarch/sfp-machine.h
@@ -20,6 +20,37 @@ 
 #include <fenv.h>
 #include <fpu_control.h>
 
+#if __loongarch_grlen == 32
+
+# define _FP_W_TYPE_SIZE	32
+# define _FP_W_TYPE		unsigned long
+# define _FP_WS_TYPE		signed long
+# define _FP_I_TYPE		long
+
+# define _FP_MUL_MEAT_S(R, X, Y)				\
+  _FP_MUL_MEAT_1_wide (_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
+# define _FP_MUL_MEAT_D(R, X, Y)				\
+  _FP_MUL_MEAT_2_wide (_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
+# define _FP_MUL_MEAT_Q(R, X, Y)				\
+  _FP_MUL_MEAT_4_wide (_FP_WFRACBITS_Q, R, X, Y, umul_ppmm)
+
+# define _FP_MUL_MEAT_DW_S(R, X, Y)					\
+  _FP_MUL_MEAT_DW_1_wide (_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
+# define _FP_MUL_MEAT_DW_D(R, X, Y)					\
+  _FP_MUL_MEAT_DW_2_wide (_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
+# define _FP_MUL_MEAT_DW_Q(R, X, Y)					\
+  _FP_MUL_MEAT_DW_4_wide (_FP_WFRACBITS_Q, R, X, Y, umul_ppmm)
+
+# define _FP_DIV_MEAT_S(R, X, Y)	_FP_DIV_MEAT_1_udiv_norm (S, R, X, Y)
+# define _FP_DIV_MEAT_D(R, X, Y)	_FP_DIV_MEAT_2_udiv (D, R, X, Y)
+# define _FP_DIV_MEAT_Q(R, X, Y)	_FP_DIV_MEAT_4_udiv (Q, R, X, Y)
+
+# define _FP_NANFRAC_S		_FP_QNANBIT_S
+# define _FP_NANFRAC_D		_FP_QNANBIT_D, 0
+# define _FP_NANFRAC_Q		_FP_QNANBIT_Q, 0, 0, 0
+
+#else /* #if __loongarch_grlen == 32 */
+
 #define _FP_W_TYPE_SIZE 64
 #define _FP_W_TYPE unsigned long
 #define _FP_WS_TYPE signed long
@@ -47,6 +78,8 @@ 
 #define _FP_NANFRAC_D _FP_QNANBIT_D
 #define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0
 
+#endif /* #if __loongarch_grlen == 32 */
+
 #define _FP_NANSIGN_S 0
 #define _FP_NANSIGN_D 0
 #define _FP_NANSIGN_Q 0
diff --git a/sysdeps/loongarch/start.S b/sysdeps/loongarch/start.S
index 72452f5307..4fc1cfe978 100644
--- a/sysdeps/loongarch/start.S
+++ b/sysdeps/loongarch/start.S
@@ -57,23 +57,39 @@  ENTRY (ENTRY_POINT)
 /* Terminate call stack by noting ra is undefined.  Use a dummy
    .cfi_label to force starting the FDE.  */
 	.cfi_label .Ldummy
-	cfi_undefined (1)
+	cfi_undefined	(1)
 	or		a5, a0, zero /* rtld_fini */
 
-	la.pcrel	a0, t0, main
+#	LOAD_ADDR	(a0, main)
+#if defined PIC && !defined SHARED
+	/* Avoid relocation in static PIE since _start is called before it
+	   is relocated.  */
+	la.pcrel	a0, __wrap_main
+#else
+	LA_GOT		(a0, main)
+#endif
+
 	REG_L		a1, sp, 0
 	ADDI		a2, sp, SZREG
 
-	/* Adjust $sp for 16-aligned */
-	BSTRINS		sp, zero, 3, 0
+	/* Adjust $sp for 16-byte alignment.  */
+	REG_ALIGN_ASM	(sp, 4)
 
 	move		a3, zero /* used to be init */
 	move		a4, zero /* used to be fini */
 	or		a6, sp, zero /* stack_end */
 
-	la.pcrel	ra, t0, __libc_start_main
-	jirl		ra, ra, 0
+#	LOAD_ADDR	(ra, __libc_start_main)
+#	jirl		ra, ra, 0
+#
+#	LOAD_ADDR	(ra, abort)
+#	jirl		ra, ra, 0
+
+	CALL		(__libc_start_main)
+	CALL		(abort)
 
-	la.pcrel	ra, t0, abort
-	jirl		ra, ra, 0
+#if defined PIC && !defined SHARED
+__wrap_main:
+	TAIL		(main)
+#endif
 END (ENTRY_POINT)
diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
index afb597a426..3852f82ca7 100644
--- a/sysdeps/loongarch/sys/asm.h
+++ b/sysdeps/loongarch/sys/asm.h
@@ -22,22 +22,103 @@ 
 #include <sys/regdef.h>
 #include <sysdeps/generic/sysdep.h>
 
+#define STACK_ALIGN 16
+
 /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
+#if __loongarch_grlen == 64
+#define PTRLOG 3
 #define SZREG 8
-#define SZFREG 8
-#define SZVREG 16
-#define SZXREG 32
 #define REG_L ld.d
 #define REG_S st.d
 #define SRLI srli.d
+#define SRAI srai.d
 #define SLLI slli.d
 #define ADDI addi.d
 #define ADD  add.d
 #define SUB  sub.d
-#define BSTRINS  bstrins.d
 #define LI  li.d
-#define FREG_L fld.d
-#define FREG_S fst.d
+#define BSTRINS  bstrins.d
+
+/* Align reg to 2^n.  Used in assembly.  */
+#define REG_ALIGN_ASM(reg, n) bstrins.d reg, zero, (n-1), 0
+
+/* Align reg to 2^n.  Used in C.  */
+#define REG_ALIGN_C(reg, n) \
+  "bstrins.d\t" __STRING(reg) ", $zero, (" __STRING(n) "-1), 0"
+
+#define LOAD_ADDR(reg, sym) la.pcrel reg, t0, sym
+
+#define LOAD_LOCAL(reg, sym) \
+  pcalau12i reg, %pc_hi20(sym); \
+  ld.d	    reg, reg, %pc_lo12(sym);
+
+#define LOAD_GLOBAL(reg, sym) \
+  la.got    reg, sym; \
+  ld.d	    reg, reg, 0;
+
+#define LA_GOT(reg, sym) la.got reg, t0, sym
+
+#define CALL(sym) call36 sym
+#define TAIL(sym) tail36 t0, sym
+
+#elif __loongarch_grlen == 32
+
+#define PTRLOG 2
+#define SZREG 4
+#define REG_L ld.w
+#define REG_S st.w
+#define SRLI srli.w
+#define SRAI srai.w
+#define SLLI slli.w
+#define ADDI addi.w
+#define ADD  add.w
+#define SUB  sub.w
+#define LI  li.w
+#define BSTRINS  bstrins.w
+
+/* LA32R does not have bstrins.w; use srli.w and slli.w on both LA32S and LA32R.  */
+#define REG_ALIGN_ASM(reg, n) \
+  srli.w reg, reg, n; \
+  slli.w reg, reg, n;
+
+#define REG_ALIGN_C(reg, n) \
+  "srli.w\t" __STRING(reg)", " __STRING(reg)", " __STRING(n) "\n\t" \
+  "slli.w\t" __STRING(reg)", " __STRING(reg)", " __STRING(n)
+
+#define LOAD_ADDR(reg, sym) \
+  1: pcaddu12i	reg, %pcadd_hi20(sym); \
+     addi.w	reg, reg, %pcadd_lo12(1b);
+
+#define LOAD_LOCAL(reg, sym) \
+  1: pcaddu12i	reg, %pcadd_hi20(sym); \
+     ld.w	reg, reg, %pcadd_lo12(1b);
+
+#define LOAD_GLOBAL(reg, sym) \
+  1: pcaddu12i	reg, %got_pcadd_hi20(sym); \
+     ld.w	reg, reg, %pcadd_lo12(1b); \
+     ld.w	reg, reg, 0;
+
+#define LA_GOT(reg, sym) la.got reg, sym
+
+#define CALL(sym) call30 sym
+#define TAIL(sym) tail30 t0, sym
+
+#else
+#error __loongarch_grlen must equal 32 or 64
+#endif
+
+#if __loongarch_frlen == 64
+  #define SZFREG 8
+  #define FREG_L fld.d
+  #define FREG_S fst.d
+#elif __loongarch_frlen == 32
+  #define SZFREG 4
+  #define FREG_L fld.s
+  #define FREG_S fst.s
+#endif
+
+#define SZVREG 16
+#define SZXREG 32
 
 /*  Declare leaf routine.
     The usage of macro LEAF/ENTRY is as follows:
diff --git a/sysdeps/loongarch/tst-gnu2-tls2.h b/sysdeps/loongarch/tst-gnu2-tls2.h
index 98a6fb4384..88f1c69a79 100644
--- a/sysdeps/loongarch/tst-gnu2-tls2.h
+++ b/sysdeps/loongarch/tst-gnu2-tls2.h
@@ -25,17 +25,23 @@ 
 /* The instruction between BEFORE_TLSDESC_CALL and _dl_tlsdesc_dynamic,
    and the instruction between _dl_tlsdesc_dynamic and AFTER_TLSDESC_CALL,
    may modified most of the general-purpose register.  */
-#define	SAVE_REGISTER(src)						\
-  asm volatile ("st.d $r3, %0" :"=m"(src) :);
+
+#if LOONGARCH_ABI_GRLEN == 64
+# define SAVE_REGISTER(src) asm volatile ("st.d $r3, %0" :"=m"(src) :);
+#elif LOONGARCH_ABI_GRLEN == 32
+# define SAVE_REGISTER(src) asm volatile ("st.w $r3, %0" :"=m"(src) :);
+#else
+# error LOONGARCH_ABI_GRLEN must equal 32 or 64
+#endif
 
 #ifdef __loongarch_soft_float
 
 #define BEFORE_TLSDESC_CALL()						\
-  uint64_t src;								\
+  long src;								\
   SAVE_REGISTER (src);
 
 #define AFTER_TLSDESC_CALL()						\
-  uint64_t restore;							\
+  long restore;							\
   SAVE_REGISTER (restore);						\
   if (src != restore)							\
     abort ();
@@ -53,12 +59,12 @@ 
   asm volatile ("movcf2gr %0, $fcc7" :"=r"(src[7]));	\
 
 #define LOAD_REGISTER_FCSR()				\
-  uint64_t src_fcsr = 0x01010101;			\
-  asm volatile ("li.d $t0, 0x01010101" ::: "$t0");	\
+  int src_fcsr = 0x01010101;			\
+  asm volatile ("li.w $t0, 0x01010101" ::: "$t0");	\
   asm volatile ("movgr2fcsr $fcsr0, $t0" :::);
 
 #define SAVE_REGISTER_FCSR()						\
-  uint64_t restore_fcsr;						\
+  int restore_fcsr;						\
   asm volatile ("movfcsr2gr %0, $fcsr0" :"=r"(restore_fcsr));		\
   if (src_fcsr != restore_fcsr)						\
     {									\
@@ -70,7 +76,7 @@ 
   unsigned long hwcap = getauxval (AT_HWCAP);
 
 #define	LOAD_REGISTER_FLOAT()						\
-  for (int i = 0; i < 32; i++)						\
+  for (int i = 0; i < 24; i++)						\
     src_float[i] = i + 1;						\
   asm volatile ("fld.d $f0, %0" ::"m"(src_float[0]) :"$f0");		\
   asm volatile ("fld.d $f1, %0" ::"m"(src_float[1]) :"$f1"); 		\
@@ -96,17 +102,9 @@ 
   asm volatile ("fld.d $f21, %0" ::"m"(src_float[21]) :"$f21");		\
   asm volatile ("fld.d $f22, %0" ::"m"(src_float[22]) :"$f22");		\
   asm volatile ("fld.d $f23, %0" ::"m"(src_float[23]) :"$f23");		\
-  asm volatile ("fld.d $f24, %0" ::"m"(src_float[24]) :"$f24");		\
-  asm volatile ("fld.d $f25, %0" ::"m"(src_float[25]) :"$f25");		\
-  asm volatile ("fld.d $f26, %0" ::"m"(src_float[26]) :"$f26");		\
-  asm volatile ("fld.d $f27, %0" ::"m"(src_float[27]) :"$f27");		\
-  asm volatile ("fld.d $f28, %0" ::"m"(src_float[28]) :"$f28");		\
-  asm volatile ("fld.d $f29, %0" ::"m"(src_float[29]) :"$f29");		\
-  asm volatile ("fld.d $f30, %0" ::"m"(src_float[30]) :"$f30");		\
-  asm volatile ("fld.d $f31, %0" ::"m"(src_float[31]) :"$f31");
 
 #define	SAVE_REGISTER_FLOAT()						\
-  double restore_float[32];						\
+  double restore_float[24];						\
   asm volatile ("fst.d $f0, %0" :"=m"(restore_float[0]));		\
   asm volatile ("fst.d $f1, %0" :"=m"(restore_float[1])); 		\
   asm volatile ("fst.d $f2, %0" :"=m"(restore_float[2])); 		\
@@ -131,14 +129,6 @@ 
   asm volatile ("fst.d $f21, %0" :"=m"(restore_float[21]));		\
   asm volatile ("fst.d $f22, %0" :"=m"(restore_float[22]));		\
   asm volatile ("fst.d $f23, %0" :"=m"(restore_float[23]));		\
-  asm volatile ("fst.d $f24, %0" :"=m"(restore_float[24]));		\
-  asm volatile ("fst.d $f25, %0" :"=m"(restore_float[25]));		\
-  asm volatile ("fst.d $f26, %0" :"=m"(restore_float[26]));		\
-  asm volatile ("fst.d $f27, %0" :"=m"(restore_float[27]));		\
-  asm volatile ("fst.d $f28, %0" :"=m"(restore_float[28]));		\
-  asm volatile ("fst.d $f29, %0" :"=m"(restore_float[29]));		\
-  asm volatile ("fst.d $f30, %0" :"=m"(restore_float[30]));		\
-  asm volatile ("fst.d $f31, %0" :"=m"(restore_float[31]));		\
   if (memcmp (src_float, restore_float, sizeof (src_float)) != 0)	\
     {									\
       printf ("Float registers compare failed!\n");			\
@@ -325,9 +315,9 @@ 
 #endif
 
 #define BEFORE_TLSDESC_CALL()						\
-  uint64_t src;								\
-  double src_float[32];							\
-  uint64_t src_fcc[8];							\
+  long src;								\
+  double src_float[24];							\
+  int src_fcc[8];							\
   SAVE_REGISTER (src);							\
 									\
   if (hwcap & HWCAP_LOONGARCH_LASX)					\
@@ -349,8 +339,8 @@ 
 
 
 #define AFTER_TLSDESC_CALL()						\
-  uint64_t restore;							\
-  uint64_t restore_fcc[8];						\
+  long restore;							\
+  int restore_fcc[8];						\
 									\
   SAVE_REGISTER (restore);						\
   if (src != restore)							\