LoongArch: Organize the code related to split move and merge the same functions.

Message ID 20240713090422.7006-1-chenglulu@loongson.cn
State New
Headers
Series LoongArch: Organize the code related to split move and merge the same functions. |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Test passed

Commit Message

Lulu Cheng July 13, 2024, 9:04 a.m. UTC
  gcc/ChangeLog:

	* config/loongarch/loongarch-protos.h
	(loongarch_split_128bit_move): Delete.
	(loongarch_split_128bit_move_p): Delete.
	(loongarch_split_256bit_move): Delete.
	(loongarch_split_256bit_move_p): Delete.
	(loongarch_split_vector_move): Add a function declaration.
	* config/loongarch/loongarch.cc
	(loongarch_vector_costs::finish_cost): Adjust the code
	formatting.
	(loongarch_split_vector_move_p): Merge
	loongarch_split_128bit_move_p and loongarch_split_256bit_move_p.
	(loongarch_split_move_p): Merge code.
	(loongarch_split_move): Likewise.
	(loongarch_split_128bit_move_p): Delete.
	(loongarch_split_256bit_move_p): Delete.
	(loongarch_split_128bit_move): Delete.
	(loongarch_split_vector_move): Merge loongarch_split_128bit_move
	and loongarch_split_256bit_move.
	(loongarch_split_256bit_move): Delete.
	(loongarch_global_init): Remove the extra semicolon at the
	end of the function.
	* config/loongarch/loongarch.md (*movdf_softfloat): Add a new
	condition TARGET_64BIT.
---
 gcc/config/loongarch/loongarch-protos.h |   5 +-
 gcc/config/loongarch/loongarch.cc       | 221 ++++++------------------
 gcc/config/loongarch/loongarch.md       |   1 +
 3 files changed, 58 insertions(+), 169 deletions(-)
  

Comments

Lulu Cheng July 20, 2024, 1:14 a.m. UTC | #1
Pushed to r15-2167.

On 2024/7/13 at 5:04 PM, Lulu Cheng wrote:
> gcc/ChangeLog:
>
> 	* config/loongarch/loongarch-protos.h
> 	(loongarch_split_128bit_move): Delete.
> 	(loongarch_split_128bit_move_p): Delete.
> 	(loongarch_split_256bit_move): Delete.
> 	(loongarch_split_256bit_move_p): Delete.
> 	(loongarch_split_vector_move): Add a function declaration.
> 	* config/loongarch/loongarch.cc
> 	(loongarch_vector_costs::finish_cost): Adjust the code
> 	formatting.
> 	(loongarch_split_vector_move_p): Merge
> 	loongarch_split_128bit_move_p and loongarch_split_256bit_move_p.
> 	(loongarch_split_move_p): Merge code.
> 	(loongarch_split_move): Likewise.
> 	(loongarch_split_128bit_move_p): Delete.
> 	(loongarch_split_256bit_move_p): Delete.
> 	(loongarch_split_128bit_move): Delete.
> 	(loongarch_split_vector_move): Merge loongarch_split_128bit_move
> 	and loongarch_split_256bit_move.
> 	(loongarch_split_256bit_move): Delete.
> 	(loongarch_global_init): Remove the extra semicolon at the
> 	end of the function.
> 	* config/loongarch/loongarch.md (*movdf_softfloat):  Added a new
> 	condition TARGET_64BIT.
> ---
>   gcc/config/loongarch/loongarch-protos.h |   5 +-
>   gcc/config/loongarch/loongarch.cc       | 221 ++++++------------------
>   gcc/config/loongarch/loongarch.md       |   1 +
>   3 files changed, 58 insertions(+), 169 deletions(-)
>
> diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
> index e238d795a73..85f6e894399 100644
> --- a/gcc/config/loongarch/loongarch-protos.h
> +++ b/gcc/config/loongarch/loongarch-protos.h
> @@ -85,10 +85,7 @@ extern bool loongarch_split_move_p (rtx, rtx);
>   extern void loongarch_split_move (rtx, rtx);
>   extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
>   extern void loongarch_split_plus_constant (rtx *, machine_mode);
> -extern void loongarch_split_128bit_move (rtx, rtx);
> -extern bool loongarch_split_128bit_move_p (rtx, rtx);
> -extern void loongarch_split_256bit_move (rtx, rtx);
> -extern bool loongarch_split_256bit_move_p (rtx, rtx);
> +extern void loongarch_split_vector_move (rtx, rtx);
>   extern const char *loongarch_output_move (rtx, rtx);
>   #ifdef RTX_CODE
>   extern void loongarch_expand_scc (rtx *);
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index 8eb47ff95c3..c7a02103ef5 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -4354,10 +4354,10 @@ void
>   loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
>   {
>     loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
> +
>     if (loop_vinfo)
> -    {
> -      m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo);
> -    }
> +    m_suggested_unroll_factor
> +      = determine_suggested_unroll_factor (loop_vinfo);
>   
>     vector_costs::finish_cost (scalar_costs);
>   }
> @@ -4423,6 +4423,7 @@ loongarch_subword (rtx op, bool high_p)
>     return simplify_gen_subreg (word_mode, op, mode, byte);
>   }
>   
> +static bool loongarch_split_vector_move_p (rtx dest, rtx src);
>   /* Return true if a move from SRC to DEST should be split into two.
>      SPLIT_TYPE describes the split condition.  */
>   
> @@ -4444,13 +4445,11 @@ loongarch_split_move_p (rtx dest, rtx src)
>   	return false;
>       }
>   
> -  /* Check if LSX moves need splitting.  */
> -  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
> -    return loongarch_split_128bit_move_p (dest, src);
>   
> -  /* Check if LASX moves need splitting.  */
> -  if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
> -    return loongarch_split_256bit_move_p (dest, src);
> +  /* Check if vector moves need splitting.  */
> +  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
> +      || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
> +    return loongarch_split_vector_move_p (dest, src);
>   
>     /* Otherwise split all multiword moves.  */
>     return size > UNITS_PER_WORD;
> @@ -4463,10 +4462,9 @@ void
>   loongarch_split_move (rtx dest, rtx src)
>   {
>     gcc_checking_assert (loongarch_split_move_p (dest, src));
> -  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
> -    loongarch_split_128bit_move (dest, src);
> -  else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
> -    loongarch_split_256bit_move (dest, src);
> +  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
> +      || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
> +    loongarch_split_vector_move (dest, src);
>     else
>       gcc_unreachable ();
>   }
> @@ -4588,224 +4586,117 @@ loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr)
>   
>     return insn[ldr][index-2];
>   }
> -/* Return true if a 128-bit move from SRC to DEST should be split.  */
> -
> -bool
> -loongarch_split_128bit_move_p (rtx dest, rtx src)
> -{
> -  /* LSX-to-LSX moves can be done in a single instruction.  */
> -  if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
> -    return false;
> -
> -  /* Check for LSX loads and stores.  */
> -  if (FP_REG_RTX_P (dest) && MEM_P (src))
> -    return false;
> -  if (FP_REG_RTX_P (src) && MEM_P (dest))
> -    return false;
> -
> -  /* Check for LSX set to an immediate const vector with valid replicated
> -     element.  */
> -  if (FP_REG_RTX_P (dest)
> -      && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
> -    return false;
> -
> -  /* Check for LSX load zero immediate.  */
> -  if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
> -    return false;
> -
> -  return true;
> -}
> -
> -/* Return true if a 256-bit move from SRC to DEST should be split.  */
> +/* Return true if a vector move from SRC to DEST should be split.  */
>   
> -bool
> -loongarch_split_256bit_move_p (rtx dest, rtx src)
> +static bool
> +loongarch_split_vector_move_p (rtx dest, rtx src)
>   {
> -  /* LSX-to-LSX moves can be done in a single instruction.  */
> +  /* Vector moves can be done in a single instruction.  */
>     if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
>       return false;
>   
> -  /* Check for LSX loads and stores.  */
> +  /* Check for vector loads and stores.  */
>     if (FP_REG_RTX_P (dest) && MEM_P (src))
>       return false;
>     if (FP_REG_RTX_P (src) && MEM_P (dest))
>       return false;
>   
> -  /* Check for LSX set to an immediate const vector with valid replicated
> +  /* Check for vector set to an immediate const vector with valid replicated
>        element.  */
>     if (FP_REG_RTX_P (dest)
>         && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
>       return false;
>   
> -  /* Check for LSX load zero immediate.  */
> +  /* Check for vector load zero immediate.  */
>     if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
>       return false;
>   
>     return true;
>   }
>   
> -/* Split a 128-bit move from SRC to DEST.  */
> +/* Split a vector move from SRC to DEST.  */
>   
>   void
> -loongarch_split_128bit_move (rtx dest, rtx src)
> +loongarch_split_vector_move (rtx dest, rtx src)
>   {
>     int byte, index;
> -  rtx low_dest, low_src, d, s;
> +  rtx s, d;
> +  machine_mode mode = GET_MODE (dest);
> +  bool lsx_p = LSX_SUPPORTED_MODE_P (mode);
>   
>     if (FP_REG_RTX_P (dest))
>       {
>         gcc_assert (!MEM_P (src));
>   
> -      rtx new_dest = dest;
> -      if (!TARGET_64BIT)
> -	{
> -	  if (GET_MODE (dest) != V4SImode)
> -	    new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
> -	}
> -      else
> -	{
> -	  if (GET_MODE (dest) != V2DImode)
> -	    new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0);
> -	}
> -
> -      for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
> -	   byte += UNITS_PER_WORD, index++)
> -	{
> -	  s = loongarch_subword_at_byte (src, byte);
> -	  if (!TARGET_64BIT)
> -	    emit_insn (gen_lsx_vinsgr2vr_w (new_dest, s, new_dest,
> -					    GEN_INT (1 << index)));
> -	  else
> -	    emit_insn (gen_lsx_vinsgr2vr_d (new_dest, s, new_dest,
> -					    GEN_INT (1 << index)));
> -	}
> -    }
> -  else if (FP_REG_RTX_P (src))
> -    {
> -      gcc_assert (!MEM_P (dest));
> -
> -      rtx new_src = src;
> -      if (!TARGET_64BIT)
> -	{
> -	  if (GET_MODE (src) != V4SImode)
> -	    new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
> -	}
> -      else
> -	{
> -	  if (GET_MODE (src) != V2DImode)
> -	    new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0);
> -	}
> +      rtx (*gen_vinsgr2vr_d) (rtx, rtx, rtx, rtx);
>   
> -      for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
> -	   byte += UNITS_PER_WORD, index++)
> -	{
> -	  d = loongarch_subword_at_byte (dest, byte);
> -	  if (!TARGET_64BIT)
> -	    emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
> -	  else
> -	    emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
> -	}
> -    }
> -  else
> -    {
> -      low_dest = loongarch_subword_at_byte (dest, 0);
> -      low_src = loongarch_subword_at_byte (src, 0);
> -      gcc_assert (REG_P (low_dest) && REG_P (low_src));
> -      /* Make sure the source register is not written before reading.  */
> -      if (REGNO (low_dest) <= REGNO (low_src))
> +      if (lsx_p)
>   	{
> -	  for (byte = 0; byte < GET_MODE_SIZE (TImode);
> -	       byte += UNITS_PER_WORD)
> -	    {
> -	      d = loongarch_subword_at_byte (dest, byte);
> -	      s = loongarch_subword_at_byte (src, byte);
> -	      loongarch_emit_move (d, s);
> -	    }
> +	  mode = V2DImode;
> +	  gen_vinsgr2vr_d = gen_lsx_vinsgr2vr_d;
>   	}
>         else
>   	{
> -	  for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
> -	       byte -= UNITS_PER_WORD)
> -	    {
> -	      d = loongarch_subword_at_byte (dest, byte);
> -	      s = loongarch_subword_at_byte (src, byte);
> -	      loongarch_emit_move (d, s);
> -	    }
> +	  mode = V4DImode;
> +	  gen_vinsgr2vr_d = gen_lasx_xvinsgr2vr_d;
>   	}
> -    }
> -}
> -
> -/* Split a 256-bit move from SRC to DEST.  */
> -
> -void
> -loongarch_split_256bit_move (rtx dest, rtx src)
> -{
> -  int byte, index;
> -  rtx low_dest, low_src, d, s;
> -
> -  if (FP_REG_RTX_P (dest))
> -    {
> -      gcc_assert (!MEM_P (src));
>   
>         rtx new_dest = dest;
> -      if (!TARGET_64BIT)
> -	{
> -	  if (GET_MODE (dest) != V8SImode)
> -	    new_dest = simplify_gen_subreg (V8SImode, dest, GET_MODE (dest), 0);
> -	}
> -      else
> -	{
> -	  if (GET_MODE (dest) != V4DImode)
> -	    new_dest = simplify_gen_subreg (V4DImode, dest, GET_MODE (dest), 0);
> -	}
> +
> +      if (GET_MODE (dest) != mode)
> +	new_dest = simplify_gen_subreg (mode, dest, GET_MODE (dest), 0);
>   
>         for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
>   	   byte += UNITS_PER_WORD, index++)
>   	{
>   	  s = loongarch_subword_at_byte (src, byte);
> -	  if (!TARGET_64BIT)
> -	    emit_insn (gen_lasx_xvinsgr2vr_w (new_dest, s, new_dest,
> -					      GEN_INT (1 << index)));
> -	  else
> -	    emit_insn (gen_lasx_xvinsgr2vr_d (new_dest, s, new_dest,
> -					      GEN_INT (1 << index)));
> +	  emit_insn (gen_vinsgr2vr_d (new_dest, s, new_dest,
> +					  GEN_INT (1 << index)));
>   	}
>       }
>     else if (FP_REG_RTX_P (src))
>       {
>         gcc_assert (!MEM_P (dest));
>   
> -      rtx new_src = src;
> -      if (!TARGET_64BIT)
> +      rtx (*gen_vpickve2gr_d) (rtx, rtx, rtx);
> +
> +      if (lsx_p)
>   	{
> -	  if (GET_MODE (src) != V8SImode)
> -	    new_src = simplify_gen_subreg (V8SImode, src, GET_MODE (src), 0);
> +	  mode = V2DImode;
> +	  gen_vpickve2gr_d = gen_lsx_vpickve2gr_d;
>   	}
>         else
>   	{
> -	  if (GET_MODE (src) != V4DImode)
> -	    new_src = simplify_gen_subreg (V4DImode, src, GET_MODE (src), 0);
> +	  mode = V4DImode;
> +	  gen_vpickve2gr_d = gen_lasx_xvpickve2gr_d;
>   	}
>   
> +      rtx new_src = src;
> +      if (GET_MODE (src) != mode)
> +	new_src = simplify_gen_subreg (mode, src, GET_MODE (src), 0);
> +
>         for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (src));
>   	   byte += UNITS_PER_WORD, index++)
>   	{
>   	  d = loongarch_subword_at_byte (dest, byte);
> -	  if (!TARGET_64BIT)
> -	    emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
> -	  else
> -	    emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
> +	  emit_insn (gen_vpickve2gr_d (d, new_src, GEN_INT (index)));
>   	}
>       }
>     else
>       {
> +      /* This part of the code is designed to handle the following situations:
> +	 (set (reg:V2DI 4 $r4)
> +	      (reg:V2DI 6 $r6))
> +	 The trigger test case is lsx-mov-1.c.  */
> +      rtx low_dest, low_src;
> +
>         low_dest = loongarch_subword_at_byte (dest, 0);
>         low_src = loongarch_subword_at_byte (src, 0);
>         gcc_assert (REG_P (low_dest) && REG_P (low_src));
>         /* Make sure the source register is not written before reading.  */
>         if (REGNO (low_dest) <= REGNO (low_src))
>   	{
> -	  for (byte = 0; byte < GET_MODE_SIZE (TImode);
> +	  for (byte = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
>   	       byte += UNITS_PER_WORD)
>   	    {
>   	      d = loongarch_subword_at_byte (dest, byte);
> @@ -4815,8 +4706,8 @@ loongarch_split_256bit_move (rtx dest, rtx src)
>   	}
>         else
>   	{
> -	  for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
> -	       byte -= UNITS_PER_WORD)
> +	  for (byte = GET_MODE_SIZE (GET_MODE (dest)) - UNITS_PER_WORD;
> +	       byte >= 0; byte -= UNITS_PER_WORD)
>   	    {
>   	      d = loongarch_subword_at_byte (dest, byte);
>   	      s = loongarch_subword_at_byte (src, byte);
> @@ -7606,7 +7497,7 @@ loongarch_global_init (void)
>   
>     /* Function to allocate machine-dependent function status.  */
>     init_machine_status = &loongarch_init_machine_status;
> -};
> +}
>   
>   static void
>   loongarch_reg_init (void)
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index 21890a2d94b..459ad30b9bb 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -2406,6 +2406,7 @@ (define_insn "*movdf_softfloat"
>     [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m")
>   	(match_operand:DF 1 "move_operand" "rG,m,rG"))]
>     "(TARGET_SOFT_FLOAT || TARGET_SINGLE_FLOAT)
> +   && TARGET_64BIT
>      && (register_operand (operands[0], DFmode)
>          || reg_or_0_operand (operands[1], DFmode))"
>     { return loongarch_output_move (operands[0], operands[1]); }
  

Patch

diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index e238d795a73..85f6e894399 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -85,10 +85,7 @@  extern bool loongarch_split_move_p (rtx, rtx);
 extern void loongarch_split_move (rtx, rtx);
 extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
 extern void loongarch_split_plus_constant (rtx *, machine_mode);
-extern void loongarch_split_128bit_move (rtx, rtx);
-extern bool loongarch_split_128bit_move_p (rtx, rtx);
-extern void loongarch_split_256bit_move (rtx, rtx);
-extern bool loongarch_split_256bit_move_p (rtx, rtx);
+extern void loongarch_split_vector_move (rtx, rtx);
 extern const char *loongarch_output_move (rtx, rtx);
 #ifdef RTX_CODE
 extern void loongarch_expand_scc (rtx *);
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 8eb47ff95c3..c7a02103ef5 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4354,10 +4354,10 @@  void
 loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
 {
   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
+
   if (loop_vinfo)
-    {
-      m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo);
-    }
+    m_suggested_unroll_factor
+      = determine_suggested_unroll_factor (loop_vinfo);
 
   vector_costs::finish_cost (scalar_costs);
 }
@@ -4423,6 +4423,7 @@  loongarch_subword (rtx op, bool high_p)
   return simplify_gen_subreg (word_mode, op, mode, byte);
 }
 
+static bool loongarch_split_vector_move_p (rtx dest, rtx src);
 /* Return true if a move from SRC to DEST should be split into two.
    SPLIT_TYPE describes the split condition.  */
 
@@ -4444,13 +4445,11 @@  loongarch_split_move_p (rtx dest, rtx src)
 	return false;
     }
 
-  /* Check if LSX moves need splitting.  */
-  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
-    return loongarch_split_128bit_move_p (dest, src);
 
-  /* Check if LASX moves need splitting.  */
-  if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
-    return loongarch_split_256bit_move_p (dest, src);
+  /* Check if vector moves need splitting.  */
+  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
+      || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
+    return loongarch_split_vector_move_p (dest, src);
 
   /* Otherwise split all multiword moves.  */
   return size > UNITS_PER_WORD;
@@ -4463,10 +4462,9 @@  void
 loongarch_split_move (rtx dest, rtx src)
 {
   gcc_checking_assert (loongarch_split_move_p (dest, src));
-  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
-    loongarch_split_128bit_move (dest, src);
-  else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
-    loongarch_split_256bit_move (dest, src);
+  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
+      || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
+    loongarch_split_vector_move (dest, src);
   else
     gcc_unreachable ();
 }
@@ -4588,224 +4586,117 @@  loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr)
 
   return insn[ldr][index-2];
 }
-/* Return true if a 128-bit move from SRC to DEST should be split.  */
-
-bool
-loongarch_split_128bit_move_p (rtx dest, rtx src)
-{
-  /* LSX-to-LSX moves can be done in a single instruction.  */
-  if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
-    return false;
-
-  /* Check for LSX loads and stores.  */
-  if (FP_REG_RTX_P (dest) && MEM_P (src))
-    return false;
-  if (FP_REG_RTX_P (src) && MEM_P (dest))
-    return false;
-
-  /* Check for LSX set to an immediate const vector with valid replicated
-     element.  */
-  if (FP_REG_RTX_P (dest)
-      && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
-    return false;
-
-  /* Check for LSX load zero immediate.  */
-  if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
-    return false;
-
-  return true;
-}
-
-/* Return true if a 256-bit move from SRC to DEST should be split.  */
+/* Return true if a vector move from SRC to DEST should be split.  */
 
-bool
-loongarch_split_256bit_move_p (rtx dest, rtx src)
+static bool
+loongarch_split_vector_move_p (rtx dest, rtx src)
 {
-  /* LSX-to-LSX moves can be done in a single instruction.  */
+  /* Vector moves can be done in a single instruction.  */
   if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
     return false;
 
-  /* Check for LSX loads and stores.  */
+  /* Check for vector loads and stores.  */
   if (FP_REG_RTX_P (dest) && MEM_P (src))
     return false;
   if (FP_REG_RTX_P (src) && MEM_P (dest))
     return false;
 
-  /* Check for LSX set to an immediate const vector with valid replicated
+  /* Check for vector set to an immediate const vector with valid replicated
      element.  */
   if (FP_REG_RTX_P (dest)
       && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
     return false;
 
-  /* Check for LSX load zero immediate.  */
+  /* Check for vector load zero immediate.  */
   if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
     return false;
 
   return true;
 }
 
-/* Split a 128-bit move from SRC to DEST.  */
+/* Split a vector move from SRC to DEST.  */
 
 void
-loongarch_split_128bit_move (rtx dest, rtx src)
+loongarch_split_vector_move (rtx dest, rtx src)
 {
   int byte, index;
-  rtx low_dest, low_src, d, s;
+  rtx s, d;
+  machine_mode mode = GET_MODE (dest);
+  bool lsx_p = LSX_SUPPORTED_MODE_P (mode);
 
   if (FP_REG_RTX_P (dest))
     {
       gcc_assert (!MEM_P (src));
 
-      rtx new_dest = dest;
-      if (!TARGET_64BIT)
-	{
-	  if (GET_MODE (dest) != V4SImode)
-	    new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
-	}
-      else
-	{
-	  if (GET_MODE (dest) != V2DImode)
-	    new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0);
-	}
-
-      for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
-	   byte += UNITS_PER_WORD, index++)
-	{
-	  s = loongarch_subword_at_byte (src, byte);
-	  if (!TARGET_64BIT)
-	    emit_insn (gen_lsx_vinsgr2vr_w (new_dest, s, new_dest,
-					    GEN_INT (1 << index)));
-	  else
-	    emit_insn (gen_lsx_vinsgr2vr_d (new_dest, s, new_dest,
-					    GEN_INT (1 << index)));
-	}
-    }
-  else if (FP_REG_RTX_P (src))
-    {
-      gcc_assert (!MEM_P (dest));
-
-      rtx new_src = src;
-      if (!TARGET_64BIT)
-	{
-	  if (GET_MODE (src) != V4SImode)
-	    new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
-	}
-      else
-	{
-	  if (GET_MODE (src) != V2DImode)
-	    new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0);
-	}
+      rtx (*gen_vinsgr2vr_d) (rtx, rtx, rtx, rtx);
 
-      for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
-	   byte += UNITS_PER_WORD, index++)
-	{
-	  d = loongarch_subword_at_byte (dest, byte);
-	  if (!TARGET_64BIT)
-	    emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
-	  else
-	    emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
-	}
-    }
-  else
-    {
-      low_dest = loongarch_subword_at_byte (dest, 0);
-      low_src = loongarch_subword_at_byte (src, 0);
-      gcc_assert (REG_P (low_dest) && REG_P (low_src));
-      /* Make sure the source register is not written before reading.  */
-      if (REGNO (low_dest) <= REGNO (low_src))
+      if (lsx_p)
 	{
-	  for (byte = 0; byte < GET_MODE_SIZE (TImode);
-	       byte += UNITS_PER_WORD)
-	    {
-	      d = loongarch_subword_at_byte (dest, byte);
-	      s = loongarch_subword_at_byte (src, byte);
-	      loongarch_emit_move (d, s);
-	    }
+	  mode = V2DImode;
+	  gen_vinsgr2vr_d = gen_lsx_vinsgr2vr_d;
 	}
       else
 	{
-	  for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
-	       byte -= UNITS_PER_WORD)
-	    {
-	      d = loongarch_subword_at_byte (dest, byte);
-	      s = loongarch_subword_at_byte (src, byte);
-	      loongarch_emit_move (d, s);
-	    }
+	  mode = V4DImode;
+	  gen_vinsgr2vr_d = gen_lasx_xvinsgr2vr_d;
 	}
-    }
-}
-
-/* Split a 256-bit move from SRC to DEST.  */
-
-void
-loongarch_split_256bit_move (rtx dest, rtx src)
-{
-  int byte, index;
-  rtx low_dest, low_src, d, s;
-
-  if (FP_REG_RTX_P (dest))
-    {
-      gcc_assert (!MEM_P (src));
 
       rtx new_dest = dest;
-      if (!TARGET_64BIT)
-	{
-	  if (GET_MODE (dest) != V8SImode)
-	    new_dest = simplify_gen_subreg (V8SImode, dest, GET_MODE (dest), 0);
-	}
-      else
-	{
-	  if (GET_MODE (dest) != V4DImode)
-	    new_dest = simplify_gen_subreg (V4DImode, dest, GET_MODE (dest), 0);
-	}
+
+      if (GET_MODE (dest) != mode)
+	new_dest = simplify_gen_subreg (mode, dest, GET_MODE (dest), 0);
 
       for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
 	   byte += UNITS_PER_WORD, index++)
 	{
 	  s = loongarch_subword_at_byte (src, byte);
-	  if (!TARGET_64BIT)
-	    emit_insn (gen_lasx_xvinsgr2vr_w (new_dest, s, new_dest,
-					      GEN_INT (1 << index)));
-	  else
-	    emit_insn (gen_lasx_xvinsgr2vr_d (new_dest, s, new_dest,
-					      GEN_INT (1 << index)));
+	  emit_insn (gen_vinsgr2vr_d (new_dest, s, new_dest,
+					  GEN_INT (1 << index)));
 	}
     }
   else if (FP_REG_RTX_P (src))
     {
       gcc_assert (!MEM_P (dest));
 
-      rtx new_src = src;
-      if (!TARGET_64BIT)
+      rtx (*gen_vpickve2gr_d) (rtx, rtx, rtx);
+
+      if (lsx_p)
 	{
-	  if (GET_MODE (src) != V8SImode)
-	    new_src = simplify_gen_subreg (V8SImode, src, GET_MODE (src), 0);
+	  mode = V2DImode;
+	  gen_vpickve2gr_d = gen_lsx_vpickve2gr_d;
 	}
       else
 	{
-	  if (GET_MODE (src) != V4DImode)
-	    new_src = simplify_gen_subreg (V4DImode, src, GET_MODE (src), 0);
+	  mode = V4DImode;
+	  gen_vpickve2gr_d = gen_lasx_xvpickve2gr_d;
 	}
 
+      rtx new_src = src;
+      if (GET_MODE (src) != mode)
+	new_src = simplify_gen_subreg (mode, src, GET_MODE (src), 0);
+
       for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (src));
 	   byte += UNITS_PER_WORD, index++)
 	{
 	  d = loongarch_subword_at_byte (dest, byte);
-	  if (!TARGET_64BIT)
-	    emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
-	  else
-	    emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
+	  emit_insn (gen_vpickve2gr_d (d, new_src, GEN_INT (index)));
 	}
     }
   else
     {
+      /* This part of the code is designed to handle the following situations:
+	 (set (reg:V2DI 4 $r4)
+	      (reg:V2DI 6 $r6))
+	 The trigger test case is lsx-mov-1.c.  */
+      rtx low_dest, low_src;
+
       low_dest = loongarch_subword_at_byte (dest, 0);
       low_src = loongarch_subword_at_byte (src, 0);
       gcc_assert (REG_P (low_dest) && REG_P (low_src));
       /* Make sure the source register is not written before reading.  */
       if (REGNO (low_dest) <= REGNO (low_src))
 	{
-	  for (byte = 0; byte < GET_MODE_SIZE (TImode);
+	  for (byte = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
 	       byte += UNITS_PER_WORD)
 	    {
 	      d = loongarch_subword_at_byte (dest, byte);
@@ -4815,8 +4706,8 @@  loongarch_split_256bit_move (rtx dest, rtx src)
 	}
       else
 	{
-	  for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
-	       byte -= UNITS_PER_WORD)
+	  for (byte = GET_MODE_SIZE (GET_MODE (dest)) - UNITS_PER_WORD;
+	       byte >= 0; byte -= UNITS_PER_WORD)
 	    {
 	      d = loongarch_subword_at_byte (dest, byte);
 	      s = loongarch_subword_at_byte (src, byte);
@@ -7606,7 +7497,7 @@  loongarch_global_init (void)
 
   /* Function to allocate machine-dependent function status.  */
   init_machine_status = &loongarch_init_machine_status;
-};
+}
 
 static void
 loongarch_reg_init (void)
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 21890a2d94b..459ad30b9bb 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -2406,6 +2406,7 @@  (define_insn "*movdf_softfloat"
   [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m")
 	(match_operand:DF 1 "move_operand" "rG,m,rG"))]
   "(TARGET_SOFT_FLOAT || TARGET_SINGLE_FLOAT)
+   && TARGET_64BIT
    && (register_operand (operands[0], DFmode)
        || reg_or_0_operand (operands[1], DFmode))"
   { return loongarch_output_move (operands[0], operands[1]); }