LoongArch: Organize the code related to split move and merge the same functions.
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
success
|
Test passed
|
Commit Message
gcc/ChangeLog:
* config/loongarch/loongarch-protos.h
(loongarch_split_128bit_move): Delete.
(loongarch_split_128bit_move_p): Delete.
(loongarch_split_256bit_move): Delete.
(loongarch_split_256bit_move_p): Delete.
(loongarch_split_vector_move): Add a function declaration.
* config/loongarch/loongarch.cc
(loongarch_vector_costs::finish_cost): Adjust the code
formatting.
(loongarch_split_vector_move_p): Merge
loongarch_split_128bit_move_p and loongarch_split_256bit_move_p.
(loongarch_split_move_p): Merge code.
(loongarch_split_move): Likewise.
(loongarch_split_128bit_move_p): Delete.
(loongarch_split_256bit_move_p): Delete.
(loongarch_split_128bit_move): Delete.
(loongarch_split_vector_move): Merge loongarch_split_128bit_move
and loongarch_split_256bit_move.
(loongarch_split_256bit_move): Delete.
(loongarch_global_init): Remove the extra semicolon at the
end of the function.
* config/loongarch/loongarch.md (*movdf_softfloat): Added a new
condition TARGET_64BIT.
---
gcc/config/loongarch/loongarch-protos.h | 5 +-
gcc/config/loongarch/loongarch.cc | 221 ++++++------------------
gcc/config/loongarch/loongarch.md | 1 +
3 files changed, 58 insertions(+), 169 deletions(-)
Comments
Pushed to r15-2167.
在 2024/7/13 下午5:04, Lulu Cheng 写道:
> gcc/ChangeLog:
>
> * config/loongarch/loongarch-protos.h
> (loongarch_split_128bit_move): Delete.
> (loongarch_split_128bit_move_p): Delete.
> (loongarch_split_256bit_move): Delete.
> (loongarch_split_256bit_move_p): Delete.
> (loongarch_split_vector_move): Add a function declaration.
> * config/loongarch/loongarch.cc
> (loongarch_vector_costs::finish_cost): Adjust the code
> formatting.
> (loongarch_split_vector_move_p): Merge
> loongarch_split_128bit_move_p and loongarch_split_256bit_move_p.
> (loongarch_split_move_p): Merge code.
> (loongarch_split_move): Likewise.
> (loongarch_split_128bit_move_p): Delete.
> (loongarch_split_256bit_move_p): Delete.
> (loongarch_split_128bit_move): Delete.
> (loongarch_split_vector_move): Merge loongarch_split_128bit_move
> and loongarch_split_256bit_move.
> (loongarch_split_256bit_move): Delete.
> (loongarch_global_init): Remove the extra semicolon at the
> end of the function.
> * config/loongarch/loongarch.md (*movdf_softfloat): Added a new
> condition TARGET_64BIT.
> ---
> gcc/config/loongarch/loongarch-protos.h | 5 +-
> gcc/config/loongarch/loongarch.cc | 221 ++++++------------------
> gcc/config/loongarch/loongarch.md | 1 +
> 3 files changed, 58 insertions(+), 169 deletions(-)
>
> diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
> index e238d795a73..85f6e894399 100644
> --- a/gcc/config/loongarch/loongarch-protos.h
> +++ b/gcc/config/loongarch/loongarch-protos.h
> @@ -85,10 +85,7 @@ extern bool loongarch_split_move_p (rtx, rtx);
> extern void loongarch_split_move (rtx, rtx);
> extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
> extern void loongarch_split_plus_constant (rtx *, machine_mode);
> -extern void loongarch_split_128bit_move (rtx, rtx);
> -extern bool loongarch_split_128bit_move_p (rtx, rtx);
> -extern void loongarch_split_256bit_move (rtx, rtx);
> -extern bool loongarch_split_256bit_move_p (rtx, rtx);
> +extern void loongarch_split_vector_move (rtx, rtx);
> extern const char *loongarch_output_move (rtx, rtx);
> #ifdef RTX_CODE
> extern void loongarch_expand_scc (rtx *);
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index 8eb47ff95c3..c7a02103ef5 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -4354,10 +4354,10 @@ void
> loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
> {
> loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
> +
> if (loop_vinfo)
> - {
> - m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo);
> - }
> + m_suggested_unroll_factor
> + = determine_suggested_unroll_factor (loop_vinfo);
>
> vector_costs::finish_cost (scalar_costs);
> }
> @@ -4423,6 +4423,7 @@ loongarch_subword (rtx op, bool high_p)
> return simplify_gen_subreg (word_mode, op, mode, byte);
> }
>
> +static bool loongarch_split_vector_move_p (rtx dest, rtx src);
> /* Return true if a move from SRC to DEST should be split into two.
> SPLIT_TYPE describes the split condition. */
>
> @@ -4444,13 +4445,11 @@ loongarch_split_move_p (rtx dest, rtx src)
> return false;
> }
>
> - /* Check if LSX moves need splitting. */
> - if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
> - return loongarch_split_128bit_move_p (dest, src);
>
> - /* Check if LASX moves need splitting. */
> - if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
> - return loongarch_split_256bit_move_p (dest, src);
> + /* Check if vector moves need splitting. */
> + if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
> + || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
> + return loongarch_split_vector_move_p (dest, src);
>
> /* Otherwise split all multiword moves. */
> return size > UNITS_PER_WORD;
> @@ -4463,10 +4462,9 @@ void
> loongarch_split_move (rtx dest, rtx src)
> {
> gcc_checking_assert (loongarch_split_move_p (dest, src));
> - if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
> - loongarch_split_128bit_move (dest, src);
> - else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
> - loongarch_split_256bit_move (dest, src);
> + if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
> + || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
> + loongarch_split_vector_move (dest, src);
> else
> gcc_unreachable ();
> }
> @@ -4588,224 +4586,117 @@ loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr)
>
> return insn[ldr][index-2];
> }
> -/* Return true if a 128-bit move from SRC to DEST should be split. */
> -
> -bool
> -loongarch_split_128bit_move_p (rtx dest, rtx src)
> -{
> - /* LSX-to-LSX moves can be done in a single instruction. */
> - if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
> - return false;
> -
> - /* Check for LSX loads and stores. */
> - if (FP_REG_RTX_P (dest) && MEM_P (src))
> - return false;
> - if (FP_REG_RTX_P (src) && MEM_P (dest))
> - return false;
> -
> - /* Check for LSX set to an immediate const vector with valid replicated
> - element. */
> - if (FP_REG_RTX_P (dest)
> - && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
> - return false;
> -
> - /* Check for LSX load zero immediate. */
> - if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
> - return false;
> -
> - return true;
> -}
> -
> -/* Return true if a 256-bit move from SRC to DEST should be split. */
> +/* Return true if a vector move from SRC to DEST should be split. */
>
> -bool
> -loongarch_split_256bit_move_p (rtx dest, rtx src)
> +static bool
> +loongarch_split_vector_move_p (rtx dest, rtx src)
> {
> - /* LSX-to-LSX moves can be done in a single instruction. */
> + /* Vector moves can be done in a single instruction. */
> if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
> return false;
>
> - /* Check for LSX loads and stores. */
> + /* Check for vector loads and stores. */
> if (FP_REG_RTX_P (dest) && MEM_P (src))
> return false;
> if (FP_REG_RTX_P (src) && MEM_P (dest))
> return false;
>
> - /* Check for LSX set to an immediate const vector with valid replicated
> + /* Check for vector set to an immediate const vector with valid replicated
> element. */
> if (FP_REG_RTX_P (dest)
> && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
> return false;
>
> - /* Check for LSX load zero immediate. */
> + /* Check for vector load zero immediate. */
> if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
> return false;
>
> return true;
> }
>
> -/* Split a 128-bit move from SRC to DEST. */
> +/* Split a vector move from SRC to DEST. */
>
> void
> -loongarch_split_128bit_move (rtx dest, rtx src)
> +loongarch_split_vector_move (rtx dest, rtx src)
> {
> int byte, index;
> - rtx low_dest, low_src, d, s;
> + rtx s, d;
> + machine_mode mode = GET_MODE (dest);
> + bool lsx_p = LSX_SUPPORTED_MODE_P (mode);
>
> if (FP_REG_RTX_P (dest))
> {
> gcc_assert (!MEM_P (src));
>
> - rtx new_dest = dest;
> - if (!TARGET_64BIT)
> - {
> - if (GET_MODE (dest) != V4SImode)
> - new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
> - }
> - else
> - {
> - if (GET_MODE (dest) != V2DImode)
> - new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0);
> - }
> -
> - for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
> - byte += UNITS_PER_WORD, index++)
> - {
> - s = loongarch_subword_at_byte (src, byte);
> - if (!TARGET_64BIT)
> - emit_insn (gen_lsx_vinsgr2vr_w (new_dest, s, new_dest,
> - GEN_INT (1 << index)));
> - else
> - emit_insn (gen_lsx_vinsgr2vr_d (new_dest, s, new_dest,
> - GEN_INT (1 << index)));
> - }
> - }
> - else if (FP_REG_RTX_P (src))
> - {
> - gcc_assert (!MEM_P (dest));
> -
> - rtx new_src = src;
> - if (!TARGET_64BIT)
> - {
> - if (GET_MODE (src) != V4SImode)
> - new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
> - }
> - else
> - {
> - if (GET_MODE (src) != V2DImode)
> - new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0);
> - }
> + rtx (*gen_vinsgr2vr_d) (rtx, rtx, rtx, rtx);
>
> - for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
> - byte += UNITS_PER_WORD, index++)
> - {
> - d = loongarch_subword_at_byte (dest, byte);
> - if (!TARGET_64BIT)
> - emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
> - else
> - emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
> - }
> - }
> - else
> - {
> - low_dest = loongarch_subword_at_byte (dest, 0);
> - low_src = loongarch_subword_at_byte (src, 0);
> - gcc_assert (REG_P (low_dest) && REG_P (low_src));
> - /* Make sure the source register is not written before reading. */
> - if (REGNO (low_dest) <= REGNO (low_src))
> + if (lsx_p)
> {
> - for (byte = 0; byte < GET_MODE_SIZE (TImode);
> - byte += UNITS_PER_WORD)
> - {
> - d = loongarch_subword_at_byte (dest, byte);
> - s = loongarch_subword_at_byte (src, byte);
> - loongarch_emit_move (d, s);
> - }
> + mode = V2DImode;
> + gen_vinsgr2vr_d = gen_lsx_vinsgr2vr_d;
> }
> else
> {
> - for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
> - byte -= UNITS_PER_WORD)
> - {
> - d = loongarch_subword_at_byte (dest, byte);
> - s = loongarch_subword_at_byte (src, byte);
> - loongarch_emit_move (d, s);
> - }
> + mode = V4DImode;
> + gen_vinsgr2vr_d = gen_lasx_xvinsgr2vr_d;
> }
> - }
> -}
> -
> -/* Split a 256-bit move from SRC to DEST. */
> -
> -void
> -loongarch_split_256bit_move (rtx dest, rtx src)
> -{
> - int byte, index;
> - rtx low_dest, low_src, d, s;
> -
> - if (FP_REG_RTX_P (dest))
> - {
> - gcc_assert (!MEM_P (src));
>
> rtx new_dest = dest;
> - if (!TARGET_64BIT)
> - {
> - if (GET_MODE (dest) != V8SImode)
> - new_dest = simplify_gen_subreg (V8SImode, dest, GET_MODE (dest), 0);
> - }
> - else
> - {
> - if (GET_MODE (dest) != V4DImode)
> - new_dest = simplify_gen_subreg (V4DImode, dest, GET_MODE (dest), 0);
> - }
> +
> + if (GET_MODE (dest) != mode)
> + new_dest = simplify_gen_subreg (mode, dest, GET_MODE (dest), 0);
>
> for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
> byte += UNITS_PER_WORD, index++)
> {
> s = loongarch_subword_at_byte (src, byte);
> - if (!TARGET_64BIT)
> - emit_insn (gen_lasx_xvinsgr2vr_w (new_dest, s, new_dest,
> - GEN_INT (1 << index)));
> - else
> - emit_insn (gen_lasx_xvinsgr2vr_d (new_dest, s, new_dest,
> - GEN_INT (1 << index)));
> + emit_insn (gen_vinsgr2vr_d (new_dest, s, new_dest,
> + GEN_INT (1 << index)));
> }
> }
> else if (FP_REG_RTX_P (src))
> {
> gcc_assert (!MEM_P (dest));
>
> - rtx new_src = src;
> - if (!TARGET_64BIT)
> + rtx (*gen_vpickve2gr_d) (rtx, rtx, rtx);
> +
> + if (lsx_p)
> {
> - if (GET_MODE (src) != V8SImode)
> - new_src = simplify_gen_subreg (V8SImode, src, GET_MODE (src), 0);
> + mode = V2DImode;
> + gen_vpickve2gr_d = gen_lsx_vpickve2gr_d;
> }
> else
> {
> - if (GET_MODE (src) != V4DImode)
> - new_src = simplify_gen_subreg (V4DImode, src, GET_MODE (src), 0);
> + mode = V4DImode;
> + gen_vpickve2gr_d = gen_lasx_xvpickve2gr_d;
> }
>
> + rtx new_src = src;
> + if (GET_MODE (src) != mode)
> + new_src = simplify_gen_subreg (mode, src, GET_MODE (src), 0);
> +
> for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (src));
> byte += UNITS_PER_WORD, index++)
> {
> d = loongarch_subword_at_byte (dest, byte);
> - if (!TARGET_64BIT)
> - emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
> - else
> - emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
> + emit_insn (gen_vpickve2gr_d (d, new_src, GEN_INT (index)));
> }
> }
> else
> {
> + /* This part of the code is designed to handle the following situations:
> + (set (reg:V2DI 4 $r4)
> + (reg:V2DI 6 $r6))
> + The trigger test case is lsx-mov-1.c. */
> + rtx low_dest, low_src;
> +
> low_dest = loongarch_subword_at_byte (dest, 0);
> low_src = loongarch_subword_at_byte (src, 0);
> gcc_assert (REG_P (low_dest) && REG_P (low_src));
> /* Make sure the source register is not written before reading. */
> if (REGNO (low_dest) <= REGNO (low_src))
> {
> - for (byte = 0; byte < GET_MODE_SIZE (TImode);
> + for (byte = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
> byte += UNITS_PER_WORD)
> {
> d = loongarch_subword_at_byte (dest, byte);
> @@ -4815,8 +4706,8 @@ loongarch_split_256bit_move (rtx dest, rtx src)
> }
> else
> {
> - for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
> - byte -= UNITS_PER_WORD)
> + for (byte = GET_MODE_SIZE (GET_MODE (dest)) - UNITS_PER_WORD;
> + byte >= 0; byte -= UNITS_PER_WORD)
> {
> d = loongarch_subword_at_byte (dest, byte);
> s = loongarch_subword_at_byte (src, byte);
> @@ -7606,7 +7497,7 @@ loongarch_global_init (void)
>
> /* Function to allocate machine-dependent function status. */
> init_machine_status = &loongarch_init_machine_status;
> -};
> +}
>
> static void
> loongarch_reg_init (void)
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index 21890a2d94b..459ad30b9bb 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -2406,6 +2406,7 @@ (define_insn "*movdf_softfloat"
> [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m")
> (match_operand:DF 1 "move_operand" "rG,m,rG"))]
> "(TARGET_SOFT_FLOAT || TARGET_SINGLE_FLOAT)
> + && TARGET_64BIT
> && (register_operand (operands[0], DFmode)
> || reg_or_0_operand (operands[1], DFmode))"
> { return loongarch_output_move (operands[0], operands[1]); }
@@ -85,10 +85,7 @@ extern bool loongarch_split_move_p (rtx, rtx);
extern void loongarch_split_move (rtx, rtx);
extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
extern void loongarch_split_plus_constant (rtx *, machine_mode);
-extern void loongarch_split_128bit_move (rtx, rtx);
-extern bool loongarch_split_128bit_move_p (rtx, rtx);
-extern void loongarch_split_256bit_move (rtx, rtx);
-extern bool loongarch_split_256bit_move_p (rtx, rtx);
+extern void loongarch_split_vector_move (rtx, rtx);
extern const char *loongarch_output_move (rtx, rtx);
#ifdef RTX_CODE
extern void loongarch_expand_scc (rtx *);
@@ -4354,10 +4354,10 @@ void
loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
{
loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
+
if (loop_vinfo)
- {
- m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo);
- }
+ m_suggested_unroll_factor
+ = determine_suggested_unroll_factor (loop_vinfo);
vector_costs::finish_cost (scalar_costs);
}
@@ -4423,6 +4423,7 @@ loongarch_subword (rtx op, bool high_p)
return simplify_gen_subreg (word_mode, op, mode, byte);
}
+static bool loongarch_split_vector_move_p (rtx dest, rtx src);
/* Return true if a move from SRC to DEST should be split into two.
SPLIT_TYPE describes the split condition. */
@@ -4444,13 +4445,11 @@ loongarch_split_move_p (rtx dest, rtx src)
return false;
}
- /* Check if LSX moves need splitting. */
- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
- return loongarch_split_128bit_move_p (dest, src);
- /* Check if LASX moves need splitting. */
- if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
- return loongarch_split_256bit_move_p (dest, src);
+ /* Check if vector moves need splitting. */
+ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
+ || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
+ return loongarch_split_vector_move_p (dest, src);
/* Otherwise split all multiword moves. */
return size > UNITS_PER_WORD;
@@ -4463,10 +4462,9 @@ void
loongarch_split_move (rtx dest, rtx src)
{
gcc_checking_assert (loongarch_split_move_p (dest, src));
- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
- loongarch_split_128bit_move (dest, src);
- else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
- loongarch_split_256bit_move (dest, src);
+ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
+ || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
+ loongarch_split_vector_move (dest, src);
else
gcc_unreachable ();
}
@@ -4588,224 +4586,117 @@ loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr)
return insn[ldr][index-2];
}
-/* Return true if a 128-bit move from SRC to DEST should be split. */
-
-bool
-loongarch_split_128bit_move_p (rtx dest, rtx src)
-{
- /* LSX-to-LSX moves can be done in a single instruction. */
- if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
- return false;
-
- /* Check for LSX loads and stores. */
- if (FP_REG_RTX_P (dest) && MEM_P (src))
- return false;
- if (FP_REG_RTX_P (src) && MEM_P (dest))
- return false;
-
- /* Check for LSX set to an immediate const vector with valid replicated
- element. */
- if (FP_REG_RTX_P (dest)
- && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
- return false;
-
- /* Check for LSX load zero immediate. */
- if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
- return false;
-
- return true;
-}
-
-/* Return true if a 256-bit move from SRC to DEST should be split. */
+/* Return true if a vector move from SRC to DEST should be split. */
-bool
-loongarch_split_256bit_move_p (rtx dest, rtx src)
+static bool
+loongarch_split_vector_move_p (rtx dest, rtx src)
{
- /* LSX-to-LSX moves can be done in a single instruction. */
+ /* Vector moves can be done in a single instruction. */
if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
return false;
- /* Check for LSX loads and stores. */
+ /* Check for vector loads and stores. */
if (FP_REG_RTX_P (dest) && MEM_P (src))
return false;
if (FP_REG_RTX_P (src) && MEM_P (dest))
return false;
- /* Check for LSX set to an immediate const vector with valid replicated
+ /* Check for vector set to an immediate const vector with valid replicated
element. */
if (FP_REG_RTX_P (dest)
&& loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
return false;
- /* Check for LSX load zero immediate. */
+ /* Check for vector load zero immediate. */
if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
return false;
return true;
}
-/* Split a 128-bit move from SRC to DEST. */
+/* Split a vector move from SRC to DEST. */
void
-loongarch_split_128bit_move (rtx dest, rtx src)
+loongarch_split_vector_move (rtx dest, rtx src)
{
int byte, index;
- rtx low_dest, low_src, d, s;
+ rtx s, d;
+ machine_mode mode = GET_MODE (dest);
+ bool lsx_p = LSX_SUPPORTED_MODE_P (mode);
if (FP_REG_RTX_P (dest))
{
gcc_assert (!MEM_P (src));
- rtx new_dest = dest;
- if (!TARGET_64BIT)
- {
- if (GET_MODE (dest) != V4SImode)
- new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
- }
- else
- {
- if (GET_MODE (dest) != V2DImode)
- new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0);
- }
-
- for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
- byte += UNITS_PER_WORD, index++)
- {
- s = loongarch_subword_at_byte (src, byte);
- if (!TARGET_64BIT)
- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, s, new_dest,
- GEN_INT (1 << index)));
- else
- emit_insn (gen_lsx_vinsgr2vr_d (new_dest, s, new_dest,
- GEN_INT (1 << index)));
- }
- }
- else if (FP_REG_RTX_P (src))
- {
- gcc_assert (!MEM_P (dest));
-
- rtx new_src = src;
- if (!TARGET_64BIT)
- {
- if (GET_MODE (src) != V4SImode)
- new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
- }
- else
- {
- if (GET_MODE (src) != V2DImode)
- new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0);
- }
+ rtx (*gen_vinsgr2vr_d) (rtx, rtx, rtx, rtx);
- for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
- byte += UNITS_PER_WORD, index++)
- {
- d = loongarch_subword_at_byte (dest, byte);
- if (!TARGET_64BIT)
- emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
- else
- emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
- }
- }
- else
- {
- low_dest = loongarch_subword_at_byte (dest, 0);
- low_src = loongarch_subword_at_byte (src, 0);
- gcc_assert (REG_P (low_dest) && REG_P (low_src));
- /* Make sure the source register is not written before reading. */
- if (REGNO (low_dest) <= REGNO (low_src))
+ if (lsx_p)
{
- for (byte = 0; byte < GET_MODE_SIZE (TImode);
- byte += UNITS_PER_WORD)
- {
- d = loongarch_subword_at_byte (dest, byte);
- s = loongarch_subword_at_byte (src, byte);
- loongarch_emit_move (d, s);
- }
+ mode = V2DImode;
+ gen_vinsgr2vr_d = gen_lsx_vinsgr2vr_d;
}
else
{
- for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
- byte -= UNITS_PER_WORD)
- {
- d = loongarch_subword_at_byte (dest, byte);
- s = loongarch_subword_at_byte (src, byte);
- loongarch_emit_move (d, s);
- }
+ mode = V4DImode;
+ gen_vinsgr2vr_d = gen_lasx_xvinsgr2vr_d;
}
- }
-}
-
-/* Split a 256-bit move from SRC to DEST. */
-
-void
-loongarch_split_256bit_move (rtx dest, rtx src)
-{
- int byte, index;
- rtx low_dest, low_src, d, s;
-
- if (FP_REG_RTX_P (dest))
- {
- gcc_assert (!MEM_P (src));
rtx new_dest = dest;
- if (!TARGET_64BIT)
- {
- if (GET_MODE (dest) != V8SImode)
- new_dest = simplify_gen_subreg (V8SImode, dest, GET_MODE (dest), 0);
- }
- else
- {
- if (GET_MODE (dest) != V4DImode)
- new_dest = simplify_gen_subreg (V4DImode, dest, GET_MODE (dest), 0);
- }
+
+ if (GET_MODE (dest) != mode)
+ new_dest = simplify_gen_subreg (mode, dest, GET_MODE (dest), 0);
for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
byte += UNITS_PER_WORD, index++)
{
s = loongarch_subword_at_byte (src, byte);
- if (!TARGET_64BIT)
- emit_insn (gen_lasx_xvinsgr2vr_w (new_dest, s, new_dest,
- GEN_INT (1 << index)));
- else
- emit_insn (gen_lasx_xvinsgr2vr_d (new_dest, s, new_dest,
- GEN_INT (1 << index)));
+ emit_insn (gen_vinsgr2vr_d (new_dest, s, new_dest,
+ GEN_INT (1 << index)));
}
}
else if (FP_REG_RTX_P (src))
{
gcc_assert (!MEM_P (dest));
- rtx new_src = src;
- if (!TARGET_64BIT)
+ rtx (*gen_vpickve2gr_d) (rtx, rtx, rtx);
+
+ if (lsx_p)
{
- if (GET_MODE (src) != V8SImode)
- new_src = simplify_gen_subreg (V8SImode, src, GET_MODE (src), 0);
+ mode = V2DImode;
+ gen_vpickve2gr_d = gen_lsx_vpickve2gr_d;
}
else
{
- if (GET_MODE (src) != V4DImode)
- new_src = simplify_gen_subreg (V4DImode, src, GET_MODE (src), 0);
+ mode = V4DImode;
+ gen_vpickve2gr_d = gen_lasx_xvpickve2gr_d;
}
+ rtx new_src = src;
+ if (GET_MODE (src) != mode)
+ new_src = simplify_gen_subreg (mode, src, GET_MODE (src), 0);
+
for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (src));
byte += UNITS_PER_WORD, index++)
{
d = loongarch_subword_at_byte (dest, byte);
- if (!TARGET_64BIT)
- emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
- else
- emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
+ emit_insn (gen_vpickve2gr_d (d, new_src, GEN_INT (index)));
}
}
else
{
+ /* This part of the code is designed to handle the following situations:
+ (set (reg:V2DI 4 $r4)
+ (reg:V2DI 6 $r6))
+ The trigger test case is lsx-mov-1.c. */
+ rtx low_dest, low_src;
+
low_dest = loongarch_subword_at_byte (dest, 0);
low_src = loongarch_subword_at_byte (src, 0);
gcc_assert (REG_P (low_dest) && REG_P (low_src));
/* Make sure the source register is not written before reading. */
if (REGNO (low_dest) <= REGNO (low_src))
{
- for (byte = 0; byte < GET_MODE_SIZE (TImode);
+ for (byte = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
byte += UNITS_PER_WORD)
{
d = loongarch_subword_at_byte (dest, byte);
@@ -4815,8 +4706,8 @@ loongarch_split_256bit_move (rtx dest, rtx src)
}
else
{
- for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
- byte -= UNITS_PER_WORD)
+ for (byte = GET_MODE_SIZE (GET_MODE (dest)) - UNITS_PER_WORD;
+ byte >= 0; byte -= UNITS_PER_WORD)
{
d = loongarch_subword_at_byte (dest, byte);
s = loongarch_subword_at_byte (src, byte);
@@ -7606,7 +7497,7 @@ loongarch_global_init (void)
/* Function to allocate machine-dependent function status. */
init_machine_status = &loongarch_init_machine_status;
-};
+}
static void
loongarch_reg_init (void)
@@ -2406,6 +2406,7 @@ (define_insn "*movdf_softfloat"
[(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m")
(match_operand:DF 1 "move_operand" "rG,m,rG"))]
"(TARGET_SOFT_FLOAT || TARGET_SINGLE_FLOAT)
+ && TARGET_64BIT
&& (register_operand (operands[0], DFmode)
|| reg_or_0_operand (operands[1], DFmode))"
{ return loongarch_output_move (operands[0], operands[1]); }