[v5] RISC-V: Fix register overlap issue for some xtheadvector instructions

Message ID 20240110065111.1185-1-cooper.joshua@linux.alibaba.com
State Superseded
Delegated to: Juzhe Zhong
Headers
Series [v5] RISC-V: Fix register overlap issue for some xtheadvector instructions |

Checks

Context Check Description
rivoscibot/toolchain-ci-rivos-apply-patch success Patch applied
rivoscibot/toolchain-ci-rivos-lint warning Lint failed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc_zba_zbb_zbc_zbs-lp64d-non-multilib fail Build failed
rivoscibot/toolchain-ci-rivos-build--linux-rv32gc_zba_zbb_zbc_zbs-ilp32d-non-multilib fail Build failed
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gc-lp64d-multilib fail Build failed
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gcv-lp64d-multilib fail Build failed
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gcv-lp64d-multilib fail Build failed
rivoscibot/toolchain-ci-rivos-test fail Testing failed

Commit Message

joshua Jan. 10, 2024, 6:51 a.m. UTC
  For th.vmadc/th.vmsbc as well as narrowing arithmetic instructions
and floating-point compare instructions, an illegal instruction
exception will be raised if the destination vector register overlaps
a source vector register group.

To handle this issue, we use "group_overlap" and "enabled" attribute
to disable some alternatives for xtheadvector.

gcc/ChangeLog:

	* config/riscv/riscv.md (none,W21,W42,W84,W43,W86,W87,W0):
	(none,W21,W42,W84,W43,W86,W87,W0,thv_disabled,rvv_disabled):
	Add group-overlap constraint for xtheadvector.
	* config/riscv/vector.md: 
	Disable alternatives that destination register overlaps
	source register group for xtheadvector.

Co-authored-by: Jin Ma <jinma@linux.alibaba.com>
Co-authored-by: Xianmiao Qu <cooper.qu@linux.alibaba.com>
Co-authored-by: Christoph Müllner <christoph.muellner@vrull.eu>
---
 gcc/config/riscv/riscv.md  |  12 +-
 gcc/config/riscv/vector.md | 314 +++++++++++++++++++++----------------
 2 files changed, 190 insertions(+), 136 deletions(-)
  

Comments

juzhe.zhong@rivai.ai Jan. 10, 2024, 6:53 a.m. UTC | #1
LGTM from myside. Give another a few more days that some one want to chime in.



juzhe.zhong@rivai.ai
 
From: Jun Sha (Joshua)
Date: 2024-01-10 14:51
To: gcc-patches
CC: jim.wilson.gcc; palmer; andrew; philipp.tomsich; jeffreyalaw; christoph.muellner; juzhe.zhong; Jun Sha (Joshua); Jin Ma; Xianmiao Qu
Subject: [PATCH v5] RISC-V: Fix register overlap issue for some xtheadvector instructions
For th.vmadc/th.vmsbc as well as narrowing arithmetic instructions
and floating-point compare instructions, an illegal instruction
exception will be raised if the destination vector register overlaps
a source vector register group.
 
To handle this issue, we use "group_overlap" and "enabled" attribute
to disable some alternatives for xtheadvector.
 
gcc/ChangeLog:
 
* config/riscv/riscv.md (none,W21,W42,W84,W43,W86,W87,W0):
(none,W21,W42,W84,W43,W86,W87,W0,thv_disabled,rvv_disabled):
Add group-overlap constraint for xtheadvector.
* config/riscv/vector.md: 
Disable alternatives that destination register overlaps
source register group for xtheadvector.
 
Co-authored-by: Jin Ma <jinma@linux.alibaba.com>
Co-authored-by: Xianmiao Qu <cooper.qu@linux.alibaba.com>
Co-authored-by: Christoph Müllner <christoph.muellner@vrull.eu>
---
gcc/config/riscv/riscv.md  |  12 +-
gcc/config/riscv/vector.md | 314 +++++++++++++++++++++----------------
2 files changed, 190 insertions(+), 136 deletions(-)
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 84212430dc0..411d1d17391 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -537,7 +537,7 @@
;; Widening instructions have group-overlap constraints.  Those are only
;; valid for certain register-group sizes.  This attribute marks the
;; alternatives not matching the required register-group size as disabled.
-(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0"
+(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0,thv_disabled,rvv_disabled"
   (const_string "none"))
(define_attr "group_overlap_valid" "no,yes"
@@ -576,7 +576,15 @@
          (and (eq_attr "group_overlap" "W0")
      (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) > 1"))
(const_string "no")
-        ]
+
+ (and (eq_attr "group_overlap" "thv_disabled")
+       (match_test "TARGET_XTHEADVECTOR"))
+ (const_string "no")
+
+ (and (eq_attr "group_overlap" "rvv_disabled")
+       (match_test "TARGET_VECTOR && !TARGET_XTHEADVECTOR"))
+ (const_string "no")
+ ]
        (const_string "yes")))
;; Attribute to control enable or disable instructions.
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 3eb6daafbc2..4748ddd34a2 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3260,7 +3260,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none,none")])
(define_insn "@pred_msbc<mode>"
   [(set (match_operand:<VM> 0 "register_operand"        "=vr, vr, &vr")
@@ -3279,7 +3280,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,none")])
(define_insn "@pred_madc<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr")
@@ -3299,7 +3301,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none")])
(define_insn "@pred_msbc<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr")
@@ -3319,7 +3322,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none")])
(define_expand "@pred_madc<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -3368,7 +3372,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none")])
(define_insn "*pred_madc<mode>_extended_scalar"
   [(set (match_operand:<VM> 0 "register_operand"             "=vr, &vr")
@@ -3389,7 +3394,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none")])
(define_expand "@pred_msbc<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -3438,7 +3444,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none")])
(define_insn "*pred_msbc<mode>_extended_scalar"
   [(set (match_operand:<VM> 0 "register_operand"              "=vr, &vr")
@@ -3459,7 +3466,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none")])
(define_insn "@pred_madc<mode>_overflow"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr, &vr")
@@ -3477,7 +3485,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none,none")])
(define_insn "@pred_msbc<mode>_overflow"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, vr, &vr, &vr")
@@ -3495,7 +3504,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,none,none")])
(define_insn "@pred_madc<mode>_overflow_scalar"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr")
@@ -3514,7 +3524,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none")])
(define_insn "@pred_msbc<mode>_overflow_scalar"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr")
@@ -3533,7 +3544,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none")])
(define_expand "@pred_madc<mode>_overflow_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -3580,7 +3592,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none")])
(define_insn "*pred_madc<mode>_overflow_extended_scalar"
   [(set (match_operand:<VM> 0 "register_operand"             "=vr, &vr")
@@ -3600,7 +3613,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none")])
(define_expand "@pred_msbc<mode>_overflow_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -3647,7 +3661,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none")])
(define_insn "*pred_msbc<mode>_overflow_extended_scalar"
   [(set (match_operand:<VM> 0 "register_operand"             "=vr, &vr")
@@ -3667,7 +3682,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none")])
;; -------------------------------------------------------------------------------
;; ---- Predicated integer unary operations
@@ -3987,7 +4003,8 @@
   "TARGET_VECTOR"
   "vn<insn>.w%o4\t%0,%3,%v4%p1"
   [(set_attr "type" "vnshift")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "none,none,thv_disabled,thv_disabled,none,thv_disabled,none,none,none,thv_disabled,none,none")])
(define_insn "@pred_narrow_<optab><mode>_scalar"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, vd, vr, vr,  &vr,  &vr")
@@ -4008,7 +4025,8 @@
   "TARGET_VECTOR"
   "vn<insn>.w%o4\t%0,%3,%4%p1"
   [(set_attr "type" "vnshift")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "none,none,thv_disabled,thv_disabled,none,none")])
;; vncvt.x.x.w
(define_insn "@pred_trunc<mode>"
@@ -4032,7 +4050,8 @@
    (set_attr "vl_op_idx" "4")
    (set (attr "ta") (symbol_ref "riscv_vector::get_ta(operands[5])"))
    (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
-   (set (attr "avl_type_idx") (const_int 7))])
+   (set (attr "avl_type_idx") (const_int 7))
+   (set_attr "group_overlap" "none,none,thv_disabled,thv_disabled,none,none")])
;; -------------------------------------------------------------------------------
;; ---- Predicated fixed-point operations
@@ -4438,7 +4457,8 @@
   "TARGET_VECTOR"
   "vnclip<v_su>.w%o4\t%0,%3,%v4%p1"
   [(set_attr "type" "vnclip")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,none,none,thv_disabled,thv_disabled,none,none")])
(define_insn "@pred_narrow_clip<v_su><mode>_scalar"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, vd, vr, vr,  &vr,  &vr")
@@ -4460,7 +4480,8 @@
   "TARGET_VECTOR"
   "vnclip<v_su>.w%o4\t%0,%3,%4%p1"
   [(set_attr "type" "vnclip")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,thv_disabled,thv_disabled,none,none")])
;; -------------------------------------------------------------------------------
;; ---- Predicated integer comparison operations
@@ -4511,23 +4532,24 @@
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   vr,   vr,   &vr,   &vr,   &vr,   &vr")
(if_then_else:<VM>
  (unspec:<VM>
-     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
-      (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
-      (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
-      (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
+     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+      (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK,   rK,   rK,   rK,   rK")
+      (match_operand 7 "const_int_operand"             "    i,    i,    i,    i,    i,    i,    i,    i")
+      (match_operand 8 "const_int_operand"             "    i,    i,    i,    i,    i,    i,    i,    i")
     (reg:SI VL_REGNUM)
     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (match_operator:<VM> 3 "comparison_except_ltge_operator"
-      [(match_operand:V_VLSI 4 "register_operand"          "   vr,   vr,   vr,   vr")
-       (match_operand:V_VLSI 5 "vector_arith_operand"      "   vr,   vr,   vi,   vi")])
-   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,   vu,    0")))]
+      [(match_operand:V_VLSI 4 "register_operand"          "   vr,   vr,   vr,   vr,   vr,   vr,   vr,   vr")
+       (match_operand:V_VLSI 5 "vector_arith_operand"      "   vr,   vr,   vi,   vi,   vr,   vr,   vi,   vi")])
+   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,   vu,    0,   vu,    0,   vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.v%o5\t%0,%4,%v5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,thv_disabled,thv_disabled,rvv_disabled,rvv_disabled,rvv_disabled,rvv_disabled")])
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_narrow"
@@ -4547,7 +4569,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.v%o5\t%0,%4,%v5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,none,none")])
(define_expand "@pred_ltge<mode>"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -4591,23 +4614,24 @@
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_ltge<mode>"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   vr,   vr,   &vr,   &vr,   &vr,   &vr")
(if_then_else:<VM>
  (unspec:<VM>
-     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
-      (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
-      (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
-      (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
+     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+      (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK,   rK,   rK,   rK,   rK")
+      (match_operand 7 "const_int_operand"             "    i,    i,    i,    i,    i,    i,    i,    i")
+      (match_operand 8 "const_int_operand"             "    i,    i,    i,    i,    i,    i,    i,    i")
     (reg:SI VL_REGNUM)
     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (match_operator:<VM> 3 "ltge_operator"
-      [(match_operand:V_VLSI 4 "register_operand"          "   vr,   vr,   vr,   vr")
-       (match_operand:V_VLSI 5 "vector_neg_arith_operand"  "   vr,   vr,   vj,   vj")])
-   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,   vu,    0")))]
+      [(match_operand:V_VLSI 4 "register_operand"          "   vr,   vr,   vr,   vr,   vr,   vr,   vr,   vr")
+       (match_operand:V_VLSI 5 "vector_neg_arith_operand"  "   vr,   vr,   vj,   vj,   vr,   vr,   vj,   vj")])
+   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,   vu,    0,    vu,    0,   vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.v%o5\t%0,%4,%v5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,thv_disabled,thv_disabled,rvv_disabled,rvv_disabled,rvv_disabled,rvv_disabled")])
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_ltge<mode>_narrow"
@@ -4627,7 +4651,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.v%o5\t%0,%4,%v5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,none,none")])
(define_expand "@pred_cmp<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -4673,24 +4698,25 @@
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
(if_then_else:<VM>
  (unspec:<VM>
-     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-      (match_operand 6 "vector_length_operand"         "   rK,   rK")
-      (match_operand 7 "const_int_operand"             "    i,    i")
-      (match_operand 8 "const_int_operand"             "    i,    i")
+     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+      (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+      (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+      (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
     (reg:SI VL_REGNUM)
     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (match_operator:<VM> 3 "comparison_except_eqge_operator"
-      [(match_operand:V_VLSI_QHS 4 "register_operand"      "   vr,   vr")
+      [(match_operand:V_VLSI_QHS 4 "register_operand"      "   vr,   vr,   vr,   vr")
      (vec_duplicate:V_VLSI_QHS
-         (match_operand:<VEL> 5 "register_operand"     "    r,    r"))])
-   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+         (match_operand:<VEL> 5 "register_operand"     "    r,    r,    r,    r"))])
+   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_scalar_narrow"
@@ -4711,7 +4737,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
(define_expand "@pred_eqne<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -4757,24 +4784,25 @@
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_eqne<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
(if_then_else:<VM>
  (unspec:<VM>
-     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-      (match_operand 6 "vector_length_operand"         "   rK,   rK")
-      (match_operand 7 "const_int_operand"             "    i,    i")
-      (match_operand 8 "const_int_operand"             "    i,    i")
+     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+      (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+      (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+      (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
     (reg:SI VL_REGNUM)
     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (match_operator:<VM> 3 "equality_operator"
     [(vec_duplicate:V_VLSI_QHS
-         (match_operand:<VEL> 5 "register_operand"     "    r,    r"))
-       (match_operand:V_VLSI_QHS 4 "register_operand"      "   vr,   vr")])
-   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+         (match_operand:<VEL> 5 "register_operand"     "    r,    r,    r,    r"))
+       (match_operand:V_VLSI_QHS 4 "register_operand"      "   vr,   vr,   vr,   vr")])
+   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_eqne<mode>_scalar_narrow"
@@ -4795,7 +4823,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
;; Handle GET_MODE_INNER (mode) = DImode. We need to split them since
;; we need to deal with SEW = 64 in RV32 system.
@@ -4922,24 +4951,25 @@
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
(if_then_else:<VM>
  (unspec:<VM>
-     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-      (match_operand 6 "vector_length_operand"         "   rK,   rK")
-      (match_operand 7 "const_int_operand"             "    i,    i")
-      (match_operand 8 "const_int_operand"             "    i,    i")
+     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+      (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+      (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+      (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
     (reg:SI VL_REGNUM)
     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (match_operator:<VM> 3 "comparison_except_eqge_operator"
-      [(match_operand:V_VLSI_D 4 "register_operand"        "   vr,   vr")
+      [(match_operand:V_VLSI_D 4 "register_operand"        "   vr,   vr,   vr,   vr")
      (vec_duplicate:V_VLSI_D
-         (match_operand:<VEL> 5 "register_operand"     "    r,    r"))])
-   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+         (match_operand:<VEL> 5 "register_operand"     "    r,    r,    r,    r"))])
+   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_scalar_narrow"
@@ -4960,28 +4990,30 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_eqne<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
(if_then_else:<VM>
  (unspec:<VM>
-     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-      (match_operand 6 "vector_length_operand"         "   rK,   rK")
-      (match_operand 7 "const_int_operand"             "    i,    i")
-      (match_operand 8 "const_int_operand"             "    i,    i")
+     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+      (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+      (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+      (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
     (reg:SI VL_REGNUM)
     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (match_operator:<VM> 3 "equality_operator"
     [(vec_duplicate:V_VLSI_D
-         (match_operand:<VEL> 5 "register_operand"     "    r,    r"))
-       (match_operand:V_VLSI_D 4 "register_operand"        "   vr,   vr")])
-   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+         (match_operand:<VEL> 5 "register_operand"     "    r,    r,    r,    r"))
+       (match_operand:V_VLSI_D 4 "register_operand"        "   vr,   vr,   vr,   vr")])
+   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_eqne<mode>_scalar_narrow"
@@ -5002,7 +5034,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
(define_insn "*pred_cmp<mode>_extended_scalar_merge_tie_mask"
   [(set (match_operand:<VM> 0 "register_operand"               "=vm")
@@ -5031,25 +5064,26 @@
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>_extended_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                 "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                 "=vr,   vr,   &vr,   &vr")
(if_then_else:<VM>
  (unspec:<VM>
-     [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1")
-      (match_operand 6 "vector_length_operand"          "   rK,   rK")
-      (match_operand 7 "const_int_operand"              "    i,    i")
-      (match_operand 8 "const_int_operand"              "    i,    i")
+     [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1,vmWc1,vmWc1")
+      (match_operand 6 "vector_length_operand"          "   rK,   rK,   rK,   rK")
+      (match_operand 7 "const_int_operand"              "    i,    i,    i,    i")
+      (match_operand 8 "const_int_operand"              "    i,    i,    i,    i")
     (reg:SI VL_REGNUM)
     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (match_operator:<VM> 3 "comparison_except_eqge_operator"
-      [(match_operand:V_VLSI_D 4 "register_operand"         "   vr,   vr")
+      [(match_operand:V_VLSI_D 4 "register_operand"         "   vr,   vr,   vr,   vr")
      (vec_duplicate:V_VLSI_D
        (sign_extend:<VEL>
-           (match_operand:<VSUBEL> 5 "register_operand" "    r,    r")))])
-   (match_operand:<VM> 2 "vector_merge_operand"         "   vu,    0")))]
+           (match_operand:<VSUBEL> 5 "register_operand" "    r,    r,    r,    r")))])
+   (match_operand:<VM> 2 "vector_merge_operand"         "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode) && !TARGET_64BIT"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
(define_insn "*pred_cmp<mode>_extended_scalar_narrow"
   [(set (match_operand:<VM> 0 "register_operand"                 "=vm,   vr,   vr,  &vr,  &vr")
@@ -5070,7 +5104,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode) && !TARGET_64BIT"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
(define_insn "*pred_eqne<mode>_extended_scalar_merge_tie_mask"
   [(set (match_operand:<VM> 0 "register_operand"                 "=vm")
@@ -5099,25 +5134,26 @@
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_eqne<mode>_extended_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                 "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                 "=vr,   vr,   &vr,   &vr")
(if_then_else:<VM>
  (unspec:<VM>
-     [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1")
-      (match_operand 6 "vector_length_operand"          "   rK,   rK")
-      (match_operand 7 "const_int_operand"              "    i,    i")
-      (match_operand 8 "const_int_operand"              "    i,    i")
+     [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1,vmWc1,vmWc1")
+      (match_operand 6 "vector_length_operand"          "   rK,   rK,   rK,   rK")
+      (match_operand 7 "const_int_operand"              "    i,    i,    i,    i")
+      (match_operand 8 "const_int_operand"              "    i,    i,    i,    i")
     (reg:SI VL_REGNUM)
     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (match_operator:<VM> 3 "equality_operator"
     [(vec_duplicate:V_VLSI_D
        (sign_extend:<VEL>
-           (match_operand:<VSUBEL> 5 "register_operand" "    r,    r")))
-       (match_operand:V_VLSI_D 4 "register_operand"         "   vr,   vr")])
-   (match_operand:<VM> 2 "vector_merge_operand"         "   vu,    0")))]
+           (match_operand:<VSUBEL> 5 "register_operand" "    r,    r,    r,    r")))
+       (match_operand:V_VLSI_D 4 "register_operand"         "   vr,   vr,   vr,   vr")])
+   (match_operand:<VM> 2 "vector_merge_operand"         "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode) && !TARGET_64BIT"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
(define_insn "*pred_eqne<mode>_extended_scalar_narrow"
   [(set (match_operand:<VM> 0 "register_operand"                "=vm,   vr,   vr,  &vr,  &vr")
@@ -5138,7 +5174,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode) && !TARGET_64BIT"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
;; GE, vmsge.vx/vmsgeu.vx
;;
@@ -7327,23 +7364,24 @@
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
(if_then_else:<VM>
  (unspec:<VM>
-     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-      (match_operand 6 "vector_length_operand"         "   rK,   rK")
-      (match_operand 7 "const_int_operand"             "    i,    i")
-      (match_operand 8 "const_int_operand"             "    i,    i")
+     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+      (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+      (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+      (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
     (reg:SI VL_REGNUM)
     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (match_operator:<VM> 3 "signed_order_operator"
-      [(match_operand:V_VLSF 4 "register_operand"      "   vr,   vr")
-       (match_operand:V_VLSF 5 "register_operand"      "   vr,   vr")])
-   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+      [(match_operand:V_VLSF 4 "register_operand"      "   vr,   vr,   vr,   vr")
+       (match_operand:V_VLSF 5 "register_operand"      "   vr,   vr,   vr,   vr")])
+   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vmf%B3.vv\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
(define_insn "*pred_cmp<mode>_narrow_merge_tie_mask"
   [(set (match_operand:<VM> 0 "register_operand"               "=vm")
@@ -7386,7 +7424,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vmf%B3.vv\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,none,none")])
(define_expand "@pred_cmp<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -7432,24 +7471,25 @@
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
(if_then_else:<VM>
  (unspec:<VM>
-     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-      (match_operand 6 "vector_length_operand"         "   rK,   rK")
-      (match_operand 7 "const_int_operand"             "    i,    i")
-      (match_operand 8 "const_int_operand"             "    i,    i")
+     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+      (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+      (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+      (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
     (reg:SI VL_REGNUM)
     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (match_operator:<VM> 3 "signed_order_operator"
-      [(match_operand:V_VLSF 4 "register_operand"      "   vr,   vr")
+      [(match_operand:V_VLSF 4 "register_operand"      "   vr,   vr,   vr,   vr")
      (vec_duplicate:V_VLSF
-         (match_operand:<VEL> 5 "register_operand"     "    f,    f"))])
-   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+         (match_operand:<VEL> 5 "register_operand"     "    f,    f,    f,    f"))])
+   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vmf%B3.vf\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_scalar_narrow"
@@ -7470,7 +7510,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vmf%B3.vf\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
(define_expand "@pred_eqne<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -7516,24 +7557,25 @@
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_eqne<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
(if_then_else:<VM>
  (unspec:<VM>
-     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-      (match_operand 6 "vector_length_operand"         "   rK,   rK")
-      (match_operand 7 "const_int_operand"             "    i,    i")
-      (match_operand 8 "const_int_operand"             "    i,    i")
+     [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+      (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+      (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+      (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
     (reg:SI VL_REGNUM)
     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (match_operator:<VM> 3 "equality_operator"
     [(vec_duplicate:V_VLSF
-         (match_operand:<VEL> 5 "register_operand"     "    f,    f"))
-       (match_operand:V_VLSF 4 "register_operand"      "   vr,   vr")])
-   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+         (match_operand:<VEL> 5 "register_operand"     "    f,    f,    f,    f"))
+       (match_operand:V_VLSF 4 "register_operand"      "   vr,   vr,   vr,   vr")])
+   (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vmf%B3.vf\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_eqne<mode>_scalar_narrow"
@@ -7554,7 +7596,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vmf%B3.vf\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
;; -------------------------------------------------------------------------------
;; ---- Predicated floating-point merge
@@ -7774,7 +7817,8 @@
   [(set_attr "type" "vfncvtftoi")
    (set_attr "mode" "<VNCONVERT>")
    (set (attr "frm_mode")
- (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+ (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))
+   (set_attr "group_overlap" "none,none,thv_disabled,thv_disabled,none,none")])
(define_insn "@pred_narrow_<fix_cvt><mode>"
   [(set (match_operand:<VNCONVERT> 0 "register_operand"        "=vd, vd, vr, vr,  &vr,  &vr")
@@ -7816,7 +7860,8 @@
   [(set_attr "type" "vfncvtitof")
    (set_attr "mode" "<VNCONVERT>")
    (set (attr "frm_mode")
- (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+ (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))
+   (set_attr "group_overlap" "none,none,thv_disabled,thv_disabled,none,none")])
(define_insn "@pred_trunc<mode>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"       "=vd, vd, vr, vr,  &vr,  &vr")
@@ -7839,7 +7884,8 @@
   [(set_attr "type" "vfncvtftof")
    (set_attr "mode" "<V_DOUBLE_TRUNC>")
    (set (attr "frm_mode")
- (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+ (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))
+   (set_attr "group_overlap" "none,none,thv_disabled,thv_disabled,none,none")])
(define_insn "@pred_rod_trunc<mode>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"       "=vd, vd, vr, vr,  &vr,  &vr")
-- 
2.17.1
  
Robin Dapp Jan. 10, 2024, 1:36 p.m. UTC | #2
Hi Joshua,

> For th.vmadc/th.vmsbc as well as narrowing arithmetic instructions
> and floating-point compare instructions, an illegal instruction
> exception will be raised if the destination vector register overlaps
> a source vector register group.
> 
> To handle this issue, we use "group_overlap" and "enabled" attribute
> to disable some alternatives for xtheadvector.

>  ;; Widening instructions have group-overlap constraints.  Those are only
>  ;; valid for certain register-group sizes.  This attribute marks the
>  ;; alternatives not matching the required register-group size as disabled.
> -(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0"
> +(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0,thv_disabled,rvv_disabled"
>    (const_string "none"))

I realize there have been some discussions before but I find the naming
misleading.  The group_overlap attribute is supposed to specify whether
groups overlap (and mark the respective alternatives accepting
only this overlap).
Then we check if the groups overlap and disable all non-matching
alternatives.  "none" i.e. "no overlap" always matches.

Your first goal seems to be to disable existing non-early-clobber
alternatives for thv.  For this, maybe "full", "same" (or "any"?) would
work?  Please also add a comment in group_overlap_valid then that we
need not actually check for register equality.

For the other insns, I wonder if we could get away with not really
disabling the newly added early-clobber alternatives for RVV but
just disparaging ("?") them?  That way we could re-use "full" for
the thv-disabled alternatives and "none" for the newly added ones.
("none" will still be misleading then, though :/)

If this doesn't work or others feel the separation is not strict
enough, I'd prefer a separate attribute rather than overloading
group_overlap.  Maybe something like "spec_restriction" or similar
with two values "rvv" and "thv"?

Regards
 Robin
  
juzhe.zhong@rivai.ai Jan. 10, 2024, 1:43 p.m. UTC | #3
>> For the other insns, I wonder if we could get away with not really
>>disabling the newly added early-clobber alternatives for RVV but
>>just disparaging ("?") them?  That way we could re-use "full" for
>>the thv-disabled alternatives and "none" for the newly added ones.
>>("none" will still be misleading then, though :/)

I prefer to disable those early-clobber alternatives added of theadvector for RVV,
since disparage still make RA possible reaches the early clobber alternatives.

>>If this doesn't work or others feel the separation is not strict
>>enough, I'd prefer a separate attribute rather than overloading
>>group_overlap.  Maybe something like "spec_restriction" or similar
>>with two values "rvv" and "thv"?

I like this idea, it makes more sense to me. So I think it's better to add an attribute to
disable alternative for theadvector or RVV1.0.



juzhe.zhong@rivai.ai
 
From: Robin Dapp
Date: 2024-01-10 21:36
To: Jun Sha (Joshua); gcc-patches
CC: rdapp.gcc; jim.wilson.gcc; palmer; andrew; philipp.tomsich; jeffreyalaw; christoph.muellner; juzhe.zhong; Jin Ma; Xianmiao Qu
Subject: Re: [PATCH v5] RISC-V: Fix register overlap issue for some xtheadvector instructions
Hi Joshua,
 
> For th.vmadc/th.vmsbc as well as narrowing arithmetic instructions
> and floating-point compare instructions, an illegal instruction
> exception will be raised if the destination vector register overlaps
> a source vector register group.
> 
> To handle this issue, we use "group_overlap" and "enabled" attribute
> to disable some alternatives for xtheadvector.
 
>  ;; Widening instructions have group-overlap constraints.  Those are only
>  ;; valid for certain register-group sizes.  This attribute marks the
>  ;; alternatives not matching the required register-group size as disabled.
> -(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0"
> +(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0,thv_disabled,rvv_disabled"
>    (const_string "none"))
 
I realize there have been some discussions before but I find the naming
misleading.  The group_overlap attribute is supposed to specify whether
groups overlap (and mark the respective alternatives accepting
only this overlap).
Then we check if the groups overlap and disable all non-matching
alternatives.  "none" i.e. "no overlap" always matches.
 
Your first goal seems to be to disable existing non-early-clobber
alternatives for thv.  For this, maybe "full", "same" (or "any"?) would
work?  Please also add a comment in group_overlap_valid then that we
need not actually check for register equality.
 
For the other insns, I wonder if we could get away with not really
disabling the newly added early-clobber alternatives for RVV but
just disparaging ("?") them?  That way we could re-use "full" for
the thv-disabled alternatives and "none" for the newly added ones.
("none" will still be misleading then, though :/)
 
If this doesn't work or others feel the separation is not strict
enough, I'd prefer a separate attribute rather than overloading
group_overlap.  Maybe something like "spec_restriction" or similar
with two values "rvv" and "thv"?
 
Regards
Robin
  
joshua Jan. 11, 2024, 2:40 a.m. UTC | #4
Hi Robin,

Thank you for your suggestions!
The patch has been updated by adding a new attribute to
disable alternative for xtheadvector or RVV1.0 instead of
overlaoding group_overlap.

Joshua






------------------------------------------------------------------
发件人:钟居哲 <juzhe.zhong@rivai.ai>
发送时间:2024年1月10日(星期三) 21:43
收件人:"rdapp.gcc"<rdapp.gcc@gmail.com>; "cooper.joshua"<cooper.joshua@linux.alibaba.com>; "gcc-patches"<gcc-patches@gcc.gnu.org>
抄 送:"rdapp.gcc"<rdapp.gcc@gmail.com>; "jim.wilson.gcc"<jim.wilson.gcc@gmail.com>; palmer<palmer@dabbelt.com>; andrew<andrew@sifive.com>; "philipp.tomsich"<philipp.tomsich@vrull.eu>; Jeff Law<jeffreyalaw@gmail.com>; "Christoph Müllner"<christoph.muellner@vrull.eu>; jinma<jinma@linux.alibaba.com>; Cooper Qu<cooper.qu@linux.alibaba.com>
主 题:Re: Re: [PATCH v5] RISC-V: Fix register overlap issue for some xtheadvector instructions


>> For the other insns, I wonder if we could get away with not really
>>disabling the newly added early-clobber alternatives for RVV but
>>just disparaging ("?") them?  That way we could re-use "full" for
>>the thv-disabled alternatives and "none" for the newly added ones.
>>("none" will still be misleading then, though :/)



I prefer to disable those early-clobber alternatives added of theadvector for RVV,
since disparage still make RA possible reaches the early clobber alternatives.


>>If this doesn't work or others feel the separation is not strict
>>enough, I'd prefer a separate attribute rather than overloading
>>group_overlap.  Maybe something like "spec_restriction" or similar
>>with two values "rvv" and "thv"?



I like this idea, it makes more sense to me. So I think it's better to add an attribute to
disable alternative for theadvector or RVV1.0.


juzhe.zhong@rivai.ai

 
From: Robin Dapp
Date: 2024-01-10 21:36
To: Jun Sha (Joshua); gcc-patches
CC: rdapp.gcc; jim.wilson.gcc; palmer; andrew; philipp.tomsich; jeffreyalaw; christoph.muellner; juzhe.zhong; Jin Ma; Xianmiao Qu
Subject: Re: [PATCH v5] RISC-V: Fix register overlap issue for some xtheadvector instructions

Hi Joshua,
 
> For th.vmadc/th.vmsbc as well as narrowing arithmetic instructions
> and floating-point compare instructions, an illegal instruction
> exception will be raised if the destination vector register overlaps
> a source vector register group.
> 
> To handle this issue, we use "group_overlap" and "enabled" attribute
> to disable some alternatives for xtheadvector.
 
>  ;; Widening instructions have group-overlap constraints.  Those are only
>  ;; valid for certain register-group sizes.  This attribute marks the
>  ;; alternatives not matching the required register-group size as disabled.
> -(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0"
> +(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0,thv_disabled,rvv_disabled"
>    (const_string "none"))
 
I realize there have been some discussions before but I find the naming
misleading.  The group_overlap attribute is supposed to specify whether
groups overlap (and mark the respective alternatives accepting
only this overlap).
Then we check if the groups overlap and disable all non-matching
alternatives.  "none" i.e. "no overlap" always matches.
 
Your first goal seems to be to disable existing non-early-clobber
alternatives for thv.  For this, maybe "full", "same" (or "any"?) would
work?  Please also add a comment in group_overlap_valid then that we
need not actually check for register equality.
 
For the other insns, I wonder if we could get away with not really
disabling the newly added early-clobber alternatives for RVV but
just disparaging ("?") them?  That way we could re-use "full" for
the thv-disabled alternatives and "none" for the newly added ones.
("none" will still be misleading then, though :/)
 
If this doesn't work or others feel the separation is not strict
enough, I'd prefer a separate attribute rather than overloading
group_overlap.  Maybe something like "spec_restriction" or similar
with two values "rvv" and "thv"?
 
Regards
 Robin
  

Patch

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 84212430dc0..411d1d17391 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -537,7 +537,7 @@ 
 ;; Widening instructions have group-overlap constraints.  Those are only
 ;; valid for certain register-group sizes.  This attribute marks the
 ;; alternatives not matching the required register-group size as disabled.
-(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0"
+(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0,thv_disabled,rvv_disabled"
   (const_string "none"))
 
 (define_attr "group_overlap_valid" "no,yes"
@@ -576,7 +576,15 @@ 
          (and (eq_attr "group_overlap" "W0")
 	      (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) > 1"))
 	 (const_string "no")
-        ]
+
+	 (and (eq_attr "group_overlap" "thv_disabled")
+	      (match_test "TARGET_XTHEADVECTOR"))
+	 (const_string "no")
+
+	 (and (eq_attr "group_overlap" "rvv_disabled")
+	      (match_test "TARGET_VECTOR && !TARGET_XTHEADVECTOR"))
+	 (const_string "no")
+	]
        (const_string "yes")))
 
 ;; Attribute to control enable or disable instructions.
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 3eb6daafbc2..4748ddd34a2 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3260,7 +3260,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none,none")])
 
 (define_insn "@pred_msbc<mode>"
   [(set (match_operand:<VM> 0 "register_operand"        "=vr, vr, &vr")
@@ -3279,7 +3280,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,none")])
 
 (define_insn "@pred_madc<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr")
@@ -3299,7 +3301,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none")])
 
 (define_insn "@pred_msbc<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr")
@@ -3319,7 +3322,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none")])
 
 (define_expand "@pred_madc<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -3368,7 +3372,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none")])
 
 (define_insn "*pred_madc<mode>_extended_scalar"
   [(set (match_operand:<VM> 0 "register_operand"             "=vr, &vr")
@@ -3389,7 +3394,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none")])
 
 (define_expand "@pred_msbc<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -3438,7 +3444,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none")])
 
 (define_insn "*pred_msbc<mode>_extended_scalar"
   [(set (match_operand:<VM> 0 "register_operand"              "=vr, &vr")
@@ -3459,7 +3466,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "thv_disabled,none")])
 
 (define_insn "@pred_madc<mode>_overflow"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr, &vr")
@@ -3477,7 +3485,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none,none")])
 
 (define_insn "@pred_msbc<mode>_overflow"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, vr, &vr, &vr")
@@ -3495,7 +3504,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,none,none")])
 
 (define_insn "@pred_madc<mode>_overflow_scalar"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr")
@@ -3514,7 +3524,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none")])
 
 (define_insn "@pred_msbc<mode>_overflow_scalar"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr")
@@ -3533,7 +3544,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none")])
 
 (define_expand "@pred_madc<mode>_overflow_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -3580,7 +3592,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none")])
 
 (define_insn "*pred_madc<mode>_overflow_extended_scalar"
   [(set (match_operand:<VM> 0 "register_operand"             "=vr, &vr")
@@ -3600,7 +3613,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none")])
 
 (define_expand "@pred_msbc<mode>_overflow_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -3647,7 +3661,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none")])
 
 (define_insn "*pred_msbc<mode>_overflow_extended_scalar"
   [(set (match_operand:<VM> 0 "register_operand"             "=vr, &vr")
@@ -3667,7 +3682,8 @@ 
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "thv_disabled,none")])
 
 ;; -------------------------------------------------------------------------------
 ;; ---- Predicated integer unary operations
@@ -3987,7 +4003,8 @@ 
   "TARGET_VECTOR"
   "vn<insn>.w%o4\t%0,%3,%v4%p1"
   [(set_attr "type" "vnshift")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "none,none,thv_disabled,thv_disabled,none,thv_disabled,none,none,none,thv_disabled,none,none")])
 
 (define_insn "@pred_narrow_<optab><mode>_scalar"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, vd, vr, vr,  &vr,  &vr")
@@ -4008,7 +4025,8 @@ 
   "TARGET_VECTOR"
   "vn<insn>.w%o4\t%0,%3,%4%p1"
   [(set_attr "type" "vnshift")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "none,none,thv_disabled,thv_disabled,none,none")])
 
 ;; vncvt.x.x.w
 (define_insn "@pred_trunc<mode>"
@@ -4032,7 +4050,8 @@ 
    (set_attr "vl_op_idx" "4")
    (set (attr "ta") (symbol_ref "riscv_vector::get_ta(operands[5])"))
    (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
-   (set (attr "avl_type_idx") (const_int 7))])
+   (set (attr "avl_type_idx") (const_int 7))
+   (set_attr "group_overlap" "none,none,thv_disabled,thv_disabled,none,none")])
 
 ;; -------------------------------------------------------------------------------
 ;; ---- Predicated fixed-point operations
@@ -4438,7 +4457,8 @@ 
   "TARGET_VECTOR"
   "vnclip<v_su>.w%o4\t%0,%3,%v4%p1"
   [(set_attr "type" "vnclip")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,none,none,thv_disabled,thv_disabled,none,none")])
 
 (define_insn "@pred_narrow_clip<v_su><mode>_scalar"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, vd, vr, vr,  &vr,  &vr")
@@ -4460,7 +4480,8 @@ 
   "TARGET_VECTOR"
   "vnclip<v_su>.w%o4\t%0,%3,%4%p1"
   [(set_attr "type" "vnclip")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,thv_disabled,thv_disabled,none,none")])
 
 ;; -------------------------------------------------------------------------------
 ;; ---- Predicated integer comparison operations
@@ -4511,23 +4532,24 @@ 
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_cmp<mode>"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   vr,   vr,   &vr,   &vr,   &vr,   &vr")
 	(if_then_else:<VM>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
-	     (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
-	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
-	     (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK,   rK,   rK,   rK,   rK")
+	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i,    i,    i,    i,    i")
+	     (match_operand 8 "const_int_operand"             "    i,    i,    i,    i,    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operator:<VM> 3 "comparison_except_ltge_operator"
-	     [(match_operand:V_VLSI 4 "register_operand"          "   vr,   vr,   vr,   vr")
-	      (match_operand:V_VLSI 5 "vector_arith_operand"      "   vr,   vr,   vi,   vi")])
-	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,   vu,    0")))]
+	     [(match_operand:V_VLSI 4 "register_operand"          "   vr,   vr,   vr,   vr,   vr,   vr,   vr,   vr")
+	      (match_operand:V_VLSI 5 "vector_arith_operand"      "   vr,   vr,   vi,   vi,   vr,   vr,   vi,   vi")])
+	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,   vu,    0,   vu,    0,   vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.v%o5\t%0,%4,%v5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,thv_disabled,thv_disabled,rvv_disabled,rvv_disabled,rvv_disabled,rvv_disabled")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_cmp<mode>_narrow"
@@ -4547,7 +4569,8 @@ 
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.v%o5\t%0,%4,%v5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,none,none")])
 
 (define_expand "@pred_ltge<mode>"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -4591,23 +4614,24 @@ 
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_ltge<mode>"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   vr,   vr,   &vr,   &vr,   &vr,   &vr")
 	(if_then_else:<VM>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
-	     (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
-	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
-	     (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK,   rK,   rK,   rK,   rK")
+	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i,    i,    i,    i,    i")
+	     (match_operand 8 "const_int_operand"             "    i,    i,    i,    i,    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operator:<VM> 3 "ltge_operator"
-	     [(match_operand:V_VLSI 4 "register_operand"          "   vr,   vr,   vr,   vr")
-	      (match_operand:V_VLSI 5 "vector_neg_arith_operand"  "   vr,   vr,   vj,   vj")])
-	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,   vu,    0")))]
+	     [(match_operand:V_VLSI 4 "register_operand"          "   vr,   vr,   vr,   vr,   vr,   vr,   vr,   vr")
+	      (match_operand:V_VLSI 5 "vector_neg_arith_operand"  "   vr,   vr,   vj,   vj,   vr,   vr,   vj,   vj")])
+	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,   vu,    0,    vu,    0,   vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.v%o5\t%0,%4,%v5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,thv_disabled,thv_disabled,rvv_disabled,rvv_disabled,rvv_disabled,rvv_disabled")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_ltge<mode>_narrow"
@@ -4627,7 +4651,8 @@ 
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.v%o5\t%0,%4,%v5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,none,none")])
 
 (define_expand "@pred_cmp<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -4673,24 +4698,25 @@ 
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_cmp<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
 	(if_then_else:<VM>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-	     (match_operand 6 "vector_length_operand"         "   rK,   rK")
-	     (match_operand 7 "const_int_operand"             "    i,    i")
-	     (match_operand 8 "const_int_operand"             "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+	     (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operator:<VM> 3 "comparison_except_eqge_operator"
-	     [(match_operand:V_VLSI_QHS 4 "register_operand"      "   vr,   vr")
+	     [(match_operand:V_VLSI_QHS 4 "register_operand"      "   vr,   vr,   vr,   vr")
 	      (vec_duplicate:V_VLSI_QHS
-	        (match_operand:<VEL> 5 "register_operand"     "    r,    r"))])
-	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+	        (match_operand:<VEL> 5 "register_operand"     "    r,    r,    r,    r"))])
+	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_cmp<mode>_scalar_narrow"
@@ -4711,7 +4737,8 @@ 
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
 
 (define_expand "@pred_eqne<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -4757,24 +4784,25 @@ 
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_eqne<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
 	(if_then_else:<VM>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-	     (match_operand 6 "vector_length_operand"         "   rK,   rK")
-	     (match_operand 7 "const_int_operand"             "    i,    i")
-	     (match_operand 8 "const_int_operand"             "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+	     (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operator:<VM> 3 "equality_operator"
 	     [(vec_duplicate:V_VLSI_QHS
-	        (match_operand:<VEL> 5 "register_operand"     "    r,    r"))
-	      (match_operand:V_VLSI_QHS 4 "register_operand"      "   vr,   vr")])
-	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+	        (match_operand:<VEL> 5 "register_operand"     "    r,    r,    r,    r"))
+	      (match_operand:V_VLSI_QHS 4 "register_operand"      "   vr,   vr,   vr,   vr")])
+	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_eqne<mode>_scalar_narrow"
@@ -4795,7 +4823,8 @@ 
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
 
 ;; Handle GET_MODE_INNER (mode) = DImode. We need to split them since
 ;; we need to deal with SEW = 64 in RV32 system.
@@ -4922,24 +4951,25 @@ 
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_cmp<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
 	(if_then_else:<VM>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-	     (match_operand 6 "vector_length_operand"         "   rK,   rK")
-	     (match_operand 7 "const_int_operand"             "    i,    i")
-	     (match_operand 8 "const_int_operand"             "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+	     (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operator:<VM> 3 "comparison_except_eqge_operator"
-	     [(match_operand:V_VLSI_D 4 "register_operand"        "   vr,   vr")
+	     [(match_operand:V_VLSI_D 4 "register_operand"        "   vr,   vr,   vr,   vr")
 	      (vec_duplicate:V_VLSI_D
-	        (match_operand:<VEL> 5 "register_operand"     "    r,    r"))])
-	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+	        (match_operand:<VEL> 5 "register_operand"     "    r,    r,    r,    r"))])
+	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_cmp<mode>_scalar_narrow"
@@ -4960,28 +4990,30 @@ 
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_eqne<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
 	(if_then_else:<VM>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-	     (match_operand 6 "vector_length_operand"         "   rK,   rK")
-	     (match_operand 7 "const_int_operand"             "    i,    i")
-	     (match_operand 8 "const_int_operand"             "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+	     (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operator:<VM> 3 "equality_operator"
 	     [(vec_duplicate:V_VLSI_D
-	        (match_operand:<VEL> 5 "register_operand"     "    r,    r"))
-	      (match_operand:V_VLSI_D 4 "register_operand"        "   vr,   vr")])
-	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+	        (match_operand:<VEL> 5 "register_operand"     "    r,    r,    r,    r"))
+	      (match_operand:V_VLSI_D 4 "register_operand"        "   vr,   vr,   vr,   vr")])
+	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_eqne<mode>_scalar_narrow"
@@ -5002,7 +5034,8 @@ 
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
 
 (define_insn "*pred_cmp<mode>_extended_scalar_merge_tie_mask"
   [(set (match_operand:<VM> 0 "register_operand"               "=vm")
@@ -5031,25 +5064,26 @@ 
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_cmp<mode>_extended_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                 "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                 "=vr,   vr,   &vr,   &vr")
 	(if_then_else:<VM>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1")
-	     (match_operand 6 "vector_length_operand"          "   rK,   rK")
-	     (match_operand 7 "const_int_operand"              "    i,    i")
-	     (match_operand 8 "const_int_operand"              "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 6 "vector_length_operand"          "   rK,   rK,   rK,   rK")
+	     (match_operand 7 "const_int_operand"              "    i,    i,    i,    i")
+	     (match_operand 8 "const_int_operand"              "    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operator:<VM> 3 "comparison_except_eqge_operator"
-	     [(match_operand:V_VLSI_D 4 "register_operand"         "   vr,   vr")
+	     [(match_operand:V_VLSI_D 4 "register_operand"         "   vr,   vr,   vr,   vr")
 	      (vec_duplicate:V_VLSI_D
 	        (sign_extend:<VEL>
-	          (match_operand:<VSUBEL> 5 "register_operand" "    r,    r")))])
-	  (match_operand:<VM> 2 "vector_merge_operand"         "   vu,    0")))]
+	          (match_operand:<VSUBEL> 5 "register_operand" "    r,    r,    r,    r")))])
+	  (match_operand:<VM> 2 "vector_merge_operand"         "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode) && !TARGET_64BIT"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
 
 (define_insn "*pred_cmp<mode>_extended_scalar_narrow"
   [(set (match_operand:<VM> 0 "register_operand"                 "=vm,   vr,   vr,  &vr,  &vr")
@@ -5070,7 +5104,8 @@ 
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode) && !TARGET_64BIT"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
 
 (define_insn "*pred_eqne<mode>_extended_scalar_merge_tie_mask"
   [(set (match_operand:<VM> 0 "register_operand"                 "=vm")
@@ -5099,25 +5134,26 @@ 
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_eqne<mode>_extended_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                 "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                 "=vr,   vr,   &vr,   &vr")
 	(if_then_else:<VM>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1")
-	     (match_operand 6 "vector_length_operand"          "   rK,   rK")
-	     (match_operand 7 "const_int_operand"              "    i,    i")
-	     (match_operand 8 "const_int_operand"              "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 6 "vector_length_operand"          "   rK,   rK,   rK,   rK")
+	     (match_operand 7 "const_int_operand"              "    i,    i,    i,    i")
+	     (match_operand 8 "const_int_operand"              "    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operator:<VM> 3 "equality_operator"
 	     [(vec_duplicate:V_VLSI_D
 	        (sign_extend:<VEL>
-	          (match_operand:<VSUBEL> 5 "register_operand" "    r,    r")))
-	      (match_operand:V_VLSI_D 4 "register_operand"         "   vr,   vr")])
-	  (match_operand:<VM> 2 "vector_merge_operand"         "   vu,    0")))]
+	          (match_operand:<VSUBEL> 5 "register_operand" "    r,    r,    r,    r")))
+	      (match_operand:V_VLSI_D 4 "register_operand"         "   vr,   vr,   vr,   vr")])
+	  (match_operand:<VM> 2 "vector_merge_operand"         "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode) && !TARGET_64BIT"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
 
 (define_insn "*pred_eqne<mode>_extended_scalar_narrow"
   [(set (match_operand:<VM> 0 "register_operand"                "=vm,   vr,   vr,  &vr,  &vr")
@@ -5138,7 +5174,8 @@ 
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode) && !TARGET_64BIT"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
 
 ;; GE, vmsge.vx/vmsgeu.vx
 ;;
@@ -7327,23 +7364,24 @@ 
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_cmp<mode>"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
 	(if_then_else:<VM>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-	     (match_operand 6 "vector_length_operand"         "   rK,   rK")
-	     (match_operand 7 "const_int_operand"             "    i,    i")
-	     (match_operand 8 "const_int_operand"             "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+	     (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operator:<VM> 3 "signed_order_operator"
-	     [(match_operand:V_VLSF 4 "register_operand"      "   vr,   vr")
-	      (match_operand:V_VLSF 5 "register_operand"      "   vr,   vr")])
-	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+	     [(match_operand:V_VLSF 4 "register_operand"      "   vr,   vr,   vr,   vr")
+	      (match_operand:V_VLSF 5 "register_operand"      "   vr,   vr,   vr,   vr")])
+	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vmf%B3.vv\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
 
 (define_insn "*pred_cmp<mode>_narrow_merge_tie_mask"
   [(set (match_operand:<VM> 0 "register_operand"               "=vm")
@@ -7386,7 +7424,8 @@ 
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vmf%B3.vv\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,thv_disabled,none,none")])
 
 (define_expand "@pred_cmp<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -7432,24 +7471,25 @@ 
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_cmp<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
 	(if_then_else:<VM>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-	     (match_operand 6 "vector_length_operand"         "   rK,   rK")
-	     (match_operand 7 "const_int_operand"             "    i,    i")
-	     (match_operand 8 "const_int_operand"             "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+	     (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operator:<VM> 3 "signed_order_operator"
-	     [(match_operand:V_VLSF 4 "register_operand"      "   vr,   vr")
+	     [(match_operand:V_VLSF 4 "register_operand"      "   vr,   vr,   vr,   vr")
 	      (vec_duplicate:V_VLSF
-	        (match_operand:<VEL> 5 "register_operand"     "    f,    f"))])
-	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+	        (match_operand:<VEL> 5 "register_operand"     "    f,    f,    f,    f"))])
+	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vmf%B3.vf\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_cmp<mode>_scalar_narrow"
@@ -7470,7 +7510,8 @@ 
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vmf%B3.vf\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
 
 (define_expand "@pred_eqne<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -7516,24 +7557,25 @@ 
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_eqne<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
 	(if_then_else:<VM>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-	     (match_operand 6 "vector_length_operand"         "   rK,   rK")
-	     (match_operand 7 "const_int_operand"             "    i,    i")
-	     (match_operand 8 "const_int_operand"             "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+	     (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operator:<VM> 3 "equality_operator"
 	     [(vec_duplicate:V_VLSF
-	        (match_operand:<VEL> 5 "register_operand"     "    f,    f"))
-	      (match_operand:V_VLSF 4 "register_operand"      "   vr,   vr")])
-	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+	        (match_operand:<VEL> 5 "register_operand"     "    f,    f,    f,    f"))
+	      (match_operand:V_VLSF 4 "register_operand"      "   vr,   vr,   vr,   vr")])
+	  (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vmf%B3.vf\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "thv_disabled,thv_disabled,rvv_disabled,rvv_disabled")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_eqne<mode>_scalar_narrow"
@@ -7554,7 +7596,8 @@ 
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vmf%B3.vf\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,thv_disabled,thv_disabled,none,none")])
 
 ;; -------------------------------------------------------------------------------
 ;; ---- Predicated floating-point merge
@@ -7774,7 +7817,8 @@ 
   [(set_attr "type" "vfncvtftoi")
    (set_attr "mode" "<VNCONVERT>")
    (set (attr "frm_mode")
-	(symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+	(symbol_ref "riscv_vector::get_frm_mode (operands[8])"))
+   (set_attr "group_overlap" "none,none,thv_disabled,thv_disabled,none,none")])
 
 (define_insn "@pred_narrow_<fix_cvt><mode>"
   [(set (match_operand:<VNCONVERT> 0 "register_operand"        "=vd, vd, vr, vr,  &vr,  &vr")
@@ -7816,7 +7860,8 @@ 
   [(set_attr "type" "vfncvtitof")
    (set_attr "mode" "<VNCONVERT>")
    (set (attr "frm_mode")
-	(symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+	(symbol_ref "riscv_vector::get_frm_mode (operands[8])"))
+   (set_attr "group_overlap" "none,none,thv_disabled,thv_disabled,none,none")])
 
 (define_insn "@pred_trunc<mode>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"       "=vd, vd, vr, vr,  &vr,  &vr")
@@ -7839,7 +7884,8 @@ 
   [(set_attr "type" "vfncvtftof")
    (set_attr "mode" "<V_DOUBLE_TRUNC>")
    (set (attr "frm_mode")
-	(symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+	(symbol_ref "riscv_vector::get_frm_mode (operands[8])"))
+   (set_attr "group_overlap" "none,none,thv_disabled,thv_disabled,none,none")])
 
 (define_insn "@pred_rod_trunc<mode>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"       "=vd, vd, vr, vr,  &vr,  &vr")