From patchwork Fri Dec 1 08:14:10 2023
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
To: gcc-patches@gcc.gnu.org
Cc: kito.cheng@gmail.com, kito.cheng@sifive.com, jeffreyalaw@gmail.com,
 rdapp.gcc@gmail.com, Juzhe-Zhong <juzhe.zhong@rivai.ai>
Subject: [PATCH] RISC-V: Support highpart overlap for indexed load with SRC
 EEW < DEST EEW
Date: Fri, 1 Dec 2023 16:14:10 +0800
Message-Id: <20231201081410.1441609-1-juzhe.zhong@rivai.ai>
X-Mailer: git-send-email 2.36.3

Leverage the approach from the previous patches: replace the early-clobber
alternatives with new alternatives (W21/W42/W84/W87 plus the group_overlap
attribute) so that the narrower index operand can be allocated in the
highpart of the destination register group, as the RVV register-group
overlap rules permit.

Before this patch:

.L5:
        add     a3,s0,s2
        add     a4,s6,s2
        add     a5,s7,s2
        vsetvli zero,s0,e64,m8,ta,ma
        vle8.v  v4,0(s2)
        vle8.v  v3,0(a3)
        mv      s2,s1
        vle8.v  v2,0(a4)
        vle8.v  v1,0(a5)
        nop
        vluxei8.v       v8,(s1),v4
        vs8r.v  v8,0(sp)        ---> spill
        vluxei8.v       v8,(s1),v3
        vluxei8.v       v16,(s1),v2
        vluxei8.v       v24,(s1),v1
        nop
        vmv.x.s a1,v8
        vl8re64.v       v8,0(sp)        ---> reload
        vmv.x.s a3,v24
        vmv.x.s a2,v16
        vmv.x.s a0,v8
        add     s1,s1,s5
        call    sumation
        add     s3,s3,a0
        bgeu    s4,s1,.L5

After this patch:

.L5:
        add     a3,s0,s2
        add     a4,s6,s2
        add     a5,s7,s2
        vsetvli zero,s0,e64,m8,ta,ma
        vle8.v  v15,0(s2)
        vle8.v  v23,0(a3)
        mv      s2,s1
        vle8.v  v31,0(a4)
        vle8.v  v7,0(a5)
        vluxei8.v       v8,(s1),v15
        vluxei8.v       v16,(s1),v23
        vluxei8.v       v24,(s1),v31
        vluxei8.v       v0,(s1),v7
        vmv.x.s a3,v0
        vmv.x.s a2,v24
        vmv.x.s a1,v16
        vmv.x.s a0,v8
        add     s1,s1,s5
        call    sumation
        add     s3,s3,a0
        bgeu    s4,s1,.L5

	PR target/112431

gcc/ChangeLog:

	* config/riscv/vector.md: Support highpart overlap for indexed load.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/base/pr112431-28.c: New test.
	* gcc.target/riscv/rvv/base/pr112431-29.c: New test.
	* gcc.target/riscv/rvv/base/pr112431-30.c: New test.
	* gcc.target/riscv/rvv/base/pr112431-31.c: New test.
	* gcc.target/riscv/rvv/base/pr112431-32.c: New test.
	* gcc.target/riscv/rvv/base/pr112431-33.c: New test.

---
 gcc/config/riscv/vector.md                  |  63 ++++++-----
 .../gcc.target/riscv/rvv/base/pr112431-28.c | 104 ++++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr112431-29.c |  68 ++++++++++++
 .../gcc.target/riscv/rvv/base/pr112431-30.c |  51 +++++++++
 .../gcc.target/riscv/rvv/base/pr112431-31.c |  68 ++++++++++++
 .../gcc.target/riscv/rvv/base/pr112431-32.c |  51 +++++++++
 .../gcc.target/riscv/rvv/base/pr112431-33.c |  51 +++++++++
 7 files changed, 426 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-29.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-30.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-31.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-33.c
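For reference, the code shape the new tests exercise is a widening indexed
load whose EEW8 index vector feeds a wider destination register group.  A
minimal sketch using the same intrinsics as the tests (the function and
argument names below are only illustrative, they are not part of the patch):

    #include <stdint.h>
    #include <stddef.h>
    #include "riscv_vector.h"

    /* Illustrative only: an EEW8 index vector (LMUL = 1) feeding an EEW16
       indexed load whose destination is an LMUL = 2 register group.  */
    int16_t
    gather_first (const int16_t *base, const uint8_t *idx, size_t vl)
    {
      vuint8m1_t vidx = __riscv_vle8_v_u8m1 (idx, vl);
      vint16m2_t vval = __riscv_vluxei8_v_i16m2 (base, vidx, vl);
      return __riscv_vmv_x_s_i16m2_i16 (vval);
    }

Previously the destination constraint was early-clobber ("=&vr"), so the
index register could never be shared with the destination group.  With the
W21/W42/W84/W87 alternatives the register allocator may place the index in
the highest-numbered part of the destination group (e.g. v15 inside v8-v15
in the "after" loop above), which is what removes the vs8r.v spill and
vl8re64.v reload.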
(define_insn "@pred_indexed_load_x2_greater_eew" - [(set (match_operand:VEEWEXT2 0 "register_operand" "=&vr, &vr") + [(set (match_operand:VEEWEXT2 0 "register_operand" "=vr, vr, vr, vr, vr, vr, ?&vr, ?&vr") (if_then_else:VEEWEXT2 (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK") - (match_operand 6 "const_int_operand" " i, i") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 "const_int_operand" " i, i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK") + (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (unspec:VEEWEXT2 - [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ") + [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ") (mem:BLK (scratch)) - (match_operand: 4 "register_operand" " vr, vr")] ORDER) - (match_operand:VEEWEXT2 2 "vector_merge_operand" " vu, 0")))] + (match_operand: 4 "register_operand" " W21, W21, W42, W42, W84, W84, vr, vr")] ORDER) + (match_operand:VEEWEXT2 2 "vector_merge_operand" " vu, 0, vu, 0, vu, 0, vu, 0")))] "TARGET_VECTOR" "vlxei.v\t%0,(%z3),%4%p1" [(set_attr "type" "vldx") - (set_attr "mode" "")]) + (set_attr "mode" "") + (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")]) (define_insn "@pred_indexed_load_x4_greater_eew" - [(set (match_operand:VEEWEXT4 0 "register_operand" "=&vr, &vr") + [(set (match_operand:VEEWEXT4 0 "register_operand" "=vr, vr, vr, vr, ?&vr, ?&vr") (if_then_else:VEEWEXT4 (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK") - (match_operand 6 "const_int_operand" " i, i") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 "const_int_operand" " i, i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK") + (match_operand 6 "const_int_operand" " i, i, i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (unspec:VEEWEXT4 - [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ") + [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, rJ, rJ, rJ, rJ") (mem:BLK (scratch)) - (match_operand: 4 "register_operand" " vr, vr")] ORDER) - (match_operand:VEEWEXT4 2 "vector_merge_operand" " vu, 0")))] + (match_operand: 4 "register_operand" " W43, W43, W86, W86, vr, vr")] ORDER) + (match_operand:VEEWEXT4 2 "vector_merge_operand" " vu, 0, vu, 0, vu, 0")))] "TARGET_VECTOR" "vlxei.v\t%0,(%z3),%4%p1" [(set_attr "type" "vldx") - (set_attr "mode" "")]) + (set_attr "mode" "") + (set_attr "group_overlap" "W43,W43,W86,W86,none,none")]) (define_insn "@pred_indexed_load_x8_greater_eew" - [(set (match_operand:VEEWEXT8 0 "register_operand" "=&vr, &vr") + [(set (match_operand:VEEWEXT8 0 "register_operand" "=vr, vr, ?&vr, ?&vr") (if_then_else:VEEWEXT8 (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK") - (match_operand 6 "const_int_operand" " i, i") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 "const_int_operand" " i, i") 
+ [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK, rK, rK") + (match_operand 6 "const_int_operand" " i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (unspec:VEEWEXT8 - [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ") + [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, rJ, rJ") (mem:BLK (scratch)) - (match_operand: 4 "register_operand" " vr, vr")] ORDER) - (match_operand:VEEWEXT8 2 "vector_merge_operand" " vu, 0")))] + (match_operand: 4 "register_operand" " W87, W87, vr, vr")] ORDER) + (match_operand:VEEWEXT8 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "vlxei.v\t%0,(%z3),%4%p1" [(set_attr "type" "vldx") - (set_attr "mode" "")]) + (set_attr "mode" "") + (set_attr "group_overlap" "W87,W87,none,none")]) ;; DEST eew is smaller than SOURCE eew. (define_insn "@pred_indexed_load_x2_smaller_eew" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c new file mode 100644 index 00000000000..d81afd2610f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c @@ -0,0 +1,104 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +size_t __attribute__ ((noinline)) +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4, + size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9, + size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14, + size_t sum15) +{ + return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9 + + sum10 + sum11 + sum12 + sum13 + sum14 + sum15; +} + +size_t +foo (char const *buf, size_t len) +{ + size_t sum = 0; + size_t vl = __riscv_vsetvlmax_e8m8 (); + size_t step = vl * 4; + const char *it = buf, *end = buf + len; + for (; it + step <= end;) + { + vuint8m1_t v0 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v1 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v2 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v3 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v4 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v5 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v6 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v7 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v8 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v9 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v10 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v11 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v12 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v13 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v14 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v15 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + + asm volatile("nop" ::: "memory"); + vint16m2_t vw0 = __riscv_vluxei8_v_i16m2 ((void *) it, v0, vl); + vint16m2_t vw1 = __riscv_vluxei8_v_i16m2 ((void *) it, v1, vl); + vint16m2_t vw2 = __riscv_vluxei8_v_i16m2 ((void *) it, v2, vl); + vint16m2_t vw3 = __riscv_vluxei8_v_i16m2 ((void *) it, v3, vl); + vint16m2_t vw4 = __riscv_vluxei8_v_i16m2 ((void *) it, v4, vl); + vint16m2_t vw5 = __riscv_vluxei8_v_i16m2 ((void *) it, v5, 
+      vint16m2_t vw6 = __riscv_vluxei8_v_i16m2 ((void *) it, v6, vl);
+      vint16m2_t vw7 = __riscv_vluxei8_v_i16m2 ((void *) it, v7, vl);
+      vint16m2_t vw8 = __riscv_vluxei8_v_i16m2 ((void *) it, v8, vl);
+      vint16m2_t vw9 = __riscv_vluxei8_v_i16m2 ((void *) it, v9, vl);
+      vint16m2_t vw10 = __riscv_vluxei8_v_i16m2 ((void *) it, v10, vl);
+      vint16m2_t vw11 = __riscv_vluxei8_v_i16m2 ((void *) it, v11, vl);
+      vint16m2_t vw12 = __riscv_vluxei8_v_i16m2 ((void *) it, v12, vl);
+      vint16m2_t vw13 = __riscv_vluxei8_v_i16m2 ((void *) it, v13, vl);
+      vint16m2_t vw14 = __riscv_vluxei8_v_i16m2 ((void *) it, v14, vl);
+      vint16m2_t vw15 = __riscv_vluxei8_v_i16m2 ((void *) it, v15, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i16m2_i16 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i16m2_i16 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i16m2_i16 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i16m2_i16 (vw3);
+      size_t sum4 = __riscv_vmv_x_s_i16m2_i16 (vw4);
+      size_t sum5 = __riscv_vmv_x_s_i16m2_i16 (vw5);
+      size_t sum6 = __riscv_vmv_x_s_i16m2_i16 (vw6);
+      size_t sum7 = __riscv_vmv_x_s_i16m2_i16 (vw7);
+      size_t sum8 = __riscv_vmv_x_s_i16m2_i16 (vw8);
+      size_t sum9 = __riscv_vmv_x_s_i16m2_i16 (vw9);
+      size_t sum10 = __riscv_vmv_x_s_i16m2_i16 (vw10);
+      size_t sum11 = __riscv_vmv_x_s_i16m2_i16 (vw11);
+      size_t sum12 = __riscv_vmv_x_s_i16m2_i16 (vw12);
+      size_t sum13 = __riscv_vmv_x_s_i16m2_i16 (vw13);
+      size_t sum14 = __riscv_vmv_x_s_i16m2_i16 (vw14);
+      size_t sum15 = __riscv_vmv_x_s_i16m2_i16 (vw15);
+
+      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8,
+                       sum9, sum10, sum11, sum12, sum13, sum14, sum15);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-29.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-29.c
new file mode 100644
index 00000000000..2f8adb8ebee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-29.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+          size_t sum5, size_t sum6, size_t sum7)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+    {
+      vuint8m2_t v0 = __riscv_vle8_v_u8m2 ((void *) it, vl);
+      it += vl;
+      vuint8m2_t v1 = __riscv_vle8_v_u8m2 ((void *) it, vl);
+      it += vl;
+      vuint8m2_t v2 = __riscv_vle8_v_u8m2 ((void *) it, vl);
+      it += vl;
+      vuint8m2_t v3 = __riscv_vle8_v_u8m2 ((void *) it, vl);
+      it += vl;
+      vuint8m2_t v4 = __riscv_vle8_v_u8m2 ((void *) it, vl);
+      it += vl;
+      vuint8m2_t v5 = __riscv_vle8_v_u8m2 ((void *) it, vl);
+      it += vl;
+      vuint8m2_t v6 = __riscv_vle8_v_u8m2 ((void *) it, vl);
+      it += vl;
+      vuint8m2_t v7 = __riscv_vle8_v_u8m2 ((void *) it, vl);
+      it += vl;
+
+      asm volatile("nop" ::: "memory");
+      vint16m4_t vw0 = __riscv_vluxei8_v_i16m4 ((void *) it, v0, vl);
+      vint16m4_t vw1 = __riscv_vluxei8_v_i16m4 ((void *) it, v1, vl);
+      vint16m4_t vw2 = __riscv_vluxei8_v_i16m4 ((void *) it, v2, vl);
+      vint16m4_t vw3 = __riscv_vluxei8_v_i16m4 ((void *) it, v3, vl);
+      vint16m4_t vw4 = __riscv_vluxei8_v_i16m4 ((void *) it, v4, vl);
+      vint16m4_t vw5 = __riscv_vluxei8_v_i16m4 ((void *) it, v5, vl);
+      vint16m4_t vw6 = __riscv_vluxei8_v_i16m4 ((void *) it, v6, vl);
+      vint16m4_t vw7 = __riscv_vluxei8_v_i16m4 ((void *) it, v7, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i16m4_i16 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i16m4_i16 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i16m4_i16 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i16m4_i16 (vw3);
+      size_t sum4 = __riscv_vmv_x_s_i16m4_i16 (vw4);
+      size_t sum5 = __riscv_vmv_x_s_i16m4_i16 (vw5);
+      size_t sum6 = __riscv_vmv_x_s_i16m4_i16 (vw6);
+      size_t sum7 = __riscv_vmv_x_s_i16m4_i16 (vw7);
+
+      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-30.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-30.c
new file mode 100644
index 00000000000..d3ce98852db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-30.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
+{
+  return sum0 + sum1 + sum2 + sum3;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+    {
+      vuint8m4_t v0 = __riscv_vle8_v_u8m4 ((void *) it, vl);
+      it += vl;
+      vuint8m4_t v1 = __riscv_vle8_v_u8m4 ((void *) it, vl);
+      it += vl;
+      vuint8m4_t v2 = __riscv_vle8_v_u8m4 ((void *) it, vl);
+      it += vl;
+      vuint8m4_t v3 = __riscv_vle8_v_u8m4 ((void *) it, vl);
+      it += vl;
+
+      asm volatile("nop" ::: "memory");
+      vint16m8_t vw0 = __riscv_vluxei8_v_i16m8 ((void *) it, v0, vl);
+      vint16m8_t vw1 = __riscv_vluxei8_v_i16m8 ((void *) it, v1, vl);
+      vint16m8_t vw2 = __riscv_vluxei8_v_i16m8 ((void *) it, v2, vl);
+      vint16m8_t vw3 = __riscv_vluxei8_v_i16m8 ((void *) it, v3, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i16m8_i16 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i16m8_i16 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i16m8_i16 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i16m8_i16 (vw3);
+
+      sum += sumation (sum0, sum1, sum2, sum3);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-31.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-31.c
new file mode 100644
index 00000000000..72b928a579b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-31.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+          size_t sum5, size_t sum6, size_t sum7)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+    {
+      vuint8m1_t v0 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+      it += vl;
+      vuint8m1_t v1 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+      it += vl;
+      vuint8m1_t v2 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+      it += vl;
+      vuint8m1_t v3 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+      it += vl;
+      vuint8m1_t v4 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+      it += vl;
+      vuint8m1_t v5 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+      it += vl;
+      vuint8m1_t v6 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+      it += vl;
+      vuint8m1_t v7 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+      it += vl;
+
+      asm volatile("nop" ::: "memory");
+      vint32m4_t vw0 = __riscv_vluxei8_v_i32m4 ((void *) it, v0, vl);
+      vint32m4_t vw1 = __riscv_vluxei8_v_i32m4 ((void *) it, v1, vl);
+      vint32m4_t vw2 = __riscv_vluxei8_v_i32m4 ((void *) it, v2, vl);
+      vint32m4_t vw3 = __riscv_vluxei8_v_i32m4 ((void *) it, v3, vl);
+      vint32m4_t vw4 = __riscv_vluxei8_v_i32m4 ((void *) it, v4, vl);
+      vint32m4_t vw5 = __riscv_vluxei8_v_i32m4 ((void *) it, v5, vl);
+      vint32m4_t vw6 = __riscv_vluxei8_v_i32m4 ((void *) it, v6, vl);
+      vint32m4_t vw7 = __riscv_vluxei8_v_i32m4 ((void *) it, v7, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i32m4_i32 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i32m4_i32 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i32m4_i32 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i32m4_i32 (vw3);
+      size_t sum4 = __riscv_vmv_x_s_i32m4_i32 (vw4);
+      size_t sum5 = __riscv_vmv_x_s_i32m4_i32 (vw5);
+      size_t sum6 = __riscv_vmv_x_s_i32m4_i32 (vw6);
+      size_t sum7 = __riscv_vmv_x_s_i32m4_i32 (vw7);
+
+      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-32.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-32.c
new file mode 100644
index 00000000000..273c5fca642
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-32.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
+{
+  return sum0 + sum1 + sum2 + sum3;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+    {
+      vuint8m2_t v0 = __riscv_vle8_v_u8m2 ((void *) it, vl);
+      it += vl;
+      vuint8m2_t v1 = __riscv_vle8_v_u8m2 ((void *) it, vl);
+      it += vl;
+      vuint8m2_t v2 = __riscv_vle8_v_u8m2 ((void *) it, vl);
+      it += vl;
+      vuint8m2_t v3 = __riscv_vle8_v_u8m2 ((void *) it, vl);
+      it += vl;
+
+      asm volatile("nop" ::: "memory");
+      vint32m8_t vw0 = __riscv_vluxei8_v_i32m8 ((void *) it, v0, vl);
+      vint32m8_t vw1 = __riscv_vluxei8_v_i32m8 ((void *) it, v1, vl);
+      vint32m8_t vw2 = __riscv_vluxei8_v_i32m8 ((void *) it, v2, vl);
+      vint32m8_t vw3 = __riscv_vluxei8_v_i32m8 ((void *) it, v3, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i32m8_i32 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i32m8_i32 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i32m8_i32 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i32m8_i32 (vw3);
+
+      sum += sumation (sum0, sum1, sum2, sum3);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-33.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-33.c
new file mode 100644
index 00000000000..a5c2ad1de62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-33.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
+{
+  return sum0 + sum1 + sum2 + sum3;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+    {
+      vuint8m1_t v0 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+      it += vl;
+      vuint8m1_t v1 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+      it += vl;
+      vuint8m1_t v2 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+      it += vl;
+      vuint8m1_t v3 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+      it += vl;
+
+      asm volatile("nop" ::: "memory");
+      vint64m8_t vw0 = __riscv_vluxei8_v_i64m8 ((void *) it, v0, vl);
+      vint64m8_t vw1 = __riscv_vluxei8_v_i64m8 ((void *) it, v1, vl);
+      vint64m8_t vw2 = __riscv_vluxei8_v_i64m8 ((void *) it, v2, vl);
+      vint64m8_t vw3 = __riscv_vluxei8_v_i64m8 ((void *) it, v3, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i64m8_i64 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i64m8_i64 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i64m8_i64 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i64m8_i64 (vw3);
+
+      sum += sumation (sum0, sum1, sum2, sum3);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */