From patchwork Thu Nov 30 10:38:36 2023
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "juzhe.zhong@rivai.ai" <juzhe.zhong@rivai.ai>
X-Patchwork-Id: 81021
Return-Path: <gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org>
X-Original-To: patchwork@sourceware.org
Delivered-To: patchwork@sourceware.org
Received: from server2.sourceware.org (localhost [IPv6:::1])
	by sourceware.org (Postfix) with ESMTP id 39B33385B522
	for <patchwork@sourceware.org>; Thu, 30 Nov 2023 10:39:03 +0000 (GMT)
X-Original-To: gcc-patches@gcc.gnu.org
Delivered-To: gcc-patches@gcc.gnu.org
Received: from smtpbg151.qq.com (smtpbg151.qq.com [18.169.211.239])
 by sourceware.org (Postfix) with ESMTPS id ECAFE3858D37
 for <gcc-patches@gcc.gnu.org>; Thu, 30 Nov 2023 10:38:43 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org ECAFE3858D37
Authentication-Results: sourceware.org;
 dmarc=none (p=none dis=none) header.from=rivai.ai
Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=rivai.ai
ARC-Filter: OpenARC Filter v1.0.0 sourceware.org ECAFE3858D37
Authentication-Results: server2.sourceware.org;
 arc=none smtp.remote-ip=18.169.211.239
ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1701340728; cv=none;
 b=VLmd/oLNNyyDav53K5jSzW06UYNVOvWpNHlbahT73jb/boiHp4pC+FgSgybmkr6V9+xduLMd1KNqnBM4jSBByLpuzj7Fi5woVjOP12EQ08Desk4H5jQFeaM5gT0YEfHu5w2POBrM6Z5V05cJSCXOs6ByNz75ysXuJWcfo1IIn1I=
ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key;
 t=1701340728; c=relaxed/simple;
 bh=koIbmnZhBChlnbAqmBlMb1/5RNI0dBSqkHhSi7JjeBU=;
 h=From:To:Subject:Date:Message-Id:MIME-Version;
 b=SpU2Q3ZZ4eSIUD36+o6CUFMqMFFYpMdtpVfQ+mFa6vbIzj4ddHuuA6I6ZOTa9iSRc2vf7C81sRyFoFz/JEAW/+Z5b9HDNnurovB8u/m4409avZ13h7rDb1eT8xoq+NFiVFG69NMg9FlD5Y1lv+TDlYyqDRLwhfqZ6K88E1OQKhA=
ARC-Authentication-Results: i=1; server2.sourceware.org
X-QQ-mid: bizesmtp82t1701340718t6fbya7l
Received: from rios-cad122.hadoop.rioslab.org ( [58.60.1.26])
 by bizesmtp.qq.com (ESMTP) with
 id ; Thu, 30 Nov 2023 18:38:37 +0800 (CST)
X-QQ-SSF: 01400000000000G0V000000A0000000
X-QQ-FEAT: kN2ypXZVqgzZNoAlZeScQ0/JnUH8HilEhxel3TENiK6qPK8WuJnDFrkEzid3f
 iynjemBmKEIa6zEPFchC/zWjgD4ySwaTtJ1zOXkTCWHhKDjK1OJRZ1YSnix37DeLR5Qjvd8
 YRXFi9aMFIKBO7YZ3kVUHdsq1x1TePUt3k2VcTEfPnAMH/bXdKM8AXbIILQjFVoLXyzB/lP
 VUi5faFDR4zni+aIk8tCAFcZ+s6THYbzP3zwqryCidqEDIiBZ4SNhSAZOPD8CKmmWB3Iu1P
 SaivMIzInZow+3PAm3Gmh+uU/rnIhsB7LbFYS5M+W+c2pQEL+t31tc2vQGYgY3kSqXaa67K
 L+Ltoj97Rn7tNg5hcvJRQPKK9h6b0n7Yw7DbNWMsXgsBwtbDwkbGM5PCuQnGoQdMPXzyYxF
 QZT1krB9F3I=
X-QQ-GoodBg: 2
X-BIZMAIL-ID: 1695146524101988222
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
To: gcc-patches@gcc.gnu.org
Cc: kito.cheng@gmail.com, kito.cheng@sifive.com, jeffreyalaw@gmail.com,
 rdapp.gcc@gmail.com, Juzhe-Zhong <juzhe.zhong@rivai.ai>
Subject: [PATCH V2] RISC-V: Remove earlyclobber for wx/wf instructions.
Date: Thu, 30 Nov 2023 18:38:36 +0800
Message-Id: <20231130103836.3913724-1-juzhe.zhong@rivai.ai>
X-Mailer: git-send-email 2.36.3
MIME-Version: 1.0
X-QQ-SENDSIZE: 520
Feedback-ID: bizesmtp:rivai.ai:qybglogicsvrgz:qybglogicsvrgz7a-one-0
X-Spam-Status: No, score=-10.3 required=5.0 tests=BAYES_00, GIT_PATCH_0,
 KAM_DMARC_STATUS, KAM_SHORT, RCVD_IN_BARRACUDACENTRAL, RCVD_IN_DNSWL_NONE,
 RCVD_IN_MSPIKE_H2, SPF_HELO_PASS, SPF_PASS, TXREP,
 T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on
 server2.sourceware.org
X-BeenThere: gcc-patches@gcc.gnu.org
X-Mailman-Version: 2.1.30
Precedence: list
List-Id: Gcc-patches mailing list <gcc-patches.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/gcc-patches>,
 <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/gcc-patches>,
 <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>
Errors-To: gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org

While working on overlap for widening instructions, I realize that we set
vwadd.wx/vfwadd.wf as earlyclobber which is incorrect.

Since according to RVV ISA:
"The destination EEW equals the source EEW."

vwadd.vx widens the first source operand (i.e. 2 * source EEW = dest EEW) while
vwadd.wx only widens the second/scalar source operand.

Therefore overlap is legal for wx but not for vx.

Before this patch (heave spillings):

        csrr    a5,vlenb
        slli    a5,a5,1
        addi    a5,a5,64
        vfwadd.wf       v2,v14,fs0
        add     a5,a5,sp
        vs2r.v  v2,0(a5)
        vl2re32.v       v2,0(a1)
        vfwadd.wf       v14,v12,fs0
        vfwadd.wf       v12,v10,fs0
        vfwadd.wf       v10,v8,fs0
        vfwadd.wf       v8,v6,fs0
        vfwadd.wf       v6,v4,fs0
        vfwadd.wf       v4,v2,fs0
        vfwadd.wf       v2,v16,fs0
        vfwadd.wf       v16,v18,fs0
        vfwadd.wf       v18,v20,fs0
        vfwadd.wf       v20,v22,fs0
        vfwadd.wf       v22,v24,fs0
        vfwadd.wf       v24,v26,fs0
        vfwadd.wf       v26,v28,fs0
        vfwadd.wf       v28,v30,fs0
        vfwadd.wf       v30,v0,fs0
        nop
        vsetvli zero,zero,e32,m2,ta,ma
        csrr    a5,vlenb

After this patch (no spillings):

       	vfwadd.wf	v16,v16,fs0
	vfwadd.wf	v14,v14,fs0
	vfwadd.wf	v12,v12,fs0
	vfwadd.wf	v10,v10,fs0
	vfwadd.wf	v8,v8,fs0
	vfwadd.wf	v6,v6,fs0
	vfwadd.wf	v4,v4,fs0
	vfwadd.wf	v2,v2,fs0
	vfwadd.wf	v18,v18,fs0
	vfwadd.wf	v20,v20,fs0
	vfwadd.wf	v22,v22,fs0
	vfwadd.wf	v24,v24,fs0
	vfwadd.wf	v26,v26,fs0
	vfwadd.wf	v28,v28,fs0
	vfwadd.wf	v30,v30,fs0
	vfwadd.wf	v0,v0,fs0

Confirm the codegen above run successfully on both SPIKE/QEMU.

	PR target/112431

gcc/ChangeLog:

	* config/riscv/vector.md: Remove earlyclobber for wx/wf instructions.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/base/pr112431-19.c: New test.
	* gcc.target/riscv/rvv/base/pr112431-20.c: New test.
	* gcc.target/riscv/rvv/base/pr112431-21.c: New test.
---
 gcc/config/riscv/vector.md                    |   4 +-
 .../gcc.target/riscv/rvv/base/pr112431-19.c   | 103 +++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr112431-20.c   | 103 +++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr112431-21.c   | 106 ++++++++++++++++++
 4 files changed, 314 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-19.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-20.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-21.c

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index e5d62c6e58b..b47b9742b62 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3833,7 +3833,7 @@
    (set_attr "mode" "<V_DOUBLE_TRUNC>")])
 
 (define_insn "@pred_single_widen_<plus_minus:optab><any_extend:su><mode>_scalar"
-  [(set (match_operand:VWEXTI 0 "register_operand"                  "=&vr,&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand"                   "=vr,   vr")
 	(if_then_else:VWEXTI
 	  (unspec:<VM>
 	    [(match_operand:<VM> 1 "vector_mask_operand"           "vmWc1,vmWc1")
@@ -7114,7 +7114,7 @@
 	(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
 
 (define_insn "@pred_single_widen_<plus_minus:optab><mode>_scalar"
-  [(set (match_operand:VWEXTF 0 "register_operand"                  "=&vr,  &vr")
+  [(set (match_operand:VWEXTF 0 "register_operand"                   "=vr,   vr")
 	(if_then_else:VWEXTF
 	  (unspec:<VM>
 	    [(match_operand:<VM> 1 "vector_mask_operand"           "vmWc1,vmWc1")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-19.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-19.c
new file mode 100644
index 00000000000..affe1aaf4f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-19.c
@@ -0,0 +1,103 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+	  size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+	  size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+	  size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
+	 + sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t __attribute__ ((noinline))
+foo (short const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = 4;
+  const short *it = buf;
+  for (int i = 0; i < len; i++)
+    {
+      vint16m2_t v0 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v1 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v2 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v3 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v4 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v5 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v6 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v7 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v8 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v9 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v10 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v11 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v12 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v13 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v14 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v15 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+
+      asm volatile("nop" ::: "memory");
+      vint16m2_t vw0 = __riscv_vwadd_wx_i16m2 (v0, 55, vl);
+      vint16m2_t vw1 = __riscv_vwadd_wx_i16m2 (v1, 55, vl);
+      vint16m2_t vw2 = __riscv_vwadd_wx_i16m2 (v2, 55, vl);
+      vint16m2_t vw3 = __riscv_vwadd_wx_i16m2 (v3, 55, vl);
+      vint16m2_t vw4 = __riscv_vwadd_wx_i16m2 (v4, 55, vl);
+      vint16m2_t vw5 = __riscv_vwadd_wx_i16m2 (v5, 55, vl);
+      vint16m2_t vw6 = __riscv_vwadd_wx_i16m2 (v6, 55, vl);
+      vint16m2_t vw7 = __riscv_vwadd_wx_i16m2 (v7, 55, vl);
+      vint16m2_t vw8 = __riscv_vwadd_wx_i16m2 (v8, 55, vl);
+      vint16m2_t vw9 = __riscv_vwadd_wx_i16m2 (v9, 55, vl);
+      vint16m2_t vw10 = __riscv_vwadd_wx_i16m2 (v10, 55, vl);
+      vint16m2_t vw11 = __riscv_vwadd_wx_i16m2 (v11, 55, vl);
+      vint16m2_t vw12 = __riscv_vwadd_wx_i16m2 (v12, 55, vl);
+      vint16m2_t vw13 = __riscv_vwadd_wx_i16m2 (v13, 55, vl);
+      vint16m2_t vw14 = __riscv_vwadd_wx_i16m2 (v14, 55, vl);
+      vint16m2_t vw15 = __riscv_vwadd_wx_i16m2 (v15, 55, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i16m2_i16 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i16m2_i16 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i16m2_i16 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i16m2_i16 (vw3);
+      size_t sum4 = __riscv_vmv_x_s_i16m2_i16 (vw4);
+      size_t sum5 = __riscv_vmv_x_s_i16m2_i16 (vw5);
+      size_t sum6 = __riscv_vmv_x_s_i16m2_i16 (vw6);
+      size_t sum7 = __riscv_vmv_x_s_i16m2_i16 (vw7);
+      size_t sum8 = __riscv_vmv_x_s_i16m2_i16 (vw8);
+      size_t sum9 = __riscv_vmv_x_s_i16m2_i16 (vw9);
+      size_t sum10 = __riscv_vmv_x_s_i16m2_i16 (vw10);
+      size_t sum11 = __riscv_vmv_x_s_i16m2_i16 (vw11);
+      size_t sum12 = __riscv_vmv_x_s_i16m2_i16 (vw12);
+      size_t sum13 = __riscv_vmv_x_s_i16m2_i16 (vw13);
+      size_t sum14 = __riscv_vmv_x_s_i16m2_i16 (vw14);
+      size_t sum15 = __riscv_vmv_x_s_i16m2_i16 (vw15);
+
+      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8,
+		       sum9, sum10, sum11, sum12, sum13, sum14, sum15);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-20.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-20.c
new file mode 100644
index 00000000000..72f3644e592
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-20.c
@@ -0,0 +1,103 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zfh -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+	  size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+	  size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+	  size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
+	 + sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t __attribute__ ((noinline))
+foo (float const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = 4;
+  const float *it = buf;
+  for (int i = 0; i < len; i++)
+    {
+      vfloat32m2_t v0 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v1 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v2 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v3 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v4 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v5 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v6 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v7 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v8 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v9 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v10 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v11 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v12 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v13 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v14 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+      vfloat32m2_t v15 = __riscv_vle32_v_f32m2 (it, vl);
+      it += vl;
+
+      asm volatile("nop" ::: "memory");
+      vfloat32m2_t vw0 = __riscv_vfwadd_wf_f32m2 (v0, 55, vl);
+      vfloat32m2_t vw1 = __riscv_vfwadd_wf_f32m2 (v1, 55, vl);
+      vfloat32m2_t vw2 = __riscv_vfwadd_wf_f32m2 (v2, 55, vl);
+      vfloat32m2_t vw3 = __riscv_vfwadd_wf_f32m2 (v3, 55, vl);
+      vfloat32m2_t vw4 = __riscv_vfwadd_wf_f32m2 (v4, 55, vl);
+      vfloat32m2_t vw5 = __riscv_vfwadd_wf_f32m2 (v5, 55, vl);
+      vfloat32m2_t vw6 = __riscv_vfwadd_wf_f32m2 (v6, 55, vl);
+      vfloat32m2_t vw7 = __riscv_vfwadd_wf_f32m2 (v7, 55, vl);
+      vfloat32m2_t vw8 = __riscv_vfwadd_wf_f32m2 (v8, 55, vl);
+      vfloat32m2_t vw9 = __riscv_vfwadd_wf_f32m2 (v9, 55, vl);
+      vfloat32m2_t vw10 = __riscv_vfwadd_wf_f32m2 (v10, 55, vl);
+      vfloat32m2_t vw11 = __riscv_vfwadd_wf_f32m2 (v11, 55, vl);
+      vfloat32m2_t vw12 = __riscv_vfwadd_wf_f32m2 (v12, 55, vl);
+      vfloat32m2_t vw13 = __riscv_vfwadd_wf_f32m2 (v13, 55, vl);
+      vfloat32m2_t vw14 = __riscv_vfwadd_wf_f32m2 (v14, 55, vl);
+      vfloat32m2_t vw15 = __riscv_vfwadd_wf_f32m2 (v15, 55, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vfmv_f_s_f32m2_f32 (vw0);
+      size_t sum1 = __riscv_vfmv_f_s_f32m2_f32 (vw1);
+      size_t sum2 = __riscv_vfmv_f_s_f32m2_f32 (vw2);
+      size_t sum3 = __riscv_vfmv_f_s_f32m2_f32 (vw3);
+      size_t sum4 = __riscv_vfmv_f_s_f32m2_f32 (vw4);
+      size_t sum5 = __riscv_vfmv_f_s_f32m2_f32 (vw5);
+      size_t sum6 = __riscv_vfmv_f_s_f32m2_f32 (vw6);
+      size_t sum7 = __riscv_vfmv_f_s_f32m2_f32 (vw7);
+      size_t sum8 = __riscv_vfmv_f_s_f32m2_f32 (vw8);
+      size_t sum9 = __riscv_vfmv_f_s_f32m2_f32 (vw9);
+      size_t sum10 = __riscv_vfmv_f_s_f32m2_f32 (vw10);
+      size_t sum11 = __riscv_vfmv_f_s_f32m2_f32 (vw11);
+      size_t sum12 = __riscv_vfmv_f_s_f32m2_f32 (vw12);
+      size_t sum13 = __riscv_vfmv_f_s_f32m2_f32 (vw13);
+      size_t sum14 = __riscv_vfmv_f_s_f32m2_f32 (vw14);
+      size_t sum15 = __riscv_vfmv_f_s_f32m2_f32 (vw15);
+
+      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8,
+		       sum9, sum10, sum11, sum12, sum13, sum14, sum15);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-21.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-21.c
new file mode 100644
index 00000000000..3e43c949509
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-21.c
@@ -0,0 +1,106 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-O3 -ansi -pedantic-errors -std=gnu99" } */
+
+#include <riscv_vector.h>
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+          size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+          size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+          size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
+         + sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t __attribute__ ((noinline))
+foo (short const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = 4;
+  const short *it = buf;
+  for (int i = 0; i < len; i++)
+    {
+      vint16m2_t v0 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v1 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v2 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v3 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v4 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v5 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v6 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v7 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v8 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v9 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v10 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v11 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v12 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v13 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v14 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+      vint16m2_t v15 = __riscv_vle16_v_i16m2 (it, vl);
+      it += vl;
+
+      asm volatile ("" ::: "memory");
+      vint16m2_t vw0 = __riscv_vwadd_wx_i16m2 (v0, 55, vl);
+      vint16m2_t vw1 = __riscv_vwadd_wx_i16m2 (v1, 55, vl);
+      vint16m2_t vw2 = __riscv_vwadd_wx_i16m2 (v2, 55, vl);
+      vint16m2_t vw3 = __riscv_vwadd_wx_i16m2 (v3, 55, vl);
+      vint16m2_t vw4 = __riscv_vwadd_wx_i16m2 (v4, 55, vl);
+      vint16m2_t vw5 = __riscv_vwadd_wx_i16m2 (v5, 55, vl);
+      vint16m2_t vw6 = __riscv_vwadd_wx_i16m2 (v6, 55, vl);
+      vint16m2_t vw7 = __riscv_vwadd_wx_i16m2 (v7, 55, vl);
+      vint16m2_t vw8 = __riscv_vwadd_wx_i16m2 (v8, 55, vl);
+      vint16m2_t vw9 = __riscv_vwadd_wx_i16m2 (v9, 55, vl);
+      vint16m2_t vw10 = __riscv_vwadd_wx_i16m2 (v10, 55, vl);
+      vint16m2_t vw11 = __riscv_vwadd_wx_i16m2 (v11, 55, vl);
+      vint16m2_t vw12 = __riscv_vwadd_wx_i16m2 (v12, 55, vl);
+      vint16m2_t vw13 = __riscv_vwadd_wx_i16m2 (v13, 55, vl);
+      vint16m2_t vw14 = __riscv_vwadd_wx_i16m2 (v14, 55, vl);
+      vint16m2_t vw15 = __riscv_vwadd_wx_i16m2 (v15, 55, vl);
+
+      asm volatile ("" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i16m2_i16 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i16m2_i16 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i16m2_i16 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i16m2_i16 (vw3);
+      size_t sum4 = __riscv_vmv_x_s_i16m2_i16 (vw4);
+      size_t sum5 = __riscv_vmv_x_s_i16m2_i16 (vw5);
+      size_t sum6 = __riscv_vmv_x_s_i16m2_i16 (vw6);
+      size_t sum7 = __riscv_vmv_x_s_i16m2_i16 (vw7);
+      size_t sum8 = __riscv_vmv_x_s_i16m2_i16 (vw8);
+      size_t sum9 = __riscv_vmv_x_s_i16m2_i16 (vw9);
+      size_t sum10 = __riscv_vmv_x_s_i16m2_i16 (vw10);
+      size_t sum11 = __riscv_vmv_x_s_i16m2_i16 (vw11);
+      size_t sum12 = __riscv_vmv_x_s_i16m2_i16 (vw12);
+      size_t sum13 = __riscv_vmv_x_s_i16m2_i16 (vw13);
+      size_t sum14 = __riscv_vmv_x_s_i16m2_i16 (vw14);
+      size_t sum15 = __riscv_vmv_x_s_i16m2_i16 (vw15);
+
+      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8,
+                       sum9, sum10, sum11, sum12, sum13, sum14, sum15);
+    }
+  return sum;
+}
+
+int
+main (int in, char **out)
+{
+  short const buf[1000];
+  int i = foo (buf, 4);
+  **out = i;
+  return 0;
+}