[v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar
Checks
Context |
Check |
Description |
rivoscibot/toolchain-ci-rivos-lint |
success
|
Lint passed
|
rivoscibot/toolchain-ci-rivos-apply-patch |
success
|
Patch applied
|
rivoscibot/toolchain-ci-rivos-test |
warning
|
Testing skipped
|
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gcv-lp64d-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-build--linux-rv64gcv-lp64d-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc_zba_zbb_zbc_zbs-lp64d-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gc-lp64d-non-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc-lp64d-non-multilib |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
success
|
Test passed
|
Commit Message
From: Pan Li <pan2.li@intel.com>
This patch implements the quad and oct .SAT_TRUNC patterns
in the RISC-V backend, i.e.:
Form 1:
#define DEF_SAT_U_TRUC_FMT_1(NT, WT) \
NT __attribute__((noinline)) \
sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
{ \
bool overflow = x > (WT)(NT)(-1); \
return ((NT)x) | (NT)-overflow; \
}
DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
Before this patch:
4 │ __attribute__((noinline))
5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
6 │ {
7 │ _Bool overflow;
8 │ short unsigned int _1;
9 │ short unsigned int _2;
10 │ short unsigned int _3;
11 │ uint16_t _6;
12 │
13 │ ;; basic block 2, loop depth 0
14 │ ;; pred: ENTRY
15 │ overflow_5 = x_4(D) > 65535;
16 │ _1 = (short unsigned int) x_4(D);
17 │ _2 = (short unsigned int) overflow_5;
18 │ _3 = -_2;
19 │ _6 = _1 | _3;
20 │ return _6;
21 │ ;; succ: EXIT
22 │
23 │ }
After this patch:
3 │
4 │ __attribute__((noinline))
5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
6 │ {
7 │ uint16_t _6;
8 │
9 │ ;; basic block 2, loop depth 0
10 │ ;; pred: ENTRY
11 │ _6 = .SAT_TRUNC (x_4(D)); [tail call]
12 │ return _6;
13 │ ;; succ: EXIT
14 │
15 │ }
The following test suites passed for this patch:
1. The rv64gcv full regression test.
2. The rv64gcv build with glibc.
gcc/ChangeLog:
* config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for
quad truncation.
(ANYI_OCT_TRUNC): New iterator for oct truncation.
(ANYI_QUAD_TRUNCATED): New attr for truncated quad modes.
(ANYI_OCT_TRUNCATED): New attr for truncated oct modes.
(anyi_quad_truncated): Ditto but for lower case.
(anyi_oct_truncated): Ditto but for lower case.
* config/riscv/riscv.md (ustrunc<mode><anyi_quad_truncated>2):
Add new pattern for quad truncation.
(ustrunc<mode><anyi_oct_truncated>2): Ditto but for oct.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust
the expand dump check times.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
* gcc.target/riscv/sat_arith_data.h: Add test helper macros.
* gcc.target/riscv/sat_u_trunc-4.c: New test.
* gcc.target/riscv/sat_u_trunc-5.c: New test.
* gcc.target/riscv/sat_u_trunc-6.c: New test.
* gcc.target/riscv/sat_u_trunc-run-4.c: New test.
* gcc.target/riscv/sat_u_trunc-run-5.c: New test.
* gcc.target/riscv/sat_u_trunc-run-6.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/iterators.md | 20 ++++++++
gcc/config/riscv/riscv.md | 20 ++++++++
.../rvv/autovec/unop/vec_sat_u_trunc-2.c | 2 +-
.../rvv/autovec/unop/vec_sat_u_trunc-3.c | 2 +-
.../gcc.target/riscv/sat_arith_data.h | 51 +++++++++++++++++++
.../gcc.target/riscv/sat_u_trunc-4.c | 17 +++++++
.../gcc.target/riscv/sat_u_trunc-5.c | 17 +++++++
.../gcc.target/riscv/sat_u_trunc-6.c | 20 ++++++++
.../gcc.target/riscv/sat_u_trunc-run-4.c | 16 ++++++
.../gcc.target/riscv/sat_u_trunc-run-5.c | 16 ++++++
.../gcc.target/riscv/sat_u_trunc-run-6.c | 16 ++++++
11 files changed, 195 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
Comments
Kindly ping.
Pan
-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com>
Sent: Tuesday, July 23, 2024 1:06 PM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Li, Pan2 <pan2.li@intel.com>
Subject: [PATCH v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar
From: Pan Li <pan2.li@intel.com>
This patch implements the quad and oct .SAT_TRUNC patterns
in the RISC-V backend, i.e.:
Form 1:
#define DEF_SAT_U_TRUC_FMT_1(NT, WT) \
NT __attribute__((noinline)) \
sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
{ \
bool overflow = x > (WT)(NT)(-1); \
return ((NT)x) | (NT)-overflow; \
}
DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
Before this patch:
4 │ __attribute__((noinline))
5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
6 │ {
7 │ _Bool overflow;
8 │ short unsigned int _1;
9 │ short unsigned int _2;
10 │ short unsigned int _3;
11 │ uint16_t _6;
12 │
13 │ ;; basic block 2, loop depth 0
14 │ ;; pred: ENTRY
15 │ overflow_5 = x_4(D) > 65535;
16 │ _1 = (short unsigned int) x_4(D);
17 │ _2 = (short unsigned int) overflow_5;
18 │ _3 = -_2;
19 │ _6 = _1 | _3;
20 │ return _6;
21 │ ;; succ: EXIT
22 │
23 │ }
After this patch:
3 │
4 │ __attribute__((noinline))
5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
6 │ {
7 │ uint16_t _6;
8 │
9 │ ;; basic block 2, loop depth 0
10 │ ;; pred: ENTRY
11 │ _6 = .SAT_TRUNC (x_4(D)); [tail call]
12 │ return _6;
13 │ ;; succ: EXIT
14 │
15 │ }
The following test suites passed for this patch:
1. The rv64gcv full regression test.
2. The rv64gcv build with glibc.
gcc/ChangeLog:
* config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for
quad truncation.
(ANYI_OCT_TRUNC): New iterator for oct truncation.
(ANYI_QUAD_TRUNCATED): New attr for truncated quad modes.
(ANYI_OCT_TRUNCATED): New attr for truncated oct modes.
(anyi_quad_truncated): Ditto but for lower case.
(anyi_oct_truncated): Ditto but for lower case.
* config/riscv/riscv.md (ustrunc<mode><anyi_quad_truncated>2):
Add new pattern for quad truncation.
(ustrunc<mode><anyi_oct_truncated>2): Ditto but for oct.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust
the expand dump check times.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
* gcc.target/riscv/sat_arith_data.h: Add test helper macros.
* gcc.target/riscv/sat_u_trunc-4.c: New test.
* gcc.target/riscv/sat_u_trunc-5.c: New test.
* gcc.target/riscv/sat_u_trunc-6.c: New test.
* gcc.target/riscv/sat_u_trunc-run-4.c: New test.
* gcc.target/riscv/sat_u_trunc-run-5.c: New test.
* gcc.target/riscv/sat_u_trunc-run-6.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/iterators.md | 20 ++++++++
gcc/config/riscv/riscv.md | 20 ++++++++
.../rvv/autovec/unop/vec_sat_u_trunc-2.c | 2 +-
.../rvv/autovec/unop/vec_sat_u_trunc-3.c | 2 +-
.../gcc.target/riscv/sat_arith_data.h | 51 +++++++++++++++++++
.../gcc.target/riscv/sat_u_trunc-4.c | 17 +++++++
.../gcc.target/riscv/sat_u_trunc-5.c | 17 +++++++
.../gcc.target/riscv/sat_u_trunc-6.c | 20 ++++++++
.../gcc.target/riscv/sat_u_trunc-run-4.c | 16 ++++++
.../gcc.target/riscv/sat_u_trunc-run-5.c | 16 ++++++
.../gcc.target/riscv/sat_u_trunc-run-6.c | 16 ++++++
11 files changed, 195 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 734da041f0c..bdcdb8babc8 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -67,14 +67,34 @@ (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
(define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")])
+(define_mode_iterator ANYI_QUAD_TRUNC [SI (DI "TARGET_64BIT")])
+
+(define_mode_iterator ANYI_OCT_TRUNC [(DI "TARGET_64BIT")])
+
(define_mode_attr ANYI_DOUBLE_TRUNCATED [
(HI "QI") (SI "HI") (DI "SI")
])
+(define_mode_attr ANYI_QUAD_TRUNCATED [
+ (SI "QI") (DI "HI")
+])
+
+(define_mode_attr ANYI_OCT_TRUNCATED [
+ (DI "QI")
+])
+
(define_mode_attr anyi_double_truncated [
(HI "qi") (SI "hi") (DI "si")
])
+(define_mode_attr anyi_quad_truncated [
+ (SI "qi") (DI "hi")
+])
+
+(define_mode_attr anyi_oct_truncated [
+ (DI "qi")
+])
+
;; Iterator for hardware-supported floating-point modes.
(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
(DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index d9f6c1765d0..cab643f7d82 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4325,6 +4325,26 @@ (define_expand "ustrunc<mode><anyi_double_truncated>2"
}
)
+(define_expand "ustrunc<mode><anyi_quad_truncated>2"
+ [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand")
+ (match_operand:ANYI_QUAD_TRUNC 1 "register_operand")]
+ ""
+ {
+ riscv_expand_ustrunc (operands[0], operands[1]);
+ DONE;
+ }
+)
+
+(define_expand "ustrunc<mode><anyi_oct_truncated>2"
+ [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand")
+ (match_operand:ANYI_OCT_TRUNC 1 "register_operand")]
+ ""
+ {
+ riscv_expand_ustrunc (operands[0], operands[1]);
+ DONE;
+ }
+)
+
;; These are forms of (x << C1) + C2, potentially canonicalized from
;; ((x + C2') << C1. Depending on the cost to load C2 vs C2' we may
;; want to go ahead and recognize this form as C2 may be cheaper to
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
index f1b1cc7e5d9..2516468fd16 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
@@ -18,4 +18,4 @@
*/
DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint32_t)
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
index 30e4a15eca0..5df05f72cbb 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
@@ -20,4 +20,4 @@
*/
DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint64_t)
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
index b991f8aa955..52e4e2b5f9f 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
@@ -14,7 +14,10 @@
#define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2)
TEST_UNARY_STRUCT (uint8_t, uint16_t)
+TEST_UNARY_STRUCT (uint8_t, uint32_t)
+TEST_UNARY_STRUCT (uint8_t, uint64_t)
TEST_UNARY_STRUCT (uint16_t, uint32_t)
+TEST_UNARY_STRUCT (uint16_t, uint64_t)
TEST_UNARY_STRUCT (uint32_t, uint64_t)
TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
@@ -29,6 +32,39 @@ TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
{255, 65535},
};
+TEST_UNARY_STRUCT_DECL(uint8_t, uint32_t) \
+ TEST_UNARY_DATA(uint8_t, uint32_t)[] =
+{
+ { 0, 0},
+ { 2, 2},
+ {254, 254},
+ {255, 255},
+ {255, 256},
+ {255, 65534},
+ {255, 65535},
+ {255, 65536},
+ {255, 4294967294},
+ {255, 4294967295},
+};
+
+TEST_UNARY_STRUCT_DECL(uint8_t, uint64_t) \
+ TEST_UNARY_DATA(uint8_t, uint64_t)[] =
+{
+ { 0, 0},
+ { 2, 2},
+ {254, 254},
+ {255, 255},
+ {255, 256},
+ {255, 65534},
+ {255, 65535},
+ {255, 65536},
+ {255, 4294967294},
+ {255, 4294967295},
+ {255, 4294967296},
+ {255, 18446744073709551614u},
+ {255, 18446744073709551615u},
+};
+
TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
TEST_UNARY_DATA(uint16_t, uint32_t)[] =
{
@@ -41,6 +77,21 @@ TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
{65535, 4294967295},
};
+TEST_UNARY_STRUCT_DECL(uint16_t, uint64_t) \
+ TEST_UNARY_DATA(uint16_t, uint64_t)[] =
+{
+ { 0, 0},
+ { 5, 5},
+ {65534, 65534},
+ {65535, 65535},
+ {65535, 65536},
+ {65535, 4294967294},
+ {65535, 4294967295},
+ {65535, 4294967296},
+ {65535, 18446744073709551614u},
+ {65535, 18446744073709551615u},
+};
+
TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \
TEST_UNARY_DATA(uint32_t, uint64_t)[] =
{
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
new file mode 100644
index 00000000000..7d84b2689ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
new file mode 100644
index 00000000000..87e6c649fbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
new file mode 100644
index 00000000000..af410916fb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
new file mode 100644
index 00000000000..609f3540555
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA TEST_UNARY_DATA_WRAP(T1, T2)
+#define T TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x) RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
new file mode 100644
index 00000000000..8629b3e956e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA TEST_UNARY_DATA_WRAP(T1, T2)
+#define T TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x) RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
new file mode 100644
index 00000000000..6d25869d239
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint16_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA TEST_UNARY_DATA_WRAP(T1, T2)
+#define T TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x) RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
--
2.34.1
Kindly ping++.
Pan
-----Original Message-----
From: Li, Pan2
Sent: Wednesday, July 31, 2024 9:12 AM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: RE: [PATCH v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar
Kindly ping.
Pan
-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com>
Sent: Tuesday, July 23, 2024 1:06 PM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Li, Pan2 <pan2.li@intel.com>
Subject: [PATCH v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar
From: Pan Li <pan2.li@intel.com>
This patch implements the quad and oct .SAT_TRUNC patterns
in the RISC-V backend, i.e.:
Form 1:
#define DEF_SAT_U_TRUC_FMT_1(NT, WT) \
NT __attribute__((noinline)) \
sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
{ \
bool overflow = x > (WT)(NT)(-1); \
return ((NT)x) | (NT)-overflow; \
}
DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
Before this patch:
4 │ __attribute__((noinline))
5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
6 │ {
7 │ _Bool overflow;
8 │ short unsigned int _1;
9 │ short unsigned int _2;
10 │ short unsigned int _3;
11 │ uint16_t _6;
12 │
13 │ ;; basic block 2, loop depth 0
14 │ ;; pred: ENTRY
15 │ overflow_5 = x_4(D) > 65535;
16 │ _1 = (short unsigned int) x_4(D);
17 │ _2 = (short unsigned int) overflow_5;
18 │ _3 = -_2;
19 │ _6 = _1 | _3;
20 │ return _6;
21 │ ;; succ: EXIT
22 │
23 │ }
After this patch:
3 │
4 │ __attribute__((noinline))
5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
6 │ {
7 │ uint16_t _6;
8 │
9 │ ;; basic block 2, loop depth 0
10 │ ;; pred: ENTRY
11 │ _6 = .SAT_TRUNC (x_4(D)); [tail call]
12 │ return _6;
13 │ ;; succ: EXIT
14 │
15 │ }
The following test suites passed for this patch:
1. The rv64gcv full regression test.
2. The rv64gcv build with glibc.
gcc/ChangeLog:
* config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for
quad truncation.
(ANYI_OCT_TRUNC): New iterator for oct truncation.
(ANYI_QUAD_TRUNCATED): New attr for truncated quad modes.
(ANYI_OCT_TRUNCATED): New attr for truncated oct modes.
(anyi_quad_truncated): Ditto but for lower case.
(anyi_oct_truncated): Ditto but for lower case.
* config/riscv/riscv.md (ustrunc<mode><anyi_quad_truncated>2):
Add new pattern for quad truncation.
(ustrunc<mode><anyi_oct_truncated>2): Ditto but for oct.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust
the expand dump check times.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
* gcc.target/riscv/sat_arith_data.h: Add test helper macros.
* gcc.target/riscv/sat_u_trunc-4.c: New test.
* gcc.target/riscv/sat_u_trunc-5.c: New test.
* gcc.target/riscv/sat_u_trunc-6.c: New test.
* gcc.target/riscv/sat_u_trunc-run-4.c: New test.
* gcc.target/riscv/sat_u_trunc-run-5.c: New test.
* gcc.target/riscv/sat_u_trunc-run-6.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/iterators.md | 20 ++++++++
gcc/config/riscv/riscv.md | 20 ++++++++
.../rvv/autovec/unop/vec_sat_u_trunc-2.c | 2 +-
.../rvv/autovec/unop/vec_sat_u_trunc-3.c | 2 +-
.../gcc.target/riscv/sat_arith_data.h | 51 +++++++++++++++++++
.../gcc.target/riscv/sat_u_trunc-4.c | 17 +++++++
.../gcc.target/riscv/sat_u_trunc-5.c | 17 +++++++
.../gcc.target/riscv/sat_u_trunc-6.c | 20 ++++++++
.../gcc.target/riscv/sat_u_trunc-run-4.c | 16 ++++++
.../gcc.target/riscv/sat_u_trunc-run-5.c | 16 ++++++
.../gcc.target/riscv/sat_u_trunc-run-6.c | 16 ++++++
11 files changed, 195 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 734da041f0c..bdcdb8babc8 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -67,14 +67,34 @@ (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
(define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")])
+(define_mode_iterator ANYI_QUAD_TRUNC [SI (DI "TARGET_64BIT")])
+
+(define_mode_iterator ANYI_OCT_TRUNC [(DI "TARGET_64BIT")])
+
(define_mode_attr ANYI_DOUBLE_TRUNCATED [
(HI "QI") (SI "HI") (DI "SI")
])
+(define_mode_attr ANYI_QUAD_TRUNCATED [
+ (SI "QI") (DI "HI")
+])
+
+(define_mode_attr ANYI_OCT_TRUNCATED [
+ (DI "QI")
+])
+
(define_mode_attr anyi_double_truncated [
(HI "qi") (SI "hi") (DI "si")
])
+(define_mode_attr anyi_quad_truncated [
+ (SI "qi") (DI "hi")
+])
+
+(define_mode_attr anyi_oct_truncated [
+ (DI "qi")
+])
+
;; Iterator for hardware-supported floating-point modes.
(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
(DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index d9f6c1765d0..cab643f7d82 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4325,6 +4325,26 @@ (define_expand "ustrunc<mode><anyi_double_truncated>2"
}
)
+(define_expand "ustrunc<mode><anyi_quad_truncated>2"
+ [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand")
+ (match_operand:ANYI_QUAD_TRUNC 1 "register_operand")]
+ ""
+ {
+ riscv_expand_ustrunc (operands[0], operands[1]);
+ DONE;
+ }
+)
+
+(define_expand "ustrunc<mode><anyi_oct_truncated>2"
+ [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand")
+ (match_operand:ANYI_OCT_TRUNC 1 "register_operand")]
+ ""
+ {
+ riscv_expand_ustrunc (operands[0], operands[1]);
+ DONE;
+ }
+)
+
;; These are forms of (x << C1) + C2, potentially canonicalized from
;; ((x + C2') << C1. Depending on the cost to load C2 vs C2' we may
;; want to go ahead and recognize this form as C2 may be cheaper to
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
index f1b1cc7e5d9..2516468fd16 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
@@ -18,4 +18,4 @@
*/
DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint32_t)
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
index 30e4a15eca0..5df05f72cbb 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
@@ -20,4 +20,4 @@
*/
DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint64_t)
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
index b991f8aa955..52e4e2b5f9f 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
@@ -14,7 +14,10 @@
#define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2)
TEST_UNARY_STRUCT (uint8_t, uint16_t)
+TEST_UNARY_STRUCT (uint8_t, uint32_t)
+TEST_UNARY_STRUCT (uint8_t, uint64_t)
TEST_UNARY_STRUCT (uint16_t, uint32_t)
+TEST_UNARY_STRUCT (uint16_t, uint64_t)
TEST_UNARY_STRUCT (uint32_t, uint64_t)
TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
@@ -29,6 +32,39 @@ TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
{255, 65535},
};
+TEST_UNARY_STRUCT_DECL(uint8_t, uint32_t) \
+ TEST_UNARY_DATA(uint8_t, uint32_t)[] =
+{
+ { 0, 0},
+ { 2, 2},
+ {254, 254},
+ {255, 255},
+ {255, 256},
+ {255, 65534},
+ {255, 65535},
+ {255, 65536},
+ {255, 4294967294},
+ {255, 4294967295},
+};
+
+TEST_UNARY_STRUCT_DECL(uint8_t, uint64_t) \
+ TEST_UNARY_DATA(uint8_t, uint64_t)[] =
+{
+ { 0, 0},
+ { 2, 2},
+ {254, 254},
+ {255, 255},
+ {255, 256},
+ {255, 65534},
+ {255, 65535},
+ {255, 65536},
+ {255, 4294967294},
+ {255, 4294967295},
+ {255, 4294967296},
+ {255, 18446744073709551614u},
+ {255, 18446744073709551615u},
+};
+
TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
TEST_UNARY_DATA(uint16_t, uint32_t)[] =
{
@@ -41,6 +77,21 @@ TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
{65535, 4294967295},
};
+TEST_UNARY_STRUCT_DECL(uint16_t, uint64_t) \
+ TEST_UNARY_DATA(uint16_t, uint64_t)[] =
+{
+ { 0, 0},
+ { 5, 5},
+ {65534, 65534},
+ {65535, 65535},
+ {65535, 65536},
+ {65535, 4294967294},
+ {65535, 4294967295},
+ {65535, 4294967296},
+ {65535, 18446744073709551614u},
+ {65535, 18446744073709551615u},
+};
+
TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \
TEST_UNARY_DATA(uint32_t, uint64_t)[] =
{
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
new file mode 100644
index 00000000000..7d84b2689ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
new file mode 100644
index 00000000000..87e6c649fbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
new file mode 100644
index 00000000000..af410916fb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
new file mode 100644
index 00000000000..609f3540555
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA TEST_UNARY_DATA_WRAP(T1, T2)
+#define T TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x) RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
new file mode 100644
index 00000000000..8629b3e956e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA TEST_UNARY_DATA_WRAP(T1, T2)
+#define T TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x) RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
new file mode 100644
index 00000000000..6d25869d239
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint16_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA TEST_UNARY_DATA_WRAP(T1, T2)
+#define T TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x) RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
--
2.34.1
On 7/22/24 11:06 PM, pan2.li@intel.com wrote:
> From: Pan Li <pan2.li@intel.com>
>
> This patch implements the quad and oct .SAT_TRUNC patterns
> in the RISC-V backend, i.e.:
>
> Form 1:
> #define DEF_SAT_U_TRUC_FMT_1(NT, WT) \
> NT __attribute__((noinline)) \
> sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
> { \
> bool overflow = x > (WT)(NT)(-1); \
> return ((NT)x) | (NT)-overflow; \
> }
>
> DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
>
> Before this patch:
> 4 │ __attribute__((noinline))
> 5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
> 6 │ {
> 7 │ _Bool overflow;
> 8 │ short unsigned int _1;
> 9 │ short unsigned int _2;
> 10 │ short unsigned int _3;
> 11 │ uint16_t _6;
> 12 │
> 13 │ ;; basic block 2, loop depth 0
> 14 │ ;; pred: ENTRY
> 15 │ overflow_5 = x_4(D) > 65535;
> 16 │ _1 = (short unsigned int) x_4(D);
> 17 │ _2 = (short unsigned int) overflow_5;
> 18 │ _3 = -_2;
> 19 │ _6 = _1 | _3;
> 20 │ return _6;
> 21 │ ;; succ: EXIT
> 22 │
> 23 │ }
>
> After this patch:
> 3 │
> 4 │ __attribute__((noinline))
> 5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
> 6 │ {
> 7 │ uint16_t _6;
> 8 │
> 9 │ ;; basic block 2, loop depth 0
> 10 │ ;; pred: ENTRY
> 11 │ _6 = .SAT_TRUNC (x_4(D)); [tail call]
> 12 │ return _6;
> 13 │ ;; succ: EXIT
> 14 │
> 15 │ }
>
> The following test suites passed with this patch:
> 1. The full rv64gcv regression test.
> 2. The rv64gcv build with glibc.
>
> gcc/ChangeLog:
>
> * config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for
> quad truncation.
> (ANYI_OCT_TRUNC): New iterator for oct truncation.
> (ANYI_QUAD_TRUNCATED): New attr for truncated quad modes.
> (ANYI_OCT_TRUNCATED): New attr for truncated oct modes.
> (anyi_quad_truncated): Ditto but for lower case.
> (anyi_oct_truncated): Ditto but for lower case.
> * config/riscv/riscv.md (ustrunc<mode><anyi_quad_truncated>2):
> Add new pattern for quad truncation.
> (ustrunc<mode><anyi_oct_truncated>2): Ditto but for oct.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust
> the expand dump check times.
> * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
> * gcc.target/riscv/sat_arith_data.h: Add test helper macros.
> * gcc.target/riscv/sat_u_trunc-4.c: New test.
> * gcc.target/riscv/sat_u_trunc-5.c: New test.
> * gcc.target/riscv/sat_u_trunc-6.c: New test.
> * gcc.target/riscv/sat_u_trunc-run-4.c: New test.
> * gcc.target/riscv/sat_u_trunc-run-5.c: New test.
> * gcc.target/riscv/sat_u_trunc-run-6.c: New test.
OK. Sorry for the delays here. I wanted to make sure we had the issues
WRT operand extension resolved before diving into this. But in
retrospect, this probably could have moved forward independently.
Jeff
> OK. Sorry for the delays here. I wanted to make sure we had the issues
> WRT operand extension resolved before diving into this. But in
> retrospect, this probably could have moved forward independently.
That makes a lot of sense to me, thanks a lot.
Pan
-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com>
Sent: Sunday, August 18, 2024 2:21 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar
On 7/22/24 11:06 PM, pan2.li@intel.com wrote:
> From: Pan Li <pan2.li@intel.com>
>
> This patch implements the quad and oct .SAT_TRUNC patterns
> in the RISC-V backend, i.e.:
>
> Form 1:
> #define DEF_SAT_U_TRUC_FMT_1(NT, WT) \
> NT __attribute__((noinline)) \
> sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
> { \
> bool overflow = x > (WT)(NT)(-1); \
> return ((NT)x) | (NT)-overflow; \
> }
>
> DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
>
> Before this patch:
> 4 │ __attribute__((noinline))
> 5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
> 6 │ {
> 7 │ _Bool overflow;
> 8 │ short unsigned int _1;
> 9 │ short unsigned int _2;
> 10 │ short unsigned int _3;
> 11 │ uint16_t _6;
> 12 │
> 13 │ ;; basic block 2, loop depth 0
> 14 │ ;; pred: ENTRY
> 15 │ overflow_5 = x_4(D) > 65535;
> 16 │ _1 = (short unsigned int) x_4(D);
> 17 │ _2 = (short unsigned int) overflow_5;
> 18 │ _3 = -_2;
> 19 │ _6 = _1 | _3;
> 20 │ return _6;
> 21 │ ;; succ: EXIT
> 22 │
> 23 │ }
>
> After this patch:
> 3 │
> 4 │ __attribute__((noinline))
> 5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
> 6 │ {
> 7 │ uint16_t _6;
> 8 │
> 9 │ ;; basic block 2, loop depth 0
> 10 │ ;; pred: ENTRY
> 11 │ _6 = .SAT_TRUNC (x_4(D)); [tail call]
> 12 │ return _6;
> 13 │ ;; succ: EXIT
> 14 │
> 15 │ }
>
> The following test suites passed with this patch:
> 1. The full rv64gcv regression test.
> 2. The rv64gcv build with glibc.
>
> gcc/ChangeLog:
>
> * config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for
> quad truncation.
> (ANYI_OCT_TRUNC): New iterator for oct truncation.
> (ANYI_QUAD_TRUNCATED): New attr for truncated quad modes.
> (ANYI_OCT_TRUNCATED): New attr for truncated oct modes.
> (anyi_quad_truncated): Ditto but for lower case.
> (anyi_oct_truncated): Ditto but for lower case.
> * config/riscv/riscv.md (ustrunc<mode><anyi_quad_truncated>2):
> Add new pattern for quad truncation.
> (ustrunc<mode><anyi_oct_truncated>2): Ditto but for oct.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust
> the expand dump check times.
> * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
> * gcc.target/riscv/sat_arith_data.h: Add test helper macros.
> * gcc.target/riscv/sat_u_trunc-4.c: New test.
> * gcc.target/riscv/sat_u_trunc-5.c: New test.
> * gcc.target/riscv/sat_u_trunc-6.c: New test.
> * gcc.target/riscv/sat_u_trunc-run-4.c: New test.
> * gcc.target/riscv/sat_u_trunc-run-5.c: New test.
> * gcc.target/riscv/sat_u_trunc-run-6.c: New test.
OK. Sorry for the delays here. I wanted to make sure we had the issues
WRT operand extension resolved before diving into this. But in
retrospect, this probably could have moved forward independently.
Jeff
@@ -67,14 +67,34 @@ (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
(define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")])
+(define_mode_iterator ANYI_QUAD_TRUNC [SI (DI "TARGET_64BIT")])
+
+(define_mode_iterator ANYI_OCT_TRUNC [(DI "TARGET_64BIT")])
+
(define_mode_attr ANYI_DOUBLE_TRUNCATED [
(HI "QI") (SI "HI") (DI "SI")
])
+(define_mode_attr ANYI_QUAD_TRUNCATED [
+ (SI "QI") (DI "HI")
+])
+
+(define_mode_attr ANYI_OCT_TRUNCATED [
+ (DI "QI")
+])
+
(define_mode_attr anyi_double_truncated [
(HI "qi") (SI "hi") (DI "si")
])
+(define_mode_attr anyi_quad_truncated [
+ (SI "qi") (DI "hi")
+])
+
+(define_mode_attr anyi_oct_truncated [
+ (DI "qi")
+])
+
;; Iterator for hardware-supported floating-point modes.
(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
(DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
@@ -4325,6 +4325,26 @@ (define_expand "ustrunc<mode><anyi_double_truncated>2"
}
)
+(define_expand "ustrunc<mode><anyi_quad_truncated>2"
+ [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand")
+ (match_operand:ANYI_QUAD_TRUNC 1 "register_operand")]
+ ""
+ {
+ riscv_expand_ustrunc (operands[0], operands[1]);
+ DONE;
+ }
+)
+
+(define_expand "ustrunc<mode><anyi_oct_truncated>2"
+ [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand")
+ (match_operand:ANYI_OCT_TRUNC 1 "register_operand")]
+ ""
+ {
+ riscv_expand_ustrunc (operands[0], operands[1]);
+ DONE;
+ }
+)
+
;; These are forms of (x << C1) + C2, potentially canonicalized from
;; ((x + C2') << C1. Depending on the cost to load C2 vs C2' we may
;; want to go ahead and recognize this form as C2 may be cheaper to
@@ -18,4 +18,4 @@
*/
DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint32_t)
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
@@ -20,4 +20,4 @@
*/
DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint64_t)
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
@@ -14,7 +14,10 @@
#define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2)
TEST_UNARY_STRUCT (uint8_t, uint16_t)
+TEST_UNARY_STRUCT (uint8_t, uint32_t)
+TEST_UNARY_STRUCT (uint8_t, uint64_t)
TEST_UNARY_STRUCT (uint16_t, uint32_t)
+TEST_UNARY_STRUCT (uint16_t, uint64_t)
TEST_UNARY_STRUCT (uint32_t, uint64_t)
TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
@@ -29,6 +32,39 @@ TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
{255, 65535},
};
+TEST_UNARY_STRUCT_DECL(uint8_t, uint32_t) \
+ TEST_UNARY_DATA(uint8_t, uint32_t)[] =
+{
+ { 0, 0},
+ { 2, 2},
+ {254, 254},
+ {255, 255},
+ {255, 256},
+ {255, 65534},
+ {255, 65535},
+ {255, 65536},
+ {255, 4294967294},
+ {255, 4294967295},
+};
+
+TEST_UNARY_STRUCT_DECL(uint8_t, uint64_t) \
+ TEST_UNARY_DATA(uint8_t, uint64_t)[] =
+{
+ { 0, 0},
+ { 2, 2},
+ {254, 254},
+ {255, 255},
+ {255, 256},
+ {255, 65534},
+ {255, 65535},
+ {255, 65536},
+ {255, 4294967294},
+ {255, 4294967295},
+ {255, 4294967296},
+ {255, 18446744073709551614u},
+ {255, 18446744073709551615u},
+};
+
TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
TEST_UNARY_DATA(uint16_t, uint32_t)[] =
{
@@ -41,6 +77,21 @@ TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
{65535, 4294967295},
};
+TEST_UNARY_STRUCT_DECL(uint16_t, uint64_t) \
+ TEST_UNARY_DATA(uint16_t, uint64_t)[] =
+{
+ { 0, 0},
+ { 5, 5},
+ {65534, 65534},
+ {65535, 65535},
+ {65535, 65536},
+ {65535, 4294967294},
+ {65535, 4294967295},
+ {65535, 4294967296},
+ {65535, 18446744073709551614u},
+ {65535, 18446744073709551615u},
+};
+
TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \
TEST_UNARY_DATA(uint32_t, uint64_t)[] =
{
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA TEST_UNARY_DATA_WRAP(T1, T2)
+#define T TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x) RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA TEST_UNARY_DATA_WRAP(T1, T2)
+#define T TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x) RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint16_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA TEST_UNARY_DATA_WRAP(T1, T2)
+#define T TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x) RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"