tree-optimization: [PR103855] Fold (type)X / (type)Y
Commit Message
This pattern converts (trunc_div (convert a) (convert b)) to
(convert (trunc_div a b)) when:
1. type, a, and b all have unsigned integeral types
2. a and b have the same type precision
3. type has type precision at least as larger as a and b
This is useful as wider divisions are typically more expensive.
To illustrate the effects, consider the following code snippet:
unsigned long long f(unsigned int a, unsigned int b) {
unsigned long long all = a;
return all / b;
}
Without the pattern, g++ -std=c++20 -O2 generates the following
assembly:
f(unsigned int, unsigned int):
mov eax, edi
mov esi, esi
xor edx, edx
div rsi
ret
With the pattern, it generates this:
f(unsigned int, unsigned int):
mov eax, edi
xor edx, edx
div esi
ret
This is identical to what clang++ -std=c++20 -O2 generates.
Signed-off-by: Zhao Wei Liew <zhaoweiliew@gmail.com>
PR tree-optimization/103855
gcc/ChangeLog:
* match.pd: Add pattern for (type)X / (type)Y.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/divide-8.c: New test.
* gcc.dg/tree-ssa/divide-9.c: New test.
---
gcc/match.pd | 15 +++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/divide-8.c | 9 +++++++++
gcc/testsuite/gcc.dg/tree-ssa/divide-9.c | 10 ++++++++++
3 files changed, 34 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/divide-8.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/divide-9.c
Comments
> On 19 Feb 2022, at 5:36 PM, Zhao Wei Liew <zhaoweiliew@gmail.com> wrote:
>
> This pattern converts (trunc_div (convert a) (convert b)) to
> (convert (trunc_div a b)) when:
>
> 1. type, a, and b all have unsigned integeral types
> 2. a and b have the same type precision
> 3. type has type precision at least as larger as a and b
>
> This is useful as wider divisions are typically more expensive.
>
> To illustrate the effects, consider the following code snippet:
>
> unsigned long long f(unsigned int a, unsigned int b) {
> unsigned long long all = a;
> return all / b;
> }
>
> Without the pattern, g++ -std=c++20 -O2 generates the following
> assembly:
>
> f(unsigned int, unsigned int):
> mov eax, edi
> mov esi, esi
> xor edx, edx
> div rsi
> ret
>
> With the pattern, it generates this:
>
> f(unsigned int, unsigned int):
> mov eax, edi
> xor edx, edx
> div esi
> ret
>
> This is identical to what clang++ -std=c++20 -O2 generates.
>
> Signed-off-by: Zhao Wei Liew <zhaoweiliew@gmail.com>
>
> PR tree-optimization/103855
>
> gcc/ChangeLog:
>
> * match.pd: Add pattern for (type)X / (type)Y.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/divide-8.c: New test.
> * gcc.dg/tree-ssa/divide-9.c: New test.
> ---
> gcc/match.pd | 15 +++++++++++++++
> gcc/testsuite/gcc.dg/tree-ssa/divide-8.c | 9 +++++++++
> gcc/testsuite/gcc.dg/tree-ssa/divide-9.c | 10 ++++++++++
> 3 files changed, 34 insertions(+)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/divide-8.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/divide-9.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 10f62284862..393b43756dd 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -684,6 +684,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
> (convert (trunc_mod @0 @1))))
>
> +/* (type)X / (type)Y -> (type)(X / Y)
> + when the resulting type is at least precise as the original types
> + and when all the types are unsigned integral types. */
> +(simplify
> + (trunc_div (convert @0) (convert @1))
> + (if (INTEGRAL_TYPE_P (type)
> + && INTEGRAL_TYPE_P (TREE_TYPE (@0))
> + && INTEGRAL_TYPE_P (TREE_TYPE (@1))
> + && TYPE_UNSIGNED (type)
> + && TYPE_UNSIGNED (TREE_TYPE (@0))
> + && TYPE_UNSIGNED (TREE_TYPE (@1))
> + && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (TREE_TYPE (@1))
> + && TYPE_PRECISION (type) >= TYPE_PRECISION (TREE_TYPE (@0)))
> + (convert (trunc_div @0 @1))))
> +
> /* x * (1 + y / x) - y -> x - y % x */
> (simplify
> (minus (mult:cs @0 (plus:s (trunc_div:s @1 @0) integer_onep)) @1)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-8.c b/gcc/testsuite/gcc.dg/tree-ssa/divide-8.c
> new file mode 100644
> index 00000000000..489604c4eb6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-8.c
> @@ -0,0 +1,9 @@
> +/* PR tree-optimization/103855 */
> +/* { dg-options "-O -fdump-tree-optimized" } */
> +
> +unsigned int f(unsigned int a, unsigned int b) {
> + unsigned long long all = a;
> + return all / b;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "\(unsigned long long int)" "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-9.c b/gcc/testsuite/gcc.dg/tree-ssa/divide-9.c
> new file mode 100644
> index 00000000000..3e75a49b509
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-9.c
> @@ -0,0 +1,10 @@
> +/* PR tree-optimization/103855 */
> +/* { dg-options "-O -fdump-tree-optimized" } */
> +
> +unsigned long long f(unsigned int a, unsigned int b) {
> + unsigned long long all = a;
> + return all / b;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\\(unsigned long long int\\\)" 1 "optimized" } } */
> +
> --
> 2.35.1
>
Sorry, I noticed issues with the test cases when running a regression test.
I’ll complete regression testing before uploading a v2.
@@ -684,6 +684,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
(convert (trunc_mod @0 @1))))
+/* (type)X / (type)Y -> (type)(X / Y)
+ when the resulting type is at least precise as the original types
+ and when all the types are unsigned integral types. */
+(simplify
+ (trunc_div (convert @0) (convert @1))
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+ && TYPE_UNSIGNED (type)
+ && TYPE_UNSIGNED (TREE_TYPE (@0))
+ && TYPE_UNSIGNED (TREE_TYPE (@1))
+ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (TREE_TYPE (@1))
+ && TYPE_PRECISION (type) >= TYPE_PRECISION (TREE_TYPE (@0)))
+ (convert (trunc_div @0 @1))))
+
/* x * (1 + y / x) - y -> x - y % x */
(simplify
(minus (mult:cs @0 (plus:s (trunc_div:s @1 @0) integer_onep)) @1)
new file mode 100644
@@ -0,0 +1,9 @@
+/* PR tree-optimization/103855 */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+unsigned int f(unsigned int a, unsigned int b) {
+ unsigned long long all = a;
+ return all / b;
+}
+
+/* { dg-final { scan-tree-dump-not "\(unsigned long long int)" "optimized" } } */
new file mode 100644
@@ -0,0 +1,10 @@
+/* PR tree-optimization/103855 */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+unsigned long long f(unsigned int a, unsigned int b) {
+ unsigned long long all = a;
+ return all / b;
+}
+
+/* { dg-final { scan-tree-dump-times "\\\(unsigned long long int\\\)" 1 "optimized" } } */
+