[Bug,tree-optimization/109429] ivopts: fixed complexities
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
success
|
Test passed
|
Commit Message
From 0130d3cb01fd9d5c1c997003245ed57bbdeb00a2 Mon Sep 17 00:00:00 2001
From: Aleksandar <aleksandar.rakic@htecgroup.com>
Date: Fri, 23 Aug 2024 11:36:50 +0200
Subject: [PATCH] [Bug tree-optimization/109429] ivopts: fixed complexities
This patch addresses a bug introduced in commit f9f69dd by
correcting the complexity calculation in ivopts. The fix involves
complexity computation reordering and proper invariant variables
handling in address expressions. These changes align with the
approach used in parent commit c2b64ce. The improved complexity
calculations ensure better candidate selection and reduced code
size, particularly for RISC CPUs.
Signed-off-by: Aleksandar Rakic <aleksandar.rakic@htecgroup.com>
Signed-off-by: Jovan Dmitrovic <jovan.dmitrovic@htecgroup.com>
gcc/ChangeLog:
* tree-ssa-loop-ivopts.cc (get_address_cost): Fixed
complexity calculation.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/bug_tree-optimization_109429.c: New
test.
---
.../tree-ssa/bug_tree-optimization_109429.c | 25 +++++++++++++++++++
gcc/tree-ssa-loop-ivopts.cc | 15 +++++++----
2 files changed, 35 insertions(+), 5 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bug_tree-optimization_109429.c
Comments
On Wed, Sep 4, 2024 at 4:01 PM Aleksandar Rakic
<aleksandar.rakic@htecgroup.com> wrote:
>
> From 0130d3cb01fd9d5c1c997003245ed57bbdeb00a2 Mon Sep 17 00:00:00 2001
> From: Aleksandar <aleksandar.rakic@htecgroup.com>
> Date: Fri, 23 Aug 2024 11:36:50 +0200
> Subject: [PATCH] [Bug tree-optimization/109429] ivopts: fixed complexities
>
> This patch addresses a bug introduced in commit f9f69dd by
> correcting the complexity calculation in ivopts. The fix involves
> complexity computation reordering and proper invariant variables
> handling in address expressions. These changes align with the
> approach used in parent commit c2b64ce. The improved complexity
> calculations ensure better candidate selection and reduced code
> size, particularly for RISC CPUs.
>
> Signed-off-by: Aleksandar Rakic <aleksandar.rakic@htecgroup.com>
> Signed-off-by: Jovan Dmitrovic <jovan.dmitrovic@htecgroup.com>
>
> gcc/ChangeLog:
>
> * tree-ssa-loop-ivopts.cc (get_address_cost): Fixed
> complexity calculation.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/bug_tree-optimization_109429.c: New
> test.
> ---
> .../tree-ssa/bug_tree-optimization_109429.c | 25 +++++++++++++++++++
> gcc/tree-ssa-loop-ivopts.cc | 15 +++++++----
> 2 files changed, 35 insertions(+), 5 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bug_tree-optimization_109429.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bug_tree-optimization_109429.c b/gcc/testsuite/gcc.dg/tree-ssa/bug_tree-optimization_109429.c
> new file mode 100644
> index 00000000000..516ce39d486
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/bug_tree-optimization_109429.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile { target mips64-r6-linux-gnu } } */
> +/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
> +/* This test ensures that complexity must be greater than zero if there is
> + an invariant variable or an invariant expression in the address
> + expression. */
> +
> +void daxpy (float *vector1, float *vector2, int n, float fp_const)
> +{
> + for (int i = 0; i < n; ++i)
> + vector1[i] += fp_const * vector2[i];
> +}
> +
> +void dgefa (float *vector, int m, int n, int l)
> +{
> + for (int i = 0; i < n - 1; ++i)
> + {
> + for (int j = i + 1; j < n; ++j)
> + {
> + float t = vector[m * j + l];
> + daxpy (&vector[m * i + i + 1],
> + &vector[m * j + i + 1], n - (i + 1), t);
> + }
> + }
> +}
> +
> +
> +/* { dg-final { scan-tree-dump-not "Processing loop 3(.*\n)*<IV Groups>:(.*\n)*Group 1:\n Type:.*ADDRESS(.*\n)*Group 1:\n cand\tcost\tcompl\.\tinv\.expr\.\tinv\.vars\n(.*\n)*(.+\t.+\t0\t\\d+(, \\d+)*;\t.+\n)(.*\n)*Group 2:\n cand\tcost\tcompl\.\tinv\.expr\.\tinv\.vars\n(.*\n)*Selected IV set for loop 3" "ivopts" { target { mips64-r6-linux-gnu } } } } */
> +
> +
> +/* { dg-final { scan-tree-dump-not "Processing loop 3(.*\n)*<IV Groups>:(.*\n)*Group 1:\n Type:.*ADDRESS(.*\n)*Group 1:\n cand\tcost\tcompl\.\tinv\.expr\.\tinv\.vars\n(.*\n)*(.+\t.+\t0\t.+\t\\d+(, \\d+)*\n)(.*\n)*Group 2:\n cand\tcost\tcompl\.\tinv\.expr\.\tinv\.vars\n(.*\n)*Selected IV set for loop 3" "ivopts" { target { mips64-r6-linux-gnu } } } } */
> +
> +
> diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
> index 7cae5bdefea..84c33103938 100644
> --- a/gcc/tree-ssa-loop-ivopts.cc
> +++ b/gcc/tree-ssa-loop-ivopts.cc
> @@ -4733,6 +4733,7 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
> /* Only true if ratio != 1. */
> bool ok_with_ratio_p = false;
> bool ok_without_ratio_p = false;
> + bool inv_present = false;
> code_helper code = ERROR_MARK;
>
> if (use->type == USE_PTR_ADDRESS)
> @@ -4744,6 +4745,7 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
>
> if (!aff_combination_const_p (aff_inv))
> {
> + inv_present = true;
> parts.index = integer_one_node;
> /* Addressing mode "base + index". */
> ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
> @@ -4755,8 +4757,8 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
> if (!ok_with_ratio_p)
> parts.step = NULL_TREE;
> }
> - if (ok_with_ratio_p || ok_without_ratio_p)
> - {
> + if (!(ok_with_ratio_p || ok_without_ratio_p))
> + parts.index = NULL_TREE;
Indent below is definitely off now.
I have a hard time reverse engineering what 'inv_present' really means given
the code does not add or adjust any comments.
> if (maybe_ne (aff_inv->offset, 0))
> {
> parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
> @@ -4770,7 +4772,10 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
> move_fixed_address_to_symbol (&parts, aff_inv);
> /* Base is fixed address and is moved to symbol part. */
> if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
> + {
> parts.base = NULL_TREE;
> + inv_present = false;
> + }
>
> /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
> if (parts.symbol != NULL_TREE
> @@ -4783,10 +4788,8 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
> simple_inv = false;
> /* Symbol part is moved back to base part, it can't be NULL. */
> parts.base = integer_one_node;
> + inv_present = true;
> }
> - }
> - else
> - parts.index = NULL_TREE;
> }
> else
> {
> @@ -4856,6 +4859,8 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
>
> if (parts.symbol != NULL_TREE)
> cost.complexity += 1;
> + if (inv_present)
> + cost.complexity += 1;
Can one express inv_present as a condition on the actual parts?
> /* Don't increase the complexity of adding a scaled index if it's
> the only kind of index that the target allows. */
> if (parts.step != NULL_TREE && ok_without_ratio_p)
> --
> 2.34.1
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile { target mips64-r6-linux-gnu } } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+/* This test ensures that complexity must be greater than zero if there is
+ an invariant variable or an invariant expression in the address
+ expression. */
+
+void daxpy (float *vector1, float *vector2, int n, float fp_const)
+{
+ for (int i = 0; i < n; ++i)
+ vector1[i] += fp_const * vector2[i];
+}
+
+void dgefa (float *vector, int m, int n, int l)
+{
+ for (int i = 0; i < n - 1; ++i)
+ {
+ for (int j = i + 1; j < n; ++j)
+ {
+ float t = vector[m * j + l];
+ daxpy (&vector[m * i + i + 1],
+ &vector[m * j + i + 1], n - (i + 1), t);
+ }
+ }
+}
+
+
+/* { dg-final { scan-tree-dump-not "Processing loop 3(.*\n)*<IV Groups>:(.*\n)*Group 1:\n Type:.*ADDRESS(.*\n)*Group 1:\n cand\tcost\tcompl\.\tinv\.expr\.\tinv\.vars\n(.*\n)*(.+\t.+\t0\t\\d+(, \\d+)*;\t.+\n)(.*\n)*Group 2:\n cand\tcost\tcompl\.\tinv\.expr\.\tinv\.vars\n(.*\n)*Selected IV set for loop 3" "ivopts" { target { mips64-r6-linux-gnu } } } } */
+
+
+/* { dg-final { scan-tree-dump-not "Processing loop 3(.*\n)*<IV Groups>:(.*\n)*Group 1:\n Type:.*ADDRESS(.*\n)*Group 1:\n cand\tcost\tcompl\.\tinv\.expr\.\tinv\.vars\n(.*\n)*(.+\t.+\t0\t.+\t\\d+(, \\d+)*\n)(.*\n)*Group 2:\n cand\tcost\tcompl\.\tinv\.expr\.\tinv\.vars\n(.*\n)*Selected IV set for loop 3" "ivopts" { target { mips64-r6-linux-gnu } } } } */
+
+
@@ -4733,6 +4733,7 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
/* Only true if ratio != 1. */
bool ok_with_ratio_p = false;
bool ok_without_ratio_p = false;
+ bool inv_present = false;
code_helper code = ERROR_MARK;
if (use->type == USE_PTR_ADDRESS)
@@ -4744,6 +4745,7 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
if (!aff_combination_const_p (aff_inv))
{
+ inv_present = true;
parts.index = integer_one_node;
/* Addressing mode "base + index". */
ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
@@ -4755,8 +4757,8 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
if (!ok_with_ratio_p)
parts.step = NULL_TREE;
}
- if (ok_with_ratio_p || ok_without_ratio_p)
- {
+ if (!(ok_with_ratio_p || ok_without_ratio_p))
+ parts.index = NULL_TREE;
if (maybe_ne (aff_inv->offset, 0))
{
parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
@@ -4770,7 +4772,10 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
move_fixed_address_to_symbol (&parts, aff_inv);
/* Base is fixed address and is moved to symbol part. */
if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
+ {
parts.base = NULL_TREE;
+ inv_present = false;
+ }
/* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
if (parts.symbol != NULL_TREE
@@ -4783,10 +4788,8 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
simple_inv = false;
/* Symbol part is moved back to base part, it can't be NULL. */
parts.base = integer_one_node;
+ inv_present = true;
}
- }
- else
- parts.index = NULL_TREE;
}
else
{
@@ -4856,6 +4859,8 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
if (parts.symbol != NULL_TREE)
cost.complexity += 1;
+ if (inv_present)
+ cost.complexity += 1;
/* Don't increase the complexity of adding a scaled index if it's
the only kind of index that the target allows. */
if (parts.step != NULL_TREE && ok_without_ratio_p)