[v3] i386: Check AX input in any_mul_highpart peepholes
Commit Message
When applying peephole optimization to transform
mov imm, %reg0
mov %reg1, %AX_REG
imul %reg0
to
mov imm, %AX_REG
imul %reg1
disable peephole optimization if reg1 == AX_REG.
gcc/
PR bootstrap/103785
* config/i386/i386.md: Swap operand order in comments and check
AX input in any_mul_highpart peepholes.
gcc/testsuite/
PR bootstrap/103785
* gcc.target/i386/pr103785.c: New test.
---
gcc/config/i386/i386.md | 9 ++++--
gcc/testsuite/gcc.target/i386/pr103785.c | 38 ++++++++++++++++++++++++
2 files changed, 44 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr103785.c
Comments
On Sat, Dec 25, 2021 at 3:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> When applying peephole optimization to transform
>
> mov imm, %reg0
> mov %reg1, %AX_REG
> imul %reg0
>
> to
>
> mov imm, %AX_REG
> imul %reg1
>
> disable peephole optimization if reg1 == AX_REG.
>
> gcc/
>
> PR bootstrap/103785
> * config/i386/i386.md: Swap operand order in comments and check
> AX input in any_mul_highpart peepholes.
>
> gcc/testsuite/
>
> PR bootstrap/103785
> * gcc.target/i386/pr103785.c: New test.
> ---
> gcc/config/i386/i386.md | 9 ++++--
> gcc/testsuite/gcc.target/i386/pr103785.c | 38 ++++++++++++++++++++++++
> 2 files changed, 44 insertions(+), 3 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr103785.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 284b9507466..1c1ec227f71 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -8578,7 +8578,7 @@
> (set_attr "mode" "SI")])
>
> ;; Highpart multiplication peephole2s to tweak register allocation.
> -;; mov %rdx,imm; mov %rax,%rdi; imulq %rdx -> mov %rax,imm; imulq %rdi
> +;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx -> mov imm,%rax; imulq %rdi
> (define_peephole2
> [(set (match_operand:SWI48 0 "general_reg_operand")
> (match_operand:SWI48 1 "immediate_operand"))
> @@ -8588,7 +8588,8 @@
> (any_mul_highpart:SWI48 (match_dup 2) (match_dup 0)))
> (clobber (match_dup 2))
> (clobber (reg:CC FLAGS_REG))])]
> - "REGNO (operands[0]) != REGNO (operands[2])
> + "REGNO (operands[3]) != AX_REG
> + && REGNO (operands[0]) != REGNO (operands[2])
> && REGNO (operands[0]) != REGNO (operands[3])
> && (REGNO (operands[0]) == REGNO (operands[4])
> || peep2_reg_dead_p (3, operands[0]))"
> @@ -8608,7 +8609,9 @@
> (any_mul_highpart:SI (match_dup 2) (match_dup 0))))
> (clobber (match_dup 2))
> (clobber (reg:CC FLAGS_REG))])]
> - "REGNO (operands[0]) != REGNO (operands[2])
> + "REGNO (operands[3]) != AX_REG
Please also add TARGET_64BIT here, this DImode zero-extend pattern
applies to 64bit targets only.
OK with the above change.
Thanks,
Uros.
> + && REGNO (operands[0]) != REGNO (operands[2])
> + && REGNO (operands[2]) != REGNO (operands[3])
> && REGNO (operands[0]) != REGNO (operands[3])
> && (REGNO (operands[0]) == REGNO (operands[4])
> || peep2_reg_dead_p (3, operands[0]))"
> diff --git a/gcc/testsuite/gcc.target/i386/pr103785.c b/gcc/testsuite/gcc.target/i386/pr103785.c
> new file mode 100644
> index 00000000000..5503b965256
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr103785.c
> @@ -0,0 +1,38 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdlib.h>
> +
> +struct wrapper_t
> +{
> + long k;
> + long e;
> +};
> +
> +struct wrapper_t **table;
> +
> +__attribute__ ((weak, regparm (2)))
> +void
> +update (long k, long e)
> +{
> + struct wrapper_t *elmt;
> +
> + elmt = table[k % 3079];
> + if (elmt == 0)
> + return;
> + elmt->e = e;
> +}
> +
> +int
> +main ()
> +{
> + table = (struct wrapper_t **) malloc (20 * sizeof (struct wrapper_t *));
> + for (int i = 0; i < 20; i++)
> + table[i] = (struct wrapper_t *) calloc (sizeof (struct wrapper_t), 1);
> + if (table[10]->e != 0)
> + abort ();
> + update (10, 20);
> + if (table[10]->e != 20)
> + abort ();
> + return 0;
> +}
> --
> 2.33.1
>
On Sun, Dec 26, 2021 at 2:31 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Sat, Dec 25, 2021 at 3:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > When applying peephole optimization to transform
> >
> > mov imm, %reg0
> > mov %reg1, %AX_REG
> > imul %reg0
> >
> > to
> >
> > mov imm, %AX_REG
> > imul %reg1
> >
> > disable peephole optimization if reg1 == AX_REG.
> >
> > gcc/
> >
> > PR bootstrap/103785
> > * config/i386/i386.md: Swap operand order in comments and check
> > AX input in any_mul_highpart peepholes.
> >
> > gcc/testsuite/
> >
> > PR bootstrap/103785
> > * gcc.target/i386/pr103785.c: New test.
> > ---
> > gcc/config/i386/i386.md | 9 ++++--
> > gcc/testsuite/gcc.target/i386/pr103785.c | 38 ++++++++++++++++++++++++
> > 2 files changed, 44 insertions(+), 3 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr103785.c
> >
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index 284b9507466..1c1ec227f71 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -8578,7 +8578,7 @@
> > (set_attr "mode" "SI")])
> >
> > ;; Highpart multiplication peephole2s to tweak register allocation.
> > -;; mov %rdx,imm; mov %rax,%rdi; imulq %rdx -> mov %rax,imm; imulq %rdi
> > +;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx -> mov imm,%rax; imulq %rdi
> > (define_peephole2
> > [(set (match_operand:SWI48 0 "general_reg_operand")
> > (match_operand:SWI48 1 "immediate_operand"))
> > @@ -8588,7 +8588,8 @@
> > (any_mul_highpart:SWI48 (match_dup 2) (match_dup 0)))
> > (clobber (match_dup 2))
> > (clobber (reg:CC FLAGS_REG))])]
> > - "REGNO (operands[0]) != REGNO (operands[2])
> > + "REGNO (operands[3]) != AX_REG
> > + && REGNO (operands[0]) != REGNO (operands[2])
> > && REGNO (operands[0]) != REGNO (operands[3])
> > && (REGNO (operands[0]) == REGNO (operands[4])
> > || peep2_reg_dead_p (3, operands[0]))"
> > @@ -8608,7 +8609,9 @@
> > (any_mul_highpart:SI (match_dup 2) (match_dup 0))))
> > (clobber (match_dup 2))
> > (clobber (reg:CC FLAGS_REG))])]
> > - "REGNO (operands[0]) != REGNO (operands[2])
> > + "REGNO (operands[3]) != AX_REG
>
> Please also add TARGET_64BIT here, this DImode zero-extend pattern
> applies to 64bit targets only.
>
> OK with the above change.
This is the patch I am checking in.
Thanks.
> Thanks,
> Uros.
>
> > + && REGNO (operands[0]) != REGNO (operands[2])
> > + && REGNO (operands[2]) != REGNO (operands[3])
> > && REGNO (operands[0]) != REGNO (operands[3])
> > && (REGNO (operands[0]) == REGNO (operands[4])
> > || peep2_reg_dead_p (3, operands[0]))"
> > diff --git a/gcc/testsuite/gcc.target/i386/pr103785.c b/gcc/testsuite/gcc.target/i386/pr103785.c
> > new file mode 100644
> > index 00000000000..5503b965256
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr103785.c
> > @@ -0,0 +1,38 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdlib.h>
> > +
> > +struct wrapper_t
> > +{
> > + long k;
> > + long e;
> > +};
> > +
> > +struct wrapper_t **table;
> > +
> > +__attribute__ ((weak, regparm (2)))
> > +void
> > +update (long k, long e)
> > +{
> > + struct wrapper_t *elmt;
> > +
> > + elmt = table[k % 3079];
> > + if (elmt == 0)
> > + return;
> > + elmt->e = e;
> > +}
> > +
> > +int
> > +main ()
> > +{
> > + table = (struct wrapper_t **) malloc (20 * sizeof (struct wrapper_t *));
> > + for (int i = 0; i < 20; i++)
> > + table[i] = (struct wrapper_t *) calloc (sizeof (struct wrapper_t), 1);
> > + if (table[10]->e != 0)
> > + abort ();
> > + update (10, 20);
> > + if (table[10]->e != 20)
> > + abort ();
> > + return 0;
> > +}
> > --
> > 2.33.1
> >
@@ -8578,7 +8578,7 @@
(set_attr "mode" "SI")])
;; Highpart multiplication peephole2s to tweak register allocation.
-;; mov %rdx,imm; mov %rax,%rdi; imulq %rdx -> mov %rax,imm; imulq %rdi
+;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx -> mov imm,%rax; imulq %rdi
(define_peephole2
[(set (match_operand:SWI48 0 "general_reg_operand")
(match_operand:SWI48 1 "immediate_operand"))
@@ -8588,7 +8588,8 @@
(any_mul_highpart:SWI48 (match_dup 2) (match_dup 0)))
(clobber (match_dup 2))
(clobber (reg:CC FLAGS_REG))])]
- "REGNO (operands[0]) != REGNO (operands[2])
+ "REGNO (operands[3]) != AX_REG
+ && REGNO (operands[0]) != REGNO (operands[2])
&& REGNO (operands[0]) != REGNO (operands[3])
&& (REGNO (operands[0]) == REGNO (operands[4])
|| peep2_reg_dead_p (3, operands[0]))"
@@ -8608,7 +8609,9 @@
(any_mul_highpart:SI (match_dup 2) (match_dup 0))))
(clobber (match_dup 2))
(clobber (reg:CC FLAGS_REG))])]
- "REGNO (operands[0]) != REGNO (operands[2])
+ "REGNO (operands[3]) != AX_REG
+ && REGNO (operands[0]) != REGNO (operands[2])
+ && REGNO (operands[2]) != REGNO (operands[3])
&& REGNO (operands[0]) != REGNO (operands[3])
&& (REGNO (operands[0]) == REGNO (operands[4])
|| peep2_reg_dead_p (3, operands[0]))"
new file mode 100644
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include <stdlib.h>
+
+struct wrapper_t
+{
+ long k;
+ long e;
+};
+
+struct wrapper_t **table;
+
+__attribute__ ((weak, regparm (2)))
+void
+update (long k, long e)
+{
+ struct wrapper_t *elmt;
+
+ elmt = table[k % 3079];
+ if (elmt == 0)
+ return;
+ elmt->e = e;
+}
+
+int
+main ()
+{
+ table = (struct wrapper_t **) malloc (20 * sizeof (struct wrapper_t *));
+ for (int i = 0; i < 20; i++)
+ table[i] = (struct wrapper_t *) calloc (sizeof (struct wrapper_t), 1);
+ if (table[10]->e != 0)
+ abort ();
+ update (10, 20);
+ if (table[10]->e != 20)
+ abort ();
+ return 0;
+}