diff mbox series

[v1] RTL: Bugfix ICE after allow vector type in DSE

Message ID	20240226032558.587912-1-pan2.li@intel.com
State	New
Headers	DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 0DE8B3858D1E From: pan2.li@intel.com To: gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai, kito.cheng@gmail.com, richard.guenther@gmail.com, yanzhang.wang@intel.com, rdapp.gcc@gmail.com, Pan Li <pan2.li@intel.com> Subject: [PATCH v1] RTL: Bugfix ICE after allow vector type in DSE Date: Mon, 26 Feb 2024 11:25:58 +0800 Message-Id: <20240226032558.587912-1-pan2.li@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: list Errors-To: gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org
Series	[v1] RTL: Bugfix ICE after allow vector type in DSE \| [v1] RTL: Bugfix ICE after allow vector type in DSE

Checks

Context	Check	Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm	success	Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64	success	Testing passed

Commit Message

Li, Pan2 Feb. 26, 2024, 3:25 a.m. UTC

  From: Pan Li <pan2.li@intel.com>

We allowed vector type for get_stored_val when read is less than or
equal to store in previous.  Unfortunately, we missed to adjust the
validate_subreg part accordingly.  For vector type, we don't need to
restrict the mode size is greater than the vector register size.

Thus, for example when gen_lowpart from E_V2SFmode to E_V4QImode, it
will have NULL_RTX(of course ICE after that) because of the mode size
is less than vector register size.  That also explain that gen_lowpart
from E_V8SFmode to E_V16QImode is valid here.

This patch would like to remove the the restriction for vector mode, to
rid of the ICE when gen_lowpart because of validate_subreg fails.

The below test are passed for this patch:

* The X86 bootstrap test.
* The fully riscv regression tests.

gcc/ChangeLog:

	* emit-rtl.cc (validate_subreg): Bypass register size check
	if the mode is vector.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/ssa-fre-44.c: Add ftree-vectorize to trigger
	the ICE.
	* gcc.target/riscv/rvv/base/bug-6.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/emit-rtl.cc                               |  3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c    |  2 +-
 .../gcc.target/riscv/rvv/base/bug-6.c         | 22 +++++++++++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c

Comments

Hongtao Liu Feb. 26, 2024, 3:41 a.m. UTC | #1

On Mon, Feb 26, 2024 at 11:26 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> We allowed vector type for get_stored_val when read is less than or
> equal to store in previous.  Unfortunately, we missed to adjust the
> validate_subreg part accordingly.  For vector type, we don't need to
> restrict the mode size is greater than the vector register size.
>
> Thus, for example when gen_lowpart from E_V2SFmode to E_V4QImode, it
> will have NULL_RTX(of course ICE after that) because of the mode size
> is less than vector register size.  That also explain that gen_lowpart
> from E_V8SFmode to E_V16QImode is valid here.
>
> This patch would like to remove the the restriction for vector mode, to
> rid of the ICE when gen_lowpart because of validate_subreg fails.
Be Careful, It may regresses some other backend.
>
> The below test are passed for this patch:
>
> * The X86 bootstrap test.
> * The fully riscv regression tests.
>
> gcc/ChangeLog:
>
>         * emit-rtl.cc (validate_subreg): Bypass register size check
>         if the mode is vector.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/ssa-fre-44.c: Add ftree-vectorize to trigger
>         the ICE.
>         * gcc.target/riscv/rvv/base/bug-6.c: New test.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/emit-rtl.cc                               |  3 ++-
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c    |  2 +-
>  .../gcc.target/riscv/rvv/base/bug-6.c         | 22 +++++++++++++++++++
>  3 files changed, 25 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
>
> diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
> index 1856fa4884f..45c6301b487 100644
> --- a/gcc/emit-rtl.cc
> +++ b/gcc/emit-rtl.cc
> @@ -934,7 +934,8 @@ validate_subreg (machine_mode omode, machine_mode imode,
>      ;
>    /* ??? Similarly, e.g. with (subreg:DF (reg:TI)).  Though store_bit_field
>       is the culprit here, and not the backends.  */
> -  else if (known_ge (osize, regsize) && known_ge (isize, osize))
> +  else if (known_ge (isize, osize) && (known_ge (osize, regsize)
> +    || (VECTOR_MODE_P (imode) || VECTOR_MODE_P (omode))))
>      ;
>    /* Allow component subregs of complex and vector.  Though given the below
>       extraction rules, it's not always clear what that means.  */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> index f79b4c142ae..624a00a4f32 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-fre1" } */
> +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
>
>  struct A { float x, y; };
>  struct B { struct A u; };
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> new file mode 100644
> index 00000000000..5bb00b8f587
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> @@ -0,0 +1,22 @@
> +/* Test that we do not have ice when compile */
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
> +
> +struct A { float x, y; };
> +struct B { struct A u; };
> +
> +extern void bar (struct A *);
> +
> +float
> +f3 (struct B *x, int y)
> +{
> +  struct A p = {1.0f, 2.0f};
> +  struct A *q = &x[y].u;
> +
> +  __builtin_memcpy (&q->x, &p.x, sizeof (float));
> +  __builtin_memcpy (&q->y, &p.y, sizeof (float));
> +
> +  bar (&p);
> +
> +  return x[y].u.x + x[y].u.y;
> +}
> --
> 2.34.1
>

Li, Pan2 Feb. 26, 2024, 3:42 a.m. UTC | #2

> Be Careful, It may regresses some other backend.

Thanks Hongtao, how about take INNER_MODE here for regsize. Currently it will be the whole vector register when comparation.

poly_uint64 regsize = REGMODE_NATURAL_SIZE (imode);

Pan

-----Original Message-----
From: Hongtao Liu <crazylht@gmail.com> 
Sent: Monday, February 26, 2024 11:41 AM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] RTL: Bugfix ICE after allow vector type in DSE

On Mon, Feb 26, 2024 at 11:26 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> We allowed vector type for get_stored_val when read is less than or
> equal to store in previous.  Unfortunately, we missed to adjust the
> validate_subreg part accordingly.  For vector type, we don't need to
> restrict the mode size is greater than the vector register size.
>
> Thus, for example when gen_lowpart from E_V2SFmode to E_V4QImode, it
> will have NULL_RTX(of course ICE after that) because of the mode size
> is less than vector register size.  That also explain that gen_lowpart
> from E_V8SFmode to E_V16QImode is valid here.
>
> This patch would like to remove the the restriction for vector mode, to
> rid of the ICE when gen_lowpart because of validate_subreg fails.
Be Careful, It may regresses some other backend.
>
> The below test are passed for this patch:
>
> * The X86 bootstrap test.
> * The fully riscv regression tests.
>
> gcc/ChangeLog:
>
>         * emit-rtl.cc (validate_subreg): Bypass register size check
>         if the mode is vector.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/ssa-fre-44.c: Add ftree-vectorize to trigger
>         the ICE.
>         * gcc.target/riscv/rvv/base/bug-6.c: New test.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/emit-rtl.cc                               |  3 ++-
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c    |  2 +-
>  .../gcc.target/riscv/rvv/base/bug-6.c         | 22 +++++++++++++++++++
>  3 files changed, 25 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
>
> diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
> index 1856fa4884f..45c6301b487 100644
> --- a/gcc/emit-rtl.cc
> +++ b/gcc/emit-rtl.cc
> @@ -934,7 +934,8 @@ validate_subreg (machine_mode omode, machine_mode imode,
>      ;
>    /* ??? Similarly, e.g. with (subreg:DF (reg:TI)).  Though store_bit_field
>       is the culprit here, and not the backends.  */
> -  else if (known_ge (osize, regsize) && known_ge (isize, osize))
> +  else if (known_ge (isize, osize) && (known_ge (osize, regsize)
> +    || (VECTOR_MODE_P (imode) || VECTOR_MODE_P (omode))))
>      ;
>    /* Allow component subregs of complex and vector.  Though given the below
>       extraction rules, it's not always clear what that means.  */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> index f79b4c142ae..624a00a4f32 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-fre1" } */
> +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
>
>  struct A { float x, y; };
>  struct B { struct A u; };
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> new file mode 100644
> index 00000000000..5bb00b8f587
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> @@ -0,0 +1,22 @@
> +/* Test that we do not have ice when compile */
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
> +
> +struct A { float x, y; };
> +struct B { struct A u; };
> +
> +extern void bar (struct A *);
> +
> +float
> +f3 (struct B *x, int y)
> +{
> +  struct A p = {1.0f, 2.0f};
> +  struct A *q = &x[y].u;
> +
> +  __builtin_memcpy (&q->x, &p.x, sizeof (float));
> +  __builtin_memcpy (&q->y, &p.y, sizeof (float));
> +
> +  bar (&p);
> +
> +  return x[y].u.x + x[y].u.y;
> +}
> --
> 2.34.1
>


-- 
BR,
Hongtao

Hongtao Liu Feb. 26, 2024, 5:13 a.m. UTC | #3

On Mon, Feb 26, 2024 at 11:42 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> > Be Careful, It may regresses some other backend.
>
> Thanks Hongtao, how about take INNER_MODE here for regsize. Currently it will be the whole vector register when comparation.
>
> poly_uint64 regsize = REGMODE_NATURAL_SIZE (imode);
>
> Pan
>
> -----Original Message-----
> From: Hongtao Liu <crazylht@gmail.com>
> Sent: Monday, February 26, 2024 11:41 AM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v1] RTL: Bugfix ICE after allow vector type in DSE
>
> On Mon, Feb 26, 2024 at 11:26 AM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > We allowed vector type for get_stored_val when read is less than or
> > equal to store in previous.  Unfortunately, we missed to adjust the
> > validate_subreg part accordingly.  For vector type, we don't need to
> > restrict the mode size is greater than the vector register size.
> >
> > Thus, for example when gen_lowpart from E_V2SFmode to E_V4QImode, it
> > will have NULL_RTX(of course ICE after that) because of the mode size
> > is less than vector register size.  That also explain that gen_lowpart
> > from E_V8SFmode to E_V16QImode is valid here.
> >
> > This patch would like to remove the the restriction for vector mode, to
> > rid of the ICE when gen_lowpart because of validate_subreg fails.
> Be Careful, It may regresses some other backend.
The related thread.
https://gcc.gnu.org/pipermail/gcc-patches/2021-August/578466.html
> >
> > The below test are passed for this patch:
> >
> > * The X86 bootstrap test.
> > * The fully riscv regression tests.
> >
> > gcc/ChangeLog:
> >
> >         * emit-rtl.cc (validate_subreg): Bypass register size check
> >         if the mode is vector.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.dg/tree-ssa/ssa-fre-44.c: Add ftree-vectorize to trigger
> >         the ICE.
> >         * gcc.target/riscv/rvv/base/bug-6.c: New test.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/emit-rtl.cc                               |  3 ++-
> >  gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c    |  2 +-
> >  .../gcc.target/riscv/rvv/base/bug-6.c         | 22 +++++++++++++++++++
> >  3 files changed, 25 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> >
> > diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
> > index 1856fa4884f..45c6301b487 100644
> > --- a/gcc/emit-rtl.cc
> > +++ b/gcc/emit-rtl.cc
> > @@ -934,7 +934,8 @@ validate_subreg (machine_mode omode, machine_mode imode,
> >      ;
> >    /* ??? Similarly, e.g. with (subreg:DF (reg:TI)).  Though store_bit_field
> >       is the culprit here, and not the backends.  */
> > -  else if (known_ge (osize, regsize) && known_ge (isize, osize))
> > +  else if (known_ge (isize, osize) && (known_ge (osize, regsize)
> > +    || (VECTOR_MODE_P (imode) || VECTOR_MODE_P (omode))))
> >      ;
> >    /* Allow component subregs of complex and vector.  Though given the below
> >       extraction rules, it's not always clear what that means.  */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> > index f79b4c142ae..624a00a4f32 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-O -fdump-tree-fre1" } */
> > +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
> >
> >  struct A { float x, y; };
> >  struct B { struct A u; };
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> > new file mode 100644
> > index 00000000000..5bb00b8f587
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> > @@ -0,0 +1,22 @@
> > +/* Test that we do not have ice when compile */
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
> > +
> > +struct A { float x, y; };
> > +struct B { struct A u; };
> > +
> > +extern void bar (struct A *);
> > +
> > +float
> > +f3 (struct B *x, int y)
> > +{
> > +  struct A p = {1.0f, 2.0f};
> > +  struct A *q = &x[y].u;
> > +
> > +  __builtin_memcpy (&q->x, &p.x, sizeof (float));
> > +  __builtin_memcpy (&q->y, &p.y, sizeof (float));
> > +
> > +  bar (&p);
> > +
> > +  return x[y].u.x + x[y].u.y;
> > +}
> > --
> > 2.34.1
> >
>
>
> --
> BR,
> Hongtao



--
BR,
Hongtao

Richard Biener Feb. 26, 2024, 7:38 a.m. UTC | #4

On Mon, Feb 26, 2024 at 4:26 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> We allowed vector type for get_stored_val when read is less than or
> equal to store in previous.  Unfortunately, we missed to adjust the
> validate_subreg part accordingly.  For vector type, we don't need to
> restrict the mode size is greater than the vector register size.
>
> Thus, for example when gen_lowpart from E_V2SFmode to E_V4QImode, it
> will have NULL_RTX(of course ICE after that) because of the mode size
> is less than vector register size.  That also explain that gen_lowpart
> from E_V8SFmode to E_V16QImode is valid here.
>
> This patch would like to remove the the restriction for vector mode, to
> rid of the ICE when gen_lowpart because of validate_subreg fails.

validate_subreg is a can of worms, can you try to fix the issue in DSE
by avoiding to form the subreg in the first place?

> The below test are passed for this patch:
>
> * The X86 bootstrap test.
> * The fully riscv regression tests.
>
> gcc/ChangeLog:
>
>         * emit-rtl.cc (validate_subreg): Bypass register size check
>         if the mode is vector.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/ssa-fre-44.c: Add ftree-vectorize to trigger
>         the ICE.
>         * gcc.target/riscv/rvv/base/bug-6.c: New test.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/emit-rtl.cc                               |  3 ++-
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c    |  2 +-
>  .../gcc.target/riscv/rvv/base/bug-6.c         | 22 +++++++++++++++++++
>  3 files changed, 25 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
>
> diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
> index 1856fa4884f..45c6301b487 100644
> --- a/gcc/emit-rtl.cc
> +++ b/gcc/emit-rtl.cc
> @@ -934,7 +934,8 @@ validate_subreg (machine_mode omode, machine_mode imode,
>      ;
>    /* ??? Similarly, e.g. with (subreg:DF (reg:TI)).  Though store_bit_field
>       is the culprit here, and not the backends.  */
> -  else if (known_ge (osize, regsize) && known_ge (isize, osize))
> +  else if (known_ge (isize, osize) && (known_ge (osize, regsize)
> +    || (VECTOR_MODE_P (imode) || VECTOR_MODE_P (omode))))
>      ;
>    /* Allow component subregs of complex and vector.  Though given the below
>       extraction rules, it's not always clear what that means.  */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> index f79b4c142ae..624a00a4f32 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-fre1" } */
> +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
>
>  struct A { float x, y; };
>  struct B { struct A u; };
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> new file mode 100644
> index 00000000000..5bb00b8f587
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> @@ -0,0 +1,22 @@
> +/* Test that we do not have ice when compile */
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
> +
> +struct A { float x, y; };
> +struct B { struct A u; };
> +
> +extern void bar (struct A *);
> +
> +float
> +f3 (struct B *x, int y)
> +{
> +  struct A p = {1.0f, 2.0f};
> +  struct A *q = &x[y].u;
> +
> +  __builtin_memcpy (&q->x, &p.x, sizeof (float));
> +  __builtin_memcpy (&q->y, &p.y, sizeof (float));
> +
> +  bar (&p);
> +
> +  return x[y].u.x + x[y].u.y;
> +}
> --
> 2.34.1
>

Li, Pan2 Feb. 26, 2024, 7:41 a.m. UTC | #5

> validate_subreg is a can of worms, can you try to fix the issue in DSE
> by avoiding to form the subreg in the first place?

Sure thing, will have a try in v2.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Monday, February 26, 2024 3:38 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] RTL: Bugfix ICE after allow vector type in DSE

On Mon, Feb 26, 2024 at 4:26 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> We allowed vector type for get_stored_val when read is less than or
> equal to store in previous.  Unfortunately, we missed to adjust the
> validate_subreg part accordingly.  For vector type, we don't need to
> restrict the mode size is greater than the vector register size.
>
> Thus, for example when gen_lowpart from E_V2SFmode to E_V4QImode, it
> will have NULL_RTX(of course ICE after that) because of the mode size
> is less than vector register size.  That also explain that gen_lowpart
> from E_V8SFmode to E_V16QImode is valid here.
>
> This patch would like to remove the the restriction for vector mode, to
> rid of the ICE when gen_lowpart because of validate_subreg fails.

validate_subreg is a can of worms, can you try to fix the issue in DSE
by avoiding to form the subreg in the first place?

> The below test are passed for this patch:
>
> * The X86 bootstrap test.
> * The fully riscv regression tests.
>
> gcc/ChangeLog:
>
>         * emit-rtl.cc (validate_subreg): Bypass register size check
>         if the mode is vector.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/ssa-fre-44.c: Add ftree-vectorize to trigger
>         the ICE.
>         * gcc.target/riscv/rvv/base/bug-6.c: New test.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/emit-rtl.cc                               |  3 ++-
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c    |  2 +-
>  .../gcc.target/riscv/rvv/base/bug-6.c         | 22 +++++++++++++++++++
>  3 files changed, 25 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
>
> diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
> index 1856fa4884f..45c6301b487 100644
> --- a/gcc/emit-rtl.cc
> +++ b/gcc/emit-rtl.cc
> @@ -934,7 +934,8 @@ validate_subreg (machine_mode omode, machine_mode imode,
>      ;
>    /* ??? Similarly, e.g. with (subreg:DF (reg:TI)).  Though store_bit_field
>       is the culprit here, and not the backends.  */
> -  else if (known_ge (osize, regsize) && known_ge (isize, osize))
> +  else if (known_ge (isize, osize) && (known_ge (osize, regsize)
> +    || (VECTOR_MODE_P (imode) || VECTOR_MODE_P (omode))))
>      ;
>    /* Allow component subregs of complex and vector.  Though given the below
>       extraction rules, it's not always clear what that means.  */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> index f79b4c142ae..624a00a4f32 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-fre1" } */
> +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
>
>  struct A { float x, y; };
>  struct B { struct A u; };
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> new file mode 100644
> index 00000000000..5bb00b8f587
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> @@ -0,0 +1,22 @@
> +/* Test that we do not have ice when compile */
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
> +
> +struct A { float x, y; };
> +struct B { struct A u; };
> +
> +extern void bar (struct A *);
> +
> +float
> +f3 (struct B *x, int y)
> +{
> +  struct A p = {1.0f, 2.0f};
> +  struct A *q = &x[y].u;
> +
> +  __builtin_memcpy (&q->x, &p.x, sizeof (float));
> +  __builtin_memcpy (&q->y, &p.y, sizeof (float));
> +
> +  bar (&p);
> +
> +  return x[y].u.x + x[y].u.y;
> +}
> --
> 2.34.1
>

diff mbox series

Patch

diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index 1856fa4884f..45c6301b487 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -934,7 +934,8 @@  validate_subreg (machine_mode omode, machine_mode imode,
     ;
   /* ??? Similarly, e.g. with (subreg:DF (reg:TI)).  Though store_bit_field
      is the culprit here, and not the backends.  */
-  else if (known_ge (osize, regsize) && known_ge (isize, osize))
+  else if (known_ge (isize, osize) && (known_ge (osize, regsize)
+    || (VECTOR_MODE_P (imode) || VECTOR_MODE_P (omode))))
     ;
   /* Allow component subregs of complex and vector.  Though given the below
      extraction rules, it's not always clear what that means.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
index f79b4c142ae..624a00a4f32 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-fre1" } */
+/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
 
 struct A { float x, y; };
 struct B { struct A u; };
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
new file mode 100644
index 00000000000..5bb00b8f587
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
@@ -0,0 +1,22 @@ 
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
+
+struct A { float x, y; };
+struct B { struct A u; };
+
+extern void bar (struct A *);
+
+float
+f3 (struct B *x, int y)
+{
+  struct A p = {1.0f, 2.0f};
+  struct A *q = &x[y].u;
+
+  __builtin_memcpy (&q->x, &p.x, sizeof (float));
+  __builtin_memcpy (&q->y, &p.y, sizeof (float));
+
+  bar (&p);
+
+  return x[y].u.x + x[y].u.y;
+}