[aarch64] PR107920 - Fix incorrect handling of virtual operands in svld1rq_impl::fold

Message ID CAAgBjMnaLY=sigq_+fXpBZ++UpEw4AD_XdNL4H-1Gy4Knp+cAw@mail.gmail.com
State Committed
Headers
Series [aarch64] PR107920 - Fix incorrect handling of virtual operands in svld1rq_impl::fold |

Commit Message

Prathamesh Kulkarni Dec. 2, 2022, 7:21 a.m. UTC
  Hi,
The following test:

#include "arm_sve.h"

svint8_t
test_s8(int8_t *x)
{
  return svld1rq_s8 (svptrue_b8 (), &x[0]);
}

ICE's with -march=armv8.2-a+sve -O1 -fno-tree-ccp -fno-tree-forwprop:
during GIMPLE pass: fre
pr107920.c: In function ‘test_s8’:
pr107920.c:7:1: internal compiler error: in execute_todo, at passes.cc:2140
    7 | }
      | ^
0x7b03d0 execute_todo
        ../../gcc/gcc/passes.cc:2140

because of incorrect handling of virtual operands in svld1rq_impl::fold:
 # VUSE <.MEM>
  _5 = MEM <vector(16) signed char> [(signed char * {ref-all})x_3(D)];
  _4 = VEC_PERM_EXPR <_5, _5, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, ... }>;
  # VUSE <.MEM_2(D)>
  return _4;

The attached patch tries to fix the issue by building the replacement
statements in gimple_seq, and passing it to gsi_replace_with_seq_vops,
which resolves the ICE, and results in:
  <bb 2> :
  # VUSE <.MEM_2(D)>
  _5 = MEM <vector(16) signed char> [(signed char * {ref-all})x_3(D)];
  _4 = VEC_PERM_EXPR <_5, _5, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, ... }>;
  # VUSE <.MEM_2(D)>
  return _4;

Bootstrapped+tested on aarch64-linux-gnu.
OK to commit ?

Thanks,
Prathamesh
  

Comments

Richard Sandiford Dec. 5, 2022, 6:38 p.m. UTC | #1
Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> Hi,
> The following test:
>
> #include "arm_sve.h"
>
> svint8_t
> test_s8(int8_t *x)
> {
>   return svld1rq_s8 (svptrue_b8 (), &x[0]);
> }
>
> ICE's with -march=armv8.2-a+sve -O1 -fno-tree-ccp -fno-tree-forwprop:
> during GIMPLE pass: fre
> pr107920.c: In function ‘test_s8’:
> pr107920.c:7:1: internal compiler error: in execute_todo, at passes.cc:2140
>     7 | }
>       | ^
> 0x7b03d0 execute_todo
>         ../../gcc/gcc/passes.cc:2140
>
> because of incorrect handling of virtual operands in svld1rq_impl::fold:
>  # VUSE <.MEM>
>   _5 = MEM <vector(16) signed char> [(signed char * {ref-all})x_3(D)];
>   _4 = VEC_PERM_EXPR <_5, _5, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
> 12, 13, 14, 15, ... }>;
>   # VUSE <.MEM_2(D)>
>   return _4;
>
> The attached patch tries to fix the issue by building the replacement
> statements in gimple_seq, and passing it to gsi_replace_with_seq_vops,
> which resolves the ICE, and results in:
>   <bb 2> :
>   # VUSE <.MEM_2(D)>
>   _5 = MEM <vector(16) signed char> [(signed char * {ref-all})x_3(D)];
>   _4 = VEC_PERM_EXPR <_5, _5, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
> 12, 13, 14, 15, ... }>;
>   # VUSE <.MEM_2(D)>
>   return _4;
>
> Bootstrapped+tested on aarch64-linux-gnu.
> OK to commit ?

Looks good, but we also need to deal with the -fnon-call-exceptions
point that Andrew made in the PR trail.  An easy way of testing
would be to make sure that:

#include "arm_sve.h"

svint8_t
test_s8(int8_t *x)
{
  try
    {
      return svld1rq_s8 (svptrue_b8 (), &x[0]);
    }
  catch (...)
    {
      return svdup_s8 (1);
    }
}

compiled with -fnon-call-exceptions still has a call to __cxa_begin_catch.

I don't think it's worth optimising this case.  Let's just add
!flag_non_call_exceptions to the test.

The patch is missing a changelog btw.

Thanks,
Richard

> Thanks,
> Prathamesh
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index 6347407555f..f5546a65d22 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -45,6 +45,7 @@
>  #include "aarch64-sve-builtins-base.h"
>  #include "aarch64-sve-builtins-functions.h"
>  #include "ssa.h"
> +#include "gimple-fold.h"
>  
>  using namespace aarch64_sve;
>  
> @@ -1232,7 +1233,9 @@ public:
>  	tree mem_ref_op = fold_build2 (MEM_REF, access_type, arg1, zero);
>  	gimple *mem_ref_stmt
>  	  = gimple_build_assign (mem_ref_lhs, mem_ref_op);
> -	gsi_insert_before (f.gsi, mem_ref_stmt, GSI_SAME_STMT);
> +
> +	gimple_seq stmts = NULL;
> +	gimple_seq_add_stmt_without_update (&stmts, mem_ref_stmt);
>  
>  	int source_nelts = TYPE_VECTOR_SUBPARTS (access_type).to_constant ();
>  	vec_perm_builder sel (lhs_len, source_nelts, 1);
> @@ -1245,8 +1248,11 @@ public:
>  						   indices));
>  	tree mask_type = build_vector_type (ssizetype, lhs_len);
>  	tree mask = vec_perm_indices_to_tree (mask_type, indices);
> -	return gimple_build_assign (lhs, VEC_PERM_EXPR,
> -				    mem_ref_lhs, mem_ref_lhs, mask);
> +	gimple *g2 = gimple_build_assign (lhs, VEC_PERM_EXPR,
> +					  mem_ref_lhs, mem_ref_lhs, mask);
> +	gimple_seq_add_stmt_without_update (&stmts, g2);
> +	gsi_replace_with_seq_vops (f.gsi, stmts);
> +	return g2;
>        }
>  
>      return NULL;
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index c2d9c806aee..03cdb2f9f49 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -591,7 +591,7 @@ fold_gimple_assign (gimple_stmt_iterator *si)
>     If the statement has a lhs the last stmt in the sequence is expected
>     to assign to that lhs.  */
>  
> -static void
> +void
>  gsi_replace_with_seq_vops (gimple_stmt_iterator *si_p, gimple_seq stmts)
>  {
>    gimple *stmt = gsi_stmt (*si_p);
> diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
> index 7d29ee9a9a4..87ed4e56d25 100644
> --- a/gcc/gimple-fold.h
> +++ b/gcc/gimple-fold.h
> @@ -63,6 +63,7 @@ extern bool arith_code_with_undefined_signed_overflow (tree_code);
>  extern gimple_seq rewrite_to_defined_overflow (gimple *, bool = false);
>  extern void replace_call_with_value (gimple_stmt_iterator *, tree);
>  extern tree tree_vec_extract (gimple_stmt_iterator *, tree, tree, tree, tree);
> +extern void gsi_replace_with_seq_vops (gimple_stmt_iterator *, gimple_seq);
>  
>  /* gimple_build, functionally matching fold_buildN, outputs stmts
>     int the provided sequence, matching and simplifying them on-the-fly.
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c
> new file mode 100644
> index 00000000000..11448ed5e68
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fno-tree-ccp -fno-tree-forwprop" } */
> +
> +#include "arm_sve.h"
> +
> +svint8_t
> +test_s8(int8_t *x)
> +{
> +  return svld1rq_s8 (svptrue_b8 (), &x[0]);
> +}
  
Prathamesh Kulkarni Dec. 6, 2022, 2:13 a.m. UTC | #2
On Tue, 6 Dec 2022 at 00:08, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> > Hi,
> > The following test:
> >
> > #include "arm_sve.h"
> >
> > svint8_t
> > test_s8(int8_t *x)
> > {
> >   return svld1rq_s8 (svptrue_b8 (), &x[0]);
> > }
> >
> > ICE's with -march=armv8.2-a+sve -O1 -fno-tree-ccp -fno-tree-forwprop:
> > during GIMPLE pass: fre
> > pr107920.c: In function ‘test_s8’:
> > pr107920.c:7:1: internal compiler error: in execute_todo, at passes.cc:2140
> >     7 | }
> >       | ^
> > 0x7b03d0 execute_todo
> >         ../../gcc/gcc/passes.cc:2140
> >
> > because of incorrect handling of virtual operands in svld1rq_impl::fold:
> >  # VUSE <.MEM>
> >   _5 = MEM <vector(16) signed char> [(signed char * {ref-all})x_3(D)];
> >   _4 = VEC_PERM_EXPR <_5, _5, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
> > 12, 13, 14, 15, ... }>;
> >   # VUSE <.MEM_2(D)>
> >   return _4;
> >
> > The attached patch tries to fix the issue by building the replacement
> > statements in gimple_seq, and passing it to gsi_replace_with_seq_vops,
> > which resolves the ICE, and results in:
> >   <bb 2> :
> >   # VUSE <.MEM_2(D)>
> >   _5 = MEM <vector(16) signed char> [(signed char * {ref-all})x_3(D)];
> >   _4 = VEC_PERM_EXPR <_5, _5, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
> > 12, 13, 14, 15, ... }>;
> >   # VUSE <.MEM_2(D)>
> >   return _4;
> >
> > Bootstrapped+tested on aarch64-linux-gnu.
> > OK to commit ?
>
> Looks good, but we also need to deal with the -fnon-call-exceptions
> point that Andrew made in the PR trail.  An easy way of testing
> would be to make sure that:
>
> #include "arm_sve.h"
>
> svint8_t
> test_s8(int8_t *x)
> {
>   try
>     {
>       return svld1rq_s8 (svptrue_b8 (), &x[0]);
>     }
>   catch (...)
>     {
>       return svdup_s8 (1);
>     }
> }
>
> compiled with -fnon-call-exceptions still has a call to __cxa_begin_catch.
>
> I don't think it's worth optimising this case.  Let's just add
> !flag_non_call_exceptions to the test.
>
> The patch is missing a changelog btw.
Thanks for the suggestions. Is the attached patch OK to commit after
bootstrap+test ?

Thanks,
Prathamesh
>
> Thanks,
> Richard
>
> > Thanks,
> > Prathamesh
> >
> > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> > index 6347407555f..f5546a65d22 100644
> > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> > @@ -45,6 +45,7 @@
> >  #include "aarch64-sve-builtins-base.h"
> >  #include "aarch64-sve-builtins-functions.h"
> >  #include "ssa.h"
> > +#include "gimple-fold.h"
> >
> >  using namespace aarch64_sve;
> >
> > @@ -1232,7 +1233,9 @@ public:
> >       tree mem_ref_op = fold_build2 (MEM_REF, access_type, arg1, zero);
> >       gimple *mem_ref_stmt
> >         = gimple_build_assign (mem_ref_lhs, mem_ref_op);
> > -     gsi_insert_before (f.gsi, mem_ref_stmt, GSI_SAME_STMT);
> > +
> > +     gimple_seq stmts = NULL;
> > +     gimple_seq_add_stmt_without_update (&stmts, mem_ref_stmt);
> >
> >       int source_nelts = TYPE_VECTOR_SUBPARTS (access_type).to_constant ();
> >       vec_perm_builder sel (lhs_len, source_nelts, 1);
> > @@ -1245,8 +1248,11 @@ public:
> >                                                  indices));
> >       tree mask_type = build_vector_type (ssizetype, lhs_len);
> >       tree mask = vec_perm_indices_to_tree (mask_type, indices);
> > -     return gimple_build_assign (lhs, VEC_PERM_EXPR,
> > -                                 mem_ref_lhs, mem_ref_lhs, mask);
> > +     gimple *g2 = gimple_build_assign (lhs, VEC_PERM_EXPR,
> > +                                       mem_ref_lhs, mem_ref_lhs, mask);
> > +     gimple_seq_add_stmt_without_update (&stmts, g2);
> > +     gsi_replace_with_seq_vops (f.gsi, stmts);
> > +     return g2;
> >        }
> >
> >      return NULL;
> > diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> > index c2d9c806aee..03cdb2f9f49 100644
> > --- a/gcc/gimple-fold.cc
> > +++ b/gcc/gimple-fold.cc
> > @@ -591,7 +591,7 @@ fold_gimple_assign (gimple_stmt_iterator *si)
> >     If the statement has a lhs the last stmt in the sequence is expected
> >     to assign to that lhs.  */
> >
> > -static void
> > +void
> >  gsi_replace_with_seq_vops (gimple_stmt_iterator *si_p, gimple_seq stmts)
> >  {
> >    gimple *stmt = gsi_stmt (*si_p);
> > diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
> > index 7d29ee9a9a4..87ed4e56d25 100644
> > --- a/gcc/gimple-fold.h
> > +++ b/gcc/gimple-fold.h
> > @@ -63,6 +63,7 @@ extern bool arith_code_with_undefined_signed_overflow (tree_code);
> >  extern gimple_seq rewrite_to_defined_overflow (gimple *, bool = false);
> >  extern void replace_call_with_value (gimple_stmt_iterator *, tree);
> >  extern tree tree_vec_extract (gimple_stmt_iterator *, tree, tree, tree, tree);
> > +extern void gsi_replace_with_seq_vops (gimple_stmt_iterator *, gimple_seq);
> >
> >  /* gimple_build, functionally matching fold_buildN, outputs stmts
> >     int the provided sequence, matching and simplifying them on-the-fly.
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c
> > new file mode 100644
> > index 00000000000..11448ed5e68
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c
> > @@ -0,0 +1,10 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O1 -fno-tree-ccp -fno-tree-forwprop" } */
> > +
> > +#include "arm_sve.h"
> > +
> > +svint8_t
> > +test_s8(int8_t *x)
> > +{
> > +  return svld1rq_s8 (svptrue_b8 (), &x[0]);
> > +}
gcc/ChangeLog:
	PR target/107920
	* config/aarch64/aarch64-sve-builtins-base.cc: Use
	gsi_replace_with_seq_vops to handle virtual operands, and gate
	the transform on !flag_non_call_exceptions.
	* gimple-fold.cc (gsi_replace_with_seq_vops): Make function non static.
	* gimple-fold.h (gsi_replace_with_seq_vops): Declare.

gcc/testsuite/ChangeLog:
	PR target/107920
	* gcc.target/aarch64/sve/acle/general/pr107920.c: New test.

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 6347407555f..d52ec083ed0 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -45,6 +45,7 @@
 #include "aarch64-sve-builtins-base.h"
 #include "aarch64-sve-builtins-functions.h"
 #include "ssa.h"
+#include "gimple-fold.h"
 
 using namespace aarch64_sve;
 
@@ -1209,7 +1210,8 @@ public:
        vectype is the corresponding ADVSIMD type.  */
 
     if (!BYTES_BIG_ENDIAN
-	&& integer_all_onesp (arg0))
+	&& integer_all_onesp (arg0)
+	&& !flag_non_call_exceptions)
       {
 	tree lhs = gimple_call_lhs (f.call);
 	tree lhs_type = TREE_TYPE (lhs);
@@ -1232,7 +1234,9 @@ public:
 	tree mem_ref_op = fold_build2 (MEM_REF, access_type, arg1, zero);
 	gimple *mem_ref_stmt
 	  = gimple_build_assign (mem_ref_lhs, mem_ref_op);
-	gsi_insert_before (f.gsi, mem_ref_stmt, GSI_SAME_STMT);
+
+	gimple_seq stmts = NULL;
+	gimple_seq_add_stmt_without_update (&stmts, mem_ref_stmt);
 
 	int source_nelts = TYPE_VECTOR_SUBPARTS (access_type).to_constant ();
 	vec_perm_builder sel (lhs_len, source_nelts, 1);
@@ -1245,8 +1249,11 @@ public:
 						   indices));
 	tree mask_type = build_vector_type (ssizetype, lhs_len);
 	tree mask = vec_perm_indices_to_tree (mask_type, indices);
-	return gimple_build_assign (lhs, VEC_PERM_EXPR,
-				    mem_ref_lhs, mem_ref_lhs, mask);
+	gimple *g2 = gimple_build_assign (lhs, VEC_PERM_EXPR,
+					  mem_ref_lhs, mem_ref_lhs, mask);
+	gimple_seq_add_stmt_without_update (&stmts, g2);
+	gsi_replace_with_seq_vops (f.gsi, stmts);
+	return g2;
       }
 
     return NULL;
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index c2d9c806aee..03cdb2f9f49 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -591,7 +591,7 @@ fold_gimple_assign (gimple_stmt_iterator *si)
    If the statement has a lhs the last stmt in the sequence is expected
    to assign to that lhs.  */
 
-static void
+void
 gsi_replace_with_seq_vops (gimple_stmt_iterator *si_p, gimple_seq stmts)
 {
   gimple *stmt = gsi_stmt (*si_p);
diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
index 7d29ee9a9a4..87ed4e56d25 100644
--- a/gcc/gimple-fold.h
+++ b/gcc/gimple-fold.h
@@ -63,6 +63,7 @@ extern bool arith_code_with_undefined_signed_overflow (tree_code);
 extern gimple_seq rewrite_to_defined_overflow (gimple *, bool = false);
 extern void replace_call_with_value (gimple_stmt_iterator *, tree);
 extern tree tree_vec_extract (gimple_stmt_iterator *, tree, tree, tree, tree);
+extern void gsi_replace_with_seq_vops (gimple_stmt_iterator *, gimple_seq);
 
 /* gimple_build, functionally matching fold_buildN, outputs stmts
    int the provided sequence, matching and simplifying them on-the-fly.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c
new file mode 100644
index 00000000000..11448ed5e68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fno-tree-ccp -fno-tree-forwprop" } */
+
+#include "arm_sve.h"
+
+svint8_t
+test_s8(int8_t *x)
+{
+  return svld1rq_s8 (svptrue_b8 (), &x[0]);
+}
  
Richard Sandiford Dec. 6, 2022, 7:25 a.m. UTC | #3
Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> On Tue, 6 Dec 2022 at 00:08, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
>> > Hi,
>> > The following test:
>> >
>> > #include "arm_sve.h"
>> >
>> > svint8_t
>> > test_s8(int8_t *x)
>> > {
>> >   return svld1rq_s8 (svptrue_b8 (), &x[0]);
>> > }
>> >
>> > ICE's with -march=armv8.2-a+sve -O1 -fno-tree-ccp -fno-tree-forwprop:
>> > during GIMPLE pass: fre
>> > pr107920.c: In function ‘test_s8’:
>> > pr107920.c:7:1: internal compiler error: in execute_todo, at passes.cc:2140
>> >     7 | }
>> >       | ^
>> > 0x7b03d0 execute_todo
>> >         ../../gcc/gcc/passes.cc:2140
>> >
>> > because of incorrect handling of virtual operands in svld1rq_impl::fold:
>> >  # VUSE <.MEM>
>> >   _5 = MEM <vector(16) signed char> [(signed char * {ref-all})x_3(D)];
>> >   _4 = VEC_PERM_EXPR <_5, _5, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
>> > 12, 13, 14, 15, ... }>;
>> >   # VUSE <.MEM_2(D)>
>> >   return _4;
>> >
>> > The attached patch tries to fix the issue by building the replacement
>> > statements in gimple_seq, and passing it to gsi_replace_with_seq_vops,
>> > which resolves the ICE, and results in:
>> >   <bb 2> :
>> >   # VUSE <.MEM_2(D)>
>> >   _5 = MEM <vector(16) signed char> [(signed char * {ref-all})x_3(D)];
>> >   _4 = VEC_PERM_EXPR <_5, _5, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
>> > 12, 13, 14, 15, ... }>;
>> >   # VUSE <.MEM_2(D)>
>> >   return _4;
>> >
>> > Bootstrapped+tested on aarch64-linux-gnu.
>> > OK to commit ?
>>
>> Looks good, but we also need to deal with the -fnon-call-exceptions
>> point that Andrew made in the PR trail.  An easy way of testing
>> would be to make sure that:
>>
>> #include "arm_sve.h"
>>
>> svint8_t
>> test_s8(int8_t *x)
>> {
>>   try
>>     {
>>       return svld1rq_s8 (svptrue_b8 (), &x[0]);
>>     }
>>   catch (...)
>>     {
>>       return svdup_s8 (1);
>>     }
>> }
>>
>> compiled with -fnon-call-exceptions still has a call to __cxa_begin_catch.
>>
>> I don't think it's worth optimising this case.  Let's just add
>> !flag_non_call_exceptions to the test.
>>
>> The patch is missing a changelog btw.
> Thanks for the suggestions. Is the attached patch OK to commit after
> bootstrap+test ?

Please add the test above to g++.target/aarch64/sve, with a scan-assembler
for __cxa_begin_catch.  OK with that change, thanks.

Richard

>
> Thanks,
> Prathamesh
>>
>> Thanks,
>> Richard
>>
>> > Thanks,
>> > Prathamesh
>> >
>> > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> > index 6347407555f..f5546a65d22 100644
>> > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> > @@ -45,6 +45,7 @@
>> >  #include "aarch64-sve-builtins-base.h"
>> >  #include "aarch64-sve-builtins-functions.h"
>> >  #include "ssa.h"
>> > +#include "gimple-fold.h"
>> >
>> >  using namespace aarch64_sve;
>> >
>> > @@ -1232,7 +1233,9 @@ public:
>> >       tree mem_ref_op = fold_build2 (MEM_REF, access_type, arg1, zero);
>> >       gimple *mem_ref_stmt
>> >         = gimple_build_assign (mem_ref_lhs, mem_ref_op);
>> > -     gsi_insert_before (f.gsi, mem_ref_stmt, GSI_SAME_STMT);
>> > +
>> > +     gimple_seq stmts = NULL;
>> > +     gimple_seq_add_stmt_without_update (&stmts, mem_ref_stmt);
>> >
>> >       int source_nelts = TYPE_VECTOR_SUBPARTS (access_type).to_constant ();
>> >       vec_perm_builder sel (lhs_len, source_nelts, 1);
>> > @@ -1245,8 +1248,11 @@ public:
>> >                                                  indices));
>> >       tree mask_type = build_vector_type (ssizetype, lhs_len);
>> >       tree mask = vec_perm_indices_to_tree (mask_type, indices);
>> > -     return gimple_build_assign (lhs, VEC_PERM_EXPR,
>> > -                                 mem_ref_lhs, mem_ref_lhs, mask);
>> > +     gimple *g2 = gimple_build_assign (lhs, VEC_PERM_EXPR,
>> > +                                       mem_ref_lhs, mem_ref_lhs, mask);
>> > +     gimple_seq_add_stmt_without_update (&stmts, g2);
>> > +     gsi_replace_with_seq_vops (f.gsi, stmts);
>> > +     return g2;
>> >        }
>> >
>> >      return NULL;
>> > diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
>> > index c2d9c806aee..03cdb2f9f49 100644
>> > --- a/gcc/gimple-fold.cc
>> > +++ b/gcc/gimple-fold.cc
>> > @@ -591,7 +591,7 @@ fold_gimple_assign (gimple_stmt_iterator *si)
>> >     If the statement has a lhs the last stmt in the sequence is expected
>> >     to assign to that lhs.  */
>> >
>> > -static void
>> > +void
>> >  gsi_replace_with_seq_vops (gimple_stmt_iterator *si_p, gimple_seq stmts)
>> >  {
>> >    gimple *stmt = gsi_stmt (*si_p);
>> > diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
>> > index 7d29ee9a9a4..87ed4e56d25 100644
>> > --- a/gcc/gimple-fold.h
>> > +++ b/gcc/gimple-fold.h
>> > @@ -63,6 +63,7 @@ extern bool arith_code_with_undefined_signed_overflow (tree_code);
>> >  extern gimple_seq rewrite_to_defined_overflow (gimple *, bool = false);
>> >  extern void replace_call_with_value (gimple_stmt_iterator *, tree);
>> >  extern tree tree_vec_extract (gimple_stmt_iterator *, tree, tree, tree, tree);
>> > +extern void gsi_replace_with_seq_vops (gimple_stmt_iterator *, gimple_seq);
>> >
>> >  /* gimple_build, functionally matching fold_buildN, outputs stmts
>> >     int the provided sequence, matching and simplifying them on-the-fly.
>> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c
>> > new file mode 100644
>> > index 00000000000..11448ed5e68
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c
>> > @@ -0,0 +1,10 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-options "-O1 -fno-tree-ccp -fno-tree-forwprop" } */
>> > +
>> > +#include "arm_sve.h"
>> > +
>> > +svint8_t
>> > +test_s8(int8_t *x)
>> > +{
>> > +  return svld1rq_s8 (svptrue_b8 (), &x[0]);
>> > +}
>
> gcc/ChangeLog:
> 	PR target/107920
> 	* config/aarch64/aarch64-sve-builtins-base.cc: Use
> 	gsi_replace_with_seq_vops to handle virtual operands, and gate
> 	the transform on !flag_non_call_exceptions.
> 	* gimple-fold.cc (gsi_replace_with_seq_vops): Make function non static.
> 	* gimple-fold.h (gsi_replace_with_seq_vops): Declare.
>
> gcc/testsuite/ChangeLog:
> 	PR target/107920
> 	* gcc.target/aarch64/sve/acle/general/pr107920.c: New test.
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index 6347407555f..d52ec083ed0 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -45,6 +45,7 @@
>  #include "aarch64-sve-builtins-base.h"
>  #include "aarch64-sve-builtins-functions.h"
>  #include "ssa.h"
> +#include "gimple-fold.h"
>  
>  using namespace aarch64_sve;
>  
> @@ -1209,7 +1210,8 @@ public:
>         vectype is the corresponding ADVSIMD type.  */
>  
>      if (!BYTES_BIG_ENDIAN
> -	&& integer_all_onesp (arg0))
> +	&& integer_all_onesp (arg0)
> +	&& !flag_non_call_exceptions)
>        {
>  	tree lhs = gimple_call_lhs (f.call);
>  	tree lhs_type = TREE_TYPE (lhs);
> @@ -1232,7 +1234,9 @@ public:
>  	tree mem_ref_op = fold_build2 (MEM_REF, access_type, arg1, zero);
>  	gimple *mem_ref_stmt
>  	  = gimple_build_assign (mem_ref_lhs, mem_ref_op);
> -	gsi_insert_before (f.gsi, mem_ref_stmt, GSI_SAME_STMT);
> +
> +	gimple_seq stmts = NULL;
> +	gimple_seq_add_stmt_without_update (&stmts, mem_ref_stmt);
>  
>  	int source_nelts = TYPE_VECTOR_SUBPARTS (access_type).to_constant ();
>  	vec_perm_builder sel (lhs_len, source_nelts, 1);
> @@ -1245,8 +1249,11 @@ public:
>  						   indices));
>  	tree mask_type = build_vector_type (ssizetype, lhs_len);
>  	tree mask = vec_perm_indices_to_tree (mask_type, indices);
> -	return gimple_build_assign (lhs, VEC_PERM_EXPR,
> -				    mem_ref_lhs, mem_ref_lhs, mask);
> +	gimple *g2 = gimple_build_assign (lhs, VEC_PERM_EXPR,
> +					  mem_ref_lhs, mem_ref_lhs, mask);
> +	gimple_seq_add_stmt_without_update (&stmts, g2);
> +	gsi_replace_with_seq_vops (f.gsi, stmts);
> +	return g2;
>        }
>  
>      return NULL;
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index c2d9c806aee..03cdb2f9f49 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -591,7 +591,7 @@ fold_gimple_assign (gimple_stmt_iterator *si)
>     If the statement has a lhs the last stmt in the sequence is expected
>     to assign to that lhs.  */
>  
> -static void
> +void
>  gsi_replace_with_seq_vops (gimple_stmt_iterator *si_p, gimple_seq stmts)
>  {
>    gimple *stmt = gsi_stmt (*si_p);
> diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
> index 7d29ee9a9a4..87ed4e56d25 100644
> --- a/gcc/gimple-fold.h
> +++ b/gcc/gimple-fold.h
> @@ -63,6 +63,7 @@ extern bool arith_code_with_undefined_signed_overflow (tree_code);
>  extern gimple_seq rewrite_to_defined_overflow (gimple *, bool = false);
>  extern void replace_call_with_value (gimple_stmt_iterator *, tree);
>  extern tree tree_vec_extract (gimple_stmt_iterator *, tree, tree, tree, tree);
> +extern void gsi_replace_with_seq_vops (gimple_stmt_iterator *, gimple_seq);
>  
>  /* gimple_build, functionally matching fold_buildN, outputs stmts
>     int the provided sequence, matching and simplifying them on-the-fly.
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c
> new file mode 100644
> index 00000000000..11448ed5e68
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fno-tree-ccp -fno-tree-forwprop" } */
> +
> +#include "arm_sve.h"
> +
> +svint8_t
> +test_s8(int8_t *x)
> +{
> +  return svld1rq_s8 (svptrue_b8 (), &x[0]);
> +}
  

Patch

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 6347407555f..f5546a65d22 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -45,6 +45,7 @@ 
 #include "aarch64-sve-builtins-base.h"
 #include "aarch64-sve-builtins-functions.h"
 #include "ssa.h"
+#include "gimple-fold.h"
 
 using namespace aarch64_sve;
 
@@ -1232,7 +1233,9 @@  public:
 	tree mem_ref_op = fold_build2 (MEM_REF, access_type, arg1, zero);
 	gimple *mem_ref_stmt
 	  = gimple_build_assign (mem_ref_lhs, mem_ref_op);
-	gsi_insert_before (f.gsi, mem_ref_stmt, GSI_SAME_STMT);
+
+	gimple_seq stmts = NULL;
+	gimple_seq_add_stmt_without_update (&stmts, mem_ref_stmt);
 
 	int source_nelts = TYPE_VECTOR_SUBPARTS (access_type).to_constant ();
 	vec_perm_builder sel (lhs_len, source_nelts, 1);
@@ -1245,8 +1248,11 @@  public:
 						   indices));
 	tree mask_type = build_vector_type (ssizetype, lhs_len);
 	tree mask = vec_perm_indices_to_tree (mask_type, indices);
-	return gimple_build_assign (lhs, VEC_PERM_EXPR,
-				    mem_ref_lhs, mem_ref_lhs, mask);
+	gimple *g2 = gimple_build_assign (lhs, VEC_PERM_EXPR,
+					  mem_ref_lhs, mem_ref_lhs, mask);
+	gimple_seq_add_stmt_without_update (&stmts, g2);
+	gsi_replace_with_seq_vops (f.gsi, stmts);
+	return g2;
       }
 
     return NULL;
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index c2d9c806aee..03cdb2f9f49 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -591,7 +591,7 @@  fold_gimple_assign (gimple_stmt_iterator *si)
    If the statement has a lhs the last stmt in the sequence is expected
    to assign to that lhs.  */
 
-static void
+void
 gsi_replace_with_seq_vops (gimple_stmt_iterator *si_p, gimple_seq stmts)
 {
   gimple *stmt = gsi_stmt (*si_p);
diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
index 7d29ee9a9a4..87ed4e56d25 100644
--- a/gcc/gimple-fold.h
+++ b/gcc/gimple-fold.h
@@ -63,6 +63,7 @@  extern bool arith_code_with_undefined_signed_overflow (tree_code);
 extern gimple_seq rewrite_to_defined_overflow (gimple *, bool = false);
 extern void replace_call_with_value (gimple_stmt_iterator *, tree);
 extern tree tree_vec_extract (gimple_stmt_iterator *, tree, tree, tree, tree);
+extern void gsi_replace_with_seq_vops (gimple_stmt_iterator *, gimple_seq);
 
 /* gimple_build, functionally matching fold_buildN, outputs stmts
    int the provided sequence, matching and simplifying them on-the-fly.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c
new file mode 100644
index 00000000000..11448ed5e68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr107920.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O1 -fno-tree-ccp -fno-tree-forwprop" } */
+
+#include "arm_sve.h"
+
+svint8_t
+test_s8(int8_t *x)
+{
+  return svld1rq_s8 (svptrue_b8 (), &x[0]);
+}