tree-optimization/109170 - bogus use-after-free with __builtin_expect

Message ID 20230317121833.16A961346F@imap2.suse-dmz.suse.de
State New
Headers
Series tree-optimization/109170 - bogus use-after-free with __builtin_expect |

Commit Message

Richard Biener March 17, 2023, 12:18 p.m. UTC
  The following adds a missing range-op for __builtin_expect which
helps -Wuse-after-free to detect the case a realloc original
pointer is used when the result was NULL.

Bootstrap and regtest running on x86_64-unknown-linux-gnu, OK?

	PR tree-optimization/109170
	* gimple-range-op.cc (cfn_expect): New.
	(gimple_range_op_handler::maybe_builtin_call): Handle
	__builtin_expect.

	* gcc.dg/Wuse-after-free-pr109170.c: New testcase.
---
 gcc/gimple-range-op.cc                        | 25 +++++++++++++++++++
 .../gcc.dg/Wuse-after-free-pr109170.c         | 15 +++++++++++
 2 files changed, 40 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
  

Comments

Jakub Jelinek March 17, 2023, 12:43 p.m. UTC | #1
On Fri, Mar 17, 2023 at 01:18:32PM +0100, Richard Biener wrote:
> The following adds a missing range-op for __builtin_expect which
> helps -Wuse-after-free to detect the case a realloc original
> pointer is used when the result was NULL.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu, OK?
> 
> 	PR tree-optimization/109170
> 	* gimple-range-op.cc (cfn_expect): New.
> 	(gimple_range_op_handler::maybe_builtin_call): Handle
> 	__builtin_expect.
> 
> 	* gcc.dg/Wuse-after-free-pr109170.c: New testcase.

Shouldn't that be something we handle generically for all
ERF_RETURNS_ARG calls (and not just for irange, but for any
supported ranges)?

Though, admittedly __builtin_expect probably doesn't set that
and all the other current builtins with ERF_RETURNS_ARG return
pointers I think.

So, I think LGTM, but give Andrew/Aldy a chance to chime in.

> diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
> index a5d625387e7..f3d6e29e58d 100644
> --- a/gcc/gimple-range-op.cc
> +++ b/gcc/gimple-range-op.cc
> @@ -309,6 +309,25 @@ public:
>    }
>  } op_cfn_constant_p;
>  
> +// Implement range operator for integral CFN_BUILT_IN_EXPECT.
> +class cfn_expect : public range_operator
> +{
> +public:
> +  using range_operator::fold_range;
> +  virtual bool fold_range (irange &r, tree, const irange &lh,
> +			   const irange &, relation_trio) const
> +  {
> +    r = lh;
> +    return true;
> +  }
> +  virtual bool op1_range (irange &r, tree, const irange &lhs,
> +			  const irange &, relation_trio) const
> +  {
> +    r = lhs;
> +    return true;
> +  }
> +} op_cfn_expect;
> +
>  // Implement range operator for CFN_BUILT_IN_SIGNBIT.
>  class cfn_signbit : public range_operator_float
>  {
> @@ -966,6 +985,12 @@ gimple_range_op_handler::maybe_builtin_call ()
>        m_int = &op_cfn_parity;
>        break;
>  
> +    case CFN_BUILT_IN_EXPECT:
> +      m_valid = true;
> +      m_op1 = gimple_call_arg (call, 0);
> +      m_int = &op_cfn_expect;
> +      break;
> +
>      default:
>        break;
>      }
> diff --git a/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
> new file mode 100644
> index 00000000000..fa7dc66d66c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -Wuse-after-free" } */
> +
> +unsigned long bufmax = 0;
> +unsigned long __open_catalog_bufmax;
> +void *realloc(void *, __SIZE_TYPE__);
> +void free(void *);
> +
> +void __open_catalog(char *buf)
> +{
> +  char *old_buf = buf;
> +  buf = realloc (buf, bufmax);
> +  if (__builtin_expect ((buf == ((void *)0)), 0))
> +    free (old_buf); /* { dg-bogus "used after" } */
> +}
> -- 
> 2.35.3

	Jakub
  
Richard Biener March 17, 2023, 12:53 p.m. UTC | #2
On Fri, 17 Mar 2023, Jakub Jelinek wrote:

> On Fri, Mar 17, 2023 at 01:18:32PM +0100, Richard Biener wrote:
> > The following adds a missing range-op for __builtin_expect which
> > helps -Wuse-after-free to detect the case a realloc original
> > pointer is used when the result was NULL.
> > 
> > Bootstrap and regtest running on x86_64-unknown-linux-gnu, OK?
> > 
> > 	PR tree-optimization/109170
> > 	* gimple-range-op.cc (cfn_expect): New.
> > 	(gimple_range_op_handler::maybe_builtin_call): Handle
> > 	__builtin_expect.
> > 
> > 	* gcc.dg/Wuse-after-free-pr109170.c: New testcase.
> 
> Shouldn't that be something we handle generically for all
> ERF_RETURNS_ARG calls (and not just for irange, but for any
> supported ranges)?
> 
> Though, admittedly __builtin_expect probably doesn't set that
> and all the other current builtins with ERF_RETURNS_ARG return
> pointers I think.

Looking at builtin_fnspec we're indeed missing BUILT_IN_EXPECT,
but we could indeed use gimple_call_fnspec and look for a
returned argument.  If it's not the first handling this
generically is going to be interesting wrt op?_range though,
so we'd need a range operator for each case (returns arg 1,
returns arg 2, more args are not supported?).  Currently
all returns-arg builtins return the first arg, but eventually
modref or the fortran frontend will end up with calls returning
sth else.

If a float arg is returned it also needs to be a frange operator,
right?

Richard.

> So, I think LGTM, but give Andrew/Aldy a chance to chime in.
> 
> > diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
> > index a5d625387e7..f3d6e29e58d 100644
> > --- a/gcc/gimple-range-op.cc
> > +++ b/gcc/gimple-range-op.cc
> > @@ -309,6 +309,25 @@ public:
> >    }
> >  } op_cfn_constant_p;
> >  
> > +// Implement range operator for integral CFN_BUILT_IN_EXPECT.
> > +class cfn_expect : public range_operator
> > +{
> > +public:
> > +  using range_operator::fold_range;
> > +  virtual bool fold_range (irange &r, tree, const irange &lh,
> > +			   const irange &, relation_trio) const
> > +  {
> > +    r = lh;
> > +    return true;
> > +  }
> > +  virtual bool op1_range (irange &r, tree, const irange &lhs,
> > +			  const irange &, relation_trio) const
> > +  {
> > +    r = lhs;
> > +    return true;
> > +  }
> > +} op_cfn_expect;
> > +
> >  // Implement range operator for CFN_BUILT_IN_SIGNBIT.
> >  class cfn_signbit : public range_operator_float
> >  {
> > @@ -966,6 +985,12 @@ gimple_range_op_handler::maybe_builtin_call ()
> >        m_int = &op_cfn_parity;
> >        break;
> >  
> > +    case CFN_BUILT_IN_EXPECT:
> > +      m_valid = true;
> > +      m_op1 = gimple_call_arg (call, 0);
> > +      m_int = &op_cfn_expect;
> > +      break;
> > +
> >      default:
> >        break;
> >      }
> > diff --git a/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
> > new file mode 100644
> > index 00000000000..fa7dc66d66c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -Wuse-after-free" } */
> > +
> > +unsigned long bufmax = 0;
> > +unsigned long __open_catalog_bufmax;
> > +void *realloc(void *, __SIZE_TYPE__);
> > +void free(void *);
> > +
> > +void __open_catalog(char *buf)
> > +{
> > +  char *old_buf = buf;
> > +  buf = realloc (buf, bufmax);
> > +  if (__builtin_expect ((buf == ((void *)0)), 0))
> > +    free (old_buf); /* { dg-bogus "used after" } */
> > +}
> > -- 
> > 2.35.3
> 
> 	Jakub
> 
>
  
Jakub Jelinek March 17, 2023, 12:59 p.m. UTC | #3
On Fri, Mar 17, 2023 at 12:53:48PM +0000, Richard Biener wrote:
> On Fri, 17 Mar 2023, Jakub Jelinek wrote:
> 
> > On Fri, Mar 17, 2023 at 01:18:32PM +0100, Richard Biener wrote:
> > > The following adds a missing range-op for __builtin_expect which
> > > helps -Wuse-after-free to detect the case a realloc original
> > > pointer is used when the result was NULL.
> > > 
> > > Bootstrap and regtest running on x86_64-unknown-linux-gnu, OK?
> > > 
> > > 	PR tree-optimization/109170
> > > 	* gimple-range-op.cc (cfn_expect): New.
> > > 	(gimple_range_op_handler::maybe_builtin_call): Handle
> > > 	__builtin_expect.
> > > 
> > > 	* gcc.dg/Wuse-after-free-pr109170.c: New testcase.
> > 
> > Shouldn't that be something we handle generically for all
> > ERF_RETURNS_ARG calls (and not just for irange, but for any
> > supported ranges)?
> > 
> > Though, admittedly __builtin_expect probably doesn't set that
> > and all the other current builtins with ERF_RETURNS_ARG return
> > pointers I think.
> 
> Looking at builtin_fnspec we're indeed missing BUILT_IN_EXPECT,
> but we could indeed use gimple_call_fnspec and look for a
> returned argument.  If it's not the first handling this
> generically is going to be interesting wrt op?_range though,
> so we'd need a range operator for each case (returns arg 1,
> returns arg 2, more args are not supported?).  Currently

I think fnspec supports 1-4, but nothing actually uses anything but 1
or none; I could be wrong.

Anyway, I think it is fine to implement __builtin_expect this way
for now, ERF_RETURNS_ARG will be more important for pointers, especially if
we propagate something more than just maybe be/can't be/must be null.
Don't you need to handle BUILT_IN_EXPECT_WITH_PROBABILITY the same though?

> all returns-arg builtins return the first arg, but eventually
> modref or the fortran frontend will end up with calls returning
> sth else.
> 
> If a float arg is returned it also needs to be a frange operator,
> right?

	Jakub
  
Richard Biener March 17, 2023, 1:55 p.m. UTC | #4
On Fri, 17 Mar 2023, Jakub Jelinek wrote:

> On Fri, Mar 17, 2023 at 12:53:48PM +0000, Richard Biener wrote:
> > On Fri, 17 Mar 2023, Jakub Jelinek wrote:
> > 
> > > On Fri, Mar 17, 2023 at 01:18:32PM +0100, Richard Biener wrote:
> > > > The following adds a missing range-op for __builtin_expect which
> > > > helps -Wuse-after-free to detect the case a realloc original
> > > > pointer is used when the result was NULL.
> > > > 
> > > > Bootstrap and regtest running on x86_64-unknown-linux-gnu, OK?
> > > > 
> > > > 	PR tree-optimization/109170
> > > > 	* gimple-range-op.cc (cfn_expect): New.
> > > > 	(gimple_range_op_handler::maybe_builtin_call): Handle
> > > > 	__builtin_expect.
> > > > 
> > > > 	* gcc.dg/Wuse-after-free-pr109170.c: New testcase.
> > > 
> > > Shouldn't that be something we handle generically for all
> > > ERF_RETURNS_ARG calls (and not just for irange, but for any
> > > supported ranges)?
> > > 
> > > Though, admittedly __builtin_expect probably doesn't set that
> > > and all the other current builtins with ERF_RETURNS_ARG return
> > > pointers I think.
> > 
> > Looking at builtin_fnspec we're indeed missing BUILT_IN_EXPECT,
> > but we could indeed use gimple_call_fnspec and look for a
> > returned argument.  If it's not the first handling this
> > generically is going to be interesting wrt op?_range though,
> > so we'd need a range operator for each case (returns arg 1,
> > returns arg 2, more args are not supported?).  Currently
> 
> I think fnspec supports 1-4, but nothing actually uses anything but 1
> or none; I could be wrong.
> 
> Anyway, I think it is fine to implement __builtin_expect this way
> for now, ERF_RETURNS_ARG will be more important for pointers, especially if
> we propagate something more than just maybe be/can't be/must be null.
> Don't you need to handle BUILT_IN_EXPECT_WITH_PROBABILITY the same though?

Yes, BUILT_IN_ASSUME_ALIGNED would be another candidate.

One issue revealed by testing is that EVRP now propagates

  b.0_1 = b;
  _2 = b.0_1 < 0;
  _3 = (long int) _2;
  _4 = __builtin_expect (_3, 0);
  if (_4 != 0)
...

  b.2_8 = b;
  _9 = b.2_8 < 0;
  d_13 = (int) _9;
  _10 = (long int) _9;
  _11 = __builtin_expect (_10, 0);
  if (_11 != 0)

and thus gcc.dg/predict-20.c FAILs and the change is that we propagate
known true/false into the last compare as

  <bb 5> [local count: 977105059]:
  # _9 = PHI <1(3), 0(4)>
  if (_9 != 0)

and lose the connection to __builtin_expect.

We also FAIL gcc.dg/tree-ssa/ssa-lim-21.c, but that's because

  for (int j = 0; j < m; j++)
    if (__builtin_expect (m, 0))

is now optimized (m is [1, +INF] when we enter the loop).  I
have difficulties in restoring the testcase by massaging it,
will try a bit more.

I've implemented the fnspec variant as well now and then we also
CSE the __builtin_expct call, see below for this patch variant.

Richard.

From feb846cbff9774125d8401dfeacd8a4b9c2dccfa Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Fri, 17 Mar 2023 13:14:49 +0100
Subject: [PATCH] tree-optimization/109170 - bogus use-after-free with
 __builtin_expect
To: gcc-patches@gcc.gnu.org

The following adds a missing range-op for __builtin_expect which
helps -Wuse-after-free to detect the case a realloc original
pointer is used when the result was NULL.  The implementation
should handle all argument one pass-through builtins we handle
in the fnspec machinery.

	tree-optimization/109170
	* gimple-range-op.cc (cfn_pass_through_arg1): New.
	(gimple_range_op_handler::maybe_builtin_call): Handle
	__builtin_expect and similar via cfn_pass_through_arg1
	and inspecting the calls fnspec.
	* builtins.cc (builtin_fnspec): Handle BUILT_IN_EXPECT
	and BUILT_IN_EXPECT_WITH_PROBABILITY.

	* gcc.dg/Wuse-after-free-pr109170.c: New testcase.
---
 gcc/builtins.cc                               |  2 ++
 gcc/gimple-range-op.cc                        | 32 ++++++++++++++++++-
 .../gcc.dg/Wuse-after-free-pr109170.c         | 15 +++++++++
 3 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 90246e214d6..56545027297 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -11715,6 +11715,8 @@ builtin_fnspec (tree callee)
       case BUILT_IN_RETURN_ADDRESS:
 	return ".c";
       case BUILT_IN_ASSUME_ALIGNED:
+      case BUILT_IN_EXPECT:
+      case BUILT_IN_EXPECT_WITH_PROBABILITY:
 	return "1cX ";
       /* But posix_memalign stores a pointer into the memory pointed to
 	 by its first argument.  */
diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
index a5d625387e7..1a00f1690e5 100644
--- a/gcc/gimple-range-op.cc
+++ b/gcc/gimple-range-op.cc
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "range.h"
 #include "value-query.h"
 #include "gimple-range.h"
+#include "attr-fnspec.h"
 
 // Given stmt S, fill VEC, up to VEC_SIZE elements, with relevant ssa-names
 // on the statement.  For efficiency, it is an error to not pass in enough
@@ -309,6 +310,26 @@ public:
   }
 } op_cfn_constant_p;
 
+// Implement range operator for integral/pointer functions returning
+// the first argument.
+class cfn_pass_through_arg1 : public range_operator
+{
+public:
+  using range_operator::fold_range;
+  virtual bool fold_range (irange &r, tree, const irange &lh,
+			   const irange &, relation_trio) const
+  {
+    r = lh;
+    return true;
+  }
+  virtual bool op1_range (irange &r, tree, const irange &lhs,
+			  const irange &, relation_trio) const
+  {
+    r = lhs;
+    return true;
+  }
+} op_cfn_pass_through_arg1;
+
 // Implement range operator for CFN_BUILT_IN_SIGNBIT.
 class cfn_signbit : public range_operator_float
 {
@@ -967,6 +988,15 @@ gimple_range_op_handler::maybe_builtin_call ()
       break;
 
     default:
-      break;
+      {
+	unsigned arg;
+	if (gimple_call_fnspec (call).returns_arg (&arg) && arg == 0)
+	  {
+	    m_valid = true;
+	    m_op1 = gimple_call_arg (call, 0);
+	    m_int = &op_cfn_pass_through_arg1;
+	  }
+	break;
+      }
     }
 }
diff --git a/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
new file mode 100644
index 00000000000..fa7dc66d66c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wuse-after-free" } */
+
+unsigned long bufmax = 0;
+unsigned long __open_catalog_bufmax;
+void *realloc(void *, __SIZE_TYPE__);
+void free(void *);
+
+void __open_catalog(char *buf)
+{
+  char *old_buf = buf;
+  buf = realloc (buf, bufmax);
+  if (__builtin_expect ((buf == ((void *)0)), 0))
+    free (old_buf); /* { dg-bogus "used after" } */
+}
  
Andrew MacLeod March 17, 2023, 1:59 p.m. UTC | #5
On 3/17/23 08:59, Jakub Jelinek wrote:
> On Fri, Mar 17, 2023 at 12:53:48PM +0000, Richard Biener wrote:
>> On Fri, 17 Mar 2023, Jakub Jelinek wrote:
>>
>>> On Fri, Mar 17, 2023 at 01:18:32PM +0100, Richard Biener wrote:
>>>> The following adds a missing range-op for __builtin_expect which
>>>> helps -Wuse-after-free to detect the case a realloc original
>>>> pointer is used when the result was NULL.
>>>>
>>>> Bootstrap and regtest running on x86_64-unknown-linux-gnu, OK?
>>>>
>>>> 	PR tree-optimization/109170
>>>> 	* gimple-range-op.cc (cfn_expect): New.
>>>> 	(gimple_range_op_handler::maybe_builtin_call): Handle
>>>> 	__builtin_expect.
>>>>
>>>> 	* gcc.dg/Wuse-after-free-pr109170.c: New testcase.
>>> Shouldn't that be something we handle generically for all
>>> ERF_RETURNS_ARG calls (and not just for irange, but for any
>>> supported ranges)?
>>>
>>> Though, admittedly __builtin_expect probably doesn't set that
>>> and all the other current builtins with ERF_RETURNS_ARG return
>>> pointers I think.
>> Looking at builtin_fnspec we're indeed missing BUILT_IN_EXPECT,
>> but we could indeed use gimple_call_fnspec and look for a
>> returned argument.  If it's not the first handling this
>> generically is going to be interesting wrt op?_range though,
>> so we'd need a range operator for each case (returns arg 1,
>> returns arg 2, more args are not supported?).  Currently
> I think fnspec supports 1-4, but nothing actually uses anything but 1
> or none; I could be wrong.
>
> Anyway, I think it is fine to implement __builtin_expect this way
> for now, ERF_RETURNS_ARG will be more important for pointers, especially if
> we propagate something more than just maybe be/can't be/must be null.
> Don't you need to handle BUILT_IN_EXPECT_WITH_PROBABILITY the same though?
>
I think thats fine for now.

Im going to address improving dispatch for range-ops in stage 1 when it 
opens.

we want to handle non-standard ops more generally like we did for 
WIDEN_MULT_EXPR, plus we didnt know the actualy requirements for the 
initial cut of vrange ->irange/frange dispatch.   We'll clean that up to 
make adding more range kinds cleaner.

as for gimple_fnspec, im sure we can do something better than what we 
have.  Current range-ops works only with 2 operands, but via this 
mechanism they can be any 2. (I think :-)

We can fold arbitrary statements in 
gimpe-range-fold::fold_using_range(), so it is only the 
op[12]_range/relation  routines we loose... Im not sure if there is 
anything that critical there, but if we find something, well we can look 
at it.

Andrew
  
Jakub Jelinek March 17, 2023, 2:03 p.m. UTC | #6
On Fri, Mar 17, 2023 at 01:55:51PM +0000, Richard Biener wrote:
> > Anyway, I think it is fine to implement __builtin_expect this way
> > for now, ERF_RETURNS_ARG will be more important for pointers, especially if
> > we propagate something more than just maybe be/can't be/must be null.
> > Don't you need to handle BUILT_IN_EXPECT_WITH_PROBABILITY the same though?
> 
> Yes, BUILT_IN_ASSUME_ALIGNED would be another candidate.

BUILT_IN_ASSUME_ALIGNED doesn't do that intentionally.
E.g. tree-object-size.cc (pass_through_call) comments on this:
  unsigned rf = gimple_call_return_flags (call);
  if (rf & ERF_RETURNS_ARG)
    {
      unsigned argnum = rf & ERF_RETURN_ARG_MASK;
      if (argnum < gimple_call_num_args (call))
        return gimple_call_arg (call, argnum);
    }

  /* __builtin_assume_aligned is intentionally not marked RET1.  */
  if (gimple_call_builtin_p (call, BUILT_IN_ASSUME_ALIGNED))
    return gimple_call_arg (call, 0);
The reason is that we don't want passes to propagate the argument to the
return value but use a different SSA_NAME there, so that we can have
there different alignment info.

And as you show on the testcases, it probably isn't a good idea for
BUILT_IN_EXPECT* either.

So, perhaps use op_cfn_pass_through_arg1 for the ERF_RETURNS_ARG functions
and BUILT_IN_EXPECT* ?

> >From feb846cbff9774125d8401dfeacd8a4b9c2dccfa Mon Sep 17 00:00:00 2001
> From: Richard Biener <rguenther@suse.de>
> Date: Fri, 17 Mar 2023 13:14:49 +0100
> Subject: [PATCH] tree-optimization/109170 - bogus use-after-free with
>  __builtin_expect
> To: gcc-patches@gcc.gnu.org
> 
> The following adds a missing range-op for __builtin_expect which
> helps -Wuse-after-free to detect the case a realloc original
> pointer is used when the result was NULL.  The implementation
> should handle all argument one pass-through builtins we handle
> in the fnspec machinery.
> 
> 	tree-optimization/109170
> 	* gimple-range-op.cc (cfn_pass_through_arg1): New.
> 	(gimple_range_op_handler::maybe_builtin_call): Handle
> 	__builtin_expect and similar via cfn_pass_through_arg1
> 	and inspecting the calls fnspec.
> 	* builtins.cc (builtin_fnspec): Handle BUILT_IN_EXPECT
> 	and BUILT_IN_EXPECT_WITH_PROBABILITY.
> 
> 	* gcc.dg/Wuse-after-free-pr109170.c: New testcase.
> ---
>  gcc/builtins.cc                               |  2 ++
>  gcc/gimple-range-op.cc                        | 32 ++++++++++++++++++-
>  .../gcc.dg/Wuse-after-free-pr109170.c         | 15 +++++++++
>  3 files changed, 48 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
> 
> diff --git a/gcc/builtins.cc b/gcc/builtins.cc
> index 90246e214d6..56545027297 100644
> --- a/gcc/builtins.cc
> +++ b/gcc/builtins.cc
> @@ -11715,6 +11715,8 @@ builtin_fnspec (tree callee)
>        case BUILT_IN_RETURN_ADDRESS:
>  	return ".c";
>        case BUILT_IN_ASSUME_ALIGNED:
> +      case BUILT_IN_EXPECT:
> +      case BUILT_IN_EXPECT_WITH_PROBABILITY:
>  	return "1cX ";
>        /* But posix_memalign stores a pointer into the memory pointed to
>  	 by its first argument.  */
> diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
> index a5d625387e7..1a00f1690e5 100644
> --- a/gcc/gimple-range-op.cc
> +++ b/gcc/gimple-range-op.cc
> @@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "range.h"
>  #include "value-query.h"
>  #include "gimple-range.h"
> +#include "attr-fnspec.h"
>  
>  // Given stmt S, fill VEC, up to VEC_SIZE elements, with relevant ssa-names
>  // on the statement.  For efficiency, it is an error to not pass in enough
> @@ -309,6 +310,26 @@ public:
>    }
>  } op_cfn_constant_p;
>  
> +// Implement range operator for integral/pointer functions returning
> +// the first argument.
> +class cfn_pass_through_arg1 : public range_operator
> +{
> +public:
> +  using range_operator::fold_range;
> +  virtual bool fold_range (irange &r, tree, const irange &lh,
> +			   const irange &, relation_trio) const
> +  {
> +    r = lh;
> +    return true;
> +  }
> +  virtual bool op1_range (irange &r, tree, const irange &lhs,
> +			  const irange &, relation_trio) const
> +  {
> +    r = lhs;
> +    return true;
> +  }
> +} op_cfn_pass_through_arg1;
> +
>  // Implement range operator for CFN_BUILT_IN_SIGNBIT.
>  class cfn_signbit : public range_operator_float
>  {
> @@ -967,6 +988,15 @@ gimple_range_op_handler::maybe_builtin_call ()
>        break;
>  
>      default:
> -      break;
> +      {
> +	unsigned arg;
> +	if (gimple_call_fnspec (call).returns_arg (&arg) && arg == 0)
> +	  {
> +	    m_valid = true;
> +	    m_op1 = gimple_call_arg (call, 0);
> +	    m_int = &op_cfn_pass_through_arg1;
> +	  }
> +	break;
> +      }
>      }
>  }
> diff --git a/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
> new file mode 100644
> index 00000000000..fa7dc66d66c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -Wuse-after-free" } */
> +
> +unsigned long bufmax = 0;
> +unsigned long __open_catalog_bufmax;
> +void *realloc(void *, __SIZE_TYPE__);
> +void free(void *);
> +
> +void __open_catalog(char *buf)
> +{
> +  char *old_buf = buf;
> +  buf = realloc (buf, bufmax);
> +  if (__builtin_expect ((buf == ((void *)0)), 0))
> +    free (old_buf); /* { dg-bogus "used after" } */
> +}
> -- 
> 2.35.3

	Jakub
  
Richard Biener March 17, 2023, 2:18 p.m. UTC | #7
On Fri, 17 Mar 2023, Jakub Jelinek wrote:

> On Fri, Mar 17, 2023 at 01:55:51PM +0000, Richard Biener wrote:
> > > Anyway, I think it is fine to implement __builtin_expect this way
> > > for now, ERF_RETURNS_ARG will be more important for pointers, especially if
> > > we propagate something more than just maybe be/can't be/must be null.
> > > Don't you need to handle BUILT_IN_EXPECT_WITH_PROBABILITY the same though?
> > 
> > Yes, BUILT_IN_ASSUME_ALIGNED would be another candidate.
> 
> BUILT_IN_ASSUME_ALIGNED doesn't do that intentionally.
> E.g. tree-object-size.cc (pass_through_call) comments on this:
>   unsigned rf = gimple_call_return_flags (call);
>   if (rf & ERF_RETURNS_ARG)
>     {
>       unsigned argnum = rf & ERF_RETURN_ARG_MASK;
>       if (argnum < gimple_call_num_args (call))
>         return gimple_call_arg (call, argnum);
>     }
> 
>   /* __builtin_assume_aligned is intentionally not marked RET1.  */
>   if (gimple_call_builtin_p (call, BUILT_IN_ASSUME_ALIGNED))
>     return gimple_call_arg (call, 0);
> The reason is that we don't want passes to propagate the argument to the
> return value but use a different SSA_NAME there, so that we can have
> there different alignment info.

Only that we now _do_ have BUILT_IN_ASSUME_ALIGNED marked as RET1 ...

> And as you show on the testcases, it probably isn't a good idea for
> BUILT_IN_EXPECT* either.
> 
> So, perhaps use op_cfn_pass_through_arg1 for the ERF_RETURNS_ARG functions
> and BUILT_IN_EXPECT* ?

But that already causes the problems (I didn't yet finish testing
adding RET1 to BUILT_IN_EXPECT*).  Note FRE currently does not use
returns_arg but I have old patches that do - but those replace
uses after a RET1 function with the return value to also reduce
spilling around a call (they of course CSE equal calls).

Anyway, I suspect the ssa-lim-21.c testcase is just very fragile
while the predict-20.c shows a case which should be handled
better even without the __builtin_expect (which is now gone):

  for (;;)
    {
      if (__builtin_expect (b < 0, 0))
        break;
      if (__builtin_expect (a, 1))
        break;
    }
  int d = b < 0;
  if (__builtin_expect (d, 0))
    asm("");

we should be able to handle

 _1 = PHI <1, 0>
 if (_1 != 0)

by copying the probabilities from the incoming PHI edges here,
possibly even by using ranger to test for known condition
on some edges.

Of course it's bad to regress more here.  I'm considering
backing out the -Wuse-after-free changes.

Richard.


> > >From feb846cbff9774125d8401dfeacd8a4b9c2dccfa Mon Sep 17 00:00:00 2001
> > From: Richard Biener <rguenther@suse.de>
> > Date: Fri, 17 Mar 2023 13:14:49 +0100
> > Subject: [PATCH] tree-optimization/109170 - bogus use-after-free with
> >  __builtin_expect
> > To: gcc-patches@gcc.gnu.org
> > 
> > The following adds a missing range-op for __builtin_expect which
> > helps -Wuse-after-free to detect the case a realloc original
> > pointer is used when the result was NULL.  The implementation
> > should handle all argument one pass-through builtins we handle
> > in the fnspec machinery.
> > 
> > 	tree-optimization/109170
> > 	* gimple-range-op.cc (cfn_pass_through_arg1): New.
> > 	(gimple_range_op_handler::maybe_builtin_call): Handle
> > 	__builtin_expect and similar via cfn_pass_through_arg1
> > 	and inspecting the calls fnspec.
> > 	* builtins.cc (builtin_fnspec): Handle BUILT_IN_EXPECT
> > 	and BUILT_IN_EXPECT_WITH_PROBABILITY.
> > 
> > 	* gcc.dg/Wuse-after-free-pr109170.c: New testcase.
> > ---
> >  gcc/builtins.cc                               |  2 ++
> >  gcc/gimple-range-op.cc                        | 32 ++++++++++++++++++-
> >  .../gcc.dg/Wuse-after-free-pr109170.c         | 15 +++++++++
> >  3 files changed, 48 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
> > 
> > diff --git a/gcc/builtins.cc b/gcc/builtins.cc
> > index 90246e214d6..56545027297 100644
> > --- a/gcc/builtins.cc
> > +++ b/gcc/builtins.cc
> > @@ -11715,6 +11715,8 @@ builtin_fnspec (tree callee)
> >        case BUILT_IN_RETURN_ADDRESS:
> >  	return ".c";
> >        case BUILT_IN_ASSUME_ALIGNED:
> > +      case BUILT_IN_EXPECT:
> > +      case BUILT_IN_EXPECT_WITH_PROBABILITY:
> >  	return "1cX ";
> >        /* But posix_memalign stores a pointer into the memory pointed to
> >  	 by its first argument.  */
> > diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
> > index a5d625387e7..1a00f1690e5 100644
> > --- a/gcc/gimple-range-op.cc
> > +++ b/gcc/gimple-range-op.cc
> > @@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "range.h"
> >  #include "value-query.h"
> >  #include "gimple-range.h"
> > +#include "attr-fnspec.h"
> >  
> >  // Given stmt S, fill VEC, up to VEC_SIZE elements, with relevant ssa-names
> >  // on the statement.  For efficiency, it is an error to not pass in enough
> > @@ -309,6 +310,26 @@ public:
> >    }
> >  } op_cfn_constant_p;
> >  
> > +// Implement range operator for integral/pointer functions returning
> > +// the first argument.
> > +class cfn_pass_through_arg1 : public range_operator
> > +{
> > +public:
> > +  using range_operator::fold_range;
> > +  virtual bool fold_range (irange &r, tree, const irange &lh,
> > +			   const irange &, relation_trio) const
> > +  {
> > +    r = lh;
> > +    return true;
> > +  }
> > +  virtual bool op1_range (irange &r, tree, const irange &lhs,
> > +			  const irange &, relation_trio) const
> > +  {
> > +    r = lhs;
> > +    return true;
> > +  }
> > +} op_cfn_pass_through_arg1;
> > +
> >  // Implement range operator for CFN_BUILT_IN_SIGNBIT.
> >  class cfn_signbit : public range_operator_float
> >  {
> > @@ -967,6 +988,15 @@ gimple_range_op_handler::maybe_builtin_call ()
> >        break;
> >  
> >      default:
> > -      break;
> > +      {
> > +	unsigned arg;
> > +	if (gimple_call_fnspec (call).returns_arg (&arg) && arg == 0)
> > +	  {
> > +	    m_valid = true;
> > +	    m_op1 = gimple_call_arg (call, 0);
> > +	    m_int = &op_cfn_pass_through_arg1;
> > +	  }
> > +	break;
> > +      }
> >      }
> >  }
> > diff --git a/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
> > new file mode 100644
> > index 00000000000..fa7dc66d66c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -Wuse-after-free" } */
> > +
> > +unsigned long bufmax = 0;
> > +unsigned long __open_catalog_bufmax;
> > +void *realloc(void *, __SIZE_TYPE__);
> > +void free(void *);
> > +
> > +void __open_catalog(char *buf)
> > +{
> > +  char *old_buf = buf;
> > +  buf = realloc (buf, bufmax);
> > +  if (__builtin_expect ((buf == ((void *)0)), 0))
> > +    free (old_buf); /* { dg-bogus "used after" } */
> > +}
> > -- 
> > 2.35.3
> 
> 	Jakub
> 
>
  
Jakub Jelinek March 17, 2023, 2:52 p.m. UTC | #8
On Fri, Mar 17, 2023 at 02:18:52PM +0000, Richard Biener wrote:
> > And as you show on the testcases, it probably isn't a good idea for
> > BUILT_IN_EXPECT* either.
> > 
> > So, perhaps use op_cfn_pass_through_arg1 for the ERF_RETURNS_ARG functions
> > and BUILT_IN_EXPECT* ?
> 
> But that already causes the problems (I didn't yet finish testing
> adding RET1 to BUILT_IN_EXPECT*).  Note FRE currently does not use
> returns_arg but I have old patches that do - but those replace
> uses after a RET1 function with the return value to also reduce
> spilling around a call (they of course CSE equal calls).

I meant in your patch drop the builtins.cc hunk and add from your
other patch
> > +    case CFN_BUILT_IN_EXPECT:
> > +    case CFN_BUILT_IN_EXPECT_WITH_PROBABILITY:
> > +      m_valid = true;
> > +      m_op1 = gimple_call_arg (call, 0);
> > +      m_int = &op_cfn_pass_through_arg1;
> > +      break;

hunk to gimple_range_op_handler::maybe_builtin_call.
Does that already cause the problems?
I mean, if we e.g. see that a range of the argument is singleton,
then it is fine to optimize the __builtin_expect away.

	Jakub
  
Richard Biener March 20, 2023, 8:21 a.m. UTC | #9
On Fri, 17 Mar 2023, Jakub Jelinek wrote:

> On Fri, Mar 17, 2023 at 02:18:52PM +0000, Richard Biener wrote:
> > > And as you show on the testcases, it probably isn't a good idea for
> > > BUILT_IN_EXPECT* either.
> > > 
> > > So, perhaps use op_cfn_pass_through_arg1 for the ERF_RETURNS_ARG functions
> > > and BUILT_IN_EXPECT* ?
> > 
> > But that already causes the problems (I didn't yet finish testing
> > adding RET1 to BUILT_IN_EXPECT*).  Note FRE currently does not use
> > returns_arg but I have old patches that do - but those replace
> > uses after a RET1 function with the return value to also reduce
> > spilling around a call (they of course CSE equal calls).
> 
> I meant in your patch drop the builtins.cc hunk and add from your
> other patch
> > > +    case CFN_BUILT_IN_EXPECT:
> > > +    case CFN_BUILT_IN_EXPECT_WITH_PROBABILITY:
> > > +      m_valid = true;
> > > +      m_op1 = gimple_call_arg (call, 0);
> > > +      m_int = &op_cfn_pass_through_arg1;
> > > +      break;
> 
> hunk to gimple_range_op_handler::maybe_builtin_call.
> Does that already cause the problems?

Yes, that's basically what the first variant of the patch did and
the FAILs quoted are from testing that variant.

There are no extra fails from the second more generic patch also
touching builtins.cc

> I mean, if we e.g. see that a range of the argument is singleton,
> then it is fine to optimize the __builtin_expect away.

Yes, it's somewhat odd that we handle the - case but not the +:

@@ -204,6 +206,7 @@
   _2 = b.0_1 < 0;
   # RANGE [irange] long int [0, 1] NONZERO 0x1
   _3 = (long int) _2;
+  # RANGE [irange] long int [0, 1] NONZERO 0x1
   _4 = __builtin_expect (_3, 0);
   if (_4 != 0)
     goto <bb 5>; [10.00%]
...
@@ -224,13 +228,14 @@
   goto <bb 3>; [100.00%]
 
   <bb 5> [local count: 977105059]:
-  # _9 = PHI <_4(3), 0(4)>
+  # RANGE [irange] long int [0, 1] NONZERO 0x1
+  # _9 = PHI <1(3), 0(4)>
   if (_9 != 0)
-    goto <bb 6>; [10.00%]
+    goto <bb 6>; [50.00%]
   else
-    goto <bb 7>; [90.00%]
+    goto <bb 7>; [50.00%]


 Predictions for bb 5
-  first match heuristics: 10.00%
-  combined heuristics: 10.00%
-  __builtin_expect heuristics of edge 5->6: 10.00%
+  no prediction heuristics: 50.00%
+  combined heuristics: 50.00%

the dumps don't get any hints on where the first matchor
__builtin_expect heuristic came from, but it looks like we
run expr_expected_value_1 on _9 != 0 which recurses for _9
and runs into the PHI handling which then looks for
a common value into the PHI.  In this case _4 is said
to be zero by PRED_BUILTIN_EXPECT and probability 9000.
But that seems wrong - it looks simply at the __builtin_expect
def here, not taking into account the edge this flows through
(the unlikely edge).

If we look at the recorded edge predictions we instead see

$28 = {ep_next = 0x43e4c80, ep_edge = <edge 0x7ffff67e0f00 (3 -> 5)>, 
  ep_predictor = PRED_BUILTIN_EXPECT, ep_probability = 1000}

so that's the same edge but unlikely now.  Of course there's
no value recorded for this.  For the other edge into the PHI
we get

$31 = {ep_next = 0x43c1e90, ep_edge = <edge 0x7ffff67e0f60 (4 -> 5)>, 
  ep_predictor = PRED_BUILTIN_EXPECT, ep_probability = 9000}

so to me a more reasonable approach would be to record '0' from
the 2nd edge with a probability of 9000 (or for the '+' case IL
'1' with a probability of 1000).  There's possibly a way to
combine predictor + value (here we'd simply take the more
probable value, or the constant for a PHI).  I also see that
we handle any PHI this way, not just PHIs defined in the same
BB as the condition which op we're ultimatively interested in.

So my conclusion is that currently it's pure luck the testcase
"works", and thus adjusting it and opening a bug with the
above findings would be appropriate?

Honza?

As for the LIM testcase, I'll try massaging it further, possibly
turning it into a GIMPLE testcase with fixed counts will make it
work.

Richard.
  
Richard Biener March 20, 2023, 12:12 p.m. UTC | #10
On Mon, 20 Mar 2023, Richard Biener wrote:

> On Fri, 17 Mar 2023, Jakub Jelinek wrote:
> 
> > On Fri, Mar 17, 2023 at 02:18:52PM +0000, Richard Biener wrote:
> > > > And as you show on the testcases, it probably isn't a good idea for
> > > > BUILT_IN_EXPECT* either.
> > > > 
> > > > So, perhaps use op_cfn_pass_through_arg1 for the ERF_RETURNS_ARG functions
> > > > and BUILT_IN_EXPECT* ?
> > > 
> > > But that already causes the problems (I didn't yet finish testing
> > > adding RET1 to BUILT_IN_EXPECT*).  Note FRE currently does not use
> > > returns_arg but I have old patches that do - but those replace
> > > uses after a RET1 function with the return value to also reduce
> > > spilling around a call (they of course CSE equal calls).
> > 
> > I meant in your patch drop the builtins.cc hunk and add from your
> > other patch
> > > > +    case CFN_BUILT_IN_EXPECT:
> > > > +    case CFN_BUILT_IN_EXPECT_WITH_PROBABILITY:
> > > > +      m_valid = true;
> > > > +      m_op1 = gimple_call_arg (call, 0);
> > > > +      m_int = &op_cfn_pass_through_arg1;
> > > > +      break;
> > 
> > hunk to gimple_range_op_handler::maybe_builtin_call.
> > Does that already cause the problems?
> 
> Yes, that's basically what the first variant of the patch did and
> the FAILs quoted are from testing that variant.
> 
> There are no extra fails from the second more generic patch also
> touching builtins.cc
> 
> > I mean, if we e.g. see that a range of the argument is singleton,
> > then it is fine to optimize the __builtin_expect away.
> 
> Yes, it's somewhat odd that we handle the - case but not the +:
> 
> @@ -204,6 +206,7 @@
>    _2 = b.0_1 < 0;
>    # RANGE [irange] long int [0, 1] NONZERO 0x1
>    _3 = (long int) _2;
> +  # RANGE [irange] long int [0, 1] NONZERO 0x1
>    _4 = __builtin_expect (_3, 0);
>    if (_4 != 0)
>      goto <bb 5>; [10.00%]
> ...
> @@ -224,13 +228,14 @@
>    goto <bb 3>; [100.00%]
>  
>    <bb 5> [local count: 977105059]:
> -  # _9 = PHI <_4(3), 0(4)>
> +  # RANGE [irange] long int [0, 1] NONZERO 0x1
> +  # _9 = PHI <1(3), 0(4)>
>    if (_9 != 0)
> -    goto <bb 6>; [10.00%]
> +    goto <bb 6>; [50.00%]
>    else
> -    goto <bb 7>; [90.00%]
> +    goto <bb 7>; [50.00%]
> 
> 
>  Predictions for bb 5
> -  first match heuristics: 10.00%
> -  combined heuristics: 10.00%
> -  __builtin_expect heuristics of edge 5->6: 10.00%
> +  no prediction heuristics: 50.00%
> +  combined heuristics: 50.00%
> 
> the dumps don't get any hints on where the first matchor
> __builtin_expect heuristic came from, but it looks like we
> run expr_expected_value_1 on _9 != 0 which recurses for _9
> and runs into the PHI handling which then looks for
> a common value into the PHI.  In this case _4 is said
> to be zero by PRED_BUILTIN_EXPECT and probability 9000.
> But that seems wrong - it looks simply at the __builtin_expect
> def here, not taking into account the edge this flows through
> (the unlikely edge).
> 
> If we look at the recorded edge predictions we instead see
> 
> $28 = {ep_next = 0x43e4c80, ep_edge = <edge 0x7ffff67e0f00 (3 -> 5)>, 
>   ep_predictor = PRED_BUILTIN_EXPECT, ep_probability = 1000}
> 
> so that's the same edge but unlikely now.  Of course there's
> no value recorded for this.  For the other edge into the PHI
> we get
> 
> $31 = {ep_next = 0x43c1e90, ep_edge = <edge 0x7ffff67e0f60 (4 -> 5)>, 
>   ep_predictor = PRED_BUILTIN_EXPECT, ep_probability = 9000}
> 
> so to me a more reasonable approach would be to record '0' from
> the 2nd edge with a probability of 9000 (or for the '+' case IL
> '1' with a probability of 1000).  There's possibly a way to
> combine predictor + value (here we'd simply take the more
> probable value, or the constant for a PHI).  I also see that
> we handle any PHI this way, not just PHIs defined in the same
> BB as the condition which op we're ultimatively interested in.
> 
> So my conclusion is that currently it's pure luck the testcase
> "works", and thus adjusting it and opening a bug with the
> above findings would be appropriate?
> 
> Honza?

I filed PR109210 and updated the patch with adjustment of the
two failing testcases.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK?

Thanks,
Richard.

From aac0ed731c49f74d7f363b80745150943b75bd4c Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Fri, 17 Mar 2023 13:14:49 +0100
Subject: [PATCH] tree-optimization/109170 - bogus use-after-free with
 __builtin_expect
To: gcc-patches@gcc.gnu.org

The following adds a missing range-op for __builtin_expect which
helps -Wuse-after-free to detect the case a realloc original
pointer is used when the result was NULL.  The implementation
should handle all argument one pass-through builtins we handle
in the fnspec machinery.

The gcc.dg/tree-ssa/ssa-lim-21.c testcase needs adjustment because

   for (int j = 0; j < m; j++)
     if (__builtin_expect (m, 0))
       for (int i = 0; i < m; i++)

is now correctly optimized to a unconditional jump by EVRP - m
cannot be zero when the outer loop is entered.  I've adjusted
the outer loop to iterate 'n' times which makes us apply store-motion
to 'count' and 'q->data1' but only out of the inner loop and
as expected not apply store motion to 'q->data' at all.

The gcc.dg/predict-20.c testcase relies on broken behavior of
profile estimation when trying to handle __builtin_expect values
flowing into PHI nodes.  I have opened PR109210 and removed
the expected matching from the testcase.

	PR tree-optimization/109170
	* gimple-range-op.cc (cfn_pass_through_arg1): New.
	(gimple_range_op_handler::maybe_builtin_call): Handle
	__builtin_expect and similar via cfn_pass_through_arg1
	and inspecting the calls fnspec.
	* builtins.cc (builtin_fnspec): Handle BUILT_IN_EXPECT
	and BUILT_IN_EXPECT_WITH_PROBABILITY.

	* gcc.dg/Wuse-after-free-pr109170.c: New testcase.
	* gcc.dg/tree-ssa/ssa-lim-21.c: Adjust.
	* gcc.dg/predict-20.c: Likewise.
---
 gcc/builtins.cc                               |  2 ++
 gcc/gimple-range-op.cc                        | 32 ++++++++++++++++++-
 .../gcc.dg/Wuse-after-free-pr109170.c         | 15 +++++++++
 gcc/testsuite/gcc.dg/predict-20.c             |  3 +-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-21.c    |  7 ++--
 5 files changed, 54 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 90246e214d6..56545027297 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -11715,6 +11715,8 @@ builtin_fnspec (tree callee)
       case BUILT_IN_RETURN_ADDRESS:
 	return ".c";
       case BUILT_IN_ASSUME_ALIGNED:
+      case BUILT_IN_EXPECT:
+      case BUILT_IN_EXPECT_WITH_PROBABILITY:
 	return "1cX ";
       /* But posix_memalign stores a pointer into the memory pointed to
 	 by its first argument.  */
diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
index a5d625387e7..1a00f1690e5 100644
--- a/gcc/gimple-range-op.cc
+++ b/gcc/gimple-range-op.cc
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "range.h"
 #include "value-query.h"
 #include "gimple-range.h"
+#include "attr-fnspec.h"
 
 // Given stmt S, fill VEC, up to VEC_SIZE elements, with relevant ssa-names
 // on the statement.  For efficiency, it is an error to not pass in enough
@@ -309,6 +310,26 @@ public:
   }
 } op_cfn_constant_p;
 
+// Implement range operator for integral/pointer functions returning
+// the first argument.
+class cfn_pass_through_arg1 : public range_operator
+{
+public:
+  using range_operator::fold_range;
+  virtual bool fold_range (irange &r, tree, const irange &lh,
+			   const irange &, relation_trio) const
+  {
+    r = lh;
+    return true;
+  }
+  virtual bool op1_range (irange &r, tree, const irange &lhs,
+			  const irange &, relation_trio) const
+  {
+    r = lhs;
+    return true;
+  }
+} op_cfn_pass_through_arg1;
+
 // Implement range operator for CFN_BUILT_IN_SIGNBIT.
 class cfn_signbit : public range_operator_float
 {
@@ -967,6 +988,15 @@ gimple_range_op_handler::maybe_builtin_call ()
       break;
 
     default:
-      break;
+      {
+	unsigned arg;
+	if (gimple_call_fnspec (call).returns_arg (&arg) && arg == 0)
+	  {
+	    m_valid = true;
+	    m_op1 = gimple_call_arg (call, 0);
+	    m_int = &op_cfn_pass_through_arg1;
+	  }
+	break;
+      }
     }
 }
diff --git a/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
new file mode 100644
index 00000000000..fa7dc66d66c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wuse-after-free" } */
+
+unsigned long bufmax = 0;
+unsigned long __open_catalog_bufmax;
+void *realloc(void *, __SIZE_TYPE__);
+void free(void *);
+
+void __open_catalog(char *buf)
+{
+  char *old_buf = buf;
+  buf = realloc (buf, bufmax);
+  if (__builtin_expect ((buf == ((void *)0)), 0))
+    free (old_buf); /* { dg-bogus "used after" } */
+}
diff --git a/gcc/testsuite/gcc.dg/predict-20.c b/gcc/testsuite/gcc.dg/predict-20.c
index 31d01835b80..7bb0d411f88 100644
--- a/gcc/testsuite/gcc.dg/predict-20.c
+++ b/gcc/testsuite/gcc.dg/predict-20.c
@@ -16,8 +16,9 @@ c ()
 	break;
     }
   int d = b < 0;
+  /* We fail to apply __builtin_expect heuristics here.  Se PR109210.  */
   if (__builtin_expect (d, 0))
     asm("");
 }
 
-/* { dg-final { scan-tree-dump-times "__builtin_expect heuristics of edge" 3 "profile_estimate"} } */
+/* { dg-final { scan-tree-dump-times "__builtin_expect heuristics of edge" 2 "profile_estimate" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-21.c
index ffe6f8f699d..fe29e841f28 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-21.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-21.c
@@ -17,7 +17,7 @@ void
 func (int m, int n, int k, struct obj *a)
 {
   struct obj *q = a;
-  for (int j = 0; j < m; j++)
+  for (int j = 0; j < n; j++)
     if (__builtin_expect (m, 0))
       for (int i = 0; i < m; i++)
 	{
@@ -31,5 +31,6 @@ func (int m, int n, int k, struct obj *a)
 	}
 }
 
-/* { dg-final { scan-tree-dump-not "Executing store motion of" "lim2"  }  } */
-
+/* { dg-final { scan-tree-dump "Executing store motion of count from loop 2" "lim2"  }  } */
+/* { dg-final { scan-tree-dump "Executing store motion of \[^ \]*data1 from loop 2" "lim2"  }  } */
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 2 "lim2"  }  } */
  
Jakub Jelinek March 20, 2023, 1:22 p.m. UTC | #11
On Mon, Mar 20, 2023 at 12:12:14PM +0000, Richard Biener wrote:
> 	PR tree-optimization/109170
> 	* gimple-range-op.cc (cfn_pass_through_arg1): New.
> 	(gimple_range_op_handler::maybe_builtin_call): Handle
> 	__builtin_expect and similar via cfn_pass_through_arg1
> 	and inspecting the calls fnspec.
> 	* builtins.cc (builtin_fnspec): Handle BUILT_IN_EXPECT
> 	and BUILT_IN_EXPECT_WITH_PROBABILITY.

I'm still worried about this builtins.cc change, can't we defer
that part till GCC 14 where there will be enough time to see if it
doesn't result in some undesirable problems (__builtin_expect* being
optimized away when it still shouldn't etc.)?
> 
> 	* gcc.dg/Wuse-after-free-pr109170.c: New testcase.
> 	* gcc.dg/tree-ssa/ssa-lim-21.c: Adjust.
> 	* gcc.dg/predict-20.c: Likewise.

Otherwise LGTM.

	Jakub
  
Richard Biener March 21, 2023, 8:21 a.m. UTC | #12
On Mon, 20 Mar 2023, Jakub Jelinek wrote:

> On Mon, Mar 20, 2023 at 12:12:14PM +0000, Richard Biener wrote:
> > 	PR tree-optimization/109170
> > 	* gimple-range-op.cc (cfn_pass_through_arg1): New.
> > 	(gimple_range_op_handler::maybe_builtin_call): Handle
> > 	__builtin_expect and similar via cfn_pass_through_arg1
> > 	and inspecting the calls fnspec.
> > 	* builtins.cc (builtin_fnspec): Handle BUILT_IN_EXPECT
> > 	and BUILT_IN_EXPECT_WITH_PROBABILITY.
> 
> I'm still worried about this builtins.cc change, can't we defer
> that part till GCC 14 where there will be enough time to see if it
> doesn't result in some undesirable problems (__builtin_expect* being
> optimized away when it still shouldn't etc.)?

Sure.  I've retested and pushed the following.

Richard.

From 02face8ff38e5a7942cfcb8c7444e6cca35d7523 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Fri, 17 Mar 2023 13:14:49 +0100
Subject: [PATCH] tree-optimization/109170 - bogus use-after-free with
 __builtin_expect
To: gcc-patches@gcc.gnu.org

The following adds a missing range-op for __builtin_expect which
helps -Wuse-after-free to detect the case a realloc original
pointer is used when the result was NULL.  The implementation
should handle all argument one pass-through builtins we handle
in the fnspec machinery, but that's defered to GCC 14.

The gcc.dg/tree-ssa/ssa-lim-21.c testcase needs adjustment because

   for (int j = 0; j < m; j++)
     if (__builtin_expect (m, 0))
       for (int i = 0; i < m; i++)

is now correctly optimized to a unconditional jump by EVRP - m
cannot be zero when the outer loop is entered.  I've adjusted
the outer loop to iterate 'n' times which makes us apply store-motion
to 'count' and 'q->data1' but only out of the inner loop and
as expected not apply store motion to 'q->data' at all.

The gcc.dg/predict-20.c testcase relies on broken behavior of
profile estimation when trying to handle __builtin_expect values
flowing into PHI nodes.  I have opened PR109210 and removed
the expected matching from the testcase.

	PR tree-optimization/109170
	* gimple-range-op.cc (cfn_pass_through_arg1): New.
	(gimple_range_op_handler::maybe_builtin_call): Handle
	__builtin_expect via cfn_pass_through_arg1.

	* gcc.dg/Wuse-after-free-pr109170.c: New testcase.
	* gcc.dg/tree-ssa/ssa-lim-21.c: Adjust.
	* gcc.dg/predict-20.c: Likewise.
---
 gcc/gimple-range-op.cc                        | 27 +++++++++++++++++++
 .../gcc.dg/Wuse-after-free-pr109170.c         | 15 +++++++++++
 gcc/testsuite/gcc.dg/predict-20.c             |  3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-21.c    |  7 ++---
 4 files changed, 48 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c

diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
index a5d625387e7..c7c546caf43 100644
--- a/gcc/gimple-range-op.cc
+++ b/gcc/gimple-range-op.cc
@@ -309,6 +309,26 @@ public:
   }
 } op_cfn_constant_p;
 
+// Implement range operator for integral/pointer functions returning
+// the first argument.
+class cfn_pass_through_arg1 : public range_operator
+{
+public:
+  using range_operator::fold_range;
+  virtual bool fold_range (irange &r, tree, const irange &lh,
+			   const irange &, relation_trio) const
+  {
+    r = lh;
+    return true;
+  }
+  virtual bool op1_range (irange &r, tree, const irange &lhs,
+			  const irange &, relation_trio) const
+  {
+    r = lhs;
+    return true;
+  }
+} op_cfn_pass_through_arg1;
+
 // Implement range operator for CFN_BUILT_IN_SIGNBIT.
 class cfn_signbit : public range_operator_float
 {
@@ -966,6 +986,13 @@ gimple_range_op_handler::maybe_builtin_call ()
       m_int = &op_cfn_parity;
       break;
 
+    case CFN_BUILT_IN_EXPECT:
+    case CFN_BUILT_IN_EXPECT_WITH_PROBABILITY:
+      m_valid = true;
+      m_op1 = gimple_call_arg (call, 0);
+      m_int = &op_cfn_pass_through_arg1;
+      break;
+
     default:
       break;
     }
diff --git a/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
new file mode 100644
index 00000000000..14f1350aa29
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wuse-after-free=2" } */
+
+unsigned long bufmax = 0;
+unsigned long __open_catalog_bufmax;
+void *realloc(void *, __SIZE_TYPE__);
+void free(void *);
+
+void __open_catalog(char *buf)
+{
+  char *old_buf = buf;
+  buf = realloc (buf, bufmax);
+  if (__builtin_expect ((buf == ((void *)0)), 0))
+    free (old_buf); /* { dg-bogus "used after" } */
+}
diff --git a/gcc/testsuite/gcc.dg/predict-20.c b/gcc/testsuite/gcc.dg/predict-20.c
index 31d01835b80..7bb0d411f88 100644
--- a/gcc/testsuite/gcc.dg/predict-20.c
+++ b/gcc/testsuite/gcc.dg/predict-20.c
@@ -16,8 +16,9 @@ c ()
 	break;
     }
   int d = b < 0;
+  /* We fail to apply __builtin_expect heuristics here.  Se PR109210.  */
   if (__builtin_expect (d, 0))
     asm("");
 }
 
-/* { dg-final { scan-tree-dump-times "__builtin_expect heuristics of edge" 3 "profile_estimate"} } */
+/* { dg-final { scan-tree-dump-times "__builtin_expect heuristics of edge" 2 "profile_estimate" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-21.c
index ffe6f8f699d..fe29e841f28 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-21.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-21.c
@@ -17,7 +17,7 @@ void
 func (int m, int n, int k, struct obj *a)
 {
   struct obj *q = a;
-  for (int j = 0; j < m; j++)
+  for (int j = 0; j < n; j++)
     if (__builtin_expect (m, 0))
       for (int i = 0; i < m; i++)
 	{
@@ -31,5 +31,6 @@ func (int m, int n, int k, struct obj *a)
 	}
 }
 
-/* { dg-final { scan-tree-dump-not "Executing store motion of" "lim2"  }  } */
-
+/* { dg-final { scan-tree-dump "Executing store motion of count from loop 2" "lim2"  }  } */
+/* { dg-final { scan-tree-dump "Executing store motion of \[^ \]*data1 from loop 2" "lim2"  }  } */
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 2 "lim2"  }  } */
  
Jakub Jelinek March 21, 2023, 8:23 a.m. UTC | #13
On Tue, Mar 21, 2023 at 08:21:18AM +0000, Richard Biener wrote:
> On Mon, 20 Mar 2023, Jakub Jelinek wrote:
> 
> > On Mon, Mar 20, 2023 at 12:12:14PM +0000, Richard Biener wrote:
> > > 	PR tree-optimization/109170
> > > 	* gimple-range-op.cc (cfn_pass_through_arg1): New.
> > > 	(gimple_range_op_handler::maybe_builtin_call): Handle
> > > 	__builtin_expect and similar via cfn_pass_through_arg1
> > > 	and inspecting the calls fnspec.
> > > 	* builtins.cc (builtin_fnspec): Handle BUILT_IN_EXPECT
> > > 	and BUILT_IN_EXPECT_WITH_PROBABILITY.
> > 
> > I'm still worried about this builtins.cc change, can't we defer
> > that part till GCC 14 where there will be enough time to see if it
> > doesn't result in some undesirable problems (__builtin_expect* being
> > optimized away when it still shouldn't etc.)?
> 
> Sure.  I've retested and pushed the following.

Thanks.

	Jakub
  

Patch

diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
index a5d625387e7..f3d6e29e58d 100644
--- a/gcc/gimple-range-op.cc
+++ b/gcc/gimple-range-op.cc
@@ -309,6 +309,25 @@  public:
   }
 } op_cfn_constant_p;
 
+// Implement range operator for integral CFN_BUILT_IN_EXPECT.
+class cfn_expect : public range_operator
+{
+public:
+  using range_operator::fold_range;
+  virtual bool fold_range (irange &r, tree, const irange &lh,
+			   const irange &, relation_trio) const
+  {
+    r = lh;
+    return true;
+  }
+  virtual bool op1_range (irange &r, tree, const irange &lhs,
+			  const irange &, relation_trio) const
+  {
+    r = lhs;
+    return true;
+  }
+} op_cfn_expect;
+
 // Implement range operator for CFN_BUILT_IN_SIGNBIT.
 class cfn_signbit : public range_operator_float
 {
@@ -966,6 +985,12 @@  gimple_range_op_handler::maybe_builtin_call ()
       m_int = &op_cfn_parity;
       break;
 
+    case CFN_BUILT_IN_EXPECT:
+      m_valid = true;
+      m_op1 = gimple_call_arg (call, 0);
+      m_int = &op_cfn_expect;
+      break;
+
     default:
       break;
     }
diff --git a/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
new file mode 100644
index 00000000000..fa7dc66d66c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wuse-after-free" } */
+
+unsigned long bufmax = 0;
+unsigned long __open_catalog_bufmax;
+void *realloc(void *, __SIZE_TYPE__);
+void free(void *);
+
+void __open_catalog(char *buf)
+{
+  char *old_buf = buf;
+  buf = realloc (buf, bufmax);
+  if (__builtin_expect ((buf == ((void *)0)), 0))
+    free (old_buf); /* { dg-bogus "used after" } */
+}