libstdc++: Check [ptr,end) and [ptr,ptr+n) ranges with _GLIBCXX_ASSERTIONS

Message ID YWRrKXG3vMjifCeb@redhat.com
State RFC
Headers
Series libstdc++: Check [ptr,end) and [ptr,ptr+n) ranges with _GLIBCXX_ASSERTIONS |

Commit Message

Jonathan Wakely Oct. 11, 2021, 4:49 p.m. UTC
  This enables lightweight checks for the __glibcxx_requires_valid_range
and __glibcxx_requires_string_len macros  when _GLIBCXX_ASSERTIONS is
defined.  By using __builtin_object_size we can check whether the end of
the range is part of the same object as the start of the range, and
detect problems like in PR 89927.

libstdc++-v3/ChangeLog:

	* include/debug/debug.h (__valid_range_p, __valid_range_n): New
	inline functions using __builtin_object_size to check ranges
	delimited by pointers.
	[_GLIBCXX_ASSERTIONS] (__glibcxx_requires_valid_range): Use
	__valid_range_p.
	[_GLIBCXX_ASSERTIONS] (__glibcxx_requires_string_len): Use
	__valid_range_n.


The first patch allows us to detect bugs like string("foo", "bar"),
like in PR 89927. Debug mode cannot currently detect this. The new
check uses the compiler built-in to detect when the two arguments are
not part of the same object. This assumes we're optimizing and the
compiler knows the values of the pointers. If it doesn't, then the
function just returns true and should inline to nothing.

I would like to also enable that for Debug Mode, otherwise we have
checks that work for _GLIBCXX_ASSERTIONS but not for _GLIBCXX_DEBUG. I
tried to make that work with the second patch attached to this mail,
but it doesn't abort for the example in PR 89927. I think puttingthe
checks inside the "real" debug checking functions is too many levels
of inlining and the compiler "forgets" the pointer values.

I think the first patch is worth committing. It should add no overhead
for optimized builds, and diagnoses some bugs that we do not diagnose
today. I'm less sure about the second, since it doesn't actually help.
Maybe the second one should wait for Siddhesh's
__builtin_dynamic_object_size to land on trunk.

Taking this idea further, we could do something similar for
__glibcxx_requires_string, which is currently almost useless (it only
checks if the pointer is null) but could be changed to use
__valid_range_n(_String, char_traits<...>::length(_String))
so that we can diagnose non-null terminated strings (because the
length that char-traits would give us would be larger than the size
that __builtin_object_size would give us).

Thoughts?
commit b008cc08c6b05e32c896ed6e5a3e289ccf8f3c91
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Mon Oct 11 15:58:43 2021

    libstdc++: Check [ptr,end) and [ptr,ptr+n) ranges with _GLIBCXX_ASSERTIONS
    
    This enables lightweight checks for the __glibcxx_requires_valid_range
    and __glibcxx_requires_string_len macros  when _GLIBCXX_ASSERTIONS is
    defined.  By using __builtin_object_size we can check whether the end of
    the range is part of the same object as the start of the range, and
    detect problems like in PR 89927.
    
    libstdc++-v3/ChangeLog:
    
            * include/debug/debug.h (__valid_range_p, __valid_range_n): New
            inline functions using __builtin_object_size to check ranges
            delimited by pointers.
            [_GLIBCXX_ASSERTIONS] (__glibcxx_requires_valid_range): Use
            __valid_range_p.
            [_GLIBCXX_ASSERTIONS] (__glibcxx_requires_string_len): Use
            __valid_range_n.
diff --git a/libstdc++-v3/include/debug/debug.h b/libstdc++-v3/include/debug/debug.h
index 1db5aa34c55..15df64f54d9 100644
--- a/libstdc++-v3/include/debug/debug.h
+++ b/libstdc++-v3/include/debug/debug.h
@@ -70,7 +70,11 @@ namespace __gnu_debug
       __UINTPTR_TYPE__ __l = (__UINTPTR_TYPE__)__last;
       if (const std::size_t __sz = __builtin_object_size(__first, 3))
 	return __f <= __l && (__l - __f) <= __sz;
+#ifdef _GLIBCXX_DEBUG
+      return __f <= __l && static_cast<bool>(__f) == static_cast<bool>(__l);
+#else
       return true;
+#endif
     }
 
 #ifndef _GLIBCXX_DEBUG
@@ -89,7 +93,11 @@ namespace __gnu_debug
     {
       if (const std::size_t __sz = __builtin_object_size(__first, 3))
 	return __n <= __sz;
+#ifdef _GLIBCXX_DEBUG
+      return static_cast<bool>(__first) == static_cast<bool>(__n);
+#else
       return true;
+#endif
     }
 #endif
 }
diff --git a/libstdc++-v3/include/debug/helper_functions.h b/libstdc++-v3/include/debug/helper_functions.h
index c0144ced979..ed11147b451 100644
--- a/libstdc++-v3/include/debug/helper_functions.h
+++ b/libstdc++-v3/include/debug/helper_functions.h
@@ -266,6 +266,12 @@ namespace __gnu_debug
       return __valid_range_aux(__first, __last, _Integral());
     }
 
+  template<typename _Tp>
+    __attribute__((__always_inline__)) _GLIBCXX14_CONSTEXPR
+    inline bool
+    __valid_range(_Tp* __first, _Tp* __last) _GLIBCXX_NOEXCEPT
+    { return __gnu_debug::__valid_range_p(__first, __last); }
+
   template<typename _Iterator, typename _Sequence, typename _Category>
     bool
     __valid_range(const _Safe_iterator<_Iterator, _Sequence, _Category>&,
@@ -285,6 +291,12 @@ namespace __gnu_debug
     __can_advance(_InputIterator, _Size)
     { return true; }
 
+  template<typename _Tp, typename _Size>
+    __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR
+    inline bool
+    __can_advance(_Tp* __p, _Size __n) _GLIBCXX_NOEXCEPT
+    { return __gnu_debug::__valid_range_n(__p, (std::size_t)__n); }
+
   template<typename _Iterator, typename _Sequence, typename _Category,
 	   typename _Size>
     bool
diff --git a/libstdc++-v3/include/debug/macros.h b/libstdc++-v3/include/debug/macros.h
index 9e1288cf4d9..5c655083cb3 100644
--- a/libstdc++-v3/include/debug/macros.h
+++ b/libstdc++-v3/include/debug/macros.h
@@ -461,6 +461,6 @@ _GLIBCXX_DEBUG_VERIFY(_This.get_allocator() == _Other.get_allocator(),	\
 
 #define __glibcxx_check_string(_String) _GLIBCXX_DEBUG_PEDASSERT(_String != 0)
 #define __glibcxx_check_string_len(_String,_Len) \
-  _GLIBCXX_DEBUG_PEDASSERT(_String != 0 || _Len == 0)
+  _GLIBCXX_DEBUG_ASSERT(__gnu_debug::__valid_range_n(_String, _Len))
 
 #endif
diff --git a/libstdc++-v3/include/debug/stl_iterator.h b/libstdc++-v3/include/debug/stl_iterator.h
index edeb42ebe98..65c1828d6e4 100644
--- a/libstdc++-v3/include/debug/stl_iterator.h
+++ b/libstdc++-v3/include/debug/stl_iterator.h
@@ -29,6 +29,7 @@
 #ifndef _GLIBCXX_DEBUG_STL_ITERATOR_H
 #define _GLIBCXX_DEBUG_STL_ITERATOR_H 1
 
+#include <debug/debug.h>
 #include <debug/helper_functions.h>
 
 namespace __gnu_debug
diff --git a/libstdc++-v3/src/c++11/debug.cc b/libstdc++-v3/src/c++11/debug.cc
index 0128535135e..59382defe27 100644
--- a/libstdc++-v3/src/c++11/debug.cc
+++ b/libstdc++-v3/src/c++11/debug.cc
@@ -25,6 +25,7 @@
 #include <bits/move.h>
 #include <bits/stl_iterator_base_types.h>
 
+#include <debug/debug.h>
 #include <debug/formatter.h>
 #include <debug/safe_base.h>
 #include <debug/safe_unordered_base.h>
  

Comments

François Dumont Oct. 14, 2021, 5:10 p.m. UTC | #1
Hi

     On a related subject I am waiting for some feedback on:

https://gcc.gnu.org/pipermail/libstdc++/2021-August/053005.html


On 11/10/21 6:49 pm, Jonathan Wakely wrote:
> This enables lightweight checks for the __glibcxx_requires_valid_range
> and __glibcxx_requires_string_len macros  when _GLIBCXX_ASSERTIONS is
> defined.  By using __builtin_object_size we can check whether the end of
> the range is part of the same object as the start of the range, and
> detect problems like in PR 89927.
>
> libstdc++-v3/ChangeLog:
>
> 	* include/debug/debug.h (__valid_range_p, __valid_range_n): New
> 	inline functions using __builtin_object_size to check ranges
> 	delimited by pointers.
> 	[_GLIBCXX_ASSERTIONS] (__glibcxx_requires_valid_range): Use
> 	__valid_range_p.
> 	[_GLIBCXX_ASSERTIONS] (__glibcxx_requires_string_len): Use
> 	__valid_range_n.
>
>
> The first patch allows us to detect bugs like string("foo", "bar"),
> like in PR 89927. Debug mode cannot currently detect this. The new
> check uses the compiler built-in to detect when the two arguments are
> not part of the same object. This assumes we're optimizing and the
> compiler knows the values of the pointers. If it doesn't, then the
> function just returns true and should inline to nothing.

I see, it does not detect that input pointers are unrelated but as they 
are the computed size is >= __sz.

Isn't it UB to compare unrelated pointers ?

> I would like to also enable that for Debug Mode, otherwise we have
> checks that work for _GLIBCXX_ASSERTIONS but not for _GLIBCXX_DEBUG. I
> tried to make that work with the second patch attached to this mail,
> but it doesn't abort for the example in PR 89927. I think puttingthe
> checks inside the "real" debug checking functions is too many levels
> of inlining and the compiler "forgets" the pointer values.
>
> I think the first patch is worth committing. It should add no overhead
> for optimized builds, and diagnoses some bugs that we do not diagnose
> today. I'm less sure about the second, since it doesn't actually help.
> Maybe the second one should wait for Siddhesh's
> __builtin_dynamic_object_size to land on trunk.
>
> Taking this idea further, we could do something similar for
> __glibcxx_requires_string, which is currently almost useless (it only
> checks if the pointer is null) but could be changed to use
> __valid_range_n(_String, char_traits<...>::length(_String))
> so that we can diagnose non-null terminated strings (because the
> length that char-traits would give us would be larger than the size
> that __builtin_object_size would give us).
>
> Thoughts?
>
>
  
Jonathan Wakely Oct. 14, 2021, 5:43 p.m. UTC | #2
On Thu, 14 Oct 2021 at 18:11, François Dumont <frs.dumont@gmail.com> wrote:
>
> Hi
>
>      On a related subject I am waiting for some feedback on:
>
> https://gcc.gnu.org/pipermail/libstdc++/2021-August/053005.html

I'm concerned that this adds too much overhead for the
_GLIBCXX_ASSERTIONS case. It adds function calls which are not
necessarily inlined, and which perform arithmetic and comparisons on
the arguments. That has a runtime cost which is non-zero.

The patches I sent in this thread have zero runtime cost, because they
use the compiler built-in which compiles away to nothing if the sizes
aren't known.

>
> On 11/10/21 6:49 pm, Jonathan Wakely wrote:
> > This enables lightweight checks for the __glibcxx_requires_valid_range
> > and __glibcxx_requires_string_len macros  when _GLIBCXX_ASSERTIONS is
> > defined.  By using __builtin_object_size we can check whether the end of
> > the range is part of the same object as the start of the range, and
> > detect problems like in PR 89927.
> >
> > libstdc++-v3/ChangeLog:
> >
> >       * include/debug/debug.h (__valid_range_p, __valid_range_n): New
> >       inline functions using __builtin_object_size to check ranges
> >       delimited by pointers.
> >       [_GLIBCXX_ASSERTIONS] (__glibcxx_requires_valid_range): Use
> >       __valid_range_p.
> >       [_GLIBCXX_ASSERTIONS] (__glibcxx_requires_string_len): Use
> >       __valid_range_n.
> >
> >
> > The first patch allows us to detect bugs like string("foo", "bar"),
> > like in PR 89927. Debug mode cannot currently detect this. The new
> > check uses the compiler built-in to detect when the two arguments are
> > not part of the same object. This assumes we're optimizing and the
> > compiler knows the values of the pointers. If it doesn't, then the
> > function just returns true and should inline to nothing.
>
> I see, it does not detect that input pointers are unrelated but as they
> are the computed size is >= __sz.
>
> Isn't it UB to compare unrelated pointers ?

Yes, and my patch doesn't compare any pointers, does it?
  
François Dumont Oct. 15, 2021, 5:19 a.m. UTC | #3
On 14/10/21 7:43 pm, Jonathan Wakely wrote:
> On Thu, 14 Oct 2021 at 18:11, François Dumont <frs.dumont@gmail.com> wrote:
>> Hi
>>
>>       On a related subject I am waiting for some feedback on:
>>
>> https://gcc.gnu.org/pipermail/libstdc++/2021-August/053005.html
> I'm concerned that this adds too much overhead for the
> _GLIBCXX_ASSERTIONS case. It adds function calls which are not
> necessarily inlined, and which perform arithmetic and comparisons on
> the arguments. That has a runtime cost which is non-zero.

I thought that limiting the checks to __valid_range would be fine for 
_GLIBCXX_ASSERTIONS. If you do not want any overhead you just don't 
define it.

>
> The patches I sent in this thread have zero runtime cost, because they
> use the compiler built-in which compiles away to nothing if the sizes
> aren't known.
I'll try to find out if it can help for the test case on std::copy which 
I was adding with my proposal.
>
>> On 11/10/21 6:49 pm, Jonathan Wakely wrote:
>>> This enables lightweight checks for the __glibcxx_requires_valid_range
>>> and __glibcxx_requires_string_len macros  when _GLIBCXX_ASSERTIONS is
>>> defined.  By using __builtin_object_size we can check whether the end of
>>> the range is part of the same object as the start of the range, and
>>> detect problems like in PR 89927.
>>>
>>> libstdc++-v3/ChangeLog:
>>>
>>>        * include/debug/debug.h (__valid_range_p, __valid_range_n): New
>>>        inline functions using __builtin_object_size to check ranges
>>>        delimited by pointers.
>>>        [_GLIBCXX_ASSERTIONS] (__glibcxx_requires_valid_range): Use
>>>        __valid_range_p.
>>>        [_GLIBCXX_ASSERTIONS] (__glibcxx_requires_string_len): Use
>>>        __valid_range_n.
>>>
>>>
>>> The first patch allows us to detect bugs like string("foo", "bar"),
>>> like in PR 89927. Debug mode cannot currently detect this. The new
>>> check uses the compiler built-in to detect when the two arguments are
>>> not part of the same object. This assumes we're optimizing and the
>>> compiler knows the values of the pointers. If it doesn't, then the
>>> function just returns true and should inline to nothing.
>> I see, it does not detect that input pointers are unrelated but as they
>> are the computed size is >= __sz.
>>
>> Isn't it UB to compare unrelated pointers ?
> Yes, and my patch doesn't compare any pointers, does it?
>
+      __UINTPTR_TYPE__ __f = (__UINTPTR_TYPE__)__first;
+      __UINTPTR_TYPE__ __l = (__UINTPTR_TYPE__)__last;
+      if (const std::size_t __sz = __builtin_object_size(__first, 3))
+    return __f <= __l && (__l - __f) <= __sz;

Isn't it a comparison ?

But maybe this is what the previous cast is for, I never understood it.

Note that those cast could be moved within the if branch, even if I 
guess that the compiler does it.
  
Jonathan Wakely Oct. 15, 2021, 8:47 a.m. UTC | #4
On Fri, 15 Oct 2021 at 06:19, François Dumont wrote:
>
> On 14/10/21 7:43 pm, Jonathan Wakely wrote:
> > On Thu, 14 Oct 2021 at 18:11, François Dumont <frs.dumont@gmail.com> wrote:
> >> Hi
> >>
> >>       On a related subject I am waiting for some feedback on:
> >>
> >> https://gcc.gnu.org/pipermail/libstdc++/2021-August/053005.html
> > I'm concerned that this adds too much overhead for the
> > _GLIBCXX_ASSERTIONS case. It adds function calls which are not
> > necessarily inlined, and which perform arithmetic and comparisons on
> > the arguments. That has a runtime cost which is non-zero.
>
> I thought that limiting the checks to __valid_range would be fine for
> _GLIBCXX_ASSERTIONS. If you do not want any overhead you just don't
> define it.

Then you get no checks at all. The point of _GLIBCXX_ASSERTIONS is to
get *some* checking, without too much overhead. If you are willing to
accept the overhead we already have _GLIBCXX_DEBUG for that.

We could consider a second level of _GLIBCXX_ASSERTIONS=2 that turns
on extra checks, but we need to be careful about adding any
non-trivial checks to _GLIBCXX_ASSERTIONS=1 (which is what is used
today in major linux distributions, to build every C++ program and
library in the OS).


>
> >
> > The patches I sent in this thread have zero runtime cost, because they
> > use the compiler built-in which compiles away to nothing if the sizes
> > aren't known.
> I'll try to find out if it can help for the test case on std::copy which
> I was adding with my proposal.
> >
> >> On 11/10/21 6:49 pm, Jonathan Wakely wrote:
> >>> This enables lightweight checks for the __glibcxx_requires_valid_range
> >>> and __glibcxx_requires_string_len macros  when _GLIBCXX_ASSERTIONS is
> >>> defined.  By using __builtin_object_size we can check whether the end of
> >>> the range is part of the same object as the start of the range, and
> >>> detect problems like in PR 89927.
> >>>
> >>> libstdc++-v3/ChangeLog:
> >>>
> >>>        * include/debug/debug.h (__valid_range_p, __valid_range_n): New
> >>>        inline functions using __builtin_object_size to check ranges
> >>>        delimited by pointers.
> >>>        [_GLIBCXX_ASSERTIONS] (__glibcxx_requires_valid_range): Use
> >>>        __valid_range_p.
> >>>        [_GLIBCXX_ASSERTIONS] (__glibcxx_requires_string_len): Use
> >>>        __valid_range_n.
> >>>
> >>>
> >>> The first patch allows us to detect bugs like string("foo", "bar"),
> >>> like in PR 89927. Debug mode cannot currently detect this. The new
> >>> check uses the compiler built-in to detect when the two arguments are
> >>> not part of the same object. This assumes we're optimizing and the
> >>> compiler knows the values of the pointers. If it doesn't, then the
> >>> function just returns true and should inline to nothing.
> >> I see, it does not detect that input pointers are unrelated but as they
> >> are the computed size is >= __sz.
> >>
> >> Isn't it UB to compare unrelated pointers ?
> > Yes, and my patch doesn't compare any pointers, does it?
> >
> +      __UINTPTR_TYPE__ __f = (__UINTPTR_TYPE__)__first;
> +      __UINTPTR_TYPE__ __l = (__UINTPTR_TYPE__)__last;
> +      if (const std::size_t __sz = __builtin_object_size(__first, 3))
> +    return __f <= __l && (__l - __f) <= __sz;
>
> Isn't it a comparison ?

It's not comparing pointers, it's comparing integers. To avoid the
unspecified behaviour of comparing unrelated pointers.

>
> But maybe this is what the previous cast is for, I never understood it.
>
> Note that those cast could be moved within the if branch, even if I
> guess that the compiler does it.

At -O1 the casts are zero cost, they don't generate any code. At -O0,
you have so much overhead for every line of code that this doesn't
make much difference! But yes, we could move them into the if
statement.
  

Patch

diff --git a/libstdc++-v3/include/debug/debug.h b/libstdc++-v3/include/debug/debug.h
index 116f2f023e2..1db5aa34c55 100644
--- a/libstdc++-v3/include/debug/debug.h
+++ b/libstdc++-v3/include/debug/debug.h
@@ -59,12 +59,46 @@  namespace __gnu_debug
 
   template<typename _Ite, typename _Seq, typename _Cat>
     struct _Safe_iterator;
+
+#ifdef _GLIBCXX_ASSERTIONS
+  template<typename _Tp>
+    __attribute__((__always_inline__))
+    _GLIBCXX14_CONSTEXPR inline bool
+    __valid_range_p(_Tp* __first, _Tp* __last) _GLIBCXX_NOEXCEPT
+    {
+      __UINTPTR_TYPE__ __f = (__UINTPTR_TYPE__)__first;
+      __UINTPTR_TYPE__ __l = (__UINTPTR_TYPE__)__last;
+      if (const std::size_t __sz = __builtin_object_size(__first, 3))
+	return __f <= __l && (__l - __f) <= __sz;
+      return true;
+    }
+
+#ifndef _GLIBCXX_DEBUG
+  // __glibcxx_requires_valid_range uses this overload for non-pointers.
+  template<typename _Tp>
+    __attribute__((__always_inline__))
+    _GLIBCXX14_CONSTEXPR inline bool
+    __valid_range_p(_Tp, _Tp) _GLIBCXX_NOEXCEPT
+    { return true; }
+#endif
+
+  template<typename _Tp>
+    _GLIBCXX14_CONSTEXPR __attribute__((__always_inline__))
+    inline bool
+    __valid_range_n(_Tp* __first, std::size_t __n) _GLIBCXX_NOEXCEPT
+    {
+      if (const std::size_t __sz = __builtin_object_size(__first, 3))
+	return __n <= __sz;
+      return true;
+    }
+#endif
 }
 
 #ifndef _GLIBCXX_DEBUG
 
 # define __glibcxx_requires_cond(_Cond,_Msg)
-# define __glibcxx_requires_valid_range(_First,_Last)
+# define __glibcxx_requires_valid_range(_First,_Last) \
+  __glibcxx_assert(__gnu_debug::__valid_range_p(_First, _Last))
 # define __glibcxx_requires_can_increment(_First,_Size)
 # define __glibcxx_requires_can_increment_range(_First1,_Last1,_First2)
 # define __glibcxx_requires_can_decrement_range(_First1,_Last1,_First2)
@@ -79,7 +113,8 @@  namespace __gnu_debug
 # define __glibcxx_requires_heap(_First,_Last)
 # define __glibcxx_requires_heap_pred(_First,_Last,_Pred)
 # define __glibcxx_requires_string(_String)
-# define __glibcxx_requires_string_len(_String,_Len)
+# define __glibcxx_requires_string_len(_String,_Len) \
+  __glibcxx_assert(__gnu_debug::__valid_range_n(_String, _Len))
 # define __glibcxx_requires_irreflexive(_First,_Last)
 # define __glibcxx_requires_irreflexive2(_First,_Last)
 # define __glibcxx_requires_irreflexive_pred(_First,_Last,_Pred)