_Hashtable implementation cleanup

Message ID f3eb53fe-b930-5162-656e-8adfdb31e797@gmail.com
State Committed
Commit 5476c9142830d01c4b8f2d91e9d439cb32d76378
Headers
Series _Hashtable implementation cleanup |

Commit Message

François Dumont May 10, 2023, 4:59 a.m. UTC
  Hi

Rather than providing a series of patches for _Hashtable I prefer to 
submit them one by one. It will maximize the chances to have some of 
them in gcc 14.

I'm starting with this simple patch to do some cleanup in the current 
implementation to ease compiler optimizations by making some methods 
implicitly inline and avoiding the iterator abstraction when useless.

It is also replacing some faulty usages of __node_type* with __node_ptr. 
It should simplify the patch to make use of allocator custom pointer I 
would like to reactivate.

libstdc++: [_Hashtable] Implement several small methods implicitly inline

Make implementation of 3 simple _Hashtable methods implicitly inline.

Avoid usage of const_iterator abstraction within _Hashtable implementation.

Replace several usages of __node_type* with expected __node_ptr.

libstdc++-v3/ChangeLog:

             * include/bits/hashtable_policy.h
             (_NodeBuilder<>::_S_build): Use __node_ptr.
             (_ReuseOrAllocNode<>): Use __node_ptr in place of __node_type*.
             (_AllocNode<>): Likewise.
             (_Equality<>::_M_equal): Remove const_iterator usages. Only 
preserved
             to call std::is_permutation in the non-unique key 
implementation.
             * include/bits/hashtable.h 
(_Hashtable<>::_M_update_begin()): Capture
             _M_begin() once.
             (_Hashtable<>::_M_bucket_begin(size_type)): Implement 
implicitly inline.
             (_Hashtable<>::_M_insert_bucket_begin): Likewise.
             (_Hashtable<>::_M_remove_bucket_begin): Likewise.
             (_Hashtable<>::_M_compute_hash_code): Use __node_ptr rather 
than
             const_iterator.
             (_Hashtable<>::find): Likewise.
             (_Hashtable<>::_M_emplace): Likewise.
             (_Hashtable<>::_M_insert_unique): Likewise.

Ok to commit ?

François
  

Comments

Jonathan Wakely May 10, 2023, 8:37 a.m. UTC | #1
On Wed, 10 May 2023 at 05:59, François Dumont via Libstdc++ <
libstdc++@gcc.gnu.org> wrote:

> Hi
>
> Rather than providing a series of patches for _Hashtable I prefer to
> submit them one by one. It will maximize the chances to have some of
> them in gcc 14.
>
> I'm starting with this simple patch to do some cleanup in the current
> implementation to ease compiler optimizations by making some methods
> implicitly inline and avoiding the iterator abstraction when useless.
>
> It is also replacing some faulty usages of __node_type* with __node_ptr.
> It should simplify the patch to make use of allocator custom pointer I
> would like to reactivate.
>
> libstdc++: [_Hashtable] Implement several small methods implicitly inline
>
> Make implementation of 3 simple _Hashtable methods implicitly inline.
>
> Avoid usage of const_iterator abstraction within _Hashtable implementation.
>
> Replace several usages of __node_type* with expected __node_ptr.
>
> libstdc++-v3/ChangeLog:
>
>              * include/bits/hashtable_policy.h
>              (_NodeBuilder<>::_S_build): Use __node_ptr.
>              (_ReuseOrAllocNode<>): Use __node_ptr in place of
> __node_type*.
>              (_AllocNode<>): Likewise.
>              (_Equality<>::_M_equal): Remove const_iterator usages. Only
> preserved
>              to call std::is_permutation in the non-unique key
> implementation.
>              * include/bits/hashtable.h
> (_Hashtable<>::_M_update_begin()): Capture
>              _M_begin() once.
>              (_Hashtable<>::_M_bucket_begin(size_type)): Implement
> implicitly inline.
>              (_Hashtable<>::_M_insert_bucket_begin): Likewise.
>              (_Hashtable<>::_M_remove_bucket_begin): Likewise.
>              (_Hashtable<>::_M_compute_hash_code): Use __node_ptr rather
> than
>              const_iterator.
>              (_Hashtable<>::find): Likewise.
>              (_Hashtable<>::_M_emplace): Likewise.
>              (_Hashtable<>::_M_insert_unique): Likewise.
>
> Ok to commit ?
>

OK, thanks
  

Patch

diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index d2ff15320fc..954a1c7a58d 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -401,8 +401,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_update_bbegin()
       {
-	if (_M_begin())
-	  _M_buckets[_M_bucket_index(*_M_begin())] = &_M_before_begin;
+	if (auto __begin = _M_begin())
+	  _M_buckets[_M_bucket_index(*__begin)] = &_M_before_begin;
       }
 
       void
@@ -458,7 +458,11 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       // Gets bucket begin, deals with the fact that non-empty buckets contain
       // their before begin node.
       __node_ptr
-      _M_bucket_begin(size_type __bkt) const;
+      _M_bucket_begin(size_type __bkt) const
+      {
+	__node_base_ptr __n = _M_buckets[__bkt];
+	return __n ? static_cast<__node_ptr>(__n->_M_nxt) : nullptr;
+      }
 
       __node_ptr
       _M_begin() const
@@ -831,19 +835,57 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
       // Insert a node at the beginning of a bucket.
       void
-      _M_insert_bucket_begin(size_type, __node_ptr);
+      _M_insert_bucket_begin(size_type __bkt, __node_ptr __node)
+      {
+	if (_M_buckets[__bkt])
+	  {
+	    // Bucket is not empty, we just need to insert the new node
+	    // after the bucket before begin.
+	    __node->_M_nxt = _M_buckets[__bkt]->_M_nxt;
+	    _M_buckets[__bkt]->_M_nxt = __node;
+	  }
+	else
+	  {
+	    // The bucket is empty, the new node is inserted at the
+	    // beginning of the singly-linked list and the bucket will
+	    // contain _M_before_begin pointer.
+	    __node->_M_nxt = _M_before_begin._M_nxt;
+	    _M_before_begin._M_nxt = __node;
+
+	    if (__node->_M_nxt)
+	      // We must update former begin bucket that is pointing to
+	      // _M_before_begin.
+	      _M_buckets[_M_bucket_index(*__node->_M_next())] = __node;
+
+	    _M_buckets[__bkt] = &_M_before_begin;
+	  }
+      }
 
       // Remove the bucket first node
       void
       _M_remove_bucket_begin(size_type __bkt, __node_ptr __next_n,
-			     size_type __next_bkt);
+			     size_type __next_bkt)
+      {
+	if (!__next_n || __next_bkt != __bkt)
+	  {
+	    // Bucket is now empty
+	    // First update next bucket if any
+	    if (__next_n)
+	      _M_buckets[__next_bkt] = _M_buckets[__bkt];
+
+	    // Second update before begin node if necessary
+	    if (&_M_before_begin == _M_buckets[__bkt])
+	      _M_before_begin._M_nxt = __next_n;
+	    _M_buckets[__bkt] = nullptr;
+	  }
+      }
 
       // Get the node before __n in the bucket __bkt
       __node_base_ptr
       _M_get_previous_node(size_type __bkt, __node_ptr __n);
 
-      pair<const_iterator, __hash_code>
-      _M_compute_hash_code(const_iterator __hint, const key_type& __k) const;
+      pair<__node_ptr, __hash_code>
+      _M_compute_hash_code(__node_ptr __hint, const key_type& __k) const;
 
       // Insert node __n with hash code __code, in bucket __bkt if no
       // rehash (assumes no element with same key already present).
@@ -1153,20 +1195,6 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     };
 
   // Definitions of class template _Hashtable's out-of-line member functions.
-  template<typename _Key, typename _Value, typename _Alloc,
-	   typename _ExtractKey, typename _Equal,
-	   typename _Hash, typename _RangeHash, typename _Unused,
-	   typename _RehashPolicy, typename _Traits>
-    auto
-    _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
-	       _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
-    _M_bucket_begin(size_type __bkt) const
-    -> __node_ptr
-    {
-      __node_base_ptr __n = _M_buckets[__bkt];
-      return __n ? static_cast<__node_ptr>(__n->_M_nxt) : nullptr;
-    }
-
   template<typename _Key, typename _Value, typename _Alloc,
 	   typename _ExtractKey, typename _Equal,
 	   typename _Hash, typename _RangeHash, typename _Unused,
@@ -1653,9 +1681,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
       if (size() <= __small_size_threshold())
 	{
-	  for (auto __it = begin(); __it != end(); ++__it)
-	    if (this->_M_key_equals(__k, *__it._M_cur))
-	      return __it;
+	  for (auto __it = _M_begin(); __it; __it = __it->_M_next())
+	    if (this->_M_key_equals(__k, *__it))
+	      return iterator(__it);
 	  return end();
 	}
 
@@ -1676,9 +1704,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
       if (size() <= __small_size_threshold())
 	{
-	  for (auto __it = begin(); __it != end(); ++__it)
-	    if (this->_M_key_equals(__k, *__it._M_cur))
-	      return __it;
+	  for (auto __it = _M_begin(); __it; __it = __it->_M_next())
+	    if (this->_M_key_equals(__k, *__it))
+	      return const_iterator(__it);
 	  return end();
 	}
 
@@ -1984,63 +2012,6 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	return nullptr;
       }
 
-  template<typename _Key, typename _Value, typename _Alloc,
-	   typename _ExtractKey, typename _Equal,
-	   typename _Hash, typename _RangeHash, typename _Unused,
-	   typename _RehashPolicy, typename _Traits>
-    void
-    _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
-	       _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
-    _M_insert_bucket_begin(size_type __bkt, __node_ptr __node)
-    {
-      if (_M_buckets[__bkt])
-	{
-	  // Bucket is not empty, we just need to insert the new node
-	  // after the bucket before begin.
-	  __node->_M_nxt = _M_buckets[__bkt]->_M_nxt;
-	  _M_buckets[__bkt]->_M_nxt = __node;
-	}
-      else
-	{
-	  // The bucket is empty, the new node is inserted at the
-	  // beginning of the singly-linked list and the bucket will
-	  // contain _M_before_begin pointer.
-	  __node->_M_nxt = _M_before_begin._M_nxt;
-	  _M_before_begin._M_nxt = __node;
-
-	  if (__node->_M_nxt)
-	    // We must update former begin bucket that is pointing to
-	    // _M_before_begin.
-	    _M_buckets[_M_bucket_index(*__node->_M_next())] = __node;
-
-	  _M_buckets[__bkt] = &_M_before_begin;
-	}
-    }
-
-  template<typename _Key, typename _Value, typename _Alloc,
-	   typename _ExtractKey, typename _Equal,
-	   typename _Hash, typename _RangeHash, typename _Unused,
-	   typename _RehashPolicy, typename _Traits>
-    void
-    _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
-	       _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
-    _M_remove_bucket_begin(size_type __bkt, __node_ptr __next,
-			   size_type __next_bkt)
-    {
-      if (!__next || __next_bkt != __bkt)
-	{
-	  // Bucket is now empty
-	  // First update next bucket if any
-	  if (__next)
-	    _M_buckets[__next_bkt] = _M_buckets[__bkt];
-
-	  // Second update before begin node if necessary
-	  if (&_M_before_begin == _M_buckets[__bkt])
-	    _M_before_begin._M_nxt = __next;
-	  _M_buckets[__bkt] = nullptr;
-	}
-    }
-
   template<typename _Key, typename _Value, typename _Alloc,
 	   typename _ExtractKey, typename _Equal,
 	   typename _Hash, typename _RangeHash, typename _Unused,
@@ -2073,10 +2044,10 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	const key_type& __k = _ExtractKey{}(__node._M_node->_M_v());
 	if (size() <= __small_size_threshold())
 	  {
-	    for (auto __it = begin(); __it != end(); ++__it)
-	      if (this->_M_key_equals(__k, *__it._M_cur))
+	    for (auto __it = _M_begin(); __it; __it = __it->_M_next())
+	      if (this->_M_key_equals(__k, *__it))
 		// There is already an equivalent node, no insertion
-		return { __it, false };
+		return { iterator(__it), false };
 	  }
 
 	__hash_code __code = this->_M_hash_code(__k);
@@ -2108,10 +2079,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	_Scoped_node __node { this, std::forward<_Args>(__args)...  };
 	const key_type& __k = _ExtractKey{}(__node._M_node->_M_v());
 
-	auto __res = this->_M_compute_hash_code(__hint, __k);
+	auto __res = this->_M_compute_hash_code(__hint._M_cur, __k);
 	auto __pos
-	  = _M_insert_multi_node(__res.first._M_cur, __res.second,
-				 __node._M_node);
+	  = _M_insert_multi_node(__res.first, __res.second, __node._M_node);
 	__node._M_node = nullptr;
 	return __pos;
       }
@@ -2123,21 +2093,23 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     auto
     _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
 	       _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
-    _M_compute_hash_code(const_iterator __hint, const key_type& __k) const
-    -> pair<const_iterator, __hash_code>
+    _M_compute_hash_code(__node_ptr __hint, const key_type& __k) const
+    -> pair<__node_ptr, __hash_code>
     {
       if (size() <= __small_size_threshold())
 	{
-	  if (__hint != cend())
+	  if (__hint)
 	    {
-	      for (auto __it = __hint; __it != cend(); ++__it)
-		if (this->_M_key_equals(__k, *__it._M_cur))
-		  return { __it, this->_M_hash_code(*__it._M_cur) };
+	      for (auto __it = __hint; __it; __it = __it->_M_next())
+		if (this->_M_key_equals(__k, *__it))
+		  return { __it, this->_M_hash_code(*__it) };
 	    }
 
-	  for (auto __it = cbegin(); __it != __hint; ++__it)
-	    if (this->_M_key_equals(__k, *__it._M_cur))
-	      return { __it, this->_M_hash_code(*__it._M_cur) };
+	  for (auto __it = _M_begin(); __it != __hint; __it = __it->_M_next())
+	    if (this->_M_key_equals(__k, *__it))
+	      return { __it, this->_M_hash_code(*__it) };
+
+	  __hint = nullptr;
 	}
 
       return { __hint, this->_M_hash_code(__k) };
@@ -2242,9 +2214,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       -> pair<iterator, bool>
       {
 	if (size() <= __small_size_threshold())
-	  for (auto __it = begin(); __it != end(); ++__it)
-	    if (this->_M_key_equals_tr(__k, *__it._M_cur))
-	      return { __it, false };
+	  for (auto __it = _M_begin(); __it; __it = __it->_M_next())
+	    if (this->_M_key_equals_tr(__k, *__it))
+	      return { iterator(__it), false };
 
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	size_type __bkt = _M_bucket_index(__code);
@@ -2284,11 +2256,10 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 	// Second compute the hash code so that we don't rehash if it throws.
 	auto __res = this->_M_compute_hash_code(
-	  __hint, _ExtractKey{}(__node._M_node->_M_v()));
+	  __hint._M_cur, _ExtractKey{}(__node._M_node->_M_v()));
 
 	auto __pos
-	  = _M_insert_multi_node(__res.first._M_cur, __res.second,
-				 __node._M_node);
+	  = _M_insert_multi_node(__res.first, __res.second, __node._M_node);
 	__node._M_node = nullptr;
 	return __pos;
       }
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index cce4e2844cf..347d468ea86 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -156,7 +156,7 @@  namespace __detail
       template<typename _Kt, typename _Arg, typename _NodeGenerator>
 	static auto
 	_S_build(_Kt&& __k, _Arg&& __arg, const _NodeGenerator& __node_gen)
-	-> typename _NodeGenerator::__node_type*
+	-> typename _NodeGenerator::__node_ptr
 	{
 	  return __node_gen(std::forward<_Kt>(__k),
 			    std::forward<_Arg>(__arg).second);
@@ -169,7 +169,7 @@  namespace __detail
       template<typename _Kt, typename _Arg, typename _NodeGenerator>
 	static auto
 	_S_build(_Kt&& __k, _Arg&&, const _NodeGenerator& __node_gen)
-	-> typename _NodeGenerator::__node_type*
+	-> typename _NodeGenerator::__node_ptr
 	{ return __node_gen(std::forward<_Kt>(__k)); }
     };
 
@@ -188,9 +188,9 @@  namespace __detail
 	typename __hashtable_alloc::__node_alloc_traits;
 
     public:
-      using __node_type = typename __hashtable_alloc::__node_type;
+      using __node_ptr = typename __hashtable_alloc::__node_ptr;
 
-      _ReuseOrAllocNode(__node_type* __nodes, __hashtable_alloc& __h)
+      _ReuseOrAllocNode(__node_ptr __nodes, __hashtable_alloc& __h)
       : _M_nodes(__nodes), _M_h(__h) { }
       _ReuseOrAllocNode(const _ReuseOrAllocNode&) = delete;
 
@@ -198,12 +198,12 @@  namespace __detail
       { _M_h._M_deallocate_nodes(_M_nodes); }
 
       template<typename... _Args>
-	__node_type*
+	__node_ptr
 	operator()(_Args&&... __args) const
 	{
 	  if (_M_nodes)
 	    {
-	      __node_type* __node = _M_nodes;
+	      __node_ptr __node = _M_nodes;
 	      _M_nodes = _M_nodes->_M_next();
 	      __node->_M_nxt = nullptr;
 	      auto& __a = _M_h._M_node_allocator();
@@ -224,7 +224,7 @@  namespace __detail
 	}
 
     private:
-      mutable __node_type* _M_nodes;
+      mutable __node_ptr _M_nodes;
       __hashtable_alloc& _M_h;
     };
 
@@ -237,13 +237,13 @@  namespace __detail
       using __hashtable_alloc = _Hashtable_alloc<_NodeAlloc>;
 
     public:
-      using __node_type = typename __hashtable_alloc::__node_type;
+      using __node_ptr = typename __hashtable_alloc::__node_ptr;
 
       _AllocNode(__hashtable_alloc& __h)
       : _M_h(__h) { }
 
       template<typename... _Args>
-	__node_type*
+	__node_ptr
 	operator()(_Args&&... __args) const
 	{ return _M_h._M_allocate_node(std::forward<_Args>(__args)...); }
 
@@ -1809,22 +1809,22 @@  namespace __detail
 	      _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits, true>::
     _M_equal(const __hashtable& __other) const
     {
-      using __node_type = typename __hashtable::__node_type;
+      using __node_ptr = typename __hashtable::__node_ptr;
       const __hashtable* __this = static_cast<const __hashtable*>(this);
       if (__this->size() != __other.size())
 	return false;
 
-      for (auto __itx = __this->begin(); __itx != __this->end(); ++__itx)
+      for (auto __x_n = __this->_M_begin(); __x_n; __x_n = __x_n->_M_next())
 	{
-	  std::size_t __ybkt = __other._M_bucket_index(*__itx._M_cur);
+	  std::size_t __ybkt = __other._M_bucket_index(*__x_n);
 	  auto __prev_n = __other._M_buckets[__ybkt];
 	  if (!__prev_n)
 	    return false;
 
-	  for (__node_type* __n = static_cast<__node_type*>(__prev_n->_M_nxt);;
+	  for (__node_ptr __n = static_cast<__node_ptr>(__prev_n->_M_nxt);;
 	       __n = __n->_M_next())
 	    {
-	      if (__n->_M_v() == *__itx)
+	      if (__n->_M_v() == __x_n->_M_v())
 		break;
 
 	      if (!__n->_M_nxt
@@ -1861,31 +1861,32 @@  namespace __detail
 	      _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits, false>::
     _M_equal(const __hashtable& __other) const
     {
-      using __node_type = typename __hashtable::__node_type;
+      using __node_ptr = typename __hashtable::__node_ptr;
+      using const_iterator = typename __hashtable::const_iterator;
       const __hashtable* __this = static_cast<const __hashtable*>(this);
       if (__this->size() != __other.size())
 	return false;
 
-      for (auto __itx = __this->begin(); __itx != __this->end();)
+      for (auto __x_n = __this->_M_begin(); __x_n;)
 	{
 	  std::size_t __x_count = 1;
-	  auto __itx_end = __itx;
-	  for (++__itx_end; __itx_end != __this->end()
-		 && __this->key_eq()(_ExtractKey{}(*__itx),
-				     _ExtractKey{}(*__itx_end));
-	       ++__itx_end)
+	  auto __x_n_end = __x_n->_M_next();
+	  for (; __x_n_end
+		 && __this->key_eq()(_ExtractKey{}(__x_n->_M_v()),
+				     _ExtractKey{}(__x_n_end->_M_v()));
+	       __x_n_end = __x_n_end->_M_next())
 	    ++__x_count;
 
-	  std::size_t __ybkt = __other._M_bucket_index(*__itx._M_cur);
+	  std::size_t __ybkt = __other._M_bucket_index(*__x_n);
 	  auto __y_prev_n = __other._M_buckets[__ybkt];
 	  if (!__y_prev_n)
 	    return false;
 
-	  __node_type* __y_n = static_cast<__node_type*>(__y_prev_n->_M_nxt);
+	  __node_ptr __y_n = static_cast<__node_ptr>(__y_prev_n->_M_nxt);
 	  for (;;)
 	    {
 	      if (__this->key_eq()(_ExtractKey{}(__y_n->_M_v()),
-				   _ExtractKey{}(*__itx)))
+				   _ExtractKey{}(__x_n->_M_v())))
 		break;
 
 	      auto __y_ref_n = __y_n;
@@ -1897,18 +1898,20 @@  namespace __detail
 		return false;
 	    }
 
-	  typename __hashtable::const_iterator __ity(__y_n);
-	  for (auto __ity_end = __ity; __ity_end != __other.end(); ++__ity_end)
+	  auto __y_n_end = __y_n;
+	  for (; __y_n_end; __y_n_end = __y_n_end->_M_next())
 	    if (--__x_count == 0)
 	      break;
 
 	  if (__x_count != 0)
 	    return false;
 
+	  const_iterator __itx(__x_n), __itx_end(__x_n_end);
+	  const_iterator __ity(__y_n);
 	  if (!std::is_permutation(__itx, __itx_end, __ity))
 	    return false;
 
-	  __itx = __itx_end;
+	  __x_n = __x_n_end;
 	}
       return true;
     }