libstdc++: Add fast path for std::format("{}", x) [PR110801]

Message ID 20231117155420.1513704-1-jwakely@redhat.com
State Superseded
Headers
Series libstdc++: Add fast path for std::format("{}", x) [PR110801] |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed

Commit Message

Jonathan Wakely Nov. 17, 2023, 3:53 p.m. UTC
  I'll probably push this before stage 1 closes.

I might move the new lambda out to a struct at namespace scope first
though.

-- >8 --

libstdc++-v3/ChangeLog:

	PR libstdc++/110801
	* include/std/format (_Sink_iter::_M_get_pointer)
	(_Sink_iter::_M_end_pointer): New functions.
	(_Sink::_M_get_pointer, _Sink::_M_end_pointer): New virtual
	functions.
	(_Seq_sink::_M_get_pointer, _Seq_sink::_M_end_pointer): New
	functions.
	(_Iter_sink<O, ContigIter>::_M_get_pointer): Likewise.
	(__do_vformat_to): Use new functions to optimize "{}" case.
---
 libstdc++-v3/include/std/format | 155 +++++++++++++++++++++++++++++++-
 1 file changed, 154 insertions(+), 1 deletion(-)
  

Patch

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 8ec1c8a0b9a..3a9c64e4ab9 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -2442,6 +2442,10 @@  namespace __format
       iter_difference_t<_Out> size;
     };
 
+_GLIBCXX_BEGIN_NAMESPACE_CONTAINER
+template<typename, typename> class vector;
+_GLIBCXX_END_NAMESPACE_CONTAINER
+
 /// @cond undocumented
 namespace __format
 {
@@ -2492,6 +2496,14 @@  namespace __format
       [[__gnu__::__always_inline__]]
       constexpr _Sink_iter
       operator++(int) { return *this; }
+
+      _CharT*
+      _M_get_pointer(size_t __n) const // Forwards to the sink; can be null.
+      { return _M_sink->_M_get_pointer(__n); }
+
+      void
+      _M_end_pointer(size_t __n) const // __n is the number of chars written.
+      { _M_sink->_M_end_pointer(__n); }
     };
 
   // Abstract base class for type-erased character sinks.
@@ -2508,6 +2520,7 @@  namespace __format
       // Called when the span is full, to make more space available.
       // Precondition: _M_next != _M_span.begin()
       // Postcondition: _M_next != _M_span.end()
+      // TODO: remove the precondition? could make overflow handle it.
       virtual void _M_overflow() = 0;
 
     protected:
@@ -2572,6 +2585,32 @@  namespace __format
 	  }
       }
 
+      // If this returns a non-null pointer it can be used to write up to
+      // __n characters directly to the sink, to avoid unwanted buffering.
+      // If anything is written through the pointer then there must be a call
+      // to _M_end_pointer(__n2) before any call to another member function of
+      // this object, where __n2 is the number of characters written.
+      // TODO: rewrite this direct access as an RAII type that exposes a span.
+      virtual _CharT*
+      _M_get_pointer(size_t __n)
+      {
+	auto __avail = _M_unused();
+	if (__n <= __avail.size()) // Already enough room.
+	  return __avail.data();
+
+	if (__n > _M_span.size()) // Cannot meet the request.
+	  return nullptr;
+
+	_M_overflow(); // Make more space available.
+	__avail = _M_unused();
+	return __n <= __avail.size() ? __avail.data() : nullptr;
+      }
+
+      // Precondition: no _M_write or _M_overflow call since _M_get_pointer.
+      virtual void
+      _M_end_pointer(size_t __n)
+      { _M_next += __n; } // Advance past the __n characters written.
+
     public:
       _Sink(const _Sink&) = delete;
       _Sink& operator=(const _Sink&) = delete;
@@ -2596,6 +2635,8 @@  namespace __format
       { }
     };
 
+  using _GLIBCXX_STD_C::vector;
+
   // A sink that fills a sequence (e.g. std::string, std::vector, std::deque).
   // Writes to a buffer then appends that to the sequence when it fills up.
   template<typename _Seq>
@@ -2619,6 +2660,46 @@  namespace __format
 	this->_M_rewind();
       }
 
+      _CharT*
+      _M_get_pointer(size_t __n) override // Prefer space in _M_seq itself.
+      {
+	if constexpr (__is_specialization_of<_Seq, basic_string>
+			|| __is_specialization_of<_Seq, vector>)
+	  {
+	    // Flush the buffer to _M_seq first:
+	    if (this->_M_used().size())
+	      _M_overflow();
+	    // Expand _M_seq to make __n new characters available:
+	    const auto __sz = _M_seq.size();
+	    if constexpr (is_same_v<string, _Seq> || is_same_v<wstring, _Seq>)
+	      _M_seq.__resize_and_overwrite(__sz + __n,
+					    [](auto, auto __n2) {
+					      return __n2;
+					    });
+	    else
+	      _M_seq.resize(__sz + __n);
+	    // Set _M_used() to be a span over the original part of _M_seq:
+	    this->_M_reset(_M_seq, __sz);
+	    // And return a pointer to the new portion:
+	    return this->_M_unused().data();
+	  }
+	else // Try to use the base class' buffer, forwarding the request size.
+	  return _Sink<_CharT>::_M_get_pointer(__n);
+      }
+
+      void
+      _M_end_pointer(size_t __n) override
+      {
+	if constexpr (__is_specialization_of<_Seq, basic_string>
+			|| __is_specialization_of<_Seq, vector>)
+	  {
+	    _M_seq.resize(this->_M_used().size() + __n); // Trim unwritten tail.
+	    this->_M_reset(this->_M_buf); // Switch back to using buffer.
+	  }
+	else // Characters went to the base class' buffer, so account for them.
+	  _Sink<_CharT>::_M_end_pointer(__n);
+      }
+
     public:
       // TODO: for SSO string, use SSO buffer as initial span, then switch
       // to _M_buf if it overflows? Or even do that for all unused capacity?
@@ -2744,6 +2825,21 @@  namespace __format
 	  }
       }
 
+      _CharT*
+      _M_get_pointer(size_t __n) final
+      {
+	auto __avail = this->_M_unused();
+	if (__n > __avail.size())
+	  {
+	    if (_M_max >= 0) // NOTE(review): presumably a size limit; confirm.
+	      return nullptr; // cannot grow
+
+	    auto __s = this->_M_used(); // Extend the span __n past what is used.
+	    this->_M_reset({__s.data(), __s.size() + __n}, __s.size());
+	  }
+	return __avail.data(); // Still valid: _M_reset kept the same storage.
+      }
+
     private:
       static span<_CharT>
       _S_make_span(_CharT* __ptr, iter_difference_t<_OutIter> __n,
@@ -2773,7 +2869,7 @@  namespace __format
 	uint64_t __off = reinterpret_cast<uint64_t>(__ptr) % 1024;
 	__n = (1024 - __off) / sizeof(_CharT);
 	if (__n > 0) [[likely]]
-	return {__ptr, static_cast<size_t>(__n)};
+	  return {__ptr, static_cast<size_t>(__n)};
 	else // Misaligned/packed buffer of wchar_t?
 	  return {__ptr, 1};
       }
@@ -3835,6 +3931,63 @@  namespace __format
       else
 	__sink_out = __sink.out();
 
+      if constexpr (is_same_v<_CharT, char>) // Fast path for "{}" only.
+	if (__fmt.size() == 2 && __fmt[0] == '{' && __fmt[1] == '}')
+	  {
+	    bool __done = false; // Stays false if the sink gave no pointer.
+	    std::visit_format_arg([&](auto& __arg) {
+	      using _Tp = remove_cvref_t<decltype(__arg)>;
+	      if constexpr (is_same_v<_Tp, bool>)
+		{
+		  if (auto __ptr = __sink_out._M_get_pointer(4 + !__arg))
+		    {
+		      if (__arg)
+			__builtin_memcpy(__ptr, "true", 4);
+		      else
+			__builtin_memcpy(__ptr, "false", 5);
+		      __sink_out._M_end_pointer(4 + !__arg);
+		      __done = true;
+		    }
+		}
+	      else if constexpr (is_integral_v<_Tp> && !is_same_v<_Tp, char>)
+		{ // A char formats as a character, so it takes the slow path.
+		  make_unsigned_t<_Tp> __uval;
+		  const bool __neg = __arg < 0;
+		  if (__neg)
+		    __uval = make_unsigned_t<_Tp>(~__arg) + 1u;
+		  else
+		    __uval = __arg;
+		  const auto __n = __detail::__to_chars_len(__uval) + __neg;
+		  if (auto __ptr = __sink_out._M_get_pointer(__n))
+		    { // __n counts the sign too; the digit count is __n - __neg.
+		      *__ptr = '-'; // Overwritten by the first digit if !__neg.
+		      __detail::__to_chars_10_impl(__ptr + (int)__neg,
+						   __n - __neg, __uval);
+		      __sink_out._M_end_pointer(__n);
+		      __done = true;
+		    }
+		}
+	      else if constexpr (is_convertible_v<_Tp, string_view>)
+		{
+		  string_view __sv = __arg;
+		  if (auto __ptr = __sink_out._M_get_pointer(__sv.size()))
+		    {
+		      __builtin_memcpy(__ptr, __sv.data(), __sv.size());
+		      __sink_out._M_end_pointer(__sv.size());
+		      __done = true;
+		    }
+		}
+	    }, __args.get(0));
+
+	    if (__done) // Otherwise fall through to the general formatting code.
+	      {
+		if constexpr (is_same_v<_Out, _Sink_iter<_CharT>>)
+		  return __sink_out;
+		else
+		  return std::move(__sink)._M_finish().out;
+	      }
+	  }
+
       auto __ctx = __loc == nullptr
 		     ? _Context(__args, __sink_out)
 		     : _Context(__args, __sink_out, *__loc);