diff mbox series

[v8,1/9] stdlib: Add arc4random, arc4random_buf, and arc4random_uniform (BZ #4417)

Message ID 20220629213428.3065430-2-adhemerval.zanella@linaro.org
State Superseded
Headers show
Series Add arc4random support | expand

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Adhemerval Zanella Netto June 29, 2022, 9:34 p.m. UTC
The implementation is based on scalar Chacha20 with per-thread cache.
It uses getrandom or /dev/urandom as fallback to get the initial entropy,
and reseeds the internal state on every 16MB of consumed buffer.

To improve performance and lower memory consumption the per-thread cache
is allocated lazily on the first arc4random function call, and if the
memory allocation fails, getrandom or /dev/urandom is used as fallback.
The cache is also cleared on thread exit iff it was initialized (so if
arc4random is not called it is not touched).

Although it is lock-free, arc4random is still not async-signal-safe
(the per thread state is not updated atomically).

The ChaCha20 implementation is based on RFC8439 [1], omitting the final
XOR of the keystream with the plaintext because the plaintext is a
stream of zeros.  This strategy is similar to what OpenBSD arc4random
does.

The arc4random_uniform is based on previous work by Florian Weimer,
where the algorithm is based on Jérémie Lumbroso paper Optimal Discrete
Uniform Generation from Coin Flips, and Applications (2013) [2], who
credits Donald E. Knuth and Andrew C. Yao, The complexity of nonuniform
random number generation (1976), for solving the general case.

The main advantage of this method is that the unit of randomness is not
the uniform random variable (uint32_t), but a random bit.  It optimizes the
internal buffer sampling by initially consuming a 32-bit random variable
and then sampling byte per byte.  Depending on the upper bound requested,
it might lead to better CPU utilization.

Checked on x86_64-linux-gnu, aarch64-linux, and powerpc64le-linux-gnu.

Co-authored-by: Florian Weimer <fweimer@redhat.com>
Reviewed-by: Yann Droneaud <ydroneaud@opteya.com>

[1] https://datatracker.ietf.org/doc/html/rfc8439
[2] https://arxiv.org/pdf/1304.1916.pdf
---
 NEWS                                          |   4 +
 include/stdlib.h                              |  13 ++
 malloc/thread-freeres.c                       |   2 +-
 nptl/allocatestack.c                          |   5 +-
 stdlib/Makefile                               |   2 +
 stdlib/Versions                               |   5 +
 stdlib/arc4random.c                           | 207 ++++++++++++++++++
 stdlib/arc4random.h                           |  45 ++++
 stdlib/arc4random_uniform.c                   | 140 ++++++++++++
 stdlib/chacha20.c                             | 187 ++++++++++++++++
 stdlib/stdlib.h                               |  14 ++
 sysdeps/generic/not-cancel.h                  |   2 +
 sysdeps/generic/tls-internal-struct.h         |   3 +
 sysdeps/generic/tls-internal.c                |  17 ++
 sysdeps/generic/tls-internal.h                |   7 +-
 sysdeps/mach/hurd/_Fork.c                     |   2 +
 sysdeps/mach/hurd/i386/libc.abilist           |   3 +
 sysdeps/mach/hurd/not-cancel.h                |   3 +
 sysdeps/nptl/_Fork.c                          |   2 +
 sysdeps/unix/sysv/linux/aarch64/libc.abilist  |   3 +
 sysdeps/unix/sysv/linux/alpha/libc.abilist    |   3 +
 sysdeps/unix/sysv/linux/arc/libc.abilist      |   3 +
 sysdeps/unix/sysv/linux/arm/be/libc.abilist   |   3 +
 sysdeps/unix/sysv/linux/arm/le/libc.abilist   |   3 +
 sysdeps/unix/sysv/linux/csky/libc.abilist     |   3 +
 sysdeps/unix/sysv/linux/hppa/libc.abilist     |   3 +
 sysdeps/unix/sysv/linux/i386/libc.abilist     |   3 +
 sysdeps/unix/sysv/linux/ia64/libc.abilist     |   3 +
 .../sysv/linux/m68k/coldfire/libc.abilist     |   3 +
 .../unix/sysv/linux/m68k/m680x0/libc.abilist  |   3 +
 .../sysv/linux/microblaze/be/libc.abilist     |   3 +
 .../sysv/linux/microblaze/le/libc.abilist     |   3 +
 .../sysv/linux/mips/mips32/fpu/libc.abilist   |   3 +
 .../sysv/linux/mips/mips32/nofpu/libc.abilist |   3 +
 .../sysv/linux/mips/mips64/n32/libc.abilist   |   3 +
 .../sysv/linux/mips/mips64/n64/libc.abilist   |   3 +
 sysdeps/unix/sysv/linux/nios2/libc.abilist    |   3 +
 sysdeps/unix/sysv/linux/not-cancel.h          |   7 +
 sysdeps/unix/sysv/linux/or1k/libc.abilist     |   3 +
 .../linux/powerpc/powerpc32/fpu/libc.abilist  |   3 +
 .../powerpc/powerpc32/nofpu/libc.abilist      |   3 +
 .../linux/powerpc/powerpc64/be/libc.abilist   |   3 +
 .../linux/powerpc/powerpc64/le/libc.abilist   |   3 +
 .../unix/sysv/linux/riscv/rv32/libc.abilist   |   3 +
 .../unix/sysv/linux/riscv/rv64/libc.abilist   |   3 +
 .../unix/sysv/linux/s390/s390-32/libc.abilist |   3 +
 .../unix/sysv/linux/s390/s390-64/libc.abilist |   3 +
 sysdeps/unix/sysv/linux/sh/be/libc.abilist    |   3 +
 sysdeps/unix/sysv/linux/sh/le/libc.abilist    |   3 +
 .../sysv/linux/sparc/sparc32/libc.abilist     |   3 +
 .../sysv/linux/sparc/sparc64/libc.abilist     |   3 +
 sysdeps/unix/sysv/linux/tls-internal.c        |  38 +++-
 sysdeps/unix/sysv/linux/tls-internal.h        |  16 +-
 .../unix/sysv/linux/x86_64/64/libc.abilist    |   3 +
 .../unix/sysv/linux/x86_64/x32/libc.abilist   |   3 +
 55 files changed, 808 insertions(+), 15 deletions(-)
 create mode 100644 stdlib/arc4random.c
 create mode 100644 stdlib/arc4random.h
 create mode 100644 stdlib/arc4random_uniform.c
 create mode 100644 stdlib/chacha20.c

Comments

Florian Weimer July 12, 2022, 8:57 a.m. UTC | #1
* Adhemerval Zanella:

> diff --git a/NEWS b/NEWS
> index b0a3d7e512..f9dc316ead 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -46,6 +46,10 @@ Major new features:
>    to more flexibly configure and operate on filesystem mounts.  The new
>    mount APIs are specifically designed to work with namespaces.
>  
> +* The functions arc4random, arc4random_buf, and arc4random_uniform have been
> +  added.  The functions use a cryptographic pseudo-random number generator
> +  based on ChaCha20 initilized with entropy from the kernel.

This implies that the generator is cryptographically strong.  Maybe it
should not mention ChaCha20 and “cryptographic”.

> diff --git a/include/stdlib.h b/include/stdlib.h
> index 1c6f70b082..c5f5628f22 100644
> --- a/include/stdlib.h
> +++ b/include/stdlib.h

> +/* Called from the fork function to reinitialize the internal lock in the
> +   child process.  This avoids deadlocks if fork is called in multi-threaded
> +   processes.  */
> +extern void __arc4random_fork_subprocess (void) attribute_hidden;

Looks like the comment is outdated.  There isn't a lock anymore.

> diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
> index 01a282f3f6..ada65d40c2 100644
> --- a/nptl/allocatestack.c
> +++ b/nptl/allocatestack.c

> @@ -559,6 +560,8 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
>  #endif
>    pd->robust_head.list = &pd->robust_head;
>  
> +  __glibc_tls_internal_init (&pd->tls_state);

I think this is a bit confusing because it's not aligned with the main
thread.  memset would probably be clearer because it mirrors setup for
the main thread (zeroed implicitly).

> diff --git a/stdlib/arc4random.c b/stdlib/arc4random.c
> new file mode 100644
> index 0000000000..92d1da92cc
> --- /dev/null
> +++ b/stdlib/arc4random.c
> @@ -0,0 +1,207 @@
> +/* Pseudo Random Number Generator based on ChaCha20.
> +   Copyright (C) 2020 Free Software Foundation, Inc.

Copyright year 2022.  Please check the other files, too.

> +/* arc4random keeps two counters: 'have' is the current valid bytes not yet
> +   consumed in 'buf' while 'count' is the maximum number of bytes until a
> +   reseed.
> +
> +   Both the initial seed and reseed try to obtain entropy from the kernel
> +   and abort the process if none could be obtained.
> +
> +   The state 'buf' improves the usage of the cipher calls, allowing to call
> +   optimized implementations (if the architecture provides it) and optimize
> +   arc4random calls (since only multiple calls it will encrypt the next
> +   block).  */

I don't understand the “since only multiple calls it will encrypt the
next block” part.

> +/* Called from the fork function to reset the state.  */
> +void
> +__arc4random_fork_subprocess (void)
> +{
> +  struct arc4random_state_t *state = __glibc_tls_internal()->rand_state;
> +  if (state != NULL)
> +    {
> +      explicit_bzero (state, sizeof (struct arc4random_state_t));

sizeof (*state)?

> +      /* Force key init.  */
> +      state->count = -1;
> +    }
> +}
> +
> +/* Return the current thread random state or try to create one if there is
> +   none available.  In the case malloc can not allocate a state, arc4random
> +   will try to get entropy with arc4random_getentropy.  */
> +static struct arc4random_state_t *
> +arc4random_get_state (void)
> +{
> +  struct arc4random_state_t *state = __glibc_tls_internal()->rand_state;

Missing space before ().

> +  if (state == NULL)
> +    {
> +      state = malloc (sizeof (struct arc4random_state_t));
> +      if (state != NULL)
> +	{
> +	  /* Force key initialization on first call.  */
> +	  state->count = -1;
> +	  __glibc_tls_internal()->rand_state = state;

Missing space before ().

> +	}
> +    }
> +  return state;
> +}
> +
> +static void
> +arc4random_getrandom_failure (void)
> +{
> +  __libc_fatal ("Fatal glibc error: cannot get entropy for arc4random\n");
> +}
> +
> +static void
> +arc4random_rekey (struct arc4random_state_t *state, uint8_t *rnd, size_t rndlen)
> +{
> +  chacha20_crypt (state->ctx, state->buf, state->buf, sizeof state->buf);
> +
> +  /* Mix some extra entropy if provided.  */
> +  if (rnd != NULL)
> +    {
> +      size_t m = MIN (rndlen, CHACHA20_KEY_SIZE + CHACHA20_IV_SIZE);
> +      for (size_t i = 0; i < m; i++)
> +	state->buf[i] ^= rnd[i];
> +    }
> +
> +  /* Immediately reinit for backtracking resistance.  */
> +  chacha20_init (state->ctx, state->buf, state->buf + CHACHA20_KEY_SIZE);
> +  memset (state->buf, 0, CHACHA20_KEY_SIZE + CHACHA20_IV_SIZE);
> +  state->have = sizeof (state->buf) - (CHACHA20_KEY_SIZE + CHACHA20_IV_SIZE);
> +}

Should that memset be explicit_bzero for clarity?

> +static void
> +arc4random_getentropy (void *rnd, size_t len)
> +{
> +  if (__getrandom_nocancel (rnd, len, GRND_NONBLOCK) == len)
> +    return;
> +
> +  int fd = __open64_nocancel ("/dev/urandom", O_RDONLY | O_CLOEXEC);

This could use TEMP_FAILURE_RETRY, too.

> +  if (fd != -1)
> +    {
> +      uint8_t *p = rnd;
> +      uint8_t *end = p + len;
> +      do
> +	{
> +	  ssize_t ret = TEMP_FAILURE_RETRY (__read_nocancel (fd, p, end - p));
> +	  if (ret <= 0)
> +	    arc4random_getrandom_failure ();
> +	  p += ret;
> +	}
> +      while (p < end);
> +
> +      if (__close_nocancel (fd) == 0)
> +	return;
> +    }
> +  arc4random_getrandom_failure ();
> +}
> +
> +/* Reinit the thread context by reseeding the cipher state with kernel
> +   entropy.  */
> +static void
> +arc4random_check_stir (struct arc4random_state_t *state, size_t len)

Could you add a comment describing the len parameter?

> +{
> +  if (state->count < len || state->count == -1)
> +    {
> +      uint8_t rnd[CHACHA20_KEY_SIZE + CHACHA20_IV_SIZE];
> +      arc4random_getentropy (rnd, sizeof rnd);
> +
> +      if (state->count == -1)
> +	chacha20_init (state->ctx, rnd, rnd + CHACHA20_KEY_SIZE);
> +      else
> +	arc4random_rekey (state, rnd, sizeof rnd);
> +
> +      explicit_bzero (rnd, sizeof rnd);
> +
> +      state->have = 0;
> +      memset (state->buf, 0, sizeof state->buf);
> +      state->count = CHACHA20_RESEED_SIZE;
> +    }
> +  if (state->count <= len)
> +    state->count = 0;
> +  else
> +    state->count -= len;
> +}

It's not clear to me if we want to enter the true branch of the second
if statement if we already executed the true branch of the first if
statement.

> +void
> +__arc4random_buf (void *buffer, size_t len)
> +{
> +  struct arc4random_state_t *state = arc4random_get_state ();
> +  if (__glibc_unlikely (state == NULL))
> +    {
> +      arc4random_getentropy (buffer, len);
> +      return;
> +    }
> +
> +  arc4random_check_stir (state, len);
> +  while (len > 0)
> +    {
> +      if (state->have > 0)
> +	{
> +	  size_t m = MIN (len, state->have);
> +	  uint8_t *ks = state->buf + sizeof (state->buf) - state->have;
> +	  memcpy (buffer, ks, m);
> +	  memset (ks, 0, m);

Should be explicit_bzero, I think.

> +	  buffer += m;
> +	  len -= m;
> +	  state->have -= m;
> +	}
> +      if (state->have == 0)
> +	arc4random_rekey (state, NULL, 0);
> +    }
> +}
> +libc_hidden_def (__arc4random_buf)
> +weak_alias (__arc4random_buf, arc4random_buf)
> +
> +uint32_t
> +__arc4random (void)
> +{
> +  uint32_t r;
> +
> +  struct arc4random_state_t *state = arc4random_get_state ();
> +  if (__glibc_unlikely (state == NULL))
> +    {
> +      arc4random_getentropy (&r, sizeof (uint32_t));
> +      return r;
> +    }
> +
> +  arc4random_check_stir (state, sizeof (uint32_t));
> +  if (state->have < sizeof (uint32_t))
> +    arc4random_rekey (state, NULL, 0);
> +  uint8_t *ks = state->buf + sizeof (state->buf) - state->have;
> +  memcpy (&r, ks, sizeof (uint32_t));
> +  memset (ks, 0, sizeof (uint32_t));
> +  state->have -= sizeof (uint32_t);
> +
> +  return r;
> +}

Why not simply call __arc4random_buf?  If you want to retain the
optimization, turn the implementation of __arc4random_buf into an inline
function and call it here and from __arc4random_buf.

> +libc_hidden_def (__arc4random)
> +weak_alias (__arc4random, arc4random)
> diff --git a/stdlib/arc4random.h b/stdlib/arc4random.h
> new file mode 100644
> index 0000000000..cdca639d9d
> --- /dev/null
> +++ b/stdlib/arc4random.h
> @@ -0,0 +1,45 @@

> +/* Internal arc4random buffer, used on each feedback step so offer some
> +   backtracking protection and to allow better used of vectorized
> +   chacha20 implementations.  */
> +#define CHACHA20_BUFSIZE        (8 * CHACHA20_BLOCK_SIZE)

Please add a _Static_assert that this is larger than CHACHA20_KEY_SIZE +
CHACHA20_IV_SIZE because the implementation assumes this.

> diff --git a/stdlib/chacha20.c b/stdlib/chacha20.c
> new file mode 100644
> index 0000000000..4549fc780f
> --- /dev/null
> +++ b/stdlib/chacha20.c
> @@ -0,0 +1,187 @@

> +/* 32-bit stream position, then 96-bit nonce.  */
> +#define CHACHA20_IV_SIZE	16
> +#define CHACHA20_KEY_SIZE	32
> +
> +#define CHACHA20_STATE_LEN	16
> +
> +/* The ChaCha20 implementation is based on RFC8439 [1], omitting the final
> +   XOR of the keystream with the plaintext because the plaintext is a
> +   stream of zeros.  */

You can also remove the byte swapping because the key is random and
discarded, so we don't care about its precise value.  You can do the
swapping in the ChaCha20 test instead, to get reproducible test vectors.

Hmm, given that this impacts the assembler implementations as well,
maybe we can do this at a later stage.

> diff --git a/stdlib/stdlib.h b/stdlib/stdlib.h
> index bf7cd438e1..f02a713a7b 100644
> --- a/stdlib/stdlib.h
> +++ b/stdlib/stdlib.h
> @@ -485,6 +485,7 @@ extern unsigned short int *seed48 (unsigned short int __seed16v[3])
>  extern void lcong48 (unsigned short int __param[7]) __THROW __nonnull ((1));
>  
>  # ifdef __USE_MISC
> +#  include <bits/stdint-uintn.h>
>  /* Data structure for communication with thread safe versions.  This
>     type is to be regarded as opaque.  It's only exported because users
>     have to allocate objects of this type.  */

> +/* Return a random number between zero (inclusive) and the specified
> +   limit (exclusive).  */
> +extern uint32_t arc4random_uniform (uint32_t __upper_bound)
> +     __THROW __wur;
>  # endif	/* Use misc.  */
>  #endif	/* Use misc or X/Open.  */

I'm a bit worried about that <bits/stdint-uintn.h> inclusion.  It will
be a bit confusing if these types become available via <stdlib.h> in
some glibc versions.  Maybe use __uint32_t instead?

> diff --git a/sysdeps/generic/tls-internal-struct.h b/sysdeps/generic/tls-internal-struct.h
> index d76c715a96..81a71ac54b 100644
> --- a/sysdeps/generic/tls-internal-struct.h
> +++ b/sysdeps/generic/tls-internal-struct.h
> @@ -19,10 +19,13 @@
>  #ifndef _TLS_INTERNAL_STRUCT_H
>  #define _TLS_INTERNAL_STRUCT_H 1
>  
> +#include <stdlib/arc4random.h>
> +

You can use a forward declaration instead.  Minimizing <descr.h>
dependencies seems desirable to me.

>  struct tls_internal_t
>  {
>    char *strsignal_buf;
>    char *strerror_l_buf;
> +  struct arc4random_state_t *rand_state;
>  };


> diff --git a/sysdeps/generic/tls-internal.c b/sysdeps/generic/tls-internal.c
> index 898c20b61c..ec0ceeebd1 100644
> --- a/sysdeps/generic/tls-internal.c
> +++ b/sysdeps/generic/tls-internal.c
> @@ -16,6 +16,23 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>  
> +#include <string.h>
>  #include <tls-internal.h>
>  
>  __thread struct tls_internal_t __tls_internal;
> +
> +void
> +__glibc_tls_internal_free (void)
> +{
> +  free (__tls_internal.strsignal_buf);
> +  free (__tls_internal.strerror_l_buf);
> +
> +  if (__tls_internal.rand_state != NULL)
> +    {
> +      /* Clear any lingering random state prior so if the thread stack is
> +	 cached it won't leak any data.  */
> +      explicit_bzero (__tls_internal.rand_state,
> +		      sizeof (struct arc4random_state_t));
> +      free (__tls_internal.rand_state);
> +    }

sizeof (*__tls_internal.rand_state)?

> diff --git a/sysdeps/mach/hurd/_Fork.c b/sysdeps/mach/hurd/_Fork.c
> index e60b86fab1..1c44b39c5b 100644
> --- a/sysdeps/mach/hurd/_Fork.c
> +++ b/sysdeps/mach/hurd/_Fork.c
> @@ -665,6 +665,8 @@ retry:
>        /* Run things that want to run in the child task to set up.  */
>        RUN_HOOK (_hurd_fork_child_hook, ());
>  
> +      call_function_static_weak (__arc4random_fork_subprocess);
> +

Hmm, is the ordering correct?  Can _hurd_fork_child_hook run user code?
(This probably needs review from Hurd developers.)

> diff --git a/sysdeps/unix/sysv/linux/tls-internal.c b/sysdeps/unix/sysv/linux/tls-internal.c
> index 6e25b021ab..045176197e 100644
> --- a/sysdeps/unix/sysv/linux/tls-internal.c
> +++ b/sysdeps/unix/sysv/linux/tls-internal.c
> @@ -1 +1,37 @@

> +void
> +__glibc_tls_internal_free (void)
> +{
> +  struct pthread *self = THREAD_SELF;
> +  free (self->tls_state.strsignal_buf);
> +  free (self->tls_state.strerror_l_buf);
> +
> +  if (self->tls_state.rand_state != NULL)
> +    {
> +      /* Clear any lingering random state prior so if the thread stack is
> +         cached it won't leak any data.  */
> +      explicit_bzero (self->tls_state.rand_state,
> +		      sizeof (struct arc4random_state_t));

sizeof (*self->tls_state.rand_state)?

> diff --git a/sysdeps/unix/sysv/linux/tls-internal.h b/sysdeps/unix/sysv/linux/tls-internal.h
> index f7a1a62135..f268a2d43b 100644
> --- a/sysdeps/unix/sysv/linux/tls-internal.h
> +++ b/sysdeps/unix/sysv/linux/tls-internal.h
> @@ -22,17 +22,21 @@
>  #include <stdlib.h>
>  #include <pthreadP.h>
>  
> +static inline void
> +__glibc_tls_internal_init (struct tls_internal_t *tls_state)
> +{
> +  tls_state->strsignal_buf = NULL;
> +  tls_state->strerror_l_buf = NULL;
> +  tls_state->rand_state = NULL;
> +}

See the comment about using memset earlier.

Thanks,
Florian
Adhemerval Zanella Netto July 12, 2022, 4:57 p.m. UTC | #2
On 12/07/22 05:57, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> diff --git a/NEWS b/NEWS
>> index b0a3d7e512..f9dc316ead 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -46,6 +46,10 @@ Major new features:
>>     to more flexibly configure and operate on filesystem mounts.  The new
>>     mount APIs are specifically designed to work with namespaces.
>>   
>> +* The functions arc4random, arc4random_buf, and arc4random_uniform have been
>> +  added.  The functions use a cryptographic pseudo-random number generator
>> +  based on ChaCha20 initilized with entropy from the kernel.
> 
> This implies that the generator is cryptographically strong.  Maybe it
> should not mention ChaCha20 and “cryptographic”.

Ack, I changed to:

   * The functions arc4random, arc4random_buf, and arc4random_uniform have been
   added.  The functions use a pseudo-random number generator along with
   entropy from the kernel.

> 
>> diff --git a/include/stdlib.h b/include/stdlib.h
>> index 1c6f70b082..c5f5628f22 100644
>> --- a/include/stdlib.h
>> +++ b/include/stdlib.h
> 
>> +/* Called from the fork function to reinitialize the internal lock in the
>> +   child process.  This avoids deadlocks if fork is called in multi-threaded
>> +   processes.  */
>> +extern void __arc4random_fork_subprocess (void) attribute_hidden;
> 
> Looks like the comment is outdated.  There isn't a lock anymore.

Indeed, I changed to:

/* Called from the fork function to reinitialize the internal cipher state
   in the child process.  */

> 
>> diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
>> index 01a282f3f6..ada65d40c2 100644
>> --- a/nptl/allocatestack.c
>> +++ b/nptl/allocatestack.c
> 
>> @@ -559,6 +560,8 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
>>   #endif
>>     pd->robust_head.list = &pd->robust_head;
>>   
>> +  __glibc_tls_internal_init (&pd->tls_state);
> 
> I think this is a bit confusing because it's not aligned with the main
> thread.  memset would probably be clearer because it mirrors setup for
> the main thread (zeroed implicitly).

Alright, I added it in a previous version because it required non-zero
initialization.  I will change it to memset.

> 
>> diff --git a/stdlib/arc4random.c b/stdlib/arc4random.c
>> new file mode 100644
>> index 0000000000..92d1da92cc
>> --- /dev/null
>> +++ b/stdlib/arc4random.c
>> @@ -0,0 +1,207 @@
>> +/* Pseudo Random Number Generator based on ChaCha20.
>> +   Copyright (C) 2020 Free Software Foundation, Inc.
> 
> Copyright year 2022.  Please check the other files, too.

Ack.

> 
>> +/* arc4random keeps two counters: 'have' is the current valid bytes not yet
>> +   consumed in 'buf' while 'count' is the maximum number of bytes until a
>> +   reseed.
>> +
>> +   Both the initial seed and reseed try to obtain entropy from the kernel
>> +   and abort the process if none could be obtained.
>> +
>> +   The state 'buf' improves the usage of the cipher calls, allowing to call
>> +   optimized implementations (if the architecture provides it) and optimize
>> +   arc4random calls (since only multiple calls it will encrypt the next
>> +   block).  */
> 
> I don't understand the “since only multiple calls it will encrypt the
> next block” part.

I changed it to 'and minimize function call overhead'.  Using the generic
implementation, a buffer of 8 times the chacha20 block size shows about 2x
more throughput on aarch64.

A buffer with 4x the chacha20 block size shows slightly less performance,
so one option might be to make the buffer sizes arch-specific (since AVX2,
and potentially AVX512, require a large block size for the arch-specific
implementations).

> 
>> +/* Called from the fork function to reset the state.  */
>> +void
>> +__arc4random_fork_subprocess (void)
>> +{
>> +  struct arc4random_state_t *state = __glibc_tls_internal()->rand_state;
>> +  if (state != NULL)
>> +    {
>> +      explicit_bzero (state, sizeof (struct arc4random_state_t));
> 
> sizeof (*state)?

Ack.

> 
>> +      /* Force key init.  */
>> +      state->count = -1;
>> +    }
>> +}
>> +
>> +/* Return the current thread random state or try to create one if there is
>> +   none available.  In the case malloc can not allocate a state, arc4random
>> +   will try to get entropy with arc4random_getentropy.  */
>> +static struct arc4random_state_t *
>> +arc4random_get_state (void)
>> +{
>> +  struct arc4random_state_t *state = __glibc_tls_internal()->rand_state;
> 
> Missing space before ().

Ack.

> 
>> +  if (state == NULL)
>> +    {
>> +      state = malloc (sizeof (struct arc4random_state_t));
>> +      if (state != NULL)
>> +	{
>> +	  /* Force key initialization on first call.  */
>> +	  state->count = -1;
>> +	  __glibc_tls_internal()->rand_state = state;
> 
> Missing space before ().

Ack.

> 
>> +	}
>> +    }
>> +  return state;
>> +}
>> +
>> +static void
>> +arc4random_getrandom_failure (void)
>> +{
>> +  __libc_fatal ("Fatal glibc error: cannot get entropy for arc4random\n");
>> +}
>> +
>> +static void
>> +arc4random_rekey (struct arc4random_state_t *state, uint8_t *rnd, size_t rndlen)
>> +{
>> +  chacha20_crypt (state->ctx, state->buf, state->buf, sizeof state->buf);
>> +
>> +  /* Mix some extra entropy if provided.  */
>> +  if (rnd != NULL)
>> +    {
>> +      size_t m = MIN (rndlen, CHACHA20_KEY_SIZE + CHACHA20_IV_SIZE);
>> +      for (size_t i = 0; i < m; i++)
>> +	state->buf[i] ^= rnd[i];
>> +    }
>> +
>> +  /* Immediately reinit for backtracking resistance.  */
>> +  chacha20_init (state->ctx, state->buf, state->buf + CHACHA20_KEY_SIZE);
>> +  memset (state->buf, 0, CHACHA20_KEY_SIZE + CHACHA20_IV_SIZE);
>> +  state->have = sizeof (state->buf) - (CHACHA20_KEY_SIZE + CHACHA20_IV_SIZE);
>> +}
> 
> Should that memset be explicit_bzero for clarity?

Ack, it makes sense.

> 
>> +static void
>> +arc4random_getentropy (void *rnd, size_t len)
>> +{
>> +  if (__getrandom_nocancel (rnd, len, GRND_NONBLOCK) == len)
>> +    return;
>> +
>> +  int fd = __open64_nocancel ("/dev/urandom", O_RDONLY | O_CLOEXEC);
> 
> This could use TEMP_FAILURE_RETRY, too.

Hum, it makes sense since we abort on a failure.

> 
>> +  if (fd != -1)
>> +    {
>> +      uint8_t *p = rnd;
>> +      uint8_t *end = p + len;
>> +      do
>> +	{
>> +	  ssize_t ret = TEMP_FAILURE_RETRY (__read_nocancel (fd, p, end - p));
>> +	  if (ret <= 0)
>> +	    arc4random_getrandom_failure ();
>> +	  p += ret;
>> +	}
>> +      while (p < end);
>> +
>> +      if (__close_nocancel (fd) == 0)
>> +	return;
>> +    }
>> +  arc4random_getrandom_failure ();
>> +}
>> +
>> +/* Reinit the thread context by reseeding the cipher state with kernel
>> +   entropy.  */
>> +static void
>> +arc4random_check_stir (struct arc4random_state_t *state, size_t len)
> 
> Could you add a comment describing the len parameter?

I changed to:

/* Check if the thread context STATE should be reseeded with kernel entropy
    depending on the requested LEN bytes.  If there is less than requested,
    the state is either initialized or reseeded, otherwise the internal
    counter is decremented by the requested length.  */


> 
>> +{
>> +  if (state->count < len || state->count == -1)
>> +    {
>> +      uint8_t rnd[CHACHA20_KEY_SIZE + CHACHA20_IV_SIZE];
>> +      arc4random_getentropy (rnd, sizeof rnd);
>> +
>> +      if (state->count == -1)
>> +	chacha20_init (state->ctx, rnd, rnd + CHACHA20_KEY_SIZE);
>> +      else
>> +	arc4random_rekey (state, rnd, sizeof rnd);
>> +
>> +      explicit_bzero (rnd, sizeof rnd);
>> +
>> +      state->have = 0;
>> +      memset (state->buf, 0, sizeof state->buf);
>> +      state->count = CHACHA20_RESEED_SIZE;
>> +    }
>> +  if (state->count <= len)
>> +    state->count = 0;
>> +  else
>> +    state->count -= len;
>> +}
> 
> It's not clear to me if we want to enter the true branch of the second
> if statement if we already executed the true branch of the first if
> statement.

It is not needed, nor do we need to set the count to 0, since we have
already reinitialized it.

> 
>> +void
>> +__arc4random_buf (void *buffer, size_t len)
>> +{
>> +  struct arc4random_state_t *state = arc4random_get_state ();
>> +  if (__glibc_unlikely (state == NULL))
>> +    {
>> +      arc4random_getentropy (buffer, len);
>> +      return;
>> +    }
>> +
>> +  arc4random_check_stir (state, len);
>> +  while (len > 0)
>> +    {
>> +      if (state->have > 0)
>> +	{
>> +	  size_t m = MIN (len, state->have);
>> +	  uint8_t *ks = state->buf + sizeof (state->buf) - state->have;
>> +	  memcpy (buffer, ks, m);
>> +	  memset (ks, 0, m);
> 
> Should be explicit_bzero, I think.

Ack.

> 
>> +	  buffer += m;
>> +	  len -= m;
>> +	  state->have -= m;
>> +	}
>> +      if (state->have == 0)
>> +	arc4random_rekey (state, NULL, 0);
>> +    }
>> +}
>> +libc_hidden_def (__arc4random_buf)
>> +weak_alias (__arc4random_buf, arc4random_buf)
>> +
>> +uint32_t
>> +__arc4random (void)
>> +{
>> +  uint32_t r;
>> +
>> +  struct arc4random_state_t *state = arc4random_get_state ();
>> +  if (__glibc_unlikely (state == NULL))
>> +    {
>> +      arc4random_getentropy (&r, sizeof (uint32_t));
>> +      return r;
>> +    }
>> +
>> +  arc4random_check_stir (state, sizeof (uint32_t));
>> +  if (state->have < sizeof (uint32_t))
>> +    arc4random_rekey (state, NULL, 0);
>> +  uint8_t *ks = state->buf + sizeof (state->buf) - state->have;
>> +  memcpy (&r, ks, sizeof (uint32_t));
>> +  memset (ks, 0, sizeof (uint32_t));
>> +  state->have -= sizeof (uint32_t);
>> +
>> +  return r;
>> +}
> 
> Why not simply call __arc4random_buf?  If you want to retain the
> optimization, turn the implementation of __arc4random_buf into an inline
> function and call it here and from __arc4random_buf.

I actually tried this in an earlier iteration and I recall that it yielded
worse throughput.  I just tested again and it holds true: on
an aarch64 system the current approach with the generic implementation yields
290 MB/s while calling arc4random_buf shows 172 MB/s.

I am trying to decompose the function to eliminate the need for the
loop (which I think the compiler can't optimize away for arc4random),
but I don't think it would be simpler than open-coding the logic
in both functions.

> 
>> +libc_hidden_def (__arc4random)
>> +weak_alias (__arc4random, arc4random)
>> diff --git a/stdlib/arc4random.h b/stdlib/arc4random.h
>> new file mode 100644
>> index 0000000000..cdca639d9d
>> --- /dev/null
>> +++ b/stdlib/arc4random.h
>> @@ -0,0 +1,45 @@
> 
>> +/* Internal arc4random buffer, used on each feedback step so offer some
>> +   backtracking protection and to allow better used of vectorized
>> +   chacha20 implementations.  */
>> +#define CHACHA20_BUFSIZE        (8 * CHACHA20_BLOCK_SIZE)
> 
> Please add a _Static_assert that this is larger than CHACHA20_KEY_SIZE +
> CHACHA20_IV_SIZE because the implementation assumes this.

Ack.

> 
>> diff --git a/stdlib/chacha20.c b/stdlib/chacha20.c
>> new file mode 100644
>> index 0000000000..4549fc780f
>> --- /dev/null
>> +++ b/stdlib/chacha20.c
>> @@ -0,0 +1,187 @@
> 
>> +/* 32-bit stream position, then 96-bit nonce.  */
>> +#define CHACHA20_IV_SIZE	16
>> +#define CHACHA20_KEY_SIZE	32
>> +
>> +#define CHACHA20_STATE_LEN	16
>> +
>> +/* The ChaCha20 implementation is based on RFC8439 [1], omitting the final
>> +   XOR of the keystream with the plaintext because the plaintext is a
>> +   stream of zeros.  */
> 
> You can also remove the byte swapping because the key is random and
> discarded, so we don't care about its precise value.  You can do the
> swapping in the ChaCha20 test instead, to get reproducible test vectors.
> 
> Hmm, given that this impacts the assembler implementations as well,
> maybe we can do this at a later stage.

Right, I would indeed prefer to optimize it later.  But it is a good
suggestion.

> 
>> diff --git a/stdlib/stdlib.h b/stdlib/stdlib.h
>> index bf7cd438e1..f02a713a7b 100644
>> --- a/stdlib/stdlib.h
>> +++ b/stdlib/stdlib.h
>> @@ -485,6 +485,7 @@ extern unsigned short int *seed48 (unsigned short int __seed16v[3])
>>   extern void lcong48 (unsigned short int __param[7]) __THROW __nonnull ((1));
>>   
>>   # ifdef __USE_MISC
>> +#  include <bits/stdint-uintn.h>
>>   /* Data structure for communication with thread safe versions.  This
>>      type is to be regarded as opaque.  It's only exported because users
>>      have to allocate objects of this type.  */
> 
>> +/* Return a random number between zero (inclusive) and the specified
>> +   limit (exclusive).  */
>> +extern uint32_t arc4random_uniform (uint32_t __upper_bound)
>> +     __THROW __wur;
>>   # endif	/* Use misc.  */
>>   #endif	/* Use misc or X/Open.  */
> 
> I'm a bit worried about that <bits/stdint-uintn.h> inclusion.  It will
> be a bit confusing if these types become available via <stdlib.h> in
> some glibc versions.  Maybe use __uint32_t instead?

Hum ok, __uint32_t might work indeed.

> 
>> diff --git a/sysdeps/generic/tls-internal-struct.h b/sysdeps/generic/tls-internal-struct.h
>> index d76c715a96..81a71ac54b 100644
>> --- a/sysdeps/generic/tls-internal-struct.h
>> +++ b/sysdeps/generic/tls-internal-struct.h
>> @@ -19,10 +19,13 @@
>>   #ifndef _TLS_INTERNAL_STRUCT_H
>>   #define _TLS_INTERNAL_STRUCT_H 1
>>   
>> +#include <stdlib/arc4random.h>
>> +
> 
> You can use a forward declaration instead.  Minimizing <descr.h>
> dependencies seems desirable to me.
> 
>>   struct tls_internal_t
>>   {
>>     char *strsignal_buf;
>>     char *strerror_l_buf;
>> +  struct arc4random_state_t *rand_state;
>>   };
> 

Ack.

> 
>> diff --git a/sysdeps/generic/tls-internal.c b/sysdeps/generic/tls-internal.c
>> index 898c20b61c..ec0ceeebd1 100644
>> --- a/sysdeps/generic/tls-internal.c
>> +++ b/sysdeps/generic/tls-internal.c
>> @@ -16,6 +16,23 @@
>>      License along with the GNU C Library; if not, see
>>      <https://www.gnu.org/licenses/>.  */
>>   
>> +#include <string.h>
>>   #include <tls-internal.h>
>>   
>>   __thread struct tls_internal_t __tls_internal;
>> +
>> +void
>> +__glibc_tls_internal_free (void)
>> +{
>> +  free (__tls_internal.strsignal_buf);
>> +  free (__tls_internal.strerror_l_buf);
>> +
>> +  if (__tls_internal.rand_state != NULL)
>> +    {
>> +      /* Clear any lingering random state prior so if the thread stack is
>> +	 cached it won't leak any data.  */
>> +      explicit_bzero (__tls_internal.rand_state,
>> +		      sizeof (struct arc4random_state_t));
>> +      free (__tls_internal.rand_state);
>> +    }
> 
> sizeof (*__tls_internal.rand_state)?

Ack.

> 
>> diff --git a/sysdeps/mach/hurd/_Fork.c b/sysdeps/mach/hurd/_Fork.c
>> index e60b86fab1..1c44b39c5b 100644
>> --- a/sysdeps/mach/hurd/_Fork.c
>> +++ b/sysdeps/mach/hurd/_Fork.c
>> @@ -665,6 +665,8 @@ retry:
>>         /* Run things that want to run in the child task to set up.  */
>>         RUN_HOOK (_hurd_fork_child_hook, ());
>>   
>> +      call_function_static_weak (__arc4random_fork_subprocess);
>> +
> 
> Hmm, is the ordering correct?  Can _hurd_fork_child_hook run user code?
> (This probably needs review from Hurd developers.)

I don't think so; in any case I moved it before _hurd_fork_child_hook.
Hurd developers can confirm later whether this is the correct place.

> 
>> diff --git a/sysdeps/unix/sysv/linux/tls-internal.c b/sysdeps/unix/sysv/linux/tls-internal.c
>> index 6e25b021ab..045176197e 100644
>> --- a/sysdeps/unix/sysv/linux/tls-internal.c
>> +++ b/sysdeps/unix/sysv/linux/tls-internal.c
>> @@ -1 +1,37 @@
> 
>> +void
>> +__glibc_tls_internal_free (void)
>> +{
>> +  struct pthread *self = THREAD_SELF;
>> +  free (self->tls_state.strsignal_buf);
>> +  free (self->tls_state.strerror_l_buf);
>> +
>> +  if (self->tls_state.rand_state != NULL)
>> +    {
>> +      /* Clear any lingering random state prior so if the thread stack is
>> +         cached it won't leak any data.  */
>> +      explicit_bzero (self->tls_state.rand_state,
>> +		      sizeof (struct arc4random_state_t));
> 
> sizeof (self->tls_state.rand_state)?

Ack.

> 
>> diff --git a/sysdeps/unix/sysv/linux/tls-internal.h b/sysdeps/unix/sysv/linux/tls-internal.h
>> index f7a1a62135..f268a2d43b 100644
>> --- a/sysdeps/unix/sysv/linux/tls-internal.h
>> +++ b/sysdeps/unix/sysv/linux/tls-internal.h
>> @@ -22,17 +22,21 @@
>>   #include <stdlib.h>
>>   #include <pthreadP.h>
>>   
>> +static inline void
>> +__glibc_tls_internal_init (struct tls_internal_t *tls_state)
>> +{
>> +  tls_state->strsignal_buf = NULL;
>> +  tls_state->strerror_l_buf = NULL;
>> +  tls_state->rand_state = NULL;
>> +}
> 
> See the comment about using memset earlier.

Ack.

> 
> Thanks,
> Florian
>
Florian Weimer July 12, 2022, 5:15 p.m. UTC | #3
* Adhemerval Zanella Netto:

>>> +/* arc4random keeps two counters: 'have' is the current valid bytes not yet
>>> +   consumed in 'buf' while 'count' is the maximum number of bytes until a
>>> +   reseed.
>>> +
>>> +   Both the initial seed and reseed try to obtain entropy from the kernel
>>> +   and abort the process if none could be obtained.
>>> +
>>> +   The state 'buf' improves the usage of the cipher calls, allowing to call
>>> +   optimized implementations (if the architecture provides it) and optimize
>>> +   arc4random calls (since only multiple calls it will encrypt the next
>>> +   block).  */

>> I don't understand the “since only multiple calls it will encrypt the
>> next block” part.
>
> I changed to 'and minimize function call overhead'.  Using the generic
> implementation, a 8 times the chacha20 blocks buffer shows about 2x more
> throughput um aarch64.
>
> A buffer with 4x the chacha20 block size shows slight less performance,
> so one option might to make the buffer sizes arch-specific (since AVX2,
> and potentially AVX512 requires large block size for the arch-specific
> implementations).

Ah, that makes sense.  I think the quoted part is just a bit garbled and
needs some polishing.

>>> +/* Reinit the thread context by reseeding the cipher state with kernel
>>> +   entropy.  */
>>> +static void
>>> +arc4random_check_stir (struct arc4random_state_t *state, size_t len)
>> Could you add a comment describing the len parameter?
>
> I changed to:
>
> /* Check if the thread context STATE should be reseed with kernel entropy
>    depending of requested LEN bytes.  If there is less than requested,
>    the state is either initialized or reseed, otherwise the internal
>    counter subtract the requested lenght.  */

“reseeded”

>> Why not simply call __arc4random_buf?  If you want to retain the
>> optimization, turn the implementation of __arc4random_buf into an inline
>> function and call it here and from __arc4random_buf.
>
> I actually tried in some interation and I recall that it yield some
> worse throughput.  I just tested again and it holds true, on
> an aarch64 system current approach with generic implementation yields
> 290 MB/s while calling arc4random_buf shows 172 MB/s.
>
> I am trying to decompose the function to eliminate the need of the
> loop (which I think compiler can't optimize away for arc4random)
> but I don't think it would be simpler than open code the logic
> on both functions.

Hmm, this isn't great, but I see why you are doing it.

Thanks,
Florian
Adhemerval Zanella Netto July 12, 2022, 5:21 p.m. UTC | #4
On 12/07/22 14:15, Florian Weimer wrote:
> * Adhemerval Zanella Netto:
> 
>>>> +/* arc4random keeps two counters: 'have' is the current valid bytes not yet
>>>> +   consumed in 'buf' while 'count' is the maximum number of bytes until a
>>>> +   reseed.
>>>> +
>>>> +   Both the initial seed and reseed try to obtain entropy from the kernel
>>>> +   and abort the process if none could be obtained.
>>>> +
>>>> +   The state 'buf' improves the usage of the cipher calls, allowing to call
>>>> +   optimized implementations (if the architecture provides it) and optimize
>>>> +   arc4random calls (since only multiple calls it will encrypt the next
>>>> +   block).  */
> 
>>> I don't understand the “since only multiple calls it will encrypt the
>>> next block” part.
>>
>> I changed to 'and minimize function call overhead'.  Using the generic
>> implementation, a 8 times the chacha20 blocks buffer shows about 2x more
>> throughput um aarch64.
>>
>> A buffer with 4x the chacha20 block size shows slight less performance,
>> so one option might to make the buffer sizes arch-specific (since AVX2,
>> and potentially AVX512 requires large block size for the arch-specific
>> implementations).
> 
> Ah, that makes sense.  I think the quoted part is just a bit garbled and
> needs some polishing.
> 
>>>> +/* Reinit the thread context by reseeding the cipher state with kernel
>>>> +   entropy.  */
>>>> +static void
>>>> +arc4random_check_stir (struct arc4random_state_t *state, size_t len)
>>> Could you add a comment describing the len parameter?
>>
>> I changed to:
>>
>> /* Check if the thread context STATE should be reseed with kernel entropy
>>     depending of requested LEN bytes.  If there is less than requested,
>>     the state is either initialized or reseed, otherwise the internal
>>     counter subtract the requested lenght.  */
> 
> “reseeded”

Ack.

> 
>>> Why not simply call __arc4random_buf?  If you want to retain the
>>> optimization, turn the implementation of __arc4random_buf into an inline
>>> function and call it here and from __arc4random_buf.
>>
>> I actually tried in some interation and I recall that it yield some
>> worse throughput.  I just tested again and it holds true, on
>> an aarch64 system current approach with generic implementation yields
>> 290 MB/s while calling arc4random_buf shows 172 MB/s.
>>
>> I am trying to decompose the function to eliminate the need of the
>> loop (which I think compiler can't optimize away for arc4random)
>> but I don't think it would be simpler than open code the logic
>> on both functions.
> 
> Hmm, this isn't great, but I see why you are doing it.

Agreed, but the performance difference is significant, so I think it is
worth the code duplication in this case.
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index b0a3d7e512..f9dc316ead 100644
--- a/NEWS
+++ b/NEWS
@@ -46,6 +46,10 @@  Major new features:
   to more flexibly configure and operate on filesystem mounts.  The new
   mount APIs are specifically designed to work with namespaces.
 
+* The functions arc4random, arc4random_buf, and arc4random_uniform have been
+  added.  The functions use a cryptographic pseudo-random number generator
+  based on ChaCha20 initialized with entropy from the kernel.
+
 Deprecated and removed features, and other changes affecting compatibility:
 
 * Support for prelink will be removed in the next release; this includes
diff --git a/include/stdlib.h b/include/stdlib.h
index 1c6f70b082..c5f5628f22 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -144,6 +144,19 @@  libc_hidden_proto (__ptsname_r)
 libc_hidden_proto (grantpt)
 libc_hidden_proto (unlockpt)
 
+__typeof (arc4random) __arc4random;
+libc_hidden_proto (__arc4random);
+__typeof (arc4random_buf) __arc4random_buf;
+libc_hidden_proto (__arc4random_buf);
+__typeof (arc4random_uniform) __arc4random_uniform;
+libc_hidden_proto (__arc4random_uniform);
+extern void __arc4random_buf_internal (void *buffer, size_t len)
+     attribute_hidden;
+/* Called from the fork function to clear the calling thread's arc4random
+   state in the child process, which forces a fresh key initialization on
+   the next arc4random call.  */
+extern void __arc4random_fork_subprocess (void) attribute_hidden;
+
 extern double __strtod_internal (const char *__restrict __nptr,
 				 char **__restrict __endptr, int __group)
      __THROW __nonnull ((1)) __wur;
diff --git a/malloc/thread-freeres.c b/malloc/thread-freeres.c
index 3894652169..b22e1d789f 100644
--- a/malloc/thread-freeres.c
+++ b/malloc/thread-freeres.c
@@ -36,7 +36,7 @@  __libc_thread_freeres (void)
   __rpc_thread_destroy ();
 #endif
   call_function_static_weak (__res_thread_freeres);
-  __glibc_tls_internal_free ();
+  call_function_static_weak (__glibc_tls_internal_free);
   call_function_static_weak (__libc_dlerror_result_free);
 
   /* This should come last because it shuts down malloc for this
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 01a282f3f6..ada65d40c2 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -32,6 +32,7 @@ 
 #include <kernel-features.h>
 #include <nptl-stack.h>
 #include <libc-lock.h>
+#include <tls-internal.h>
 
 /* Default alignment of stack.  */
 #ifndef STACK_ALIGN
@@ -127,7 +128,7 @@  get_cached_stack (size_t *sizep, void **memp)
 
   result->exiting = false;
   __libc_lock_init (result->exit_lock);
-  result->tls_state = (struct tls_internal_t) { 0 };
+  __glibc_tls_internal_init (&result->tls_state);
 
   /* Clear the DTV.  */
   dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
@@ -559,6 +560,8 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 #endif
   pd->robust_head.list = &pd->robust_head;
 
+  __glibc_tls_internal_init (&pd->tls_state);
+
   /* We place the thread descriptor at the end of the stack.  */
   *pdp = pd;
 
diff --git a/stdlib/Makefile b/stdlib/Makefile
index d4a4d5679a..62f8253225 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -53,6 +53,8 @@  routines := \
   a64l \
   abort \
   abs \
+  arc4random \
+  arc4random_uniform \
   at_quick_exit \
   atof \
   atoi \
diff --git a/stdlib/Versions b/stdlib/Versions
index 5e9099a153..d09a308fb5 100644
--- a/stdlib/Versions
+++ b/stdlib/Versions
@@ -136,6 +136,11 @@  libc {
     strtof32; strtof64; strtof32x;
     strtof32_l; strtof64_l; strtof32x_l;
   }
+  GLIBC_2.36 {
+    arc4random;
+    arc4random_buf;
+    arc4random_uniform;
+  }
   GLIBC_PRIVATE {
     # functions which have an additional interface since they are
     # are cancelable.
diff --git a/stdlib/arc4random.c b/stdlib/arc4random.c
new file mode 100644
index 0000000000..92d1da92cc
--- /dev/null
+++ b/stdlib/arc4random.c
@@ -0,0 +1,207 @@ 
+/* Pseudo Random Number Generator based on ChaCha20.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <arc4random.h>
+#include <errno.h>
+#include <not-cancel.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/random.h>
+#include <tls-internal.h>
+
+/* arc4random keeps two counters: 'have' is the current valid bytes not yet
+   consumed in 'buf' while 'count' is the maximum number of bytes until a
+   reseed.
+
+   Both the initial seed and reseed try to obtain entropy from the kernel
+   and abort the process if none could be obtained.
+
+   The state 'buf' caches several cipher blocks at once, which allows the
+   use of optimized implementations (if the architecture provides them)
+   and minimizes function call overhead, since the cipher is only invoked
+   again once the buffered bytes have been consumed.  */
+
+#include <chacha20.c>
+
+/* Called from the fork function to reset the state.  */
+void
+__arc4random_fork_subprocess (void)
+{
+  struct arc4random_state_t *state = __glibc_tls_internal()->rand_state;
+  if (state != NULL)
+    {
+      explicit_bzero (state, sizeof (struct arc4random_state_t));
+      /* Force key init.  */
+      state->count = -1;
+    }
+}
+
+/* Return the current thread random state or try to create one if there is
+   none available.  In the case malloc can not allocate a state, arc4random
+   will try to get entropy with arc4random_getentropy.  */
+static struct arc4random_state_t *
+arc4random_get_state (void)
+{
+  struct arc4random_state_t *state = __glibc_tls_internal()->rand_state;
+  if (state == NULL)
+    {
+      state = malloc (sizeof (struct arc4random_state_t));
+      if (state != NULL)
+	{
+	  /* Force key initialization on first call.  */
+	  state->count = -1;
+	  __glibc_tls_internal()->rand_state = state;
+	}
+    }
+  return state;
+}
+
+static void
+arc4random_getrandom_failure (void)
+{
+  __libc_fatal ("Fatal glibc error: cannot get entropy for arc4random\n");
+}
+
+static void
+arc4random_rekey (struct arc4random_state_t *state, uint8_t *rnd, size_t rndlen)
+{
+  chacha20_crypt (state->ctx, state->buf, state->buf, sizeof state->buf);
+
+  /* Mix some extra entropy if provided.  */
+  if (rnd != NULL)
+    {
+      size_t m = MIN (rndlen, CHACHA20_KEY_SIZE + CHACHA20_IV_SIZE);
+      for (size_t i = 0; i < m; i++)
+	state->buf[i] ^= rnd[i];
+    }
+
+  /* Immediately reinit for backtracking resistance.  */
+  chacha20_init (state->ctx, state->buf, state->buf + CHACHA20_KEY_SIZE);
+  memset (state->buf, 0, CHACHA20_KEY_SIZE + CHACHA20_IV_SIZE);
+  state->have = sizeof (state->buf) - (CHACHA20_KEY_SIZE + CHACHA20_IV_SIZE);
+}
+
+static void
+arc4random_getentropy (void *rnd, size_t len)
+{
+  if (__getrandom_nocancel (rnd, len, GRND_NONBLOCK) == len)
+    return;
+
+  int fd = __open64_nocancel ("/dev/urandom", O_RDONLY | O_CLOEXEC);
+  if (fd != -1)
+    {
+      uint8_t *p = rnd;
+      uint8_t *end = p + len;
+      do
+	{
+	  ssize_t ret = TEMP_FAILURE_RETRY (__read_nocancel (fd, p, end - p));
+	  if (ret <= 0)
+	    arc4random_getrandom_failure ();
+	  p += ret;
+	}
+      while (p < end);
+
+      if (__close_nocancel (fd) == 0)
+	return;
+    }
+  arc4random_getrandom_failure ();
+}
+
+/* Reseed STATE with kernel entropy if it is uninitialized or fewer than
+   LEN bytes remain before the reseed limit, then deduct LEN from it.  */
+static void
+arc4random_check_stir (struct arc4random_state_t *state, size_t len)
+{
+  if (state->count < len || state->count == -1)
+    {
+      uint8_t rnd[CHACHA20_KEY_SIZE + CHACHA20_IV_SIZE];
+      arc4random_getentropy (rnd, sizeof rnd);
+
+      if (state->count == -1)
+	chacha20_init (state->ctx, rnd, rnd + CHACHA20_KEY_SIZE);
+      else
+	arc4random_rekey (state, rnd, sizeof rnd);
+
+      explicit_bzero (rnd, sizeof rnd);
+
+      state->have = 0;
+      memset (state->buf, 0, sizeof state->buf);
+      state->count = CHACHA20_RESEED_SIZE;
+    }
+  if (state->count <= len)
+    state->count = 0;
+  else
+    state->count -= len;
+}
+
+void
+__arc4random_buf (void *buffer, size_t len)
+{
+  struct arc4random_state_t *state = arc4random_get_state ();
+  if (__glibc_unlikely (state == NULL))
+    {
+      arc4random_getentropy (buffer, len);
+      return;
+    }
+
+  arc4random_check_stir (state, len);
+  while (len > 0)
+    {
+      if (state->have > 0)
+	{
+	  size_t m = MIN (len, state->have);
+	  uint8_t *ks = state->buf + sizeof (state->buf) - state->have;
+	  memcpy (buffer, ks, m);
+	  memset (ks, 0, m);
+	  buffer += m;
+	  len -= m;
+	  state->have -= m;
+	}
+      if (state->have == 0)
+	arc4random_rekey (state, NULL, 0);
+    }
+}
+libc_hidden_def (__arc4random_buf)
+weak_alias (__arc4random_buf, arc4random_buf)
+
+uint32_t
+__arc4random (void)
+{
+  uint32_t r;
+
+  struct arc4random_state_t *state = arc4random_get_state ();
+  if (__glibc_unlikely (state == NULL))
+    {
+      arc4random_getentropy (&r, sizeof (uint32_t));
+      return r;
+    }
+
+  arc4random_check_stir (state, sizeof (uint32_t));
+  if (state->have < sizeof (uint32_t))
+    arc4random_rekey (state, NULL, 0);
+  uint8_t *ks = state->buf + sizeof (state->buf) - state->have;
+  memcpy (&r, ks, sizeof (uint32_t));
+  memset (ks, 0, sizeof (uint32_t));
+  state->have -= sizeof (uint32_t);
+
+  return r;
+}
+libc_hidden_def (__arc4random)
+weak_alias (__arc4random, arc4random)
diff --git a/stdlib/arc4random.h b/stdlib/arc4random.h
new file mode 100644
index 0000000000..cdca639d9d
--- /dev/null
+++ b/stdlib/arc4random.h
@@ -0,0 +1,45 @@ 
+/* Arc4random definition used on TLS.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _CHACHA20_H
+#define _CHACHA20_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* Internal ChaCha20 state.  */
+#define CHACHA20_STATE_LEN	16
+#define CHACHA20_BLOCK_SIZE	64
+
+/* Maximum number of bytes until reseed (16 MB).  */
+#define CHACHA20_RESEED_SIZE	(16 * 1024 * 1024)
+
+/* Internal arc4random buffer, used on each feedback step to offer some
+   backtracking protection and to allow better use of vectorized
+   chacha20 implementations.  */
+#define CHACHA20_BUFSIZE        (8 * CHACHA20_BLOCK_SIZE)
+
+struct arc4random_state_t
+{
+  uint32_t ctx[CHACHA20_STATE_LEN];
+  size_t have;
+  size_t count;
+  uint8_t buf[CHACHA20_BUFSIZE];
+};
+
+#endif
diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c
new file mode 100644
index 0000000000..83772de5cd
--- /dev/null
+++ b/stdlib/arc4random_uniform.c
@@ -0,0 +1,140 @@ 
+/* Pseudo-random number generator which returns a single 32-bit value
+   uniformly distributed below an upper bound.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <endian.h>
+#include <libc-lock.h>
+#include <stdlib.h>
+#include <sys/param.h>
+
+/* Return the number of bytes which cover values up to the limit.  */
+__attribute__ ((const))
+static uint32_t
+byte_count (uint32_t n)
+{
+  if (n < (1U << 8))
+    return 1;
+  else if (n < (1U << 16))
+    return 2;
+  else if (n < (1U << 24))
+    return 3;
+  else
+    return 4;
+}
+
+/* Fill the lower bits of the result with randomness, according to the
+   number of bytes requested.  */
+static void
+random_bytes (uint32_t *result, uint32_t byte_count)
+{
+  *result = 0;
+  unsigned char *ptr = (unsigned char *) result;
+  if (__BYTE_ORDER == __BIG_ENDIAN)
+    ptr += 4 - byte_count;
+  __arc4random_buf (ptr, byte_count);
+}
+
+uint32_t
+__arc4random_uniform (uint32_t n)
+{
+  if (n <= 1)
+    /* There is no valid return value for a zero limit, and 0 is the
+       only possible result for limit 1.  */
+    return 0;
+
+  /* The bits variable serves as a source for bits.  Prefetch the
+     minimum number of bytes needed.  */
+  uint32_t count = byte_count (n);
+  uint32_t bits_length = count * CHAR_BIT;
+  uint32_t bits;
+  random_bytes (&bits, count);
+
+  /* Powers of two are easy.  */
+  if (powerof2 (n))
+    return bits & (n - 1);
+
+  /* The general case.  This algorithm follows Jérémie Lumbroso,
+     Optimal Discrete Uniform Generation from Coin Flips, and
+     Applications (2013), who credits Donald E. Knuth and Andrew
+     C. Yao, The complexity of nonuniform random number generation
+     (1976), for solving the general case.
+
+     The implementation below unrolls the initialization stage of the
+     loop, where v is less than n.  */
+
+  /* Use 64-bit variables even though the intermediate results are
+     never larger than 33 bits.  This ensures the code is easier to
+     compile on 64-bit architectures.  */
+  uint64_t v;
+  uint64_t c;
+
+  /* Initialize v and c.  v is the smallest power of 2 which is larger
+     than n.  */
+  {
+    uint32_t log2p1 = 32 - __builtin_clz (n);
+    v = 1ULL << log2p1;
+    c = bits & (v - 1);
+    bits >>= log2p1;
+    bits_length -= log2p1;
+  }
+
+  /* At the start of the loop, c is uniformly distributed within the
+     half-open interval [0, v), and v < 2n < 2**33.  */
+  while (true)
+    {
+      if (v >= n)
+        {
+          /* If the candidate is less than n, accept it.  */
+          if (c < n)
+            /* c is uniformly distributed on [0, n).  */
+            return c;
+          else
+            {
+              /* c is uniformly distributed on [n, v).  */
+              v -= n;
+              c -= n;
+              /* The distribution was shifted, so c is uniformly
+                 distributed on [0, v) again.  */
+            }
+        }
+      /* v < n here.  */
+
+      /* Replenish the bit source if necessary.  */
+      if (bits_length == 0)
+        {
+          /* Overwrite the least significant byte.  */
+	  random_bytes (&bits, 1);
+	  bits_length = CHAR_BIT;
+        }
+
+      /* Double the range.  No overflow because v < n < 2**32.  */
+      v *= 2;
+      /* v < 2n here.  */
+
+      /* Extract a bit and append it to c.  c remains less than v and
+         thus 2**33.  */
+      c = (c << 1) | (bits & 1);
+      bits >>= 1;
+      --bits_length;
+
+      /* At this point, c is uniformly distributed on [0, v) again,
+         and v < 2n < 2**33.  */
+    }
+}
+libc_hidden_def (__arc4random_uniform)
+weak_alias (__arc4random_uniform, arc4random_uniform)
diff --git a/stdlib/chacha20.c b/stdlib/chacha20.c
new file mode 100644
index 0000000000..4549fc780f
--- /dev/null
+++ b/stdlib/chacha20.c
@@ -0,0 +1,187 @@ 
+/* Generic ChaCha20 implementation (used on arc4random).
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <array_length.h>
+#include <endian.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+/* 32-bit stream position, then 96-bit nonce.  */
+#define CHACHA20_IV_SIZE	16
+#define CHACHA20_KEY_SIZE	32
+
+#define CHACHA20_STATE_LEN	16
+
+/* The ChaCha20 implementation is based on RFC8439 [1], omitting the final
+   XOR of the keystream with the plaintext because the plaintext is a
+   stream of zeros.  */
+
+enum chacha20_constants
+{
+  CHACHA20_CONSTANT_EXPA = 0x61707865U,
+  CHACHA20_CONSTANT_ND_3 = 0x3320646eU,
+  CHACHA20_CONSTANT_2_BY = 0x79622d32U,
+  CHACHA20_CONSTANT_TE_K = 0x6b206574U
+};
+
+static inline uint32_t
+read_unaligned_32 (const uint8_t *p)
+{
+  uint32_t r;
+  memcpy (&r, p, sizeof (r));
+  return r;
+}
+
+static inline void
+write_unaligned_32 (uint8_t *p, uint32_t v)
+{
+  memcpy (p, &v, sizeof (v));
+}
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+# define read_unaligned_le32(p) __builtin_bswap32 (read_unaligned_32 (p))
+# define set_state(v)		__builtin_bswap32 ((v))
+#else
+# define read_unaligned_le32(p) read_unaligned_32 ((p))
+# define set_state(v)		(v)
+#endif
+
+static inline void
+chacha20_init (uint32_t *state, const uint8_t *key, const uint8_t *iv)
+{
+  state[0]  = CHACHA20_CONSTANT_EXPA;
+  state[1]  = CHACHA20_CONSTANT_ND_3;
+  state[2]  = CHACHA20_CONSTANT_2_BY;
+  state[3]  = CHACHA20_CONSTANT_TE_K;
+
+  state[4]  = read_unaligned_le32 (key + 0 * sizeof (uint32_t));
+  state[5]  = read_unaligned_le32 (key + 1 * sizeof (uint32_t));
+  state[6]  = read_unaligned_le32 (key + 2 * sizeof (uint32_t));
+  state[7]  = read_unaligned_le32 (key + 3 * sizeof (uint32_t));
+  state[8]  = read_unaligned_le32 (key + 4 * sizeof (uint32_t));
+  state[9]  = read_unaligned_le32 (key + 5 * sizeof (uint32_t));
+  state[10] = read_unaligned_le32 (key + 6 * sizeof (uint32_t));
+  state[11] = read_unaligned_le32 (key + 7 * sizeof (uint32_t));
+
+  state[12] = read_unaligned_le32 (iv + 0 * sizeof (uint32_t));
+  state[13] = read_unaligned_le32 (iv + 1 * sizeof (uint32_t));
+  state[14] = read_unaligned_le32 (iv + 2 * sizeof (uint32_t));
+  state[15] = read_unaligned_le32 (iv + 3 * sizeof (uint32_t));
+}
+
+static inline uint32_t
+rotl32 (unsigned int shift, uint32_t word)
+{
+  return (word << (shift & 31)) | (word >> ((-shift) & 31));
+}
+
+static void
+state_final (const uint8_t *src, uint8_t *dst, uint32_t v)
+{
+#ifdef CHACHA20_XOR_FINAL
+  v ^= read_unaligned_32 (src);
+#endif
+  write_unaligned_32 (dst, v);
+}
+
+static inline void
+chacha20_block (uint32_t *state, uint8_t *dst, const uint8_t *src)
+{
+  uint32_t x0, x1, x2, x3, x4, x5, x6, x7;
+  uint32_t x8, x9, x10, x11, x12, x13, x14, x15;
+
+  x0 = state[0];
+  x1 = state[1];
+  x2 = state[2];
+  x3 = state[3];
+  x4 = state[4];
+  x5 = state[5];
+  x6 = state[6];
+  x7 = state[7];
+  x8 = state[8];
+  x9 = state[9];
+  x10 = state[10];
+  x11 = state[11];
+  x12 = state[12];
+  x13 = state[13];
+  x14 = state[14];
+  x15 = state[15];
+
+  for (int i = 0; i < 20; i += 2)
+    {
+#define QROUND(_x0, _x1, _x2, _x3) 			\
+  do {							\
+   _x0 = _x0 + _x1; _x3 = rotl32 (16, (_x0 ^ _x3)); 	\
+   _x2 = _x2 + _x3; _x1 = rotl32 (12, (_x1 ^ _x2)); 	\
+   _x0 = _x0 + _x1; _x3 = rotl32 (8,  (_x0 ^ _x3));	\
+   _x2 = _x2 + _x3; _x1 = rotl32 (7,  (_x1 ^ _x2));	\
+  } while(0)
+
+      QROUND (x0, x4, x8,  x12);
+      QROUND (x1, x5, x9,  x13);
+      QROUND (x2, x6, x10, x14);
+      QROUND (x3, x7, x11, x15);
+
+      QROUND (x0, x5, x10, x15);
+      QROUND (x1, x6, x11, x12);
+      QROUND (x2, x7, x8,  x13);
+      QROUND (x3, x4, x9,  x14);
+    }
+
+  state_final (&src[0], &dst[0], set_state (x0 + state[0]));
+  state_final (&src[4], &dst[4], set_state (x1 + state[1]));
+  state_final (&src[8], &dst[8], set_state (x2 + state[2]));
+  state_final (&src[12], &dst[12], set_state (x3 + state[3]));
+  state_final (&src[16], &dst[16], set_state (x4 + state[4]));
+  state_final (&src[20], &dst[20], set_state (x5 + state[5]));
+  state_final (&src[24], &dst[24], set_state (x6 + state[6]));
+  state_final (&src[28], &dst[28], set_state (x7 + state[7]));
+  state_final (&src[32], &dst[32], set_state (x8 + state[8]));
+  state_final (&src[36], &dst[36], set_state (x9 + state[9]));
+  state_final (&src[40], &dst[40], set_state (x10 + state[10]));
+  state_final (&src[44], &dst[44], set_state (x11 + state[11]));
+  state_final (&src[48], &dst[48], set_state (x12 + state[12]));
+  state_final (&src[52], &dst[52], set_state (x13 + state[13]));
+  state_final (&src[56], &dst[56], set_state (x14 + state[14]));
+  state_final (&src[60], &dst[60], set_state (x15 + state[15]));
+
+  state[12]++;
+}
+
+static void
+chacha20_crypt (uint32_t *state, uint8_t *dst, const uint8_t *src,
+		size_t bytes)
+{
+  while (bytes >= CHACHA20_BLOCK_SIZE)
+    {
+      chacha20_block (state, dst, src);
+
+      bytes -= CHACHA20_BLOCK_SIZE;
+      dst += CHACHA20_BLOCK_SIZE;
+      src += CHACHA20_BLOCK_SIZE;
+    }
+
+  if (__glibc_unlikely (bytes != 0))
+    {
+      uint8_t stream[CHACHA20_BLOCK_SIZE];
+      chacha20_block (state, stream, src);
+      memcpy (dst, stream, bytes);
+      explicit_bzero (stream, sizeof stream);
+    }
+}
diff --git a/stdlib/stdlib.h b/stdlib/stdlib.h
index bf7cd438e1..f02a713a7b 100644
--- a/stdlib/stdlib.h
+++ b/stdlib/stdlib.h
@@ -485,6 +485,7 @@  extern unsigned short int *seed48 (unsigned short int __seed16v[3])
 extern void lcong48 (unsigned short int __param[7]) __THROW __nonnull ((1));
 
 # ifdef __USE_MISC
+#  include <bits/stdint-uintn.h>
 /* Data structure for communication with thread safe versions.  This
    type is to be regarded as opaque.  It's only exported because users
    have to allocate objects of this type.  */
@@ -533,6 +534,19 @@  extern int seed48_r (unsigned short int __seed16v[3],
 extern int lcong48_r (unsigned short int __param[7],
 		      struct drand48_data *__buffer)
      __THROW __nonnull ((1, 2));
+
+/* Return a random integer between zero and 2**32-1 (inclusive).  */
+extern uint32_t arc4random (void)
+     __THROW __wur;
+
+/* Fill the buffer with random data.  */
+extern void arc4random_buf (void *__buf, size_t __size)
+     __THROW __nonnull ((1));
+
+/* Return a random number between zero (inclusive) and the specified
+   limit (exclusive).  */
+extern uint32_t arc4random_uniform (uint32_t __upper_bound)
+     __THROW __wur;
 # endif	/* Use misc.  */
 #endif	/* Use misc or X/Open.  */
 
diff --git a/sysdeps/generic/not-cancel.h b/sysdeps/generic/not-cancel.h
index 2104efeb54..acceb9b67f 100644
--- a/sysdeps/generic/not-cancel.h
+++ b/sysdeps/generic/not-cancel.h
@@ -48,5 +48,7 @@ 
   (void) __writev (fd, iov, n)
 #define __fcntl64_nocancel(fd, cmd, ...) \
   __fcntl64 (fd, cmd, __VA_ARGS__)
+#define __getrandom_nocancel(buf, size, flags) \
+  __getrandom (buf, size, flags)
 
 #endif /* NOT_CANCEL_H  */
diff --git a/sysdeps/generic/tls-internal-struct.h b/sysdeps/generic/tls-internal-struct.h
index d76c715a96..81a71ac54b 100644
--- a/sysdeps/generic/tls-internal-struct.h
+++ b/sysdeps/generic/tls-internal-struct.h
@@ -19,10 +19,13 @@ 
 #ifndef _TLS_INTERNAL_STRUCT_H
 #define _TLS_INTERNAL_STRUCT_H 1
 
+#include <stdlib/arc4random.h>
+
 struct tls_internal_t
 {
   char *strsignal_buf;
   char *strerror_l_buf;
+  struct arc4random_state_t *rand_state;
 };
 
 #endif
diff --git a/sysdeps/generic/tls-internal.c b/sysdeps/generic/tls-internal.c
index 898c20b61c..ec0ceeebd1 100644
--- a/sysdeps/generic/tls-internal.c
+++ b/sysdeps/generic/tls-internal.c
@@ -16,6 +16,23 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <string.h>
 #include <tls-internal.h>
 
 __thread struct tls_internal_t __tls_internal;
+
+void
+__glibc_tls_internal_free (void)
+{
+  struct tls_internal_t *tls = &__tls_internal;
+
+  free (tls->strsignal_buf);
+  free (tls->strerror_l_buf);
+
+  /* The arc4random state only exists if the thread used arc4random.  Wipe
+     it before freeing so no generator state is left in the freed block.  */
+  if (tls->rand_state == NULL)
+    return;
+  explicit_bzero (tls->rand_state, sizeof (struct arc4random_state_t));
+  free (tls->rand_state);
+}
diff --git a/sysdeps/generic/tls-internal.h b/sysdeps/generic/tls-internal.h
index acb8ac9abe..3f53e4a1fa 100644
--- a/sysdeps/generic/tls-internal.h
+++ b/sysdeps/generic/tls-internal.h
@@ -30,11 +30,6 @@  __glibc_tls_internal (void)
   return &__tls_internal;
 }
 
-static inline void
-__glibc_tls_internal_free (void)
-{
-  free (__tls_internal.strsignal_buf);
-  free (__tls_internal.strerror_l_buf);
-}
+extern void __glibc_tls_internal_free (void) attribute_hidden;
 
 #endif
diff --git a/sysdeps/mach/hurd/_Fork.c b/sysdeps/mach/hurd/_Fork.c
index e60b86fab1..1c44b39c5b 100644
--- a/sysdeps/mach/hurd/_Fork.c
+++ b/sysdeps/mach/hurd/_Fork.c
@@ -665,6 +665,8 @@  retry:
       /* Run things that want to run in the child task to set up.  */
       RUN_HOOK (_hurd_fork_child_hook, ());
 
+      call_function_static_weak (__arc4random_fork_subprocess);
+
       /* Set up proc server-assisted fault recovery for the signal thread.  */
       _hurdsig_fault_init ();
 
diff --git a/sysdeps/mach/hurd/i386/libc.abilist b/sysdeps/mach/hurd/i386/libc.abilist
index 4dc87e9061..7bd565103b 100644
--- a/sysdeps/mach/hurd/i386/libc.abilist
+++ b/sysdeps/mach/hurd/i386/libc.abilist
@@ -2289,6 +2289,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 close_range F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.4 __confstr_chk F
 GLIBC_2.4 __fgets_chk F
 GLIBC_2.4 __fgets_unlocked_chk F
diff --git a/sysdeps/mach/hurd/not-cancel.h b/sysdeps/mach/hurd/not-cancel.h
index 6ec92ced84..9a3a7ed59a 100644
--- a/sysdeps/mach/hurd/not-cancel.h
+++ b/sysdeps/mach/hurd/not-cancel.h
@@ -74,6 +74,9 @@  __typeof (__fcntl) __fcntl_nocancel;
 #define __fcntl64_nocancel(...) \
   __fcntl_nocancel (__VA_ARGS__)
 
+#define __getrandom_nocancel(buf, size, flags) \
+  __getrandom (buf, size, flags)
+
 #if IS_IN (libc)
 hidden_proto (__close_nocancel)
 hidden_proto (__close_nocancel_nostatus)
diff --git a/sysdeps/nptl/_Fork.c b/sysdeps/nptl/_Fork.c
index dd568992e2..7dc02569f6 100644
--- a/sysdeps/nptl/_Fork.c
+++ b/sysdeps/nptl/_Fork.c
@@ -43,6 +43,8 @@  _Fork (void)
       self->robust_head.list = &self->robust_head;
       INTERNAL_SYSCALL_CALL (set_robust_list, &self->robust_head,
 			     sizeof (struct robust_list_head));
+
+      call_function_static_weak (__arc4random_fork_subprocess);
     }
   return pid;
 }
diff --git a/sysdeps/unix/sysv/linux/aarch64/libc.abilist b/sysdeps/unix/sysv/linux/aarch64/libc.abilist
index 8dba065b81..8ff7e802a0 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libc.abilist
@@ -2616,6 +2616,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/alpha/libc.abilist b/sysdeps/unix/sysv/linux/alpha/libc.abilist
index 08f4750022..3d7118210b 100644
--- a/sysdeps/unix/sysv/linux/alpha/libc.abilist
+++ b/sysdeps/unix/sysv/linux/alpha/libc.abilist
@@ -2713,6 +2713,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/arc/libc.abilist b/sysdeps/unix/sysv/linux/arc/libc.abilist
index 75db763023..9a046a4f60 100644
--- a/sysdeps/unix/sysv/linux/arc/libc.abilist
+++ b/sysdeps/unix/sysv/linux/arc/libc.abilist
@@ -2377,6 +2377,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/arm/be/libc.abilist b/sysdeps/unix/sysv/linux/arm/be/libc.abilist
index fa33f317ac..a9c3f0c77b 100644
--- a/sysdeps/unix/sysv/linux/arm/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/arm/be/libc.abilist
@@ -496,6 +496,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/arm/le/libc.abilist b/sysdeps/unix/sysv/linux/arm/le/libc.abilist
index dba2e4ce42..38ba778c47 100644
--- a/sysdeps/unix/sysv/linux/arm/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/arm/le/libc.abilist
@@ -493,6 +493,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/csky/libc.abilist b/sysdeps/unix/sysv/linux/csky/libc.abilist
index e6ff921c29..0695fffebc 100644
--- a/sysdeps/unix/sysv/linux/csky/libc.abilist
+++ b/sysdeps/unix/sysv/linux/csky/libc.abilist
@@ -2652,6 +2652,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/hppa/libc.abilist b/sysdeps/unix/sysv/linux/hppa/libc.abilist
index 8a40cece83..9af97c7ce8 100644
--- a/sysdeps/unix/sysv/linux/hppa/libc.abilist
+++ b/sysdeps/unix/sysv/linux/hppa/libc.abilist
@@ -2601,6 +2601,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/i386/libc.abilist b/sysdeps/unix/sysv/linux/i386/libc.abilist
index a89826049f..6b90604a58 100644
--- a/sysdeps/unix/sysv/linux/i386/libc.abilist
+++ b/sysdeps/unix/sysv/linux/i386/libc.abilist
@@ -2785,6 +2785,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/ia64/libc.abilist b/sysdeps/unix/sysv/linux/ia64/libc.abilist
index d1d96b7469..4b9227d6c9 100644
--- a/sysdeps/unix/sysv/linux/ia64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/ia64/libc.abilist
@@ -2551,6 +2551,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
index 63a62f267a..70bb58ae69 100644
--- a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
@@ -497,6 +497,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
index f68325f9bc..b8053a1d45 100644
--- a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
@@ -2728,6 +2728,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
index 247af2075c..2412d0c9bf 100644
--- a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
@@ -2701,6 +2701,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
index b0ac3f9009..2694714320 100644
--- a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
@@ -2698,6 +2698,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
index b22cd6bf2f..712f9c716b 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
@@ -2693,6 +2693,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
index 12fc2cce3e..79f2f60c2a 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
@@ -2691,6 +2691,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
index d3e96dfd43..e7205f16ed 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
@@ -2699,6 +2699,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
index cb58ed4db0..6fa6f18150 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
@@ -2602,6 +2602,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/nios2/libc.abilist b/sysdeps/unix/sysv/linux/nios2/libc.abilist
index 61ad58a599..bd21a17ade 100644
--- a/sysdeps/unix/sysv/linux/nios2/libc.abilist
+++ b/sysdeps/unix/sysv/linux/nios2/libc.abilist
@@ -2740,6 +2740,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/not-cancel.h b/sysdeps/unix/sysv/linux/not-cancel.h
index 75b9e0ee1e..2c58d5ae2f 100644
--- a/sysdeps/unix/sysv/linux/not-cancel.h
+++ b/sysdeps/unix/sysv/linux/not-cancel.h
@@ -67,6 +67,13 @@  __writev_nocancel_nostatus (int fd, const struct iovec *iov, int iovcnt)
   INTERNAL_SYSCALL_CALL (writev, fd, iov, iovcnt);
 }
 
+static inline int
+__getrandom_nocancel (void *buf, size_t buflen, unsigned int flags)
+{
+  return INTERNAL_SYSCALL_CALL (getrandom, buf, buflen, flags);
+}
+
+
 /* Uncancelable fcntl.  */
 __typeof (__fcntl) __fcntl64_nocancel;
 
diff --git a/sysdeps/unix/sysv/linux/or1k/libc.abilist b/sysdeps/unix/sysv/linux/or1k/libc.abilist
index 1260dc4e2e..132d650745 100644
--- a/sysdeps/unix/sysv/linux/or1k/libc.abilist
+++ b/sysdeps/unix/sysv/linux/or1k/libc.abilist
@@ -2123,6 +2123,9 @@  GLIBC_2.35 wprintf F
 GLIBC_2.35 write F
 GLIBC_2.35 writev F
 GLIBC_2.35 wscanf F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
index 363939762c..3fd4a03f67 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
@@ -2755,6 +2755,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
index f512ad8baf..5bf5dc6977 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
@@ -2788,6 +2788,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
index c9bdc9859c..404fafb377 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
@@ -2510,6 +2510,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
index f091be30bd..005f7a2a4a 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
@@ -2812,6 +2812,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
index 7ea73f9af8..ed8a9b02e1 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
@@ -2379,6 +2379,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
index 333fa62714..4912a496d8 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
@@ -2579,6 +2579,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
index a867467b12..ace32921fd 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
@@ -2753,6 +2753,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
index dbad5b3163..82375e7094 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
@@ -2547,6 +2547,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/sh/be/libc.abilist b/sysdeps/unix/sysv/linux/sh/be/libc.abilist
index 6f755cc173..82c87626c6 100644
--- a/sysdeps/unix/sysv/linux/sh/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sh/be/libc.abilist
@@ -2608,6 +2608,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/sh/le/libc.abilist b/sysdeps/unix/sysv/linux/sh/le/libc.abilist
index 77d936aa3c..5c3ccd89fd 100644
--- a/sysdeps/unix/sysv/linux/sh/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sh/le/libc.abilist
@@ -2605,6 +2605,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
index 09bb4363e1..cabde52e90 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
@@ -2748,6 +2748,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
index 9df9cb6adb..e188566fb2 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
@@ -2574,6 +2574,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/tls-internal.c b/sysdeps/unix/sysv/linux/tls-internal.c
index 6e25b021ab..045176197e 100644
--- a/sysdeps/unix/sysv/linux/tls-internal.c
+++ b/sysdeps/unix/sysv/linux/tls-internal.c
@@ -1 +1,37 @@ 
-/* Empty.  */
+/* Per-thread state.  Linux version.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+#include <tls-internal.h>
+
+void
+__glibc_tls_internal_free (void)
+{
+  struct tls_internal_t *tls = &THREAD_SELF->tls_state;
+
+  free (tls->strsignal_buf);
+  free (tls->strerror_l_buf);
+
+  /* The arc4random state only exists if the thread used arc4random.
+     Wipe it before releasing the memory so that no generator state is
+     left behind in the freed block.  */
+  if (tls->rand_state == NULL)
+    return;
+  explicit_bzero (tls->rand_state, sizeof (struct arc4random_state_t));
+  free (tls->rand_state);
+}
diff --git a/sysdeps/unix/sysv/linux/tls-internal.h b/sysdeps/unix/sysv/linux/tls-internal.h
index f7a1a62135..f268a2d43b 100644
--- a/sysdeps/unix/sysv/linux/tls-internal.h
+++ b/sysdeps/unix/sysv/linux/tls-internal.h
@@ -22,17 +22,21 @@ 
 #include <stdlib.h>
 #include <pthreadP.h>
 
+static inline void
+__glibc_tls_internal_init (struct tls_internal_t *tls_state)
+{
+  const struct tls_internal_t unset =
+    { .strsignal_buf = NULL, .strerror_l_buf = NULL, .rand_state = NULL };
+  *tls_state = unset;
+}
+
 static inline struct tls_internal_t *
 __glibc_tls_internal (void)
 {
   return &THREAD_SELF->tls_state;
 }
 
-static inline void
-__glibc_tls_internal_free (void)
-{
-  free (THREAD_SELF->tls_state.strsignal_buf);
-  free (THREAD_SELF->tls_state.strerror_l_buf);
-}
+/* Free the per-thread buffers, wiping the arc4random state first.  */
+extern void __glibc_tls_internal_free (void) attribute_hidden;
 
 #endif
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
index 4829450ad0..aad85b7f69 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
@@ -2525,6 +2525,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
index caea228bcb..8361f7b598 100644
--- a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
@@ -2631,6 +2631,9 @@  GLIBC_2.35 __memcmpeq F
 GLIBC_2.35 _dl_find_object F
 GLIBC_2.35 epoll_pwait2 F
 GLIBC_2.35 posix_spawn_file_actions_addtcsetpgrp_np F
+GLIBC_2.36 arc4random F
+GLIBC_2.36 arc4random_buf F
+GLIBC_2.36 arc4random_uniform F
 GLIBC_2.36 fsmount F
 GLIBC_2.36 fsopen F
 GLIBC_2.36 move_mount F