[v2] stdlib: Simplify arc4random_uniform
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
dj/TryBot-32bit |
success
|
Build for i686
|
Commit Message
It uses the bitmask with rejection [1], which calculates a mask
being the lowest power of two bounding the request upper bound,
successively queries new random values, and rejects values
outside the requested range.
Performance-wise, there is no much gain in trying to conserve
bits since arc4random is wrapper on getrandom syscall. It should
be cheaper to just query a uint32_t value. The algorithm also
avoids modulo and divide operations, which might be costly
depending of the architecture.
[1] https://www.pcg-random.org/posts/bounded-rands.html
Reviewed-by: Yann Droneaud <ydroneaud@opteya.com>
---
v2: Fixed typos in commit message and simplify loop.
---
stdlib/arc4random_uniform.c | 129 +++++++++---------------------------
1 file changed, 30 insertions(+), 99 deletions(-)
Comments
Based on previous comments I will commit this shortly.
On 29/07/22 09:32, Adhemerval Zanella wrote:
> It uses the bitmask with rejection [1], which calculates a mask
> being the lowest power of two bounding the request upper bound,
> successively queries new random values, and rejects values
> outside the requested range.
>
> Performance-wise, there is no much gain in trying to conserve
> bits since arc4random is wrapper on getrandom syscall. It should
> be cheaper to just query a uint32_t value. The algorithm also
> avoids modulo and divide operations, which might be costly
> depending of the architecture.
>
> [1] https://www.pcg-random.org/posts/bounded-rands.html
>
> Reviewed-by: Yann Droneaud <ydroneaud@opteya.com>
> ---
> v2: Fixed typos in commit message and simplify loop.
> ---
> stdlib/arc4random_uniform.c | 129 +++++++++---------------------------
> 1 file changed, 30 insertions(+), 99 deletions(-)
>
> diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c
> index 1326dfa593..5aa98d1c13 100644
> --- a/stdlib/arc4random_uniform.c
> +++ b/stdlib/arc4random_uniform.c
> @@ -17,38 +17,19 @@
> License along with the GNU C Library; if not, see
> <https://www.gnu.org/licenses/>. */
>
> -#include <endian.h>
> -#include <libc-lock.h>
> #include <stdlib.h>
> #include <sys/param.h>
>
> -/* Return the number of bytes which cover values up to the limit. */
> -__attribute__ ((const))
> -static uint32_t
> -byte_count (uint32_t n)
> -{
> - if (n < (1U << 8))
> - return 1;
> - else if (n < (1U << 16))
> - return 2;
> - else if (n < (1U << 24))
> - return 3;
> - else
> - return 4;
> -}
> +/* Return a uniformly distributed random number less than N. The algorithm
> + calculates a mask being the lowest power of two bounding the upper bound
> + N, successively queries new random values, and rejects values outside of
> + the request range.
>
> -/* Fill the lower bits of the result with randomness, according to the
> - number of bytes requested. */
> -static void
> -random_bytes (uint32_t *result, uint32_t byte_count)
> -{
> - *result = 0;
> - unsigned char *ptr = (unsigned char *) result;
> - if (__BYTE_ORDER == __BIG_ENDIAN)
> - ptr += 4 - byte_count;
> - __arc4random_buf (ptr, byte_count);
> -}
> + For reject values, it also tries if the remaining entropy could fit on
> + the asked range after range adjustment.
>
> + The algorithm avoids modulo and divide operations, which might be costly
> + depending on the architecture. */
> uint32_t
> __arc4random_uniform (uint32_t n)
> {
> @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n)
> only possible result for limit 1. */
> return 0;
>
> - /* The bits variable serves as a source for bits. Prefetch the
> - minimum number of bytes needed. */
> - uint32_t count = byte_count (n);
> - uint32_t bits_length = count * CHAR_BIT;
> - uint32_t bits;
> - random_bytes (&bits, count);
> -
> /* Powers of two are easy. */
> if (powerof2 (n))
> - return bits & (n - 1);
> -
> - /* The general case. This algorithm follows Jérémie Lumbroso,
> - Optimal Discrete Uniform Generation from Coin Flips, and
> - Applications (2013), who credits Donald E. Knuth and Andrew
> - C. Yao, The complexity of nonuniform random number generation
> - (1976), for solving the general case.
> + return __arc4random () & (n - 1);
>
> - The implementation below unrolls the initialization stage of the
> - loop, where v is less than n. */
> + /* mask is the smallest power of 2 minus 1 number larger than n. */
> + int z = __builtin_clz (n);
> + uint32_t mask = ~UINT32_C(0) >> z;
> + int bits = CHAR_BIT * sizeof (uint32_t) - z;
>
> - /* Use 64-bit variables even though the intermediate results are
> - never larger than 33 bits. This ensures the code is easier to
> - compile on 64-bit architectures. */
> - uint64_t v;
> - uint64_t c;
> -
> - /* Initialize v and c. v is the smallest power of 2 which is larger
> - than n.*/
> - {
> - uint32_t log2p1 = 32 - __builtin_clz (n);
> - v = 1ULL << log2p1;
> - c = bits & (v - 1);
> - bits >>= log2p1;
> - bits_length -= log2p1;
> - }
> -
> - /* At the start of the loop, c is uniformly distributed within the
> - half-open interval [0, v), and v < 2n < 2**33. */
> - while (true)
> + while (1)
> {
> - if (v >= n)
> - {
> - /* If the candidate is less than n, accept it. */
> - if (c < n)
> - /* c is uniformly distributed on [0, n). */
> - return c;
> - else
> - {
> - /* c is uniformly distributed on [n, v). */
> - v -= n;
> - c -= n;
> - /* The distribution was shifted, so c is uniformly
> - distributed on [0, v) again. */
> - }
> - }
> - /* v < n here. */
> -
> - /* Replenish the bit source if necessary. */
> - if (bits_length == 0)
> - {
> - /* Overwrite the least significant byte. */
> - random_bytes (&bits, 1);
> - bits_length = CHAR_BIT;
> - }
> -
> - /* Double the range. No overflow because v < n < 2**32. */
> - v *= 2;
> - /* v < 2n here. */
> -
> - /* Extract a bit and append it to c. c remains less than v and
> - thus 2**33. */
> - c = (c << 1) | (bits & 1);
> - bits >>= 1;
> - --bits_length;
> -
> - /* At this point, c is uniformly distributed on [0, v) again,
> - and v < 2n < 2**33. */
> + uint32_t value = __arc4random ();
> +
> + /* Return if the lower power of 2 minus 1 satisfy the condition. */
> + uint32_t r = value & mask;
> + if (r < n)
> + return r;
> +
> + /* Otherwise check if remaining bits of entropy provides fits in the
> + bound. */
> + for (int bits_left = z; bits_left >= bits; bits_left -= bits)
> + {
> + value >>= bits;
> + r = value & mask;
> + if (r < n)
> + return r;
> + }
> }
> }
> libc_hidden_def (__arc4random_uniform)
On Fri, Jul 29, 2022 at 8:32 PM Adhemerval Zanella via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> It uses the bitmask with rejection [1], which calculates a mask
> being the lowest power of two bounding the request upper bound,
> successively queries new random values, and rejects values
> outside the requested range.
>
> Performance-wise, there is no much gain in trying to conserve
> bits since arc4random is wrapper on getrandom syscall. It should
> be cheaper to just query a uint32_t value. The algorithm also
> avoids modulo and divide operations, which might be costly
> depending of the architecture.
>
> [1] https://www.pcg-random.org/posts/bounded-rands.html
>
> Reviewed-by: Yann Droneaud <ydroneaud@opteya.com>
> ---
> v2: Fixed typos in commit message and simplify loop.
> ---
> stdlib/arc4random_uniform.c | 129 +++++++++---------------------------
> 1 file changed, 30 insertions(+), 99 deletions(-)
>
> diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c
> index 1326dfa593..5aa98d1c13 100644
> --- a/stdlib/arc4random_uniform.c
> +++ b/stdlib/arc4random_uniform.c
> @@ -17,38 +17,19 @@
> License along with the GNU C Library; if not, see
> <https://www.gnu.org/licenses/>. */
>
> -#include <endian.h>
> -#include <libc-lock.h>
> #include <stdlib.h>
> #include <sys/param.h>
>
> -/* Return the number of bytes which cover values up to the limit. */
> -__attribute__ ((const))
> -static uint32_t
> -byte_count (uint32_t n)
> -{
> - if (n < (1U << 8))
> - return 1;
> - else if (n < (1U << 16))
> - return 2;
> - else if (n < (1U << 24))
> - return 3;
> - else
> - return 4;
> -}
> +/* Return a uniformly distributed random number less than N. The algorithm
> + calculates a mask being the lowest power of two bounding the upper bound
> + N, successively queries new random values, and rejects values outside of
> + the request range.
>
> -/* Fill the lower bits of the result with randomness, according to the
> - number of bytes requested. */
> -static void
> -random_bytes (uint32_t *result, uint32_t byte_count)
> -{
> - *result = 0;
> - unsigned char *ptr = (unsigned char *) result;
> - if (__BYTE_ORDER == __BIG_ENDIAN)
> - ptr += 4 - byte_count;
> - __arc4random_buf (ptr, byte_count);
> -}
> + For reject values, it also tries if the remaining entropy could fit on
> + the asked range after range adjustment.
>
> + The algorithm avoids modulo and divide operations, which might be costly
> + depending on the architecture. */
> uint32_t
> __arc4random_uniform (uint32_t n)
> {
> @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n)
> only possible result for limit 1. */
> return 0;
>
> - /* The bits variable serves as a source for bits. Prefetch the
> - minimum number of bytes needed. */
> - uint32_t count = byte_count (n);
> - uint32_t bits_length = count * CHAR_BIT;
> - uint32_t bits;
> - random_bytes (&bits, count);
> -
> /* Powers of two are easy. */
> if (powerof2 (n))
> - return bits & (n - 1);
> -
> - /* The general case. This algorithm follows Jérémie Lumbroso,
> - Optimal Discrete Uniform Generation from Coin Flips, and
> - Applications (2013), who credits Donald E. Knuth and Andrew
> - C. Yao, The complexity of nonuniform random number generation
> - (1976), for solving the general case.
> + return __arc4random () & (n - 1);
>
> - The implementation below unrolls the initialization stage of the
> - loop, where v is less than n. */
> + /* mask is the smallest power of 2 minus 1 number larger than n. */
> + int z = __builtin_clz (n);
> + uint32_t mask = ~UINT32_C(0) >> z;
> + int bits = CHAR_BIT * sizeof (uint32_t) - z;
>
> - /* Use 64-bit variables even though the intermediate results are
> - never larger than 33 bits. This ensures the code is easier to
> - compile on 64-bit architectures. */
> - uint64_t v;
> - uint64_t c;
> -
> - /* Initialize v and c. v is the smallest power of 2 which is larger
> - than n.*/
> - {
> - uint32_t log2p1 = 32 - __builtin_clz (n);
> - v = 1ULL << log2p1;
> - c = bits & (v - 1);
> - bits >>= log2p1;
> - bits_length -= log2p1;
> - }
> -
> - /* At the start of the loop, c is uniformly distributed within the
> - half-open interval [0, v), and v < 2n < 2**33. */
> - while (true)
> + while (1)
> {
> - if (v >= n)
> - {
> - /* If the candidate is less than n, accept it. */
> - if (c < n)
> - /* c is uniformly distributed on [0, n). */
> - return c;
> - else
> - {
> - /* c is uniformly distributed on [n, v). */
> - v -= n;
> - c -= n;
> - /* The distribution was shifted, so c is uniformly
> - distributed on [0, v) again. */
> - }
> - }
> - /* v < n here. */
> -
> - /* Replenish the bit source if necessary. */
> - if (bits_length == 0)
> - {
> - /* Overwrite the least significant byte. */
> - random_bytes (&bits, 1);
> - bits_length = CHAR_BIT;
> - }
> -
> - /* Double the range. No overflow because v < n < 2**32. */
> - v *= 2;
> - /* v < 2n here. */
> -
> - /* Extract a bit and append it to c. c remains less than v and
> - thus 2**33. */
> - c = (c << 1) | (bits & 1);
> - bits >>= 1;
> - --bits_length;
> -
> - /* At this point, c is uniformly distributed on [0, v) again,
> - and v < 2n < 2**33. */
> + uint32_t value = __arc4random ();
> +
> + /* Return if the lower power of 2 minus 1 satisfy the condition. */
> + uint32_t r = value & mask;
> + if (r < n)
> + return r;
> +
> + /* Otherwise check if remaining bits of entropy provides fits in the
> + bound. */
> + for (int bits_left = z; bits_left >= bits; bits_left -= bits)
> + {
> + value >>= bits;
Can this just be shift by 1 and repeat (32 - z) times or does that
introduce bias (not seeing exactly why it would)?
> + r = value & mask;
> + if (r < n)
> + return r;
> + }
> }
> }
> libc_hidden_def (__arc4random_uniform)
> --
> 2.34.1
>
Hi,
Le 01/08/2022 à 21:20, Noah Goldstein a écrit :
>> diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c
>> index 1326dfa593..5aa98d1c13 100644
>> --- a/stdlib/arc4random_uniform.c
>> +++ b/stdlib/arc4random_uniform.c
>>
>> uint32_t
>> __arc4random_uniform (uint32_t n)
>> {
>> @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n)
>> + while (1)
>> {
>> + uint32_t value = __arc4random ();
>> +
>> + /* Return if the lower power of 2 minus 1 satisfy the condition. */
>> + uint32_t r = value & mask;
>> + if (r < n)
>> + return r;
>> +
>> + /* Otherwise check if remaining bits of entropy provides fits in the
>> + bound. */
>> + for (int bits_left = z; bits_left >= bits; bits_left -= bits)
>> + {
>> + value >>= bits;
> Can this just be shift by 1 and repeat (32 - z) times or does that
> introduce bias (not seeing exactly why it would)?
That was bothering me too, as I was thinking a rotation would be
possible instead of shift.
I posted the question
https://crypto.stackexchange.com/questions/101325/uniform-rejection-sampling-by-shifting-or-rotating-bits-from-csprng-output-safe
The answer: there's indeed a bias.
This explains why my attempt with rotation leads to dieharder
complaining. It was so obvious ... Damn
Regards.
On 02/08/22 09:08, Yann Droneaud wrote:
> Hi,
>
> Le 01/08/2022 à 21:20, Noah Goldstein a écrit :
>>> diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c
>>> index 1326dfa593..5aa98d1c13 100644
>>> --- a/stdlib/arc4random_uniform.c
>>> +++ b/stdlib/arc4random_uniform.c
>>>
>>> uint32_t
>>> __arc4random_uniform (uint32_t n)
>>> {
>>> @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n)
>>> + while (1)
>>> {
>>> + uint32_t value = __arc4random ();
>>> +
>>> + /* Return if the lower power of 2 minus 1 satisfy the condition. */
>>> + uint32_t r = value & mask;
>>> + if (r < n)
>>> + return r;
>>> +
>>> + /* Otherwise check if remaining bits of entropy provides fits in the
>>> + bound. */
>>> + for (int bits_left = z; bits_left >= bits; bits_left -= bits)
>>> + {
>>> + value >>= bits;
>> Can this just be shift by 1 and repeat (32 - z) times or does that
>> introduce bias (not seeing exactly why it would)?
>
>
> That was bothering me too, as I was thinking a rotation would be possible instead of shift.
>
> I posted the question https://crypto.stackexchange.com/questions/101325/uniform-rejection-sampling-by-shifting-or-rotating-bits-from-csprng-output-safe
>
> The answer: there's indeed a bias.
>
> This explains why my attempt with rotation leads to dieharder complaining. It was so obvious ... Damn
>
Thanks, I will remove it then. We might evaluate later if using the mask
and compare is indeed better than the other methods (as using by OpenBSD).
Le 02/08/2022 à 14:14, Adhemerval Zanella Netto a écrit :
>
> On 02/08/22 09:08, Yann Droneaud wrote:
>> Hi,
>>
>> Le 01/08/2022 à 21:20, Noah Goldstein a écrit :
>>>> diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c
>>>> index 1326dfa593..5aa98d1c13 100644
>>>> --- a/stdlib/arc4random_uniform.c
>>>> +++ b/stdlib/arc4random_uniform.c
>>>>
>>>> uint32_t
>>>> __arc4random_uniform (uint32_t n)
>>>> {
>>>> @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n)
>>>> + while (1)
>>>> {
>>>> + uint32_t value = __arc4random ();
>>>> +
>>>> + /* Return if the lower power of 2 minus 1 satisfy the condition. */
>>>> + uint32_t r = value & mask;
>>>> + if (r < n)
>>>> + return r;
>>>> +
>>>> + /* Otherwise check if remaining bits of entropy provides fits in the
>>>> + bound. */
>>>> + for (int bits_left = z; bits_left >= bits; bits_left -= bits)
>>>> + {
>>>> + value >>= bits;
>>> Can this just be shift by 1 and repeat (32 - z) times or does that
>>> introduce bias (not seeing exactly why it would)?
>>
>> That was bothering me too, as I was thinking a rotation would be possible instead of shift.
>>
>> I posted the question https://crypto.stackexchange.com/questions/101325/uniform-rejection-sampling-by-shifting-or-rotating-bits-from-csprng-output-safe
>>
>> The answer: there's indeed a bias.
>>
>> This explains why my attempt with rotation leads to dieharder complaining. It was so obvious ... Damn
>>
>
> Thanks, I will remove it then. We might evaluate later if using the mask
> and compare is indeed better than the other methods (as using by OpenBSD).
>
It's the proposal to shift by 1 bit which would introduce a bias.
The implementation used in Glibc is fine.
Regards.
On 02/08/22 09:26, Yann Droneaud wrote:
> Le 02/08/2022 à 14:14, Adhemerval Zanella Netto a écrit :
>>
>> On 02/08/22 09:08, Yann Droneaud wrote:
>>> Hi,
>>>
>>> Le 01/08/2022 à 21:20, Noah Goldstein a écrit :
>>>>> diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c
>>>>> index 1326dfa593..5aa98d1c13 100644
>>>>> --- a/stdlib/arc4random_uniform.c
>>>>> +++ b/stdlib/arc4random_uniform.c
>>>>>
>>>>> uint32_t
>>>>> __arc4random_uniform (uint32_t n)
>>>>> {
>>>>> @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n)
>>>>> + while (1)
>>>>> {
>>>>> + uint32_t value = __arc4random ();
>>>>> +
>>>>> + /* Return if the lower power of 2 minus 1 satisfy the condition. */
>>>>> + uint32_t r = value & mask;
>>>>> + if (r < n)
>>>>> + return r;
>>>>> +
>>>>> + /* Otherwise check if remaining bits of entropy provides fits in the
>>>>> + bound. */
>>>>> + for (int bits_left = z; bits_left >= bits; bits_left -= bits)
>>>>> + {
>>>>> + value >>= bits;
>>>> Can this just be shift by 1 and repeat (32 - z) times or does that
>>>> introduce bias (not seeing exactly why it would)?
>>>
>>> That was bothering me too, as I was thinking a rotation would be possible instead of shift.
>>>
>>> I posted the question https://crypto.stackexchange.com/questions/101325/uniform-rejection-sampling-by-shifting-or-rotating-bits-from-csprng-output-safe
>>>
>>> The answer: there's indeed a bias.
>>>
>>> This explains why my attempt with rotation leads to dieharder complaining. It was so obvious ... Damn
>>>
>>
>> Thanks, I will remove it then. We might evaluate later if using the mask
>> and compare is indeed better than the other methods (as using by OpenBSD).
>>
> It's the proposal to shift by 1 bit which would introduce a bias.
>
> The implementation used in Glibc is fine.
Hum I understood wrongly then, since it seemed you are answering to Noah question
for glibc implementation itself. I though I has this sort out, but I am not sure
anymore.
On Tue, Aug 2, 2022 at 8:08 PM Yann Droneaud <ydroneaud@opteya.com> wrote:
>
> Hi,
>
> Le 01/08/2022 à 21:20, Noah Goldstein a écrit :
> >> diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c
> >> index 1326dfa593..5aa98d1c13 100644
> >> --- a/stdlib/arc4random_uniform.c
> >> +++ b/stdlib/arc4random_uniform.c
> >>
> >> uint32_t
> >> __arc4random_uniform (uint32_t n)
> >> {
> >> @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n)
> >> + while (1)
> >> {
> >> + uint32_t value = __arc4random ();
> >> +
> >> + /* Return if the lower power of 2 minus 1 satisfy the condition. */
> >> + uint32_t r = value & mask;
> >> + if (r < n)
> >> + return r;
> >> +
> >> + /* Otherwise check if remaining bits of entropy provides fits in the
> >> + bound. */
> >> + for (int bits_left = z; bits_left >= bits; bits_left -= bits)
> >> + {
> >> + value >>= bits;
> > Can this just be shift by 1 and repeat (32 - z) times or does that
> > introduce bias (not seeing exactly why it would)?
>
>
> That was bothering me too, as I was thinking a rotation would be
> possible instead of shift.
>
> I posted the question
> https://crypto.stackexchange.com/questions/101325/uniform-rejection-sampling-by-shifting-or-rotating-bits-from-csprng-output-safe
>
> The answer: there's indeed a bias.
Hmm, based on the answer it seems we could shift by `popcnt(n)` (which is
guranteed to be <= bits). If I understand correctly the issue is the consecutive
leading 1s essentially fix ensuing ones but `popcnt(n)` will always clear
those fixed ones. After the first zero, the rest of the bits can be anything
I believe so there will be no bias from them.
>
> This explains why my attempt with rotation leads to dieharder
> complaining. It was so obvious ... Damn
>
>
> Regards.
>
>
> --
>
> Yann Droneaud
>
> OPTEYA
>
>
@@ -17,38 +17,19 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <endian.h>
-#include <libc-lock.h>
#include <stdlib.h>
#include <sys/param.h>
-/* Return the number of bytes which cover values up to the limit. */
-__attribute__ ((const))
-static uint32_t
-byte_count (uint32_t n)
-{
- if (n < (1U << 8))
- return 1;
- else if (n < (1U << 16))
- return 2;
- else if (n < (1U << 24))
- return 3;
- else
- return 4;
-}
+/* Return a uniformly distributed random number less than N. The algorithm
+ calculates a mask being the lowest power of two bounding the upper bound
+ N, successively queries new random values, and rejects values outside of
+ the request range.
-/* Fill the lower bits of the result with randomness, according to the
- number of bytes requested. */
-static void
-random_bytes (uint32_t *result, uint32_t byte_count)
-{
- *result = 0;
- unsigned char *ptr = (unsigned char *) result;
- if (__BYTE_ORDER == __BIG_ENDIAN)
- ptr += 4 - byte_count;
- __arc4random_buf (ptr, byte_count);
-}
+ For reject values, it also tries if the remaining entropy could fit on
+ the asked range after range adjustment.
+ The algorithm avoids modulo and divide operations, which might be costly
+ depending on the architecture. */
uint32_t
__arc4random_uniform (uint32_t n)
{
@@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n)
only possible result for limit 1. */
return 0;
- /* The bits variable serves as a source for bits. Prefetch the
- minimum number of bytes needed. */
- uint32_t count = byte_count (n);
- uint32_t bits_length = count * CHAR_BIT;
- uint32_t bits;
- random_bytes (&bits, count);
-
/* Powers of two are easy. */
if (powerof2 (n))
- return bits & (n - 1);
-
- /* The general case. This algorithm follows Jérémie Lumbroso,
- Optimal Discrete Uniform Generation from Coin Flips, and
- Applications (2013), who credits Donald E. Knuth and Andrew
- C. Yao, The complexity of nonuniform random number generation
- (1976), for solving the general case.
+ return __arc4random () & (n - 1);
- The implementation below unrolls the initialization stage of the
- loop, where v is less than n. */
+ /* mask is the smallest power of 2 minus 1 number larger than n. */
+ int z = __builtin_clz (n);
+ uint32_t mask = ~UINT32_C(0) >> z;
+ int bits = CHAR_BIT * sizeof (uint32_t) - z;
- /* Use 64-bit variables even though the intermediate results are
- never larger than 33 bits. This ensures the code is easier to
- compile on 64-bit architectures. */
- uint64_t v;
- uint64_t c;
-
- /* Initialize v and c. v is the smallest power of 2 which is larger
- than n.*/
- {
- uint32_t log2p1 = 32 - __builtin_clz (n);
- v = 1ULL << log2p1;
- c = bits & (v - 1);
- bits >>= log2p1;
- bits_length -= log2p1;
- }
-
- /* At the start of the loop, c is uniformly distributed within the
- half-open interval [0, v), and v < 2n < 2**33. */
- while (true)
+ while (1)
{
- if (v >= n)
- {
- /* If the candidate is less than n, accept it. */
- if (c < n)
- /* c is uniformly distributed on [0, n). */
- return c;
- else
- {
- /* c is uniformly distributed on [n, v). */
- v -= n;
- c -= n;
- /* The distribution was shifted, so c is uniformly
- distributed on [0, v) again. */
- }
- }
- /* v < n here. */
-
- /* Replenish the bit source if necessary. */
- if (bits_length == 0)
- {
- /* Overwrite the least significant byte. */
- random_bytes (&bits, 1);
- bits_length = CHAR_BIT;
- }
-
- /* Double the range. No overflow because v < n < 2**32. */
- v *= 2;
- /* v < 2n here. */
-
- /* Extract a bit and append it to c. c remains less than v and
- thus 2**33. */
- c = (c << 1) | (bits & 1);
- bits >>= 1;
- --bits_length;
-
- /* At this point, c is uniformly distributed on [0, v) again,
- and v < 2n < 2**33. */
+ uint32_t value = __arc4random ();
+
+ /* Return if the lower power of 2 minus 1 satisfy the condition. */
+ uint32_t r = value & mask;
+ if (r < n)
+ return r;
+
+ /* Otherwise check if remaining bits of entropy provides fits in the
+ bound. */
+ for (int bits_left = z; bits_left >= bits; bits_left -= bits)
+ {
+ value >>= bits;
+ r = value & mask;
+ if (r < n)
+ return r;
+ }
}
}
libc_hidden_def (__arc4random_uniform)