[3/3] stdlib: The qsort implementation needs to use heapsort in more cases
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Testing passed
|
Commit Message
The existing logic avoided internal stack overflow. To avoid
a denial-of-service condition with adversarial input, it is necessary
to fall over to heapsort if tail-recursing deeply, too, which does
not result in a deep stack of pending partitions.
The new test stdlib/tst-qsort5 is based on Douglas McIlroy's paper
on this subject.
---
stdlib/Makefile | 3 +
stdlib/qsort.c | 17 +++--
stdlib/tst-qsort5.c | 171 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 187 insertions(+), 4 deletions(-)
create mode 100644 stdlib/tst-qsort5.c
Comments
On 17/11/23 15:44, Florian Weimer wrote:
> The existing logic avoided internal stack overflow. To avoid
> a denial-of-service condition with adversarial input, it is necessary
> to fall over to heapsort if tail-recursing deeply, too, which does
> not result in a deep stack of pending partitions.
>
> The new test stdlib/tst-qsort5 is based on Douglas McIlroy's paper
> on this subject.
LGTM, thanks.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
> ---
> stdlib/Makefile | 3 +
> stdlib/qsort.c | 17 +++--
> stdlib/tst-qsort5.c | 171 ++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 187 insertions(+), 4 deletions(-)
> create mode 100644 stdlib/tst-qsort5.c
>
> diff --git a/stdlib/Makefile b/stdlib/Makefile
> index 48688f6a27..6194d1cb22 100644
> --- a/stdlib/Makefile
> +++ b/stdlib/Makefile
> @@ -215,6 +215,7 @@ tests := \
> tst-qsort \
> tst-qsort2 \
> tst-qsort3 \
> + tst-qsort5 \
> tst-quick_exit \
> tst-rand48 \
> tst-rand48-2 \
> @@ -512,3 +513,5 @@ $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3
> '$(run-program-env)' '$(test-program-prefix-after-env)' \
> $(common-objpfx)stdlib/; \
> $(evaluate-test)
> +
> +$(objpfx)tst-qsort5: $(libm)
> diff --git a/stdlib/qsort.c b/stdlib/qsort.c
> index a2f9e916ef..be01fb5598 100644
> --- a/stdlib/qsort.c
> +++ b/stdlib/qsort.c
> @@ -389,14 +389,23 @@ __qsort_r (void *const pbase, size_t total_elems, size_t size,
> {
> if ((size_t) (hi - left_ptr) <= max_thresh)
> /* Ignore both small partitions. */
> - top = pop (top, &lo, &hi, &depth);
> + {
> + top = pop (top, &lo, &hi, &depth);
> + --depth;
> + }
> else
> - /* Ignore small left partition. */
> - lo = left_ptr;
> + {
> + /* Ignore small left partition. */
> + lo = left_ptr;
> + --depth;
> + }
> }
> else if ((size_t) (hi - left_ptr) <= max_thresh)
> /* Ignore small right partition. */
> - hi = right_ptr;
> + {
> + hi = right_ptr;
> + --depth;
> + }
> else if ((right_ptr - lo) > (hi - left_ptr))
> {
> /* Push larger left partition indices. */
Ok (kinda embarrassing I missed this obvious depth update...).
> diff --git a/stdlib/tst-qsort5.c b/stdlib/tst-qsort5.c
> new file mode 100644
> index 0000000000..d3a88c30f8
> --- /dev/null
> +++ b/stdlib/tst-qsort5.c
> @@ -0,0 +1,171 @@
> +/* Adversarial test for qsort_r.
> + Copyright (C) 2023 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +/* The approach follows Douglas McIlroy, A Killer Adversary for
> + Quicksort. Software—Practice and Experience 29 (1999) 341-344.
> + Downloaded <http://www.cs.dartmouth.edu/~doug/mdmspe.pdf>
> + (2023-11-17). */
> +
> +#include <math.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <support/check.h>
> +#include <support/support.h>
> +
> +struct context
> +{
> + /* Called the gas value in the paper. This value is larger than all
> + other values (length minus one will do), so comparison with any
> + decided value has a known result. */
> + int undecided_value;
> +
> + /* If comparing undecided values, one of them as to be assigned a
> + value to ensure consistency with future comparisons. This is the
> + value that will be used. Starts out at zero. */
> + int next_decided;
> +
> + /* Used to trick pivot selection. Deciding the value for the last
> + seen undcided value in a decided/undecided comparison happens
> + to trick the many qsort implementations. */
> + int last_undecided_index;
> +
> + /* This array contains the actually asigned values. The call to
> + qsort_r sorts a different array that contains indices into this
> + array. */
> + int *decided_values;
> +};
> +
> +static int
> +compare_opponent (const void *l1, const void *r1, void *ctx1)
> +{
> + const int *l = l1;
> + const int *r = r1;
> + struct context *ctx = ctx1;
> + int rvalue = ctx->decided_values[*r];
> + int lvalue = ctx->decided_values[*l];
> +
> + if (lvalue == ctx->undecided_value)
> + {
> + if (rvalue == ctx->undecided_value)
> + {
> + /* Both values are undecided. In this case, make a decision
> + for the last-used undecided value. This is tweak is very
> + specific to quicksort. */
> + if (*l == ctx->last_undecided_index)
> + {
> + ctx->decided_values[*l] = ctx->next_decided;
> + ++ctx->next_decided;
> + /* The undecided value or *r is greater. */
> + return -1;
> + }
> + else
> + {
> + ctx->decided_values[*r] = ctx->next_decided;
> + ++ctx->next_decided;
> + /* The undecided value for *l is greater. */
> + return 1;
> + }
> + }
> + else
> + {
> + ctx->last_undecided_index = *l;
> + return 1;
> + }
> + }
> + else
> + {
> + /* *l is a decided value. */
> + if (rvalue == ctx->undecided_value)
> + {
> + ctx->last_undecided_index = *r;
> + /* The undecided value for *r is greater. */
> + return -1;
> + }
> + else
> + return lvalue - rvalue;
> + }
> +}
> +
> +/* Return a pointer to the adversarial permutation of length N. */
> +static int *
> +create_permutation (size_t n)
> +{
> + struct context ctx =
> + {
> + .undecided_value = n - 1, /* Larger than all other values. */
> + .decided_values = xcalloc (n, sizeof (int)),
> + };
> + for (size_t i = 0; i < n; ++i)
> + ctx.decided_values[i] = ctx.undecided_value;
> + int *scratch = xcalloc (n, sizeof (int));
> + for (size_t i = 0; i < n; ++i)
> + scratch[i] = i;
> + qsort_r (scratch, n, sizeof (*scratch), compare_opponent, &ctx);
> + free (scratch);
> + return ctx.decided_values;
> +}
> +
> +/* Callback function for qsort which counts the number of invocations
> + in *CLOSURE. */
> +static int
> +compare_counter (const void *l1, const void *r1, void *closure)
> +{
> + const int *l = l1;
> + const int *r = r1;
> + unsigned long long int *counter = closure;
> + ++*counter;
> + return *l - *r;
> +}
> +
> +/* Count the comparisons required for an adversarial permutation of
> + length N. */
> +static unsigned long long int
> +count_comparisons (size_t n)
> +{
> + int *array = create_permutation (n);
> + unsigned long long int counter = 0;
> + qsort_r (array, n, sizeof (*array), compare_counter, &counter);
> + free (array);
> + return counter;
> +}
> +
> +/* Check the scaling factor for one adversarial permutation of length
> + N, and report some statistics. */
> +static void
> +check_one_n (size_t n)
> +{
> + unsigned long long int count = count_comparisons (n);
> + double factor = count / (n * log (count));
> + printf ("info: length %zu: %llu comparisons ~ %f * n * log (n)\n",
> + n, count, factor);
> + /* This is an arbitrary factor which is true for the current
> + implementation across a wide range of sizes. */
> + TEST_VERIFY (factor <= 4.5);
> +}
> +
> +static int
> +do_test (void)
> +{
> + check_one_n (100);
> + check_one_n (1000);
> + for (int i = 1; i <= 15; ++i)
> + check_one_n (i * 10 * 1000);
> + return 0;
> +}
> +
> +#include <support/test-driver.c>
Ok.
@@ -215,6 +215,7 @@ tests := \
tst-qsort \
tst-qsort2 \
tst-qsort3 \
+ tst-qsort5 \
tst-quick_exit \
tst-rand48 \
tst-rand48-2 \
@@ -512,3 +513,5 @@ $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3
'$(run-program-env)' '$(test-program-prefix-after-env)' \
$(common-objpfx)stdlib/; \
$(evaluate-test)
+
+$(objpfx)tst-qsort5: $(libm)
@@ -389,14 +389,23 @@ __qsort_r (void *const pbase, size_t total_elems, size_t size,
{
if ((size_t) (hi - left_ptr) <= max_thresh)
/* Ignore both small partitions. */
- top = pop (top, &lo, &hi, &depth);
+ {
+ top = pop (top, &lo, &hi, &depth);
+ --depth;
+ }
else
- /* Ignore small left partition. */
- lo = left_ptr;
+ {
+ /* Ignore small left partition. */
+ lo = left_ptr;
+ --depth;
+ }
}
else if ((size_t) (hi - left_ptr) <= max_thresh)
/* Ignore small right partition. */
- hi = right_ptr;
+ {
+ hi = right_ptr;
+ --depth;
+ }
else if ((right_ptr - lo) > (hi - left_ptr))
{
/* Push larger left partition indices. */
new file mode 100644
@@ -0,0 +1,171 @@
+/* Adversarial test for qsort_r.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* The approach follows Douglas McIlroy, A Killer Adversary for
+ Quicksort. Software—Practice and Experience 29 (1999) 341-344.
+ Downloaded <http://www.cs.dartmouth.edu/~doug/mdmspe.pdf>
+ (2023-11-17). */
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <support/check.h>
+#include <support/support.h>
+
+struct context
+{
+ /* Called the gas value in the paper. This value is larger than all
+ other values (length minus one will do), so comparison with any
+ decided value has a known result. */
+ int undecided_value;
+
+ /* If comparing undecided values, one of them as to be assigned a
+ value to ensure consistency with future comparisons. This is the
+ value that will be used. Starts out at zero. */
+ int next_decided;
+
+ /* Used to trick pivot selection. Deciding the value for the last
+ seen undcided value in a decided/undecided comparison happens
+ to trick the many qsort implementations. */
+ int last_undecided_index;
+
+ /* This array contains the actually asigned values. The call to
+ qsort_r sorts a different array that contains indices into this
+ array. */
+ int *decided_values;
+};
+
+static int
+compare_opponent (const void *l1, const void *r1, void *ctx1)
+{
+ const int *l = l1;
+ const int *r = r1;
+ struct context *ctx = ctx1;
+ int rvalue = ctx->decided_values[*r];
+ int lvalue = ctx->decided_values[*l];
+
+ if (lvalue == ctx->undecided_value)
+ {
+ if (rvalue == ctx->undecided_value)
+ {
+ /* Both values are undecided. In this case, make a decision
+ for the last-used undecided value. This is tweak is very
+ specific to quicksort. */
+ if (*l == ctx->last_undecided_index)
+ {
+ ctx->decided_values[*l] = ctx->next_decided;
+ ++ctx->next_decided;
+ /* The undecided value or *r is greater. */
+ return -1;
+ }
+ else
+ {
+ ctx->decided_values[*r] = ctx->next_decided;
+ ++ctx->next_decided;
+ /* The undecided value for *l is greater. */
+ return 1;
+ }
+ }
+ else
+ {
+ ctx->last_undecided_index = *l;
+ return 1;
+ }
+ }
+ else
+ {
+ /* *l is a decided value. */
+ if (rvalue == ctx->undecided_value)
+ {
+ ctx->last_undecided_index = *r;
+ /* The undecided value for *r is greater. */
+ return -1;
+ }
+ else
+ return lvalue - rvalue;
+ }
+}
+
+/* Return a pointer to the adversarial permutation of length N. */
+static int *
+create_permutation (size_t n)
+{
+ struct context ctx =
+ {
+ .undecided_value = n - 1, /* Larger than all other values. */
+ .decided_values = xcalloc (n, sizeof (int)),
+ };
+ for (size_t i = 0; i < n; ++i)
+ ctx.decided_values[i] = ctx.undecided_value;
+ int *scratch = xcalloc (n, sizeof (int));
+ for (size_t i = 0; i < n; ++i)
+ scratch[i] = i;
+ qsort_r (scratch, n, sizeof (*scratch), compare_opponent, &ctx);
+ free (scratch);
+ return ctx.decided_values;
+}
+
+/* Callback function for qsort which counts the number of invocations
+ in *CLOSURE. */
+static int
+compare_counter (const void *l1, const void *r1, void *closure)
+{
+ const int *l = l1;
+ const int *r = r1;
+ unsigned long long int *counter = closure;
+ ++*counter;
+ return *l - *r;
+}
+
+/* Count the comparisons required for an adversarial permutation of
+ length N. */
+static unsigned long long int
+count_comparisons (size_t n)
+{
+ int *array = create_permutation (n);
+ unsigned long long int counter = 0;
+ qsort_r (array, n, sizeof (*array), compare_counter, &counter);
+ free (array);
+ return counter;
+}
+
+/* Check the scaling factor for one adversarial permutation of length
+ N, and report some statistics. */
+static void
+check_one_n (size_t n)
+{
+ unsigned long long int count = count_comparisons (n);
+ double factor = count / (n * log (count));
+ printf ("info: length %zu: %llu comparisons ~ %f * n * log (n)\n",
+ n, count, factor);
+ /* This is an arbitrary factor which is true for the current
+ implementation across a wide range of sizes. */
+ TEST_VERIFY (factor <= 4.5);
+}
+
+static int
+do_test (void)
+{
+ check_one_n (100);
+ check_one_n (1000);
+ for (int i = 1; i <= 15; ++i)
+ check_one_n (i * 10 * 1000);
+ return 0;
+}
+
+#include <support/test-driver.c>