Message ID: 20231230195940.1989559-1-visitorckw@gmail.com

State: New

Series: Implement introsort for qsort
## Commit Message

## Comments

diff --git a/newlib/libc/search/qsort.c b/newlib/libc/search/qsort.c index b53400aa8..61db73746 100644 --- a/newlib/libc/search/qsort.c +++ b/newlib/libc/search/qsort.c @@ -65,6 +65,7 @@ PORTABILITY #include <_ansi.h> #include <sys/cdefs.h> #include <stdlib.h> +#include <limits.h> #ifndef __GNUC__ #define inline @@ -145,6 +146,130 @@ __unused :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c )); } +/* + * Sifts the element at index 'i' down into the heap. + * + * This variant of siftdown follows a bottom-up approach, reducing the overall + * number of comparisons. The algorithm identifies the path for the element to + * sift down, reaching the leaves with only one comparison per level. + * Subsequently, it backtracks to determine the appropriate position to insert + * the target element. + * + * As elements tend to sift down closer to the leaves, this approach minimizes + * the number of comparisons compared to the traditional two per level descent. + * On average, it uses slightly more than half as many comparisons, with a + * worst-case scenario of 3/4th the comparisons. + */ +#if defined(I_AM_QSORT_R) +static inline void +__bsd_siftdown_r (void *a, + size_t i, + size_t n, + size_t es, + int swaptype, + void *thunk, + cmp_t *cmp) +#elif defined(I_AM_GNU_QSORT_R) +static inline void +siftdown_r (void *a, + size_t i, + size_t n, + size_t es, + int swaptype, + cmp_t *cmp, + void *thunk) +#else +#define thunk NULL +static inline void +siftdown (void *a, + size_t i, + size_t n, + size_t es, + int swaptype, + cmp_t *cmp) +#endif +{ + size_t j, k; + + /* Find the sift-down path all the way to the leaves. */ + for (j = i; k = 2 * j + 1, k + 1 < n;) + j = CMP(thunk, (char *) a + k * es, (char *) a + (k + 1) * es) > 0 ? k : (k + 1); + + /* Special case for the last leaf with no sibling. */ + if (k + 1 == n) + j = k; + + /* Backtrack to the correct location. 
*/ + while (i != j && CMP(thunk, (char *) a + i * es, (char *) a + j * es) > 0) + j = (j - 1) / 2; + + /* Shift the element into its correct place. */ + k = j; + while (i != j) { + j = (j - 1) / 2; + swap((char *) a + j * es, (char *) a + k * es); + } +} + +/* + * Heapsort is employed to achieve O(n log n) worst-case time complexity and + * O(1) additional space complexity for introsort implementation. When + * quicksort recursion becomes too deep, switching to heapsort helps maintain + * efficiency. + */ +#if defined(I_AM_QSORT_R) +static void +__bsd_heapsort_r (void *a, + size_t n, + size_t es, + int swaptype, + void *thunk, + cmp_t *cmp) +#elif defined(I_AM_GNU_QSORT_R) +static void +heapsort_r (void *a, + size_t n, + size_t es, + int swaptype, + cmp_t *cmp, + void *thunk) +#else +#define thunk NULL +static void +heapsort (void *a, + size_t n, + size_t es, + int swaptype, + cmp_t *cmp) +#endif +{ + size_t i = n / 2; + + /* Build a max heap. */ + while(i) { + --i; +#if defined(I_AM_QSORT_R) + __bsd_siftdown_r(a, i, n, es, swaptype, thunk, cmp); +#elif defined(I_AM_GNU_QSORT_R) + siftdown_r(a, i, n, es, swaptype, cmp, thunk); +#else + siftdown(a, i, n, es, swaptype, cmp); +#endif + } + + /* Extract elements from the heap one by one. */ + for(i = n - 1; i; i--) { + swap((char *) a, (char *) a + i * es); +#if defined(I_AM_QSORT_R) + __bsd_siftdown_r(a, 0, i, es, swaptype, thunk, cmp); +#elif defined(I_AM_GNU_QSORT_R) + siftdown_r(a, 0, i, es, swaptype, cmp, thunk); +#else + siftdown(a, 0, i, es, swaptype, cmp); +#endif + } +} + /* * Classical function call recursion wastes a lot of stack space. 
Each * recursion level requires a full stack frame comprising all local variables @@ -189,7 +314,9 @@ qsort (void *a, int cmp_result; int swaptype, swap_cnt; size_t recursion_level = 0; - struct { void *a; size_t n; } parameter_stack[PARAMETER_STACK_LEVELS]; + size_t current_level = 0; + size_t max_level = n ? 2 * (sizeof(size_t) * CHAR_BIT - 1 - __builtin_clzl(n)) : 0; + struct { void *a; size_t n; size_t level; } parameter_stack[PARAMETER_STACK_LEVELS]; SWAPINIT(a, es); loop: swap_cnt = 0; @@ -202,6 +329,18 @@ loop: swap_cnt = 0; goto pop; } + /* Switch to heapsort when the current level exceeds the maximum level. */ + if(++current_level > max_level) { +#if defined(I_AM_QSORT_R) + __bsd_heapsort_r(a, n, es, swaptype, thunk, cmp); +#elif defined(I_AM_GNU_QSORT_R) + heapsort_r(a, n, es, swaptype, cmp, thunk); +#else + heapsort(a, n, es, swaptype, cmp); +#endif + goto pop; + } + /* Select a pivot element, move it to the left. */ pm = (char *) a + (n / 2) * es; if (n > 7) { @@ -310,6 +449,7 @@ loop: swap_cnt = 0; */ parameter_stack[recursion_level].a = a; parameter_stack[recursion_level].n = n / es; + parameter_stack[recursion_level].level = current_level; recursion_level++; a = pa; n = r / es; @@ -318,15 +458,15 @@ loop: swap_cnt = 0; else { /* * The parameter_stack array is full. The smaller part - * is sorted using function call recursion. The larger - * part will be sorted after the function call returns. + * is sorted using heapsort. The larger part will be + * sorted after the function call returns. 
*/ #if defined(I_AM_QSORT_R) - __bsd_qsort_r(pa, r / es, es, thunk, cmp); + __bsd_heapsort_r(pa, r / es, es, swaptype, thunk, cmp); #elif defined(I_AM_GNU_QSORT_R) - qsort_r(pa, r / es, es, cmp, thunk); + heapsort_r(pa, r / es, es, swaptype, cmp, thunk); #else - qsort(pa, r / es, es, cmp); + heapsort(pa, r / es, es, swaptype, cmp); #endif } } @@ -340,6 +480,7 @@ pop: recursion_level--; a = parameter_stack[recursion_level].a; n = parameter_stack[recursion_level].n; + current_level = parameter_stack[recursion_level].level; goto loop; } }