[v3,4/7] stdlib: Move insertion sort out of qsort

Message ID 20210903171144.952737-5-adhemerval.zanella@linaro.org
State Superseded
Headers
Series Use introsort for qsort |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Adhemerval Zanella Netto Sept. 3, 2021, 5:11 p.m. UTC
  ---
 stdlib/qsort.c | 100 ++++++++++++++++++++++++++-----------------------
 1 file changed, 53 insertions(+), 47 deletions(-)
  

Comments

Fangrui Song Sept. 6, 2021, 8:35 p.m. UTC | #1
On 2021-09-03, Adhemerval Zanella via Libc-alpha wrote:
>---
> stdlib/qsort.c | 100 ++++++++++++++++++++++++++-----------------------
> 1 file changed, 53 insertions(+), 47 deletions(-)
>
>diff --git a/stdlib/qsort.c b/stdlib/qsort.c
>index 59458d151b..b69417dedd 100644
>--- a/stdlib/qsort.c
>+++ b/stdlib/qsort.c
>@@ -150,6 +150,58 @@ typedef struct
>       smaller partition.  This *guarantees* no more than log (total_elems)
>       stack size is needed (actually O(1) in this case)!  */
>
>+static void
>+insertion_sort (void *const pbase, size_t total_elems, size_t size,
>+                swap_func_t swap_func,
>+	        __compar_d_fn_t cmp, void *arg)
>+{
>+  char *base_ptr = (char *) pbase;
>+  char *const end_ptr = &base_ptr[size * (total_elems - 1)];
>+  char *tmp_ptr = base_ptr;
>+#define min(x, y) ((x) < (y) ? (x) : (y))
>+  const size_t max_thresh = MAX_THRESH * size;

But I think MAX_THRESH being 4 is unfortunate.
All modern architectures want a value larger than 4 :)

Reviewed-by: Fangrui Song <maskray@google.com>

>+  char *thresh = min(end_ptr, base_ptr + max_thresh);
>+  char *run_ptr;
>+
>+  /* Find smallest element in first threshold and place it at the
>+     array's beginning.  This is the smallest array element,
>+     and the operation speeds up insertion sort's inner loop. */
>+
>+  for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
>+    if (cmp (run_ptr, tmp_ptr, arg) < 0)
>+      tmp_ptr = run_ptr;
>+
>+  if (tmp_ptr != base_ptr)
>+    do_swap (tmp_ptr, base_ptr, size, swap_func);
>+
>+  /* Insertion sort, running from left-hand-side up to right-hand-side.  */
>+
>+  run_ptr = base_ptr + size;
>+  while ((run_ptr += size) <= end_ptr)
>+    {
>+      tmp_ptr = run_ptr - size;
>+      while (cmp (run_ptr, tmp_ptr, arg) < 0)
>+        tmp_ptr -= size;
>+
>+      tmp_ptr += size;
>+      if (tmp_ptr != run_ptr)
>+        {
>+          char *trav;
>+
>+          trav = run_ptr + size;
>+          while (--trav >= run_ptr)
>+            {
>+              char c = *trav;
>+              char *hi, *lo;
>+
>+              for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
>+                *hi = *lo;
>+              *hi = c;
>+            }

The bytewise move is a bit unfortunate and may slow down the insertion sort
quite a bit... But without allocation or code duplication I don't know a
better approach...

>+        }
>+    }
>+}
>+
> void
> _quicksort (void *const pbase, size_t total_elems, size_t size,
> 	    __compar_d_fn_t cmp, void *arg)
>@@ -272,51 +324,5 @@ _quicksort (void *const pbase, size_t total_elems, size_t size,
>      for partitions below MAX_THRESH size. BASE_PTR points to the beginning
>      of the array to sort, and END_PTR points at the very last element in
>      the array (*not* one beyond it!). */
>-
>-#define min(x, y) ((x) < (y) ? (x) : (y))
>-
>-  {
>-    char *const end_ptr = &base_ptr[size * (total_elems - 1)];
>-    char *tmp_ptr = base_ptr;
>-    char *thresh = min(end_ptr, base_ptr + max_thresh);
>-    char *run_ptr;
>-
>-    /* Find smallest element in first threshold and place it at the
>-       array's beginning.  This is the smallest array element,
>-       and the operation speeds up insertion sort's inner loop. */
>-
>-    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
>-      if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
>-        tmp_ptr = run_ptr;
>-
>-    if (tmp_ptr != base_ptr)
>-      do_swap (tmp_ptr, base_ptr, size, swap_func);
>-
>-    /* Insertion sort, running from left-hand-side up to right-hand-side.  */
>-
>-    run_ptr = base_ptr + size;
>-    while ((run_ptr += size) <= end_ptr)
>-      {
>-	tmp_ptr = run_ptr - size;
>-	while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
>-	  tmp_ptr -= size;
>-
>-	tmp_ptr += size;
>-        if (tmp_ptr != run_ptr)
>-          {
>-            char *trav;
>-
>-	    trav = run_ptr + size;
>-	    while (--trav >= run_ptr)
>-              {
>-                char c = *trav;
>-                char *hi, *lo;
>-
>-                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
>-                  *hi = *lo;
>-                *hi = c;
>-              }
>-          }
>-      }
>-  }
>+  insertion_sort (pbase, total_elems, size, swap_func, cmp, arg);
> }
>-- 
>2.30.2
>
  
Fangrui Song Sept. 6, 2021, 8:48 p.m. UTC | #2
On 2021-09-06, Fangrui Song wrote:
>On 2021-09-03, Adhemerval Zanella via Libc-alpha wrote:
>>---
>>stdlib/qsort.c | 100 ++++++++++++++++++++++++++-----------------------
>>1 file changed, 53 insertions(+), 47 deletions(-)
>>
>>diff --git a/stdlib/qsort.c b/stdlib/qsort.c
>>index 59458d151b..b69417dedd 100644
>>--- a/stdlib/qsort.c
>>+++ b/stdlib/qsort.c
>>@@ -150,6 +150,58 @@ typedef struct
>>      smaller partition.  This *guarantees* no more than log (total_elems)
>>      stack size is needed (actually O(1) in this case)!  */
>>
>>+static void
>>+insertion_sort (void *const pbase, size_t total_elems, size_t size,
>>+                swap_func_t swap_func,
>>+	        __compar_d_fn_t cmp, void *arg)
>>+{
>>+  char *base_ptr = (char *) pbase;
>>+  char *const end_ptr = &base_ptr[size * (total_elems - 1)];
>>+  char *tmp_ptr = base_ptr;
>>+#define min(x, y) ((x) < (y) ? (x) : (y))
>>+  const size_t max_thresh = MAX_THRESH * size;
>
>But I think MAX_THRESH being 4 is unfortunate.
>All modern architectures want a value larger than 4 :)
>
>Reviewed-by: Fangrui Song <maskray@google.com>
>
>>+  char *thresh = min(end_ptr, base_ptr + max_thresh);
>>+  char *run_ptr;
>>+
>>+  /* Find smallest element in first threshold and place it at the
>>+     array's beginning.  This is the smallest array element,
>>+     and the operation speeds up insertion sort's inner loop. */
>>+
>>+  for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
>>+    if (cmp (run_ptr, tmp_ptr, arg) < 0)
>>+      tmp_ptr = run_ptr;
>>+
>>+  if (tmp_ptr != base_ptr)
>>+    do_swap (tmp_ptr, base_ptr, size, swap_func);
>>+
>>+  /* Insertion sort, running from left-hand-side up to right-hand-side.  */
>>+
>>+  run_ptr = base_ptr + size;
>>+  while ((run_ptr += size) <= end_ptr)
>>+    {
>>+      tmp_ptr = run_ptr - size;
>>+      while (cmp (run_ptr, tmp_ptr, arg) < 0)
>>+        tmp_ptr -= size;
>>+
>>+      tmp_ptr += size;
>>+      if (tmp_ptr != run_ptr)
>>+        {
>>+          char *trav;
>>+
>>+          trav = run_ptr + size;
>>+          while (--trav >= run_ptr)
>>+            {
>>+              char c = *trav;
>>+              char *hi, *lo;
>>+
>>+              for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
>>+                *hi = *lo;
>>+              *hi = c;
>>+            }
>
>The bytewise move is a bit unfortunate and may slow down the insertion sort
>quite a bit... But without allocation or code duplication I don't know a
>better approach...

If we want to optimize insertion sort for the common case,
perhaps also optimize the case where the element size is <= SWAP_GENERIC_SIZE.

Use an `unsigned char tmp[SWAP_GENERIC_SIZE];` buffer,
as you do in another patch.

There will be a bit of code bloat, though...

>
>>+        }
>>+    }
>>+}
>>+
>>void
>>_quicksort (void *const pbase, size_t total_elems, size_t size,
>>	    __compar_d_fn_t cmp, void *arg)
>>@@ -272,51 +324,5 @@ _quicksort (void *const pbase, size_t total_elems, size_t size,
>>     for partitions below MAX_THRESH size. BASE_PTR points to the beginning
>>     of the array to sort, and END_PTR points at the very last element in
>>     the array (*not* one beyond it!). */
>>-
>>-#define min(x, y) ((x) < (y) ? (x) : (y))
>>-
>>-  {
>>-    char *const end_ptr = &base_ptr[size * (total_elems - 1)];
>>-    char *tmp_ptr = base_ptr;
>>-    char *thresh = min(end_ptr, base_ptr + max_thresh);
>>-    char *run_ptr;
>>-
>>-    /* Find smallest element in first threshold and place it at the
>>-       array's beginning.  This is the smallest array element,
>>-       and the operation speeds up insertion sort's inner loop. */
>>-
>>-    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
>>-      if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
>>-        tmp_ptr = run_ptr;
>>-
>>-    if (tmp_ptr != base_ptr)
>>-      do_swap (tmp_ptr, base_ptr, size, swap_func);
>>-
>>-    /* Insertion sort, running from left-hand-side up to right-hand-side.  */
>>-
>>-    run_ptr = base_ptr + size;
>>-    while ((run_ptr += size) <= end_ptr)
>>-      {
>>-	tmp_ptr = run_ptr - size;
>>-	while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
>>-	  tmp_ptr -= size;
>>-
>>-	tmp_ptr += size;
>>-        if (tmp_ptr != run_ptr)
>>-          {
>>-            char *trav;
>>-
>>-	    trav = run_ptr + size;
>>-	    while (--trav >= run_ptr)
>>-              {
>>-                char c = *trav;
>>-                char *hi, *lo;
>>-
>>-                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
>>-                  *hi = *lo;
>>-                *hi = c;
>>-              }
>>-          }
>>-      }
>>-  }
>>+  insertion_sort (pbase, total_elems, size, swap_func, cmp, arg);
>>}
>>-- 
>>2.30.2
>>
  

Patch

diff --git a/stdlib/qsort.c b/stdlib/qsort.c
index 59458d151b..b69417dedd 100644
--- a/stdlib/qsort.c
+++ b/stdlib/qsort.c
@@ -150,6 +150,58 @@  typedef struct
       smaller partition.  This *guarantees* no more than log (total_elems)
       stack size is needed (actually O(1) in this case)!  */
 
+static void
+insertion_sort (void *const pbase, size_t total_elems, size_t size,
+                swap_func_t swap_func,
+	        __compar_d_fn_t cmp, void *arg)
+{
+  char *base_ptr = (char *) pbase;
+  char *const end_ptr = &base_ptr[size * (total_elems - 1)];
+  char *tmp_ptr = base_ptr;
+#define min(x, y) ((x) < (y) ? (x) : (y))
+  const size_t max_thresh = MAX_THRESH * size;
+  char *thresh = min(end_ptr, base_ptr + max_thresh);
+  char *run_ptr;
+
+  /* Find smallest element in first threshold and place it at the
+     array's beginning.  This is the smallest array element,
+     and the operation speeds up insertion sort's inner loop. */
+
+  for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
+    if (cmp (run_ptr, tmp_ptr, arg) < 0)
+      tmp_ptr = run_ptr;
+
+  if (tmp_ptr != base_ptr)
+    do_swap (tmp_ptr, base_ptr, size, swap_func);
+
+  /* Insertion sort, running from left-hand-side up to right-hand-side.  */
+
+  run_ptr = base_ptr + size;
+  while ((run_ptr += size) <= end_ptr)
+    {
+      tmp_ptr = run_ptr - size;
+      while (cmp (run_ptr, tmp_ptr, arg) < 0)
+        tmp_ptr -= size;
+
+      tmp_ptr += size;
+      if (tmp_ptr != run_ptr)
+        {
+          char *trav;
+
+          trav = run_ptr + size;
+          while (--trav >= run_ptr)
+            {
+              char c = *trav;
+              char *hi, *lo;
+
+              for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
+                *hi = *lo;
+              *hi = c;
+            }
+        }
+    }
+}
+
 void
 _quicksort (void *const pbase, size_t total_elems, size_t size,
 	    __compar_d_fn_t cmp, void *arg)
@@ -272,51 +324,5 @@  _quicksort (void *const pbase, size_t total_elems, size_t size,
      for partitions below MAX_THRESH size. BASE_PTR points to the beginning
      of the array to sort, and END_PTR points at the very last element in
      the array (*not* one beyond it!). */
-
-#define min(x, y) ((x) < (y) ? (x) : (y))
-
-  {
-    char *const end_ptr = &base_ptr[size * (total_elems - 1)];
-    char *tmp_ptr = base_ptr;
-    char *thresh = min(end_ptr, base_ptr + max_thresh);
-    char *run_ptr;
-
-    /* Find smallest element in first threshold and place it at the
-       array's beginning.  This is the smallest array element,
-       and the operation speeds up insertion sort's inner loop. */
-
-    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
-      if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
-        tmp_ptr = run_ptr;
-
-    if (tmp_ptr != base_ptr)
-      do_swap (tmp_ptr, base_ptr, size, swap_func);
-
-    /* Insertion sort, running from left-hand-side up to right-hand-side.  */
-
-    run_ptr = base_ptr + size;
-    while ((run_ptr += size) <= end_ptr)
-      {
-	tmp_ptr = run_ptr - size;
-	while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
-	  tmp_ptr -= size;
-
-	tmp_ptr += size;
-        if (tmp_ptr != run_ptr)
-          {
-            char *trav;
-
-	    trav = run_ptr + size;
-	    while (--trav >= run_ptr)
-              {
-                char c = *trav;
-                char *hi, *lo;
-
-                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
-                  *hi = *lo;
-                *hi = c;
-              }
-          }
-      }
-  }
+  insertion_sort (pbase, total_elems, size, swap_func, cmp, arg);
 }