[v2] Benchmark strstr hard needles

Message ID VI1PR0801MB2127C0E07A27296FDC38C9AB833E0@VI1PR0801MB2127.eurprd08.prod.outlook.com
State Committed
Headers

Commit Message

Wilco Dijkstra April 26, 2019, 11:57 a.m. UTC
  v2: Add another hard needle case.

Benchmark hard needles both for Two-way and the new strstr algorithm.
This shows how the worst cases are quite close and both much faster than
the quadratic basic_strstr.  Thanks to Szabolcs for providing various
hard needles.

ChangeLog:
2019-04-25  Wilco Dijkstra  <wdijkstr@arm.com>

        * benchtests/bench-strstr.c (test_hard_needle): New function.

--
  

Comments

Adhemerval Zanella May 20, 2019, 6:49 p.m. UTC | #1
On 26/04/2019 08:57, Wilco Dijkstra wrote:
> v2: Add another hard needle case.
> 
> Benchmark hard needles both for Two-way and the new strstr algorithm.
> This shows how the worst cases are quite close and both much faster than
> the quadratic basic_strstr.  Thanks to Szabolcs for providing various
> hard needles.
> 
> ChangeLog:
> 2019-04-25  Wilco Dijkstra  <wdijkstr@arm.com>
> 
>         * benchtests/bench-strstr.c (test_hard_needle): New function.


LGTM with some nits below, thanks.

Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>

> 
> --
> 
> diff --git a/benchtests/bench-strstr.c b/benchtests/bench-strstr.c
> index 31309b24029a96de7381e1050fd89e5d26642e5f..f604460c60002e745a76a30835976df95f753234 100644
> --- a/benchtests/bench-strstr.c
> +++ b/benchtests/bench-strstr.c
> @@ -203,6 +203,74 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2,
>    putchar ('\n');
>  }
>  
> +
> +char * volatile null = NULL;
> +

I am not seeing why exactly it requires a global non static volatile
variable to call do_one_test.

> +static void
> +test_hard_needle (size_t ne_len, size_t hs_len)
> +{

Maybe add some context from where these tests came from, from the
libc-alpha discussion?

> +  char *ne = (char *) buf1;
> +  char *hs = (char *) buf2;
> +
> +  /* Hard needle for strstr algorithm using skip table.  */
> +  {
> +    memset (ne, 'a', ne_len);
> +    ne[ne_len] = '\0';
> +    ne[ne_len - 14] = 'b';
> +
> +    memset (hs, 'a', hs_len);
> +    for (size_t i = ne_len; i <= hs_len; i += ne_len)
> +      {
> +	hs[i-5] = 'b';
> +	hs[i-62] = 'b';
> +      }
> +
> +    printf ("Length %4zd/%3zd, complex needle 1:", hs_len, ne_len);
> +
> +    FOR_EACH_IMPL (impl, 0)
> +      do_one_test (impl, hs, ne, null);
> +    putchar ('\n');
> +  }
> +
> +  /* 2nd hard needle for strstr algorithm using skip table.  */
> +  {
> +    memset (ne, 'a', ne_len);
> +    ne[ne_len] = '\0';
> +    ne[ne_len - 6] = 'b';
> +
> +    memset (hs, 'a', hs_len);
> +    for (size_t i = ne_len; i <= hs_len; i += ne_len)
> +      {
> +	hs[i-5] = 'b';
> +	hs[i-6] = 'b';
> +      }
> +
> +    printf ("Length %4zd/%3zd, complex needle 2:", hs_len, ne_len);
> +
> +    FOR_EACH_IMPL (impl, 0)
> +      do_one_test (impl, hs, ne, null);
> +    putchar ('\n');
> +  }
> +
> +  /* Hard needle for Two-way algorithm.  */
> +  {
> +    for (int i = 0; i < hs_len; i++)
> +      hs[i] = (rand () & 255) > 155 ? 'a' : 'b';
> +    hs[hs_len] = 0;
> +
> +    memset (ne, 'a', ne_len);
> +    ne[ne_len-2] = 'b';
> +    ne[0] = 'b';
> +    ne[ne_len] = 0;
> +
> +    printf ("Length %4zd/%3zd, complex needle 3:", hs_len, ne_len);
> +
> +    FOR_EACH_IMPL (impl, 0)
> +      do_one_test (impl, hs, ne, null);
> +    putchar ('\n');
> +  }
> +}
> +
>  static int
>  test_main (void)
>  {
> @@ -227,6 +295,10 @@ test_main (void)
>  	do_test (14, 5, hlen, klen, 1);
>        }
>  
> +  test_hard_needle (64, 65536);
> +  test_hard_needle (256, 65536);
> +  test_hard_needle (1024, 65536);
> +
>    return ret;
>  }
>  
>
  

Patch

diff --git a/benchtests/bench-strstr.c b/benchtests/bench-strstr.c
index 31309b24029a96de7381e1050fd89e5d26642e5f..f604460c60002e745a76a30835976df95f753234 100644
--- a/benchtests/bench-strstr.c
+++ b/benchtests/bench-strstr.c
@@ -203,6 +203,74 @@  do_test (size_t align1, size_t align2, size_t len1, size_t len2,
   putchar ('\n');
 }
 
+
+char * volatile null = NULL;
+
+static void
+test_hard_needle (size_t ne_len, size_t hs_len)
+{
+  char *ne = (char *) buf1;
+  char *hs = (char *) buf2;
+
+  /* Hard needle for strstr algorithm using skip table.  */
+  {
+    memset (ne, 'a', ne_len);
+    ne[ne_len] = '\0';
+    ne[ne_len - 14] = 'b';
+
+    memset (hs, 'a', hs_len);
+    for (size_t i = ne_len; i <= hs_len; i += ne_len)
+      {
+	hs[i-5] = 'b';
+	hs[i-62] = 'b';
+      }
+
+    printf ("Length %4zd/%3zd, complex needle 1:", hs_len, ne_len);
+
+    FOR_EACH_IMPL (impl, 0)
+      do_one_test (impl, hs, ne, null);
+    putchar ('\n');
+  }
+
+  /* 2nd hard needle for strstr algorithm using skip table.  */
+  {
+    memset (ne, 'a', ne_len);
+    ne[ne_len] = '\0';
+    ne[ne_len - 6] = 'b';
+
+    memset (hs, 'a', hs_len);
+    for (size_t i = ne_len; i <= hs_len; i += ne_len)
+      {
+	hs[i-5] = 'b';
+	hs[i-6] = 'b';
+      }
+
+    printf ("Length %4zd/%3zd, complex needle 2:", hs_len, ne_len);
+
+    FOR_EACH_IMPL (impl, 0)
+      do_one_test (impl, hs, ne, null);
+    putchar ('\n');
+  }
+
+  /* Hard needle for Two-way algorithm.  */
+  {
+    for (int i = 0; i < hs_len; i++)
+      hs[i] = (rand () & 255) > 155 ? 'a' : 'b';
+    hs[hs_len] = 0;
+
+    memset (ne, 'a', ne_len);
+    ne[ne_len-2] = 'b';
+    ne[0] = 'b';
+    ne[ne_len] = 0;
+
+    printf ("Length %4zd/%3zd, complex needle 3:", hs_len, ne_len);
+
+    FOR_EACH_IMPL (impl, 0)
+      do_one_test (impl, hs, ne, null);
+    putchar ('\n');
+  }
+}
+
 static int
 test_main (void)
 {
@@ -227,6 +295,10 @@  test_main (void)
 	do_test (14, 5, hlen, klen, 1);
       }
 
+  test_hard_needle (64, 65536);
+  test_hard_needle (256, 65536);
+  test_hard_needle (1024, 65536);
+
   return ret;
 }