[2/3] powerpc64: Check cacheline size before using optimised memset routines

Message ID 20210727054751.3972562-2-anton@ozlabs.org
State Committed
Commit f2a15dd668913c5a1388ba7e1131b25162b2ea75
Delegated to: Tulio Magno Quites Machado Filho
Headers
Series [1/3] powerpc64: Replace some PPC_FEATURE_HAS_VSX with PPC_FEATURE_ARCH_2_06 |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Anton Blanchard July 27, 2021, 5:47 a.m. UTC
  A number of optimised memset routines assume the cacheline size is 128B,
so we better check before using them.
---
 .../powerpc64/multiarch/ifunc-impl-list.c      | 18 +++++++++++++-----
 sysdeps/powerpc/powerpc64/multiarch/memset.c   | 15 ++++++++++-----
 2 files changed, 23 insertions(+), 10 deletions(-)
  

Comments

Tulio Magno Quites Machado Filho Aug. 6, 2021, 7:50 p.m. UTC | #1
Anton Blanchard via Libc-alpha <libc-alpha@sourceware.org> writes:

> A number of optimised memset routines assume the cacheline size is 128B,
> so we better check before using them.

LGTM.
The patch is still missing the changes for bzero(), but I understand this is
already an improvement over what we had.
I plan to modify bzero later.

Pushed as f2a15dd668913c5a1388ba7e1131b25162b2ea75.

Thanks!
  

Patch

diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 32564c8f1f..a3fdcd43bd 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -35,6 +35,9 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   unsigned long int hwcap = GLRO(dl_hwcap);
   unsigned long int hwcap2 = GLRO(dl_hwcap2);
+#ifdef SHARED
+  int cacheline_size = GLRO(dl_cache_line_size);
+#endif
 
   /* hwcap contains only the latest supported ISA, the code checks which is
      and fills the previous supported ones.  */
@@ -90,16 +93,21 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memset,
 			      hwcap2 & PPC_FEATURE2_ARCH_3_1
 			      && hwcap2 & PPC_FEATURE2_HAS_ISEL
-			      && hwcap & PPC_FEATURE_HAS_VSX,
+			      && hwcap & PPC_FEATURE_HAS_VSX
+			      && cacheline_size == 128,
 			      __memset_power10)
 #endif
-	      IFUNC_IMPL_ADD (array, i, memset, hwcap2 & PPC_FEATURE2_ARCH_2_07,
+	      IFUNC_IMPL_ADD (array, i, memset, hwcap2 & PPC_FEATURE2_ARCH_2_07
+			      && cacheline_size == 128,
 			      __memset_power8)
-	      IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_06,
+	      IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_06
+			      && cacheline_size == 128,
 			      __memset_power7)
-	      IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_05,
+	      IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_05
+			      && cacheline_size == 128,
 			      __memset_power6)
-	      IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_POWER4,
+	      IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_POWER4
+			      && cacheline_size == 128,
 			      __memset_power4)
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ppc))
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset.c b/sysdeps/powerpc/powerpc64/multiarch/memset.c
index c1aa143f60..056e911699 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memset.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memset.c
@@ -43,16 +43,21 @@  libc_ifunc (__libc_memset,
 # ifdef __LITTLE_ENDIAN__
 	    (hwcap2 & PPC_FEATURE2_ARCH_3_1
 	     && hwcap2 & PPC_FEATURE2_HAS_ISEL
-	     && hwcap & PPC_FEATURE_HAS_VSX)
+	     && hwcap & PPC_FEATURE_HAS_VSX
+	     && GLRO(dl_cache_line_size) == 128)
 	    ? __memset_power10 :
 # endif
-            (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+            (hwcap2 & PPC_FEATURE2_ARCH_2_07
+	     && GLRO(dl_cache_line_size) == 128)
             ? __memset_power8 :
-	      (hwcap & PPC_FEATURE_ARCH_2_06)
+	      (hwcap & PPC_FEATURE_ARCH_2_06
+	       && GLRO(dl_cache_line_size) == 128)
 	      ? __memset_power7 :
-		(hwcap & PPC_FEATURE_ARCH_2_05)
+		(hwcap & PPC_FEATURE_ARCH_2_05
+	         && GLRO(dl_cache_line_size) == 128)
 		? __memset_power6 :
-		  (hwcap & PPC_FEATURE_POWER4)
+		  (hwcap & PPC_FEATURE_POWER4
+	           && GLRO(dl_cache_line_size) == 128)
 		  ? __memset_power4
             : __memset_ppc);