[5/7] aarch64: Mitigations for string functions when MTE is enabled.

Message ID 20200615144029.19771-6-rearnsha@arm.com
State Superseded
Series RFC Memory tagging support

Commit Message

Richard Earnshaw June 15, 2020, 2:40 p.m. UTC
This is a place-holder patch for the changes needed to the string
functions to make them safe when using memory tagging.  It is expected
that this patch will be replaced before the final series is committed.

When memory tagging is enabled, functions must not fetch data beyond a
granule boundary.  Unfortunately, this affects a number of the
optimized string operations for aarch64, which assume that, provided a
page boundary is not crossed, any amount of data within the page may
be accessed.  This patch replaces the existing string functions with
variants that do not violate the granule-size limitations that now
exist.

This patch has not been tuned for performance.
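Where reusing the page-crossing path was not convenient, the patch
falls back to a plain byte-at-a-time loop.  A minimal C sketch of the
idea behind the quick-and-dirty MTE memchr variant below (illustrative
only, not the committed code):

#include <stddef.h>

static void *
memchr_bytewise (const void *s, int c, size_t n)
{
  /* Reading one byte at a time never loads past the byte being
     inspected, so no access can stray into the next granule.  */
  const unsigned char *p = s;
  unsigned char ch = (unsigned char) c;

  for (; n != 0; n--, p++)
    if (*p == ch)
      return (void *) p;
  return NULL;
}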
---
 sysdeps/aarch64/memchr.S                 | 21 ++++++++++++++++++++-
 sysdeps/aarch64/multiarch/strlen_asimd.S |  2 +-
 sysdeps/aarch64/strchr.S                 | 15 +++++++++++++++
 sysdeps/aarch64/strchrnul.S              | 14 +++++++++++++-
 sysdeps/aarch64/strcmp.S                 | 12 +++++++++---
 sysdeps/aarch64/strcpy.S                 |  2 +-
 sysdeps/aarch64/strlen.S                 |  2 +-
 sysdeps/aarch64/strncmp.S                | 10 ++++++++--
 sysdeps/aarch64/strrchr.S                | 15 ++++++++++++++-
 9 files changed, 82 insertions(+), 11 deletions(-)
  

Patch

diff --git a/sysdeps/aarch64/memchr.S b/sysdeps/aarch64/memchr.S
index 85c65cbfca..6e01a0a0a9 100644
--- a/sysdeps/aarch64/memchr.S
+++ b/sysdeps/aarch64/memchr.S
@@ -64,6 +64,25 @@ 
  */
 
 ENTRY (MEMCHR)
+#ifdef _LIBC_MTAG
+	/* Quick-and-dirty implementation for MTE.  Needs a rewrite as
+	   granules are only 16 bytes in size.  */
+	/* Do not dereference srcin if no bytes to compare.  */
+	cbz	cntin, L(zero_length)
+	and	chrin, chrin, #255
+L(next_byte):
+	ldrb	wtmp2, [srcin], #1
+	cmp	wtmp2, chrin
+	b.eq	L(found)
+	subs	cntin, cntin, #1
+	b.ne	L(next_byte)
+L(zero_length):
+	mov	result, #0
+	ret
+L(found):
+	sub	result, srcin, #1
+	ret
+#else
 	/* Do not dereference srcin if no bytes to compare.  */
 	cbz	cntin, L(zero_length)
 	/*
@@ -152,10 +171,10 @@  L(tail):
 	/* Select result or NULL */
 	csel	result, xzr, result, eq
 	ret
-
 L(zero_length):
 	mov	result, #0
 	ret
+#endif /* _LIBC_MTAG */
 END (MEMCHR)
 weak_alias (MEMCHR, memchr)
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/aarch64/multiarch/strlen_asimd.S b/sysdeps/aarch64/multiarch/strlen_asimd.S
index 236a2c96a6..c2c718e493 100644
--- a/sysdeps/aarch64/multiarch/strlen_asimd.S
+++ b/sysdeps/aarch64/multiarch/strlen_asimd.S
@@ -51,7 +51,7 @@ 
 #define REP8_01 0x0101010101010101
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 
-#ifdef TEST_PAGE_CROSS
+#if defined _LIBC_MTAG || defined TEST_PAGE_CROSS
 # define MIN_PAGE_SIZE 16
 #else
 # define MIN_PAGE_SIZE 4096
diff --git a/sysdeps/aarch64/strchr.S b/sysdeps/aarch64/strchr.S
index 4a75e73945..32c500609e 100644
--- a/sysdeps/aarch64/strchr.S
+++ b/sysdeps/aarch64/strchr.S
@@ -63,6 +63,20 @@ 
 
 ENTRY (strchr)
 	DELOUSE (0)
+#ifdef _LIBC_MTAG
+	/* Quick and dirty implementation for MTE.  */
+	and	chrin, chrin, #255
+L(next_byte):
+	ldrb	wtmp2, [srcin], #1
+	cbz	wtmp2, L(end)
+	cmp	wtmp2, chrin
+	b.ne	L(next_byte)
+	sub	result, srcin, #1
+	ret
+L(end):
+	mov	result, #0
+	ret
+#else
 	mov	wtmp2, #0x0401
 	movk	wtmp2, #0x4010, lsl #16
 	dup	vrepchr.16b, chrin
@@ -134,6 +148,7 @@  L(tail):
 	add	result, src, tmp1, lsr #1
 	csel	result, result, xzr, eq
 	ret
+#endif
 END (strchr)
 libc_hidden_builtin_def (strchr)
 weak_alias (strchr, index)
diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
index a65be6cba8..78a9252eb8 100644
--- a/sysdeps/aarch64/strchrnul.S
+++ b/sysdeps/aarch64/strchrnul.S
@@ -61,6 +61,18 @@ 
 
 ENTRY (__strchrnul)
 	DELOUSE (0)
+#ifdef _LIBC_MTAG
+	/* Quick and dirty implementation for MTE.  */
+	and	chrin, chrin, #255
+L(next_byte):
+	ldrb	wtmp2, [srcin], #1
+	cmp	wtmp2, #0
+	ccmp	wtmp2, chrin, #4, ne	/* NZCV = 0b0100  */
+	b.ne	L(next_byte)
+
+	sub	result, srcin, #1
+	ret
+#else
 	/* Magic constant 0x40100401 to allow us to identify which lane
 	   matches the termination condition.  */
 	mov	wtmp2, #0x0401
@@ -126,6 +138,6 @@  L(tail):
 	/* tmp1 is twice the offset into the fragment.  */
 	add	result, src, tmp1, lsr #1
 	ret
-
+#endif /* _LIBC_MTAG */
 END(__strchrnul)
 weak_alias (__strchrnul, strchrnul)
diff --git a/sysdeps/aarch64/strcmp.S b/sysdeps/aarch64/strcmp.S
index d044c29e9b..d01b199ab3 100644
--- a/sysdeps/aarch64/strcmp.S
+++ b/sysdeps/aarch64/strcmp.S
@@ -46,6 +46,12 @@ 
 #define zeroones	x10
 #define pos		x11
 
+#if defined _LIBC_MTAG || defined TEST_PAGE_CROSS
+# define MIN_PAGE_SIZE 16
+#else
+# define MIN_PAGE_SIZE 4096
+#endif
+
 	/* Start of performance-critical section  -- one 64B cache line.  */
 ENTRY_ALIGN(strcmp, 6)
 
@@ -161,10 +167,10 @@  L(do_misaligned):
 	b.ne	L(do_misaligned)
 
 L(loop_misaligned):
-	/* Test if we are within the last dword of the end of a 4K page.  If
+	/* Test if we are within the last dword of the end of a page.  If
 	   yes then jump back to the misaligned loop to copy a byte at a time.  */
-	and	tmp1, src2, #0xff8
-	eor	tmp1, tmp1, #0xff8
+	and	tmp1, src2, #(MIN_PAGE_SIZE - 8)
+	eor	tmp1, tmp1, #(MIN_PAGE_SIZE - 8)
 	cbz	tmp1, L(do_misaligned)
 	ldr	data1, [src1], #8
 	ldr	data2, [src2], #8
diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
index 548130e413..82548f3d53 100644
--- a/sysdeps/aarch64/strcpy.S
+++ b/sysdeps/aarch64/strcpy.S
@@ -87,7 +87,7 @@ 
 	   misaligned, crosses a page boundary - after that we move to aligned
 	   fetches for the remainder of the string.  */
 
-#ifdef STRCPY_TEST_PAGE_CROSS
+#if defined _LIBC_MTAG || defined STRCPY_TEST_PAGE_CROSS
 	/* Make everything that isn't Qword aligned look like a page cross.  */
 #define MIN_PAGE_P2 4
 #else
diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S
index e01fab7c2a..7455a668bb 100644
--- a/sysdeps/aarch64/strlen.S
+++ b/sysdeps/aarch64/strlen.S
@@ -57,7 +57,7 @@ 
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_80 0x8080808080808080
 
-#ifdef TEST_PAGE_CROSS
+#if defined _LIBC_MTAG || defined TEST_PAGE_CROSS
 # define MIN_PAGE_SIZE 16
 #else
 # define MIN_PAGE_SIZE 4096
diff --git a/sysdeps/aarch64/strncmp.S b/sysdeps/aarch64/strncmp.S
index c5141fab8a..40c805f609 100644
--- a/sysdeps/aarch64/strncmp.S
+++ b/sysdeps/aarch64/strncmp.S
@@ -51,6 +51,12 @@ 
 #define endloop		x15
 #define count		mask
 
+#if defined _LIBC_MTAG || defined TEST_PAGE_CROSS
+# define MIN_PAGE_SIZE 16
+#else
+# define MIN_PAGE_SIZE 4096
+#endif
+
 ENTRY_ALIGN_AND_PAD (strncmp, 6, 7)
 	DELOUSE (0)
 	DELOUSE (1)
@@ -233,8 +239,8 @@  L(do_misaligned):
 	subs	limit_wd, limit_wd, #1
 	b.lo	L(done_loop)
 L(loop_misaligned):
-	and	tmp2, src2, #0xff8
-	eor	tmp2, tmp2, #0xff8
+	and	tmp2, src2, #(MIN_PAGE_SIZE - 8)
+	eor	tmp2, tmp2, #(MIN_PAGE_SIZE - 8)
 	cbz	tmp2, L(page_end_loop)
 
 	ldr	data1, [src1], #8
diff --git a/sysdeps/aarch64/strrchr.S b/sysdeps/aarch64/strrchr.S
index 94da08d351..ef00e969d9 100644
--- a/sysdeps/aarch64/strrchr.S
+++ b/sysdeps/aarch64/strrchr.S
@@ -70,6 +70,19 @@ 
 ENTRY(strrchr)
 	DELOUSE (0)
 	cbz	x1, L(null_search)
+#ifdef _LIBC_MTAG
+	/* Quick and dirty version for MTE.  */
+	and	chrin, chrin, #255
+	mov	src_match, #0
+L(next_byte):
+	ldrb	wtmp2, [srcin]
+	cmp	wtmp2, chrin
+	csel	src_match, src_match, srcin, ne
+	add	srcin, srcin, #1
+	cbnz	wtmp2, L(next_byte)
+	mov 	result, src_match
+	ret
+#else
 	/* Magic constant 0x40100401 to allow us to identify which lane
 	   matches the requested byte.  Magic constant 0x80200802 used
 	   similarly for NUL termination.  */
@@ -158,9 +171,9 @@  L(tail):
 	csel	result, result, xzr, ne
 
 	ret
+#endif
 L(null_search):
 	b	__strchrnul
-
 END(strrchr)
 weak_alias (strrchr, rindex)
 libc_hidden_builtin_def (strrchr)