@@ -64,6 +64,25 @@
*/
ENTRY (MEMCHR)
+#ifdef _LIBC_MTAG
+ /* Quick-and-dirty byte-at-a-time implementation for MTE.  Needs a
+ rewrite as granules are only 16 bytes in size. */
+ /* Do not dereference srcin if no bytes to compare. */
+ cbz cntin, L(zero_length)
+ and chrin, chrin, #255 /* memchr compares only the low byte of C.  */
+L(next_byte):
+ ldrb wtmp2, [srcin], #1
+ cmp wtmp2, chrin
+ b.eq L(found)
+ subs cntin, cntin, #1 /* Examine at most CNTIN bytes.  */
+ b.ne L(next_byte)
+L(zero_length):
+ mov result, #0 /* Not found (or empty input): return NULL.  */
+ ret
+L(found):
+ sub result, srcin, #1 /* Undo the post-increment of the load.  */
+ ret
+#else
/* Do not dereference srcin if no bytes to compare. */
cbz cntin, L(zero_length)
/*
@@ -152,10 +171,10 @@ L(tail):
/* Select result or NULL */
csel result, xzr, result, eq
ret
-
L(zero_length):
mov result, #0
ret
+#endif /* _LIBC_MTAG */
END (MEMCHR)
weak_alias (MEMCHR, memchr)
libc_hidden_builtin_def (memchr)
@@ -51,7 +51,7 @@
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#ifdef TEST_PAGE_CROSS
+#if defined _LIBC_MTAG || defined TEST_PAGE_CROSS
# define MIN_PAGE_SIZE 16
#else
# define MIN_PAGE_SIZE 4096
@@ -63,6 +63,20 @@
ENTRY (strchr)
DELOUSE (0)
+#ifdef _LIBC_MTAG
+ /* Quick and dirty implementation for MTE.  */
+ and chrin, chrin, #255
+L(next_byte):
+ ldrb wtmp2, [srcin], #1
+ cmp wtmp2, chrin /* Match CHR before the NUL test so that
+ b.eq L(found)       strchr (s, '\0') returns the terminator.  */
+ cbnz wtmp2, L(next_byte)
+ mov result, #0 /* Hit NUL without a match: return NULL.  */
+ ret
+L(found):
+ sub result, srcin, #1 /* Undo the post-increment of the load.  */
+ ret
+#else
mov wtmp2, #0x0401
movk wtmp2, #0x4010, lsl #16
dup vrepchr.16b, chrin
@@ -134,6 +148,7 @@ L(tail):
add result, src, tmp1, lsr #1
csel result, result, xzr, eq
ret
+#endif
END (strchr)
libc_hidden_builtin_def (strchr)
weak_alias (strchr, index)
@@ -61,6 +61,18 @@
ENTRY (__strchrnul)
DELOUSE (0)
+#ifdef _LIBC_MTAG
+ /* Quick and dirty byte-at-a-time implementation for MTE.  */
+ and chrin, chrin, #255
+L(next_byte):
+ ldrb wtmp2, [srcin], #1
+ cmp wtmp2, #0
+ ccmp wtmp2, chrin, #4, ne /* NZCV = 0b0100: force "eq" when byte is NUL */
+ b.ne L(next_byte)
+
+ sub result, srcin, #1 /* Points at CHR or the terminating NUL.  */
+ ret
+#else
/* Magic constant 0x40100401 to allow us to identify which lane
matches the termination condition. */
mov wtmp2, #0x0401
@@ -126,6 +138,6 @@ L(tail):
/* tmp1 is twice the offset into the fragment. */
add result, src, tmp1, lsr #1
ret
-
+#endif /* _LIBC_MTAG */
END(__strchrnul)
weak_alias (__strchrnul, strchrnul)
@@ -46,6 +46,12 @@
#define zeroones x10
#define pos x11
+#if defined _LIBC_MTAG || defined TEST_PAGE_CROSS
+# define MIN_PAGE_SIZE 16
+#else
+# define MIN_PAGE_SIZE 4096
+#endif
+
/* Start of performance-critical section -- one 64B cache line. */
ENTRY_ALIGN(strcmp, 6)
@@ -161,10 +167,10 @@ L(do_misaligned):
b.ne L(do_misaligned)
L(loop_misaligned):
- /* Test if we are within the last dword of the end of a 4K page. If
+ /* Test if we are within the last dword of the end of a page. If
yes then jump back to the misaligned loop to copy a byte at a time. */
- and tmp1, src2, #0xff8
- eor tmp1, tmp1, #0xff8
+ and tmp1, src2, #(MIN_PAGE_SIZE - 8)
+ eor tmp1, tmp1, #(MIN_PAGE_SIZE - 8)
cbz tmp1, L(do_misaligned)
ldr data1, [src1], #8
ldr data2, [src2], #8
@@ -87,7 +87,7 @@
misaligned, crosses a page boundary - after that we move to aligned
fetches for the remainder of the string. */
-#ifdef STRCPY_TEST_PAGE_CROSS
+#if defined _LIBC_MTAG || defined STRCPY_TEST_PAGE_CROSS
/* Make everything that isn't Qword aligned look like a page cross. */
#define MIN_PAGE_P2 4
#else
@@ -57,7 +57,7 @@
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080
-#ifdef TEST_PAGE_CROSS
+#if defined _LIBC_MTAG || defined TEST_PAGE_CROSS
# define MIN_PAGE_SIZE 16
#else
# define MIN_PAGE_SIZE 4096
@@ -51,6 +51,12 @@
#define endloop x15
#define count mask
+#if defined _LIBC_MTAG || defined TEST_PAGE_CROSS
+# define MIN_PAGE_SIZE 16
+#else
+# define MIN_PAGE_SIZE 4096
+#endif
+
ENTRY_ALIGN_AND_PAD (strncmp, 6, 7)
DELOUSE (0)
DELOUSE (1)
@@ -233,8 +239,8 @@ L(do_misaligned):
subs limit_wd, limit_wd, #1
b.lo L(done_loop)
L(loop_misaligned):
- and tmp2, src2, #0xff8
- eor tmp2, tmp2, #0xff8
+ and tmp2, src2, #(MIN_PAGE_SIZE - 8)
+ eor tmp2, tmp2, #(MIN_PAGE_SIZE - 8)
cbz tmp2, L(page_end_loop)
ldr data1, [src1], #8
@@ -70,6 +70,19 @@
ENTRY(strrchr)
DELOUSE (0)
cbz x1, L(null_search)
+#ifdef _LIBC_MTAG
+ /* Quick and dirty version for MTE: remember the last match seen.  */
+ and chrin, chrin, #255
+ mov src_match, #0 /* NULL until a match is found.  */
+L(next_byte):
+ ldrb wtmp2, [srcin]
+ cmp wtmp2, chrin
+ csel src_match, src_match, srcin, ne /* Record address on a match.  */
+ add srcin, srcin, #1
+ cbnz wtmp2, L(next_byte) /* Loop body also processes the NUL byte.  */
+ mov result, src_match
+ ret
+#else
/* Magic constant 0x40100401 to allow us to identify which lane
matches the requested byte. Magic constant 0x80200802 used
similarly for NUL termination. */
@@ -158,9 +171,9 @@ L(tail):
csel result, result, xzr, ne
ret
+#endif
L(null_search):
b __strchrnul
-
END(strrchr)
weak_alias (strrchr, rindex)
libc_hidden_builtin_def (strrchr)