Patchwork [07/24] i386: Add _CET_ENDBR to indirect jump targets in add_n.S/sub_n.S

login
register
mail settings
Submitter H.J. Lu
Date June 13, 2018, 3:31 p.m.
Message ID <20180613153207.57232-8-hjl.tools@gmail.com>
Download mbox | patch
Permalink /patch/27788/
State New
Headers show

Comments

H.J. Lu - June 13, 2018, 3:31 p.m.
i386 add_n.S and sub_n.S use a trick to implement jump tables with LEA.
We can't use conditional branches nor normal jump tables since jump
table entries use EFLAGS set by the jump table index.  This patch adds
_CET_ENDBR to indirect jump targets and adjusts the destination for
_CET_ENDBR.

	* sysdeps/i386/add_n.S: Include <sysdep.h>, instead of
	"sysdep.h".
	(__mpn_add_n): Save and restore %ebx if IBT is enabled.  Add
	_CET_ENDBR to indirect jump targets and adjust jump destination
	for _CET_ENDBR.
	* sysdeps/i386/i686/add_n.S: Include <sysdep.h>, instead of
	"sysdep.h".
	(__mpn_add_n): Save and restore %ebx if IBT is enabled.  Add
	_CET_ENDBR to indirect jump targets and adjust jump destination
	for _CET_ENDBR.
	* sysdeps/i386/sub_n.S: Include <sysdep.h>, instead of
	"sysdep.h".
	(__mpn_sub_n): Save and restore %ebx if IBT is enabled.  Add
	_CET_ENDBR to indirect jump targets and adjust jump destination
	for _CET_ENDBR.
---
 sysdeps/i386/add_n.S      | 27 ++++++++++++++++++++++++++-
 sysdeps/i386/i686/add_n.S | 27 ++++++++++++++++++++++++++-
 sysdeps/i386/sub_n.S      | 26 +++++++++++++++++++++++++-
 3 files changed, 77 insertions(+), 3 deletions(-)
Carlos O'Donell - July 6, 2018, 2:50 p.m.
On 06/13/2018 11:31 AM, H.J. Lu wrote:
> i386 add_n.S and sub_n.S use a trick to implement jump tables with LEA.
> We can't use conditional branches nor normal jump tables since jump
> table entries use EFLAGS set by the jump table index.  This patch adds
> _CET_ENDBR to indirect jump targets and adjusts the destination for
> _CET_ENDBR.
> 
> 	* sysdeps/i386/add_n.S: Include <sysdep.h>, instead of
> 	"sysdep.h".
> 	(__mpn_add_n): Save and restore %ebx if IBT is enabled.  Add
> 	_CET_ENDBR to indirect jump targets and adjust jump destination
> 	for _CET_ENDBR.
> 	* sysdeps/i386/i686/add_n.S: Include <sysdep.h>, instead of
> 	"sysdep.h".
> 	(__mpn_add_n): Save and restore %ebx if IBT is enabled.  Add
> 	_CET_ENDBR to indirect jump targets and adjust jump destination
> 	for _CET_ENDBR.
> 	* sysdeps/i386/sub_n.S: Include <sysdep.h>, instead of
> 	"sysdep.h".
> 	(__mpn_sub_n): Save and restore %ebx if IBT is enabled.  Add
> 	_CET_ENDBR to indirect jump targets and adjust jump destination
> 	for _CET_ENDBR.

Please commit.

Reviewed-by: Carlos O'Donell <carlos@redhat.com>

> ---
>  sysdeps/i386/add_n.S      | 27 ++++++++++++++++++++++++++-
>  sysdeps/i386/i686/add_n.S | 27 ++++++++++++++++++++++++++-
>  sysdeps/i386/sub_n.S      | 26 +++++++++++++++++++++++++-
>  3 files changed, 77 insertions(+), 3 deletions(-)
> 
> diff --git a/sysdeps/i386/add_n.S b/sysdeps/i386/add_n.S
> index d6a35237b0..d3805ba213 100644
> --- a/sysdeps/i386/add_n.S
> +++ b/sysdeps/i386/add_n.S
> @@ -17,7 +17,7 @@
>     along with the GNU MP Library; see the file COPYING.LIB.  If not,
>     see <http://www.gnu.org/licenses/>.  */
>  
> -#include "sysdep.h"
> +#include <sysdep.h>

OK.

>  #include "asm-syntax.h"
>  
>  #define PARMS	4+8	/* space for 2 saved regs */
> @@ -40,6 +40,13 @@ ENTRY (__mpn_add_n)
>  	cfi_rel_offset (esi, 0)
>  	movl S2(%esp),%edx
>  	movl SIZE(%esp),%ecx
> +
> +#if defined __CET__ && (__CET__ & 1) != 0
> +	pushl	%ebx
> +	cfi_adjust_cfa_offset (4)
> +	cfi_rel_offset (ebx, 0)
> +#endif

OK.

> +
>  	movl	%ecx,%eax
>  	shrl	$3,%ecx			/* compute count for unrolled loop */
>  	negl	%eax
> @@ -51,6 +58,9 @@ ENTRY (__mpn_add_n)
>  	subl	%eax,%esi		/* ... by a constant when we ... */
>  	subl	%eax,%edx		/* ... enter the loop */
>  	shrl	$2,%eax			/* restore previous value */
> +#if defined __CET__ && (__CET__ & 1) != 0
> +	leal	-4(,%eax,4),%ebx	/* Count for 4-byte endbr32 */
> +#endif

OK.

>  #ifdef PIC
>  /* Calculate start address in loop for PIC.  Due to limitations in some
>     assemblers, Loop-L0-3 cannot be put into the leal */
> @@ -64,30 +74,40 @@ L(0):	leal	(%eax,%eax,8),%eax
>  #else
>  /* Calculate start address in loop for non-PIC.  */
>   	leal	(L(oop) - 3)(%eax,%eax,8),%eax
> +#endif
> +#if defined __CET__ && (__CET__ & 1) != 0
> +	addl	%ebx,%eax		/* Adjust for endbr32 */
>  #endif
>  	jmp	*%eax			/* jump into loop */
>  	ALIGN (3)
>  L(oop):	movl	(%esi),%eax
>  	adcl	(%edx),%eax
>  	movl	%eax,(%edi)
> +	_CET_ENDBR
>  	movl	4(%esi),%eax
>  	adcl	4(%edx),%eax
>  	movl	%eax,4(%edi)
> +	_CET_ENDBR
>  	movl	8(%esi),%eax
>  	adcl	8(%edx),%eax
>  	movl	%eax,8(%edi)
> +	_CET_ENDBR
>  	movl	12(%esi),%eax
>  	adcl	12(%edx),%eax
>  	movl	%eax,12(%edi)
> +	_CET_ENDBR
>  	movl	16(%esi),%eax
>  	adcl	16(%edx),%eax
>  	movl	%eax,16(%edi)
> +	_CET_ENDBR
>  	movl	20(%esi),%eax
>  	adcl	20(%edx),%eax
>  	movl	%eax,20(%edi)
> +	_CET_ENDBR
>  	movl	24(%esi),%eax
>  	adcl	24(%edx),%eax
>  	movl	%eax,24(%edi)
> +	_CET_ENDBR
>  	movl	28(%esi),%eax
>  	adcl	28(%edx),%eax
>  	movl	%eax,28(%edi)

OK.

> @@ -100,6 +120,11 @@ L(oop):	movl	(%esi),%eax
>  	sbbl	%eax,%eax
>  	negl	%eax
>  
> +#if defined __CET__ && (__CET__ & 1) != 0
> +	popl	%ebx
> +	cfi_adjust_cfa_offset (-4)
> +	cfi_restore (ebx)
> +#endif

OK.

>  	popl %esi
>  	cfi_adjust_cfa_offset (-4)
>  	cfi_restore (esi)
> diff --git a/sysdeps/i386/i686/add_n.S b/sysdeps/i386/i686/add_n.S
> index 5c6c1448d6..d2d532cd30 100644
> --- a/sysdeps/i386/i686/add_n.S
> +++ b/sysdeps/i386/i686/add_n.S
> @@ -17,7 +17,7 @@
>     along with the GNU MP Library; see the file COPYING.LIB.  If not,
>     see <http://www.gnu.org/licenses/>.  */
>  
> -#include "sysdep.h"
> +#include <sysdep.h>
>  #include "asm-syntax.h"
>  
>  #define PARMS	4+8		/* space for 2 saved regs */
> @@ -44,6 +44,13 @@ ENTRY (__mpn_add_n)
>  	cfi_rel_offset (esi, 0)
>  	movl	S2(%esp),%edx
>  	movl	SIZE(%esp),%ecx
> +
> +#if defined __CET__ && (__CET__ & 1) != 0
> +	pushl	%ebx
> +	cfi_adjust_cfa_offset (4)
> +	cfi_rel_offset (ebx, 0)
> +#endif
> +
>  	movl	%ecx,%eax
>  	shrl	$3,%ecx			/* compute count for unrolled loop */
>  	negl	%eax
> @@ -55,6 +62,9 @@ ENTRY (__mpn_add_n)
>  	subl	%eax,%esi		/* ... by a constant when we ... */
>  	subl	%eax,%edx		/* ... enter the loop */
>  	shrl	$2,%eax			/* restore previous value */
> +#if defined __CET__ && (__CET__ & 1) != 0
> +	leal	-4(,%eax,4),%ebx	/* Count for 4-byte endbr32 */
> +#endif
>  #ifdef PIC
>  /* Calculate start address in loop for PIC.  */
>  	leal	(L(oop)-L(0)-3)(%eax,%eax,8),%eax
> @@ -63,30 +73,40 @@ L(0):
>  #else
>  /* Calculate start address in loop for non-PIC.  */
>   	leal	(L(oop) - 3)(%eax,%eax,8),%eax
> +#endif
> +#if defined __CET__ && (__CET__ & 1) != 0
> +	addl	%ebx,%eax		/* Adjust for endbr32 */
>  #endif
>  	jmp	*%eax			/* jump into loop */
>  	ALIGN (3)
>  L(oop):	movl	(%esi),%eax
>  	adcl	(%edx),%eax
>  	movl	%eax,(%edi)
> +	_CET_ENDBR
>  	movl	4(%esi),%eax
>  	adcl	4(%edx),%eax
>  	movl	%eax,4(%edi)
> +	_CET_ENDBR
>  	movl	8(%esi),%eax
>  	adcl	8(%edx),%eax
>  	movl	%eax,8(%edi)
> +	_CET_ENDBR
>  	movl	12(%esi),%eax
>  	adcl	12(%edx),%eax
>  	movl	%eax,12(%edi)
> +	_CET_ENDBR
>  	movl	16(%esi),%eax
>  	adcl	16(%edx),%eax
>  	movl	%eax,16(%edi)
> +	_CET_ENDBR
>  	movl	20(%esi),%eax
>  	adcl	20(%edx),%eax
>  	movl	%eax,20(%edi)
> +	_CET_ENDBR
>  	movl	24(%esi),%eax
>  	adcl	24(%edx),%eax
>  	movl	%eax,24(%edi)
> +	_CET_ENDBR
>  	movl	28(%esi),%eax
>  	adcl	28(%edx),%eax
>  	movl	%eax,28(%edi)
> @@ -99,6 +119,11 @@ L(oop):	movl	(%esi),%eax
>  	sbbl	%eax,%eax
>  	negl	%eax
>  
> +#if defined __CET__ && (__CET__ & 1) != 0
> +	popl	%ebx
> +	cfi_adjust_cfa_offset (-4)
> +	cfi_restore (ebx)
> +#endif
>  	popl %esi
>  	cfi_adjust_cfa_offset (-4)
>  	cfi_restore (esi)
> diff --git a/sysdeps/i386/sub_n.S b/sysdeps/i386/sub_n.S
> index 7c6f48c32e..8f321b8c6c 100644
> --- a/sysdeps/i386/sub_n.S
> +++ b/sysdeps/i386/sub_n.S
> @@ -17,7 +17,7 @@
>     along with the GNU MP Library; see the file COPYING.LIB.  If not,
>     see <http://www.gnu.org/licenses/>.  */
>  
> -#include "sysdep.h"
> +#include <sysdep.h>
>  #include "asm-syntax.h"
>  
>  #define PARMS	4+8		/* space for 2 saved regs */
> @@ -40,6 +40,13 @@ ENTRY (__mpn_sub_n)
>  	cfi_rel_offset (esi, 0)
>  	movl	S2(%esp),%edx
>  	movl	SIZE(%esp),%ecx
> +
> +#if defined __CET__ && (__CET__ & 1) != 0
> +	pushl	%ebx
> +	cfi_adjust_cfa_offset (4)
> +	cfi_rel_offset (ebx, 0)
> +#endif
> +
>  	movl	%ecx,%eax
>  	shrl	$3,%ecx			/* compute count for unrolled loop */
>  	negl	%eax
> @@ -51,6 +58,9 @@ ENTRY (__mpn_sub_n)
>  	subl	%eax,%esi		/* ... by a constant when we ... */
>  	subl	%eax,%edx		/* ... enter the loop */
>  	shrl	$2,%eax			/* restore previous value */
> +#if defined __CET__ && (__CET__ & 1) != 0
> +	leal	-4(,%eax,4),%ebx	/* Count for 4-byte endbr32 */
> +#endif
>  #ifdef PIC
>  /* Calculate start address in loop for PIC.  Due to limitations in some
>     assemblers, Loop-L0-3 cannot be put into the leal */
> @@ -64,30 +74,39 @@ L(0):	leal	(%eax,%eax,8),%eax
>  #else
>  /* Calculate start address in loop for non-PIC.  */
>   	leal	(L(oop) - 3)(%eax,%eax,8),%eax
> +#endif
> +#if defined __CET__ && (__CET__ & 1) != 0
> +	addl	%ebx,%eax		/* Adjust for endbr32 */
>  #endif
>  	jmp	*%eax			/* jump into loop */
>  	ALIGN (3)
>  L(oop):	movl	(%esi),%eax
>  	sbbl	(%edx),%eax
>  	movl	%eax,(%edi)
> +	_CET_ENDBR
>  	movl	4(%esi),%eax
>  	sbbl	4(%edx),%eax
>  	movl	%eax,4(%edi)
> +	_CET_ENDBR
>  	movl	8(%esi),%eax
>  	sbbl	8(%edx),%eax
>  	movl	%eax,8(%edi)
>  	movl	12(%esi),%eax
>  	sbbl	12(%edx),%eax
>  	movl	%eax,12(%edi)
> +	_CET_ENDBR
>  	movl	16(%esi),%eax
>  	sbbl	16(%edx),%eax
>  	movl	%eax,16(%edi)
> +	_CET_ENDBR
>  	movl	20(%esi),%eax
>  	sbbl	20(%edx),%eax
>  	movl	%eax,20(%edi)
> +	_CET_ENDBR
>  	movl	24(%esi),%eax
>  	sbbl	24(%edx),%eax
>  	movl	%eax,24(%edi)
> +	_CET_ENDBR
>  	movl	28(%esi),%eax
>  	sbbl	28(%edx),%eax
>  	movl	%eax,28(%edi)
> @@ -100,6 +119,11 @@ L(oop):	movl	(%esi),%eax
>  	sbbl	%eax,%eax
>  	negl	%eax
>  
> +#if defined __CET__ && (__CET__ & 1) != 0
> +	popl	%ebx
> +	cfi_adjust_cfa_offset (-4)
> +	cfi_restore (ebx)
> +#endif
>  	popl %esi
>  	cfi_adjust_cfa_offset (-4)
>  	cfi_restore (esi)
> 

OK, repeated mechanical changes.

Patch

diff --git a/sysdeps/i386/add_n.S b/sysdeps/i386/add_n.S
index d6a35237b0..d3805ba213 100644
--- a/sysdeps/i386/add_n.S
+++ b/sysdeps/i386/add_n.S
@@ -17,7 +17,7 @@ 
    along with the GNU MP Library; see the file COPYING.LIB.  If not,
    see <http://www.gnu.org/licenses/>.  */
 
-#include "sysdep.h"
+#include <sysdep.h>
 #include "asm-syntax.h"
 
 #define PARMS	4+8	/* space for 2 saved regs */
@@ -40,6 +40,13 @@  ENTRY (__mpn_add_n)
 	cfi_rel_offset (esi, 0)
 	movl S2(%esp),%edx
 	movl SIZE(%esp),%ecx
+
+#if defined __CET__ && (__CET__ & 1) != 0
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+#endif
+
 	movl	%ecx,%eax
 	shrl	$3,%ecx			/* compute count for unrolled loop */
 	negl	%eax
@@ -51,6 +58,9 @@  ENTRY (__mpn_add_n)
 	subl	%eax,%esi		/* ... by a constant when we ... */
 	subl	%eax,%edx		/* ... enter the loop */
 	shrl	$2,%eax			/* restore previous value */
+#if defined __CET__ && (__CET__ & 1) != 0
+	leal	-4(,%eax,4),%ebx	/* Count for 4-byte endbr32 */
+#endif
 #ifdef PIC
 /* Calculate start address in loop for PIC.  Due to limitations in some
    assemblers, Loop-L0-3 cannot be put into the leal */
@@ -64,30 +74,40 @@  L(0):	leal	(%eax,%eax,8),%eax
 #else
 /* Calculate start address in loop for non-PIC.  */
  	leal	(L(oop) - 3)(%eax,%eax,8),%eax
+#endif
+#if defined __CET__ && (__CET__ & 1) != 0
+	addl	%ebx,%eax		/* Adjust for endbr32 */
 #endif
 	jmp	*%eax			/* jump into loop */
 	ALIGN (3)
 L(oop):	movl	(%esi),%eax
 	adcl	(%edx),%eax
 	movl	%eax,(%edi)
+	_CET_ENDBR
 	movl	4(%esi),%eax
 	adcl	4(%edx),%eax
 	movl	%eax,4(%edi)
+	_CET_ENDBR
 	movl	8(%esi),%eax
 	adcl	8(%edx),%eax
 	movl	%eax,8(%edi)
+	_CET_ENDBR
 	movl	12(%esi),%eax
 	adcl	12(%edx),%eax
 	movl	%eax,12(%edi)
+	_CET_ENDBR
 	movl	16(%esi),%eax
 	adcl	16(%edx),%eax
 	movl	%eax,16(%edi)
+	_CET_ENDBR
 	movl	20(%esi),%eax
 	adcl	20(%edx),%eax
 	movl	%eax,20(%edi)
+	_CET_ENDBR
 	movl	24(%esi),%eax
 	adcl	24(%edx),%eax
 	movl	%eax,24(%edi)
+	_CET_ENDBR
 	movl	28(%esi),%eax
 	adcl	28(%edx),%eax
 	movl	%eax,28(%edi)
@@ -100,6 +120,11 @@  L(oop):	movl	(%esi),%eax
 	sbbl	%eax,%eax
 	negl	%eax
 
+#if defined __CET__ && (__CET__ & 1) != 0
+	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+#endif
 	popl %esi
 	cfi_adjust_cfa_offset (-4)
 	cfi_restore (esi)
diff --git a/sysdeps/i386/i686/add_n.S b/sysdeps/i386/i686/add_n.S
index 5c6c1448d6..d2d532cd30 100644
--- a/sysdeps/i386/i686/add_n.S
+++ b/sysdeps/i386/i686/add_n.S
@@ -17,7 +17,7 @@ 
    along with the GNU MP Library; see the file COPYING.LIB.  If not,
    see <http://www.gnu.org/licenses/>.  */
 
-#include "sysdep.h"
+#include <sysdep.h>
 #include "asm-syntax.h"
 
 #define PARMS	4+8		/* space for 2 saved regs */
@@ -44,6 +44,13 @@  ENTRY (__mpn_add_n)
 	cfi_rel_offset (esi, 0)
 	movl	S2(%esp),%edx
 	movl	SIZE(%esp),%ecx
+
+#if defined __CET__ && (__CET__ & 1) != 0
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+#endif
+
 	movl	%ecx,%eax
 	shrl	$3,%ecx			/* compute count for unrolled loop */
 	negl	%eax
@@ -55,6 +62,9 @@  ENTRY (__mpn_add_n)
 	subl	%eax,%esi		/* ... by a constant when we ... */
 	subl	%eax,%edx		/* ... enter the loop */
 	shrl	$2,%eax			/* restore previous value */
+#if defined __CET__ && (__CET__ & 1) != 0
+	leal	-4(,%eax,4),%ebx	/* Count for 4-byte endbr32 */
+#endif
 #ifdef PIC
 /* Calculate start address in loop for PIC.  */
 	leal	(L(oop)-L(0)-3)(%eax,%eax,8),%eax
@@ -63,30 +73,40 @@  L(0):
 #else
 /* Calculate start address in loop for non-PIC.  */
  	leal	(L(oop) - 3)(%eax,%eax,8),%eax
+#endif
+#if defined __CET__ && (__CET__ & 1) != 0
+	addl	%ebx,%eax		/* Adjust for endbr32 */
 #endif
 	jmp	*%eax			/* jump into loop */
 	ALIGN (3)
 L(oop):	movl	(%esi),%eax
 	adcl	(%edx),%eax
 	movl	%eax,(%edi)
+	_CET_ENDBR
 	movl	4(%esi),%eax
 	adcl	4(%edx),%eax
 	movl	%eax,4(%edi)
+	_CET_ENDBR
 	movl	8(%esi),%eax
 	adcl	8(%edx),%eax
 	movl	%eax,8(%edi)
+	_CET_ENDBR
 	movl	12(%esi),%eax
 	adcl	12(%edx),%eax
 	movl	%eax,12(%edi)
+	_CET_ENDBR
 	movl	16(%esi),%eax
 	adcl	16(%edx),%eax
 	movl	%eax,16(%edi)
+	_CET_ENDBR
 	movl	20(%esi),%eax
 	adcl	20(%edx),%eax
 	movl	%eax,20(%edi)
+	_CET_ENDBR
 	movl	24(%esi),%eax
 	adcl	24(%edx),%eax
 	movl	%eax,24(%edi)
+	_CET_ENDBR
 	movl	28(%esi),%eax
 	adcl	28(%edx),%eax
 	movl	%eax,28(%edi)
@@ -99,6 +119,11 @@  L(oop):	movl	(%esi),%eax
 	sbbl	%eax,%eax
 	negl	%eax
 
+#if defined __CET__ && (__CET__ & 1) != 0
+	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+#endif
 	popl %esi
 	cfi_adjust_cfa_offset (-4)
 	cfi_restore (esi)
diff --git a/sysdeps/i386/sub_n.S b/sysdeps/i386/sub_n.S
index 7c6f48c32e..8f321b8c6c 100644
--- a/sysdeps/i386/sub_n.S
+++ b/sysdeps/i386/sub_n.S
@@ -17,7 +17,7 @@ 
    along with the GNU MP Library; see the file COPYING.LIB.  If not,
    see <http://www.gnu.org/licenses/>.  */
 
-#include "sysdep.h"
+#include <sysdep.h>
 #include "asm-syntax.h"
 
 #define PARMS	4+8		/* space for 2 saved regs */
@@ -40,6 +40,13 @@  ENTRY (__mpn_sub_n)
 	cfi_rel_offset (esi, 0)
 	movl	S2(%esp),%edx
 	movl	SIZE(%esp),%ecx
+
+#if defined __CET__ && (__CET__ & 1) != 0
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+#endif
+
 	movl	%ecx,%eax
 	shrl	$3,%ecx			/* compute count for unrolled loop */
 	negl	%eax
@@ -51,6 +58,9 @@  ENTRY (__mpn_sub_n)
 	subl	%eax,%esi		/* ... by a constant when we ... */
 	subl	%eax,%edx		/* ... enter the loop */
 	shrl	$2,%eax			/* restore previous value */
+#if defined __CET__ && (__CET__ & 1) != 0
+	leal	-4(,%eax,4),%ebx	/* Count for 4-byte endbr32 */
+#endif
 #ifdef PIC
 /* Calculate start address in loop for PIC.  Due to limitations in some
    assemblers, Loop-L0-3 cannot be put into the leal */
@@ -64,30 +74,39 @@  L(0):	leal	(%eax,%eax,8),%eax
 #else
 /* Calculate start address in loop for non-PIC.  */
  	leal	(L(oop) - 3)(%eax,%eax,8),%eax
+#endif
+#if defined __CET__ && (__CET__ & 1) != 0
+	addl	%ebx,%eax		/* Adjust for endbr32 */
 #endif
 	jmp	*%eax			/* jump into loop */
 	ALIGN (3)
 L(oop):	movl	(%esi),%eax
 	sbbl	(%edx),%eax
 	movl	%eax,(%edi)
+	_CET_ENDBR
 	movl	4(%esi),%eax
 	sbbl	4(%edx),%eax
 	movl	%eax,4(%edi)
+	_CET_ENDBR
 	movl	8(%esi),%eax
 	sbbl	8(%edx),%eax
 	movl	%eax,8(%edi)
 	movl	12(%esi),%eax
 	sbbl	12(%edx),%eax
 	movl	%eax,12(%edi)
+	_CET_ENDBR
 	movl	16(%esi),%eax
 	sbbl	16(%edx),%eax
 	movl	%eax,16(%edi)
+	_CET_ENDBR
 	movl	20(%esi),%eax
 	sbbl	20(%edx),%eax
 	movl	%eax,20(%edi)
+	_CET_ENDBR
 	movl	24(%esi),%eax
 	sbbl	24(%edx),%eax
 	movl	%eax,24(%edi)
+	_CET_ENDBR
 	movl	28(%esi),%eax
 	sbbl	28(%edx),%eax
 	movl	%eax,28(%edi)
@@ -100,6 +119,11 @@  L(oop):	movl	(%esi),%eax
 	sbbl	%eax,%eax
 	negl	%eax
 
+#if defined __CET__ && (__CET__ & 1) != 0
+	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+#endif
 	popl %esi
 	cfi_adjust_cfa_offset (-4)
 	cfi_restore (esi)