[v4,2/8] x86: Add COND_VZEROUPPER that can replace vzeroupper if no `ret`
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
Commit Message
The RTM vzeroupper mitigation has no way of replacing inline
vzeroupper not before a return.
This can be useful when hoisting a vzeroupper to save code size
for example:
```
L(foo):
cmpl %eax, %edx
jz L(bar)
tzcntl %eax, %eax
addq %rdi, %rax
VZEROUPPER_RETURN
L(bar):
xorl %eax, %eax
VZEROUPPER_RETURN
```
Can become:
```
L(foo):
COND_VZEROUPPER
cmpl %eax, %edx
jz L(bar)
tzcntl %eax, %eax
addq %rdi, %rax
ret
L(bar):
xorl %eax, %eax
ret
```
This code does not change any existing functionality.
There is no difference in the objdump of libc.so before and after this
patch.
---
sysdeps/x86_64/multiarch/avx-rtm-vecs.h | 1 +
sysdeps/x86_64/sysdep.h | 18 ++++++++++++++++++
2 files changed, 19 insertions(+)
Comments
On Mon, Jun 6, 2022 at 3:37 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> The RTM vzeroupper mitigation has no way of replacing inline
> vzeroupper not before a return.
>
> This can be useful when hoisting a vzeroupper to save code size
> for example:
>
> ```
> L(foo):
> cmpl %eax, %edx
> jz L(bar)
> tzcntl %eax, %eax
> addq %rdi, %rax
> VZEROUPPER_RETURN
>
> L(bar):
> xorl %eax, %eax
> VZEROUPPER_RETURN
> ```
>
> Can become:
>
> ```
> L(foo):
> COND_VZEROUPPER
> cmpl %eax, %edx
> jz L(bar)
> tzcntl %eax, %eax
> addq %rdi, %rax
> ret
>
> L(bar):
> xorl %eax, %eax
> ret
> ```
>
> This code does not change any existing functionality.
>
> There is no difference in the objdump of libc.so before and after this
> patch.
> ---
> sysdeps/x86_64/multiarch/avx-rtm-vecs.h | 1 +
> sysdeps/x86_64/sysdep.h | 18 ++++++++++++++++++
> 2 files changed, 19 insertions(+)
>
> diff --git a/sysdeps/x86_64/multiarch/avx-rtm-vecs.h b/sysdeps/x86_64/multiarch/avx-rtm-vecs.h
> index 3f531dd47f..6ca9f5e6ba 100644
> --- a/sysdeps/x86_64/multiarch/avx-rtm-vecs.h
> +++ b/sysdeps/x86_64/multiarch/avx-rtm-vecs.h
> @@ -20,6 +20,7 @@
> #ifndef _AVX_RTM_VECS_H
> #define _AVX_RTM_VECS_H 1
>
> +#define COND_VZEROUPPER COND_VZEROUPPER_XTEST
> #define ZERO_UPPER_VEC_REGISTERS_RETURN \
> ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
>
> diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h
> index f14d50786d..4f512d5566 100644
> --- a/sysdeps/x86_64/sysdep.h
> +++ b/sysdeps/x86_64/sysdep.h
> @@ -106,6 +106,24 @@ lose: \
> vzeroupper; \
> ret
>
> +/* Can be used to replace vzeroupper that is not directly before a
> + return. This is useful when hoisting a vzeroupper from multiple
> + return paths to decrease the total number of vzerouppers and code
> + size. */
> +#define COND_VZEROUPPER_XTEST \
> + xtest; \
> + jz 1f; \
> + vzeroall; \
> + jmp 2f; \
> +1: \
> + vzeroupper; \
> +2:
> +
> +/* In RTM define this as COND_VZEROUPPER_XTEST. */
> +#ifndef COND_VZEROUPPER
> +# define COND_VZEROUPPER vzeroupper
> +#endif
> +
> /* Zero upper vector registers and return. */
> #ifndef ZERO_UPPER_VEC_REGISTERS_RETURN
> # define ZERO_UPPER_VEC_REGISTERS_RETURN \
> --
> 2.34.1
>
LGTM.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
Thanks.
On Mon, Jun 6, 2022 at 7:46 PM H.J. Lu via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On Mon, Jun 6, 2022 at 3:37 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > The RTM vzeroupper mitigation has no way of replacing inline
> > vzeroupper not before a return.
> >
> > This can be useful when hoisting a vzeroupper to save code size
> > for example:
> >
> > ```
> > L(foo):
> > cmpl %eax, %edx
> > jz L(bar)
> > tzcntl %eax, %eax
> > addq %rdi, %rax
> > VZEROUPPER_RETURN
> >
> > L(bar):
> > xorl %eax, %eax
> > VZEROUPPER_RETURN
> > ```
> >
> > Can become:
> >
> > ```
> > L(foo):
> > COND_VZEROUPPER
> > cmpl %eax, %edx
> > jz L(bar)
> > tzcntl %eax, %eax
> > addq %rdi, %rax
> > ret
> >
> > L(bar):
> > xorl %eax, %eax
> > ret
> > ```
> >
> > This code does not change any existing functionality.
> >
> > There is no difference in the objdump of libc.so before and after this
> > patch.
> > ---
> > sysdeps/x86_64/multiarch/avx-rtm-vecs.h | 1 +
> > sysdeps/x86_64/sysdep.h | 18 ++++++++++++++++++
> > 2 files changed, 19 insertions(+)
> >
> > diff --git a/sysdeps/x86_64/multiarch/avx-rtm-vecs.h b/sysdeps/x86_64/multiarch/avx-rtm-vecs.h
> > index 3f531dd47f..6ca9f5e6ba 100644
> > --- a/sysdeps/x86_64/multiarch/avx-rtm-vecs.h
> > +++ b/sysdeps/x86_64/multiarch/avx-rtm-vecs.h
> > @@ -20,6 +20,7 @@
> > #ifndef _AVX_RTM_VECS_H
> > #define _AVX_RTM_VECS_H 1
> >
> > +#define COND_VZEROUPPER COND_VZEROUPPER_XTEST
> > #define ZERO_UPPER_VEC_REGISTERS_RETURN \
> > ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
> >
> > diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h
> > index f14d50786d..4f512d5566 100644
> > --- a/sysdeps/x86_64/sysdep.h
> > +++ b/sysdeps/x86_64/sysdep.h
> > @@ -106,6 +106,24 @@ lose: \
> > vzeroupper; \
> > ret
> >
> > +/* Can be used to replace vzeroupper that is not directly before a
> > + return. This is useful when hoisting a vzeroupper from multiple
> > + return paths to decrease the total number of vzerouppers and code
> > + size. */
> > +#define COND_VZEROUPPER_XTEST \
> > + xtest; \
> > + jz 1f; \
> > + vzeroall; \
> > + jmp 2f; \
> > +1: \
> > + vzeroupper; \
> > +2:
> > +
> > +/* In RTM define this as COND_VZEROUPPER_XTEST. */
> > +#ifndef COND_VZEROUPPER
> > +# define COND_VZEROUPPER vzeroupper
> > +#endif
> > +
> > /* Zero upper vector registers and return. */
> > #ifndef ZERO_UPPER_VEC_REGISTERS_RETURN
> > # define ZERO_UPPER_VEC_REGISTERS_RETURN \
> > --
> > 2.34.1
> >
>
> LGTM.
>
> Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
>
> Thanks.
>
>
> --
> H.J.
I would like to backport this patch to release branches.
Any comments or objections?
--Sunil
@@ -20,6 +20,7 @@
#ifndef _AVX_RTM_VECS_H
#define _AVX_RTM_VECS_H 1
+#define COND_VZEROUPPER COND_VZEROUPPER_XTEST
#define ZERO_UPPER_VEC_REGISTERS_RETURN \
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
@@ -106,6 +106,24 @@ lose: \
vzeroupper; \
ret
+/* Can be used to replace vzeroupper that is not directly before a
+ return. This is useful when hoisting a vzeroupper from multiple
+ return paths to decrease the total number of vzerouppers and code
+ size. */
+#define COND_VZEROUPPER_XTEST \
+ xtest; \
+ jz 1f; \
+ vzeroall; \
+ jmp 2f; \
+1: \
+ vzeroupper; \
+2:
+
+/* In RTM define this as COND_VZEROUPPER_XTEST. */
+#ifndef COND_VZEROUPPER
+# define COND_VZEROUPPER vzeroupper
+#endif
+
/* Zero upper vector registers and return. */
#ifndef ZERO_UPPER_VEC_REGISTERS_RETURN
# define ZERO_UPPER_VEC_REGISTERS_RETURN \