[v1,2/2] x86: Add `prepare_context_switch` to initialize register inuse states
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
pending
|
Patch applied
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Testing passed
|
Commit Message
xsave/xrstor have optimization to skip saving/restoring register
classes if those register classes are in the init state
(inuse[bit]==0).
We can get:
SSE state
AVX state
ZMM_HI256 state
to init state using `vzeroall`. Doing this before syscalls that will
cause a proper context switch can be beneficial in terms of the amount
of state the kernel needs to save/restore. This can save time and
memory.
---
sysdeps/generic/prepare-context-switch.h | 28 +++++++++++++
sysdeps/unix/sysv/linux/clock_nanosleep.c | 2 +
sysdeps/unix/sysv/linux/sched_yield.c | 2 +
sysdeps/x86/prepare-context-switch.h | 50 +++++++++++++++++++++++
4 files changed, 82 insertions(+)
create mode 100644 sysdeps/generic/prepare-context-switch.h
create mode 100644 sysdeps/x86/prepare-context-switch.h
Comments
On Wed, Jun 7, 2023 at 12:46 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> xsave/xrstor have optimization to skip saving/restoring register
> classes if those register classes are in the init state
> (inuse[bit]==0).
>
> We can get:
> SSE state
> AVX state
> ZMM_HI256 state
>
> to init state using `vzeroall`. Doing this before syscalls that will
> cause a proper context switch can be beneficial in terms of the amount
> of state the kernel needs to save/restore. This can save time and
> memory.
> ---
> sysdeps/generic/prepare-context-switch.h | 28 +++++++++++++
> sysdeps/unix/sysv/linux/clock_nanosleep.c | 2 +
> sysdeps/unix/sysv/linux/sched_yield.c | 2 +
> sysdeps/x86/prepare-context-switch.h | 50 +++++++++++++++++++++++
> 4 files changed, 82 insertions(+)
> create mode 100644 sysdeps/generic/prepare-context-switch.h
> create mode 100644 sysdeps/x86/prepare-context-switch.h
>
> diff --git a/sysdeps/generic/prepare-context-switch.h b/sysdeps/generic/prepare-context-switch.h
> new file mode 100644
> index 0000000000..6153847905
> --- /dev/null
> +++ b/sysdeps/generic/prepare-context-switch.h
> @@ -0,0 +1,28 @@
> +/* Prepare process for context switch. generic version
> + Copyright (C) 2023 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifndef _PREPARE_CONTEXT_SWITCH_H
> +#define _PREPARE_CONTEXT_SWITCH_H
> +
> +static void
> +prepare_context_switch (void)
> +{
> + /* Empty. */
> +}
> +
> +#endif
> diff --git a/sysdeps/unix/sysv/linux/clock_nanosleep.c b/sysdeps/unix/sysv/linux/clock_nanosleep.c
> index ac2d810632..e674f0ac54 100644
> --- a/sysdeps/unix/sysv/linux/clock_nanosleep.c
> +++ b/sysdeps/unix/sysv/linux/clock_nanosleep.c
> @@ -23,6 +23,7 @@
> #include "kernel-posix-cpu-timers.h"
>
> #include <shlib-compat.h>
> +#include <prepare-context-switch.h>
>
> /* We can simply use the syscall. The CPU clocks are not supported
> with this function. */
> @@ -44,6 +45,7 @@ __clock_nanosleep_time64 (clockid_t clock_id, int flags,
> #endif
>
> int r;
> + prepare_context_switch();
> #ifdef __ASSUME_TIME64_SYSCALLS
> r = INTERNAL_SYSCALL_CANCEL (clock_nanosleep_time64, clock_id, flags, req,
> rem);
> diff --git a/sysdeps/unix/sysv/linux/sched_yield.c b/sysdeps/unix/sysv/linux/sched_yield.c
> index 154bf725b0..d26c0f8a9f 100644
> --- a/sysdeps/unix/sysv/linux/sched_yield.c
> +++ b/sysdeps/unix/sysv/linux/sched_yield.c
> @@ -17,10 +17,12 @@
> <https://www.gnu.org/licenses/>. */
>
> #include <sysdep.h>
> +#include <prepare-context-switch.h>
>
> int
> __sched_yield (void)
> {
> + prepare_context_switch();
> return INLINE_SYSCALL_CALL (sched_yield);
> }
> libc_hidden_def (__sched_yield);
> diff --git a/sysdeps/x86/prepare-context-switch.h b/sysdeps/x86/prepare-context-switch.h
> new file mode 100644
> index 0000000000..bf33a7a1b3
> --- /dev/null
> +++ b/sysdeps/x86/prepare-context-switch.h
> @@ -0,0 +1,50 @@
> +/* Prepare process for context switch. x86 version
> + Copyright (C) 2023 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifndef _PREPARE_CONTEXT_SWITCH_H
> +#define _PREPARE_CONTEXT_SWITCH_H
> +
> +#ifdef __AVX__
Please use
if (CPU_FEATURE_ACTIVE (AVX))
to detect it at run-time.
> +static void
> +prepare_context_switch (void)
> +{
> + /* vzeroall before context switch will restore xsave/xrstor state of the
> + following to init state:
> + - SSE state
> + - AVX state
> + - ZMM_HI256 state
> + This saves a touch of overhead and memory in context switches.
> + This function can/should be used before an operation that will
> + cause a context switch in the current process (sched_yield,
> + *sleep, etc...).
> + */
> + __asm__ volatile ("vzeroall"
Can you use _mm256_zeroall?
> + :
> + :
> + : "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6",
> + "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12",
> + "zmm13", "zmm14", "zmm15");
> + /* TODO: Add xtilerelease for amx state. */
> +}
> +
> +#else
> +# undef _PREPARE_CONTEXT_SWITCH_H
> +# include <sysdeps/generic/prepare-context-switch.h>
> +#endif
> +
> +#endif
> --
> 2.34.1
>
On Wed, Jun 7, 2023 at 3:46 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, Jun 7, 2023 at 12:46 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > xsave/xrstor have optimization to skip saving/restoring register
> > classes if those register classes are in the init state
> > (inuse[bit]==0).
> >
> > We can get:
> > SSE state
> > AVX state
> > ZMM_HI256 state
> >
> > to init state using `vzeroall`. Doing this before syscalls that will
> > cause a proper context switch can be beneficial in terms of the amount
> > of state the kernel needs to save/restore. This can save time and
> > memory.
> > ---
> > sysdeps/generic/prepare-context-switch.h | 28 +++++++++++++
> > sysdeps/unix/sysv/linux/clock_nanosleep.c | 2 +
> > sysdeps/unix/sysv/linux/sched_yield.c | 2 +
> > sysdeps/x86/prepare-context-switch.h | 50 +++++++++++++++++++++++
> > 4 files changed, 82 insertions(+)
> > create mode 100644 sysdeps/generic/prepare-context-switch.h
> > create mode 100644 sysdeps/x86/prepare-context-switch.h
> >
> > diff --git a/sysdeps/generic/prepare-context-switch.h b/sysdeps/generic/prepare-context-switch.h
> > new file mode 100644
> > index 0000000000..6153847905
> > --- /dev/null
> > +++ b/sysdeps/generic/prepare-context-switch.h
> > @@ -0,0 +1,28 @@
> > +/* Prepare process for context switch. generic version
> > + Copyright (C) 2023 Free Software Foundation, Inc.
> > + This file is part of the GNU C Library.
> > +
> > + The GNU C Library is free software; you can redistribute it and/or
> > + modify it under the terms of the GNU Lesser General Public
> > + License as published by the Free Software Foundation; either
> > + version 2.1 of the License, or (at your option) any later version.
> > +
> > + The GNU C Library is distributed in the hope that it will be useful,
> > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + Lesser General Public License for more details.
> > +
> > + You should have received a copy of the GNU Lesser General Public
> > + License along with the GNU C Library; if not, see
> > + <https://www.gnu.org/licenses/>. */
> > +
> > +#ifndef _PREPARE_CONTEXT_SWITCH_H
> > +#define _PREPARE_CONTEXT_SWITCH_H
> > +
> > +static void
> > +prepare_context_switch (void)
> > +{
> > + /* Empty. */
> > +}
> > +
> > +#endif
> > diff --git a/sysdeps/unix/sysv/linux/clock_nanosleep.c b/sysdeps/unix/sysv/linux/clock_nanosleep.c
> > index ac2d810632..e674f0ac54 100644
> > --- a/sysdeps/unix/sysv/linux/clock_nanosleep.c
> > +++ b/sysdeps/unix/sysv/linux/clock_nanosleep.c
> > @@ -23,6 +23,7 @@
> > #include "kernel-posix-cpu-timers.h"
> >
> > #include <shlib-compat.h>
> > +#include <prepare-context-switch.h>
> >
> > /* We can simply use the syscall. The CPU clocks are not supported
> > with this function. */
> > @@ -44,6 +45,7 @@ __clock_nanosleep_time64 (clockid_t clock_id, int flags,
> > #endif
> >
> > int r;
> > + prepare_context_switch();
> > #ifdef __ASSUME_TIME64_SYSCALLS
> > r = INTERNAL_SYSCALL_CANCEL (clock_nanosleep_time64, clock_id, flags, req,
> > rem);
> > diff --git a/sysdeps/unix/sysv/linux/sched_yield.c b/sysdeps/unix/sysv/linux/sched_yield.c
> > index 154bf725b0..d26c0f8a9f 100644
> > --- a/sysdeps/unix/sysv/linux/sched_yield.c
> > +++ b/sysdeps/unix/sysv/linux/sched_yield.c
> > @@ -17,10 +17,12 @@
> > <https://www.gnu.org/licenses/>. */
> >
> > #include <sysdep.h>
> > +#include <prepare-context-switch.h>
> >
> > int
> > __sched_yield (void)
> > {
> > + prepare_context_switch();
> > return INLINE_SYSCALL_CALL (sched_yield);
> > }
> > libc_hidden_def (__sched_yield);
> > diff --git a/sysdeps/x86/prepare-context-switch.h b/sysdeps/x86/prepare-context-switch.h
> > new file mode 100644
> > index 0000000000..bf33a7a1b3
> > --- /dev/null
> > +++ b/sysdeps/x86/prepare-context-switch.h
> > @@ -0,0 +1,50 @@
> > +/* Prepare process for context switch. x86 version
> > + Copyright (C) 2023 Free Software Foundation, Inc.
> > + This file is part of the GNU C Library.
> > +
> > + The GNU C Library is free software; you can redistribute it and/or
> > + modify it under the terms of the GNU Lesser General Public
> > + License as published by the Free Software Foundation; either
> > + version 2.1 of the License, or (at your option) any later version.
> > +
> > + The GNU C Library is distributed in the hope that it will be useful,
> > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + Lesser General Public License for more details.
> > +
> > + You should have received a copy of the GNU Lesser General Public
> > + License along with the GNU C Library; if not, see
> > + <https://www.gnu.org/licenses/>. */
> > +
> > +#ifndef _PREPARE_CONTEXT_SWITCH_H
> > +#define _PREPARE_CONTEXT_SWITCH_H
> > +
> > +#ifdef __AVX__
>
> Please use
>
> if (CPU_FEATURE_ACTIVE (AVX))
>
> to detect it at run-time.
>
Wanted to avoid overhead. Think if we want runtime check should ifunc
the functions
we want to put it in (just clock_nanosleep64 and sched_yield). WDYT?
> > +static void
> > +prepare_context_switch (void)
> > +{
> > + /* vzeroall before context switch will restore xsave/xrstor state of the
> > + following to init state:
> > + - SSE state
> > + - AVX state
> > + - ZMM_HI256 state
> > + This saves a touch of overhead and memory in context switches.
> > + This function can/should be used before an operation that will
> > + cause a context switch in the current process (sched_yield,
> > + *sleep, etc...).
> > + */
> > + __asm__ volatile ("vzeroall"
>
> Can you use _mm256_zeroall?
>
> > + :
> > + :
> > + : "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6",
> > + "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12",
> > + "zmm13", "zmm14", "zmm15");
> > + /* TODO: Add xtilerelease for amx state. */
> > +}
> > +
> > +#else
> > +# undef _PREPARE_CONTEXT_SWITCH_H
> > +# include <sysdeps/generic/prepare-context-switch.h>
> > +#endif
> > +
> > +#endif
> > --
> > 2.34.1
> >
>
>
> --
> H.J.
On Wed, Jun 7, 2023 at 4:59 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Wed, Jun 7, 2023 at 3:46 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Wed, Jun 7, 2023 at 12:46 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > xsave/xrstor have optimization to skip saving/restoring register
> > > classes if those register classes are in the init state
> > > (inuse[bit]==0).
> > >
> > > We can get:
> > > SSE state
> > > AVX state
> > > ZMM_HI256 state
> > >
> > > to init state using `vzeroall`. Doing this before syscalls that will
> > > cause a proper context switch can be beneficial in terms of the amount
> > > of state the kernel needs to save/restore. This can save time and
> > > memory.
> > > ---
> > > sysdeps/generic/prepare-context-switch.h | 28 +++++++++++++
> > > sysdeps/unix/sysv/linux/clock_nanosleep.c | 2 +
> > > sysdeps/unix/sysv/linux/sched_yield.c | 2 +
> > > sysdeps/x86/prepare-context-switch.h | 50 +++++++++++++++++++++++
> > > 4 files changed, 82 insertions(+)
> > > create mode 100644 sysdeps/generic/prepare-context-switch.h
> > > create mode 100644 sysdeps/x86/prepare-context-switch.h
> > >
> > > diff --git a/sysdeps/generic/prepare-context-switch.h b/sysdeps/generic/prepare-context-switch.h
> > > new file mode 100644
> > > index 0000000000..6153847905
> > > --- /dev/null
> > > +++ b/sysdeps/generic/prepare-context-switch.h
> > > @@ -0,0 +1,28 @@
> > > +/* Prepare process for context switch. generic version
> > > + Copyright (C) 2023 Free Software Foundation, Inc.
> > > + This file is part of the GNU C Library.
> > > +
> > > + The GNU C Library is free software; you can redistribute it and/or
> > > + modify it under the terms of the GNU Lesser General Public
> > > + License as published by the Free Software Foundation; either
> > > + version 2.1 of the License, or (at your option) any later version.
> > > +
> > > + The GNU C Library is distributed in the hope that it will be useful,
> > > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > > + Lesser General Public License for more details.
> > > +
> > > + You should have received a copy of the GNU Lesser General Public
> > > + License along with the GNU C Library; if not, see
> > > + <https://www.gnu.org/licenses/>. */
> > > +
> > > +#ifndef _PREPARE_CONTEXT_SWITCH_H
> > > +#define _PREPARE_CONTEXT_SWITCH_H
> > > +
> > > +static void
> > > +prepare_context_switch (void)
> > > +{
> > > + /* Empty. */
> > > +}
> > > +
> > > +#endif
> > > diff --git a/sysdeps/unix/sysv/linux/clock_nanosleep.c b/sysdeps/unix/sysv/linux/clock_nanosleep.c
> > > index ac2d810632..e674f0ac54 100644
> > > --- a/sysdeps/unix/sysv/linux/clock_nanosleep.c
> > > +++ b/sysdeps/unix/sysv/linux/clock_nanosleep.c
> > > @@ -23,6 +23,7 @@
> > > #include "kernel-posix-cpu-timers.h"
> > >
> > > #include <shlib-compat.h>
> > > +#include <prepare-context-switch.h>
> > >
> > > /* We can simply use the syscall. The CPU clocks are not supported
> > > with this function. */
> > > @@ -44,6 +45,7 @@ __clock_nanosleep_time64 (clockid_t clock_id, int flags,
> > > #endif
> > >
> > > int r;
> > > + prepare_context_switch();
> > > #ifdef __ASSUME_TIME64_SYSCALLS
> > > r = INTERNAL_SYSCALL_CANCEL (clock_nanosleep_time64, clock_id, flags, req,
> > > rem);
> > > diff --git a/sysdeps/unix/sysv/linux/sched_yield.c b/sysdeps/unix/sysv/linux/sched_yield.c
> > > index 154bf725b0..d26c0f8a9f 100644
> > > --- a/sysdeps/unix/sysv/linux/sched_yield.c
> > > +++ b/sysdeps/unix/sysv/linux/sched_yield.c
> > > @@ -17,10 +17,12 @@
> > > <https://www.gnu.org/licenses/>. */
> > >
> > > #include <sysdep.h>
> > > +#include <prepare-context-switch.h>
> > >
> > > int
> > > __sched_yield (void)
> > > {
> > > + prepare_context_switch();
> > > return INLINE_SYSCALL_CALL (sched_yield);
> > > }
> > > libc_hidden_def (__sched_yield);
> > > diff --git a/sysdeps/x86/prepare-context-switch.h b/sysdeps/x86/prepare-context-switch.h
> > > new file mode 100644
> > > index 0000000000..bf33a7a1b3
> > > --- /dev/null
> > > +++ b/sysdeps/x86/prepare-context-switch.h
> > > @@ -0,0 +1,50 @@
> > > +/* Prepare process for context switch. x86 version
> > > + Copyright (C) 2023 Free Software Foundation, Inc.
> > > + This file is part of the GNU C Library.
> > > +
> > > + The GNU C Library is free software; you can redistribute it and/or
> > > + modify it under the terms of the GNU Lesser General Public
> > > + License as published by the Free Software Foundation; either
> > > + version 2.1 of the License, or (at your option) any later version.
> > > +
> > > + The GNU C Library is distributed in the hope that it will be useful,
> > > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > > + Lesser General Public License for more details.
> > > +
> > > + You should have received a copy of the GNU Lesser General Public
> > > + License along with the GNU C Library; if not, see
> > > + <https://www.gnu.org/licenses/>. */
> > > +
> > > +#ifndef _PREPARE_CONTEXT_SWITCH_H
> > > +#define _PREPARE_CONTEXT_SWITCH_H
> > > +
> > > +#ifdef __AVX__
> >
> > Please use
> >
> > if (CPU_FEATURE_ACTIVE (AVX))
> >
> > to detect it at run-time.
> >
> Wanted to avoid overhead. Think if we want runtime check should ifunc
> the functions
> we want to put it in (just clock_nanosleep64 and sched_yield). WDYT?
>
> > > +static void
> > > +prepare_context_switch (void)
> > > +{
> > > + /* vzeroall before context switch will restore xsave/xrstor state of the
> > > + following to init state:
> > > + - SSE state
> > > + - AVX state
> > > + - ZMM_HI256 state
> > > + This saves a touch of overhead and memory in context switches.
> > > + This function can/should be used before an operation that will
> > > + cause a context switch in the current process (sched_yield,
> > > + *sleep, etc...).
> > > + */
> > > + __asm__ volatile ("vzeroall"
> >
> > Can you use _mm256_zeroall?
> >
> > > + :
> > > + :
> > > + : "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6",
> > > + "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12",
> > > + "zmm13", "zmm14", "zmm15");
> > > + /* TODO: Add xtilerelease for amx state. */
> > > +}
> > > +
> > > +#else
> > > +# undef _PREPARE_CONTEXT_SWITCH_H
> > > +# include <sysdeps/generic/prepare-context-switch.h>
> > > +#endif
> > > +
> > > +#endif
> > > --
> > > 2.34.1
> > >
> >
> >
> > --
> > H.J.
Abandoning this patch in favor of the versions at:
"x86: Implement sched_yield syscall for x86 only."
and
"x86: Implement clock_nanosleep{_time64} syscall for x86 only."
new file mode 100644
@@ -0,0 +1,28 @@
+/* Prepare process for context switch. generic version
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _PREPARE_CONTEXT_SWITCH_H
+#define _PREPARE_CONTEXT_SWITCH_H
+
+static void
+prepare_context_switch (void)
+{
+ /* Empty. */
+}
+
+#endif
@@ -23,6 +23,7 @@
#include "kernel-posix-cpu-timers.h"
#include <shlib-compat.h>
+#include <prepare-context-switch.h>
/* We can simply use the syscall. The CPU clocks are not supported
with this function. */
@@ -44,6 +45,7 @@ __clock_nanosleep_time64 (clockid_t clock_id, int flags,
#endif
int r;
+ prepare_context_switch();
#ifdef __ASSUME_TIME64_SYSCALLS
r = INTERNAL_SYSCALL_CANCEL (clock_nanosleep_time64, clock_id, flags, req,
rem);
@@ -17,10 +17,12 @@
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <prepare-context-switch.h>
int
__sched_yield (void)
{
+ prepare_context_switch();
return INLINE_SYSCALL_CALL (sched_yield);
}
libc_hidden_def (__sched_yield);
new file mode 100644
@@ -0,0 +1,50 @@
+/* Prepare process for context switch. x86 version
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _PREPARE_CONTEXT_SWITCH_H
+#define _PREPARE_CONTEXT_SWITCH_H
+
+#ifdef __AVX__
+static void
+prepare_context_switch (void)
+{
+ /* vzeroall before context switch will restore xsave/xrstor state of the
+ following to init state:
+ - SSE state
+ - AVX state
+ - ZMM_HI256 state
+ This saves a touch of overhead and memory in context switches.
+ This function can/should be used before an operation that will
+ cause a context switch in the current process (sched_yield,
+ *sleep, etc...).
+ */
+ __asm__ volatile ("vzeroall"
+ :
+ :
+ : "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6",
+ "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12",
+ "zmm13", "zmm14", "zmm15");
+ /* TODO: Add xtilerelease for amx state. */
+}
+
+#else
+# undef _PREPARE_CONTEXT_SWITCH_H
+# include <sysdeps/generic/prepare-context-switch.h>
+#endif
+
+#endif