[v8,1/3] arm64: mte: make the per-task SCTLR_EL1 field usable elsewhere
Commit Message
In an upcoming change we are going to introduce per-task SCTLR_EL1
bits for PAC. Move the existing per-task SCTLR_EL1 field out of the
MTE-specific code so that we will be able to use it from both the
PAC and MTE code paths and make the task switching code more efficient.
Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/Ic65fac78a7926168fa68f9e8da591c9e04ff7278
---
v8:
- rebase to 5.12-rc3
v7:
- drop CONFIG_ARM64_NEED_SCTLR_USER
arch/arm64/include/asm/mte.h | 4 ---
arch/arm64/include/asm/processor.h | 6 +++-
arch/arm64/kernel/mte.c | 47 ++++++------------------------
arch/arm64/kernel/process.c | 30 +++++++++++++++----
4 files changed, 38 insertions(+), 49 deletions(-)
Comments
On Thu, 18 Mar 2021 20:10:52 -0700, Peter Collingbourne wrote:
> In an upcoming change we are going to introduce per-task SCTLR_EL1
> bits for PAC. Move the existing per-task SCTLR_EL1 field out of the
> MTE-specific code so that we will be able to use it from both the
> PAC and MTE code paths and make the task switching code more efficient.
Applied to arm64 (for-next/pac-set-get-enabled-keys).
Peter, can you please have a look and give it a try as part of the arm64
for-next/core branch? I rebased your patches on top of the
for-next/mte-async-kernel-mode branch as this was adding more code to
mte_thread_switch(), so I kept the function for now.
Thanks.
[1/3] arm64: mte: make the per-task SCTLR_EL1 field usable elsewhere
https://git.kernel.org/arm64/c/2f79d2fc391e
[2/3] arm64: Introduce prctl(PR_PAC_{SET,GET}_ENABLED_KEYS)
https://git.kernel.org/arm64/c/201698626fbc
[3/3] arm64: pac: Optimize kernel entry/exit key installation code paths
https://git.kernel.org/arm64/c/b90e483938ce
On Wed, Apr 14, 2021 at 3:10 AM Catalin Marinas <catalin.marinas@arm.com> wrote:
>
> On Thu, 18 Mar 2021 20:10:52 -0700, Peter Collingbourne wrote:
> > In an upcoming change we are going to introduce per-task SCTLR_EL1
> > bits for PAC. Move the existing per-task SCTLR_EL1 field out of the
> > MTE-specific code so that we will be able to use it from both the
> > PAC and MTE code paths and make the task switching code more efficient.
>
> Applied to arm64 (for-next/pac-set-get-enabled-keys).
>
> Peter, can you please have a look and give it a try as part of the arm64
> for-next/core branch? I rebased your patches on top of the
> for-next/mte-async-kernel-mode branch as this was adding more code to
> mte_thread_switch(), so I kept the function for now.
>
> Thanks.
>
> [1/3] arm64: mte: make the per-task SCTLR_EL1 field usable elsewhere
> https://git.kernel.org/arm64/c/2f79d2fc391e
> [2/3] arm64: Introduce prctl(PR_PAC_{SET,GET}_ENABLED_KEYS)
> https://git.kernel.org/arm64/c/201698626fbc
> [3/3] arm64: pac: Optimize kernel entry/exit key installation code paths
> https://git.kernel.org/arm64/c/b90e483938ce
Hi Catalin,
I tested the rebased patch series on an Apple M1 under a hypervisor
with my Android forward-edge PAC prototype and it seems to work.
I think it should be possible to get rid of at least one of the ISBs
that are now on the task switch path, but let's leave that to a later
patch. The patch series looks good otherwise.
Peter
@@ -40,7 +40,6 @@ void mte_free_tag_storage(char *storage);
void mte_sync_tags(pte_t *ptep, pte_t pte);
void mte_copy_page_tags(void *kto, const void *kfrom);
void flush_mte_state(void);
-void mte_thread_switch(struct task_struct *next);
void mte_suspend_exit(void);
long set_mte_ctrl(struct task_struct *task, unsigned long arg);
long get_mte_ctrl(struct task_struct *task);
@@ -63,9 +62,6 @@ static inline void mte_copy_page_tags(void *kto, const void *kfrom)
static inline void flush_mte_state(void)
{
}
-static inline void mte_thread_switch(struct task_struct *next)
-{
-}
static inline void mte_suspend_exit(void)
{
}
@@ -151,11 +151,13 @@ struct thread_struct {
struct ptrauth_keys_kernel keys_kernel;
#endif
#ifdef CONFIG_ARM64_MTE
- u64 sctlr_tcf0;
u64 gcr_user_excl;
#endif
+ u64 sctlr_user;
};
+#define SCTLR_USER_MASK SCTLR_EL1_TCF0_MASK
+
static inline void arch_thread_struct_whitelist(unsigned long *offset,
unsigned long *size)
{
@@ -247,6 +249,8 @@ extern void release_thread(struct task_struct *);
unsigned long get_wchan(struct task_struct *p);
+void set_task_sctlr_el1(u64 sctlr);
+
/* Thread switching */
extern struct task_struct *cpu_switch_to(struct task_struct *prev,
struct task_struct *next);
@@ -124,26 +124,6 @@ bool mte_report_once(void)
return READ_ONCE(report_fault_once);
}
-static void update_sctlr_el1_tcf0(u64 tcf0)
-{
- /* ISB required for the kernel uaccess routines */
- sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
- isb();
-}
-
-static void set_sctlr_el1_tcf0(u64 tcf0)
-{
- /*
- * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
- * optimisation. Disable preemption so that it does not see
- * the variable update before the SCTLR_EL1.TCF0 one.
- */
- preempt_disable();
- current->thread.sctlr_tcf0 = tcf0;
- update_sctlr_el1_tcf0(tcf0);
- preempt_enable();
-}
-
static void update_gcr_el1_excl(u64 excl)
{
@@ -176,21 +156,12 @@ void flush_mte_state(void)
write_sysreg_s(0, SYS_TFSRE0_EL1);
clear_thread_flag(TIF_MTE_ASYNC_FAULT);
/* disable tag checking */
- set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
+ set_task_sctlr_el1((current->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK) |
+ SCTLR_EL1_TCF0_NONE);
/* reset tag generation mask */
set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK);
}
-void mte_thread_switch(struct task_struct *next)
-{
- if (!system_supports_mte())
- return;
-
- /* avoid expensive SCTLR_EL1 accesses if no change */
- if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
- update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
-}
-
void mte_suspend_exit(void)
{
if (!system_supports_mte())
@@ -201,7 +172,7 @@ void mte_suspend_exit(void)
long set_mte_ctrl(struct task_struct *task, unsigned long arg)
{
- u64 tcf0;
+ u64 sctlr = task->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK;
u64 gcr_excl = ~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
SYS_GCR_EL1_EXCL_MASK;
@@ -210,23 +181,23 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
switch (arg & PR_MTE_TCF_MASK) {
case PR_MTE_TCF_NONE:
- tcf0 = SCTLR_EL1_TCF0_NONE;
+ sctlr |= SCTLR_EL1_TCF0_NONE;
break;
case PR_MTE_TCF_SYNC:
- tcf0 = SCTLR_EL1_TCF0_SYNC;
+ sctlr |= SCTLR_EL1_TCF0_SYNC;
break;
case PR_MTE_TCF_ASYNC:
- tcf0 = SCTLR_EL1_TCF0_ASYNC;
+ sctlr |= SCTLR_EL1_TCF0_ASYNC;
break;
default:
return -EINVAL;
}
if (task != current) {
- task->thread.sctlr_tcf0 = tcf0;
+ task->thread.sctlr_user = sctlr;
task->thread.gcr_user_excl = gcr_excl;
} else {
- set_sctlr_el1_tcf0(tcf0);
+ set_task_sctlr_el1(sctlr);
set_gcr_el1_excl(gcr_excl);
}
@@ -243,7 +214,7 @@ long get_mte_ctrl(struct task_struct *task)
ret = incl << PR_MTE_TAG_SHIFT;
- switch (task->thread.sctlr_tcf0) {
+ switch (task->thread.sctlr_user & SCTLR_EL1_TCF0_MASK) {
case SCTLR_EL1_TCF0_NONE:
ret |= PR_MTE_TCF_NONE;
break;
@@ -529,6 +529,27 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
write_sysreg(val, cntkctl_el1);
}
+static void update_sctlr_el1(u64 sctlr)
+{
+ sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK, sctlr);
+
+ /* ISB required for the kernel uaccess routines when setting TCF0. */
+ isb();
+}
+
+void set_task_sctlr_el1(u64 sctlr)
+{
+ /*
+ * __switch_to() checks current->thread.sctlr_user as an
+ * optimisation. Disable preemption so that it does not see
+ * the variable update before the SCTLR_EL1 one.
+ */
+ preempt_disable();
+ current->thread.sctlr_user = sctlr;
+ update_sctlr_el1(sctlr);
+ preempt_enable();
+}
+
/*
* Thread switching.
*/
@@ -553,12 +574,9 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
*/
dsb(ish);
- /*
- * MTE thread switching must happen after the DSB above to ensure that
- * any asynchronous tag check faults have been logged in the TFSR*_EL1
- * registers.
- */
- mte_thread_switch(next);
+ /* avoid expensive SCTLR_EL1 accesses if no change */
+ if (prev->thread.sctlr_user != next->thread.sctlr_user)
+ update_sctlr_el1(next->thread.sctlr_user);
/* the actual thread switch */
last = cpu_switch_to(prev, next);