[RFC,1/1] elf: mseal non-writable segments

Message ID 20240522112933.2005066-2-sroettger@google.com
State Changes Requested
Headers
Series elf: mseal non-writable segments |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit fail Patch series failed to build
linaro-tcwg-bot/tcwg_glibc_build--master-arm fail Testing failed
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 fail Testing failed

Commit Message

Stephen Röttger May 22, 2024, 11:29 a.m. UTC
  Mseal is a new Linux syscall that blocks any modifications to given
memory mappings like unmapping them or changing the permission bits.

This patch applies mseal to segments during loading if both of the
following hold:
* the writable bit is not set
* mode includes RTLD_NODELETE

In addition, it adds RTLD_NODELETE to the main binary/libraries and
propagates RTLD_NODELETE to auxiliary library loads.
---
 elf/dl-load.c         |  6 ++++++
 elf/dl-load.h         |  1 +
 elf/dl-map-segments.h |  6 ++++++
 elf/dl-open.c         |  3 ++-
 elf/rtld.c            | 12 +++++++++---
 5 files changed, 24 insertions(+), 4 deletions(-)
  

Comments

Cristian Rodríguez May 22, 2024, 4:24 p.m. UTC | #1
On Wed, May 22, 2024 at 7:30 AM Stephen Roettger <sroettger@google.com> wrote:
>
> Mseal is a new Linux syscall that blocks any modifications to given
> memory mappings like unmapping them or changing the permission bits.

OK, but has this been merged into at least some tree that will be
pulled by Linus at some point in the future, AND are you sure the
syscall will not change arguments/semantics again?
  
Stephen Röttger May 22, 2024, 4:29 p.m. UTC | #2
The syscall has been merged to linux-next and I think it's unlikely it will
change at this point:
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/mm/mseal.c

But in any case, I think any glibc changes should wait until mseal has
made it into a Linux release.

On Wed, May 22, 2024 at 6:24 PM Cristian Rodríguez <cristian@rodriguez.im>
wrote:

> On Wed, May 22, 2024 at 7:30 AM Stephen Roettger <sroettger@google.com>
> wrote:
> >
> > Mseal is a new Linux syscall that blocks any modifications to given
> > memory mappings like unmapping them or changing the permission bits.
>
> OOk but has this been merged into at least some tree that will be
> pulled by linus at some point in the future AND you are sure the
> syscall will not change arguments/semantics again?
>
  
Carlos O'Donell May 22, 2024, 6:39 p.m. UTC | #3
On 5/22/24 12:29 PM, Stephen Röttger wrote:
> The syscall has been merged to linux-next and I think it's unlikely
> it will change at this point: 
> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/mm/mseal.c
> <https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/mm/mseal.c>
>
>  But in any case, I think any glibc changes should wait until mseal
> made it to a Linux release.

Correct. I think an RFC is the right way forward for now to discuss the
semantics for the dynamic loader.
  

Patch

diff --git a/elf/dl-load.c b/elf/dl-load.c
index a34cb3559c..638028d6da 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -1136,6 +1136,7 @@  _dl_map_object_from_fd (const char *name, const char *origname, int fd,
 	  c->mapend = ALIGN_UP (ph->p_vaddr + ph->p_filesz, GLRO(dl_pagesize));
 	  c->dataend = ph->p_vaddr + ph->p_filesz;
 	  c->allocend = ph->p_vaddr + ph->p_memsz;
+	  c->seal = false;
 	  /* Remember the maximum p_align.  */
 	  if (powerof2 (ph->p_align) && ph->p_align > p_align_max)
 	    p_align_max = ph->p_align;
@@ -1169,6 +1170,11 @@  _dl_map_object_from_fd (const char *name, const char *origname, int fd,
 	  if (ph->p_flags & PF_X)
 	    c->prot |= PROT_EXEC;
 #endif
+
+	  if (mode & RTLD_NODELETE && ((c->prot & PROT_WRITE) == 0)) {
+	    c->seal = true;
+	  }
+
 	  break;
 
 	case PT_TLS:
diff --git a/elf/dl-load.h b/elf/dl-load.h
index 656ec229bf..040e744908 100644
--- a/elf/dl-load.h
+++ b/elf/dl-load.h
@@ -78,6 +78,7 @@  struct loadcmd
   ElfW(Addr) mapstart, mapend, dataend, allocend, mapalign;
   ElfW(Off) mapoff;
   int prot;                             /* PROT_* bits.  */
+  bool seal;
 };
 
 
diff --git a/elf/dl-map-segments.h b/elf/dl-map-segments.h
index 30977cf800..4cddb4b294 100644
--- a/elf/dl-map-segments.h
+++ b/elf/dl-map-segments.h
@@ -19,6 +19,8 @@ 
 
 #include <dl-load.h>
 
+#define SYS_mseal 462
+
 /* Map a segment and align it properly.  */
 
 static __always_inline ElfW(Addr)
@@ -143,6 +145,10 @@  _dl_map_segments (struct link_map *l, int fd,
               == MAP_FAILED))
         return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
 
+      if (c->seal) {
+        syscall(SYS_mseal, (void*) mm, map_size, 0);
+      }
+
     postmap:
       _dl_postprocess_loadcmd (l, header, c);
 
diff --git a/elf/dl-open.c b/elf/dl-open.c
index c378da16c0..a6c89134f8 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -636,7 +636,8 @@  dl_open_worker_begin (void *a)
 
   /* Load that object's dependencies.  */
   _dl_map_object_deps (new, NULL, 0, 0,
-		       mode & (__RTLD_DLOPEN | RTLD_DEEPBIND | __RTLD_AUDIT));
+		       mode & (__RTLD_DLOPEN | RTLD_DEEPBIND |
+			       __RTLD_AUDIT | RTLD_NODELETE));
 
   /* So far, so good.  Now check the versions.  */
   for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i)
diff --git a/elf/rtld.c b/elf/rtld.c
index e9525ea987..b471e4c0af 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -79,6 +79,8 @@ 
 # define RTLD_TIMING_SET(var, value) (var) = (value)
 # define RTLD_TIMING_REF(var)        &(var)
 
+#define SYS_mseal 462
+
 static inline void
 rtld_timer_start (hp_timing_t *var)
 {
@@ -809,7 +811,7 @@  do_preload (const char *fname, struct link_map *main_map, const char *where)
 
   args.str = fname;
   args.loader = main_map;
-  args.mode = __RTLD_SECURE;
+  args.mode = __RTLD_SECURE | RTLD_NODELETE;
 
   unsigned int old_nloaded = GL(dl_ns)[LM_ID_BASE]._ns_nloaded;
 
@@ -1214,6 +1216,10 @@  rtld_setup_main_map (struct link_map *main_map)
 	     segment.  */
 	  expected_load_address = ((allocend + GLRO(dl_pagesize) - 1)
 				   & ~(GLRO(dl_pagesize) - 1));
+
+	  if ((ph->p_flags & PF_W) == 0) {
+	    syscall(SYS_mseal, mapstart, expected_load_address - mapstart, 0);
+	  }
 	}
 	break;
 
@@ -1636,7 +1642,7 @@  dl_main (const ElfW(Phdr) *phdr,
       /* Create a link_map for the executable itself.
 	 This will be what dlopen on "" returns.  */
       main_map = _dl_new_object ((char *) "", "", lt_executable, NULL,
-				 __RTLD_OPENEXEC, LM_ID_BASE);
+				 __RTLD_OPENEXEC | RTLD_NODELETE, LM_ID_BASE);
       assert (main_map != NULL);
       main_map->l_phdr = phdr;
       main_map->l_phnum = phnum;
@@ -1964,7 +1970,7 @@  dl_main (const ElfW(Phdr) *phdr,
     RTLD_TIMING_VAR (start);
     rtld_timer_start (&start);
     _dl_map_object_deps (main_map, preloads, npreloads,
-			 state.mode == rtld_mode_trace, 0);
+			 state.mode == rtld_mode_trace, RTLD_NODELETE);
     rtld_timer_accum (&load_time, start);
   }