elf: Handle ld.so with LOAD segment gaps in _dl_find_object (bug 31943)

Message ID 87frsqy09u.fsf@oldenburg3.str.redhat.com
State Superseded
Headers
Series elf: Handle ld.so with LOAD segment gaps in _dl_find_object (bug 31943) |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm fail Test failed
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 fail Test failed

Commit Message

Florian Weimer July 3, 2024, 3:15 p.m. UTC
  As ld.so is loaded by the kernel, gaps are not filled by the initial
mapping. Check the program headers for a gap, and if there is one,
handle a non-contiguous ld.so link map similar to a non-contiguous
main link map in _dlfo_process_initial.

Ideally, the extra code should not be compiled in if the binutils is
recent enough and used in a configuration that does not produce gaps.
For example, binutils 2.39 and later have commit 9833b7757d246f22db4
("PR28824, relro security issues"), which should avoid creation of
gaps on the x86 architecture.

Tested on aarch64-linux-gnu with an ld.so that had gaps.

---
 elf/dl-find_object.c | 76 +++++++++++++++++++++++++++++++++-------------------
 elf/rtld.c           | 22 +++++++++++++++
 2 files changed, 70 insertions(+), 28 deletions(-)


base-commit: d2f6ceaccbae2f645075dedad2b762896da1ec04
  

Comments

H.J. Lu July 3, 2024, 10:12 p.m. UTC | #1
On Wed, Jul 3, 2024, 11:16 PM Florian Weimer <fweimer@redhat.com> wrote:

> As ld.so is loaded by the kernel, gaps are not filled by the initial
> mapping. Check the program headers for a gap, and if there is one,
> handle a non-contiguous ld.so link map similar to a non-contiguous
> main link map in _dlfo_process_initial.
>
> Ideally, the extra code should not be compiled in if the binutils is
> recent enough and used in a configuration that does not produce gaps.
> For example, binutils 2.39 and later have commit 9833b7757d246f22db4
> ("PR28824, relro security issues"), which should avoid creation of
> gaps on the x86 architecture.
>

We should add a linker test to check if this is needed.


> Tested on aarch64-linux-gnu with an ld.so that had gaps.
>
> ---
>  elf/dl-find_object.c | 76
> +++++++++++++++++++++++++++++++++-------------------
>  elf/rtld.c           | 22 +++++++++++++++
>  2 files changed, 70 insertions(+), 28 deletions(-)
>
> diff --git a/elf/dl-find_object.c b/elf/dl-find_object.c
> index 449302eda3..056fbda64c 100644
> --- a/elf/dl-find_object.c
> +++ b/elf/dl-find_object.c
> @@ -466,6 +466,38 @@ __dl_find_object (void *pc1, struct dl_find_object
> *result)
>  hidden_def (__dl_find_object)
>  weak_alias (__dl_find_object, _dl_find_object)
>
> +/* Subroutine of _dlfo_process_initial to split out noncontiguous link
> +   maps.  NODELETE is the number of used _dlfo_nodelete_mappings
> +   elements.  It is incremented as needed, and the new NODELETE value
> +   is returned.  */
> +static size_t
> +_dlfo_process_initial_noncontiguous_map (struct link_map *map,
> +                                         size_t nodelete)
> +{
> +  struct dl_find_object_internal dlfo;
> +  _dl_find_object_from_map (map, &dlfo);
> +
> +  /* PT_LOAD segments for a non-contiguous link map are added to the
> +     non-closeable mappings.  */
> +  const ElfW(Phdr) *ph = map->l_phdr;
> +  const ElfW(Phdr) *ph_end = map->l_phdr + map->l_phnum;
> +  for (; ph < ph_end; ++ph)
> +    if (ph->p_type == PT_LOAD)
> +      {
> +        if (_dlfo_nodelete_mappings != NULL)
> +          {
> +            /* Second pass only.  */
> +            _dlfo_nodelete_mappings[nodelete] = dlfo;
> +            _dlfo_nodelete_mappings[nodelete].map_start
> +              = ph->p_vaddr + map->l_addr;
> +            _dlfo_nodelete_mappings[nodelete].map_end
> +              = _dlfo_nodelete_mappings[nodelete].map_start + ph->p_memsz;
> +          }
> +        ++nodelete;
> +      }
> +  return nodelete;
> +}
> +
>  /* _dlfo_process_initial is called twice.  First to compute the array
>     sizes from the initial loaded mappings.  Second to fill in the
>     bases and infos arrays with the (still unsorted) data.  Returns the
> @@ -477,29 +509,8 @@ _dlfo_process_initial (void)
>
>    size_t nodelete = 0;
>    if (!main_map->l_contiguous)
> -    {
> -      struct dl_find_object_internal dlfo;
> -      _dl_find_object_from_map (main_map, &dlfo);
> -
> -      /* PT_LOAD segments for a non-contiguous are added to the
> -         non-closeable mappings.  */
> -      for (const ElfW(Phdr) *ph = main_map->l_phdr,
> -             *ph_end = main_map->l_phdr + main_map->l_phnum;
> -           ph < ph_end; ++ph)
> -        if (ph->p_type == PT_LOAD)
> -          {
> -            if (_dlfo_nodelete_mappings != NULL)
> -              {
> -                /* Second pass only.  */
> -                _dlfo_nodelete_mappings[nodelete] = dlfo;
> -                _dlfo_nodelete_mappings[nodelete].map_start
> -                  = ph->p_vaddr + main_map->l_addr;
> -                _dlfo_nodelete_mappings[nodelete].map_end
> -                  = _dlfo_nodelete_mappings[nodelete].map_start +
> ph->p_memsz;
> -              }
> -            ++nodelete;
> -          }
> -    }
> +    /* Contiguous case already handled in _dl_find_object_init.  */
> +    nodelete = _dlfo_process_initial_noncontiguous_map (main_map,
> nodelete);
>
>    size_t loaded = 0;
>    for (Lmid_t ns = 0; ns < GL(dl_nns); ++ns)
> @@ -511,11 +522,20 @@ _dlfo_process_initial (void)
>            /* lt_library link maps are implicitly NODELETE.  */
>            if (l->l_type == lt_library || l->l_nodelete_active)
>              {
> -              if (_dlfo_nodelete_mappings != NULL)
> -                /* Second pass only.  */
> -                _dl_find_object_from_map
> -                  (l, _dlfo_nodelete_mappings + nodelete);
> -              ++nodelete;
> +#ifdef SHARED
> +              /* The kernel may have loaded ld.so with gaps.   */
> +              if (!l->l_contiguous && l == &GL(dl_rtld_map))
> +                nodelete
> +                  = _dlfo_process_initial_noncontiguous_map (l, nodelete);
> +              else
> +#endif
> +                {
> +                  if (_dlfo_nodelete_mappings != NULL)
> +                    /* Second pass only.  */
> +                    _dl_find_object_from_map
> +                      (l, _dlfo_nodelete_mappings + nodelete);
> +                  ++nodelete;
> +                }
>              }
>            else if (l->l_type == lt_loaded)
>              {
> diff --git a/elf/rtld.c b/elf/rtld.c
> index bfdf632e77..388ad456ac 100644
> --- a/elf/rtld.c
> +++ b/elf/rtld.c
> @@ -1766,6 +1766,28 @@ dl_main (const ElfW(Phdr) *phdr,
>
>    GL(dl_rtld_map).l_phdr = rtld_phdr;
>    GL(dl_rtld_map).l_phnum = rtld_ehdr->e_phnum;
> +  GL(dl_rtld_map).l_contiguous = 1;
> +  /* The linker may not have produced a contiguous object.  The kernel
> +     will load the object with actual gaps (unlike the glibc loader
> +     for shared objects, which always produces a contiguous mapping).
> +     See similar logic in rtld_setup_main_map.  */
> +  {
> +    ElfW(Addr) expected_load_address = 0;
> +    for (const ElfW(Phdr) *ph = rtld_phdr; ph < &phdr[rtld_ehdr->e_phnum];
> +        ++ph)
> +      if (ph->p_type == PT_LOAD)
> +       {
> +         ElfW(Addr) mapstart = ph->p_vaddr & ~(GLRO(dl_pagesize) - 1);
> +         if (GL(dl_rtld_map).l_contiguous && expected_load_address != 0
> +             && expected_load_address != mapstart)
> +           GL(dl_rtld_map).l_contiguous = 0;
> +         ElfW(Addr) allocend = ph->p_vaddr + ph->p_memsz;
> +         /* The next expected address is the page following this load
> +            segment.  */
> +         expected_load_address = ((allocend + GLRO(dl_pagesize) - 1)
> +                                  & ~(GLRO(dl_pagesize) - 1));
> +       }
> +  }
>
>
>    /* PT_GNU_RELRO is usually the last phdr.  */
>
> base-commit: d2f6ceaccbae2f645075dedad2b762896da1ec04
>
>
  
Florian Weimer July 4, 2024, 7:20 a.m. UTC | #2
* ci notify:

> In glibc_check master-aarch64 after:
>
>   | glibc patch https://patchwork.sourceware.org/patch/93298
>   | Author: Florian Weimer <fweimer@redhat.com>
>   | Date:   Wed Jul 3 17:15:41 2024 +0200
>   | 
>   |     elf: Handle ld.so with LOAD segment gaps in _dl_find_object (bug 31943)
>   |     
>   |     As ld.so is loaded by the kernel, gaps are not filled by the initial
>   |     mapping. Check the program headers for a gap, and if there is one,
>   |     handle a non-contiguous ld.so link map similar to a non-contiguous
>   |     main link map in _dlfo_process_initial.
>   |     
>   | ... 7 lines of the commit log omitted.
>   | ... applied on top of baseline commit:
>   | 2b92982e236 nptl: fix potential merge of __rseq_* relro symbols
>
> FAIL: 1 regressions
>
> regressions.sum:
> 		=== glibc tests ===
>
> Running glibc:elf ...
> FAIL: elf/tst-valgrind-smoke 
>
>
> You can find the failure logs in *.log.1.xz files in
>  - https://ci.linaro.org/job/tcwg_glibc_check--master-aarch64-precommit/2016/artifact/artifacts/artifacts.precommit/00-sumfiles/
> The full lists of regressions and progressions as well as configure and make commands are in
>  - https://ci.linaro.org/job/tcwg_glibc_check--master-aarch64-precommit/2016/artifact/artifacts/artifacts.precommit/notify/
> The list of [ignored] baseline and flaky failures are in
>  - https://ci.linaro.org/job/tcwg_glibc_check--master-aarch64-precommit/2016/artifact/artifacts/artifacts.precommit/sumfiles/xfails.xfail

The error is:

==1009133== Invalid read of size 4
==1009133==    at 0x11F6FC: dl_main (rtld.c:1778)
==1009133==    by 0x11CAE7: _dl_sysdep_start (dl-sysdep.c:141)
==1009133==    by 0x11DF5F: _dl_start_final (rtld.c:494)
==1009133==    by 0x11DF5F: _dl_start (rtld.c:581)
==1009133==    by 0x121CD3: (below main) (dl-start.S:30)
==1009133==  Address 0x12f020 is not stack'd, malloc'd or (recently) free'd
==1009133== 
==1009133== 
==1009133== Process terminating with default action of signal 11 (SIGSEGV): dumping core
==1009133==  Access not within mapped region at address 0x12F020
==1009133==    at 0x11F6FC: dl_main (rtld.c:1778)
==1009133==    by 0x11CAE7: _dl_sysdep_start (dl-sysdep.c:141)
==1009133==    by 0x11DF5F: _dl_start_final (rtld.c:494)
==1009133==    by 0x11DF5F: _dl_start (rtld.c:581)
==1009133==    by 0x121CD3: (below main) (dl-start.S:30)
==1009133==  If you believe this happened as a result of a stack
==1009133==  overflow in your program's main thread (unlikely but
==1009133==  possible), you can try to increase the size of the
==1009133==  main thread stack using the --main-stacksize= flag.
==1009133==  The main thread stack size used in this run was 8388608.
==1034959== Invalid read of size 4
==1034959==    at 0x11F6FC: dl_main (rtld.c:1778)
==1034959==    by 0x11CAE7: _dl_sysdep_start (dl-sysdep.c:141)
==1034959==    by 0x11DF5F: _dl_start_final (rtld.c:494)
==1034959==    by 0x11DF5F: _dl_start (rtld.c:581)
==1034959==    by 0x121CD3: (below main) (dl-start.S:30)
==1034959==  Address 0x12f020 is not stack'd, malloc'd or (recently) free'd
==1034959== 
==1034959== 
==1034959== Process terminating with default action of signal 11 (SIGSEGV): dumping core
==1034959==  Access not within mapped region at address 0x12F020
==1034959==    at 0x11F6FC: dl_main (rtld.c:1778)
==1034959==    by 0x11CAE7: _dl_sysdep_start (dl-sysdep.c:141)
==1034959==    by 0x11DF5F: _dl_start_final (rtld.c:494)
==1034959==    by 0x11DF5F: _dl_start (rtld.c:581)
==1034959==    by 0x121CD3: (below main) (dl-start.S:30)
==1034959==  If you believe this happened as a result of a stack
==1034959==  overflow in your program's main thread (unlikely but
==1034959==  possible), you can try to increase the size of the
==1034959==  main thread stack using the --main-stacksize= flag.
==1034959==  The main thread stack size used in this run was 8388608.

It's a silly typo in the loop condition:

@@ -1773,7 +1773,7 @@ dl_main (const ElfW(Phdr) *phdr,
      See similar logic in rtld_setup_main_map.  */
   {
     ElfW(Addr) expected_load_address = 0;
-    for (const ElfW(Phdr) *ph = rtld_phdr; ph < &phdr[rtld_ehdr->e_phnum];
+    for (const ElfW(Phdr) *ph = rtld_phdr; ph < &rtld_phdr[rtld_ehdr->e_phnum];
 	 ++ph)
       if (ph->p_type == PT_LOAD)
 	{

Thanks,
Florian
  
Florian Weimer July 5, 2024, 10:44 a.m. UTC | #3
* H. J. Lu:

> On Wed, Jul 3, 2024, 11:16 PM Florian Weimer <fweimer@redhat.com> wrote:
>
>  As ld.so is loaded by the kernel, gaps are not filled by the initial
>  mapping. Check the program headers for a gap, and if there is one,
>  handle a non-contiguous ld.so link map similar to a non-contiguous
>  main link map in _dlfo_process_initial.
>
>  Ideally, the extra code should not be compiled in if the binutils is
>  recent enough and used in a configuration that does not produce gaps.
>  For example, binutils 2.39 and later have commit 9833b7757d246f22db4
>  ("PR28824, relro security issues"), which should avoid creation of
>  gaps on the x86 architecture.
>
> We should add a linker test to check if this is needed.

I created a version with a linker version check, but I'm not entirely
sure the bug is indeed fixed, especially for targets that build to 64K
pages, but can run also on 4K page kernels.

Thanks,
Florian
  

Patch

diff --git a/elf/dl-find_object.c b/elf/dl-find_object.c
index 449302eda3..056fbda64c 100644
--- a/elf/dl-find_object.c
+++ b/elf/dl-find_object.c
@@ -466,6 +466,38 @@  __dl_find_object (void *pc1, struct dl_find_object *result)
 hidden_def (__dl_find_object)
 weak_alias (__dl_find_object, _dl_find_object)
 
+/* Subroutine of _dlfo_process_initial to split out noncontiguous link
+   maps.  NODELETE is the number of used _dlfo_nodelete_mappings
+   elements.  It is incremented as needed, and the new NODELETE value
+   is returned.  */
+static size_t
+_dlfo_process_initial_noncontiguous_map (struct link_map *map,
+                                         size_t nodelete)
+{
+  struct dl_find_object_internal dlfo;
+  _dl_find_object_from_map (map, &dlfo);
+
+  /* PT_LOAD segments for a non-contiguous link map are added to the
+     non-closeable mappings.  */
+  const ElfW(Phdr) *ph = map->l_phdr;
+  const ElfW(Phdr) *ph_end = map->l_phdr + map->l_phnum;
+  for (; ph < ph_end; ++ph)
+    if (ph->p_type == PT_LOAD)
+      {
+        if (_dlfo_nodelete_mappings != NULL)
+          {
+            /* Second pass only.  */
+            _dlfo_nodelete_mappings[nodelete] = dlfo;
+            _dlfo_nodelete_mappings[nodelete].map_start
+              = ph->p_vaddr + map->l_addr;
+            _dlfo_nodelete_mappings[nodelete].map_end
+              = _dlfo_nodelete_mappings[nodelete].map_start + ph->p_memsz;
+          }
+        ++nodelete;
+      }
+  return nodelete;
+}
+
 /* _dlfo_process_initial is called twice.  First to compute the array
    sizes from the initial loaded mappings.  Second to fill in the
    bases and infos arrays with the (still unsorted) data.  Returns the
@@ -477,29 +509,8 @@  _dlfo_process_initial (void)
 
   size_t nodelete = 0;
   if (!main_map->l_contiguous)
-    {
-      struct dl_find_object_internal dlfo;
-      _dl_find_object_from_map (main_map, &dlfo);
-
-      /* PT_LOAD segments for a non-contiguous are added to the
-         non-closeable mappings.  */
-      for (const ElfW(Phdr) *ph = main_map->l_phdr,
-             *ph_end = main_map->l_phdr + main_map->l_phnum;
-           ph < ph_end; ++ph)
-        if (ph->p_type == PT_LOAD)
-          {
-            if (_dlfo_nodelete_mappings != NULL)
-              {
-                /* Second pass only.  */
-                _dlfo_nodelete_mappings[nodelete] = dlfo;
-                _dlfo_nodelete_mappings[nodelete].map_start
-                  = ph->p_vaddr + main_map->l_addr;
-                _dlfo_nodelete_mappings[nodelete].map_end
-                  = _dlfo_nodelete_mappings[nodelete].map_start + ph->p_memsz;
-              }
-            ++nodelete;
-          }
-    }
+    /* Contiguous case already handled in _dl_find_object_init.  */
+    nodelete = _dlfo_process_initial_noncontiguous_map (main_map, nodelete);
 
   size_t loaded = 0;
   for (Lmid_t ns = 0; ns < GL(dl_nns); ++ns)
@@ -511,11 +522,20 @@  _dlfo_process_initial (void)
           /* lt_library link maps are implicitly NODELETE.  */
           if (l->l_type == lt_library || l->l_nodelete_active)
             {
-              if (_dlfo_nodelete_mappings != NULL)
-                /* Second pass only.  */
-                _dl_find_object_from_map
-                  (l, _dlfo_nodelete_mappings + nodelete);
-              ++nodelete;
+#ifdef SHARED
+              /* The kernel may have loaded ld.so with gaps.   */
+              if (!l->l_contiguous && l == &GL(dl_rtld_map))
+                nodelete
+                  = _dlfo_process_initial_noncontiguous_map (l, nodelete);
+              else
+#endif
+                {
+                  if (_dlfo_nodelete_mappings != NULL)
+                    /* Second pass only.  */
+                    _dl_find_object_from_map
+                      (l, _dlfo_nodelete_mappings + nodelete);
+                  ++nodelete;
+                }
             }
           else if (l->l_type == lt_loaded)
             {
diff --git a/elf/rtld.c b/elf/rtld.c
index bfdf632e77..388ad456ac 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -1766,6 +1766,28 @@  dl_main (const ElfW(Phdr) *phdr,
 
   GL(dl_rtld_map).l_phdr = rtld_phdr;
   GL(dl_rtld_map).l_phnum = rtld_ehdr->e_phnum;
+  GL(dl_rtld_map).l_contiguous = 1;
+  /* The linker may not have produced a contiguous object.  The kernel
+     will load the object with actual gaps (unlike the glibc loader
+     for shared objects, which always produces a contiguous mapping).
+     See similar logic in rtld_setup_main_map.  */
+  {
+    ElfW(Addr) expected_load_address = 0;
+    for (const ElfW(Phdr) *ph = rtld_phdr; ph < &phdr[rtld_ehdr->e_phnum];
+	 ++ph)
+      if (ph->p_type == PT_LOAD)
+	{
+	  ElfW(Addr) mapstart = ph->p_vaddr & ~(GLRO(dl_pagesize) - 1);
+	  if (GL(dl_rtld_map).l_contiguous && expected_load_address != 0
+	      && expected_load_address != mapstart)
+	    GL(dl_rtld_map).l_contiguous = 0;
+	  ElfW(Addr) allocend = ph->p_vaddr + ph->p_memsz;
+	  /* The next expected address is the page following this load
+	     segment.  */
+	  expected_load_address = ((allocend + GLRO(dl_pagesize) - 1)
+				   & ~(GLRO(dl_pagesize) - 1));
+	}
+  }
 
 
   /* PT_GNU_RELRO is usually the last phdr.  */