[02/13] elf: switch _dl_map_segment() to anonymous mapping

Message ID 20230318165110.3672749-3-stsp2@yandex.ru
State Superseded
Headers
Series implement dlmem() function |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

stsp March 18, 2023, 4:50 p.m. UTC
  _dl_map_segment() was mapping entire file image and then was skipping
the load of the first segment. Switch _dl_map_segment() to anonymous
mapping and do not skip the map of the first segment.

Use PROT_READ|PROT_WRITE as a protection. _dl_map_segments() later
sets the proper protection for both file-mapped and anonymous parts.

The test-suite was run on x86_64/64 and showed no regressions.

Signed-off-by: Stas Sergeev <stsp2@yandex.ru>
---
 elf/dl-map-segments.h | 73 +++++++++++++++++++++++--------------------
 1 file changed, 39 insertions(+), 34 deletions(-)
  

Comments

Adhemerval Zanella Netto March 29, 2023, 5:01 p.m. UTC | #1
On 18/03/23 13:50, Stas Sergeev via Libc-alpha wrote:
> _dl_map_segment() was mapping entire file image and then was skipping
> the load of the first segment. Switch _dl_map_segment() to anonymous
> mapping and do not skip the map of the first segment.
> 
> Use PROT_READ|PROT_WRITE as a protection. _dl_map_segments() later
> sets the proper protection for both file-mapped and anonymous parts.
> 
> The test-suite was run on x86_64/64 and showed no regressions.
> 
> Signed-off-by: Stas Sergeev <stsp2@yandex.ru>
> ---
>  elf/dl-map-segments.h | 73 +++++++++++++++++++++++--------------------
>  1 file changed, 39 insertions(+), 34 deletions(-)
> 
> diff --git a/elf/dl-map-segments.h b/elf/dl-map-segments.h
> index 504cfc0a41..9af8cae188 100644
> --- a/elf/dl-map-segments.h
> +++ b/elf/dl-map-segments.h
> @@ -22,18 +22,26 @@
>  /* Map a segment and align it properly.  */
>  
>  static __always_inline ElfW(Addr)
> -_dl_map_segment (const struct loadcmd *c, ElfW(Addr) mappref,
> -		 const size_t maplength, int fd)
> +_dl_map_segment (ElfW(Addr) mappref, size_t maplength, size_t mapalign)
>  {
> -  if (__glibc_likely (c->mapalign <= GLRO(dl_pagesize)))
> -    return (ElfW(Addr)) __mmap ((void *) mappref, maplength, c->prot,
> -				MAP_COPY|MAP_FILE, fd, c->mapoff);
> +  int err;
> +  unsigned map_flags = MAP_ANONYMOUS | MAP_PRIVATE;
> +  unsigned prot = PROT_READ | PROT_WRITE;

glibc code guidelines [1] suggest to explicit define the types, so
'unsigned int' here.

[1] https://sourceware.org/glibc/wiki/Style_and_Conventions

> +
> +#ifdef MAP_DENYWRITE
> +  /* Tell mmap() that we are mapping solib. This flag enables things
> +     like LD_PREFER_MAP_32BIT_EXEC. */
> +  map_flags |= MAP_DENYWRITE;
> +#endif

Why do you need o add MAP_DENYWRITE? They are complete ignored by Linux,
as stated in:

include/linux/mman.h
 31 /*
 32  * The historical set of flags that all mmap implementations implicitly
 33  * support when a ->mmap_validate() op is not provided in file_operations.
 34  *
 35  * MAP_EXECUTABLE and MAP_DENYWRITE are completely ignored throughout the
 36  * kernel.
 37  */
 38 #define LEGACY_MAP_MASK (MAP_SHARED \

(if you grep in Linux source code you will see there are only defined for
historical/compatibility reasons, there is indeed no code that actually uses
it).

> +  if (__glibc_likely (mapalign <= GLRO(dl_pagesize)))
> +    return (ElfW(Addr)) __mmap ((void *) mappref, maplength, prot,
> +				map_flags, -1, 0);
>  
>    /* If the segment alignment > the page size, alocate enough space to
>       ensure that the segment can be properly aligned.  */
> -  ElfW(Addr) maplen = (maplength >= c->mapalign
> -		       ? (maplength + c->mapalign)
> -		       : (2 * c->mapalign));
> +  ElfW(Addr) maplen = (maplength >= mapalign
> +		       ? (maplength + mapalign)
> +		       : (2 * mapalign));
>    ElfW(Addr) map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplen,
>  					      PROT_NONE,
>  					      MAP_ANONYMOUS|MAP_PRIVATE,
> @@ -41,26 +49,24 @@ _dl_map_segment (const struct loadcmd *c, ElfW(Addr) mappref,
>    if (__glibc_unlikely ((void *) map_start == MAP_FAILED))
>      return map_start;
>  
> -  ElfW(Addr) map_start_aligned = ALIGN_UP (map_start, c->mapalign);
> -  map_start_aligned = (ElfW(Addr)) __mmap ((void *) map_start_aligned,
> -					   maplength, c->prot,
> -					   MAP_COPY|MAP_FILE|MAP_FIXED,
> -					   fd, c->mapoff);
> -  if (__glibc_unlikely ((void *) map_start_aligned == MAP_FAILED))
> -    __munmap ((void *) map_start, maplen);
> -  else
> +  ElfW(Addr) map_start_aligned = ALIGN_UP (map_start, mapalign);
> +  err = __mprotect ((void *) map_start_aligned, maplength, prot);
> +  if (__glibc_unlikely (err))
>      {
> -      /* Unmap the unused regions.  */
> -      ElfW(Addr) delta = map_start_aligned - map_start;
> -      if (delta)
> -	__munmap ((void *) map_start, delta);
> -      ElfW(Addr) map_end = map_start_aligned + maplength;
> -      map_end = ALIGN_UP (map_end, GLRO(dl_pagesize));
> -      delta = map_start + maplen - map_end;
> -      if (delta)
> -	__munmap ((void *) map_end, delta);
> +      __munmap ((void *) map_start, maplen);
> +      return (ElfW(Addr)) MAP_FAILED;
>      }
>  
> +  /* Unmap the unused regions.  */
> +  ElfW(Addr) delta = map_start_aligned - map_start;
> +  if (delta)
> +    __munmap ((void *) map_start, delta);
> +  ElfW(Addr) map_end = map_start_aligned + maplength;
> +  map_end = ALIGN_UP (map_end, GLRO(dl_pagesize));
> +  delta = map_start + maplen - map_end;
> +  if (delta)
> +    __munmap ((void *) map_end, delta);
> +
>    return map_start_aligned;
>  }
>  

So basically it would add another mmap on program loading.  For instance, loading
a simple empty main programs:

* Before:

  openat(AT_FDCWD, "./main", O_RDONLY|O_CLOEXEC) = 3
  mmap(NULL, 16408, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f9d922a9000
  mmap(0x7f9d922aa000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x7f9d922aa000
  mmap(0x7f9d922ab000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f9d922ab000
  mmap(0x7f9d922ac000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f9d922ac000
  [...]
  openat(AT_FDCWD, "/home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
  mmap(NULL, 2080624, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f9d920ab000
  mprotect(0x7f9d920d1000, 1847296, PROT_NONE) = 0
  mmap(0x7f9d920d1000, 1490944, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x26000) = 0x7f9d920d1000
  mmap(0x7f9d9223d000, 352256, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x192000) = 0x7f9d9223d000
  mmap(0x7f9d92294000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e8000) = 0x7f9d92294000
  mmap(0x7f9d9229a000, 53104, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f9d9229a000

* With this patch:

  openat(AT_FDCWD, "./main", O_RDONLY|O_CLOEXEC) = 3
  mmap(NULL, 16408, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_DENYWRITE, -1, 0) = 0x7f705da76000
  mmap(0x7f705da76000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0) = 0x7f705da76000
  mmap(0x7f705da77000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x7f705da77000
  mmap(0x7f705da78000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f705da78000
  mmap(0x7f705da79000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f705da79000
  [...]
  openat(AT_FDCWD, "/home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
  mmap(NULL, 2080624, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_DENYWRITE, -1, 0) = 0x7f705d878000
  mprotect(0x7f705d89e000, 1847296, PROT_NONE) = 0
  mmap(0x7f705d878000, 155648, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0) = 0x7f705d878000
  mmap(0x7f705d89e000, 1490944, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x26000) = 0x7f705d89e000
  mmap(0x7f705da0a000, 352256, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x192000) = 0x7f705da0a000
  mmap(0x7f705da61000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e8000) = 0x7f705da61000
  mmap(0x7f705da67000, 53104, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f705da67000

And it also slight change the mapping, using the same program:

* Before:

      0x7ffff7dc2000     0x7ffff7de8000    0x26000        0x0  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
      0x7ffff7de8000     0x7ffff7f54000   0x16c000    0x26000  r-xp   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
      0x7ffff7f54000     0x7ffff7faa000    0x56000   0x192000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
      0x7ffff7faa000     0x7ffff7fab000     0x1000   0x1e8000  ---p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
      0x7ffff7fab000     0x7ffff7faf000     0x4000   0x1e8000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
      0x7ffff7faf000     0x7ffff7fb1000     0x2000   0x1ec000  rw-p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so

* With this patch:

      0x7ffff7dc1000     0x7ffff7de7000    0x26000        0x0  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
      0x7ffff7de7000     0x7ffff7f53000   0x16c000    0x26000  r-xp   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
      0x7ffff7f53000     0x7ffff7fa9000    0x56000   0x192000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
      0x7ffff7fa9000     0x7ffff7faa000     0x1000        0x0  ---p
      0x7ffff7faa000     0x7ffff7fae000     0x4000   0x1e8000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
      0x7ffff7fae000     0x7ffff7fb0000     0x2000   0x1ec000  rw-p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so

So I am not seeing any advantage of this refactor: it slight increase the
number of syscalls for library loading and changes the 'debuggability' of
resulting shared library maps.

> @@ -98,7 +104,7 @@ _dl_map_segments (struct link_map *l, int fd,
>             - MAP_BASE_ADDR (l));
>  
>        /* Remember which part of the address space this object uses.  */
> -      l->l_map_start = _dl_map_segment (c, mappref, maplength, fd);
> +      l->l_map_start = _dl_map_segment (mappref, maplength, c->mapalign);
>        if (__glibc_unlikely ((void *) l->l_map_start == MAP_FAILED))
>          return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
>  
> @@ -123,14 +129,14 @@ _dl_map_segments (struct link_map *l, int fd,
>          }
>  
>        l->l_contiguous = 1;
> -
> -      goto postmap;
>      }
> -
> -  /* Remember which part of the address space this object uses.  */
> -  l->l_map_start = c->mapstart + l->l_addr;
> -  l->l_map_end = l->l_map_start + maplength;
> -  l->l_contiguous = !has_holes;
> +  else
> +    {
> +      /* Remember which part of the address space this object uses.  */
> +      l->l_map_start = c->mapstart + l->l_addr;
> +      l->l_map_end = l->l_map_start + maplength;
> +      l->l_contiguous = !has_holes;
> +    }
>  
>    while (c < &loadcmds[nloadcmds])
>      {
> @@ -143,7 +149,6 @@ _dl_map_segments (struct link_map *l, int fd,
>                == MAP_FAILED))
>          return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
>  
> -    postmap:
>        _dl_postprocess_loadcmd (l, header, c);
>  
>        if (c->allocend > c->dataend)
  
stsp March 29, 2023, 6 p.m. UTC | #2
29.03.2023 22:01, Adhemerval Zanella Netto пишет:
> glibc code guidelines [1] suggest to explicit define the types, so
> 'unsigned int' here.

Fixed locally this and a few other
similar instances. Queued to v10.


> [1] https://sourceware.org/glibc/wiki/Style_and_Conventions
>
>> +
>> +#ifdef MAP_DENYWRITE
>> +  /* Tell mmap() that we are mapping solib. This flag enables things
>> +     like LD_PREFER_MAP_32BIT_EXEC. */
>> +  map_flags |= MAP_DENYWRITE;
>> +#endif
> Why do you need o add MAP_DENYWRITE? They are complete ignored by Linux,

But its not ignored in glibc, see

sysdeps/unix/sysv/linux/x86_64/64/mmap_internal.h

Without that flag PREFER_MAP_32BIT_EXEC
test fails.


> So basically it would add another mmap on program loading.  For instance, loading
> a simple empty main programs:

Yes, that's true.
Is this a problem?


> And it also slight change the mapping, using the same program:
>
> * Before:
>
>        0x7ffff7dc2000     0x7ffff7de8000    0x26000        0x0  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>        0x7ffff7de8000     0x7ffff7f54000   0x16c000    0x26000  r-xp   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>        0x7ffff7f54000     0x7ffff7faa000    0x56000   0x192000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>        0x7ffff7faa000     0x7ffff7fab000     0x1000   0x1e8000  ---p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>        0x7ffff7fab000     0x7ffff7faf000     0x4000   0x1e8000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>        0x7ffff7faf000     0x7ffff7fb1000     0x2000   0x1ec000  rw-p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>
> * With this patch:
>
>        0x7ffff7dc1000     0x7ffff7de7000    0x26000        0x0  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>        0x7ffff7de7000     0x7ffff7f53000   0x16c000    0x26000  r-xp   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>        0x7ffff7f53000     0x7ffff7fa9000    0x56000   0x192000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>        0x7ffff7fa9000     0x7ffff7faa000     0x1000        0x0  ---p
>        0x7ffff7faa000     0x7ffff7fae000     0x4000   0x1e8000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>        0x7ffff7fae000     0x7ffff7fb0000     0x2000   0x1ec000  rw-p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so

Mm, was staring on this for a while,
and file offsets and perms looks the
same. What differences do you mean
exactly?

> So I am not seeing any advantage of this refactor: it slight increase the
> number of syscalls for library loading and changes the 'debuggability' of
> resulting shared library maps.

This is not a "pure refactor", but a preparation
to dlmem(). dlmem() allows the user to pre-map
the memory area (optionally), so in the subsequent
patches I first call the user's callback, and if its
not there or doesn't want to pre-map, then I call
_dl_load_segment() for doing the same thing as
a fall-back.

Do you want me to squash also that patch
into the one that actually needs it?
  
Adhemerval Zanella Netto March 29, 2023, 6:29 p.m. UTC | #3
On 29/03/23 15:00, stsp wrote:
> 
> 29.03.2023 22:01, Adhemerval Zanella Netto пишет:
>> glibc code guidelines [1] suggest to explicit define the types, so
>> 'unsigned int' here.
> 
> Fixed locally this and a few other
> similar instances. Queued to v10.
> 
> 
>> [1] https://sourceware.org/glibc/wiki/Style_and_Conventions
>>
>>> +
>>> +#ifdef MAP_DENYWRITE
>>> +  /* Tell mmap() that we are mapping solib. This flag enables things
>>> +     like LD_PREFER_MAP_32BIT_EXEC. */
>>> +  map_flags |= MAP_DENYWRITE;
>>> +#endif
>> Why do you need o add MAP_DENYWRITE? They are complete ignored by Linux,
> 
> But its not ignored in glibc, see
> 
> sysdeps/unix/sysv/linux/x86_64/64/mmap_internal.h
> 
> Without that flag PREFER_MAP_32BIT_EXEC
> test fails.

Because elf/dl-load.h already defines MAP_COPY that handles it, so why
not use it instead?

> 
> 
>> So basically it would add another mmap on program loading.  For instance, loading
>> a simple empty main programs:
> 
> Yes, that's true.
> Is this a problem?
> 
> 

Yes, Linux limits a maximum mmap both per process [1].  This code increase both
the total mapping requires and runtime cost to setup a new shared library.

[1] https://github.com/torvalds/linux/blob/master/Documentation/admin-guide/sysctl/vm.rst#max_map_count


>> And it also slight change the mapping, using the same program:
>>
>> * Before:
>>
>>        0x7ffff7dc2000     0x7ffff7de8000    0x26000        0x0  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>        0x7ffff7de8000     0x7ffff7f54000   0x16c000    0x26000  r-xp   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>        0x7ffff7f54000     0x7ffff7faa000    0x56000   0x192000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>        0x7ffff7faa000     0x7ffff7fab000     0x1000   0x1e8000  ---p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>        0x7ffff7fab000     0x7ffff7faf000     0x4000   0x1e8000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>        0x7ffff7faf000     0x7ffff7fb1000     0x2000   0x1ec000  rw-p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>
>> * With this patch:
>>
>>        0x7ffff7dc1000     0x7ffff7de7000    0x26000        0x0  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>        0x7ffff7de7000     0x7ffff7f53000   0x16c000    0x26000  r-xp   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>        0x7ffff7f53000     0x7ffff7fa9000    0x56000   0x192000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>        0x7ffff7fa9000     0x7ffff7faa000     0x1000        0x0  ---p
>>        0x7ffff7faa000     0x7ffff7fae000     0x4000   0x1e8000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>        0x7ffff7fae000     0x7ffff7fb0000     0x2000   0x1ec000  rw-p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
> 
> Mm, was staring on this for a while,
> and file offsets and perms looks the
> same. What differences do you mean
> exactly?

The PROT_NONE mapping now does not have a file associated:

  0x7ffff7fa9000     0x7ffff7faa000     0x1000        0x0  ---p

This is not a problem itself, but again this change decrease the information
that some tools might use to analyze the memory mapping.

> 
>> So I am not seeing any advantage of this refactor: it slight increase the
>> number of syscalls for library loading and changes the 'debuggability' of
>> resulting shared library maps.
> 
> This is not a "pure refactor", but a preparation
> to dlmem(). dlmem() allows the user to pre-map
> the memory area (optionally), so in the subsequent
> patches I first call the user's callback, and if its
> not there or doesn't want to pre-map, then I call
> _dl_load_segment() for doing the same thing as
> a fall-back.
> 
> Do you want me to squash also that patch
> into the one that actually needs it?
> 

No need, this patch as-is is nack.  This patch has only cons: it increases runtime
cost for generic case, increase the minimum number of mmap segment a shared library
required, and decrease debuggability.
  
stsp March 29, 2023, 6:46 p.m. UTC | #4
29.03.2023 23:29, Adhemerval Zanella Netto пишет:
> On 29/03/23 15:00, stsp wrote:
>> But its not ignored in glibc, see
>>
>> sysdeps/unix/sysv/linux/x86_64/64/mmap_internal.h
>>
>> Without that flag PREFER_MAP_32BIT_EXEC
>> test fails.
> Because elf/dl-load.h already defines MAP_COPY that handles it, so why
> not use it instead?

Would MAP_COPY be a good choice for
explicitly anonymous mapping? If so -
can change.

>>> So basically it would add another mmap on program loading.  For instance, loading
>>> a simple empty main programs:
>> Yes, that's true.
>> Is this a problem?
>>
>>
> Yes, Linux limits a maximum mmap both per process [1].  This code increase both
> the total mapping requires and runtime cost to setup a new shared library.
>
> [1] https://github.com/torvalds/linux/blob/master/Documentation/admin-guide/sysctl/vm.rst#max_map_count

This talks about the map areas.
I don't think map areas number changed.
Extra syscall - yes. Extra map area - no.
So I don't think my patch is a subject of
the aforementioned system limit.

>>> And it also slight change the mapping, using the same program:
>>>
>>> * Before:
>>>
>>>         0x7ffff7dc2000     0x7ffff7de8000    0x26000        0x0  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>         0x7ffff7de8000     0x7ffff7f54000   0x16c000    0x26000  r-xp   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>         0x7ffff7f54000     0x7ffff7faa000    0x56000   0x192000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>         0x7ffff7faa000     0x7ffff7fab000     0x1000   0x1e8000  ---p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>         0x7ffff7fab000     0x7ffff7faf000     0x4000   0x1e8000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>         0x7ffff7faf000     0x7ffff7fb1000     0x2000   0x1ec000  rw-p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>
>>> * With this patch:
>>>
>>>         0x7ffff7dc1000     0x7ffff7de7000    0x26000        0x0  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>         0x7ffff7de7000     0x7ffff7f53000   0x16c000    0x26000  r-xp   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>         0x7ffff7f53000     0x7ffff7fa9000    0x56000   0x192000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>         0x7ffff7fa9000     0x7ffff7faa000     0x1000        0x0  ---p
>>>         0x7ffff7faa000     0x7ffff7fae000     0x4000   0x1e8000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>         0x7ffff7fae000     0x7ffff7fb0000     0x2000   0x1ec000  rw-p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>> Mm, was staring on this for a while,
>> and file offsets and perms looks the
>> same. What differences do you mean
>> exactly?
> The PROT_NONE mapping now does not have a file associated:
>
>    0x7ffff7fa9000     0x7ffff7faa000     0x1000        0x0  ---p
>
> This is not a problem itself, but again this change decrease the information
> that some tools might use to analyze the memory mapping.

Ah, that seems to be a "hole" are
between segments. I actually think
my handling is much better. Without
my patch, such holes are filled with
actually the _random_ page from the
original file mapping. Just whatever
page happened to have that offset.
Do you think the random page from
the file is a good idea for tooling/debugging?
  
Adhemerval Zanella Netto March 29, 2023, 7:17 p.m. UTC | #5
On 29/03/23 15:46, stsp wrote:
> 
> 29.03.2023 23:29, Adhemerval Zanella Netto пишет:
>> On 29/03/23 15:00, stsp wrote:
>>> But its not ignored in glibc, see
>>>
>>> sysdeps/unix/sysv/linux/x86_64/64/mmap_internal.h
>>>
>>> Without that flag PREFER_MAP_32BIT_EXEC
>>> test fails.
>> Because elf/dl-load.h already defines MAP_COPY that handles it, so why
>> not use it instead?
> 
> Would MAP_COPY be a good choice for
> explicitly anonymous mapping? If so -
> can change.

It avoid code duplication.

> 
>>>> So basically it would add another mmap on program loading.  For instance, loading
>>>> a simple empty main programs:
>>> Yes, that's true.
>>> Is this a problem?
>>>
>>>
>> Yes, Linux limits a maximum mmap both per process [1].  This code increase both
>> the total mapping requires and runtime cost to setup a new shared library.
>>
>> [1] https://github.com/torvalds/linux/blob/master/Documentation/admin-guide/sysctl/vm.rst#max_map_count
> 
> This talks about the map areas.
> I don't think map areas number changed.
> Extra syscall - yes. Extra map area - no.
> So I don't think my patch is a subject of
> the aforementioned system limit.

In fact I think this is gdb limitation, accessing the procfs directly there is
no extra mapping and all the segments are indeed mapped by associated shared
libraries.

> 
>>>> And it also slight change the mapping, using the same program:
>>>>
>>>> * Before:
>>>>
>>>>         0x7ffff7dc2000     0x7ffff7de8000    0x26000        0x0  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>>         0x7ffff7de8000     0x7ffff7f54000   0x16c000    0x26000  r-xp   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>>         0x7ffff7f54000     0x7ffff7faa000    0x56000   0x192000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>>         0x7ffff7faa000     0x7ffff7fab000     0x1000   0x1e8000  ---p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>>         0x7ffff7fab000     0x7ffff7faf000     0x4000   0x1e8000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>>         0x7ffff7faf000     0x7ffff7fb1000     0x2000   0x1ec000  rw-p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>>
>>>> * With this patch:
>>>>
>>>>         0x7ffff7dc1000     0x7ffff7de7000    0x26000        0x0  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>>         0x7ffff7de7000     0x7ffff7f53000   0x16c000    0x26000  r-xp   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>>         0x7ffff7f53000     0x7ffff7fa9000    0x56000   0x192000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>>         0x7ffff7fa9000     0x7ffff7faa000     0x1000        0x0  ---p
>>>>         0x7ffff7faa000     0x7ffff7fae000     0x4000   0x1e8000  r--p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>>>         0x7ffff7fae000     0x7ffff7fb0000     0x2000   0x1ec000  rw-p   /home/azanella/Projects/glibc/build/x86_64-linux-gnu/libc.so
>>> Mm, was staring on this for a while,
>>> and file offsets and perms looks the
>>> same. What differences do you mean
>>> exactly?
>> The PROT_NONE mapping now does not have a file associated:
>>
>>    0x7ffff7fa9000     0x7ffff7faa000     0x1000        0x0  ---p
>>
>> This is not a problem itself, but again this change decrease the information
>> that some tools might use to analyze the memory mapping.
> 
> Ah, that seems to be a "hole" are
> between segments. I actually think
> my handling is much better. Without
> my patch, such holes are filled with
> actually the _random_ page from the
> original file mapping. Just whatever
> page happened to have that offset.
> Do you think the random page from
> the file is a good idea for tooling/debugging?

It seems to be a gdb limitation that is showing some wrong information.  But again,
I really don't see *why* this change is needed: the current algorithms already
maps the ELF segments correctly and have random data on the hole does not really
matter (it would be mapped as PROT_NONE).
  
stsp March 29, 2023, 7:43 p.m. UTC | #6
30.03.2023 00:17, Adhemerval Zanella Netto пишет:
> On 29/03/23 15:46, stsp wrote:
>
>> Would MAP_COPY be a good choice for
>> explicitly anonymous mapping? If so -
>> can change.
> It avoid code duplication.

Queued for v10.

>>
>> Ah, that seems to be a "hole" are
>> between segments. I actually think
>> my handling is much better. Without
>> my patch, such holes are filled with
>> actually the _random_ page from the
>> original file mapping. Just whatever
>> page happened to have that offset.
>> Do you think the random page from
>> the file is a good idea for tooling/debugging?
> It seems to be a gdb limitation that is showing some wrong information.  But again,
> I really don't see *why* this change is needed: the current algorithms already
> maps the ELF segments correctly and have random data on the hole does not really
> matter (it would be mapped as PROT_NONE).
Its only for dlmem().
In dlmem() you need to do anonymous
mmap at the relocation address, and
after that we shouldn't skip the first
segment.
I can remove this patch if you insist, but
the cost will likely be quite high, as I'll
need then to propagate some flag up to
_dl_map_segments() to tell if we can
skip the first segment or not, and if
we should call _dl_map_segment() or
a premap callback.
  

Patch

diff --git a/elf/dl-map-segments.h b/elf/dl-map-segments.h
index 504cfc0a41..9af8cae188 100644
--- a/elf/dl-map-segments.h
+++ b/elf/dl-map-segments.h
@@ -22,18 +22,26 @@ 
 /* Map a segment and align it properly.  */
 
 static __always_inline ElfW(Addr)
-_dl_map_segment (const struct loadcmd *c, ElfW(Addr) mappref,
-		 const size_t maplength, int fd)
+_dl_map_segment (ElfW(Addr) mappref, size_t maplength, size_t mapalign)
 {
-  if (__glibc_likely (c->mapalign <= GLRO(dl_pagesize)))
-    return (ElfW(Addr)) __mmap ((void *) mappref, maplength, c->prot,
-				MAP_COPY|MAP_FILE, fd, c->mapoff);
+  int err;
+  unsigned map_flags = MAP_ANONYMOUS | MAP_PRIVATE;
+  unsigned prot = PROT_READ | PROT_WRITE;
+
+#ifdef MAP_DENYWRITE
+  /* Tell mmap() that we are mapping solib. This flag enables things
+     like LD_PREFER_MAP_32BIT_EXEC. */
+  map_flags |= MAP_DENYWRITE;
+#endif
+  if (__glibc_likely (mapalign <= GLRO(dl_pagesize)))
+    return (ElfW(Addr)) __mmap ((void *) mappref, maplength, prot,
+				map_flags, -1, 0);
 
   /* If the segment alignment > the page size, allocate enough space to
      ensure that the segment can be properly aligned.  */
-  ElfW(Addr) maplen = (maplength >= c->mapalign
-		       ? (maplength + c->mapalign)
-		       : (2 * c->mapalign));
+  ElfW(Addr) maplen = (maplength >= mapalign
+		       ? (maplength + mapalign)
+		       : (2 * mapalign));
   ElfW(Addr) map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplen,
 					      PROT_NONE,
 					      MAP_ANONYMOUS|MAP_PRIVATE,
@@ -41,26 +49,24 @@  _dl_map_segment (const struct loadcmd *c, ElfW(Addr) mappref,
   if (__glibc_unlikely ((void *) map_start == MAP_FAILED))
     return map_start;
 
-  ElfW(Addr) map_start_aligned = ALIGN_UP (map_start, c->mapalign);
-  map_start_aligned = (ElfW(Addr)) __mmap ((void *) map_start_aligned,
-					   maplength, c->prot,
-					   MAP_COPY|MAP_FILE|MAP_FIXED,
-					   fd, c->mapoff);
-  if (__glibc_unlikely ((void *) map_start_aligned == MAP_FAILED))
-    __munmap ((void *) map_start, maplen);
-  else
+  ElfW(Addr) map_start_aligned = ALIGN_UP (map_start, mapalign);
+  err = __mprotect ((void *) map_start_aligned, maplength, prot);
+  if (__glibc_unlikely (err))
     {
-      /* Unmap the unused regions.  */
-      ElfW(Addr) delta = map_start_aligned - map_start;
-      if (delta)
-	__munmap ((void *) map_start, delta);
-      ElfW(Addr) map_end = map_start_aligned + maplength;
-      map_end = ALIGN_UP (map_end, GLRO(dl_pagesize));
-      delta = map_start + maplen - map_end;
-      if (delta)
-	__munmap ((void *) map_end, delta);
+      __munmap ((void *) map_start, maplen);
+      return (ElfW(Addr)) MAP_FAILED;
     }
 
+  /* Unmap the unused regions.  */
+  ElfW(Addr) delta = map_start_aligned - map_start;
+  if (delta)
+    __munmap ((void *) map_start, delta);
+  ElfW(Addr) map_end = map_start_aligned + maplength;
+  map_end = ALIGN_UP (map_end, GLRO(dl_pagesize));
+  delta = map_start + maplen - map_end;
+  if (delta)
+    __munmap ((void *) map_end, delta);
+
   return map_start_aligned;
 }
 
@@ -98,7 +104,7 @@  _dl_map_segments (struct link_map *l, int fd,
            - MAP_BASE_ADDR (l));
 
       /* Remember which part of the address space this object uses.  */
-      l->l_map_start = _dl_map_segment (c, mappref, maplength, fd);
+      l->l_map_start = _dl_map_segment (mappref, maplength, c->mapalign);
       if (__glibc_unlikely ((void *) l->l_map_start == MAP_FAILED))
         return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
 
@@ -123,14 +129,14 @@  _dl_map_segments (struct link_map *l, int fd,
         }
 
       l->l_contiguous = 1;
-
-      goto postmap;
     }
-
-  /* Remember which part of the address space this object uses.  */
-  l->l_map_start = c->mapstart + l->l_addr;
-  l->l_map_end = l->l_map_start + maplength;
-  l->l_contiguous = !has_holes;
+  else
+    {
+      /* Remember which part of the address space this object uses.  */
+      l->l_map_start = c->mapstart + l->l_addr;
+      l->l_map_end = l->l_map_start + maplength;
+      l->l_contiguous = !has_holes;
+    }
 
   while (c < &loadcmds[nloadcmds])
     {
@@ -143,7 +149,6 @@  _dl_map_segments (struct link_map *l, int fd,
               == MAP_FAILED))
         return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
 
-    postmap:
       _dl_postprocess_loadcmd (l, header, c);
 
       if (c->allocend > c->dataend)