[v3,13/19] elf: Remove LD_PROFILE for static binaries

Message ID 20231106202552.3404059-14-adhemerval.zanella@linaro.org (mailing list archive)
State Committed
Commit 55f41ef8de4a4d0c5762d78659e11202d3c765d4
Delegated to: Siddhesh Poyarekar
Headers
Series Improve loader environment variable handling |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_glibc_check--master-arm fail Patch failed to apply

Commit Message

Adhemerval Zanella Netto Nov. 6, 2023, 8:25 p.m. UTC
_dl_non_dynamic_init does not parse LD_PROFILE, so profiling is never
enabled for dlopen'ed objects in static binaries.  Since dlopen is
deprecated for static binaries, it is better to remove the support.

It also allows the profile support to be trimmed out of libc.a.

Checked on x86_64-linux-gnu.
---
 elf/Makefile                              | 10 ++--
 elf/dl-load.c                             | 10 ++--
 elf/dl-runtime.c                          | 12 ++---
 elf/dl-support.c                          |  9 ----
 elf/{dl-profstub.c => libc-dl-profstub.c} |  0
 include/dlfcn.h                           |  5 ++
 sysdeps/aarch64/dl-machine.h              |  4 +-
 sysdeps/aarch64/dl-trampoline.S           |  2 +-
 sysdeps/alpha/dl-machine.h                |  6 ++-
 sysdeps/alpha/dl-trampoline.S             |  4 ++
 sysdeps/arm/dl-machine.h                  |  4 +-
 sysdeps/arm/dl-trampoline.S               |  2 +-
 sysdeps/hppa/dl-machine.h                 | 36 +++++++------
 sysdeps/hppa/dl-trampoline.S              |  2 +
 sysdeps/i386/dl-machine.h                 |  2 +
 sysdeps/i386/dl-trampoline.S              |  2 +-
 sysdeps/ia64/dl-machine.h                 | 10 ++--
 sysdeps/ia64/dl-trampoline.S              |  2 +-
 sysdeps/loongarch/dl-machine.h            |  6 ++-
 sysdeps/loongarch/dl-trampoline.h         |  2 +
 sysdeps/m68k/dl-machine.h                 |  4 +-
 sysdeps/m68k/dl-trampoline.S              |  2 +
 sysdeps/powerpc/powerpc32/dl-machine.c    |  2 +-
 sysdeps/powerpc/powerpc32/dl-machine.h    | 10 ++--
 sysdeps/powerpc/powerpc32/dl-trampoline.S |  2 +-
 sysdeps/powerpc/powerpc64/dl-machine.h    | 20 ++++---
 sysdeps/powerpc/powerpc64/dl-trampoline.S |  2 +-
 sysdeps/s390/s390-32/dl-machine.h         |  8 +--
 sysdeps/s390/s390-32/dl-trampoline.h      |  2 +-
 sysdeps/s390/s390-64/dl-machine.h         |  8 +--
 sysdeps/s390/s390-64/dl-trampoline.h      |  2 +-
 sysdeps/sh/dl-machine.h                   |  2 +
 sysdeps/sh/dl-trampoline.S                |  2 +
 sysdeps/sparc/sparc32/dl-machine.h        |  4 +-
 sysdeps/sparc/sparc32/dl-trampoline.S     |  2 +
 sysdeps/sparc/sparc64/dl-machine.h        |  4 +-
 sysdeps/sparc/sparc64/dl-trampoline.S     |  2 +
 sysdeps/x86_64/dl-machine.h               |  2 +
 sysdeps/x86_64/dl-trampoline.S            | 64 ++++++++++++-----------
 39 files changed, 165 insertions(+), 109 deletions(-)
 rename elf/{dl-profstub.c => libc-dl-profstub.c} (100%)
  

Comments

Siddhesh Poyarekar Nov. 20, 2023, 10:55 p.m. UTC | #1
On 2023-11-06 15:25, Adhemerval Zanella wrote:
> The _dl_non_dynamic_init does not parse LD_PROFILE, which does not
> enable profile for dlopen objects.  Since dlopen is deprecated for
> static objects, it is better to remove the support.
> 
> It also allows to trim down libc.a of profile support.
> 
> Checked on x86_64-linux-gnu.
> ---

LGTM, but please also do a bmg (build-many-glibcs.py) run to verify this change.

Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>

>   elf/Makefile                              | 10 ++--
>   elf/dl-load.c                             | 10 ++--
>   elf/dl-runtime.c                          | 12 ++---
>   elf/dl-support.c                          |  9 ----
>   elf/{dl-profstub.c => libc-dl-profstub.c} |  0
>   include/dlfcn.h                           |  5 ++
>   sysdeps/aarch64/dl-machine.h              |  4 +-
>   sysdeps/aarch64/dl-trampoline.S           |  2 +-
>   sysdeps/alpha/dl-machine.h                |  6 ++-
>   sysdeps/alpha/dl-trampoline.S             |  4 ++
>   sysdeps/arm/dl-machine.h                  |  4 +-
>   sysdeps/arm/dl-trampoline.S               |  2 +-
>   sysdeps/hppa/dl-machine.h                 | 36 +++++++------
>   sysdeps/hppa/dl-trampoline.S              |  2 +
>   sysdeps/i386/dl-machine.h                 |  2 +
>   sysdeps/i386/dl-trampoline.S              |  2 +-
>   sysdeps/ia64/dl-machine.h                 | 10 ++--
>   sysdeps/ia64/dl-trampoline.S              |  2 +-
>   sysdeps/loongarch/dl-machine.h            |  6 ++-
>   sysdeps/loongarch/dl-trampoline.h         |  2 +
>   sysdeps/m68k/dl-machine.h                 |  4 +-
>   sysdeps/m68k/dl-trampoline.S              |  2 +
>   sysdeps/powerpc/powerpc32/dl-machine.c    |  2 +-
>   sysdeps/powerpc/powerpc32/dl-machine.h    | 10 ++--
>   sysdeps/powerpc/powerpc32/dl-trampoline.S |  2 +-
>   sysdeps/powerpc/powerpc64/dl-machine.h    | 20 ++++---
>   sysdeps/powerpc/powerpc64/dl-trampoline.S |  2 +-
>   sysdeps/s390/s390-32/dl-machine.h         |  8 +--
>   sysdeps/s390/s390-32/dl-trampoline.h      |  2 +-
>   sysdeps/s390/s390-64/dl-machine.h         |  8 +--
>   sysdeps/s390/s390-64/dl-trampoline.h      |  2 +-
>   sysdeps/sh/dl-machine.h                   |  2 +
>   sysdeps/sh/dl-trampoline.S                |  2 +
>   sysdeps/sparc/sparc32/dl-machine.h        |  4 +-
>   sysdeps/sparc/sparc32/dl-trampoline.S     |  2 +
>   sysdeps/sparc/sparc64/dl-machine.h        |  4 +-
>   sysdeps/sparc/sparc64/dl-trampoline.S     |  2 +
>   sysdeps/x86_64/dl-machine.h               |  2 +
>   sysdeps/x86_64/dl-trampoline.S            | 64 ++++++++++++-----------
>   39 files changed, 165 insertions(+), 109 deletions(-)
>   rename elf/{dl-profstub.c => libc-dl-profstub.c} (100%)
> 
> diff --git a/elf/Makefile b/elf/Makefile
> index 08896bb895..d17ff5424e 100644
> --- a/elf/Makefile
> +++ b/elf/Makefile
> @@ -37,12 +37,12 @@ routines = \
>     dl-iteratephdr \
>     dl-libc \
>     dl-origin \
> -  dl-profstub \
>     dl-reloc-static-pie \
>     dl-support \
>     dl-sym \
>     dl-sysdep \
>     enbl-secure \
> +  libc-dl-profstub  \
>     libc-dl_find_object \
>     libc_early_init \
>     rtld_static_init \
> @@ -72,7 +72,6 @@ dl-routines = \
>     dl-open \
>     dl-origin \
>     dl-printf \
> -  dl-profile \
>     dl-reloc \
>     dl-runtime \
>     dl-scope \
> @@ -117,7 +116,11 @@ elide-routines.os = \
>     # elide-routines.os
>   
>   # These object files are only included in the dynamically-linked libc.
> -shared-only-routines = libc-dl_find_object
> +shared-only-routines = \
> +  libc-dl-profile \
> +  libc-dl-profstub \
> +  libc-dl_find_object \
> +  # shared-only-routines
>   
>   # ld.so uses those routines, plus some special stuff for being the program
>   # interpreter and operating independent of libc.
> @@ -135,6 +138,7 @@ rtld-routines = \
>     dl-libc_freeres \
>     dl-minimal \
>     dl-mutex \
> +  dl-profile \
>     dl-sysdep \
>     dl-usage \
>     rtld \
> diff --git a/elf/dl-load.c b/elf/dl-load.c
> index 2923b1141d..7356a4fe48 100644
> --- a/elf/dl-load.c
> +++ b/elf/dl-load.c
> @@ -1443,11 +1443,6 @@ cannot enable executable stack as shared object requires");
>        name by which the DSO is actually known.  Add that as well.  */
>     if (__glibc_unlikely (origname != NULL))
>       add_name_to_object (l, origname);
> -#else
> -  /* Audit modules only exist when linking is dynamic so ORIGNAME
> -     cannot be non-NULL.  */
> -  assert (origname == NULL);
> -#endif
>   
>     /* When we profile the SONAME might be needed for something else but
>        loading.  Add it right away.  */
> @@ -1455,6 +1450,11 @@ cannot enable executable stack as shared object requires");
>         && l->l_info[DT_SONAME] != NULL)
>       add_name_to_object (l, ((const char *) D_PTR (l, l_info[DT_STRTAB])
>   			    + l->l_info[DT_SONAME]->d_un.d_val));
> +#else
> +  /* Audit modules only exist when linking is dynamic so ORIGNAME
> +     cannot be non-NULL.  */
> +  assert (origname == NULL);
> +#endif
>   
>     /* If we have newly loaded libc.so, update the namespace
>        description.  */
> diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c
> index 32a8bfcf74..fe7deda32a 100644
> --- a/elf/dl-runtime.c
> +++ b/elf/dl-runtime.c
> @@ -162,14 +162,14 @@ _dl_fixup (
>     return elf_machine_fixup_plt (l, result, refsym, sym, reloc, rel_addr, value);
>   }
>   
> -#ifndef PROF
> +#if !defined PROF && defined SHARED
>   DL_FIXUP_VALUE_TYPE
>   __attribute ((noinline))
>   DL_ARCH_FIXUP_ATTRIBUTE
>   _dl_profile_fixup (
> -#ifdef ELF_MACHINE_RUNTIME_FIXUP_ARGS
> +# ifdef ELF_MACHINE_RUNTIME_FIXUP_ARGS
>   		   ELF_MACHINE_RUNTIME_FIXUP_ARGS,
> -#endif
> +# endif
>   		   struct link_map *l, ElfW(Word) reloc_arg,
>   		   ElfW(Addr) retaddr, void *regs, long int *framesizep)
>   {
> @@ -309,14 +309,12 @@ _dl_profile_fixup (
>         /* And now perhaps the relocation addend.  */
>         value = elf_machine_plt_value (l, reloc, value);
>   
> -#ifdef SHARED
>         /* Auditing checkpoint: we have a new binding.  Provide the
>   	 auditing libraries the possibility to change the value and
>   	 tell us whether further auditing is wanted.  */
>         if (defsym != NULL && GLRO(dl_naudit) > 0)
>   	_dl_audit_symbind (l, reloc_result, reloc, defsym, &value, result,
>   			   true);
> -#endif
>   
>         /* Store the result for later runs.  */
>         if (__glibc_likely (! GLRO(dl_bind_not)))
> @@ -335,11 +333,9 @@ _dl_profile_fixup (
>     long int framesize = -1;
>   
>   
> -#ifdef SHARED
>     /* Auditing checkpoint: report the PLT entering and allow the
>        auditors to change the value.  */
>     _dl_audit_pltenter (l, reloc_result, &value, regs, &framesize);
> -#endif
>   
>     /* Store the frame size information.  */
>     *framesizep = framesize;
> @@ -349,4 +345,4 @@ _dl_profile_fixup (
>     return value;
>   }
>   
> -#endif /* PROF */
> +#endif /* !defined PROF && defined SHARED */
> diff --git a/elf/dl-support.c b/elf/dl-support.c
> index 44a54dea07..31a608df87 100644
> --- a/elf/dl-support.c
> +++ b/elf/dl-support.c
> @@ -60,10 +60,6 @@ int _dl_dynamic_weak;
>   /* If nonzero print warnings about problematic situations.  */
>   int _dl_verbose;
>   
> -/* We never do profiling.  */
> -const char *_dl_profile;
> -const char *_dl_profile_output;
> -
>   /* Names of shared object for which the RUNPATHs and RPATHs should be
>      ignored.  */
>   const char *_dl_inhibit_rpath;
> @@ -301,11 +297,6 @@ _dl_non_dynamic_init (void)
>   
>     _dl_dynamic_weak = *(getenv ("LD_DYNAMIC_WEAK") ?: "") == '\0';
>   
> -  _dl_profile_output = getenv ("LD_PROFILE_OUTPUT");
> -  if (_dl_profile_output == NULL || _dl_profile_output[0] == '\0')
> -    _dl_profile_output
> -      = &"/var/tmp\0/var/profile"[__libc_enable_secure ? 9 : 0];
> -
>     if (__libc_enable_secure)
>       {
>         static const char unsecure_envvars[] =
> diff --git a/elf/dl-profstub.c b/elf/libc-dl-profstub.c
> similarity index 100%
> rename from elf/dl-profstub.c
> rename to elf/libc-dl-profstub.c
> diff --git a/include/dlfcn.h b/include/dlfcn.h
> index ae25f05303..a44420fa37 100644
> --- a/include/dlfcn.h
> +++ b/include/dlfcn.h
> @@ -135,5 +135,10 @@ extern int __dladdr1 (const void *address, Dl_info *info,
>   extern int __dlinfo (void *handle, int request, void *arg);
>   extern char *__dlerror (void);
>   
> +#ifndef SHARED
> +# undef DL_CALL_FCT
> +# define DL_CALL_FCT(fctp, args) ((fctp) args)
> +#endif
> +
>   #endif
>   #endif
> diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
> index 4170b9269f..a56eb96a79 100644
> --- a/sysdeps/aarch64/dl-machine.h
> +++ b/sysdeps/aarch64/dl-machine.h
> @@ -68,7 +68,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>       {
>         ElfW(Addr) *got;
>         extern void _dl_runtime_resolve (ElfW(Word));
> -      extern void _dl_runtime_profile (ElfW(Word));
>   
>         got = (ElfW(Addr) *) D_PTR (l, l_info[DT_PLTGOT]);
>         if (got[1])
> @@ -83,6 +82,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	 to intercept the calls to collect information.  In this case we
>   	 don't store the address in the GOT so that all future calls also
>   	 end in this function.  */
> +#ifdef SHARED
> +      extern void _dl_runtime_profile (ElfW(Word));
>         if ( profile)
>   	{
>   	   got[2] = (ElfW(Addr)) &_dl_runtime_profile;
> @@ -94,6 +95,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	    GL(dl_profile_map) = l;
>   	}
>         else
> +#endif
>   	{
>   	  /* This function will get called to fix up the GOT entry
>   	     indicated by the offset on the stack, and then jump to
> diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S
> index a3474ba741..fc82151ba2 100644
> --- a/sysdeps/aarch64/dl-trampoline.S
> +++ b/sysdeps/aarch64/dl-trampoline.S
> @@ -122,7 +122,7 @@ _dl_runtime_resolve:
>   
>   	cfi_endproc
>   	.size _dl_runtime_resolve, .-_dl_runtime_resolve
> -#ifndef PROF
> +#if !defined PROF && defined SHARED
>   	.globl _dl_runtime_profile
>   	.type _dl_runtime_profile, #function
>   	cfi_startproc
> diff --git a/sysdeps/alpha/dl-machine.h b/sysdeps/alpha/dl-machine.h
> index ed5389e3c5..7fe2afca93 100644
> --- a/sysdeps/alpha/dl-machine.h
> +++ b/sysdeps/alpha/dl-machine.h
> @@ -75,9 +75,7 @@ elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
>   			   int lazy, int profile)
>   {
>     extern char _dl_runtime_resolve_new[] attribute_hidden;
> -  extern char _dl_runtime_profile_new[] attribute_hidden;
>     extern char _dl_runtime_resolve_old[] attribute_hidden;
> -  extern char _dl_runtime_profile_old[] attribute_hidden;
>   
>     struct pltgot {
>       char *resolve;
> @@ -109,6 +107,9 @@ elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
>     else
>       resolve = _dl_runtime_resolve_old;
>   
> +#ifdef SHARED
> +  extern char _dl_runtime_profile_new[] attribute_hidden;
> +  extern char _dl_runtime_profile_old[] attribute_hidden;
>     if (__builtin_expect (profile, 0))
>       {
>         if (secureplt)
> @@ -123,6 +124,7 @@ elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
>   	  GL(dl_profile_map) = map;
>   	}
>       }
> +#endif
>   
>     pg->resolve = resolve;
>     pg->link = map;
> diff --git a/sysdeps/alpha/dl-trampoline.S b/sysdeps/alpha/dl-trampoline.S
> index f8c3d33906..5dffa62587 100644
> --- a/sysdeps/alpha/dl-trampoline.S
> +++ b/sysdeps/alpha/dl-trampoline.S
> @@ -89,6 +89,7 @@ _dl_runtime_resolve_new:
>   	.globl	_dl_runtime_profile_new
>   	.type	_dl_runtime_profile_new, @function
>   
> +#ifdef SHARED
>   #undef FRAMESIZE
>   #define FRAMESIZE	20*8
>   
> @@ -207,6 +208,7 @@ _dl_runtime_profile_new:
>   
>   	cfi_endproc
>   	.size	_dl_runtime_profile_new, .-_dl_runtime_profile_new
> +#endif /* SHARED */
>   
>   	.align	4
>   	.globl	_dl_runtime_resolve_old
> @@ -340,6 +342,7 @@ _dl_runtime_resolve_old:
>   	.usepv	_dl_runtime_profile_old, no
>   	.type	_dl_runtime_profile_old, @function
>   
> +#ifdef SHARED
>   	/* We save the registers in a different order than desired by
>   	   .mask/.fmask, so we have to use explicit cfi directives.  */
>   	cfi_startproc
> @@ -538,3 +541,4 @@ _dl_runtime_profile_old:
>   
>   	cfi_endproc
>   	.size	_dl_runtime_profile_old, .-_dl_runtime_profile_old
> +#endif /* SHARED */
> diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
> index d720c02c96..a68679e653 100644
> --- a/sysdeps/arm/dl-machine.h
> +++ b/sysdeps/arm/dl-machine.h
> @@ -65,7 +65,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   {
>     Elf32_Addr *got;
>     extern void _dl_runtime_resolve (Elf32_Word);
> -  extern void _dl_runtime_profile (Elf32_Word);
>   
>     if (l->l_info[DT_JMPREL] && lazy)
>       {
> @@ -88,6 +87,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	 to intercept the calls to collect information.  In this case we
>   	 don't store the address in the GOT so that all future calls also
>   	 end in this function.  */
> +#ifdef SHARED
> +      extern void _dl_runtime_profile (Elf32_Word);
>         if (profile)
>   	{
>   	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
> @@ -99,6 +100,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	    GL(dl_profile_map) = l;
>   	}
>         else
> +#endif
>   	/* This function will get called to fix up the GOT entry indicated by
>   	   the offset on the stack, and then jump to the resolved address.  */
>   	got[2] = (Elf32_Addr) &_dl_runtime_resolve;
> diff --git a/sysdeps/arm/dl-trampoline.S b/sysdeps/arm/dl-trampoline.S
> index 23c2476917..2df5b7ee36 100644
> --- a/sysdeps/arm/dl-trampoline.S
> +++ b/sysdeps/arm/dl-trampoline.S
> @@ -70,7 +70,7 @@ _dl_runtime_resolve:
>   	cfi_endproc
>   	.size _dl_runtime_resolve, .-_dl_runtime_resolve
>   
> -#ifndef PROF
> +#if !defined PROF && defined SHARED
>   	.globl _dl_runtime_profile
>   	.type _dl_runtime_profile, #function
>   	CFI_SECTIONS
> diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h
> index 4e6e70b3c9..993593de5d 100644
> --- a/sysdeps/hppa/dl-machine.h
> +++ b/sysdeps/hppa/dl-machine.h
> @@ -195,7 +195,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>     end_jmprel = jmprel + l->l_info[DT_PLTRELSZ]->d_un.d_val;
>   
>     extern void _dl_runtime_resolve (void);
> -  extern void _dl_runtime_profile (void);
>   
>     /* Linking lazily */
>     if (lazy)
> @@ -235,22 +234,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	      got[1] = (Elf32_Addr) l;
>   
>   	      /* This function will be called to perform the relocation. */
> -	      if (__builtin_expect (!profile, 1))
> -		{
> -		  /* If a static application called us, then _dl_runtime_resolve is not
> -		     a function descriptor, but the *real* address of the function... */
> -		  if((unsigned long) &_dl_runtime_resolve & 3)
> -		    {
> -		      got[-2] = (Elf32_Addr) ((struct fdesc *)
> -				  ((unsigned long) &_dl_runtime_resolve & ~3))->ip;
> -		    }
> -		  else
> -		    {
> -		      /* Static executable! */
> -		      got[-2] = (Elf32_Addr) &_dl_runtime_resolve;
> -		    }
> -		}
> -	      else
> +#ifdef SHARED
> +	      extern void _dl_runtime_profile (void);
> +	      if (__glibc_unlikely (profile))
>   		{
>   		  if (GLRO(dl_profile) != NULL
>   		      && _dl_name_match_p (GLRO(dl_profile), l))
> @@ -272,6 +258,22 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   		      got[-2] = (Elf32_Addr) &_dl_runtime_profile;
>   		    }
>   		}
> +	      else
> +#endif
> +		{
> +		  /* If a static application called us, then _dl_runtime_resolve is not
> +		     a function descriptor, but the *real* address of the function... */
> +		  if((unsigned long) &_dl_runtime_resolve & 3)
> +		    {
> +		      got[-2] = (Elf32_Addr) ((struct fdesc *)
> +				  ((unsigned long) &_dl_runtime_resolve & ~3))->ip;
> +		    }
> +		  else
> +		    {
> +		      /* Static executable! */
> +		      got[-2] = (Elf32_Addr) &_dl_runtime_resolve;
> +		    }
> +		}
>   	      /* Plunk in the gp of this function descriptor so we
>   		 can make the call to _dl_runtime_xxxxxx */
>   	      got[-1] = ltp;
> diff --git a/sysdeps/hppa/dl-trampoline.S b/sysdeps/hppa/dl-trampoline.S
> index 689c6e1a40..9e904df3d2 100644
> --- a/sysdeps/hppa/dl-trampoline.S
> +++ b/sysdeps/hppa/dl-trampoline.S
> @@ -156,6 +156,7 @@ _dl_runtime_resolve:
>   	cfi_endproc
>   	.size   _dl_runtime_resolve, . - _dl_runtime_resolve
>   
> +#ifdef SHARED
>           .text
>           .global _dl_runtime_profile
>           .type _dl_runtime_profile,@function
> @@ -359,3 +360,4 @@ L(cont):
>           .PROCEND
>   	cfi_endproc
>   	.size   _dl_runtime_profile, . - _dl_runtime_profile
> +#endif
> diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
> index 18749f2ec2..07469e99b0 100644
> --- a/sysdeps/i386/dl-machine.h
> +++ b/sysdeps/i386/dl-machine.h
> @@ -92,6 +92,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	 to intercept the calls to collect information.  In this case we
>   	 don't store the address in the GOT so that all future calls also
>   	 end in this function.  */
> +#ifdef SHARED
>         if (__glibc_unlikely (profile))
>   	{
>   	  got[2] = (shstk_enabled
> @@ -105,6 +106,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	    GL(dl_profile_map) = l;
>   	}
>         else
> +#endif
>   	/* This function will get called to fix up the GOT entry indicated by
>   	   the offset on the stack, and then jump to the resolved address.  */
>   	got[2] = (shstk_enabled
> diff --git a/sysdeps/i386/dl-trampoline.S b/sysdeps/i386/dl-trampoline.S
> index 2d55f373b4..3604aabe87 100644
> --- a/sysdeps/i386/dl-trampoline.S
> +++ b/sysdeps/i386/dl-trampoline.S
> @@ -70,7 +70,7 @@ _dl_runtime_resolve_shstk:
>   	cfi_endproc
>   	.size _dl_runtime_resolve_shstk, .-_dl_runtime_resolve_shstk
>   
> -#ifndef PROF
> +#if !defined PROF && defined SHARED
>   # The SHSTK compatible version.
>   	.globl _dl_runtime_profile_shstk
>   	.type _dl_runtime_profile_shstk, @function
> diff --git a/sysdeps/ia64/dl-machine.h b/sysdeps/ia64/dl-machine.h
> index e1da3dadcb..3ef6b0ef4b 100644
> --- a/sysdeps/ia64/dl-machine.h
> +++ b/sysdeps/ia64/dl-machine.h
> @@ -121,9 +121,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>         reserve[0] = (Elf64_Addr) l;
>   
>         /* This function will be called to perform the relocation.  */
> -      if (!profile)
> -	doit = (Elf64_Addr) ELF_PTR_TO_FDESC (&_dl_runtime_resolve)->ip;
> -      else
> +#ifdef SHARED
> +      if (__glibc_unlikely (profile))
>   	{
>   	  if (GLRO(dl_profile) != NULL
>   	      && _dl_name_match_p (GLRO(dl_profile), l))
> @@ -134,6 +133,11 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	    }
>   	  doit = (Elf64_Addr) ELF_PTR_TO_FDESC (&_dl_runtime_profile)->ip;
>   	}
> +      else
> +#endif
> +	{
> +	  doit = (Elf64_Addr) ELF_PTR_TO_FDESC (&_dl_runtime_resolve)->ip;
> +	}
>   
>         reserve[1] = doit;
>         reserve[2] = gp;
> diff --git a/sysdeps/ia64/dl-trampoline.S b/sysdeps/ia64/dl-trampoline.S
> index 54b33c8c02..10d8432c8f 100644
> --- a/sysdeps/ia64/dl-trampoline.S
> +++ b/sysdeps/ia64/dl-trampoline.S
> @@ -188,7 +188,7 @@ END(_dl_runtime_resolve)
>   #define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16)
>   #define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16)
>   
> -#ifndef PROF
> +#if !defined PROF && defined SHARED
>   ENTRY(_dl_runtime_profile)
>   	{ .mii
>   	  .prologue
> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
> index 57913cefaa..0d17fd21e3 100644
> --- a/sysdeps/loongarch/dl-machine.h
> +++ b/sysdeps/loongarch/dl-machine.h
> @@ -287,15 +287,16 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	 to intercept the calls to collect information.  In this case we
>   	 don't store the address in the GOT so that all future calls also
>   	 end in this function.  */
> +#ifdef SHARED
>         if (profile != 0)
>   	{
> -#if !defined __loongarch_soft_float
> +# if !defined __loongarch_soft_float
>   	  if (SUPPORT_LASX)
>   	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
>   	  else if (SUPPORT_LSX)
>   	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
>   	  else
> -#endif
> +# endif
>   	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
>   
>   	  if (GLRO(dl_profile) != NULL
> @@ -305,6 +306,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	    GL(dl_profile_map) = l;
>   	}
>         else
> +#endif
>   	{
>   	  /* This function will get called to fix up the GOT entry
>   	     indicated by the offset on the stack, and then jump to
> diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h
> index e298439d39..1da70aeb23 100644
> --- a/sysdeps/loongarch/dl-trampoline.h
> +++ b/sysdeps/loongarch/dl-trampoline.h
> @@ -126,6 +126,7 @@ ENTRY (_dl_runtime_resolve)
>   	jirl	zero, t1, 0
>   END (_dl_runtime_resolve)
>   
> +#ifdef SHARED
>   #include "dl-link.h"
>   
>   ENTRY (_dl_runtime_profile)
> @@ -367,3 +368,4 @@ ENTRY (_dl_runtime_profile)
>   	jirl	zero, ra, 0
>   
>   END (_dl_runtime_profile)
> +#endif /* SHARED */
> diff --git a/sysdeps/m68k/dl-machine.h b/sysdeps/m68k/dl-machine.h
> index 5ee586b27b..8d7e733e2a 100644
> --- a/sysdeps/m68k/dl-machine.h
> +++ b/sysdeps/m68k/dl-machine.h
> @@ -75,7 +75,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   {
>     Elf32_Addr *got;
>     extern void _dl_runtime_resolve (Elf32_Word);
> -  extern void _dl_runtime_profile (Elf32_Word);
>   
>     if (l->l_info[DT_JMPREL] && lazy)
>       {
> @@ -93,6 +92,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	 to intercept the calls to collect information.  In this case we
>   	 don't store the address in the GOT so that all future calls also
>   	 end in this function.  */
> +#ifdef SHARED
> +      extern void _dl_runtime_profile (Elf32_Word);
>         if (profile)
>   	{
>   	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
> @@ -106,6 +107,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	    }
>   	}
>         else
> +#endif
>   	/* This function will get called to fix up the GOT entry indicated by
>   	   the offset on the stack, and then jump to the resolved address.  */
>   	got[2] = (Elf32_Addr) &_dl_runtime_resolve;
> diff --git a/sysdeps/m68k/dl-trampoline.S b/sysdeps/m68k/dl-trampoline.S
> index dba3741400..f1b4943868 100644
> --- a/sysdeps/m68k/dl-trampoline.S
> +++ b/sysdeps/m68k/dl-trampoline.S
> @@ -60,6 +60,7 @@ _dl_runtime_resolve:
>   	cfi_endproc
>   	.size _dl_runtime_resolve, . - _dl_runtime_resolve
>   
> +#ifdef SHARED
>   	.text
>   	.globl _dl_runtime_profile
>   	.type _dl_runtime_profile, @function
> @@ -220,3 +221,4 @@ _dl_runtime_profile:
>   	rts
>   	cfi_endproc
>   	.size _dl_runtime_profile, . - _dl_runtime_profile
> +#endif /* SHARED */
> diff --git a/sysdeps/powerpc/powerpc32/dl-machine.c b/sysdeps/powerpc/powerpc32/dl-machine.c
> index ef84911ede..e6b603de94 100644
> --- a/sysdeps/powerpc/powerpc32/dl-machine.c
> +++ b/sysdeps/powerpc/powerpc32/dl-machine.c
> @@ -226,7 +226,7 @@ __elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
>   	  Elf32_Word dlrr;
>   	  Elf32_Word offset;
>   
> -#ifndef PROF
> +#if !defined PROF && defined SHARED
>   	  dlrr = (Elf32_Word) (profile
>   			       ? _dl_prof_resolve
>   			       : _dl_runtime_resolve);
> diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h
> index a4cad7583c..1ff46d5f8a 100644
> --- a/sysdeps/powerpc/powerpc32/dl-machine.h
> +++ b/sysdeps/powerpc/powerpc32/dl-machine.h
> @@ -188,15 +188,19 @@ elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
>         extern void _dl_runtime_resolve (void);
>         extern void _dl_prof_resolve (void);
>   
> -      if (__glibc_likely (!profile))
> -	dlrr = _dl_runtime_resolve;
> -      else
> +#ifdef SHARED
> +      if (__glibc_unlikely (profile))
>   	{
>   	  if (GLRO(dl_profile) != NULL
>   	      &&_dl_name_match_p (GLRO(dl_profile), map))
>   	    GL(dl_profile_map) = map;
>   	  dlrr = _dl_prof_resolve;
>   	}
> +      else
> +#endif
> +	{
> +	  dlrr = _dl_runtime_resolve;
> +	}
>         got = (Elf32_Addr *) map->l_info[DT_PPC(GOT)]->d_un.d_ptr;
>         glink = got[1];
>         got[1] = (Elf32_Addr) dlrr;
> diff --git a/sysdeps/powerpc/powerpc32/dl-trampoline.S b/sysdeps/powerpc/powerpc32/dl-trampoline.S
> index 93b1673ebb..be8de0e2dc 100644
> --- a/sysdeps/powerpc/powerpc32/dl-trampoline.S
> +++ b/sysdeps/powerpc/powerpc32/dl-trampoline.S
> @@ -70,7 +70,7 @@ _dl_runtime_resolve:
>   	cfi_endproc
>   	.size	 _dl_runtime_resolve,.-_dl_runtime_resolve
>   
> -#ifndef PROF
> +#if !defined PROF && defined SHARED
>   	.align 2
>   	.globl _dl_prof_resolve
>   	.type _dl_prof_resolve,@function
> diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
> index 449208e86f..601c3cba9d 100644
> --- a/sysdeps/powerpc/powerpc64/dl-machine.h
> +++ b/sysdeps/powerpc/powerpc64/dl-machine.h
> @@ -362,13 +362,19 @@ elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
>   	  Elf64_Word offset;
>   	  Elf64_Addr dlrr;
>   
> -	  dlrr = (Elf64_Addr) (profile ? _dl_profile_resolve
> -				       : _dl_runtime_resolve);
> -	  if (profile && GLRO(dl_profile) != NULL
> -	      && _dl_name_match_p (GLRO(dl_profile), map))
> -	    /* This is the object we are looking for.  Say that we really
> -	       want profiling and the timers are started.  */
> -	    GL(dl_profile_map) = map;
> +#ifdef SHARED
> +	  if (__glibc_unlikely (profile))
> +	    {
> +	      dlrr = (Elf64_Addr) _dl_profile_resolve;
> +	      if (profile && GLRO(dl_profile) != NULL
> +		  && _dl_name_match_p (GLRO(dl_profile), map))
> +		/* This is the object we are looking for.  Say that we really
> +		   want profiling and the timers are started.  */
> +		GL(dl_profile_map) = map;
> +	    }
> +	  else
> +#endif
> +	    dlrr = (Elf64_Addr) _dl_runtime_resolve;
>   
>   #if _CALL_ELF != 2
>   	  /* We need to stuff the address/TOC of _dl_runtime_resolve
> diff --git a/sysdeps/powerpc/powerpc64/dl-trampoline.S b/sysdeps/powerpc/powerpc64/dl-trampoline.S
> index 1d04ec8109..b2fc2bb133 100644
> --- a/sysdeps/powerpc/powerpc64/dl-trampoline.S
> +++ b/sysdeps/powerpc/powerpc64/dl-trampoline.S
> @@ -195,7 +195,7 @@ END(_dl_runtime_resolve)
>      and r11 contains the link_map (from PLT0+16).  The link_map becomes
>      parm1 (r3) and the index (r0) needs to be converted to an offset
>      (index * 24) in parm2 (r4).  */
> -#ifndef PROF
> +#if !defined PROF && defined SHARED
>   	.hidden _dl_profile_resolve
>   ENTRY (_dl_profile_resolve, 4)
>   /* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we
> diff --git a/sysdeps/s390/s390-32/dl-machine.h b/sysdeps/s390/s390-32/dl-machine.h
> index 100a3e05f6..b8bf2796c7 100644
> --- a/sysdeps/s390/s390-32/dl-machine.h
> +++ b/sysdeps/s390/s390-32/dl-machine.h
> @@ -124,16 +124,17 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	 to intercept the calls to collect information.  In this case we
>   	 don't store the address in the GOT so that all future calls also
>   	 end in this function.  */
> +#ifdef SHARED
>         if (__glibc_unlikely (profile))
>   	{
> -#if defined HAVE_S390_VX_ASM_SUPPORT
> +# if defined HAVE_S390_VX_ASM_SUPPORT
>   	  if (GLRO(dl_hwcap) & HWCAP_S390_VX)
>   	    got[2] = (Elf32_Addr) &_dl_runtime_profile_vx;
>   	  else
>   	    got[2] = (Elf32_Addr) &_dl_runtime_profile;
> -#else
> +# else
>   	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
> -#endif
> +# endif
>   
>   	  if (GLRO(dl_profile) != NULL
>   	      && _dl_name_match_p (GLRO(dl_profile), l))
> @@ -142,6 +143,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	    GL(dl_profile_map) = l;
>   	}
>         else
> +#endif
>   	{
>   	  /* This function will get called to fix up the GOT entry indicated by
>   	     the offset on the stack, and then jump to the resolved address.  */
> diff --git a/sysdeps/s390/s390-32/dl-trampoline.h b/sysdeps/s390/s390-32/dl-trampoline.h
> index 78fdca9d53..8093ab08d3 100644
> --- a/sysdeps/s390/s390-32/dl-trampoline.h
> +++ b/sysdeps/s390/s390-32/dl-trampoline.h
> @@ -148,7 +148,7 @@ _dl_runtime_resolve:
>   #undef F0_OFF
>   #undef F2_OFF
>   
> -#ifndef PROF
> +#if !defined PROF && defined SHARED
>   # define SIZEOF_STRUCT_LA_S390_32_REGS 168
>   # define REGS_OFF -264
>   # define R2_OFF -264
> diff --git a/sysdeps/s390/s390-64/dl-machine.h b/sysdeps/s390/s390-64/dl-machine.h
> index 9fabb09750..82259dad64 100644
> --- a/sysdeps/s390/s390-64/dl-machine.h
> +++ b/sysdeps/s390/s390-64/dl-machine.h
> @@ -111,16 +111,17 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	 to intercept the calls to collect information.	 In this case we
>   	 don't store the address in the GOT so that all future calls also
>   	 end in this function.	*/
> +#ifdef SHARED
>         if (__glibc_unlikely (profile))
>   	{
> -#if defined HAVE_S390_VX_ASM_SUPPORT
> +# if defined HAVE_S390_VX_ASM_SUPPORT
>   	  if (GLRO(dl_hwcap) & HWCAP_S390_VX)
>   	    got[2] = (Elf64_Addr) &_dl_runtime_profile_vx;
>   	  else
>   	    got[2] = (Elf64_Addr) &_dl_runtime_profile;
> -#else
> +# else
>   	  got[2] = (Elf64_Addr) &_dl_runtime_profile;
> -#endif
> +# endif
>   
>   	  if (GLRO(dl_profile) != NULL
>   	      && _dl_name_match_p (GLRO(dl_profile), l))
> @@ -129,6 +130,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	    GL(dl_profile_map) = l;
>   	}
>         else
> +#endif
>   	{
>   	  /* This function will get called to fix up the GOT entry indicated by
>   	     the offset on the stack, and then jump to the resolved address.  */
> diff --git a/sysdeps/s390/s390-64/dl-trampoline.h b/sysdeps/s390/s390-64/dl-trampoline.h
> index 3a7cfc5f92..61188119e6 100644
> --- a/sysdeps/s390/s390-64/dl-trampoline.h
> +++ b/sysdeps/s390/s390-64/dl-trampoline.h
> @@ -150,7 +150,7 @@ _dl_runtime_resolve:
>   #undef F4_OFF
>   #undef F6_OFF
>   
> -#ifndef PROF
> +#if !defined PROF && defined SHARED
>   # define SIZEOF_STRUCT_LA_S390_64_REGS 200
>   # define REGS_OFF -360
>   # define R2_OFF -360
> diff --git a/sysdeps/sh/dl-machine.h b/sysdeps/sh/dl-machine.h
> index 0e4eac42c3..e0480eae5a 100644
> --- a/sysdeps/sh/dl-machine.h
> +++ b/sysdeps/sh/dl-machine.h
> @@ -101,6 +101,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	 to intercept the calls to collect information.	 In this case we
>   	 don't store the address in the GOT so that all future calls also
>   	 end in this function.	*/
> +#ifdef SHARED
>         if (profile)
>   	{
>   	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
> @@ -110,6 +111,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	    GL(dl_profile_map) = l;
>   	}
>         else
> +#endif
>   	/* This function will get called to fix up the GOT entry indicated by
>   	   the offset on the stack, and then jump to the resolved address.  */
>   	got[2] = (Elf32_Addr) &_dl_runtime_resolve;
> diff --git a/sysdeps/sh/dl-trampoline.S b/sysdeps/sh/dl-trampoline.S
> index 5d703341ed..ecaae34db4 100644
> --- a/sysdeps/sh/dl-trampoline.S
> +++ b/sysdeps/sh/dl-trampoline.S
> @@ -142,6 +142,7 @@ _dl_runtime_resolve:
>   	.size _dl_runtime_resolve, .-_dl_runtime_resolve
>   
>   
> +#ifdef SHARED
>   	.globl _dl_runtime_profile
>   	.type _dl_runtime_profile,@function
>   	cfi_startproc
> @@ -428,3 +429,4 @@ _dl_runtime_profile:
>   8:	.long _dl_audit_pltexit
>   #endif
>   	.size _dl_runtime_profile, .-_dl_runtime_profile
> +#endif /* SHARED */
> diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h
> index 9b57ae1a93..b10e541810 100644
> --- a/sysdeps/sparc/sparc32/dl-machine.h
> +++ b/sysdeps/sparc/sparc32/dl-machine.h
> @@ -116,7 +116,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	 bits of %g1 with an offset into the .rela.plt section and jump to
>   	 the beginning of the PLT.  */
>         plt = (Elf32_Addr *) D_PTR (l, l_info[DT_PLTGOT]);
> -      if (__builtin_expect(profile, 0))
> +#ifdef SHARED
> +      if (__glibc_unlikely (profile))
>   	{
>   	  rfunc = (Elf32_Addr) &_dl_runtime_profile;
>   
> @@ -125,6 +126,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	    GL(dl_profile_map) = l;
>   	}
>         else
> +#endif
>   	{
>   	  rfunc = (Elf32_Addr) &_dl_runtime_resolve;
>   	}
> diff --git a/sysdeps/sparc/sparc32/dl-trampoline.S b/sysdeps/sparc/sparc32/dl-trampoline.S
> index 08ff31b474..5e7d860ae4 100644
> --- a/sysdeps/sparc/sparc32/dl-trampoline.S
> +++ b/sysdeps/sparc/sparc32/dl-trampoline.S
> @@ -47,6 +47,7 @@ _dl_runtime_resolve:
>   
>   	.size	_dl_runtime_resolve, .-_dl_runtime_resolve
>   
> +#ifdef SHARED
>   	/* For the profiling cases we pass in our stack frame
>   	 * as the base of the La_sparc32_regs, so it looks
>   	 * like:
> @@ -185,3 +186,4 @@ _dl_runtime_profile:
>   	cfi_endproc
>   
>   	.size	_dl_runtime_profile, .-_dl_runtime_profile
> +#endif
> diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h
> index 2f04ac550e..98469e7604 100644
> --- a/sysdeps/sparc/sparc64/dl-machine.h
> +++ b/sysdeps/sparc/sparc64/dl-machine.h
> @@ -136,7 +136,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>         Elf64_Addr res0_addr, res1_addr;
>         unsigned int *plt = (void *) D_PTR (l, l_info[DT_PLTGOT]);
>   
> -      if (__builtin_expect(profile, 0))
> +#ifdef SHARED
> +      if (__glibc_unlikely (profile))
>   	{
>   	  res0_addr = (Elf64_Addr) &_dl_runtime_profile_0;
>   	  res1_addr = (Elf64_Addr) &_dl_runtime_profile_1;
> @@ -146,6 +147,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	    GL(dl_profile_map) = l;
>   	}
>         else
> +#endif
>   	{
>   	  res0_addr = (Elf64_Addr) &_dl_runtime_resolve_0;
>   	  res1_addr = (Elf64_Addr) &_dl_runtime_resolve_1;
> diff --git a/sysdeps/sparc/sparc64/dl-trampoline.S b/sysdeps/sparc/sparc64/dl-trampoline.S
> index 444690a71e..82b42681dd 100644
> --- a/sysdeps/sparc/sparc64/dl-trampoline.S
> +++ b/sysdeps/sparc/sparc64/dl-trampoline.S
> @@ -92,6 +92,7 @@ _dl_runtime_resolve_1:
>   
>   	.size	_dl_runtime_resolve_1, .-_dl_runtime_resolve_1
>   
> +#ifdef SHARED
>   	/* For the profiling cases we pass in our stack frame
>   	 * as the base of the La_sparc64_regs, so it looks
>   	 * like:
> @@ -323,3 +324,4 @@ _dl_runtime_profile_1:
>   	cfi_endproc
>   
>   	.size	_dl_runtime_resolve_1, .-_dl_runtime_resolve_1
> +#endif
> diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
> index 9ea2a70837..581a2f1a9e 100644
> --- a/sysdeps/x86_64/dl-machine.h
> +++ b/sysdeps/x86_64/dl-machine.h
> @@ -89,6 +89,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   
>         const struct cpu_features* cpu_features = __get_cpu_features ();
>   
> +#ifdef SHARED
>         /* The got[2] entry contains the address of a function which gets
>   	 called to get the address of a so far unresolved function and
>   	 jump to it.  The profiling extension of the dynamic linker allows
> @@ -111,6 +112,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
>   	    GL(dl_profile_map) = l;
>   	}
>         else
> +#endif
>   	{
>   	  /* This function will get called to fix up the GOT entry
>   	     indicated by the offset on the stack, and then jump to
> diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
> index a6b9a1826b..3fd30d58fc 100644
> --- a/sysdeps/x86_64/dl-trampoline.S
> +++ b/sysdeps/x86_64/dl-trampoline.S
> @@ -53,45 +53,49 @@
>   
>   #define RESTORE_AVX
>   
> -#define VEC_SIZE		64
> -#define VMOVA			vmovdqa64
> -#define VEC(i)			zmm##i
> -#define _dl_runtime_profile	_dl_runtime_profile_avx512
> -# define SECTION(p)		p##.evex512
> -#include "dl-trampoline.h"
> -#undef _dl_runtime_profile
> -#undef VEC
> -#undef VMOVA
> -#undef VEC_SIZE
> -#undef SECTION
> -
> -#if MINIMUM_X86_ISA_LEVEL <= AVX_X86_ISA_LEVEL
> -# define VEC_SIZE		32
> -# define VMOVA			vmovdqa
> -# define VEC(i)			ymm##i
> -# define SECTION(p)		p##.avx
> -# define _dl_runtime_profile	_dl_runtime_profile_avx
> +#ifdef SHARED
> +# define VEC_SIZE		64
> +# define VMOVA			vmovdqa64
> +# define VEC(i)			zmm##i
> +# define _dl_runtime_profile	_dl_runtime_profile_avx512
> +#  define SECTION(p)		p##.evex512
>   # include "dl-trampoline.h"
>   # undef _dl_runtime_profile
>   # undef VEC
>   # undef VMOVA
>   # undef VEC_SIZE
>   # undef SECTION
> -#endif
>   
> -#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
> +# if MINIMUM_X86_ISA_LEVEL <= AVX_X86_ISA_LEVEL
> +#  define VEC_SIZE		32
> +#  define VMOVA			vmovdqa
> +#  define VEC(i)			ymm##i
> +#  define SECTION(p)		p##.avx
> +#  define _dl_runtime_profile	_dl_runtime_profile_avx
> +#  include "dl-trampoline.h"
> +#  undef _dl_runtime_profile
> +#  undef VEC
> +#  undef VMOVA
> +#  undef VEC_SIZE
> +#  undef SECTION
> +# endif
> +
> +# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
>   /* movaps/movups is 1-byte shorter.  */
> -# define VEC_SIZE		16
> -# define VMOVA			movaps
> -# define VEC(i)			xmm##i
> -# define _dl_runtime_profile	_dl_runtime_profile_sse
> -# undef RESTORE_AVX
> -# include "dl-trampoline.h"
> -# undef _dl_runtime_profile
> -# undef VEC
> -# undef VMOVA
> -# undef VEC_SIZE
> +#  define VEC_SIZE		16
> +#  define VMOVA			movaps
> +#  define VEC(i)			xmm##i
> +#  define _dl_runtime_profile	_dl_runtime_profile_sse
> +#  undef RESTORE_AVX
> +#  include "dl-trampoline.h"
> +#  undef _dl_runtime_profile
> +#  undef VEC
> +#  undef VMOVA
> +#  undef VEC_SIZE
> +# endif
> +#endif /* SHARED */
>   
> +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
>   # define USE_FXSAVE
>   # define STATE_SAVE_ALIGNMENT	16
>   # define _dl_runtime_resolve	_dl_runtime_resolve_fxsave
  

Patch

diff --git a/elf/Makefile b/elf/Makefile
index 08896bb895..d17ff5424e 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -37,12 +37,12 @@  routines = \
   dl-iteratephdr \
   dl-libc \
   dl-origin \
-  dl-profstub \
   dl-reloc-static-pie \
   dl-support \
   dl-sym \
   dl-sysdep \
   enbl-secure \
+  libc-dl-profstub  \
   libc-dl_find_object \
   libc_early_init \
   rtld_static_init \
@@ -72,7 +72,6 @@  dl-routines = \
   dl-open \
   dl-origin \
   dl-printf \
-  dl-profile \
   dl-reloc \
   dl-runtime \
   dl-scope \
@@ -117,7 +116,11 @@  elide-routines.os = \
   # elide-routines.os
 
 # These object files are only included in the dynamically-linked libc.
-shared-only-routines = libc-dl_find_object
+shared-only-routines = \
+  libc-dl-profile \
+  libc-dl-profstub \
+  libc-dl_find_object \
+  # shared-only-routines
 
 # ld.so uses those routines, plus some special stuff for being the program
 # interpreter and operating independent of libc.
@@ -135,6 +138,7 @@  rtld-routines = \
   dl-libc_freeres \
   dl-minimal \
   dl-mutex \
+  dl-profile \
   dl-sysdep \
   dl-usage \
   rtld \
diff --git a/elf/dl-load.c b/elf/dl-load.c
index 2923b1141d..7356a4fe48 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -1443,11 +1443,6 @@  cannot enable executable stack as shared object requires");
      name by which the DSO is actually known.  Add that as well.  */
   if (__glibc_unlikely (origname != NULL))
     add_name_to_object (l, origname);
-#else
-  /* Audit modules only exist when linking is dynamic so ORIGNAME
-     cannot be non-NULL.  */
-  assert (origname == NULL);
-#endif
 
   /* When we profile the SONAME might be needed for something else but
      loading.  Add it right away.  */
@@ -1455,6 +1450,11 @@  cannot enable executable stack as shared object requires");
       && l->l_info[DT_SONAME] != NULL)
     add_name_to_object (l, ((const char *) D_PTR (l, l_info[DT_STRTAB])
 			    + l->l_info[DT_SONAME]->d_un.d_val));
+#else
+  /* Audit modules only exist when linking is dynamic so ORIGNAME
+     cannot be non-NULL.  */
+  assert (origname == NULL);
+#endif
 
   /* If we have newly loaded libc.so, update the namespace
      description.  */
diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c
index 32a8bfcf74..fe7deda32a 100644
--- a/elf/dl-runtime.c
+++ b/elf/dl-runtime.c
@@ -162,14 +162,14 @@  _dl_fixup (
   return elf_machine_fixup_plt (l, result, refsym, sym, reloc, rel_addr, value);
 }
 
-#ifndef PROF
+#if !defined PROF && defined SHARED
 DL_FIXUP_VALUE_TYPE
 __attribute ((noinline))
 DL_ARCH_FIXUP_ATTRIBUTE
 _dl_profile_fixup (
-#ifdef ELF_MACHINE_RUNTIME_FIXUP_ARGS
+# ifdef ELF_MACHINE_RUNTIME_FIXUP_ARGS
 		   ELF_MACHINE_RUNTIME_FIXUP_ARGS,
-#endif
+# endif
 		   struct link_map *l, ElfW(Word) reloc_arg,
 		   ElfW(Addr) retaddr, void *regs, long int *framesizep)
 {
@@ -309,14 +309,12 @@  _dl_profile_fixup (
       /* And now perhaps the relocation addend.  */
       value = elf_machine_plt_value (l, reloc, value);
 
-#ifdef SHARED
       /* Auditing checkpoint: we have a new binding.  Provide the
 	 auditing libraries the possibility to change the value and
 	 tell us whether further auditing is wanted.  */
       if (defsym != NULL && GLRO(dl_naudit) > 0)
 	_dl_audit_symbind (l, reloc_result, reloc, defsym, &value, result,
 			   true);
-#endif
 
       /* Store the result for later runs.  */
       if (__glibc_likely (! GLRO(dl_bind_not)))
@@ -335,11 +333,9 @@  _dl_profile_fixup (
   long int framesize = -1;
 
 
-#ifdef SHARED
   /* Auditing checkpoint: report the PLT entering and allow the
      auditors to change the value.  */
   _dl_audit_pltenter (l, reloc_result, &value, regs, &framesize);
-#endif
 
   /* Store the frame size information.  */
   *framesizep = framesize;
@@ -349,4 +345,4 @@  _dl_profile_fixup (
   return value;
 }
 
-#endif /* PROF */
+#endif /* !defined PROF && defined SHARED */
diff --git a/elf/dl-support.c b/elf/dl-support.c
index 44a54dea07..31a608df87 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -60,10 +60,6 @@  int _dl_dynamic_weak;
 /* If nonzero print warnings about problematic situations.  */
 int _dl_verbose;
 
-/* We never do profiling.  */
-const char *_dl_profile;
-const char *_dl_profile_output;
-
 /* Names of shared object for which the RUNPATHs and RPATHs should be
    ignored.  */
 const char *_dl_inhibit_rpath;
@@ -301,11 +297,6 @@  _dl_non_dynamic_init (void)
 
   _dl_dynamic_weak = *(getenv ("LD_DYNAMIC_WEAK") ?: "") == '\0';
 
-  _dl_profile_output = getenv ("LD_PROFILE_OUTPUT");
-  if (_dl_profile_output == NULL || _dl_profile_output[0] == '\0')
-    _dl_profile_output
-      = &"/var/tmp\0/var/profile"[__libc_enable_secure ? 9 : 0];
-
   if (__libc_enable_secure)
     {
       static const char unsecure_envvars[] =
diff --git a/elf/dl-profstub.c b/elf/libc-dl-profstub.c
similarity index 100%
rename from elf/dl-profstub.c
rename to elf/libc-dl-profstub.c
diff --git a/include/dlfcn.h b/include/dlfcn.h
index ae25f05303..a44420fa37 100644
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@@ -135,5 +135,10 @@  extern int __dladdr1 (const void *address, Dl_info *info,
 extern int __dlinfo (void *handle, int request, void *arg);
 extern char *__dlerror (void);
 
+#ifndef SHARED
+# undef DL_CALL_FCT
+# define DL_CALL_FCT(fctp, args) ((fctp) args)
+#endif
+
 #endif
 #endif
diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
index 4170b9269f..a56eb96a79 100644
--- a/sysdeps/aarch64/dl-machine.h
+++ b/sysdeps/aarch64/dl-machine.h
@@ -68,7 +68,6 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
     {
       ElfW(Addr) *got;
       extern void _dl_runtime_resolve (ElfW(Word));
-      extern void _dl_runtime_profile (ElfW(Word));
 
       got = (ElfW(Addr) *) D_PTR (l, l_info[DT_PLTGOT]);
       if (got[1])
@@ -83,6 +82,8 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	 to intercept the calls to collect information.  In this case we
 	 don't store the address in the GOT so that all future calls also
 	 end in this function.  */
+#ifdef SHARED
+      extern void _dl_runtime_profile (ElfW(Word));
       if ( profile)
 	{
 	   got[2] = (ElfW(Addr)) &_dl_runtime_profile;
@@ -94,6 +95,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    GL(dl_profile_map) = l;
 	}
       else
+#endif
 	{
 	  /* This function will get called to fix up the GOT entry
 	     indicated by the offset on the stack, and then jump to
diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S
index a3474ba741..fc82151ba2 100644
--- a/sysdeps/aarch64/dl-trampoline.S
+++ b/sysdeps/aarch64/dl-trampoline.S
@@ -122,7 +122,7 @@  _dl_runtime_resolve:
 
 	cfi_endproc
 	.size _dl_runtime_resolve, .-_dl_runtime_resolve
-#ifndef PROF
+#if !defined PROF && defined SHARED
 	.globl _dl_runtime_profile
 	.type _dl_runtime_profile, #function
 	cfi_startproc
diff --git a/sysdeps/alpha/dl-machine.h b/sysdeps/alpha/dl-machine.h
index ed5389e3c5..7fe2afca93 100644
--- a/sysdeps/alpha/dl-machine.h
+++ b/sysdeps/alpha/dl-machine.h
@@ -75,9 +75,7 @@  elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
 			   int lazy, int profile)
 {
   extern char _dl_runtime_resolve_new[] attribute_hidden;
-  extern char _dl_runtime_profile_new[] attribute_hidden;
   extern char _dl_runtime_resolve_old[] attribute_hidden;
-  extern char _dl_runtime_profile_old[] attribute_hidden;
 
   struct pltgot {
     char *resolve;
@@ -109,6 +107,9 @@  elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
   else
     resolve = _dl_runtime_resolve_old;
 
+#ifdef SHARED
+  extern char _dl_runtime_profile_new[] attribute_hidden;
+  extern char _dl_runtime_profile_old[] attribute_hidden;
   if (__builtin_expect (profile, 0))
     {
       if (secureplt)
@@ -123,6 +124,7 @@  elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
 	  GL(dl_profile_map) = map;
 	}
     }
+#endif
 
   pg->resolve = resolve;
   pg->link = map;
diff --git a/sysdeps/alpha/dl-trampoline.S b/sysdeps/alpha/dl-trampoline.S
index f8c3d33906..5dffa62587 100644
--- a/sysdeps/alpha/dl-trampoline.S
+++ b/sysdeps/alpha/dl-trampoline.S
@@ -89,6 +89,7 @@  _dl_runtime_resolve_new:
 	.globl	_dl_runtime_profile_new
 	.type	_dl_runtime_profile_new, @function
 
+#ifdef SHARED
 #undef FRAMESIZE
 #define FRAMESIZE	20*8
 
@@ -207,6 +208,7 @@  _dl_runtime_profile_new:
 
 	cfi_endproc
 	.size	_dl_runtime_profile_new, .-_dl_runtime_profile_new
+#endif /* SHARED */
 
 	.align	4
 	.globl	_dl_runtime_resolve_old
@@ -340,6 +342,7 @@  _dl_runtime_resolve_old:
 	.usepv	_dl_runtime_profile_old, no
 	.type	_dl_runtime_profile_old, @function
 
+#ifdef SHARED
 	/* We save the registers in a different order than desired by
 	   .mask/.fmask, so we have to use explicit cfi directives.  */
 	cfi_startproc
@@ -538,3 +541,4 @@  _dl_runtime_profile_old:
 
 	cfi_endproc
 	.size	_dl_runtime_profile_old, .-_dl_runtime_profile_old
+#endif /* SHARED */
diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
index d720c02c96..a68679e653 100644
--- a/sysdeps/arm/dl-machine.h
+++ b/sysdeps/arm/dl-machine.h
@@ -65,7 +65,6 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 {
   Elf32_Addr *got;
   extern void _dl_runtime_resolve (Elf32_Word);
-  extern void _dl_runtime_profile (Elf32_Word);
 
   if (l->l_info[DT_JMPREL] && lazy)
     {
@@ -88,6 +87,8 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	 to intercept the calls to collect information.  In this case we
 	 don't store the address in the GOT so that all future calls also
 	 end in this function.  */
+#ifdef SHARED
+      extern void _dl_runtime_profile (Elf32_Word);
       if (profile)
 	{
 	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
@@ -99,6 +100,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    GL(dl_profile_map) = l;
 	}
       else
+#endif
 	/* This function will get called to fix up the GOT entry indicated by
 	   the offset on the stack, and then jump to the resolved address.  */
 	got[2] = (Elf32_Addr) &_dl_runtime_resolve;
diff --git a/sysdeps/arm/dl-trampoline.S b/sysdeps/arm/dl-trampoline.S
index 23c2476917..2df5b7ee36 100644
--- a/sysdeps/arm/dl-trampoline.S
+++ b/sysdeps/arm/dl-trampoline.S
@@ -70,7 +70,7 @@  _dl_runtime_resolve:
 	cfi_endproc
 	.size _dl_runtime_resolve, .-_dl_runtime_resolve
 
-#ifndef PROF
+#if !defined PROF && defined SHARED
 	.globl _dl_runtime_profile
 	.type _dl_runtime_profile, #function
 	CFI_SECTIONS
diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h
index 4e6e70b3c9..993593de5d 100644
--- a/sysdeps/hppa/dl-machine.h
+++ b/sysdeps/hppa/dl-machine.h
@@ -195,7 +195,6 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
   end_jmprel = jmprel + l->l_info[DT_PLTRELSZ]->d_un.d_val;
 
   extern void _dl_runtime_resolve (void);
-  extern void _dl_runtime_profile (void);
 
   /* Linking lazily */
   if (lazy)
@@ -235,22 +234,9 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	      got[1] = (Elf32_Addr) l;
 
 	      /* This function will be called to perform the relocation. */
-	      if (__builtin_expect (!profile, 1))
-		{
-		  /* If a static application called us, then _dl_runtime_resolve is not
-		     a function descriptor, but the *real* address of the function... */
-		  if((unsigned long) &_dl_runtime_resolve & 3)
-		    {
-		      got[-2] = (Elf32_Addr) ((struct fdesc *)
-				  ((unsigned long) &_dl_runtime_resolve & ~3))->ip;
-		    }
-		  else
-		    {
-		      /* Static executable! */
-		      got[-2] = (Elf32_Addr) &_dl_runtime_resolve;
-		    }
-		}
-	      else
+#ifdef SHARED
+	      extern void _dl_runtime_profile (void);
+	      if (__glibc_unlikely (profile))
 		{
 		  if (GLRO(dl_profile) != NULL
 		      && _dl_name_match_p (GLRO(dl_profile), l))
@@ -272,6 +258,22 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 		      got[-2] = (Elf32_Addr) &_dl_runtime_profile;
 		    }
 		}
+	      else
+#endif
+		{
+		  /* If a static application called us, then _dl_runtime_resolve is not
+		     a function descriptor, but the *real* address of the function... */
+		  if((unsigned long) &_dl_runtime_resolve & 3)
+		    {
+		      got[-2] = (Elf32_Addr) ((struct fdesc *)
+				  ((unsigned long) &_dl_runtime_resolve & ~3))->ip;
+		    }
+		  else
+		    {
+		      /* Static executable! */
+		      got[-2] = (Elf32_Addr) &_dl_runtime_resolve;
+		    }
+		}
 	      /* Plunk in the gp of this function descriptor so we
 		 can make the call to _dl_runtime_xxxxxx */
 	      got[-1] = ltp;
diff --git a/sysdeps/hppa/dl-trampoline.S b/sysdeps/hppa/dl-trampoline.S
index 689c6e1a40..9e904df3d2 100644
--- a/sysdeps/hppa/dl-trampoline.S
+++ b/sysdeps/hppa/dl-trampoline.S
@@ -156,6 +156,7 @@  _dl_runtime_resolve:
 	cfi_endproc
 	.size   _dl_runtime_resolve, . - _dl_runtime_resolve
 
+#ifdef SHARED
         .text
         .global _dl_runtime_profile
         .type _dl_runtime_profile,@function
@@ -359,3 +360,4 @@  L(cont):
         .PROCEND
 	cfi_endproc
 	.size   _dl_runtime_profile, . - _dl_runtime_profile
+#endif
diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index 18749f2ec2..07469e99b0 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -92,6 +92,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	 to intercept the calls to collect information.  In this case we
 	 don't store the address in the GOT so that all future calls also
 	 end in this function.  */
+#ifdef SHARED
       if (__glibc_unlikely (profile))
 	{
 	  got[2] = (shstk_enabled
@@ -105,6 +106,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    GL(dl_profile_map) = l;
 	}
       else
+#endif
 	/* This function will get called to fix up the GOT entry indicated by
 	   the offset on the stack, and then jump to the resolved address.  */
 	got[2] = (shstk_enabled
diff --git a/sysdeps/i386/dl-trampoline.S b/sysdeps/i386/dl-trampoline.S
index 2d55f373b4..3604aabe87 100644
--- a/sysdeps/i386/dl-trampoline.S
+++ b/sysdeps/i386/dl-trampoline.S
@@ -70,7 +70,7 @@  _dl_runtime_resolve_shstk:
 	cfi_endproc
 	.size _dl_runtime_resolve_shstk, .-_dl_runtime_resolve_shstk
 
-#ifndef PROF
+#if !defined PROF && defined SHARED
 # The SHSTK compatible version.
 	.globl _dl_runtime_profile_shstk
 	.type _dl_runtime_profile_shstk, @function
diff --git a/sysdeps/ia64/dl-machine.h b/sysdeps/ia64/dl-machine.h
index e1da3dadcb..3ef6b0ef4b 100644
--- a/sysdeps/ia64/dl-machine.h
+++ b/sysdeps/ia64/dl-machine.h
@@ -121,9 +121,8 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
       reserve[0] = (Elf64_Addr) l;
 
       /* This function will be called to perform the relocation.  */
-      if (!profile)
-	doit = (Elf64_Addr) ELF_PTR_TO_FDESC (&_dl_runtime_resolve)->ip;
-      else
+#ifdef SHARED
+      if (__glibc_unlikely (profile))
 	{
 	  if (GLRO(dl_profile) != NULL
 	      && _dl_name_match_p (GLRO(dl_profile), l))
@@ -134,6 +133,11 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    }
 	  doit = (Elf64_Addr) ELF_PTR_TO_FDESC (&_dl_runtime_profile)->ip;
 	}
+      else
+#endif
+	{
+	  doit = (Elf64_Addr) ELF_PTR_TO_FDESC (&_dl_runtime_resolve)->ip;
+	}
 
       reserve[1] = doit;
       reserve[2] = gp;
diff --git a/sysdeps/ia64/dl-trampoline.S b/sysdeps/ia64/dl-trampoline.S
index 54b33c8c02..10d8432c8f 100644
--- a/sysdeps/ia64/dl-trampoline.S
+++ b/sysdeps/ia64/dl-trampoline.S
@@ -188,7 +188,7 @@  END(_dl_runtime_resolve)
 #define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16)
 #define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16)
 
-#ifndef PROF
+#if !defined PROF && defined SHARED
 ENTRY(_dl_runtime_profile)
 	{ .mii
 	  .prologue
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index 57913cefaa..0d17fd21e3 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -287,15 +287,16 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	 to intercept the calls to collect information.  In this case we
 	 don't store the address in the GOT so that all future calls also
 	 end in this function.  */
+#ifdef SHARED
       if (profile != 0)
 	{
-#if !defined __loongarch_soft_float
+# if !defined __loongarch_soft_float
 	  if (SUPPORT_LASX)
 	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
 	  else if (SUPPORT_LSX)
 	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
 	  else
-#endif
+# endif
 	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
 
 	  if (GLRO(dl_profile) != NULL
@@ -305,6 +306,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    GL(dl_profile_map) = l;
 	}
       else
+#endif
 	{
 	  /* This function will get called to fix up the GOT entry
 	     indicated by the offset on the stack, and then jump to
diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h
index e298439d39..1da70aeb23 100644
--- a/sysdeps/loongarch/dl-trampoline.h
+++ b/sysdeps/loongarch/dl-trampoline.h
@@ -126,6 +126,7 @@  ENTRY (_dl_runtime_resolve)
 	jirl	zero, t1, 0
 END (_dl_runtime_resolve)
 
+#ifdef SHARED
 #include "dl-link.h"
 
 ENTRY (_dl_runtime_profile)
@@ -367,3 +368,4 @@  ENTRY (_dl_runtime_profile)
 	jirl	zero, ra, 0
 
 END (_dl_runtime_profile)
+#endif /* SHARED */
diff --git a/sysdeps/m68k/dl-machine.h b/sysdeps/m68k/dl-machine.h
index 5ee586b27b..8d7e733e2a 100644
--- a/sysdeps/m68k/dl-machine.h
+++ b/sysdeps/m68k/dl-machine.h
@@ -75,7 +75,6 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 {
   Elf32_Addr *got;
   extern void _dl_runtime_resolve (Elf32_Word);
-  extern void _dl_runtime_profile (Elf32_Word);
 
   if (l->l_info[DT_JMPREL] && lazy)
     {
@@ -93,6 +92,8 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	 to intercept the calls to collect information.  In this case we
 	 don't store the address in the GOT so that all future calls also
 	 end in this function.  */
+#ifdef SHARED
+      extern void _dl_runtime_profile (Elf32_Word);
       if (profile)
 	{
 	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
@@ -106,6 +107,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    }
 	}
       else
+#endif
 	/* This function will get called to fix up the GOT entry indicated by
 	   the offset on the stack, and then jump to the resolved address.  */
 	got[2] = (Elf32_Addr) &_dl_runtime_resolve;
diff --git a/sysdeps/m68k/dl-trampoline.S b/sysdeps/m68k/dl-trampoline.S
index dba3741400..f1b4943868 100644
--- a/sysdeps/m68k/dl-trampoline.S
+++ b/sysdeps/m68k/dl-trampoline.S
@@ -60,6 +60,7 @@  _dl_runtime_resolve:
 	cfi_endproc
 	.size _dl_runtime_resolve, . - _dl_runtime_resolve
 
+#ifdef SHARED
 	.text
 	.globl _dl_runtime_profile
 	.type _dl_runtime_profile, @function
@@ -220,3 +221,4 @@  _dl_runtime_profile:
 	rts
 	cfi_endproc
 	.size _dl_runtime_profile, . - _dl_runtime_profile
+#endif /* SHARED */
diff --git a/sysdeps/powerpc/powerpc32/dl-machine.c b/sysdeps/powerpc/powerpc32/dl-machine.c
index ef84911ede..e6b603de94 100644
--- a/sysdeps/powerpc/powerpc32/dl-machine.c
+++ b/sysdeps/powerpc/powerpc32/dl-machine.c
@@ -226,7 +226,7 @@  __elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
 	  Elf32_Word dlrr;
 	  Elf32_Word offset;
 
-#ifndef PROF
+#if !defined PROF && defined SHARED
 	  dlrr = (Elf32_Word) (profile
 			       ? _dl_prof_resolve
 			       : _dl_runtime_resolve);
diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h
index a4cad7583c..1ff46d5f8a 100644
--- a/sysdeps/powerpc/powerpc32/dl-machine.h
+++ b/sysdeps/powerpc/powerpc32/dl-machine.h
@@ -188,15 +188,19 @@  elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
       extern void _dl_runtime_resolve (void);
       extern void _dl_prof_resolve (void);
 
-      if (__glibc_likely (!profile))
-	dlrr = _dl_runtime_resolve;
-      else
+#ifdef SHARED
+      if (__glibc_unlikely (profile))
 	{
 	  if (GLRO(dl_profile) != NULL
 	      &&_dl_name_match_p (GLRO(dl_profile), map))
 	    GL(dl_profile_map) = map;
 	  dlrr = _dl_prof_resolve;
 	}
+      else
+#endif
+	{
+	  dlrr = _dl_runtime_resolve;
+	}
       got = (Elf32_Addr *) map->l_info[DT_PPC(GOT)]->d_un.d_ptr;
       glink = got[1];
       got[1] = (Elf32_Addr) dlrr;
diff --git a/sysdeps/powerpc/powerpc32/dl-trampoline.S b/sysdeps/powerpc/powerpc32/dl-trampoline.S
index 93b1673ebb..be8de0e2dc 100644
--- a/sysdeps/powerpc/powerpc32/dl-trampoline.S
+++ b/sysdeps/powerpc/powerpc32/dl-trampoline.S
@@ -70,7 +70,7 @@  _dl_runtime_resolve:
 	cfi_endproc
 	.size	 _dl_runtime_resolve,.-_dl_runtime_resolve
 
-#ifndef PROF
+#if !defined PROF && defined SHARED
 	.align 2
 	.globl _dl_prof_resolve
 	.type _dl_prof_resolve,@function
diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
index 449208e86f..601c3cba9d 100644
--- a/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -362,13 +362,19 @@  elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
 	  Elf64_Word offset;
 	  Elf64_Addr dlrr;
 
-	  dlrr = (Elf64_Addr) (profile ? _dl_profile_resolve
-				       : _dl_runtime_resolve);
-	  if (profile && GLRO(dl_profile) != NULL
-	      && _dl_name_match_p (GLRO(dl_profile), map))
-	    /* This is the object we are looking for.  Say that we really
-	       want profiling and the timers are started.  */
-	    GL(dl_profile_map) = map;
+#ifdef SHARED
+	  if (__glibc_unlikely (profile))
+	    {
+	      dlrr = (Elf64_Addr) _dl_profile_resolve;
+	      if (profile && GLRO(dl_profile) != NULL
+		  && _dl_name_match_p (GLRO(dl_profile), map))
+		/* This is the object we are looking for.  Say that we really
+		   want profiling and the timers are started.  */
+		GL(dl_profile_map) = map;
+	    }
+	  else
+#endif
+	    dlrr = (Elf64_Addr) _dl_runtime_resolve;
 
 #if _CALL_ELF != 2
 	  /* We need to stuff the address/TOC of _dl_runtime_resolve
diff --git a/sysdeps/powerpc/powerpc64/dl-trampoline.S b/sysdeps/powerpc/powerpc64/dl-trampoline.S
index 1d04ec8109..b2fc2bb133 100644
--- a/sysdeps/powerpc/powerpc64/dl-trampoline.S
+++ b/sysdeps/powerpc/powerpc64/dl-trampoline.S
@@ -195,7 +195,7 @@  END(_dl_runtime_resolve)
    and r11 contains the link_map (from PLT0+16).  The link_map becomes
    parm1 (r3) and the index (r0) needs to be converted to an offset
    (index * 24) in parm2 (r4).  */
-#ifndef PROF
+#if !defined PROF && defined SHARED
 	.hidden _dl_profile_resolve
 ENTRY (_dl_profile_resolve, 4)
 /* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we
diff --git a/sysdeps/s390/s390-32/dl-machine.h b/sysdeps/s390/s390-32/dl-machine.h
index 100a3e05f6..b8bf2796c7 100644
--- a/sysdeps/s390/s390-32/dl-machine.h
+++ b/sysdeps/s390/s390-32/dl-machine.h
@@ -124,16 +124,17 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	 to intercept the calls to collect information.  In this case we
 	 don't store the address in the GOT so that all future calls also
 	 end in this function.  */
+#ifdef SHARED
       if (__glibc_unlikely (profile))
 	{
-#if defined HAVE_S390_VX_ASM_SUPPORT
+# if defined HAVE_S390_VX_ASM_SUPPORT
 	  if (GLRO(dl_hwcap) & HWCAP_S390_VX)
 	    got[2] = (Elf32_Addr) &_dl_runtime_profile_vx;
 	  else
 	    got[2] = (Elf32_Addr) &_dl_runtime_profile;
-#else
+# else
 	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
-#endif
+# endif
 
 	  if (GLRO(dl_profile) != NULL
 	      && _dl_name_match_p (GLRO(dl_profile), l))
@@ -142,6 +143,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    GL(dl_profile_map) = l;
 	}
       else
+#endif
 	{
 	  /* This function will get called to fix up the GOT entry indicated by
 	     the offset on the stack, and then jump to the resolved address.  */
diff --git a/sysdeps/s390/s390-32/dl-trampoline.h b/sysdeps/s390/s390-32/dl-trampoline.h
index 78fdca9d53..8093ab08d3 100644
--- a/sysdeps/s390/s390-32/dl-trampoline.h
+++ b/sysdeps/s390/s390-32/dl-trampoline.h
@@ -148,7 +148,7 @@  _dl_runtime_resolve:
 #undef F0_OFF
 #undef F2_OFF
 
-#ifndef PROF
+#if !defined PROF && defined SHARED
 # define SIZEOF_STRUCT_LA_S390_32_REGS 168
 # define REGS_OFF -264
 # define R2_OFF -264
diff --git a/sysdeps/s390/s390-64/dl-machine.h b/sysdeps/s390/s390-64/dl-machine.h
index 9fabb09750..82259dad64 100644
--- a/sysdeps/s390/s390-64/dl-machine.h
+++ b/sysdeps/s390/s390-64/dl-machine.h
@@ -111,16 +111,17 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	 to intercept the calls to collect information.	 In this case we
 	 don't store the address in the GOT so that all future calls also
 	 end in this function.	*/
+#ifdef SHARED
       if (__glibc_unlikely (profile))
 	{
-#if defined HAVE_S390_VX_ASM_SUPPORT
+# if defined HAVE_S390_VX_ASM_SUPPORT
 	  if (GLRO(dl_hwcap) & HWCAP_S390_VX)
 	    got[2] = (Elf64_Addr) &_dl_runtime_profile_vx;
 	  else
 	    got[2] = (Elf64_Addr) &_dl_runtime_profile;
-#else
+# else
 	  got[2] = (Elf64_Addr) &_dl_runtime_profile;
-#endif
+# endif
 
 	  if (GLRO(dl_profile) != NULL
 	      && _dl_name_match_p (GLRO(dl_profile), l))
@@ -129,6 +130,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    GL(dl_profile_map) = l;
 	}
       else
+#endif
 	{
 	  /* This function will get called to fix up the GOT entry indicated by
 	     the offset on the stack, and then jump to the resolved address.  */
diff --git a/sysdeps/s390/s390-64/dl-trampoline.h b/sysdeps/s390/s390-64/dl-trampoline.h
index 3a7cfc5f92..61188119e6 100644
--- a/sysdeps/s390/s390-64/dl-trampoline.h
+++ b/sysdeps/s390/s390-64/dl-trampoline.h
@@ -150,7 +150,7 @@  _dl_runtime_resolve:
 #undef F4_OFF
 #undef F6_OFF
 
-#ifndef PROF
+#if !defined PROF && defined SHARED
 # define SIZEOF_STRUCT_LA_S390_64_REGS 200
 # define REGS_OFF -360
 # define R2_OFF -360
diff --git a/sysdeps/sh/dl-machine.h b/sysdeps/sh/dl-machine.h
index 0e4eac42c3..e0480eae5a 100644
--- a/sysdeps/sh/dl-machine.h
+++ b/sysdeps/sh/dl-machine.h
@@ -101,6 +101,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	 to intercept the calls to collect information.	 In this case we
 	 don't store the address in the GOT so that all future calls also
 	 end in this function.	*/
+#ifdef SHARED
       if (profile)
 	{
 	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
@@ -110,6 +111,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    GL(dl_profile_map) = l;
 	}
       else
+#endif
 	/* This function will get called to fix up the GOT entry indicated by
 	   the offset on the stack, and then jump to the resolved address.  */
 	got[2] = (Elf32_Addr) &_dl_runtime_resolve;
diff --git a/sysdeps/sh/dl-trampoline.S b/sysdeps/sh/dl-trampoline.S
index 5d703341ed..ecaae34db4 100644
--- a/sysdeps/sh/dl-trampoline.S
+++ b/sysdeps/sh/dl-trampoline.S
@@ -142,6 +142,7 @@  _dl_runtime_resolve:
 	.size _dl_runtime_resolve, .-_dl_runtime_resolve
 
 
+#ifdef SHARED
 	.globl _dl_runtime_profile
 	.type _dl_runtime_profile,@function
 	cfi_startproc
@@ -428,3 +429,4 @@  _dl_runtime_profile:
 8:	.long _dl_audit_pltexit
 #endif
 	.size _dl_runtime_profile, .-_dl_runtime_profile
+#endif /* SHARED */
diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h
index 9b57ae1a93..b10e541810 100644
--- a/sysdeps/sparc/sparc32/dl-machine.h
+++ b/sysdeps/sparc/sparc32/dl-machine.h
@@ -116,7 +116,8 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	 bits of %g1 with an offset into the .rela.plt section and jump to
 	 the beginning of the PLT.  */
       plt = (Elf32_Addr *) D_PTR (l, l_info[DT_PLTGOT]);
-      if (__builtin_expect(profile, 0))
+#ifdef SHARED
+      if (__glibc_unlikely (profile))
 	{
 	  rfunc = (Elf32_Addr) &_dl_runtime_profile;
 
@@ -125,6 +126,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    GL(dl_profile_map) = l;
 	}
       else
+#endif
 	{
 	  rfunc = (Elf32_Addr) &_dl_runtime_resolve;
 	}
diff --git a/sysdeps/sparc/sparc32/dl-trampoline.S b/sysdeps/sparc/sparc32/dl-trampoline.S
index 08ff31b474..5e7d860ae4 100644
--- a/sysdeps/sparc/sparc32/dl-trampoline.S
+++ b/sysdeps/sparc/sparc32/dl-trampoline.S
@@ -47,6 +47,7 @@  _dl_runtime_resolve:
 
 	.size	_dl_runtime_resolve, .-_dl_runtime_resolve
 
+#ifdef SHARED
 	/* For the profiling cases we pass in our stack frame
 	 * as the base of the La_sparc32_regs, so it looks
 	 * like:
@@ -185,3 +186,4 @@  _dl_runtime_profile:
 	cfi_endproc
 
 	.size	_dl_runtime_profile, .-_dl_runtime_profile
+#endif
diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h
index 2f04ac550e..98469e7604 100644
--- a/sysdeps/sparc/sparc64/dl-machine.h
+++ b/sysdeps/sparc/sparc64/dl-machine.h
@@ -136,7 +136,8 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
       Elf64_Addr res0_addr, res1_addr;
       unsigned int *plt = (void *) D_PTR (l, l_info[DT_PLTGOT]);
 
-      if (__builtin_expect(profile, 0))
+#ifdef SHARED
+      if (__glibc_unlikely (profile))
 	{
 	  res0_addr = (Elf64_Addr) &_dl_runtime_profile_0;
 	  res1_addr = (Elf64_Addr) &_dl_runtime_profile_1;
@@ -146,6 +147,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    GL(dl_profile_map) = l;
 	}
       else
+#endif
 	{
 	  res0_addr = (Elf64_Addr) &_dl_runtime_resolve_0;
 	  res1_addr = (Elf64_Addr) &_dl_runtime_resolve_1;
diff --git a/sysdeps/sparc/sparc64/dl-trampoline.S b/sysdeps/sparc/sparc64/dl-trampoline.S
index 444690a71e..82b42681dd 100644
--- a/sysdeps/sparc/sparc64/dl-trampoline.S
+++ b/sysdeps/sparc/sparc64/dl-trampoline.S
@@ -92,6 +92,7 @@  _dl_runtime_resolve_1:
 
 	.size	_dl_runtime_resolve_1, .-_dl_runtime_resolve_1
 
+#ifdef SHARED
 	/* For the profiling cases we pass in our stack frame
 	 * as the base of the La_sparc64_regs, so it looks
 	 * like:
@@ -323,3 +324,4 @@  _dl_runtime_profile_1:
 	cfi_endproc
 
 	.size	_dl_runtime_resolve_1, .-_dl_runtime_resolve_1
+#endif
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index 9ea2a70837..581a2f1a9e 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -89,6 +89,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 
       const struct cpu_features* cpu_features = __get_cpu_features ();
 
+#ifdef SHARED
       /* The got[2] entry contains the address of a function which gets
 	 called to get the address of a so far unresolved function and
 	 jump to it.  The profiling extension of the dynamic linker allows
@@ -111,6 +112,7 @@  elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    GL(dl_profile_map) = l;
 	}
       else
+#endif
 	{
 	  /* This function will get called to fix up the GOT entry
 	     indicated by the offset on the stack, and then jump to
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index a6b9a1826b..3fd30d58fc 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -53,45 +53,49 @@ 
 
 #define RESTORE_AVX
 
-#define VEC_SIZE		64
-#define VMOVA			vmovdqa64
-#define VEC(i)			zmm##i
-#define _dl_runtime_profile	_dl_runtime_profile_avx512
-# define SECTION(p)		p##.evex512
-#include "dl-trampoline.h"
-#undef _dl_runtime_profile
-#undef VEC
-#undef VMOVA
-#undef VEC_SIZE
-#undef SECTION
-
-#if MINIMUM_X86_ISA_LEVEL <= AVX_X86_ISA_LEVEL
-# define VEC_SIZE		32
-# define VMOVA			vmovdqa
-# define VEC(i)			ymm##i
-# define SECTION(p)		p##.avx
-# define _dl_runtime_profile	_dl_runtime_profile_avx
+#ifdef SHARED
+# define VEC_SIZE		64
+# define VMOVA			vmovdqa64
+# define VEC(i)			zmm##i
+# define _dl_runtime_profile	_dl_runtime_profile_avx512
+#  define SECTION(p)		p##.evex512
 # include "dl-trampoline.h"
 # undef _dl_runtime_profile
 # undef VEC
 # undef VMOVA
 # undef VEC_SIZE
 # undef SECTION
-#endif
 
-#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
+# if MINIMUM_X86_ISA_LEVEL <= AVX_X86_ISA_LEVEL
+#  define VEC_SIZE		32
+#  define VMOVA			vmovdqa
+#  define VEC(i)			ymm##i
+#  define SECTION(p)		p##.avx
+#  define _dl_runtime_profile	_dl_runtime_profile_avx
+#  include "dl-trampoline.h"
+#  undef _dl_runtime_profile
+#  undef VEC
+#  undef VMOVA
+#  undef VEC_SIZE
+#  undef SECTION
+# endif
+
+# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
 /* movaps/movups is 1-byte shorter.  */
-# define VEC_SIZE		16
-# define VMOVA			movaps
-# define VEC(i)			xmm##i
-# define _dl_runtime_profile	_dl_runtime_profile_sse
-# undef RESTORE_AVX
-# include "dl-trampoline.h"
-# undef _dl_runtime_profile
-# undef VEC
-# undef VMOVA
-# undef VEC_SIZE
+#  define VEC_SIZE		16
+#  define VMOVA			movaps
+#  define VEC(i)			xmm##i
+#  define _dl_runtime_profile	_dl_runtime_profile_sse
+#  undef RESTORE_AVX
+#  include "dl-trampoline.h"
+#  undef _dl_runtime_profile
+#  undef VEC
+#  undef VMOVA
+#  undef VEC_SIZE
+# endif
+#endif /* SHARED */
 
+#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
 # define USE_FXSAVE
 # define STATE_SAVE_ALIGNMENT	16
 # define _dl_runtime_resolve	_dl_runtime_resolve_fxsave