New feature proposal: pureglibc
Commit Message
glibc, like many standard C library implementations, can be seen as the union of two libraries:
* the actual C library which provides high level services, using the system calls
of the underlying kernel,
* the interface library to the kernel, which dispatches the system call
to the kernel and retrieves the results.
A pure C library is a library providing only the former item.
It is useful to have a pure C library when a process needs to use self-virtualization.
In view-os, for example, using self-virtualization I can run modules like umfuseext2
further virtualizing the system calls generated by the ext2fs and glibc libraries.
In this way the file system image can be something generated on-the-fly instead of the
file that the ext2fs design requires.
This is just an example. Many other applications can be found using coding creativity.
I am currently using a tricky and partial implementation of purelibc as an
overlay shared library redefining some glibc functions.
This is clearly a workaround.
https://sourceforge.net/p/view-os/code/HEAD/tree/trunk/purelibc/
http://wiki.v2.cs.unibo.it/wiki/index.php?title=PureLibc
The patch here attached is a draft implementation of a "pureglibc":
a global variable makes it possible to divert all the system calls generated
by glibc to a process-provided function. When this variable is non-NULL, glibc
becomes a 'pure' C library: system calls can be traced and virtualized.
The patch currently implements pureglibc for the x86_64 architecture only.
I am posting this as a proposal for a new feature, asking for comments and
for alternative (effective) ways to implement the same feature.
Thank you.
renzo
Here below: two "hello world" examples, one for tracing, one for self-virtualization, and the patch.
Example #1:
System Call tracing:
------
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdarg.h>
#include <string.h>
#include <sys/syscall.h>
#include <dlfcn.h>
/*
 * Address of the C library's "pure_syscall" hook variable.
 * main() fills it in with dlsym(RTLD_DEFAULT, "pure_syscall") and, when the
 * lookup yields a non-NULL address, installs the hook by storing a function
 * pointer through it.
 */
long (**pure_ptr)(long number, long nr, ...);
/*
 * Format one traced system call as
 *     "syscall <number> - <arg> <arg> ...\n"
 * (every argument as 16 zero-padded hex digits) and write the line to
 * file descriptor 1.
 *
 * number: system call number.
 * nr:     how many entries of args to print.
 * args:   argument values; must hold at least nr entries.
 *
 * Arguments that would not fit in the local buffer are dropped whole; the
 * line is always terminated by a newline.
 *
 * The line is emitted with syscall(__NR_write, ...) instead of stdio.
 * NOTE(review): presumably this keeps the tracer from re-entering the
 * installed hook through the library's own write path -- confirm.
 */
void printsyscall(long number, long nr, long *args) {
	char buf[256];
	size_t len;
	long i;
	int n;

	/* number is a long: %ld, not %d. */
	n = snprintf(buf, sizeof buf, "syscall %ld -", number);
	len = n < 0 ? 0 : (size_t) n;

	for (i = 0; i < nr; i++) {
		/* One byte is always kept in reserve for the final newline. */
		size_t room = sizeof buf - 1 - len;

		/* args[] holds longs: %016lx, not %016x. */
		n = snprintf(buf + len, room, " %016lx", (unsigned long) args[i]);
		if (n < 0 || (size_t) n >= room)
			break;	/* no room left: drop this and later arguments */
		len += (size_t) n;
	}

	buf[len++] = '\n';
	syscall(__NR_write, 1, buf, len);
}
#define NSYSARG 6

/*
 * Tracing hook: logs the call with printsyscall() and then performs it
 * through syscall().
 *
 * number: system call number.
 * nr:     number of variadic arguments that follow; each is read as a long.
 *         At most NSYSARG are consumed, the remaining slots are zero.
 *
 * Returns whatever syscall() returns.
 *
 * NOTE(review): syscall() reports failure as -1 with errno set, whereas the
 * patched INLINE_SYSCALL feeds the hook's result to
 * INTERNAL_SYSCALL_ERROR_P / INTERNAL_SYSCALL_ERRNO.  Presumably the hook
 * should hand back -errno on failure -- confirm against those macros.
 */
long mysyscall(long number, long nr, ...) {
	long arg[NSYSARG] = { 0 };
	long count = nr;
	va_list ap;
	long i;

	/* Never describe more arguments than arg[] can hold. */
	if (count > NSYSARG)
		count = NSYSARG;

	va_start(ap, nr);
	for (i = 0; i < count; i++)
		arg[i] = va_arg(ap, long);
	va_end(ap);

	printsyscall(number, count, arg);
	return syscall(number, arg[0], arg[1], arg[2], arg[3], arg[4], arg[5]);
}
/*
 * Look up the "pure_syscall" hook variable; if it exists, announce it and
 * install mysyscall as the hook.  Then print the greeting.
 * The announcement is printed before the hook is stored, the greeting after.
 */
int main() {
	void *hook_slot = dlsym(RTLD_DEFAULT, "pure_syscall");

	pure_ptr = hook_slot;
	if (pure_ptr != NULL) {
		printf("PURE enabled glibc found\n");
		*pure_ptr = mysyscall;
	}

	printf("hello world\n");
	return 0;
}
-------
Example #2:
Virtualization (when this shared object is preloaded, "open" or "openat"
syscalls on /etc/passwd open /tmp/passwd instead).
-------
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdarg.h>
#include <string.h>
#include <sys/syscall.h>
#include <dlfcn.h>
/*
 * Address of the C library's "pure_syscall" hook variable.
 * init() fills it in with dlsym(RTLD_DEFAULT, "pure_syscall") and, when the
 * lookup yields a non-NULL address, installs the hook by storing a function
 * pointer through it.
 */
long (**pure_ptr)(long number, long nr, ...);
#define NSYSARG 6

/*
 * Virtualizing hook: an open/openat of "/etc/passwd" is redirected to
 * "/tmp/passwd"; every call is then performed through syscall().
 *
 * number: system call number.
 * nr:     number of variadic arguments that follow; each is read as a long.
 *         At most NSYSARG are consumed, the remaining slots are zero.
 *
 * Returns whatever syscall() returns.
 */
long mysyscall(long number, long nr, ...) {
	long arg[NSYSARG];
	va_list ap;
	int i;
	int path_idx = -1;	/* index of the pathname argument, -1 if none */

	va_start(ap, nr);
	for (i = 0; i < NSYSARG; i++)
		arg[i] = i < nr ? va_arg(ap, long) : 0;
	va_end(ap);

#ifdef __NR_open	/* not every architecture defines an open syscall */
	if (number == __NR_open)
		path_idx = 0;
#endif
	if (number == __NR_openat)
		path_idx = 1;

	if (path_idx >= 0) {
		const char *path = (const char *) arg[path_idx];

		/* A NULL pathname is left for the kernel to reject. */
		if (path != NULL && strcmp(path, "/etc/passwd") == 0)
			arg[path_idx] = (long) "/tmp/passwd";
	}

	return syscall(number, arg[0], arg[1], arg[2], arg[3], arg[4], arg[5]);
}
/*
 * Constructor: look up the "pure_syscall" hook variable and, if it is
 * present, announce it and install mysyscall as the hook.
 */
__attribute__((constructor))
void init(void) {
	pure_ptr = dlsym(RTLD_DEFAULT, "pure_syscall");
	if (pure_ptr == NULL)
		return;

	printf("PURE enabled glibc found\n");
	*pure_ptr = mysyscall;
}
--------
Here is the patch:
2018-08-21 Renzo Davoli <renzo@cs.unibo.it>
Comments
On Tue, 22 Aug 2017, Renzo Davoli wrote:
> A pure C library is a library providing only the former item.
I'd expect such a library not to use any existing OS sysdeps directory in
glibc, or to use any of the existing syscalls code; rather, it would look
like a new OS to glibc and have an associated GCC port for compiling for
that OS. We removed the NaCl port, but it may illustrate the sort of
things that are relevant (as may the not-in-tree WebAssembly port that
also exists). Of course glibc builds and installs various executables, so
there would be a question of how generic executables can be built and
installed for such an OS environment (maybe one option for running such
executables would be an execution environment that sets a breakpoint on
the hook used for interfacing with the system and acts accordingly when
that breakpoint is reached, in addition to execution environments that
substitute an alternative implementation of that hook for direct
execution).
@@ -156,7 +156,7 @@ libc {
gnu_dev_major; gnu_dev_minor; gnu_dev_makedev;
}
GLIBC_2.26 {
- preadv2; preadv64v2; pwritev2; pwritev64v2;
+ preadv2; preadv64v2; pwritev2; pwritev64v2; __pure_syscall; pure_syscall;
}
GLIBC_PRIVATE {
__madvise;
@@ -24,6 +24,8 @@ char *__progname = (char *) "";
weak_alias (__progname_full, program_invocation_name)
weak_alias (__progname, program_invocation_short_name)
+long int (*__pure_syscall)(long name, long nr, ...) = NULL;
+weak_alias (__pure_syscall, pure_syscall)
void
__init_misc (int argc, char **argv, char **envp)
@@ -193,7 +193,7 @@
# undef INLINE_SYSCALL
# define INLINE_SYSCALL(name, nr, args...) \
({ \
- unsigned long int resultvar = INTERNAL_SYSCALL (name, , nr, args); \
+ unsigned long int resultvar = INTERNAL_SYSCALL (name, , nr, ##args); \
if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (resultvar, ))) \
{ \
__set_errno (INTERNAL_SYSCALL_ERRNO (resultvar, )); \
@@ -221,6 +221,8 @@
/* Registers clobbered by syscall. */
# define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx"
+extern long int (*__pure_syscall)(long name, long nr, ...);
+
# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
({ \
unsigned long int resultvar; \
@@ -233,7 +235,13 @@
(long int) resultvar; })
# undef INTERNAL_SYSCALL
# define INTERNAL_SYSCALL(name, err, nr, args...) \
- INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args)
+ ({ \
+ long int resultvar; \
+ if (__glibc_unlikely (__pure_syscall != NULL)) \
+ resultvar = __pure_syscall(__NR_##name, nr, ##args); \
+ else \
+ resultvar = INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args); \
+ resultvar; })
# define INTERNAL_SYSCALL_NCS_TYPES(name, err, nr, args...) \
({ \