[01/26] Linux: Enhance glibcsyscalls.h to support listing system calls

Message ID aff7fcc5ee68faa1334ccfe8066ae3c42d927fe8.1581279333.git.fweimer@redhat.com
State Superseded
Headers

Commit Message

Florian Weimer Feb. 9, 2020, 8:19 p.m. UTC
  The script can now be called to query the definition status of
system call numbers across all architectures, like this:

$ python3 sysdeps/unix/sysv/linux/glibcsyscalls.py query-syscall sync_file_range sync_file_range2
sync_file_range:
  defined: aarch64 alpha csky hppa i386 ia64 m68k microblaze mips/mips32 mips/mips64/n32 mips/mips64/n64 nios2 riscv/rv64 s390/s390-32 s390/s390-64 sh sparc/sparc32 sparc/sparc64 x86_64/64 x86_64/x32
  undefined: arm powerpc/powerpc32 powerpc/powerpc64
sync_file_range2:
  defined: arm powerpc/powerpc32 powerpc/powerpc64
  undefined: aarch64 alpha csky hppa i386 ia64 m68k microblaze mips/mips32 mips/mips64/n32 mips/mips64/n64 nios2 riscv/rv64 s390/s390-32 s390/s390-64 sh sparc/sparc32 sparc/sparc64 x86_64/64 x86_64/x32
---
 sysdeps/unix/sysv/linux/glibcsyscalls.py | 86 +++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)
  

Comments

Adhemerval Zanella Netto Feb. 27, 2020, 10:14 p.m. UTC | #1
On 09/02/2020 17:19, Florian Weimer wrote:
> The script can now be called to query the definition status of
> system call numbers across all architectures, like this:
> 
> $ python3 sysdeps/unix/sysv/linux/glibcsyscalls.py query-syscall sync_file_range sync_file_range2
> sync_file_range:
>   defined: aarch64 alpha csky hppa i386 ia64 m68k microblaze mips/mips32 mips/mips64/n32 mips/mips64/n64 nios2 riscv/rv64 s390/s390-32 s390/s390-64 sh sparc/sparc32 sparc/sparc64 x86_64/64 x86_64/x32
>   undefined: arm powerpc/powerpc32 powerpc/powerpc64
> sync_file_range2:
>   defined: arm powerpc/powerpc32 powerpc/powerpc64
>   undefined: aarch64 alpha csky hppa i386 ia64 m68k microblaze mips/mips32 mips/mips64/n32 mips/mips64/n64 nios2 riscv/rv64 s390/s390-32 s390/s390-64 sh sparc/sparc32 sparc/sparc64 x86_64/64 x86_64/x32
> ---
>  sysdeps/unix/sysv/linux/glibcsyscalls.py | 86 +++++++++++++++++++++++-
>  1 file changed, 85 insertions(+), 1 deletion(-)
> 
> diff --git a/sysdeps/unix/sysv/linux/glibcsyscalls.py b/sysdeps/unix/sysv/linux/glibcsyscalls.py
> index de4d707e48..5beac42cb4 100644
> --- a/sysdeps/unix/sysv/linux/glibcsyscalls.py
> +++ b/sysdeps/unix/sysv/linux/glibcsyscalls.py
> @@ -17,9 +17,12 @@
>  # License along with the GNU C Library; if not, see
>  # <http://www.gnu.org/licenses/>.
>  
> +import os
>  import re

Now that this will be used as a script invoked directly by the
user, I think it is worth adding a more comprehensible description
(one that can be used as __doc__).

>  
> -import glibcextract
> +if __name__ != '__main__':
> +    # When called as a main program, this is not needed.
> +    import glibcextract
>  
>  def extract_system_call_name(macro):
>      """Convert the macro name (with __NR_) to a system call name."""
> @@ -168,3 +171,84 @@ def linux_kernel_version(cc):
>      val = glibcextract.compute_c_consts(sym_data, cc)['LINUX_VERSION_CODE']
>      val = int(val)
>      return ((val & 0xff0000) >> 16, (val & 0xff00) >> 8)
> +
> +class ArchSyscall:
> +    """Canonical name and location of a syscall header."""
> +
> +    def __init__(self, name, path):
> +        self.name = name
> +        self.path = path
> +
> +    def __repr__(self):
> +        return 'ArchSyscall(name={!r}, patch={!r})'.format(
> +            self.name, self.path)
> +
> +def list_arch_syscall_headers(topdir):
> +    """A generator which returns all the ArchSyscall objects in a tree."""
> +
> +    sysdeps = os.path.join(topdir, 'sysdeps', 'unix', 'sysv', 'linux')
> +    for root, dirs, files in os.walk(sysdeps):
> +        if root != sysdeps:
> +            for filename in files:
> +                if filename == 'arch-syscall.h':
> +                    yield ArchSyscall(
> +                        name=os.path.relpath(root, sysdeps),
> +                        path=os.path.join(root, filename))
> +
> +def __main():
> +    """Entry point when called as the main program."""
> +
> +    import sys
> +
> +    # Top-level directory of the source tree.
> +    topdir = os.path.realpath(os.path.join(
> +        os.path.dirname(os.path.realpath(__file__)), *('..',) * 4))
> +
> +    def usage(status):
> +        print('usage: glibcsyscalls list-headers')
> +        print('       glibcsyscalls query-syscall SYSCALL...')
> +        sys.exit(status)
> +
> +    if len(sys.argv) <= 1:
> +        usage(0)
> +
> +    command = sys.argv[1]
> +    if command == 'list-headers':
> +        # Print the absolute paths of all arch-syscall.h header files.
> +        if len(sys.argv) != 2:
> +            usage(1)
> +        for header in sorted([syscall.path for syscall
> +                              in list_arch_syscall_headers(topdir)]):
> +            print(header)
> +
> +    elif command == 'query-syscall':
> +        # Summarize the implementation status of the specified system calls.
> +        if len(sys.argv) < 3:
> +            usage(1)

Why not follow the other scripts and use argparse as well? Something like:

--
def main(argv):
    """Entry point when called as the main program."""

    import argparse

    # Top-level directory of the source tree.
    topdir = os.path.realpath(os.path.join(
        os.path.dirname(os.path.realpath(__file__)), *('..',) * 4))

    def get_parser():
        parser = argparse.ArgumentParser(description=__doc__)
        subparsers = parser.add_subparsers(dest='command', required=True)
        parser_a = subparsers.add_parser('list-headers');
        parser_b = subparsers.add_parser('query-syscall');
        parser_b.add_argument('syscalls',
                              help='Which syscalls to check',
                              nargs='+')
        return parser
    parser = get_parser();
    opts = parser.parse_args(argv)

    if opts.command == 'list-headers':
    [...]
--

> +
> +        # List of system call tables.
> +        tables = sorted(list_arch_syscall_headers(topdir),
> +                          key=lambda syscall: syscall.name)
> +        for table in tables:
> +            table.numbers = load_arch_syscall_header(table.path)
> +
> +        for nr in sys.argv[2:]:
> +            defined = [table.name for table in tables
> +                           if nr in table.numbers]
> +            undefined = [table.name for table in tables
> +                             if nr not in table.numbers]
> +            if not defined:
> +                print('{}: not defined on any architecture'.format(nr))
> +            elif not undefined:
> +                print('{}: defined on all architectures'.format(nr))
> +            else:
> +                print('{}:'.format(nr))
> +                print('  defined: {}'.format(' '.join(defined)))
> +                print('  undefined: {}'.format(' '.join(undefined)))
> +
> +    else:
> +        # Unrecognized command.
> +        usage(1)
> +
> +if __name__ == '__main__':
> +    __main()
> 

I don't think the double underscore is required here.
  
Florian Weimer May 11, 2020, 10:10 a.m. UTC | #2
* Adhemerval Zanella:

> Now that this will be used as a script invoked directly by the
> user, I think it is worth adding a more comprehensible description
> (one that can be used as __doc__).

Sorry, I missed that before posting the new version:

  <https://sourceware.org/pipermail/libc-alpha/2020-May/113777.html>

> Why not follow the other scripts and use argparse as well? Something like:
>
> --
> def main(argv):
>     """Entry point when called as the main program."""
>
>     import argparse
>
>     # Top-level directory of the source tree.
>     topdir = os.path.realpath(os.path.join(
>         os.path.dirname(os.path.realpath(__file__)), *('..',) * 4))
>
>     def get_parser():
>         parser = argparse.ArgumentParser(description=__doc__)
>         subparsers = parser.add_subparsers(dest='command', required=True)
>         parser_a = subparsers.add_parser('list-headers');
>         parser_b = subparsers.add_parser('query-syscall');
>         parser_b.add_argument('syscalls',
>                               help='Which syscalls to check',
>                               nargs='+')
>         return parser
>     parser = get_parser();
>     opts = parser.parse_args(argv)
>
>     if opts.command == 'list-headers':
>     [...]

I just didn't know enough about the argparse module to come up with this.

> I don't think double underscore is required here.

I like it because it prevents exporting.
  

Patch

diff --git a/sysdeps/unix/sysv/linux/glibcsyscalls.py b/sysdeps/unix/sysv/linux/glibcsyscalls.py
index de4d707e48..5beac42cb4 100644
--- a/sysdeps/unix/sysv/linux/glibcsyscalls.py
+++ b/sysdeps/unix/sysv/linux/glibcsyscalls.py
@@ -17,9 +17,12 @@ 
 # License along with the GNU C Library; if not, see
 # <http://www.gnu.org/licenses/>.
 
+import os
 import re
 
-import glibcextract
+if __name__ != '__main__':
+    # When called as a main program, this is not needed.
+    import glibcextract
 
 def extract_system_call_name(macro):
     """Convert the macro name (with __NR_) to a system call name."""
@@ -168,3 +171,84 @@  def linux_kernel_version(cc):
     val = glibcextract.compute_c_consts(sym_data, cc)['LINUX_VERSION_CODE']
     val = int(val)
     return ((val & 0xff0000) >> 16, (val & 0xff00) >> 8)
+
+class ArchSyscall:
+    """Canonical name and location of a syscall header."""
+
+    def __init__(self, name, path):
+        self.name = name
+        self.path = path
+
+    def __repr__(self):
+        return 'ArchSyscall(name={!r}, path={!r})'.format(
+            self.name, self.path)
+
+def list_arch_syscall_headers(topdir):
+    """A generator which returns all the ArchSyscall objects in a tree."""
+
+    sysdeps = os.path.join(topdir, 'sysdeps', 'unix', 'sysv', 'linux')
+    for root, dirs, files in os.walk(sysdeps):
+        if root != sysdeps:
+            for filename in files:
+                if filename == 'arch-syscall.h':
+                    yield ArchSyscall(
+                        name=os.path.relpath(root, sysdeps),
+                        path=os.path.join(root, filename))
+
+def __main():
+    """Entry point when called as the main program."""
+
+    import sys
+
+    # Top-level directory of the source tree.
+    topdir = os.path.realpath(os.path.join(
+        os.path.dirname(os.path.realpath(__file__)), *('..',) * 4))
+
+    def usage(status):
+        print('usage: glibcsyscalls list-headers')
+        print('       glibcsyscalls query-syscall SYSCALL...')
+        sys.exit(status)
+
+    if len(sys.argv) <= 1:
+        usage(0)
+
+    command = sys.argv[1]
+    if command == 'list-headers':
+        # Print the absolute paths of all arch-syscall.h header files.
+        if len(sys.argv) != 2:
+            usage(1)
+        for header in sorted([syscall.path for syscall
+                              in list_arch_syscall_headers(topdir)]):
+            print(header)
+
+    elif command == 'query-syscall':
+        # Summarize the implementation status of the specified system calls.
+        if len(sys.argv) < 3:
+            usage(1)
+
+        # List of system call tables.
+        tables = sorted(list_arch_syscall_headers(topdir),
+                          key=lambda syscall: syscall.name)
+        for table in tables:
+            table.numbers = load_arch_syscall_header(table.path)
+
+        for nr in sys.argv[2:]:
+            defined = [table.name for table in tables
+                           if nr in table.numbers]
+            undefined = [table.name for table in tables
+                             if nr not in table.numbers]
+            if not defined:
+                print('{}: not defined on any architecture'.format(nr))
+            elif not undefined:
+                print('{}: defined on all architectures'.format(nr))
+            else:
+                print('{}:'.format(nr))
+                print('  defined: {}'.format(' '.join(defined)))
+                print('  undefined: {}'.format(' '.join(undefined)))
+
+    else:
+        # Unrecognized command.
+        usage(1)
+
+if __name__ == '__main__':
+    __main()