Linux: Enhance glibcsyscalls.py to support listing system calls

Message ID 87y2pyg4x4.fsf@mid.deneb.enyo.de
State Committed
Headers
Series Linux: Enhance glibcsyscalls.py to support listing system calls

Commit Message

Florian Weimer May 11, 2020, 10:08 a.m. UTC
The script can now be called to query the definition status of
system call numbers across all architectures, like this:

$ python3 sysdeps/unix/sysv/linux/glibcsyscalls.py query-syscall sync_file_range sync_file_range2
sync_file_range:
  defined: aarch64 alpha csky hppa i386 ia64 m68k microblaze mips/mips32 mips/mips64/n32 mips/mips64/n64 nios2 riscv/rv64 s390/s390-32 s390/s390-64 sh sparc/sparc32 sparc/sparc64 x86_64/64 x86_64/x32
  undefined: arm powerpc/powerpc32 powerpc/powerpc64
sync_file_range2:
  defined: arm powerpc/powerpc32 powerpc/powerpc64
  undefined: aarch64 alpha csky hppa i386 ia64 m68k microblaze mips/mips32 mips/mips64/n32 mips/mips64/n64 nios2 riscv/rv64 s390/s390-32 s390/s390-64 sh sparc/sparc32 sparc/sparc64 x86_64/64 x86_64/x32

This command lists the headers containing the system call numbers:

$ python3 sysdeps/unix/sysv/linux/glibcsyscalls.py list-headers

The argument parser code is based on a suggestion from Adhemerval Zanella.

-----
 sysdeps/unix/sysv/linux/glibcsyscalls.py | 84 +++++++++++++++++++++++++++++++-
 1 file changed, 83 insertions(+), 1 deletion(-)
  

Comments

Adhemerval Zanella May 11, 2020, 8:06 p.m. UTC | #1
On 11/05/2020 07:08, Florian Weimer wrote:
> The script can now be called to query the definition status of
> system call numbers across all architectures, like this:
> 
> $ python3 sysdeps/unix/sysv/linux/glibcsyscalls.py query-syscall sync_file_range sync_file_range2
> sync_file_range:
>   defined: aarch64 alpha csky hppa i386 ia64 m68k microblaze mips/mips32 mips/mips64/n32 mips/mips64/n64 nios2 riscv/rv64 s390/s390-32 s390/s390-64 sh sparc/sparc32 sparc/sparc64 x86_64/64 x86_64/x32
>   undefined: arm powerpc/powerpc32 powerpc/powerpc64
> sync_file_range2:
>   defined: arm powerpc/powerpc32 powerpc/powerpc64
>   undefined: aarch64 alpha csky hppa i386 ia64 m68k microblaze mips/mips32 mips/mips64/n32 mips/mips64/n64 nios2 riscv/rv64 s390/s390-32 s390/s390-64 sh sparc/sparc32 sparc/sparc64 x86_64/64 x86_64/x32
> 
> This command lists the headers containing the system call numbers:
> 
> $ python3 sysdeps/unix/sysv/linux/glibcsyscalls.py list-headers
> 
> The argument parser code is based on a suggestion from Adhemerval Zanella.

LGTM, thanks.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>

> 
> -----
>  sysdeps/unix/sysv/linux/glibcsyscalls.py | 84 +++++++++++++++++++++++++++++++-
>  1 file changed, 83 insertions(+), 1 deletion(-)
> 
> diff --git a/sysdeps/unix/sysv/linux/glibcsyscalls.py b/sysdeps/unix/sysv/linux/glibcsyscalls.py
> index de4d707e48..8d45cd4333 100644
> --- a/sysdeps/unix/sysv/linux/glibcsyscalls.py
> +++ b/sysdeps/unix/sysv/linux/glibcsyscalls.py
> @@ -17,9 +17,12 @@
>  # License along with the GNU C Library; if not, see
>  # <http://www.gnu.org/licenses/>.
>  
> +import os
>  import re
>  
> -import glibcextract
> +if __name__ != '__main__':
> +    # When called as a main program, this is not needed.
> +    import glibcextract
>  
>  def extract_system_call_name(macro):
>      """Convert the macro name (with __NR_) to a system call name."""
> @@ -168,3 +171,82 @@ def linux_kernel_version(cc):
>      val = glibcextract.compute_c_consts(sym_data, cc)['LINUX_VERSION_CODE']
>      val = int(val)
>      return ((val & 0xff0000) >> 16, (val & 0xff00) >> 8)
> +
> +class ArchSyscall:
> +    """Canonical name and location of a syscall header."""
> +
> +    def __init__(self, name, path):
> +        self.name = name
> +        self.path = path
> +
> +    def __repr__(self):
> +        return 'ArchSyscall(name={!r}, patch={!r})'.format(
> +            self.name, self.path)

I am missing why you need to call the repr() for the str objects issued
by the generator below.

> +
> +def list_arch_syscall_headers(topdir):
> +    """A generator which returns all the ArchSyscall objects in a tree."""
> +
> +    sysdeps = os.path.join(topdir, 'sysdeps', 'unix', 'sysv', 'linux')
> +    for root, dirs, files in os.walk(sysdeps):
> +        if root != sysdeps:
> +            for filename in files:
> +                if filename == 'arch-syscall.h':
> +                    yield ArchSyscall(
> +                        name=os.path.relpath(root, sysdeps),
> +                        path=os.path.join(root, filename))
> +
> +def __main():
> +    """Entry point when called as the main program."""
> +
> +    import argparse
> +    import sys
> +
> +    # Top-level directory of the source tree.
> +    topdir = os.path.realpath(os.path.join(
> +        os.path.dirname(os.path.realpath(__file__)), *('..',) * 4))
> +
> +    def get_parser():
> +        parser = argparse.ArgumentParser(description=__doc__)
> +        subparsers = parser.add_subparsers(dest='command', required=True)
> +        subparsers.add_parser('list-headers',
> +            help='Print the absolute paths of all arch-syscall.h header files')
> +        subparser = subparsers.add_parser('query-syscall',
> +            help='Summarize the implementation status of system calls')
> +        subparser.add_argument('syscalls', help='Which syscalls to check',
> +                               nargs='+')
> +        return parser
> +    parser = get_parser()
> +    args = parser.parse_args()
> +
> +    if args.command == 'list-headers':
> +        for header in sorted([syscall.path for syscall
> +                              in list_arch_syscall_headers(topdir)]):
> +            print(header)
> +
> +    elif args.command == 'query-syscall':
> +        # List of system call tables.
> +        tables = sorted(list_arch_syscall_headers(topdir),
> +                          key=lambda syscall: syscall.name)
> +        for table in tables:
> +            table.numbers = load_arch_syscall_header(table.path)
> +
> +        for nr in args.syscalls:
> +            defined = [table.name for table in tables
> +                           if nr in table.numbers]
> +            undefined = [table.name for table in tables
> +                             if nr not in table.numbers]
> +            if not defined:
> +                print('{}: not defined on any architecture'.format(nr))
> +            elif not undefined:
> +                print('{}: defined on all architectures'.format(nr))
> +            else:
> +                print('{}:'.format(nr))
> +                print('  defined: {}'.format(' '.join(defined)))
> +                print('  undefined: {}'.format(' '.join(undefined)))
> +
> +    else:
> +        # Unrecognized command.
> +        usage(1)
> +
> +if __name__ == '__main__':
> +    __main()
>
  
Florian Weimer May 11, 2020, 8:13 p.m. UTC | #2
* Adhemerval Zanella via Libc-alpha:

>> +class ArchSyscall:
>> +    """Canonical name and location of a syscall header."""
>> +
>> +    def __init__(self, name, path):
>> +        self.name = name
>> +        self.path = path
>> +
>> +    def __repr__(self):
>> +        return 'ArchSyscall(name={!r}, patch={!r})'.format(
>> +            self.name, self.path)
>
> I am missing why you need to call the repr() for the str objects issued
> by the generator below.

I don't, but I want to make sure that we get good debugging output if
needed.

Should I still make the __doc__ change?  The --help output from the
script itself is quite reasonable.
  
Adhemerval Zanella May 11, 2020, 8:14 p.m. UTC | #3
On 11/05/2020 17:13, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>>> +class ArchSyscall:
>>> +    """Canonical name and location of a syscall header."""
>>> +
>>> +    def __init__(self, name, path):
>>> +        self.name = name
>>> +        self.path = path
>>> +
>>> +    def __repr__(self):
>>> +        return 'ArchSyscall(name={!r}, patch={!r})'.format(
>>> +            self.name, self.path)
>>
>> I am missing why you need to call the repr() for the str objects issued
>> by the generator below.
> 
> I don't, but I want to make sure that we get good debugging output if
> needed.
> 
> Should I still make the __doc__ change?  The --help output from the
> script itself is quite reasonable.
> 

I am fine with the __doc__ change.
  
Florian Weimer May 11, 2020, 8:17 p.m. UTC | #4
* Adhemerval Zanella:

> On 11/05/2020 17:13, Florian Weimer wrote:
>> * Adhemerval Zanella via Libc-alpha:
>> 
>>>> +class ArchSyscall:
>>>> +    """Canonical name and location of a syscall header."""
>>>> +
>>>> +    def __init__(self, name, path):
>>>> +        self.name = name
>>>> +        self.path = path
>>>> +
>>>> +    def __repr__(self):
>>>> +        return 'ArchSyscall(name={!r}, patch={!r})'.format(
>>>> +            self.name, self.path)
>>>
>>> I am missing why you need to call the repr() for the str objects issued
>>> by the generator below.
>> 
>> I don't, but I want to make sure that we get good debugging output if
>> needed.
>> 
>> Should I still make the __doc__ change?  The --help output from the
>> script itself is quite reasonable.
>> 
>
> I am fine with the __doc__ change.

Uhm, which __doc__ change?  There isn't one.

Output from the script is quite reasonable without it:

$ python3 sysdeps/unix/sysv/linux/glibcsyscalls.py 
usage: glibcsyscalls.py [-h] {list-headers,query-syscall} ...
glibcsyscalls.py: error: the following arguments are required: command
$ python3 sysdeps/unix/sysv/linux/glibcsyscalls.py  --help
usage: glibcsyscalls.py [-h] {list-headers,query-syscall} ...

positional arguments:
  {list-headers,query-syscall}
    list-headers        Print the absolute paths of all arch-syscall.h header
                        files
    query-syscall       Summarize the implementation status of system calls

optional arguments:
  -h, --help            show this help message and exit
  
Adhemerval Zanella May 11, 2020, 8:21 p.m. UTC | #5
On 11/05/2020 17:17, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> On 11/05/2020 17:13, Florian Weimer wrote:
>>> * Adhemerval Zanella via Libc-alpha:
>>>
>>>>> +class ArchSyscall:
>>>>> +    """Canonical name and location of a syscall header."""
>>>>> +
>>>>> +    def __init__(self, name, path):
>>>>> +        self.name = name
>>>>> +        self.path = path
>>>>> +
>>>>> +    def __repr__(self):
>>>>> +        return 'ArchSyscall(name={!r}, patch={!r})'.format(
>>>>> +            self.name, self.path)
>>>>
>>>> I am missing why you need to call the repr() for the str objects issued
>>>> by the generator below.
>>>
>>> I don't, but I want to make sure that we get good debugging output if
>>> needed.
>>>
>>> Should I still make the __doc__ change?  The --help output from the
>>> script itself is quite reasonable.
>>>
>>
>> I am fine with the __doc__ change.
> 
> Uhm, which __doc__ change?  There isn't one.

Sorry, I meant patch is ok as is.

> 
> Output from the script is quite reasonable without it:
> 
> $ python3 sysdeps/unix/sysv/linux/glibcsyscalls.py 
> usage: glibcsyscalls.py [-h] {list-headers,query-syscall} ...
> glibcsyscalls.py: error: the following arguments are required: command
> $ python3 sysdeps/unix/sysv/linux/glibcsyscalls.py  --help
> usage: glibcsyscalls.py [-h] {list-headers,query-syscall} ...
> 
> positional arguments:
>   {list-headers,query-syscall}
>     list-headers        Print the absolute paths of all arch-syscall.h header
>                         files
>     query-syscall       Summarize the implementation status of system calls
> 
> optional arguments:
>   -h, --help            show this help message and exit
>
  
Florian Weimer May 11, 2020, 8:22 p.m. UTC | #6
* Adhemerval Zanella:

> On 11/05/2020 17:17, Florian Weimer wrote:
>> * Adhemerval Zanella:
>> 
>>> On 11/05/2020 17:13, Florian Weimer wrote:
>>>> * Adhemerval Zanella via Libc-alpha:
>>>>
>>>>>> +class ArchSyscall:
>>>>>> +    """Canonical name and location of a syscall header."""
>>>>>> +
>>>>>> +    def __init__(self, name, path):
>>>>>> +        self.name = name
>>>>>> +        self.path = path
>>>>>> +
>>>>>> +    def __repr__(self):
>>>>>> +        return 'ArchSyscall(name={!r}, patch={!r})'.format(
>>>>>> +            self.name, self.path)
>>>>>
>>>>> I am missing why you need to call the repr() for the str objects issued
>>>>> by the generator below.
>>>>
>>>> I don't, but I want to make sure that we get good debugging output if
>>>> needed.
>>>>
>>>> Should I still make the __doc__ change?  The --help output from the
>>>> script itself is quite reasonable.
>>>>
>>>
>>> I am fine with the __doc__ change.
>> 
>> Uhm, which __doc__ change?  There isn't one.
>
> Sorry, I meant patch is ok as is.

Great, thanks for the review.
  

Patch

diff --git a/sysdeps/unix/sysv/linux/glibcsyscalls.py b/sysdeps/unix/sysv/linux/glibcsyscalls.py
index de4d707e48..8d45cd4333 100644
--- a/sysdeps/unix/sysv/linux/glibcsyscalls.py
+++ b/sysdeps/unix/sysv/linux/glibcsyscalls.py
@@ -17,9 +17,12 @@ 
 # License along with the GNU C Library; if not, see
 # <http://www.gnu.org/licenses/>.
 
+import os
 import re
 
-import glibcextract
+if __name__ != '__main__':
+    # When called as a main program, this is not needed.
+    import glibcextract
 
 def extract_system_call_name(macro):
     """Convert the macro name (with __NR_) to a system call name."""
@@ -168,3 +171,82 @@  def linux_kernel_version(cc):
     val = glibcextract.compute_c_consts(sym_data, cc)['LINUX_VERSION_CODE']
     val = int(val)
     return ((val & 0xff0000) >> 16, (val & 0xff00) >> 8)
+
+class ArchSyscall:
+    """Canonical name and location of a syscall header."""
+
+    def __init__(self, name, path):
+        self.name = name
+        self.path = path
+
+    def __repr__(self):
+        return 'ArchSyscall(name={!r}, patch={!r})'.format(
+            self.name, self.path)
+
+def list_arch_syscall_headers(topdir):
+    """A generator which returns all the ArchSyscall objects in a tree."""
+
+    sysdeps = os.path.join(topdir, 'sysdeps', 'unix', 'sysv', 'linux')
+    for root, dirs, files in os.walk(sysdeps):
+        if root != sysdeps:
+            for filename in files:
+                if filename == 'arch-syscall.h':
+                    yield ArchSyscall(
+                        name=os.path.relpath(root, sysdeps),
+                        path=os.path.join(root, filename))
+
+def __main():
+    """Entry point when called as the main program."""
+
+    import argparse
+    import sys
+
+    # Top-level directory of the source tree.
+    topdir = os.path.realpath(os.path.join(
+        os.path.dirname(os.path.realpath(__file__)), *('..',) * 4))
+
+    def get_parser():
+        parser = argparse.ArgumentParser(description=__doc__)
+        subparsers = parser.add_subparsers(dest='command', required=True)
+        subparsers.add_parser('list-headers',
+            help='Print the absolute paths of all arch-syscall.h header files')
+        subparser = subparsers.add_parser('query-syscall',
+            help='Summarize the implementation status of system calls')
+        subparser.add_argument('syscalls', help='Which syscalls to check',
+                               nargs='+')
+        return parser
+    parser = get_parser()
+    args = parser.parse_args()
+
+    if args.command == 'list-headers':
+        for header in sorted([syscall.path for syscall
+                              in list_arch_syscall_headers(topdir)]):
+            print(header)
+
+    elif args.command == 'query-syscall':
+        # List of system call tables.
+        tables = sorted(list_arch_syscall_headers(topdir),
+                          key=lambda syscall: syscall.name)
+        for table in tables:
+            table.numbers = load_arch_syscall_header(table.path)
+
+        for nr in args.syscalls:
+            defined = [table.name for table in tables
+                           if nr in table.numbers]
+            undefined = [table.name for table in tables
+                             if nr not in table.numbers]
+            if not defined:
+                print('{}: not defined on any architecture'.format(nr))
+            elif not undefined:
+                print('{}: defined on all architectures'.format(nr))
+            else:
+                print('{}:'.format(nr))
+                print('  defined: {}'.format(' '.join(defined)))
+                print('  undefined: {}'.format(' '.join(undefined)))
+
+    else:
+        # Unrecognized command.
+        usage(1)
+
+if __name__ == '__main__':
+    __main()