[2/4] Add an include-checking script

Message ID 20240417-header-check-v1-2-a60f610f5bcc@adacore.com
State New
Headers
Series Add include guard checker and reformatter |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Testing passed

Commit Message

Tom Tromey April 17, 2024, 10:05 p.m. UTC
  This adds a new Python script that checks the header guards of all gdb
source files.  It enforces a fairly strict formatting and naming
scheme.

In particular, for a file "x/y-z.h" (relative to the repository root),
the include guard will be named "X_Y_Z_H".  Only the '#ifndef' form is
allowed, not "#if !defined(...)".  The trailing comment on the
"#endif" is also required.

The script also tries to update files that appear to have the required
lines if they are in the wrong form or use the wrong name.
---
 gdb/check-include-guards.py | 125 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)
  

Comments

Tom de Vries April 18, 2024, 12:54 p.m. UTC | #1
On 4/18/24 00:05, Tom Tromey wrote:
> This adds a new Python script that checks the header guards of all gdb
> source files.  It enforces a fairly strict formatting and naming
> scheme.
> 
> In particular, for a file "x/y-z.h" (relative to the repository root),
> the include guard will be named "X_Y_Z_H".  Only the '#ifndef' form is
> allowed, not "#if !defined(...)".  The trailing comment on the
> "#endif" is also required.
> 
> The script also tries to update files that appear to have the required
> lines if they are in the wrong form or use the wrong name.

I tried out this script, starting from an empty gdb/bla.h, and by minimally
following the process ended up with:
...
$ cat gdb/bla.h
/* bla.  */

#ifndef GDB_BLA_H
#define GDB_BLA_H
extern int a;
#endif /* GDB_BLA_H */
...

The only thing I ran into was the corner case of an empty file:
...
$ rm gdb/bla.h
$ touch gdb/bla.h
$ gdb/check-include-guards.py gdb/bla.h
Traceback (most recent call last):
   File "gdb/check-include-guards.py", line 123, in <module>
     check_header(filename)
   File "gdb/check-include-guards.py", line 73, in check_header
     if "THIS FILE IS GENERATED" in contents[0]:
IndexError: list index out of range
...

Anyway, I like the series, though I haven't reviewed in detail.

I've also done a test run, so ...

Tested-By: Tom de Vries <tdevries@suse.de>

Thanks,
- Tom

> ---
>   gdb/check-include-guards.py | 125 ++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 125 insertions(+)
> 
> diff --git a/gdb/check-include-guards.py b/gdb/check-include-guards.py
> new file mode 100755
> index 00000000000..5c61fc90bb0
> --- /dev/null
> +++ b/gdb/check-include-guards.py
> @@ -0,0 +1,125 @@
> +#!/usr/bin/env python3
> +
> +# Copyright (C) 2024 Free Software Foundation, Inc.
> +#
> +# This file is part of GDB.
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
> +
> +# This is intended to be run from pre-commit.  You can also run it by
> +# hand by passing repository-relative filenames to it, like:
> +#   ./gdb/check-include-guards.py gdb/*.h
> +
> +
> +import glob
> +import re
> +import sys
> +
> +status = 0
> +
> +DEF = re.compile("^#ifndef ([A-Za-z0-9_]+)\n")
> +OLDDEF = re.compile("^#if !defined *\\(([A-Za-z0-9_]+)\\)\n")
> +
> +
> +def failure(filename, ndx, text):
> +    print(filename + ":" + str(ndx + 1) + ": " + text)
> +    global status
> +    status = 1
> +
> +
> +def headers(dirname):
> +    return glob.iglob(dirname + "/*.h")
> +
> +
> +def skip_comments_and_blanks(ndx, contents):
> +    while ndx < len(contents) and contents[ndx].startswith("/*"):
> +        while ndx < len(contents):
> +            ndx += 1
> +            if contents[ndx - 1].endswith("*/\n"):
> +                break
> +        # Skip blank lines.
> +        while ndx < len(contents):
> +            if contents[ndx].strip() != "":
> +                break
> +            ndx += 1
> +    return ndx
> +
> +
> +def write_header(filename, contents):
> +    with open(filename, "w") as f:
> +        f.writelines(contents)
> +
> +
> +def check_header(filename):
> +    # Turn x/y-z.h into X_Y_Z_H.
> +    assert filename.endswith(".h")
> +    expected = filename.replace("-", "_")
> +    expected = expected.replace(".", "_")
> +    expected = expected.replace("/", "_")
> +    expected = expected.upper()
> +    with open(filename) as f:
> +        contents = list(f)
> +    if "THIS FILE IS GENERATED" in contents[0]:
> +        # Ignore.
> +        return
> +    if not contents[0].startswith("/*"):
> +        failure(filename, 0, "header should start with comment")
> +        return
> +    i = skip_comments_and_blanks(0, contents)
> +    if i == len(contents):
> +        failure(filename, i, "unterminated intro comment or missing body")
> +        return
> +    m = DEF.match(contents[i])
> +    force_rewrite = False
> +    if not m:
> +        m = OLDDEF.match(contents[i])
> +        if not m:
> +            failure(filename, i, "no header guard")
> +            return
> +        force_rewrite = True
> +    symbol = m.group(1)
> +    updated = False
> +    if symbol != expected:
> +        failure(filename, i, "symbol should be: " + expected)
> +        force_rewrite = True
> +    if force_rewrite:
> +        contents[i] = "#ifndef " + expected + "\n"
> +        updated = True
> +    i += 1
> +    if i == len(contents):
> +        failure(filename, i, "premature EOF")
> +        return
> +    if not contents[i].startswith("#define "):
> +        failure(filename, i, "no define of header guard")
> +        return
> +    if contents[i] != "#define " + expected + "\n":
> +        failure(filename, i, "wrong symbol name in define")
> +        contents[i] = "#define " + expected + "\n"
> +        updated = True
> +    i = len(contents) - 1
> +    if not contents[i].startswith("#endif"):
> +        failure(filename, i, "no trailing endif")
> +        return
> +    if contents[i] != "#endif /* " + expected + " */\n":
> +        failure(filename, i, "incorrect endif")
> +        contents[i] = "#endif /* " + expected + " */\n"
> +        updated = True
> +    if updated:
> +        write_header(filename, contents)
> +
> +
> +for filename in sys.argv[1:]:
> +    check_header(filename)
> +
> +sys.exit(status)
>
  
Simon Marchi April 18, 2024, 3:29 p.m. UTC | #2
On 2024-04-17 18:05, Tom Tromey wrote:
> This adds a new Python script that checks the header guards of all gdb
> source files.  It enforces a fairly strict formatting and naming
> scheme.
> 
> In particular, for a file "x/y-z.h" (relative to the repository root),
> the include guard will be named "X_Y_Z_H".  Only the '#ifndef' form is
> allowed, not "#if !defined(...)".  The trailing comment on the
> "#endif" is also required.
> 
> The script also tries to update files that appear to have the required
> lines if they are in the wrong form or use the wrong name.


My editor is configured to complain about missing type hints, so I added
them.  I pasted a diff at the end of this message, if you want to add
them.

I have some minor comments, but as long as the script does the job, I'm
fine with it.

> ---
>  gdb/check-include-guards.py | 125 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 125 insertions(+)
> 
> diff --git a/gdb/check-include-guards.py b/gdb/check-include-guards.py
> new file mode 100755
> index 00000000000..5c61fc90bb0
> --- /dev/null
> +++ b/gdb/check-include-guards.py
> @@ -0,0 +1,125 @@
> +#!/usr/bin/env python3
> +
> +# Copyright (C) 2024 Free Software Foundation, Inc.
> +#
> +# This file is part of GDB.
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
> +
> +# This is intended to be run from pre-commit.  You can also run it by
> +# hand by passing repository-relative filenames to it, like:
> +#   ./gdb/check-include-guards.py gdb/*.h
> +
> +
> +import glob
> +import re
> +import sys
> +
> +status = 0
> +
> +DEF = re.compile("^#ifndef ([A-Za-z0-9_]+)\n")
> +OLDDEF = re.compile("^#if !defined *\\(([A-Za-z0-9_]+)\\)\n")
> +
> +
> +def failure(filename, ndx, text):
> +    print(filename + ":" + str(ndx + 1) + ": " + text)

Should this print to stderr?

> +    global status
> +    status = 1

I would prefer if failure returned 1 (or the callers could return 1
themselves).  Then, the top-level could do something like:

  for filename in sys.argv[1:]:
      status = check_header(filename) or status

Or, maybe more Pythonic would be to throw an exception here and catch it
at the top-level, like (untested):

  for filename in sys.argv[1:]:
      try:
          check_header(filename)
      except CheckHeaderError as e:
          print(f'{filename}: {e}', file=sys.stderr)
          status = 1

> +def headers(dirname):
> +    return glob.iglob(dirname + "/*.h")

This function appears to be unused.

> +
> +
> +def skip_comments_and_blanks(ndx, contents):
> +    while ndx < len(contents) and contents[ndx].startswith("/*"):
> +        while ndx < len(contents):
> +            ndx += 1
> +            if contents[ndx - 1].endswith("*/\n"):
> +                break
> +        # Skip blank lines.
> +        while ndx < len(contents):
> +            if contents[ndx].strip() != "":
> +                break
> +            ndx += 1
> +    return ndx
> +
> +
> +def write_header(filename, contents):
> +    with open(filename, "w") as f:
> +        f.writelines(contents)
> +
> +
> +def check_header(filename):
> +    # Turn x/y-z.h into X_Y_Z_H.
> +    assert filename.endswith(".h")
> +    expected = filename.replace("-", "_")
> +    expected = expected.replace(".", "_")
> +    expected = expected.replace("/", "_")
> +    expected = expected.upper()
> +    with open(filename) as f:
> +        contents = list(f)

You could do:

  contents = [line.strip('\n') for line in f]

and then no longer have to deal with \n everywhere.

> +    if "THIS FILE IS GENERATED" in contents[0]:
> +        # Ignore.
> +        return
> +    if not contents[0].startswith("/*"):
> +        failure(filename, 0, "header should start with comment")
> +        return
> +    i = skip_comments_and_blanks(0, contents)
> +    if i == len(contents):
> +        failure(filename, i, "unterminated intro comment or missing body")
> +        return
> +    m = DEF.match(contents[i])
> +    force_rewrite = False
> +    if not m:
> +        m = OLDDEF.match(contents[i])
> +        if not m:
> +            failure(filename, i, "no header guard")
> +            return
> +        force_rewrite = True
> +    symbol = m.group(1)
> +    updated = False
> +    if symbol != expected:
> +        failure(filename, i, "symbol should be: " + expected)
> +        force_rewrite = True
> +    if force_rewrite:
> +        contents[i] = "#ifndef " + expected + "\n"

You don't need to add a \n here (and at other spots where you write a
line), writelines will do it.

> +        updated = True
> +    i += 1
> +    if i == len(contents):
> +        failure(filename, i, "premature EOF")
> +        return
> +    if not contents[i].startswith("#define "):
> +        failure(filename, i, "no define of header guard")
> +        return
> +    if contents[i] != "#define " + expected + "\n":
> +        failure(filename, i, "wrong symbol name in define")
> +        contents[i] = "#define " + expected + "\n"
> +        updated = True
> +    i = len(contents) - 1
> +    if not contents[i].startswith("#endif"):
> +        failure(filename, i, "no trailing endif")
> +        return
> +    if contents[i] != "#endif /* " + expected + " */\n":
> +        failure(filename, i, "incorrect endif")
> +        contents[i] = "#endif /* " + expected + " */\n"
> +        updated = True

You could access the last line with contents[-1].

Simon


From 146706bc251fd7b357cbeea391a610ef46cd0b33 Mon Sep 17 00:00:00 2001
From: Simon Marchi <simon.marchi@polymtl.ca>
Date: Thu, 18 Apr 2024 11:29:11 -0400
Subject: [PATCH] type hints

Change-Id: I5551962a5006352538f5af00fdf0afa08df8b638
---
 gdb/check-include-guards.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gdb/check-include-guards.py b/gdb/check-include-guards.py
index 5c61fc90bb0a..e37e80f5b7e6 100755
--- a/gdb/check-include-guards.py
+++ b/gdb/check-include-guards.py
@@ -32,17 +32,17 @@ DEF = re.compile("^#ifndef ([A-Za-z0-9_]+)\n")
 OLDDEF = re.compile("^#if !defined *\\(([A-Za-z0-9_]+)\\)\n")
 
 
-def failure(filename, ndx, text):
+def failure(filename: str, ndx: int, text: str):
     print(filename + ":" + str(ndx + 1) + ": " + text)
     global status
     status = 1
 
 
-def headers(dirname):
+def headers(dirname: str):
     return glob.iglob(dirname + "/*.h")
 
 
-def skip_comments_and_blanks(ndx, contents):
+def skip_comments_and_blanks(ndx: int, contents: list[str]):
     while ndx < len(contents) and contents[ndx].startswith("/*"):
         while ndx < len(contents):
             ndx += 1
@@ -56,12 +56,12 @@ def skip_comments_and_blanks(ndx, contents):
     return ndx
 
 
-def write_header(filename, contents):
+def write_header(filename: str, contents: list[str]):
     with open(filename, "w") as f:
         f.writelines(contents)
 
 
-def check_header(filename):
+def check_header(filename: str):
     # Turn x/y-z.h into X_Y_Z_H.
     assert filename.endswith(".h")
     expected = filename.replace("-", "_")

base-commit: f474980ac0d0a449f4adfd032d85ac62a634aa26
  
Tom Tromey April 18, 2024, 3:31 p.m. UTC | #3
Tom> The only thing I ran into was the corner case of an empty file:

Thanks, I fixed this.

Also I found out that if a pre-commit hook modifies a file, pre-commit
will still do the right thing (fail), so error messages aren't needed in
this case.  I've updated the script to do this.

Tom
  
Tom Tromey April 18, 2024, 3:40 p.m. UTC | #4
>>>>> "Simon" == Simon Marchi <simark@simark.ca> writes:

>> +def failure(filename, ndx, text):
>> +    print(filename + ":" + str(ndx + 1) + ": " + text)

Simon> Should this print to stderr?

Done.

>> +    global status
>> +    status = 1

Simon> I would prefer if failure returned 1 (or the callers could return 1
Simon> themselves).

I ended up having it just call sys.exit (which really just throws).
The approach in the patch was from an earlier version that was run by
hand, before I figured out it could be run from pre-commit; in that
earlier version, working on multiple files was more important.

>> +def headers(dirname):
>> +    return glob.iglob(dirname + "/*.h")

Simon> This function appears to be unused.

Removed.

Simon> You could do:
Simon>   contents = [line.strip('\n') for line in f]
Simon> and then no longer have to deal with \n everywhere.

>> +    if force_rewrite:
>> +        contents[i] = "#ifndef " + expected + "\n"

Simon> You don't need to add a \n here (and at other spots where you write a
Simon> line), writelines will do it.

The Python docs seem to say otherwise:

https://docs.python.org/3/library/io.html#io.IOBase.writelines

I left all the \n stuff alone.

>> +    if contents[i] != "#endif /* " + expected + " */\n":
>> +        failure(filename, i, "incorrect endif")
>> +        contents[i] = "#endif /* " + expected + " */\n"
>> +        updated = True

Simon> You could access the last line with contents[-1].

I figured we needed the line number for a possible error message anyway.

Tom
  
Simon Marchi April 18, 2024, 3:42 p.m. UTC | #5
On 2024-04-18 11:40, Tom Tromey wrote:
>>>>>> "Simon" == Simon Marchi <simark@simark.ca> writes:
> 
>>> +def failure(filename, ndx, text):
>>> +    print(filename + ":" + str(ndx + 1) + ": " + text)
> 
> Simon> Should this print to stderr?
> 
> Done.
> 
>>> +    global status
>>> +    status = 1
> 
> Simon> I would prefer if failure returned 1 (or the callers could return 1
> Simon> themselves).
> 
> I ended up having it just call sys.exit (which really just throws).
> The approach in the patch was from an earlier version that was run by
> hand, before I figured out it could be run from pre-commit; and there
> working on multiple files was more important.
> 
>>> +def headers(dirname):
>>> +    return glob.iglob(dirname + "/*.h")
> 
> Simon> This function appears to be unused.
> 
> Removed.
> 
> Simon> You could do:
> Simon>   contents = [line.strip('\n') for line in f]
> Simon> and then no longer have to deal with \n everywhere.
> 
>>> +    if force_rewrite:
>>> +        contents[i] = "#ifndef " + expected + "\n"
> 
> Simon> You don't need to add a \n here (and at other spots where you write a
> Simon> line), writelines will do it.
> 
> The Python docs seem to say otherwise:
> 
> https://docs.python.org/3/library/io.html#io.IOBase.writelines

Oops lol, I read that exact text, but too fast, and read 'Line
separators are added'.  Sorry about that.

Simon
  

Patch

diff --git a/gdb/check-include-guards.py b/gdb/check-include-guards.py
new file mode 100755
index 00000000000..5c61fc90bb0
--- /dev/null
+++ b/gdb/check-include-guards.py
@@ -0,0 +1,125 @@ 
+#!/usr/bin/env python3
+
+# Copyright (C) 2024 Free Software Foundation, Inc.
+#
+# This file is part of GDB.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# This is intended to be run from pre-commit.  You can also run it by
+# hand by passing repository-relative filenames to it, like:
+#   ./gdb/check-include-guards.py gdb/*.h
+
+
+import glob
+import re
+import sys
+
+status = 0
+
+DEF = re.compile("^#ifndef ([A-Za-z0-9_]+)\n")
+OLDDEF = re.compile("^#if !defined *\\(([A-Za-z0-9_]+)\\)\n")
+
+
+def failure(filename, ndx, text):
+    print(filename + ":" + str(ndx + 1) + ": " + text)
+    global status
+    status = 1
+
+
+def headers(dirname):
+    return glob.iglob(dirname + "/*.h")
+
+
+def skip_comments_and_blanks(ndx, contents):
+    while ndx < len(contents) and contents[ndx].startswith("/*"):
+        while ndx < len(contents):
+            ndx += 1
+            if contents[ndx - 1].endswith("*/\n"):
+                break
+        # Skip blank lines.
+        while ndx < len(contents):
+            if contents[ndx].strip() != "":
+                break
+            ndx += 1
+    return ndx
+
+
+def write_header(filename, contents):
+    with open(filename, "w") as f:
+        f.writelines(contents)
+
+
+def check_header(filename):
+    # Turn x/y-z.h into X_Y_Z_H.
+    assert filename.endswith(".h")
+    expected = filename.replace("-", "_")
+    expected = expected.replace(".", "_")
+    expected = expected.replace("/", "_")
+    expected = expected.upper()
+    with open(filename) as f:
+        contents = list(f)
+    if "THIS FILE IS GENERATED" in contents[0]:
+        # Ignore.
+        return
+    if not contents[0].startswith("/*"):
+        failure(filename, 0, "header should start with comment")
+        return
+    i = skip_comments_and_blanks(0, contents)
+    if i == len(contents):
+        failure(filename, i, "unterminated intro comment or missing body")
+        return
+    m = DEF.match(contents[i])
+    force_rewrite = False
+    if not m:
+        m = OLDDEF.match(contents[i])
+        if not m:
+            failure(filename, i, "no header guard")
+            return
+        force_rewrite = True
+    symbol = m.group(1)
+    updated = False
+    if symbol != expected:
+        failure(filename, i, "symbol should be: " + expected)
+        force_rewrite = True
+    if force_rewrite:
+        contents[i] = "#ifndef " + expected + "\n"
+        updated = True
+    i += 1
+    if i == len(contents):
+        failure(filename, i, "premature EOF")
+        return
+    if not contents[i].startswith("#define "):
+        failure(filename, i, "no define of header guard")
+        return
+    if contents[i] != "#define " + expected + "\n":
+        failure(filename, i, "wrong symbol name in define")
+        contents[i] = "#define " + expected + "\n"
+        updated = True
+    i = len(contents) - 1
+    if not contents[i].startswith("#endif"):
+        failure(filename, i, "no trailing endif")
+        return
+    if contents[i] != "#endif /* " + expected + " */\n":
+        failure(filename, i, "incorrect endif")
+        contents[i] = "#endif /* " + expected + " */\n"
+        updated = True
+    if updated:
+        write_header(filename, contents)
+
+
+for filename in sys.argv[1:]:
+    check_header(filename)
+
+sys.exit(status)