[v2,2/3] scripts: Enhance glibcpp to do basic macro processing

Message ID 52eced32a56ad0a727b74058985aca32523c68e3.1663167514.git.fweimer@redhat.com
State Committed
Commit e6e6184bed490403811771fa527eb95b4ae53c7c
Delegated to: Siddhesh Poyarekar
Series Parse <elf.h> in the glibcelf Python module

Checks

Context               | Check   | Description
dj/TryBot-apply_patch | success | Patch applied to master at the time it was sent

Commit Message

Florian Weimer Sept. 14, 2022, 3 p.m. UTC
  ---
 scripts/glibcpp.py     | 317 +++++++++++++++++++++++++++++++++++++++++
 support/Makefile       |  10 +-
 support/tst-glibcpp.py | 217 ++++++++++++++++++++++++++++
 3 files changed, 542 insertions(+), 2 deletions(-)
 create mode 100644 support/tst-glibcpp.py
  

Comments

Siddhesh Poyarekar Sept. 21, 2022, 5:08 p.m. UTC | #1
On 2022-09-14 11:00, Florian Weimer via Libc-alpha wrote:
> ---
>   scripts/glibcpp.py     | 317 +++++++++++++++++++++++++++++++++++++++++
>   support/Makefile       |  10 +-
>   support/tst-glibcpp.py | 217 ++++++++++++++++++++++++++++
>   3 files changed, 542 insertions(+), 2 deletions(-)
>   create mode 100644 support/tst-glibcpp.py

LGTM.

Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>


Patch

diff --git a/scripts/glibcpp.py b/scripts/glibcpp.py
index b44c6a4392..455459a609 100644
--- a/scripts/glibcpp.py
+++ b/scripts/glibcpp.py
@@ -33,7 +33,9 @@  Accepts non-ASCII characters only within comments and strings.
 """
 
 import collections
+import operator
 import re
+import sys
 
 # Caution: The order of the outermost alternation matters.
 # STRING must be before BAD_STRING, CHARCONST before BAD_CHARCONST,
@@ -210,3 +212,318 @@  def tokenize_c(file_contents, reporter):
 
         yield tok
         pos = mo.end()
+
+class MacroDefinition(collections.namedtuple('MacroDefinition',
+                                             'name_token args body error')):
+    """A preprocessor macro definition.
+
+    name_token is the Token_ for the name.
+
+    args is None for a macro that is not function-like.  Otherwise, it
+    is a tuple that contains the macro argument name tokens.
+
+    body is a tuple that contains the tokens that constitute the body
+    of the macro definition (excluding whitespace).
+
+    error is None if no error was detected, or otherwise a problem
+    description associated with this macro definition.
+
+    """
+
+    @property
+    def function(self):
+        """Return true if the macro is function-like."""
+        return self.args is not None
+
+    @property
+    def name(self):
+        """Return the name of the macro being defined."""
+        return self.name_token.text
+
+    @property
+    def line(self):
+        """Return the line number of the macro defintion."""
+        return self.name_token.line
+
+    @property
+    def args_lowered(self):
+        """Return the macro argument list as a list of strings"""
+        if self.function:
+            return [token.text for token in self.args]
+        else:
+            return None
+
+    @property
+    def body_lowered(self):
+        """Return the macro body as a list of strings."""
+        return [token.text for token in self.body]
+
+def macro_definitions(tokens):
+    """A generator for C macro definitions among tokens.
+
+    The generator yields MacroDefinition objects.
+
+    tokens must be iterable, yielding Token_ objects.
+
+    """
+
+    macro_name = None
+    macro_start = False # Set to False after the macro name and one more token.
+    macro_args = None # Set to a list during the macro argument sequence.
+    in_macro_args = False # True while processing macro identifier-list.
+    error = None
+    body = []
+
+    for token in tokens:
+        if token.context == 'define' and macro_name is None \
+           and token.kind == 'IDENT':
+            # Starting up macro processing.
+            if macro_start:
+                # Identifier after 'define': this is the macro name.
+                macro_name = token
+            else:
+                # This is the 'define' marker; the next identifier is the name.
+                macro_start = True
+            continue
+
+        if macro_name is None:
+            # Drop tokens not in macro definitions.
+            continue
+
+        if token.context != 'define':
+            # End of the macro definition.
+            if in_macro_args and error is None:
+                error = 'macro definition ends in macro argument list'
+            yield MacroDefinition(macro_name, macro_args, tuple(body), error)
+            # No longer in a macro definition.
+            macro_name = None
+            macro_start = False
+            macro_args = None
+            in_macro_args = False
+            error = None
+            body.clear()
+            continue
+
+        if macro_start:
+            # First token after the macro name.
+            macro_start = False
+            if token.kind == 'PUNCTUATOR' and token.text == '(':
+                macro_args = []
+                in_macro_args = True
+            continue
+
+        if in_macro_args:
+            if token.kind == 'IDENT' \
+               or (token.kind == 'PUNCTUATOR' and token.text == '...'):
+                # Macro argument or ... placeholder.
+                macro_args.append(token)
+            if token.kind == 'PUNCTUATOR':
+                if token.text == ')':
+                    macro_args = tuple(macro_args)
+                    in_macro_args = False
+                elif token.text == ',':
+                    pass # Skip.  Not a full syntax check.
+                elif error is None:
+                    error = 'invalid punctuator in macro argument list: ' \
+                        + repr(token.text)
+            elif error is None:
+                error = 'invalid {} token in macro argument list'.format(
+                    token.kind)
+            continue
+
+        if token.kind not in ('WHITESPACE', 'BLOCK_COMMENT'):
+            body.append(token)
+
+    # Emit the macro in case the last line does not end with a newline.
+    if macro_name is not None:
+        if in_macro_args and error is None:
+            error = 'macro definition ends in macro argument list'
+        yield MacroDefinition(macro_name, macro_args, tuple(body), error)
+
+# Used to split UL etc. suffixes from numbers such as 123UL.
+RE_SPLIT_INTEGER_SUFFIX = re.compile(r'([^ullULL]+)([ullULL]*)')
+
+BINARY_OPERATORS = {
+    '+': operator.add,
+    '<<': operator.lshift,
+}
+
+# Use the general-purpose dict type if it is order-preserving.
+if (sys.version_info[0], sys.version_info[1]) <= (3, 6):
+    OrderedDict = collections.OrderedDict
+else:
+    OrderedDict = dict
+
+def macro_eval(macro_defs, reporter):
+    """Compute macro values
+
+    macro_defs is the output from macro_definitions.  reporter is an
+    object that accepts reporter.error(line_number, message) and
+    reporter.note(line_number, message) calls to report errors
+    and error context invocations.
+
+    The returned dict contains the values of macros which are not
+    function-like, pairing their names with their computed values.
+
+    The current implementation is incomplete.  It is deliberately not
+    entirely faithful to C, even in the implemented parts.  It checks
+    that macro replacements follow certain syntactic rules even if
+    they are never evaluated.
+
+    """
+
+    # Unevaluated macro definitions by name.
+    definitions = OrderedDict()
+    for md in macro_defs:
+        if md.name in definitions:
+            reporter.error(md.line, 'macro {} redefined'.format(md.name))
+            reporter.note(definitions[md.name].line,
+                          'location of previous definition')
+        else:
+            definitions[md.name] = md
+
+    # String to value mappings for fully evaluated macros.
+    evaluated = OrderedDict()
+
+    # String to macro definitions during evaluation.  Nice error
+    # reporting relies on deterministic iteration order.
+    stack = OrderedDict()
+
+    def eval_token(current, token):
+        """Evaluate one macro token.
+
+        Integers and strings are returned as such (the latter still
+        quoted).  Identifiers are expanded.
+
+        None indicates an empty expansion or an error.
+
+        """
+
+        if token.kind == 'PP_NUMBER':
+            value = None
+            m = RE_SPLIT_INTEGER_SUFFIX.match(token.text)
+            if m:
+                try:
+                    value = int(m.group(1), 0)
+                except ValueError:
+                    pass
+            if value is None:
+                reporter.error(token.line,
+                    'invalid number {!r} in definition of {}'.format(
+                        token.text, current.name))
+            return value
+
+        if token.kind == 'STRING':
+            return token.text
+
+        if token.kind == 'CHARCONST' and len(token.text) == 3:
+            return ord(token.text[1])
+
+        if token.kind == 'IDENT':
+            name = token.text
+            result = eval1(current, name)
+            if name not in evaluated:
+                evaluated[name] = result
+            return result
+
+        reporter.error(token.line,
+            'unrecognized {!r} in definition of {}'.format(
+                token.text, current.name))
+        return None
+
+
+    def eval1(current, name):
+        """Evaluate one name.
+
+        The name is looked up and the macro definition evaluated
+        recursively if necessary.  The current argument is the macro
+        definition being evaluated.
+
+        None as a return value indicates an error.
+
+        """
+
+        # Fast path if the value has already been evaluated.
+        if name in evaluated:
+            return evaluated[name]
+
+        try:
+            md = definitions[name]
+        except KeyError:
+            reporter.error(current.line,
+                'reference to undefined identifier {} in definition of {}'
+                           .format(name, current.name))
+            return None
+
+        if md.name in stack:
+            # Recursive macro definition.
+            md = stack[name]
+            reporter.error(md.line,
+                'macro definition {} refers to itself'.format(md.name))
+            for md1 in reversed(list(stack.values())):
+                if md1 is md:
+                    break
+                reporter.note(md1.line,
+                              'evaluated from {}'.format(md1.name))
+            return None
+
+        if md.function:
+            reporter.error(current.line,
+                'attempt to evaluate function-like macro {}'.format(name))
+            reporter.note(md.line, 'definition of {}'.format(md.name))
+            return None
+
+        stack[md.name] = md
+        try:
+            body = md.body
+            if len(body) == 0:
+                # Empty expansion.
+                return None
+
+            # Remove surrounding ().
+            if body[0].text == '(' and body[-1].text == ')':
+                body = body[1:-1]
+                had_parens = True
+            else:
+                had_parens = False
+
+            if len(body) == 1:
+                return eval_token(md, body[0])
+
+            # Minimal expression evaluator for binary operators.
+            op = body[1].text
+            if len(body) == 3 and op in BINARY_OPERATORS:
+                if not had_parens:
+                    reporter.error(body[1].line,
+                        'missing parentheses around {} expression'.format(op))
+                    reporter.note(md.line,
+                                  'in definition of macro {}'.format(md.name))
+
+                left = eval_token(md, body[0])
+                right = eval_token(md, body[2])
+
+                if not isinstance(left, int):
+                    reporter.error(body[0].line,
+                        'left operand of {} is not an integer'.format(op))
+                    reporter.note(md.line,
+                                  'in definition of macro {}'.format(md.name))
+                if not isinstance(right, int):
+                    reporter.error(body[2].line,
+                        'right operand of {} is not an integer'.format(op))
+                    reporter.note(md.line,
+                                  'in definition of macro {}'.format(md.name))
+                return BINARY_OPERATORS[op](left, right)
+
+            reporter.error(md.line,
+                'uninterpretable macro token sequence: {}'.format(
+                    ' '.join(md.body_lowered)))
+            return None
+        finally:
+            del stack[md.name]
+
+    # Start of main body of macro_eval.
+    for md in definitions.values():
+        name = md.name
+        if name not in evaluated and not md.function:
+            evaluated[name] = eval1(md, name)
+    return evaluated
diff --git a/support/Makefile b/support/Makefile
index 9b50eac117..551d02941f 100644
--- a/support/Makefile
+++ b/support/Makefile
@@ -274,12 +274,12 @@  $(objpfx)test-run-command : $(libsupport) $(common-objpfx)elf/static-stubs.o
 tests = \
   README-testing \
   tst-support-namespace \
+  tst-support-open-dev-null-range \
+  tst-support-process_state \
   tst-support_blob_repeat \
   tst-support_capture_subprocess \
   tst-support_descriptors \
   tst-support_format_dns_packet \
-  tst-support-open-dev-null-range \
-  tst-support-process_state \
   tst-support_quote_blob \
   tst-support_quote_blob_wide \
   tst-support_quote_string \
@@ -304,6 +304,12 @@  $(objpfx)tst-support_record_failure-2.out: tst-support_record_failure-2.sh \
 	$(evaluate-test)
 endif
 
+tests-special += $(objpfx)tst-glibcpp.out
+
+$(objpfx)tst-glibcpp.out: tst-glibcpp.py $(..)scripts/glibcpp.py
+	PYTHONPATH=$(..)scripts $(PYTHON) tst-glibcpp.py > $@ 2>&1; \
+	$(evaluate-test)
+
 $(objpfx)tst-support_format_dns_packet: $(common-objpfx)resolv/libresolv.so
 
 tst-support_capture_subprocess-ARGS = -- $(host-test-program-cmd)
diff --git a/support/tst-glibcpp.py b/support/tst-glibcpp.py
new file mode 100644
index 0000000000..a2db1916cc
--- /dev/null
+++ b/support/tst-glibcpp.py
@@ -0,0 +1,217 @@ 
+#! /usr/bin/python3
+# Tests for scripts/glibcpp.py
+# Copyright (C) 2022 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+import inspect
+import sys
+
+import glibcpp
+
+# Error counter.
+errors = 0
+
+class TokenizerErrors:
+    """Used as the error reporter during tokenization."""
+
+    def __init__(self):
+        self.errors = []
+
+    def error(self, token, message):
+        self.errors.append((token, message))
+
+def check_macro_definitions(source, expected):
+    reporter = TokenizerErrors()
+    tokens = glibcpp.tokenize_c(source, reporter)
+
+    actual = []
+    for md in glibcpp.macro_definitions(tokens):
+        if md.function:
+            md_name = '{}({})'.format(md.name, ','.join(md.args_lowered))
+        else:
+            md_name = md.name
+        actual.append((md_name, md.body_lowered))
+
+    if actual != expected or reporter.errors:
+        global errors
+        errors += 1
+        # Obtain python source line information.
+        frame = inspect.stack(2)[1]
+        print('{}:{}: error: macro definition mismatch, actual definitions:'
+              .format(frame[1], frame[2]))
+        for md in actual:
+            print('note: {} {!r}'.format(md[0], md[1]))
+
+        if reporter.errors:
+            for err in reporter.errors:
+                print('note: tokenizer error: {}: {}'.format(
+                    err[0].line, err[1]))
+
+def check_macro_eval(source, expected, expected_errors=''):
+    reporter = TokenizerErrors()
+    tokens = list(glibcpp.tokenize_c(source, reporter))
+
+    if reporter.errors:
+        # Obtain python source line information.
+        frame = inspect.stack(2)[1]
+        for err in reporter.errors:
+            print('{}:{}: tokenizer error: {}: {}'.format(
+                frame[1], frame[2], err[0].line, err[1]))
+        return
+
+    class EvalReporter:
+        """Used as the error reporter during evaluation."""
+
+        def __init__(self):
+            self.lines = []
+
+        def error(self, line, message):
+            self.lines.append('{}: error: {}\n'.format(line, message))
+
+        def note(self, line, message):
+            self.lines.append('{}: note: {}\n'.format(line, message))
+
+    reporter = EvalReporter()
+    actual = glibcpp.macro_eval(glibcpp.macro_definitions(tokens), reporter)
+    actual_errors = ''.join(reporter.lines)
+    if actual != expected or actual_errors != expected_errors:
+        global errors
+        errors += 1
+        # Obtain python source line information.
+        frame = inspect.stack(2)[1]
+        print('{}:{}: error: macro evaluation mismatch, actual results:'
+              .format(frame[1], frame[2]))
+        for k, v in actual.items():
+            print('  {}: {!r}'.format(k, v))
+        for msg in reporter.lines:
+            sys.stdout.write('  | ' + msg)
+
+# Individual test cases follow.
+
+check_macro_definitions('', [])
+check_macro_definitions('int main()\n{\n{\n', [])
+check_macro_definitions("""
+#define A 1
+#define B 2 /* ignored */
+#define C 3 // also ignored
+#define D \
+ 4
+#define STRING "string"
+#define FUNCLIKE(a, b) (a + b)
+#define FUNCLIKE2(a, b) (a + \
+ b)
+""", [('A', ['1']),
+      ('B', ['2']),
+      ('C', ['3']),
+      ('D', ['4']),
+      ('STRING', ['"string"']),
+      ('FUNCLIKE(a,b)', list('(a+b)')),
+      ('FUNCLIKE2(a,b)', list('(a+b)')),
+      ])
+check_macro_definitions('#define MACRO', [('MACRO', [])])
+check_macro_definitions('#define MACRO\n', [('MACRO', [])])
+check_macro_definitions('#define MACRO()', [('MACRO()', [])])
+check_macro_definitions('#define MACRO()\n', [('MACRO()', [])])
+
+check_macro_eval('#define A 1', {'A': 1})
+check_macro_eval('#define A (1)', {'A': 1})
+check_macro_eval('#define A (1 + 1)', {'A': 2})
+check_macro_eval('#define A (1U << 31)', {'A': 1 << 31})
+check_macro_eval('''\
+#define A (B + 1)
+#define B 10
+#define F(x) ignored
+#define C "not ignored"
+''', {
+    'A': 11,
+    'B': 10,
+    'C': '"not ignored"',
+})
+
+# Checking for evaluation errors.
+check_macro_eval('''\
+#define A 1
+#define A 2
+''', {
+    'A': 1,
+}, '''\
+2: error: macro A redefined
+1: note: location of previous definition
+''')
+
+check_macro_eval('''\
+#define A A
+#define B 1
+''', {
+    'A': None,
+    'B': 1,
+}, '''\
+1: error: macro definition A refers to itself
+''')
+
+check_macro_eval('''\
+#define A B
+#define B A
+''', {
+    'A': None,
+    'B': None,
+}, '''\
+1: error: macro definition A refers to itself
+2: note: evaluated from B
+''')
+
+check_macro_eval('''\
+#define A B
+#define B C
+#define C A
+''', {
+    'A': None,
+    'B': None,
+    'C': None,
+}, '''\
+1: error: macro definition A refers to itself
+3: note: evaluated from C
+2: note: evaluated from B
+''')
+
+check_macro_eval('''\
+#define A 1 +
+''', {
+    'A': None,
+}, '''\
+1: error: uninterpretable macro token sequence: 1 +
+''')
+
+check_macro_eval('''\
+#define A 3*5
+''', {
+    'A': None,
+}, '''\
+1: error: uninterpretable macro token sequence: 3 * 5
+''')
+
+check_macro_eval('''\
+#define A 3 + 5
+''', {
+    'A': 8,
+}, '''\
+1: error: missing parentheses around + expression
+1: note: in definition of macro A
+''')
+
+if errors:
+    sys.exit(1)
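
Usage sketch

For context, a minimal sketch (not part of the patch) of how the new entry points fit together: tokenize a source file with glibcpp.tokenize_c, feed the token stream to glibcpp.macro_definitions, and pass the resulting definitions to glibcpp.macro_eval, which returns a dict mapping the names of object-like macros to their computed values.  The Reporter class and the evaluate_macros helper below are illustrative only, and the script assumes that scripts/ is on PYTHONPATH, as the support/Makefile rule above arranges for tst-glibcpp.py.

#!/usr/bin/python3
# Illustrative sketch only (not part of the patch): print the values of
# the object-like macros defined in a header, using the new glibcpp
# macro processing.
import sys

import glibcpp

class Reporter:
    """Collects diagnostics from tokenization and macro evaluation."""

    def __init__(self):
        self.errors = 0

    def error(self, where, message):
        # tokenize_c reports against a token; macro_eval reports
        # against a line number.
        line = getattr(where, 'line', where)
        print('error: line {}: {}'.format(line, message), file=sys.stderr)
        self.errors += 1

    def note(self, line, message):
        print('note: line {}: {}'.format(line, message), file=sys.stderr)

def evaluate_macros(path):
    """Return ({macro name: value}, error count) for the file at path."""
    with open(path) as inp:
        source = inp.read()
    reporter = Reporter()
    tokens = glibcpp.tokenize_c(source, reporter)
    values = glibcpp.macro_eval(glibcpp.macro_definitions(tokens), reporter)
    return values, reporter.errors

if __name__ == '__main__':
    values, errors = evaluate_macros(sys.argv[1])
    for name, value in values.items():
        print('{} = {!r}'.format(name, value))
    sys.exit(1 if errors else 0)

Running the new test by hand amounts to the same invocation the Makefile rule uses: set PYTHONPATH to the scripts directory and run tst-glibcpp.py with the Python interpreter; a non-zero exit status indicates a failure.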