[1/2] gdb: updates to gdbarch.py algorithm

Message ID fe3aafc30dc898d865c651b6f5acaa1eab3174f3.1677602918.git.aburgess@redhat.com
State New
Headers
Series Add new gdbarch::displaced_step_max_buffer_length field |

Commit Message

Andrew Burgess Feb. 28, 2023, 4:51 p.m. UTC
  Restructure how gdbarch.py generates the verify_gdbarch function.
Previously the postdefault handling was bundled together with the
validation.  This means that a field can't have both a postdefault,
and set its invalid attribute to a string.

This doesn't seem reasonable to me, I see no reason why a field can't
have both a postdefault (used when the tdep doesn't set the field),
and an invalid expression, which can be used to validate the value
that a tdep might set.

In this commit I restructure the verify_gdbarch generation code to
allow the above, there is no change in the actual generated code in
this commit, that will come in later commit.

I did end up having to remove the "invalid" attribute (where the
attribute was set to True) from a number of fields in this commit.
This invalid attribute was never having an effect as these components
all have a postdefault.  Consider; the "postdefault" is applied if the
field still has its initial value, while an "invalid" attribute set to
True means error if the field still has its default value.  But the
field never will have its default value, it will always have its
postdefault value.
---
 gdb/gdbarch.py            | 31 ++++++++++++++++---------
 gdb/gdbarch_components.py | 49 ++++++++++++++-------------------------
 2 files changed, 37 insertions(+), 43 deletions(-)
  

Comments

Simon Marchi March 1, 2023, 3:09 a.m. UTC | #1
On 2/28/23 11:51, Andrew Burgess via Gdb-patches wrote:
> Restructure how gdbarch.py generates the verify_gdbarch function.
> Previously the postdefault handling was bundled together with the
> validation.  This means that a field can't have both a postdefault,
> and set its invalid attribute to a string.
> 
> This doesn't seem reasonable to me, I see no reason why a field can't
> have both a postdefault (used when the tdep doesn't set the field),
> and an invalid expression, which can be used to validate the value
> that a tdep might set.
> 
> In this commit I restructure the verify_gdbarch generation code to
> allow the above, there is no change in the actual generated code in
> this commit, that will come in later commit.
> 
> I did end up having to remove the "invalid" attribute (where the
> attribute was set to True) from a number of fields in this commit.
> This invalid attribute was never having an effect as these components
> all have a postdefault.  Consider; the "postdefault" is applied if the
> field still has its initial value, while an "invalid" attribute set to
> True means error if the field still has its default value.  But the
> field never will have its default value, it will always have its
> postdefault value.
> ---
>  gdb/gdbarch.py            | 31 ++++++++++++++++---------
>  gdb/gdbarch_components.py | 49 ++++++++++++++-------------------------
>  2 files changed, 37 insertions(+), 43 deletions(-)
> 
> diff --git a/gdb/gdbarch.py b/gdb/gdbarch.py
> index 93b1e8bf84e..7fea41c9572 100755
> --- a/gdb/gdbarch.py
> +++ b/gdb/gdbarch.py
> @@ -203,35 +203,44 @@ with open("gdbarch.c", "w") as f:
>          file=f,
>      )
>      for c in filter(not_info, components):
> -        if c.invalid is False:
> -            print(f"  /* Skip verify of {c.name}, invalid_p == 0 */", file=f)
> -        elif c.predicate:
> -            print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
> -        elif isinstance(c.invalid, str) and c.postdefault is not None:
> -            print(f"  if ({c.invalid})", file=f)
> -            print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
> -        elif c.predefault is not None and c.postdefault is not None:
> +        # An opportunity to write in the 'postdefault' value.
> +        if c.postdefault is not None and c.predefault is not None:
>              print(f"  if (gdbarch->{c.name} == {c.predefault})", file=f)
>              print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
>          elif c.postdefault is not None:
>              print(f"  if (gdbarch->{c.name} == 0)", file=f)
>              print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)

I would find this postdefault snippet easier to read like this, with a
single "if c.postdefault is not None", and then another condition inside
to decide what we should compare against:

        if c.postdefault is not None:
            if c.predefault is not None:
                print(f"  if (gdbarch->{c.name} == {c.predefault})", file=f)
                print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
            else:
                print(f"  if (gdbarch->{c.name} == 0)", file=f)
                print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)

or even

        if c.postdefault is not None:
            predefault = c.predefault or "0"
            print(f"  if (gdbarch->{c.name} == {predefault})", file=f)
            print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)

> +
> +        # Now validate the value.
> +        if c.invalid is False:
> +            print(f"  /* Skip verify of {c.name}, invalid_p == 0 */", file=f)
> +        elif c.predicate:
> +            print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
> +        elif c.invalid is None:

I think it's confusing for the "invalid" parameter to be able to be
None, that it's one to many state versus what we need to be able to
represent.  I think we can get by with string, True and False, where
True means "auto", where the validity check is generated if it makes
sense to.  Having one less state would help simplify things.  I hacked
this locally and it seems to work.  I can post this as a cleanup before
or on top of your patch, as you prefer.

Another cleanup that would help me understand what is going on would be
to change this long list of if/elif to something that looks more like a
decision tree.  On top of your patch, and on top of my suggestion to get
rid of the invalid=None state, this is what I made looks like:

        predefault = c.predefault or "0"

        # Now validate the value.
        if type(c.invalid) is str:
            print(f"  if ({c.invalid})", file=f)
            print(f"""    log.puts ("\\n\\t{c.name}");""", file=f)
        elif c.invalid:
            if c.predicate:
                print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
            elif c.postdefault:
                # We currently don't print anything, but we could print:
                # print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
                pass
            else:
                print(f"  if (gdbarch->{c.name} == {predefault})", file=f)
                print(f"""    log.puts ("\\n\\t{c.name}");""", file=f)
        else:
            print(f"  /* Skip verify of {c.name}, invalid_p == 0 */", file=f)

That structure is clearer to me.  We see clearly the portions handling
the three states of "invalid" (str, True and False).  Inside invalid ==
True (which really means "auto"), we see that we skip generating the
check when either predicate or postdefault is set, the two situations
where generating the check doesn't make sense.

Another nice thing about this is that there isn't the "unhandled case
when generating gdbarch validation" case at the end.  Each branch of the
decision tree has an outcome.

Again, if you agree with this cleanup, we could do it before or after
your patch, as you wish.

> diff --git a/gdb/gdbarch_components.py b/gdb/gdbarch_components.py
> index caa65c334ec..1d420a513f9 100644
> --- a/gdb/gdbarch_components.py
> +++ b/gdb/gdbarch_components.py
> @@ -63,34 +63,28 @@
>  # * "predefault", "postdefault", and "invalid" - These are used for
>  # the initialization and verification steps:
>  #
> -# A gdbarch is zero-initialized.  Then, if a field has a pre-default,
> -# the field is set to that value.  After initialization is complete
> -# (that is, after the tdep code has a chance to change the settings),
> -# the post-initialization step is done.
> +# A gdbarch is zero-initialized.  Then, if a field has a "predefault",
> +# the field is set to that value.  This becomes the fields initial

Are you missing an apostrophe after fields?

> +# value.
>  #
> -# There is a generic algorithm to generate a "validation function" for
> -# all fields.  If the field has an "invalid" attribute with a string
> -# value, then this string is the expression (note that a string-valued
> -# "invalid" and "predicate" are mutually exclusive; and the case where
> -# invalid is True means to ignore this field and instead use the
> -# default checking that is about to be described).  Otherwise, if
> -# there is a "predefault", then the field is valid if it differs from
> -# the predefault.  Otherwise, the check is done against 0 (really NULL
> -# for function pointers, but same idea).
> -#
> -# In post-initialization / validation, there are several cases.
> +# After initialization is complete (that is, after the tdep code has a
> +# chance to change the settings), the post-initialization step is
> +# done.
>  #
> -# * If "invalid" is False, or if the field specifies "predicate",
> -# validation is skipped.  Otherwise, a validation step is emitted.
> +# If the field still has its initial value (see above), and the field
> +# has a "postdefault", then the post field is set to this value.

Do you really mean to say "the post field", and not just "the field"?

>  #
> -# * Otherwise, the validity is checked using the usual validation
> -# function (see above).  If the field is considered valid, nothing is
> -# done.
> +# After the possible "postdefault" assignment, validation is
> +# performed for fields that don't have a "predicate".
>  #
> -# * Otherwise, the field's value is invalid.  If there is a
> -# "postdefault", then the field is assigned that value.
> +# If the field has an "invalid" attribute with a string value, then
> +# this string is the expression that should evaluate to true when the
> +# field is invalid.
>  #
> -# * Otherwise, the gdbarch will fail validation and gdb will crash.
> +# Otherwise, if "invalid" is True, then the generic validation
> +# function is used: the field is considered invalid it it still

double "it"

Simon
  
Tom Tromey March 1, 2023, 3:58 p.m. UTC | #2
>>>>> "Andrew" == Andrew Burgess via Gdb-patches <gdb-patches@sourceware.org> writes:

Andrew> This doesn't seem reasonable to me, I see no reason why a field can't
Andrew> have both a postdefault (used when the tdep doesn't set the field),
Andrew> and an invalid expression, which can be used to validate the value
Andrew> that a tdep might set.

Andrew> In this commit I restructure the verify_gdbarch generation code to
Andrew> allow the above, there is no change in the actual generated code in
Andrew> this commit, that will come in later commit.

FWIW I'm generally in favor of anything that simplifies how one writes
new gdbarch hooks.  The current approach has always been pretty
confusing.

Andrew> I did end up having to remove the "invalid" attribute (where the
Andrew> attribute was set to True) from a number of fields in this commit.

Most likely these were all that way in the old gdbarch.sh and nobody
ever noticed.

Tom
  
Andrew Burgess March 2, 2023, 10:13 a.m. UTC | #3
Simon Marchi <simark@simark.ca> writes:

> On 2/28/23 11:51, Andrew Burgess via Gdb-patches wrote:
>> Restructure how gdbarch.py generates the verify_gdbarch function.
>> Previously the postdefault handling was bundled together with the
>> validation.  This means that a field can't have both a postdefault,
>> and set its invalid attribute to a string.
>> 
>> This doesn't seem reasonable to me, I see no reason why a field can't
>> have both a postdefault (used when the tdep doesn't set the field),
>> and an invalid expression, which can be used to validate the value
>> that a tdep might set.
>> 
>> In this commit I restructure the verify_gdbarch generation code to
>> allow the above, there is no change in the actual generated code in
>> this commit, that will come in later commit.
>> 
>> I did end up having to remove the "invalid" attribute (where the
>> attribute was set to True) from a number of fields in this commit.
>> This invalid attribute was never having an effect as these components
>> all have a postdefault.  Consider; the "postdefault" is applied if the
>> field still has its initial value, while an "invalid" attribute set to
>> True means error if the field still has its default value.  But the
>> field never will have its default value, it will always have its
>> postdefault value.
>> ---
>>  gdb/gdbarch.py            | 31 ++++++++++++++++---------
>>  gdb/gdbarch_components.py | 49 ++++++++++++++-------------------------
>>  2 files changed, 37 insertions(+), 43 deletions(-)
>> 
>> diff --git a/gdb/gdbarch.py b/gdb/gdbarch.py
>> index 93b1e8bf84e..7fea41c9572 100755
>> --- a/gdb/gdbarch.py
>> +++ b/gdb/gdbarch.py
>> @@ -203,35 +203,44 @@ with open("gdbarch.c", "w") as f:
>>          file=f,
>>      )
>>      for c in filter(not_info, components):
>> -        if c.invalid is False:
>> -            print(f"  /* Skip verify of {c.name}, invalid_p == 0 */", file=f)
>> -        elif c.predicate:
>> -            print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
>> -        elif isinstance(c.invalid, str) and c.postdefault is not None:
>> -            print(f"  if ({c.invalid})", file=f)
>> -            print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
>> -        elif c.predefault is not None and c.postdefault is not None:
>> +        # An opportunity to write in the 'postdefault' value.
>> +        if c.postdefault is not None and c.predefault is not None:
>>              print(f"  if (gdbarch->{c.name} == {c.predefault})", file=f)
>>              print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
>>          elif c.postdefault is not None:
>>              print(f"  if (gdbarch->{c.name} == 0)", file=f)
>>              print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
>
> I would find this postdefault snippet easier to read like this, with a
> single "if c.postdefault is not None", and then another condition inside
> to decide what we should compare against:
>
>         if c.postdefault is not None:
>             if c.predefault is not None:
>                 print(f"  if (gdbarch->{c.name} == {c.predefault})", file=f)
>                 print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
>             else:
>                 print(f"  if (gdbarch->{c.name} == 0)", file=f)
>                 print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
>
> or even
>
>         if c.postdefault is not None:
>             predefault = c.predefault or "0"
>             print(f"  if (gdbarch->{c.name} == {predefault})", file=f)
>             print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)

I went with the second approach, I like removing the duplicate print
calls.

>
>> +
>> +        # Now validate the value.
>> +        if c.invalid is False:
>> +            print(f"  /* Skip verify of {c.name}, invalid_p == 0 */", file=f)
>> +        elif c.predicate:
>> +            print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
>> +        elif c.invalid is None:
>
> I think it's confusing for the "invalid" parameter to be able to be
> None, that it's one to many state versus what we need to be able to
> represent.  I think we can get by with string, True and False, where
> True means "auto", where the validity check is generated if it makes
> sense to.  Having one less state would help simplify things.  I hacked
> this locally and it seems to work.  I can post this as a cleanup before
> or on top of your patch, as you prefer.
>
> Another cleanup that would help me understand what is going on would be
> to change this long list of if/elif to something that looks more like a
> decision tree.  On top of your patch, and on top of my suggestion to get
> rid of the invalid=None state, this is what I made looks like:
>
>         predefault = c.predefault or "0"
>
>         # Now validate the value.
>         if type(c.invalid) is str:
>             print(f"  if ({c.invalid})", file=f)
>             print(f"""    log.puts ("\\n\\t{c.name}");""", file=f)
>         elif c.invalid:
>             if c.predicate:
>                 print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
>             elif c.postdefault:
>                 # We currently don't print anything, but we could print:
>                 # print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
>                 pass
>             else:
>                 print(f"  if (gdbarch->{c.name} == {predefault})", file=f)
>                 print(f"""    log.puts ("\\n\\t{c.name}");""", file=f)
>         else:
>             print(f"  /* Skip verify of {c.name}, invalid_p == 0 */", file=f)
>
> That structure is clearer to me.  We see clearly the portions handling
> the three states of "invalid" (str, True and False).  Inside invalid ==
> True (which really means "auto"), we see that we skip generating the
> check when either predicate or postdefault is set, the two situations
> where generating the check doesn't make sense.
>
> Another nice thing about this is that there isn't the "unhandled case
> when generating gdbarch validation" case at the end.  Each branch of the
> decision tree has an outcome.
>
> Again, if you agree with this cleanup, we could do it before or after
> your patch, as you wish.

Yeah, I think what you have above would be a great improvement.  I like
that we can (with what you propose) now have an "invalid" string and
also have a predicate, that feels like an improvement.

I've included an update of my patch below.   I haven't included the
"invalid" refactor that you describe above as it sounds like you already
have a patch for this work, and it would end up as a separate patch
anyway.  I'm fine with it going in after my work of before, whatever
works best.

Let me know what you think.

Thanks,
Andrew

---

commit 25850b05dce62e0c83cd6040670b72c77e6ed7ea
Author: Andrew Burgess <aburgess@redhat.com>
Date:   Thu Feb 23 18:18:45 2023 +0000

    gdb: updates to gdbarch.py algorithm
    
    Restructure how gdbarch.py generates the verify_gdbarch function.
    Previously the postdefault handling was bundled together with the
    validation.  This means that a field can't have both a postdefault,
    and set its invalid attribute to a string.
    
    This doesn't seem reasonable to me, I see no reason why a field can't
    have both a postdefault (used when the tdep doesn't set the field),
    and an invalid expression, which can be used to validate the value
    that a tdep might set.
    
    In this commit I restructure the verify_gdbarch generation code to
    allow the above, there is no change in the actual generated code in
    this commit, that will come in later commit.
    
    I did end up having to remove the "invalid" attribute (where the
    attribute was set to True) from a number of fields in this commit.
    This invalid attribute was never having an effect as these components
    all have a postdefault.  Consider; the "postdefault" is applied if the
    field still has its initial value, while an "invalid" attribute set to
    True means error if the field still has its default value.  But the
    field never will have its default value, it will always have its
    postdefault value.

diff --git a/gdb/gdbarch.py b/gdb/gdbarch.py
index 93b1e8bf84e..2447b249594 100755
--- a/gdb/gdbarch.py
+++ b/gdb/gdbarch.py
@@ -203,35 +203,44 @@ with open("gdbarch.c", "w") as f:
         file=f,
     )
     for c in filter(not_info, components):
+        # An opportunity to write in the 'postdefault' value.  We
+        # change field's value to the postdefault if its current value
+        # is not different to the initial value of the field.
+        if c.postdefault is not None:
+            init_value = c.predefault or "0"
+            print(f"  if (gdbarch->{c.name} == {init_value})", file=f)
+            print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
+
+        # Now validate the value.
         if c.invalid is False:
             print(f"  /* Skip verify of {c.name}, invalid_p == 0 */", file=f)
         elif c.predicate:
             print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
-        elif isinstance(c.invalid, str) and c.postdefault is not None:
-            print(f"  if ({c.invalid})", file=f)
-            print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
-        elif c.predefault is not None and c.postdefault is not None:
-            print(f"  if (gdbarch->{c.name} == {c.predefault})", file=f)
-            print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
-        elif c.postdefault is not None:
-            print(f"  if (gdbarch->{c.name} == 0)", file=f)
-            print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
+        elif c.invalid is None:
+            # No validation has been requested for this component.
+            pass
         elif isinstance(c.invalid, str):
             print(f"  if ({c.invalid})", file=f)
             print(f"""    log.puts ("\\n\\t{c.name}");""", file=f)
-        elif c.predefault is not None:
+        elif c.invalid is True and c.predefault is not None:
             print(f"  if (gdbarch->{c.name} == {c.predefault})", file=f)
             print(f"""    log.puts ("\\n\\t{c.name}");""", file=f)
-        elif c.invalid is True:
+        elif c.invalid is True and c.postdefault is None:
             print(f"  if (gdbarch->{c.name} == 0)", file=f)
             print(f"""    log.puts ("\\n\\t{c.name}");""", file=f)
+        elif c.invalid is True:
+            # This component has its 'invalid' field set to True, but
+            # also has a postdefault.  This makes no sense, the
+            # postdefault will have been applied above, so this field
+            # will not have a zero value.
+            raise Exception(f"component {c.name} has postdefault and invalid set to True")
         else:
             # We should not allow ourselves to simply do nothing here
             # because no other case applies.  If we end up here then
             # either the input data needs adjusting so one of the
             # above cases matches, or we need additional cases adding
             # here.
-            raise Exception("unhandled case when generating gdbarch validation")
+            raise Exception(f"unhandled case when generating gdbarch validation: {c.name}")
     print("  if (!log.empty ())", file=f)
     print(
         """    internal_error (_("verify_gdbarch: the following are invalid ...%s"),""",
diff --git a/gdb/gdbarch_components.py b/gdb/gdbarch_components.py
index caa65c334ec..5446b8fcaf8 100644
--- a/gdb/gdbarch_components.py
+++ b/gdb/gdbarch_components.py
@@ -63,34 +63,28 @@
 # * "predefault", "postdefault", and "invalid" - These are used for
 # the initialization and verification steps:
 #
-# A gdbarch is zero-initialized.  Then, if a field has a pre-default,
-# the field is set to that value.  After initialization is complete
-# (that is, after the tdep code has a chance to change the settings),
-# the post-initialization step is done.
+# A gdbarch is zero-initialized.  Then, if a field has a "predefault",
+# the field is set to that value.  This becomes the field's initial
+# value.
 #
-# There is a generic algorithm to generate a "validation function" for
-# all fields.  If the field has an "invalid" attribute with a string
-# value, then this string is the expression (note that a string-valued
-# "invalid" and "predicate" are mutually exclusive; and the case where
-# invalid is True means to ignore this field and instead use the
-# default checking that is about to be described).  Otherwise, if
-# there is a "predefault", then the field is valid if it differs from
-# the predefault.  Otherwise, the check is done against 0 (really NULL
-# for function pointers, but same idea).
-#
-# In post-initialization / validation, there are several cases.
+# After initialization is complete (that is, after the tdep code has a
+# chance to change the settings), the post-initialization step is
+# done.
 #
-# * If "invalid" is False, or if the field specifies "predicate",
-# validation is skipped.  Otherwise, a validation step is emitted.
+# If the field still has its initial value (see above), and the field
+# has a "postdefault", then the field is set to this value.
 #
-# * Otherwise, the validity is checked using the usual validation
-# function (see above).  If the field is considered valid, nothing is
-# done.
+# After the possible "postdefault" assignment, validation is
+# performed for fields that don't have a "predicate".
 #
-# * Otherwise, the field's value is invalid.  If there is a
-# "postdefault", then the field is assigned that value.
+# If the field has an "invalid" attribute with a string value, then
+# this string is the expression that should evaluate to true when the
+# field is invalid.
 #
-# * Otherwise, the gdbarch will fail validation and gdb will crash.
+# Otherwise, if "invalid" is True, then the generic validation
+# function is used: the field is considered invalid it still contains
+# its default value.  This validation is what is used within the _p
+# predicate function if the field has "predicate" set to True.
 #
 # Function and Method share:
 #
@@ -206,7 +200,6 @@ Value(
     type="const struct floatformat **",
     name="bfloat16_format",
     postdefault="floatformats_bfloat16",
-    invalid=True,
     printer="pformat (gdbarch, gdbarch->bfloat16_format)",
 )
 
@@ -221,7 +214,6 @@ Value(
     type="const struct floatformat **",
     name="half_format",
     postdefault="floatformats_ieee_half",
-    invalid=True,
     printer="pformat (gdbarch, gdbarch->half_format)",
 )
 
@@ -236,7 +228,6 @@ Value(
     type="const struct floatformat **",
     name="float_format",
     postdefault="floatformats_ieee_single",
-    invalid=True,
     printer="pformat (gdbarch, gdbarch->float_format)",
 )
 
@@ -251,7 +242,6 @@ Value(
     type="const struct floatformat **",
     name="double_format",
     postdefault="floatformats_ieee_double",
-    invalid=True,
     printer="pformat (gdbarch, gdbarch->double_format)",
 )
 
@@ -266,7 +256,6 @@ Value(
     type="const struct floatformat **",
     name="long_double_format",
     postdefault="floatformats_ieee_double",
-    invalid=True,
     printer="pformat (gdbarch, gdbarch->long_double_format)",
 )
 
@@ -289,7 +278,6 @@ One if `wchar_t' is signed, zero if unsigned.
     name="wchar_signed",
     predefault="-1",
     postdefault="1",
-    invalid=True,
 )
 
 Method(
@@ -332,7 +320,6 @@ addr_bit is the size of a target address as represented in gdb
     name="addr_bit",
     predefault="0",
     postdefault="gdbarch_ptr_bit (gdbarch)",
-    invalid=True,
 )
 
 Value(
@@ -355,7 +342,6 @@ and if Dwarf versions < 4 need to be supported.
     name="dwarf2_addr_size",
     predefault="0",
     postdefault="gdbarch_ptr_bit (gdbarch) / TARGET_CHAR_BIT",
-    invalid=True,
 )
 
 Value(
@@ -366,7 +352,6 @@ One if `char' acts like `signed char', zero if `unsigned char'.
     name="char_signed",
     predefault="-1",
     postdefault="1",
-    invalid=True,
 )
 
 Function(
  
Simon Marchi March 2, 2023, 4:49 p.m. UTC | #4
On 3/2/23 05:13, Andrew Burgess wrote:
> Simon Marchi <simark@simark.ca> writes:
> 
>> On 2/28/23 11:51, Andrew Burgess via Gdb-patches wrote:
>>> Restructure how gdbarch.py generates the verify_gdbarch function.
>>> Previously the postdefault handling was bundled together with the
>>> validation.  This means that a field can't have both a postdefault,
>>> and set its invalid attribute to a string.
>>>
>>> This doesn't seem reasonable to me, I see no reason why a field can't
>>> have both a postdefault (used when the tdep doesn't set the field),
>>> and an invalid expression, which can be used to validate the value
>>> that a tdep might set.
>>>
>>> In this commit I restructure the verify_gdbarch generation code to
>>> allow the above, there is no change in the actual generated code in
>>> this commit, that will come in later commit.
>>>
>>> I did end up having to remove the "invalid" attribute (where the
>>> attribute was set to True) from a number of fields in this commit.
>>> This invalid attribute was never having an effect as these components
>>> all have a postdefault.  Consider; the "postdefault" is applied if the
>>> field still has its initial value, while an "invalid" attribute set to
>>> True means error if the field still has its default value.  But the
>>> field never will have its default value, it will always have its
>>> postdefault value.
>>> ---
>>>  gdb/gdbarch.py            | 31 ++++++++++++++++---------
>>>  gdb/gdbarch_components.py | 49 ++++++++++++++-------------------------
>>>  2 files changed, 37 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/gdb/gdbarch.py b/gdb/gdbarch.py
>>> index 93b1e8bf84e..7fea41c9572 100755
>>> --- a/gdb/gdbarch.py
>>> +++ b/gdb/gdbarch.py
>>> @@ -203,35 +203,44 @@ with open("gdbarch.c", "w") as f:
>>>          file=f,
>>>      )
>>>      for c in filter(not_info, components):
>>> -        if c.invalid is False:
>>> -            print(f"  /* Skip verify of {c.name}, invalid_p == 0 */", file=f)
>>> -        elif c.predicate:
>>> -            print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
>>> -        elif isinstance(c.invalid, str) and c.postdefault is not None:
>>> -            print(f"  if ({c.invalid})", file=f)
>>> -            print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
>>> -        elif c.predefault is not None and c.postdefault is not None:
>>> +        # An opportunity to write in the 'postdefault' value.
>>> +        if c.postdefault is not None and c.predefault is not None:
>>>              print(f"  if (gdbarch->{c.name} == {c.predefault})", file=f)
>>>              print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
>>>          elif c.postdefault is not None:
>>>              print(f"  if (gdbarch->{c.name} == 0)", file=f)
>>>              print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
>>
>> I would find this postdefault snippet easier to read like this, with a
>> single "if c.postdefault is not None", and then another condition inside
>> to decide what we should compare against:
>>
>>         if c.postdefault is not None:
>>             if c.predefault is not None:
>>                 print(f"  if (gdbarch->{c.name} == {c.predefault})", file=f)
>>                 print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
>>             else:
>>                 print(f"  if (gdbarch->{c.name} == 0)", file=f)
>>                 print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
>>
>> or even
>>
>>         if c.postdefault is not None:
>>             predefault = c.predefault or "0"
>>             print(f"  if (gdbarch->{c.name} == {predefault})", file=f)
>>             print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
> 
> I went with the second approach, I like removing the duplicate print
> calls.
> 
>>
>>> +
>>> +        # Now validate the value.
>>> +        if c.invalid is False:
>>> +            print(f"  /* Skip verify of {c.name}, invalid_p == 0 */", file=f)
>>> +        elif c.predicate:
>>> +            print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
>>> +        elif c.invalid is None:
>>
>> I think it's confusing for the "invalid" parameter to be able to be
>> None, that it's one to many state versus what we need to be able to
>> represent.  I think we can get by with string, True and False, where
>> True means "auto", where the validity check is generated if it makes
>> sense to.  Having one less state would help simplify things.  I hacked
>> this locally and it seems to work.  I can post this as a cleanup before
>> or on top of your patch, as you prefer.
>>
>> Another cleanup that would help me understand what is going on would be
>> to change this long list of if/elif to something that looks more like a
>> decision tree.  On top of your patch, and on top of my suggestion to get
>> rid of the invalid=None state, this is what I made looks like:
>>
>>         predefault = c.predefault or "0"
>>
>>         # Now validate the value.
>>         if type(c.invalid) is str:
>>             print(f"  if ({c.invalid})", file=f)
>>             print(f"""    log.puts ("\\n\\t{c.name}");""", file=f)
>>         elif c.invalid:
>>             if c.predicate:
>>                 print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
>>             elif c.postdefault:
>>                 # We currently don't print anything, but we could print:
>>                 # print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
>>                 pass
>>             else:
>>                 print(f"  if (gdbarch->{c.name} == {predefault})", file=f)
>>                 print(f"""    log.puts ("\\n\\t{c.name}");""", file=f)
>>         else:
>>             print(f"  /* Skip verify of {c.name}, invalid_p == 0 */", file=f)
>>
>> That structure is clearer to me.  We see clearly the portions handling
>> the three states of "invalid" (str, True and False).  Inside invalid ==
>> True (which really means "auto"), we see that we skip generating the
>> check when either predicate or postdefault is set, the two situations
>> where generating the check doesn't make sense.
>>
>> Another nice thing about this is that there isn't the "unhandled case
>> when generating gdbarch validation" case at the end.  Each branch of the
>> decision tree has an outcome.
>>
>> Again, if you agree with this cleanup, we could do it before or after
>> your patch, as you wish.
> 
> Yeah, I think what you have above would be a great improvement.  I like
> that we can (with what you propose) now have an "invalid" string and
> also have a predicate, that feels like an improvement.

I'm not sure if it was intended, but I don't mind.  It would have been
useful for my return_value patch (which we will not use in the end).  In
there, I wanted an "invalid" expression to validate that return_value
was not set at the same time as return_value_as_value.  But it was also
possible to set neither, so I wanted a predicate as well (which would
just check != 0).

> I've included an update of my patch below.   I haven't included the
> "invalid" refactor that you describe above as it sounds like you already
> have a patch for this work, and it would end up as a separate patch
> anyway.  I'm fine with it going in after my work of before, whatever
> works best.
> 
> Let me know what you think.

Thanks, I think this looks good, and we can make further improvements
from there.

Approved-By: Simon Marchi <simon.marchi@efficios.com>

Simon
  

Patch

diff --git a/gdb/gdbarch.py b/gdb/gdbarch.py
index 93b1e8bf84e..7fea41c9572 100755
--- a/gdb/gdbarch.py
+++ b/gdb/gdbarch.py
@@ -203,35 +203,44 @@  with open("gdbarch.c", "w") as f:
         file=f,
     )
     for c in filter(not_info, components):
-        if c.invalid is False:
-            print(f"  /* Skip verify of {c.name}, invalid_p == 0 */", file=f)
-        elif c.predicate:
-            print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
-        elif isinstance(c.invalid, str) and c.postdefault is not None:
-            print(f"  if ({c.invalid})", file=f)
-            print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
-        elif c.predefault is not None and c.postdefault is not None:
+        # An opportunity to write in the 'postdefault' value.
+        if c.postdefault is not None and c.predefault is not None:
             print(f"  if (gdbarch->{c.name} == {c.predefault})", file=f)
             print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
         elif c.postdefault is not None:
             print(f"  if (gdbarch->{c.name} == 0)", file=f)
             print(f"    gdbarch->{c.name} = {c.postdefault};", file=f)
+
+        # Now validate the value.
+        if c.invalid is False:
+            print(f"  /* Skip verify of {c.name}, invalid_p == 0 */", file=f)
+        elif c.predicate:
+            print(f"  /* Skip verify of {c.name}, has predicate.  */", file=f)
+        elif c.invalid is None:
+            # No validation has been requested for this component.
+            pass
         elif isinstance(c.invalid, str):
             print(f"  if ({c.invalid})", file=f)
             print(f"""    log.puts ("\\n\\t{c.name}");""", file=f)
-        elif c.predefault is not None:
+        elif c.invalid is True and c.predefault is not None:
             print(f"  if (gdbarch->{c.name} == {c.predefault})", file=f)
             print(f"""    log.puts ("\\n\\t{c.name}");""", file=f)
-        elif c.invalid is True:
+        elif c.invalid is True and c.postdefault is None:
             print(f"  if (gdbarch->{c.name} == 0)", file=f)
             print(f"""    log.puts ("\\n\\t{c.name}");""", file=f)
+        elif c.invalid is True:
+            # This component has its 'invalid' field set to True, but
+            # also has a postdefault.  This makes no sense, the
+            # postdefault will have been applied above, so this field
+            # will not have a zero value.
+            raise Exception(f"component {c.name} has postdefault and invalid set to True")
         else:
             # We should not allow ourselves to simply do nothing here
             # because no other case applies.  If we end up here then
             # either the input data needs adjusting so one of the
             # above cases matches, or we need additional cases adding
             # here.
-            raise Exception("unhandled case when generating gdbarch validation")
+            raise Exception(f"unhandled case when generating gdbarch validation: {c.name}")
     print("  if (!log.empty ())", file=f)
     print(
         """    internal_error (_("verify_gdbarch: the following are invalid ...%s"),""",
diff --git a/gdb/gdbarch_components.py b/gdb/gdbarch_components.py
index caa65c334ec..1d420a513f9 100644
--- a/gdb/gdbarch_components.py
+++ b/gdb/gdbarch_components.py
@@ -63,34 +63,28 @@ 
 # * "predefault", "postdefault", and "invalid" - These are used for
 # the initialization and verification steps:
 #
-# A gdbarch is zero-initialized.  Then, if a field has a pre-default,
-# the field is set to that value.  After initialization is complete
-# (that is, after the tdep code has a chance to change the settings),
-# the post-initialization step is done.
+# A gdbarch is zero-initialized.  Then, if a field has a "predefault",
+# the field is set to that value.  This becomes the fields initial
+# value.
 #
-# There is a generic algorithm to generate a "validation function" for
-# all fields.  If the field has an "invalid" attribute with a string
-# value, then this string is the expression (note that a string-valued
-# "invalid" and "predicate" are mutually exclusive; and the case where
-# invalid is True means to ignore this field and instead use the
-# default checking that is about to be described).  Otherwise, if
-# there is a "predefault", then the field is valid if it differs from
-# the predefault.  Otherwise, the check is done against 0 (really NULL
-# for function pointers, but same idea).
-#
-# In post-initialization / validation, there are several cases.
+# After initialization is complete (that is, after the tdep code has a
+# chance to change the settings), the post-initialization step is
+# done.
 #
-# * If "invalid" is False, or if the field specifies "predicate",
-# validation is skipped.  Otherwise, a validation step is emitted.
+# If the field still has its initial value (see above), and the field
+# has a "postdefault", then the post field is set to this value.
 #
-# * Otherwise, the validity is checked using the usual validation
-# function (see above).  If the field is considered valid, nothing is
-# done.
+# After the possible "postdefault" assignment, validation is
+# performed for fields that don't have a "predicate".
 #
-# * Otherwise, the field's value is invalid.  If there is a
-# "postdefault", then the field is assigned that value.
+# If the field has an "invalid" attribute with a string value, then
+# this string is the expression that should evaluate to true when the
+# field is invalid.
 #
-# * Otherwise, the gdbarch will fail validation and gdb will crash.
+# Otherwise, if "invalid" is True, then the generic validation
+# function is used: the field is considered invalid it it still
+# contains its default value.  This validation is what is used within
+# the _p predicate function if the field has "predicate" set to True.
 #
 # Function and Method share:
 #
@@ -206,7 +200,6 @@  Value(
     type="const struct floatformat **",
     name="bfloat16_format",
     postdefault="floatformats_bfloat16",
-    invalid=True,
     printer="pformat (gdbarch, gdbarch->bfloat16_format)",
 )
 
@@ -221,7 +214,6 @@  Value(
     type="const struct floatformat **",
     name="half_format",
     postdefault="floatformats_ieee_half",
-    invalid=True,
     printer="pformat (gdbarch, gdbarch->half_format)",
 )
 
@@ -236,7 +228,6 @@  Value(
     type="const struct floatformat **",
     name="float_format",
     postdefault="floatformats_ieee_single",
-    invalid=True,
     printer="pformat (gdbarch, gdbarch->float_format)",
 )
 
@@ -251,7 +242,6 @@  Value(
     type="const struct floatformat **",
     name="double_format",
     postdefault="floatformats_ieee_double",
-    invalid=True,
     printer="pformat (gdbarch, gdbarch->double_format)",
 )
 
@@ -266,7 +256,6 @@  Value(
     type="const struct floatformat **",
     name="long_double_format",
     postdefault="floatformats_ieee_double",
-    invalid=True,
     printer="pformat (gdbarch, gdbarch->long_double_format)",
 )
 
@@ -289,7 +278,6 @@  One if `wchar_t' is signed, zero if unsigned.
     name="wchar_signed",
     predefault="-1",
     postdefault="1",
-    invalid=True,
 )
 
 Method(
@@ -332,7 +320,6 @@  addr_bit is the size of a target address as represented in gdb
     name="addr_bit",
     predefault="0",
     postdefault="gdbarch_ptr_bit (gdbarch)",
-    invalid=True,
 )
 
 Value(
@@ -355,7 +342,6 @@  and if Dwarf versions < 4 need to be supported.
     name="dwarf2_addr_size",
     predefault="0",
     postdefault="gdbarch_ptr_bit (gdbarch) / TARGET_CHAR_BIT",
-    invalid=True,
 )
 
 Value(
@@ -366,7 +352,6 @@  One if `char' acts like `signed char', zero if `unsigned char'.
     name="char_signed",
     predefault="-1",
     postdefault="1",
-    invalid=True,
 )
 
 Function(