[v2,05/65] Arm: use is_whitespace()

Message ID 83414a2b-bfad-4381-8903-152ec0552e88@suse.com
State New
Headers
Series gas: whitespace handling |

Commit Message

Jan Beulich Jan. 27, 2025, 3:50 p.m. UTC
  Wherever blanks are permissible in input, tabs ought to be permissible,
too. This is particularly relevant when -f is passed to gas (alongside
appropriate input). At the same time use is_end_of_stmt() instead of an
open-coded nul char check.

In parse_neon_type() be more aggressive and remove the special casing of
certain characters altogether. The original default case simply having
"break" can't have been correct.
---
I don't think I see why parse_qfloat_immediate() checks for '\n'. If
that was needed, "line" (really: statement) separators would need
checking for, too. Yet an easy experiment demonstrates that this case is
working correctly despite the lack of a check for ';'.

The check for "0x" in parse_qfloat_immediate() seems fishy, too: If it
was actually needed, "0X" would apparently also checking form. Yet again
experimentally that's properly refused anyway, by atof_ieee() I guess.

While for parse_neon_type() the change improves the handling of this set
of (bad) examples (including the case of passing -f to gas):

	vcvt.bf016.f32	d0, q0
	vcvt.bf16.f032	d0, q0
	vcvt.b16.f32	d0, q0
	vcvt.b f16.f32	d0, q0
	vcvt.bf 16.f32	d0, q0
	vcvt.bf16.f 32	d0, q0
	vcvt.b	f16.f32	d0, q0
	vcvt.b	16.f32	d0, q0
	vcvt.b	32.f32	d0, q0
	vcvt.bf	16.f32	d0, q0

several are left which imo also ought to be rejected. Yet that will want
sorting separately.
---
v2: Also replace ISSPACE().
  

Comments

Richard Earnshaw (lists) Jan. 27, 2025, 4:31 p.m. UTC | #1
On 27/01/2025 15:50, Jan Beulich wrote:
> Wherever blanks are permissible in input, tabs ought to be permissible,
> too. This is particularly relevant when -f is passed to gas (alongside
> appropriate input). At the same time use is_end_of_stmt() instead of an
> open-coded nul char check.
> 
> In parse_neon_type() be more aggressive and remove the special casing of
> certain characters altogether. The original default case simply having
> "break" can't have been correct.
> ---
> I don't think I see why parse_qfloat_immediate() checks for '\n'. If
> that was needed, "line" (really: statement) separators would need
> checking for, too. Yet an easy experiment demonstrates that this case is
> working correctly despite the lack of a check for ';'.
> 
> The check for "0x" in parse_qfloat_immediate() seems fishy, too: If it
> was actually needed, "0X" would apparently also checking form. Yet again
> experimentally that's properly refused anyway, by atof_ieee() I guess.
> 
> While for parse_neon_type() the change improves the handling of this set
> of (bad) examples (including the case of passing -f to gas):
> 
> 	vcvt.bf016.f32	d0, q0
> 	vcvt.bf16.f032	d0, q0
> 	vcvt.b16.f32	d0, q0
> 	vcvt.b f16.f32	d0, q0
> 	vcvt.bf 16.f32	d0, q0
> 	vcvt.bf16.f 32	d0, q0
> 	vcvt.b	f16.f32	d0, q0
> 	vcvt.b	16.f32	d0, q0
> 	vcvt.b	32.f32	d0, q0
> 	vcvt.bf	16.f32	d0, q0
> 
> several are left which imo also ought to be rejected. Yet that will want
> sorting separately.
> ---
> v2: Also replace ISSPACE().
> 
> --- a/gas/config/tc-arm.c
> +++ b/gas/config/tc-arm.c
> @@ -1081,7 +1081,7 @@ const char FLT_CHARS[] = "rRsSfFdDxXeEpP
>  
>  /* Separator character handling.  */
>  
> -#define skip_whitespace(str)  do { if (*(str) == ' ') ++(str); } while (0)
> +#define skip_whitespace(str)  do { if (is_whitespace (*(str))) ++(str); } while (0)
>  
>  enum fp_16bit_format
>  {
> @@ -1510,13 +1510,9 @@ parse_neon_type (struct neon_type *type,
>  		  return FAIL;
>  		}
>  	      goto done;
> -	    case '0': case '1': case '2': case '3': case '4':
> -	    case '5': case '6': case '7': case '8': case '9':
> -	    case ' ': case '.':
> +	    default:
>  	      as_bad (_("unexpected type character `b' -- did you mean `bf'?"));
>  	      return FAIL;
> -	    default:
> -	      break;
>  	    }

This entire switch statement has now degenerated into 'f' or error.  So I think it would be better to just replace it with an if-else.

>  	  break;
>  	default:
> @@ -5055,7 +5051,8 @@ set_fp16_format (int dummy ATTRIBUTE_UNU
>    new_format = ARM_FP16_FORMAT_DEFAULT;
>  
>    name = input_line_pointer;
> -  while (*input_line_pointer && !ISSPACE (*input_line_pointer))
> +  while (!is_end_of_stmt (*input_line_pointer)
> +	 && !is_whitespace (*input_line_pointer))
>      input_line_pointer++;
>  
>    saved_char = *input_line_pointer;
> @@ -5366,7 +5363,7 @@ parse_qfloat_immediate (char **ccp, int
>      return FAIL;
>    else
>      {
> -      for (; *fpnum != '\0' && *fpnum != ' ' && *fpnum != '\n'; fpnum++)
> +      for (; *fpnum != '\0' && !is_whitespace (*fpnum) && *fpnum != '\n'; fpnum++)
>  	if (*fpnum == '.' || *fpnum == 'e' || *fpnum == 'E')
>  	  {
>  	    found_fpchar = 1;
> @@ -22450,7 +22447,7 @@ opcode_lookup (char **str)
>    /* Scan up to the end of the mnemonic, which must end in white space,
>       '.' (in unified mode, or for Neon/VFP instructions), or end of string.  */
>    for (base = end = *str; *end != '\0'; end++)
> -    if (*end == ' ' || *end == '.')
> +    if (is_whitespace (*end) || *end == '.')
>        break;
>  
>    if (end == base)
> @@ -22481,7 +22478,7 @@ opcode_lookup (char **str)
>  	  if (parse_neon_type (&inst.vectype, str) == FAIL)
>  	    return NULL;
>  	}
> -      else if (end[offset] != '\0' && end[offset] != ' ')
> +      else if (end[offset] != '\0' && !is_whitespace (end[offset]))
>  	return NULL;
>      }
>    else
> 

OK with that change.

R.
  
Jan Beulich Jan. 27, 2025, 4:55 p.m. UTC | #2
On 27.01.2025 17:31, Richard Earnshaw (lists) wrote:
> On 27/01/2025 15:50, Jan Beulich wrote:
>> --- a/gas/config/tc-arm.c
>> +++ b/gas/config/tc-arm.c
>> @@ -1081,7 +1081,7 @@ const char FLT_CHARS[] = "rRsSfFdDxXeEpP
>>  
>>  /* Separator character handling.  */
>>  
>> -#define skip_whitespace(str)  do { if (*(str) == ' ') ++(str); } while (0)
>> +#define skip_whitespace(str)  do { if (is_whitespace (*(str))) ++(str); } while (0)
>>  
>>  enum fp_16bit_format
>>  {
>> @@ -1510,13 +1510,9 @@ parse_neon_type (struct neon_type *type,
>>  		  return FAIL;
>>  		}
>>  	      goto done;
>> -	    case '0': case '1': case '2': case '3': case '4':
>> -	    case '5': case '6': case '7': case '8': case '9':
>> -	    case ' ': case '.':
>> +	    default:
>>  	      as_bad (_("unexpected type character `b' -- did you mean `bf'?"));
>>  	      return FAIL;
>> -	    default:
>> -	      break;
>>  	    }
> 
> This entire switch statement has now degenerated into 'f' or error.  So I think it would be better to just replace it with an if-else.

I can do that, but in other projects I'm active we'd deliberately ask
that switch() be used simply in the expectation that if any further
character would want checking for, code churn would then be lower. If
you're fine with the extra churn, I can of course adjust here.

Jan
  
Richard Earnshaw (lists) Jan. 27, 2025, 5:05 p.m. UTC | #3
On 27/01/2025 16:55, Jan Beulich wrote:
> On 27.01.2025 17:31, Richard Earnshaw (lists) wrote:
>> On 27/01/2025 15:50, Jan Beulich wrote:
>>> --- a/gas/config/tc-arm.c
>>> +++ b/gas/config/tc-arm.c
>>> @@ -1081,7 +1081,7 @@ const char FLT_CHARS[] = "rRsSfFdDxXeEpP
>>>  
>>>  /* Separator character handling.  */
>>>  
>>> -#define skip_whitespace(str)  do { if (*(str) == ' ') ++(str); } while (0)
>>> +#define skip_whitespace(str)  do { if (is_whitespace (*(str))) ++(str); } while (0)
>>>  
>>>  enum fp_16bit_format
>>>  {
>>> @@ -1510,13 +1510,9 @@ parse_neon_type (struct neon_type *type,
>>>  		  return FAIL;
>>>  		}
>>>  	      goto done;
>>> -	    case '0': case '1': case '2': case '3': case '4':
>>> -	    case '5': case '6': case '7': case '8': case '9':
>>> -	    case ' ': case '.':
>>> +	    default:
>>>  	      as_bad (_("unexpected type character `b' -- did you mean `bf'?"));
>>>  	      return FAIL;
>>> -	    default:
>>> -	      break;
>>>  	    }
>>
>> This entire switch statement has now degenerated into 'f' or error.  So I think it would be better to just replace it with an if-else.
> 
> I can do that, but in other projects I'm active we'd deliberately ask
> that switch() be used simply in the expectation that if any further
> character would want checking for, code churn would then be lower. If
> you're fine with the extra churn, I can of course adjust here.
> 
> Jan

Never say never, but I can't see other characters being needed here.  I'll take that hit if it's needed at some point in the future.  Note, we already don't handle this when parsing the numbers after an 'f', so we're not consistent anyway.

I don't particularly like the error recovery here anyway, but that's another story.  Printing "unexpected type character `b' -- did you mean `bf'?" is not exactly informative when there's so little context shown: it's not even as though 'bf' is the complete answer, the type is 'bf16'; but to do this even close to properly we'd need to tokenize the input and print the entire substring up to the next token separator.


R.
  

Patch

--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -1081,7 +1081,7 @@  const char FLT_CHARS[] = "rRsSfFdDxXeEpP
 
 /* Separator character handling.  */
 
-#define skip_whitespace(str)  do { if (*(str) == ' ') ++(str); } while (0)
+#define skip_whitespace(str)  do { if (is_whitespace (*(str))) ++(str); } while (0)
 
 enum fp_16bit_format
 {
@@ -1510,13 +1510,9 @@  parse_neon_type (struct neon_type *type,
 		  return FAIL;
 		}
 	      goto done;
-	    case '0': case '1': case '2': case '3': case '4':
-	    case '5': case '6': case '7': case '8': case '9':
-	    case ' ': case '.':
+	    default:
 	      as_bad (_("unexpected type character `b' -- did you mean `bf'?"));
 	      return FAIL;
-	    default:
-	      break;
 	    }
 	  break;
 	default:
@@ -5055,7 +5051,8 @@  set_fp16_format (int dummy ATTRIBUTE_UNU
   new_format = ARM_FP16_FORMAT_DEFAULT;
 
   name = input_line_pointer;
-  while (*input_line_pointer && !ISSPACE (*input_line_pointer))
+  while (!is_end_of_stmt (*input_line_pointer)
+	 && !is_whitespace (*input_line_pointer))
     input_line_pointer++;
 
   saved_char = *input_line_pointer;
@@ -5366,7 +5363,7 @@  parse_qfloat_immediate (char **ccp, int
     return FAIL;
   else
     {
-      for (; *fpnum != '\0' && *fpnum != ' ' && *fpnum != '\n'; fpnum++)
+      for (; *fpnum != '\0' && !is_whitespace (*fpnum) && *fpnum != '\n'; fpnum++)
 	if (*fpnum == '.' || *fpnum == 'e' || *fpnum == 'E')
 	  {
 	    found_fpchar = 1;
@@ -22450,7 +22447,7 @@  opcode_lookup (char **str)
   /* Scan up to the end of the mnemonic, which must end in white space,
      '.' (in unified mode, or for Neon/VFP instructions), or end of string.  */
   for (base = end = *str; *end != '\0'; end++)
-    if (*end == ' ' || *end == '.')
+    if (is_whitespace (*end) || *end == '.')
       break;
 
   if (end == base)
@@ -22481,7 +22478,7 @@  opcode_lookup (char **str)
 	  if (parse_neon_type (&inst.vectype, str) == FAIL)
 	    return NULL;
 	}
-      else if (end[offset] != '\0' && end[offset] != ' ')
+      else if (end[offset] != '\0' && !is_whitespace (end[offset]))
 	return NULL;
     }
   else