[v2] x86/Intel: avoid infinite recursion in i386_intel_simplify_symbol() (again)

Message ID 8ffded37-0027-4de5-9e23-37ed43dba6f8@suse.com
State New
Headers
Series [v2] x86/Intel: avoid infinite recursion in i386_intel_simplify_symbol() (again) |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Jan Beulich May 15, 2026, 1:03 p.m. UTC
  PR gas/30308

The same problem has appeared again due to insufficient use of the
"resolving" property (respective helpers originally introduced for this
PR) in 22f8905d9f38 ("x86/Intel: don't modify equates' expressions").

Also put in place a testcase, so this (hopefully) won't regress again.
---
v2: Restrict loop continuation further. Better and wider testcase.
  

Comments

Alan Modra May 16, 2026, 4:16 a.m. UTC | #1
On Fri, May 15, 2026 at 03:03:32PM +0200, Jan Beulich wrote:
> @@ -405,6 +409,11 @@ i386_intel_simplify_symbol (symbolS *sym
>  	  if (e->X_op == O_symbol && !e->X_add_number)
>  	    {
>  	      sym = e->X_add_symbol;
> +	      if (symbol_resolving_p (sym))
> +		return sym;
> +	      seg = S_GET_SEGMENT (sym);
> +	      if (seg != expr_section && seg != reg_section)
> +		return sym;
>  	      continue;
>  	    }
>  	  sym = make_expr_symbol (e);

I suspect that trying to cut the loop like this is wrong.

I don't have a testcase specific to this change but in trying to
create one I found the following:

 .intel_syntax noprefix
a=b
b=esi
x=a
a=1
b=2
 mov eax,[x]

Contrast with:
 .intel_syntax noprefix
x=esi
 mov eax,[x]
  
Jan Beulich May 18, 2026, 6:02 a.m. UTC | #2
On 16.05.2026 06:16, Alan Modra wrote:
> On Fri, May 15, 2026 at 03:03:32PM +0200, Jan Beulich wrote:
>> @@ -405,6 +409,11 @@ i386_intel_simplify_symbol (symbolS *sym
>>  	  if (e->X_op == O_symbol && !e->X_add_number)
>>  	    {
>>  	      sym = e->X_add_symbol;
>> +	      if (symbol_resolving_p (sym))
>> +		return sym;
>> +	      seg = S_GET_SEGMENT (sym);
>> +	      if (seg != expr_section && seg != reg_section)
>> +		return sym;
>>  	      continue;
>>  	    }
>>  	  sym = make_expr_symbol (e);
> 
> I suspect that trying to cut the loop like this is wrong.

I further suspect that looking for just O_symbol is insufficient here,
too. As mentioned in the bug, what I'm lacking is a good idea towards
making all of this more sane. (Quite likely expression walking like
what we do for Intel syntax parsing would better exist somewhere in
common code anyway.)

> I don't have a testcase specific to this change but in trying to
> create one I found the following:
> 
>  .intel_syntax noprefix
> a=b
> b=esi
> x=a
> a=1
> b=2
>  mov eax,[x]
> 
> Contrast with:
>  .intel_syntax noprefix
> x=esi
>  mov eax,[x]

I'll take a look at what you mean here.

Jan
  
Alan Modra May 19, 2026, 11:44 p.m. UTC | #3
I poked at this problem a little yesterday, and came up with the
following patch that passes for all the testcases I threw at it.

===
Commit 22f8905d9f38 reintroduced pr30308.  This fix resolves equates
safely in the presence of symbol loops by using symbol_equated_to.
Symbols aren't prematurely modified by copying their value
expressions before i386_intel_simplify does its work.  I figure that
it is safer to do this for all symbols rather than just the particular
case of equates, so the in_equate parameter can disappear.

diff --git a/gas/config/tc-i386-intel.c b/gas/config/tc-i386-intel.c
index 09248e8f8f9..dca33d3262a 100644
--- a/gas/config/tc-i386-intel.c
+++ b/gas/config/tc-i386-intel.c
@@ -381,49 +381,38 @@ i386_intel_simplify_register (expressionS *e)
 }
 
 static symbolS *
-i386_intel_simplify_symbol (symbolS *sym, bool in_equate)
+i386_intel_simplify_symbol (symbolS *sym)
 {
-  if (symbol_resolving_p (sym))
-    return sym;
+  symbolS *orig = sym;
+  offsetT off;
+  sym = symbol_equated_to (sym, &off);
+  if (sym == NULL || off != 0)
+    return orig;
 
   segT seg = S_GET_SEGMENT (sym);
-  if (seg != expr_section && seg != reg_section && !symbol_equated_p(sym))
+  if (seg == undefined_section
+      || (seg != expr_section && seg != reg_section && sym == orig))
     return sym;
 
-  for (;;)
-    {
-      /* While we're after equates, symbol_equated_p() isn't suitable here.  */
-      if (symbol_on_chain(sym, symbol_rootP, symbol_lastP))
-	{
-	  in_equate = true;
-	  sym = symbol_clone (sym, 0);
-	}
-      else if (in_equate)
-	{
-	  expressionS *e = symbol_get_value_expression (sym);
-
-	  if (e->X_op == O_symbol && !e->X_add_number)
-	    {
-	      sym = e->X_add_symbol;
-	      continue;
-	    }
-	  sym = make_expr_symbol (e);
-	}
-
-      break;
-    }
-
-  symbol_mark_resolving (sym);
-  int ret = i386_intel_simplify (symbol_get_value_expression (sym), in_equate);
-  if (ret == 2)
-    S_SET_SEGMENT (sym, absolute_section);
-  symbol_clear_resolving (sym);
+  /* i386_intel_simplify modifies its arg.  We don't want to make
+     premature changes to symbols here, particularly for a symbol
+     equate.  Changing a symbol may affect future uses of that
+     symbol.  So copy the symbol value and make a new symbol after
+     i386_intel_simplify has done its work.  */
+  expressionS *e = symbol_get_value_expression (sym);
+  expressionS exp;
+  memcpy (&exp, e, sizeof exp);
+  int ret = i386_intel_simplify (&exp);
+  if (ret == 0)
+    return NULL;
 
-  return ret ? sym : NULL;
+  if (memcmp (&exp, e, sizeof exp))
+    sym = make_expr_symbol (&exp);
+  return sym;
 }
 
 static int
-i386_intel_simplify (expressionS *e, bool in_equate)
+i386_intel_simplify (expressionS *e)
 {
   const reg_entry *the_reg = (this_operand >= 0
 			      ? i.op[this_operand].regs : NULL);
@@ -440,7 +429,7 @@ i386_intel_simplify (expressionS *e, bool in_equate)
     case O_index:
       if (e->X_add_symbol)
 	{
-	  newsym = i386_intel_simplify_symbol (e->X_add_symbol, in_equate);
+	  newsym = i386_intel_simplify_symbol (e->X_add_symbol);
 	  if (!newsym
 	      || !i386_intel_check(the_reg, intel_state.base,
 				   intel_state.index))
@@ -449,7 +438,7 @@ i386_intel_simplify (expressionS *e, bool in_equate)
 	}
       if (!intel_state.in_offset)
 	++intel_state.in_bracket;
-      newsym = i386_intel_simplify_symbol (e->X_op_symbol, in_equate);
+      newsym = i386_intel_simplify_symbol (e->X_op_symbol);
       if (!intel_state.in_offset)
 	--intel_state.in_bracket;
       if (!newsym)
@@ -476,7 +465,7 @@ i386_intel_simplify (expressionS *e, bool in_equate)
     case O_offset:
       intel_state.has_offset = 1;
       ++intel_state.in_offset;
-      newsym = i386_intel_simplify_symbol (e->X_add_symbol, in_equate);
+      newsym = i386_intel_simplify_symbol (e->X_add_symbol);
       --intel_state.in_offset;
       if (!newsym || !i386_intel_check(the_reg, base, state_index))
 	return 0;
@@ -505,7 +494,7 @@ i386_intel_simplify (expressionS *e, bool in_equate)
 	  as_bad (_("invalid use of register"));
 	  return 0;
 	}
-      newsym = i386_intel_simplify_symbol (e->X_add_symbol, in_equate);
+      newsym = i386_intel_simplify_symbol (e->X_add_symbol);
       if (!newsym)
 	return 0;
       e->X_add_symbol = newsym;
@@ -519,7 +508,7 @@ i386_intel_simplify (expressionS *e, bool in_equate)
 	  as_bad (_("invalid use of register"));
 	  return 0;
 	}
-      newsym = i386_intel_simplify_symbol (e->X_op_symbol, in_equate);
+      newsym = i386_intel_simplify_symbol (e->X_op_symbol);
       if (!newsym
 	  || !i386_intel_check(the_reg, intel_state.base,
 			       intel_state.index))
@@ -548,7 +537,7 @@ i386_intel_simplify (expressionS *e, bool in_equate)
 	  expressionS *left = NULL, *right = NULL;
 	  segT leftseg = NULL, rightseg = NULL;
 
-	  newsym = i386_intel_simplify_symbol (e->X_add_symbol, in_equate);
+	  newsym = i386_intel_simplify_symbol (e->X_add_symbol);
 	  if (newsym)
 	    {
 	      e->X_add_symbol = newsym;
@@ -563,7 +552,7 @@ i386_intel_simplify (expressionS *e, bool in_equate)
 		}
 	    }
 
-	  newsym = i386_intel_simplify_symbol (e->X_op_symbol, in_equate);
+	  newsym = i386_intel_simplify_symbol (e->X_op_symbol);
 	  if (newsym)
 	    {
 	      e->X_op_symbol = newsym;
@@ -609,7 +598,7 @@ i386_intel_simplify (expressionS *e, bool in_equate)
 	  if (!intel_state.in_scale++)
 	    intel_state.scale_factor = 1;
 
-	  newsym = i386_intel_simplify_symbol (e->X_add_symbol, in_equate);
+	  newsym = i386_intel_simplify_symbol (e->X_add_symbol);
 	  if (newsym)
 	    {
 	      e->X_add_symbol = newsym;
@@ -620,7 +609,7 @@ i386_intel_simplify (expressionS *e, bool in_equate)
 		  other = symbol_get_value_expression (e->X_add_symbol);
 		}
 
-	      newsym = i386_intel_simplify_symbol (e->X_op_symbol, in_equate);
+	      newsym = i386_intel_simplify_symbol (e->X_op_symbol);
 	    }
 
 	  if (newsym)
@@ -705,7 +694,7 @@ i386_intel_simplify (expressionS *e, bool in_equate)
     fallthrough:
       if (e->X_add_symbol)
 	{
-	  newsym = i386_intel_simplify_symbol (e->X_add_symbol, in_equate);
+	  newsym = i386_intel_simplify_symbol (e->X_add_symbol);
 	  if (!newsym)
 	    return 0;
 	  e->X_add_symbol = newsym;
@@ -722,7 +711,7 @@ i386_intel_simplify (expressionS *e, bool in_equate)
 	return 0;
       if (e->X_op_symbol)
 	{
-	  newsym = i386_intel_simplify_symbol (e->X_op_symbol, in_equate);
+	  newsym = i386_intel_simplify_symbol (e->X_op_symbol);
 	  if (!newsym)
 	    return 0;
 	  e->X_op_symbol = newsym;
@@ -798,7 +787,7 @@ i386_intel_operand (char *operand_string, int got_a_float)
   expr_mode = expr_operator_none;
   memset (&exp, 0, sizeof(exp));
   exp_seg = expression (&exp);
-  ret = i386_intel_simplify (&exp, false);
+  ret = i386_intel_simplify (&exp);
   intel_syntax = 1;
 
   SKIP_WHITESPACE ();
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index d3287513a75..988b024f0b8 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -167,7 +167,7 @@ static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
 				       const char *);
 static int i386_att_operand (char *);
 static int i386_intel_operand (char *, int);
-static int i386_intel_simplify (expressionS *, bool);
+static int i386_intel_simplify (expressionS *);
 static int i386_intel_parse_name (const char *, expressionS *, enum expr_mode);
 static const reg_entry *parse_register (const char *, char **);
 static const char *parse_insn (const char *, char *, enum parse_mode);
@@ -13527,7 +13527,7 @@ x86_cons (expressionS *exp, int size)
   intel_syntax = -intel_syntax;
 
   if (intel_syntax)
-    i386_intel_simplify (exp, false);
+    i386_intel_simplify (exp);
 
   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
   if (size <= 4 && expr_mode == expr_operator_present
  
Hans-Peter Nilsson May 20, 2026, 12:24 a.m. UTC | #4
On Wed, 20 May 2026, Alan Modra wrote:

> --- a/gas/config/tc-i386-intel.c

> +  if (memcmp (&exp, e, sizeof exp))
> +    sym = make_expr_symbol (&exp);

You almost got me there ("but why copy if it compares the 
same"). :)  May I suggest "if (memcmp (...) != 0)"?

brgds, H-P
  
Jan Beulich May 20, 2026, 6:50 a.m. UTC | #5
On 20.05.2026 01:44, Alan Modra wrote:
> I poked at this problem a little yesterday, and came up with the
> following patch that passes for all the testcases I threw at it.
> 
> ===
> Commit 22f8905d9f38 reintroduced pr30308.  This fix resolves equates
> safely in the presence of symbol loops by using symbol_equated_to.
> Symbols aren't prematurely modified by copying their value
> expressions before i386_intel_simplify does its work.  I figure that
> it is safer to do this for all symbols rather than just the particular
> case of equates, so the in_equate parameter can disappear.

This of course is a very nice (side-)effect of the change. I was worried
of copying too many symbols, but ...

> --- a/gas/config/tc-i386-intel.c
> +++ b/gas/config/tc-i386-intel.c
> @@ -381,49 +381,38 @@ i386_intel_simplify_register (expressionS *e)
>  }
>  
>  static symbolS *
> -i386_intel_simplify_symbol (symbolS *sym, bool in_equate)
> +i386_intel_simplify_symbol (symbolS *sym)
>  {
> -  if (symbol_resolving_p (sym))
> -    return sym;
> +  symbolS *orig = sym;
> +  offsetT off;
> +  sym = symbol_equated_to (sym, &off);
> +  if (sym == NULL || off != 0)
> +    return orig;
>  
>    segT seg = S_GET_SEGMENT (sym);
> -  if (seg != expr_section && seg != reg_section && !symbol_equated_p(sym))
> +  if (seg == undefined_section
> +      || (seg != expr_section && seg != reg_section && sym == orig))
>      return sym;
>  
> -  for (;;)
> -    {
> -      /* While we're after equates, symbol_equated_p() isn't suitable here.  */
> -      if (symbol_on_chain(sym, symbol_rootP, symbol_lastP))
> -	{
> -	  in_equate = true;
> -	  sym = symbol_clone (sym, 0);
> -	}
> -      else if (in_equate)
> -	{
> -	  expressionS *e = symbol_get_value_expression (sym);
> -
> -	  if (e->X_op == O_symbol && !e->X_add_number)
> -	    {
> -	      sym = e->X_add_symbol;
> -	      continue;
> -	    }
> -	  sym = make_expr_symbol (e);
> -	}
> -
> -      break;
> -    }
> -
> -  symbol_mark_resolving (sym);
> -  int ret = i386_intel_simplify (symbol_get_value_expression (sym), in_equate);
> -  if (ret == 2)
> -    S_SET_SEGMENT (sym, absolute_section);
> -  symbol_clear_resolving (sym);
> +  /* i386_intel_simplify modifies its arg.  We don't want to make
> +     premature changes to symbols here, particularly for a symbol
> +     equate.  Changing a symbol may affect future uses of that
> +     symbol.  So copy the symbol value and make a new symbol after
> +     i386_intel_simplify has done its work.  */
> +  expressionS *e = symbol_get_value_expression (sym);
> +  expressionS exp;
> +  memcpy (&exp, e, sizeof exp);
> +  int ret = i386_intel_simplify (&exp);
> +  if (ret == 0)
> +    return NULL;
>  
> -  return ret ? sym : NULL;
> +  if (memcmp (&exp, e, sizeof exp))
> +    sym = make_expr_symbol (&exp);

... this way you limit things as well. Let's go with your patch, and thank
you very much to looking into this.

One further remark lightly related to H-P's: I was first tempted to suggest
to replace the memcpy() by structure assignment. Yet then I realized that
this would be at risk of breaking the use of memcmp(). Perhaps the comment
could be extended to briefly mention this, to reduce the risk of someone
trying to replace the memcpy()?

Jan
  
Jan Beulich May 20, 2026, 6:52 a.m. UTC | #6
On 20.05.2026 01:44, Alan Modra wrote:
> I poked at this problem a little yesterday, and came up with the
> following patch that passes for all the testcases I threw at it.
> 
> ===
> Commit 22f8905d9f38 reintroduced pr30308.  This fix resolves equates
> safely in the presence of symbol loops by using symbol_equated_to.
> Symbols aren't prematurely modified by copying their value
> expressions before i386_intel_simplify does its work.  I figure that
> it is safer to do this for all symbols rather than just the particular
> case of equates, so the in_equate parameter can disappear.

Oh, one other thing: Would you mind either picking the testcase from my
patch and include it here, or extend that to your liking, or make an
entirely different one?

Jan
  
Alan Modra May 20, 2026, 9:22 a.m. UTC | #7
On Wed, May 20, 2026 at 08:50:39AM +0200, Jan Beulich wrote:
> On 20.05.2026 01:44, Alan Modra wrote:
> > I poked at this problem a little yesterday, and came up with the
> > following patch that passes for all the testcases I threw at it.
> > 
> > ===
> > Commit 22f8905d9f38 reintroduced pr30308.  This fix resolves equates
> > safely in the presence of symbol loops by using symbol_equated_to.
> > Symbols aren't prematurely modified by copying their value
> > expressions before i386_intel_simplify does its work.  I figure that
> > it is safer to do this for all symbols rather than just the particular
> > case of equates, so the in_equate parameter can disappear.
> 
> This of course is a very nice (side-)effect of the change. I was worried
> of copying too many symbols, but ...
> 
> > --- a/gas/config/tc-i386-intel.c
> > +++ b/gas/config/tc-i386-intel.c
> > @@ -381,49 +381,38 @@ i386_intel_simplify_register (expressionS *e)
> >  }
> >  
> >  static symbolS *
> > -i386_intel_simplify_symbol (symbolS *sym, bool in_equate)
> > +i386_intel_simplify_symbol (symbolS *sym)
> >  {
> > -  if (symbol_resolving_p (sym))
> > -    return sym;
> > +  symbolS *orig = sym;
> > +  offsetT off;
> > +  sym = symbol_equated_to (sym, &off);
> > +  if (sym == NULL || off != 0)
> > +    return orig;
> >  
> >    segT seg = S_GET_SEGMENT (sym);
> > -  if (seg != expr_section && seg != reg_section && !symbol_equated_p(sym))
> > +  if (seg == undefined_section
> > +      || (seg != expr_section && seg != reg_section && sym == orig))
> >      return sym;
> >  
> > -  for (;;)
> > -    {
> > -      /* While we're after equates, symbol_equated_p() isn't suitable here.  */
> > -      if (symbol_on_chain(sym, symbol_rootP, symbol_lastP))
> > -	{
> > -	  in_equate = true;
> > -	  sym = symbol_clone (sym, 0);
> > -	}
> > -      else if (in_equate)
> > -	{
> > -	  expressionS *e = symbol_get_value_expression (sym);
> > -
> > -	  if (e->X_op == O_symbol && !e->X_add_number)
> > -	    {
> > -	      sym = e->X_add_symbol;
> > -	      continue;
> > -	    }
> > -	  sym = make_expr_symbol (e);
> > -	}
> > -
> > -      break;
> > -    }
> > -
> > -  symbol_mark_resolving (sym);
> > -  int ret = i386_intel_simplify (symbol_get_value_expression (sym), in_equate);
> > -  if (ret == 2)
> > -    S_SET_SEGMENT (sym, absolute_section);
> > -  symbol_clear_resolving (sym);
> > +  /* i386_intel_simplify modifies its arg.  We don't want to make
> > +     premature changes to symbols here, particularly for a symbol
> > +     equate.  Changing a symbol may affect future uses of that
> > +     symbol.  So copy the symbol value and make a new symbol after
> > +     i386_intel_simplify has done its work.  */
> > +  expressionS *e = symbol_get_value_expression (sym);
> > +  expressionS exp;
> > +  memcpy (&exp, e, sizeof exp);
> > +  int ret = i386_intel_simplify (&exp);
> > +  if (ret == 0)
> > +    return NULL;
> >  
> > -  return ret ? sym : NULL;
> > +  if (memcmp (&exp, e, sizeof exp))
> > +    sym = make_expr_symbol (&exp);
> 
> ... this way you limit things as well. Let's go with your patch, and thank
> you very much to looking into this.
> 
> One further remark lightly related to H-P's: I was first tempted to suggest
> to replace the memcpy() by structure assignment.

I wrote it that way at first..

> Yet then I realized that
> this would be at risk of breaking the use of memcmp(). Perhaps the comment
> could be extended to briefly mention this, to reduce the risk of someone
> trying to replace the memcpy()?

Done.
  

Patch

--- a/gas/config/tc-i386-intel.c
+++ b/gas/config/tc-i386-intel.c
@@ -383,6 +383,8 @@  i386_intel_simplify_register (expression
 static symbolS *
 i386_intel_simplify_symbol (symbolS *sym, bool in_equate)
 {
+  symbolS *orig = NULL;
+
   if (symbol_resolving_p (sym))
     return sym;
 
@@ -396,7 +398,9 @@  i386_intel_simplify_symbol (symbolS *sym
       if (symbol_on_chain(sym, symbol_rootP, symbol_lastP))
 	{
 	  in_equate = true;
+	  orig = sym;
 	  sym = symbol_clone (sym, 0);
+	  symbol_mark_resolving (orig);
 	}
       else if (in_equate)
 	{
@@ -405,6 +409,11 @@  i386_intel_simplify_symbol (symbolS *sym
 	  if (e->X_op == O_symbol && !e->X_add_number)
 	    {
 	      sym = e->X_add_symbol;
+	      if (symbol_resolving_p (sym))
+		return sym;
+	      seg = S_GET_SEGMENT (sym);
+	      if (seg != expr_section && seg != reg_section)
+		return sym;
 	      continue;
 	    }
 	  sym = make_expr_symbol (e);
@@ -419,6 +428,9 @@  i386_intel_simplify_symbol (symbolS *sym
     S_SET_SEGMENT (sym, absolute_section);
   symbol_clear_resolving (sym);
 
+  if (orig)
+    symbol_clear_resolving (orig);
+
   return ret ? sym : NULL;
 }
 
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -63,6 +63,7 @@  if [gas_32_check] then {
     run_dump_test "opcode-suffix"
     run_dump_test "intel"
     run_dump_test "intel-intel"
+    run_list_test "intel-equ-loop"
     run_dump_test "intel16"
     run_list_test "intelbad" ""
     run_dump_test "intelok"
--- /dev/null
+++ b/gas/testsuite/gas/i386/intel-equ-loop.l
@@ -0,0 +1,8 @@ 
+.*: Assembler messages:
+.*: Error: symbol definition loop .*
+.*: Error: can't resolve .*
+.*: Error: symbol definition loop .*
+.*: Error: can't resolve .*
+.*: Error: symbol definition loop .*
+.*: Error: can't resolve .*
+#pass
--- /dev/null
+++ b/gas/testsuite/gas/i386/intel-equ-loop.s
@@ -0,0 +1,16 @@ 
+	.intel_syntax noprefix
+
+	a = a
+	mov eax, [a]
+
+	b = c
+	c = b
+	mov eax, [b]
+	mov eax, [c]
+
+	d = e
+	e = d
+	x = d
+	d = 1
+	e = 2
+	mov eax, [x]