[2/2] x86: permit non-immediate offset operands with direct far branches

Message ID 2728e87c-67d4-4007-a632-eed27d31336f@suse.com
State New
Headers
Series x86/gas: far direct branch plus fix_new_exp() simplification |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Jan Beulich Oct. 14, 2024, 6:39 a.m. UTC
  While Intel syntax permits such already (as can be observed by there
not being a need to prefix the respective operand with "offset"), AT&T
syntax so far strictly insists on two immediate operands. Multiple
(successive) immediate operands are somewhat problematic anyway, as
it's never really clear what their order ought to be. While there's no
apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ, for
LCALL and LJMP we can aid programmers by permitting alternative forms,
with the offset operand being a "displacement" rather than an
"immediate". The order of the two operands then doesn't matter; they're
distinguished by type.

Mark the new templates AT&T-only; the original ones really should have
been so, too. For backwards compatibility reasons we can't really
correct that mistake ...
---
While the proper Intel syntax operand form is sel:offset, for some
reason we also support two (comma separated) operands. The ambiguity
there is being left alone, as the sel:offset form is enough to avoid it.
  

Comments

H.J. Lu Oct. 14, 2024, 6:50 a.m. UTC | #1
On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <jbeulich@suse.com> wrote:

> While Intel syntax permits such already (as can be observed by there
> not being a need to prefix the respective operand with "offset"), AT&T
> syntax so far strictly insists on two immediate operands. Multiple
> (successive) immediate operands are somewhat problematic anyway, as
> it's never really clear what their order ought to be. While there's no
> apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ, for
> LCALL and LJMP we can aid programmers by permitting alternative forms,
> with the offset operand being a "displacement" rather than an
> "immediate". The order of the two operands the doesn't matter; they're
> distinguished by type.
>
> Mark the new templates AT&T-only; the original ones really should have
> been so, too. For backwards compatibility reasons we can't really
> correct that mistake ...
> ---
> While the proper Intel syntax operand form is sel:offset, for some
> reason we also support two (comma separated) operands. The ambiguity
> there is being left alone, as the sel:offset form is enough to avoid it.
>

I don't think this is a good idea.  $ is used to denote an immediate
operand in AT&T syntax.  This change serves no practical purpose for
programmers.


> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -7061,13 +7061,16 @@ i386_assemble (char *line)
>      swap_operands ();
>
>    /* The order of the immediates should be reversed for 2-immediates EXTRQ
> -     and INSERTQ instructions.  Also UWRMSR wants its immediate to be in
> the
> -     "canonical" place (first), despite it appearing last (in AT&T
> syntax, or
> -     because of the swapping above) in the incoming set of operands.  */
> +     and INSERTQ instructions.  Also direct far branches and UWRMSR want
> their
> +     immediate to be in the "canonical" place (first), despite it
> (possibly)
> +     appearing last (in AT&T syntax, or because of the swapping above) in
> the
> +     incoming set of operands.  */
>    if ((i.imm_operands == 2
>         && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
> -      || (t->mnem_off == MN_uwrmsr && i.imm_operands
> -         && i.operands > i.imm_operands))
> +      || (((t->opcode_modifier.jump == JUMP_INTERSEGMENT
> +           && !operand_type_check (i.types[0], imm))
> +          || t->mnem_off == MN_uwrmsr)
> +         && i.imm_operands && i.operands > i.imm_operands))
>        swap_2_operands (0, 1);
>
>    if (i.imm_operands)
> @@ -11621,6 +11624,7 @@ output_interseg_jump (void)
>    int size;
>    int prefix;
>    int code16;
> +  const expressionS *exp1;
>
>    code16 = 0;
>    if (flag_code == CODE_16BIT)
> @@ -11659,9 +11663,11 @@ output_interseg_jump (void)
>      *p++ = i.prefix[REX_PREFIX];
>
>    *p++ = i.tm.base_opcode;
> -  if (i.op[1].imms->X_op == O_constant)
> +  exp1 = operand_type_check (i.types[0], imm) ? i.op[1].imms
> +                                             : i.op[1].disps;
> +  if (exp1->X_op == O_constant)
>      {
> -      offsetT n = i.op[1].imms->X_add_number;
> +      offsetT n = exp1->X_add_number;
>
>        if (size == 2
>           && !fits_in_unsigned_word (n)
> @@ -11674,7 +11680,7 @@ output_interseg_jump (void)
>      }
>    else
>      fix_new_exp (frag_now, p - frag_now->fr_literal, size,
> -                i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
> +                exp1, 0, reloc (size, 0, 0, i.reloc[1]));
>
>    p += size;
>    if (i.op[0].imms->X_op == O_constant)
> --- a/gas/testsuite/gas/i386/jump.d
> +++ b/gas/testsuite/gas/i386/jump.d
> @@ -15,28 +15,32 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    66 ff 2c bd 00 00 00 00         ljmpw
> \*0x0\(,%edi,4\) 1c: (R_386_)?(dir)?32   xxx
>  [      ]*[a-f0-9]+:    ff 2d 00 00 00 00       ljmp   \*0x0    22:
> (R_386_)?(dir)?32   xxx
>  [      ]*[a-f0-9]+:    66 ff 2d 00 00 00 00    ljmpw  \*0x0    29:
> (R_386_)?(dir)?32   xxx
> -[      ]*[a-f0-9]+:    ea 00 00 00 00 34 12    ljmp   \$0x1234,\$0x0
>  2e: (R_386_)?(dir)?32   xxx
> -[      ]*[a-f0-9]+:    e8 c7 ff ff ff          call   (0x0|0 <.text>)
> -[      ]*[a-f0-9]+:    e8 ((fc|c2) ff ff ff|00 00 00 00)
>  call   (0x)?(0|3a|3e)( <.text(\+0x3e)?>)?       3a: (R_386_PC)?(DISP)?32
>       xxx
> -[      ]*[a-f0-9]+:    ff 15 00 00 00 00       call   \*0x0    40:
> (R_386_)?(dir)?32   xxx
> +[      ]*[a-f0-9]+:    ea 00 00 00 00 34 12    ljmp   \$0x1234,\$0x0[
> ]+[a-f0-9]+: (R_386_|dir)?32    xxx
> +[      ]*[a-f0-9]+:    ea 00 00 00 00 34 12    ljmp   \$0x1234,\$0x0[
> ]+[a-f0-9]+: (R_386_|dir)?32    xxx
> +[      ]*[a-f0-9]+:    ea 00 00 00 00 34 12    ljmp   \$0x1234,\$0x0[
> ]+[a-f0-9]+: (R_386_|dir)?32    xxx
> +[      ]*[a-f0-9]+:    e8 b9 ff ff ff          call   (0x0|0 <.text>)
> +[      ]*[a-f0-9]+:    e8 ((fc|b4) ff ff ff|00 00 00 00)
>  call   (0x)?(0|48|4c)( <.text(\+0x4c)?>)?[      ]+[a-f0-9]+:
> (R_386_PC|DISP)32  xxx
> +[      ]*[a-f0-9]+:    ff 15 00 00 00 00       call   \*0x0[
>  ]+[a-f0-9]+: (R_386_|dir)?32    xxx
>  [      ]*[a-f0-9]+:    ff d7                   call   \*%edi
>  [      ]*[a-f0-9]+:    ff 17                   call   \*\(%edi\)
> -[      ]*[a-f0-9]+:    ff 1c bd 00 00 00 00    lcall  \*0x0\(,%edi,4\)
> 4b: (R_386_)?(dir)?32   xxx
> -[      ]*[a-f0-9]+:    66 ff 1c bd 00 00 00 00         lcallw
> \*0x0\(,%edi,4\) 53: (R_386_)?(dir)?32   xxx
> -[      ]*[a-f0-9]+:    ff 1d 00 00 00 00       lcall  \*0x0    59:
> (R_386_)?(dir)?32   xxx
> -[      ]*[a-f0-9]+:    66 ff 1d 00 00 00 00    lcallw \*0x0    60:
> (R_386_)?(dir)?32   xxx
> -[      ]*[a-f0-9]+:    9a 00 00 00 00 34 12    lcall  \$0x1234,\$0x0
>  65: (R_386_)?(dir)?32   xxx
> +[      ]*[a-f0-9]+:    ff 1c bd 00 00 00 00    lcall  \*0x0\(,%edi,4\)[
>       ]+[a-f0-9]+: (R_386_|dir)?32    xxx
> +[      ]*[a-f0-9]+:    66 ff 1c bd 00 00 00 00         lcallw
> \*0x0\(,%edi,4\)[        ]+[a-f0-9]+: (R_386_|dir)?32    xxx
> +[      ]*[a-f0-9]+:    ff 1d 00 00 00 00       lcall  \*0x0[
>  ]+[a-f0-9]+: (R_386_|dir)?32    xxx
> +[      ]*[a-f0-9]+:    66 ff 1d 00 00 00 00    lcallw \*0x0[
>  ]+[a-f0-9]+: (R_386_|dir)?32    xxx
> +[      ]*[a-f0-9]+:    9a 00 00 00 00 34 12    lcall  \$0x1234,\$0x0[
> ]+[a-f0-9]+: (R_386_|dir)?32    xxx
> +[      ]*[a-f0-9]+:    9a 00 00 00 00 34 12    lcall  \$0x1234,\$0x0[
> ]+[a-f0-9]+: (R_386_|dir)?32    xxx
> +[      ]*[a-f0-9]+:    9a 00 00 00 00 34 12    lcall  \$0x1234,\$0x0[
> ]+[a-f0-9]+: (R_386_|dir)?32    xxx
>  [      ]*[a-f0-9]+:    66 ff 13                callw  \*\(%ebx\)
>  [      ]*[a-f0-9]+:    ff 13                   call   \*\(%ebx\)
>  [      ]*[a-f0-9]+:    ff 1b                   lcall  \*\(%ebx\)
>  [      ]*[a-f0-9]+:    66 ff 23                jmpw   \*\(%ebx\)
>  [      ]*[a-f0-9]+:    ff 23                   jmp    \*\(%ebx\)
>  [      ]*[a-f0-9]+:    ff 2b                   ljmp   \*\(%ebx\)
> -[      ]*[a-f0-9]+:    eb 00                   jmp    (0x)?7b(
> <.text(\+0x7b)?>)?
> +[      ]*[a-f0-9]+:    eb 00                   jmp    (0x)?97(
> <.text(\+0x97)?>)?
>  [      ]*[a-f0-9]+:    90                      nop
> -[      ]*[a-f0-9]+:    e3 00                   jecxz  (0x)?7e(
> <.text(\+0x7e)?>)?
> +[      ]*[a-f0-9]+:    e3 00                   jecxz  (0x)?9a(
> <.text(\+0x9a)?>)?
>  [      ]*[a-f0-9]+:    90                      nop
> -[      ]*[a-f0-9]+:    eb 00                   jmp    (0x)?81(
> <.text(\+0x81)?>)?
> +[      ]*[a-f0-9]+:    eb 00                   jmp    (0x)?9d(
> <.text(\+0x9d)?>)?
>  [      ]*[a-f0-9]+:    90                      nop
>  [      ]*[a-f0-9]+:    9a 90 90 90 90 90 90    lcall
> \$0x9090,\$0x90909090
>  [      ]*[a-f0-9]+:    9a 90 90 90 90 90 90    lcall
> \$0x9090,\$0x90909090
> --- a/gas/testsuite/gas/i386/jump.s
> +++ b/gas/testsuite/gas/i386/jump.s
> @@ -13,6 +13,8 @@
>         ljmp    *xxx
>         ljmpw   *xxx
>         ljmp    $0x1234,$xxx
> +       ljmp    $0x1234,xxx
> +       ljmp    xxx,$0x1234
>
>         call    1b
>         call    xxx
> @@ -24,6 +26,8 @@
>         lcall   *xxx
>         lcallw  *xxx
>         lcall   $0x1234,$xxx
> +       lcall   $0x1234,xxx
> +       lcall   xxx,$0x1234
>
>         .intel_syntax noprefix
>         call    word ptr [ebx]
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -494,6 +494,7 @@ call, 0x9a, No64, JumpInterSegment|Impli
>  call, 0xff/3, 0,
> Amd64|Modrm|JumpAbsolute|ImplicitStackOp|DefaultSize|NoSuf, {
> Dword|Fword|BaseIndex }
>  call, 0xff/3, x64, Intel64|Modrm|JumpAbsolute|ImplicitStackOp|NoSuf, {
> Dword|Fword|Tbyte|BaseIndex }
>  lcall, 0x9a, No64, JumpInterSegment|DefaultSize|No_bSuf|No_sSuf|No_qSuf,
> { Imm16, Imm16|Imm32 }
> +lcall, 0x9a, No64,
> JumpInterSegment|DefaultSize|No_bSuf|No_sSuf|No_qSuf|ATTSyntax, { Imm16,
> Disp16|Disp32 }
>  lcall, 0xff/3, 0,
> Amd64|Modrm|JumpAbsolute|DefaultSize|No_bSuf|No_sSuf|No_qSuf, {
> Unspecified|BaseIndex }
>  lcall, 0xff/3, x64, Intel64|Modrm|JumpAbsolute|No_bSuf|No_sSuf, {
> Unspecified|BaseIndex }
>
> @@ -507,6 +508,7 @@ jmp, 0xea, No64, JumpInterSegment|No_bSu
>  jmp, 0xff/5, 0, Amd64|Modrm|JumpAbsolute|NoSuf, { Dword|Fword|BaseIndex }
>  jmp, 0xff/5, x64, Intel64|Modrm|JumpAbsolute|NoSuf, {
> Dword|Fword|Tbyte|BaseIndex }
>  ljmp, 0xea, No64, JumpInterSegment|No_bSuf|No_sSuf|No_qSuf, { Imm16,
> Imm16|Imm32 }
> +ljmp, 0xea, No64, JumpInterSegment|No_bSuf|No_sSuf|No_qSuf|ATTSyntax, {
> Imm16, Disp16|Disp32 }
>  ljmp, 0xff/5, 0, Amd64|Modrm|JumpAbsolute|No_bSuf|No_sSuf|No_qSuf, {
> Unspecified|BaseIndex }
>  ljmp, 0xff/5, x64, Intel64|Modrm|JumpAbsolute|No_bSuf|No_sSuf, {
> Unspecified|BaseIndex }
>
>
>
>
  
Jan Beulich Oct. 14, 2024, 7 a.m. UTC | #2
On 14.10.2024 08:50, H.J. Lu wrote:
> On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <jbeulich@suse.com> wrote:
> 
>> While Intel syntax permits such already (as can be observed by there
>> not being a need to prefix the respective operand with "offset"), AT&T
>> syntax so far strictly insists on two immediate operands. Multiple
>> (successive) immediate operands are somewhat problematic anyway, as
>> it's never really clear what their order ought to be. While there's no
>> apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ, for
>> LCALL and LJMP we can aid programmers by permitting alternative forms,
>> with the offset operand being a "displacement" rather than an
>> "immediate". The order of the two operands the doesn't matter; they're
>> distinguished by type.
>>
>> Mark the new templates AT&T-only; the original ones really should have
>> been so, too. For backwards compatibility reasons we can't really
>> correct that mistake ...
>> ---
>> While the proper Intel syntax operand form is sel:offset, for some
>> reason we also support two (comma separated) operands. The ambiguity
>> there is being left alone, as the sel:offset form is enough to avoid it.
>>
> 
> I don't think this is a good idea.  $ is used to denote an immediate
> operand in
> AT&T syntax.

Yet as with JMPABS the question is whether this really is an immediate.
Just take Lili's most recent comment there: It can be looked at as a
displacement/offset relative to the specified segment/selector. IOW like
there I think two perspectives are possible, and - as expressed by the
post-commit-message remark - they're already both supported in Intel
syntax.

>  This change serves no practical purposes for programmers.

I'm (once again) puzzled by such statements of yours: How do you know?

The main question I'd like to see answered is: Is there any (realistic)
risk of causing harm by permitting the alternative forms?

Jan
  
H.J. Lu Oct. 14, 2024, 7:25 a.m. UTC | #3
On Mon, Oct 14, 2024, 3:00 PM Jan Beulich <jbeulich@suse.com> wrote:

> On 14.10.2024 08:50, H.J. Lu wrote:
> > On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <jbeulich@suse.com> wrote:
> >
> >> While Intel syntax permits such already (as can be observed by there
> >> not being a need to prefix the respective operand with "offset"), AT&T
> >> syntax so far strictly insists on two immediate operands. Multiple
> >> (successive) immediate operands are somewhat problematic anyway, as
> >> it's never really clear what their order ought to be. While there's no
> >> apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ, for
> >> LCALL and LJMP we can aid programmers by permitting alternative forms,
> >> with the offset operand being a "displacement" rather than an
> >> "immediate". The order of the two operands the doesn't matter; they're
> >> distinguished by type.
> >>
> >> Mark the new templates AT&T-only; the original ones really should have
> >> been so, too. For backwards compatibility reasons we can't really
> >> correct that mistake ...
> >> ---
> >> While the proper Intel syntax operand form is sel:offset, for some
> >> reason we also support two (comma separated) operands. The ambiguity
> >> there is being left alone, as the sel:offset form is enough to avoid it.
> >>
> >
> > I don't think this is a good idea.  $ is used to denote an immediate
> > operand in
> > AT&T syntax.
>
> Yet as with JMPABS the question is whether this really is an immediate.
>

I believe this is encoded as an immediate operand.

Just take Lili's most recent comment there: It can be looked at as a
> displacement/offset relative to the specified segment/selector. IOW like
> there I think two perspectives are possible, and - as expressed by the
> post-commit-message remark - they're already both supported in Intel
> syntax.
>
> >  This change serves no practical purposes for programmers.
>
> I'm (once again) puzzled by such statements of yours: How do you know?
>

Programmers write code which works for all assemblers.
The current assemblers will be in use for a very long time.
This new syntax means that 0x1000 can mean an address
or an immediate, depending on the instruction.
Very little code will use it.



> The main question I'd like to see answered is: Is there any (realistic)
> risk of causing harm by permitting the alternative forms?
>
> Jan
>
>
  
Jan Beulich Oct. 14, 2024, 7:59 a.m. UTC | #4
On 14.10.2024 09:25, H.J. Lu wrote:
> On Mon, Oct 14, 2024, 3:00 PM Jan Beulich <jbeulich@suse.com> wrote:
> 
>> On 14.10.2024 08:50, H.J. Lu wrote:
>>> On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>
>>>> While Intel syntax permits such already (as can be observed by there
>>>> not being a need to prefix the respective operand with "offset"), AT&T
>>>> syntax so far strictly insists on two immediate operands. Multiple
>>>> (successive) immediate operands are somewhat problematic anyway, as
>>>> it's never really clear what their order ought to be. While there's no
>>>> apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ, for
>>>> LCALL and LJMP we can aid programmers by permitting alternative forms,
>>>> with the offset operand being a "displacement" rather than an
>>>> "immediate". The order of the two operands the doesn't matter; they're
>>>> distinguished by type.
>>>>
>>>> Mark the new templates AT&T-only; the original ones really should have
>>>> been so, too. For backwards compatibility reasons we can't really
>>>> correct that mistake ...
>>>> ---
>>>> While the proper Intel syntax operand form is sel:offset, for some
>>>> reason we also support two (comma separated) operands. The ambiguity
>>>> there is being left alone, as the sel:offset form is enough to avoid it.
>>>>
>>>
>>> I don't think this is a good idea.  $ is used to denote an immediate
>>> operand in
>>> AT&T syntax.
>>
>> Yet as with JMPABS the question is whether this really is an immediate.
> 
> I believe this is encoded as an immediate operand.

We've been there in the JMPABS discussion already: Encoding and how to
treat things internally to the assembler as well as how to represent
things in source code are, to a fair degree, separate aspects.

> Just take Lili's most recent comment there: It can be looked at as a
>> displacement/offset relative to the specified segment/selector. IOW like
>> there I think two perspectives are possible, and - as expressed by the
>> post-commit-message remark - they're already both supported in Intel
>> syntax.
>>
>>>  This change serves no practical purposes for programmers.
>>
>> I'm (once again) puzzled by such statements of yours: How do you know?
> 
> Programmers write codes which work for all assemblers.
> The current assemblers will be in use for a very long time.
> This new syntax means that 0x1000 can mean an address
> or an immediate, depending on instructions.

Why that? 0x1000 (in AT&T syntax) always means an address; $0x1000
always means an immediate. My point is that for JMPABS, LCALL, and LJMP
the distinction between immediate and address is blurred. Personally I
would even go as far as saying that using immediates for the offsets
there has always been a mistake, as these are (taking your wording
above) addresses. The representation as immediates in source code was
presumably derived from this being immediates in the encoding, yet that
model wasn't consistently followed anyway - as previously expressed
(again in the JMPABS discussion) near direct CALL/JMP have immediate
operands too in their encodings, yet they're not expressed as such in
source (and validly so, imo).

> Very few codes will use it.

Which is fine for now, and may change going forward. You didn't
comment on my reasoning for making the change at all, which leaves me
with the impression that you apparently can't argue against that, yet
you're trying to make up (unrelated) reasons to argue against the
change as such (which, I'm sorry to say, again feel largely
subjective).

Jan
  
Cui, Lili Oct. 14, 2024, 8:13 a.m. UTC | #5
> Subject: Re: [PATCH 2/2] x86: permit non-immediate offset operands with direct
> far branches
> 
> On 14.10.2024 08:50, H.J. Lu wrote:
> > On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <jbeulich@suse.com> wrote:
> >
> >> While Intel syntax permits such already (as can be observed by there
> >> not being a need to prefix the respective operand with "offset"),
> >> AT&T syntax so far strictly insists on two immediate operands.
> >> Multiple
> >> (successive) immediate operands are somewhat problematic anyway, as
> >> it's never really clear what their order ought to be. While there's
> >> no apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ,
> >> for LCALL and LJMP we can aid programmers by permitting alternative
> >> forms, with the offset operand being a "displacement" rather than an
> >> "immediate". The order of the two operands the doesn't matter;
> >> they're distinguished by type.
> >>
> >> Mark the new templates AT&T-only; the original ones really should
> >> have been so, too. For backwards compatibility reasons we can't
> >> really correct that mistake ...
> >> ---
> >> While the proper Intel syntax operand form is sel:offset, for some
> >> reason we also support two (comma separated) operands. The ambiguity
> >> there is being left alone, as the sel:offset form is enough to avoid it.
> >>
> >
> > I don't think this is a good idea.  $ is used to denote an immediate
> > operand in AT&T syntax.
> 
> Yet as with JMPABS the question is whether this really is an immediate.
> Just take Lili's most recent comment there: It can be looked at as a
> displacement/offset relative to the specified segment/selector. IOW like there I
> think two perspectives are possible, and - as expressed by the post-commit-
> message remark - they're already both supported in Intel syntax.
> 

I am on the same page as Hulin and H.J. I insist that offset uses "foo" and immediate uses "$foo". Even in the special case where the base is 0, conceptually it is still an offset (mov 0x12345678, %eax).  The current implementation and documentation are clear and reasonable. I believe you already know the point I want to make, so I won’t reply again.

Lili.

> >  This change serves no practical purposes for programmers.
> 
> I'm (once again) puzzled by such statements of yours: How do you know?
> 
> The main question I'd like to see answered is: Is there any (realistic) risk of
> causing harm by permitting the alternative forms?
> 
> Jan
  
Jan Beulich Oct. 14, 2024, 10:32 a.m. UTC | #6
On 14.10.2024 10:13, Cui, Lili wrote:
>> Subject: Re: [PATCH 2/2] x86: permit non-immediate offset operands with direct
>> far branches
>>
>> On 14.10.2024 08:50, H.J. Lu wrote:
>>> On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>
>>>> While Intel syntax permits such already (as can be observed by there
>>>> not being a need to prefix the respective operand with "offset"),
>>>> AT&T syntax so far strictly insists on two immediate operands.
>>>> Multiple
>>>> (successive) immediate operands are somewhat problematic anyway, as
>>>> it's never really clear what their order ought to be. While there's
>>>> no apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ,
>>>> for LCALL and LJMP we can aid programmers by permitting alternative
>>>> forms, with the offset operand being a "displacement" rather than an
>>>> "immediate". The order of the two operands the doesn't matter;
>>>> they're distinguished by type.
>>>>
>>>> Mark the new templates AT&T-only; the original ones really should
>>>> have been so, too. For backwards compatibility reasons we can't
>>>> really correct that mistake ...
>>>> ---
>>>> While the proper Intel syntax operand form is sel:offset, for some
>>>> reason we also support two (comma separated) operands. The ambiguity
>>>> there is being left alone, as the sel:offset form is enough to avoid it.
>>>>
>>>
>>> I don't think this is a good idea.  $ is used to denote an immediate
>>> operand in AT&T syntax.
>>
>> Yet as with JMPABS the question is whether this really is an immediate.
>> Just take Lili's most recent comment there: It can be looked at as a
>> displacement/offset relative to the specified segment/selector. IOW like there I
>> think two perspectives are possible, and - as expressed by the post-commit-
>> message remark - they're already both supported in Intel syntax.
>>
> 
> I am on the same page as Hulin and H.J. I insist that offset uses "foo" and immediate
> uses "$foo",

And we agree here. I can only repeat that the issue here is that it is
depending on personal perspective whether one considers operands to
effectively any form of direct branch to be an "offset" or to be an
"immediate".

> even in the special case where the base is 0, conceptually it is still an offset (mov 0x12345678, %eax).  The current implementation and documentation are clear and reasonable. I believe you already know the point I want to make, so I won’t reply again.

Right, but are you also at least trying to understand the point(s) I've
been trying to make for quite some time now? Why do we need to prescribe
to people how to write their code, when offering liberty has no negative
consequences?

Jan
  
H.J. Lu Oct. 14, 2024, 10:49 a.m. UTC | #7
On Mon, Oct 14, 2024, 6:32 PM Jan Beulich <jbeulich@suse.com> wrote:

> On 14.10.2024 10:13, Cui, Lili wrote:
> >> Subject: Re: [PATCH 2/2] x86: permit non-immediate offset operands with
> direct
> >> far branches
> >>
> >> On 14.10.2024 08:50, H.J. Lu wrote:
> >>> On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>
> >>>> While Intel syntax permits such already (as can be observed by there
> >>>> not being a need to prefix the respective operand with "offset"),
> >>>> AT&T syntax so far strictly insists on two immediate operands.
> >>>> Multiple
> >>>> (successive) immediate operands are somewhat problematic anyway, as
> >>>> it's never really clear what their order ought to be. While there's
> >>>> no apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ,
> >>>> for LCALL and LJMP we can aid programmers by permitting alternative
> >>>> forms, with the offset operand being a "displacement" rather than an
> >>>> "immediate". The order of the two operands the doesn't matter;
> >>>> they're distinguished by type.
> >>>>
> >>>> Mark the new templates AT&T-only; the original ones really should
> >>>> have been so, too. For backwards compatibility reasons we can't
> >>>> really correct that mistake ...
> >>>> ---
> >>>> While the proper Intel syntax operand form is sel:offset, for some
> >>>> reason we also support two (comma separated) operands. The ambiguity
> >>>> there is being left alone, as the sel:offset form is enough to avoid
> it.
> >>>>
> >>>
> >>> I don't think this is a good idea.  $ is used to denote an immediate
> >>> operand in AT&T syntax.
> >>
> >> Yet as with JMPABS the question is whether this really is an immediate.
> >> Just take Lili's most recent comment there: It can be looked at as a
> >> displacement/offset relative to the specified segment/selector. IOW
> like there I
> >> think two perspectives are possible, and - as expressed by the
> post-commit-
> >> message remark - they're already both supported in Intel syntax.
> >>
> >
> > I am on the same page as Hulin and H.J. I insist that offset uses "foo"
> and immediate
> > uses "$foo",
>
> And we agree here. I can only repeat that the issue here is that it is
> depending on personal perspective whether one considers operands to
> effectively any form of direct branch to be an "offset" or to be an
> "immediate".
>
> > even in the special case where the base is 0, conceptually it is still
> an offset (mov 0x12345678, %eax).  The current implementation and
> documentation are clear and reasonable. I believe you already know the
> point I want to make, so I won’t reply again.
>
> Right, but are you also at least trying to understand the point(s) I've
> been trying to make for quite some time now? Why do we need to prescribe
> to people how to write their code, when offering liberty has no negative
> consequences?
>

Unnecessary syntax without real benefits is a negative consequence.


> Jan
>
>
  
Jan Beulich Oct. 14, 2024, 11:30 a.m. UTC | #8
On 14.10.2024 12:49, H.J. Lu wrote:
> On Mon, Oct 14, 2024, 6:32 PM Jan Beulich <jbeulich@suse.com> wrote:
> 
>> On 14.10.2024 10:13, Cui, Lili wrote:
>>>> Subject: Re: [PATCH 2/2] x86: permit non-immediate offset operands with
>> direct
>>>> far branches
>>>>
>>>> On 14.10.2024 08:50, H.J. Lu wrote:
>>>>> On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>
>>>>>> While Intel syntax permits such already (as can be observed by there
>>>>>> not being a need to prefix the respective operand with "offset"),
>>>>>> AT&T syntax so far strictly insists on two immediate operands.
>>>>>> Multiple
>>>>>> (successive) immediate operands are somewhat problematic anyway, as
>>>>>> it's never really clear what their order ought to be. While there's
>>>>>> no apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ,
>>>>>> for LCALL and LJMP we can aid programmers by permitting alternative
>>>>>> forms, with the offset operand being a "displacement" rather than an
>>>>>> "immediate". The order of the two operands the doesn't matter;
>>>>>> they're distinguished by type.
>>>>>>
>>>>>> Mark the new templates AT&T-only; the original ones really should
>>>>>> have been so, too. For backwards compatibility reasons we can't
>>>>>> really correct that mistake ...
>>>>>> ---
>>>>>> While the proper Intel syntax operand form is sel:offset, for some
>>>>>> reason we also support two (comma separated) operands. The ambiguity
>>>>>> there is being left alone, as the sel:offset form is enough to avoid
>> it.
>>>>>>
>>>>>
>>>>> I don't think this is a good idea.  $ is used to denote an immediate
>>>>> operand in AT&T syntax.
>>>>
>>>> Yet as with JMPABS the question is whether this really is an immediate.
>>>> Just take Lili's most recent comment there: It can be looked at as a
>>>> displacement/offset relative to the specified segment/selector. IOW
>> like there I
>>>> think two perspectives are possible, and - as expressed by the
>> post-commit-
>>>> message remark - they're already both supported in Intel syntax.
>>>>
>>>
>>> I am on the same page as Hulin and H.J. I insist that offset uses "foo"
>> and immediate
>>> uses "$foo",
>>
>> And we agree here. I can only repeat that the issue here is that it is
>> depending on personal perspective whether one considers operands to
>> effectively any form of direct branch to be an "offset" or to be an
>> "immediate".
>>
>>> even in the special case where the base is 0, conceptually it is still
>> an offset (mov 0x12345678, %eax).  The current implementation and
>> documentation are clear and reasonable. I believe you already know the
>> point I want to make, so I won’t reply again.
>>
>> Right, but are you also at least trying to understand the point(s) I've
>> been trying to make for quite some time now? Why do we need to prescribe
>> to people how to write their code, when offering liberty has no negative
>> consequences?
> 
> Unnecessary syntax without real benefits is a negative consequence.

So helping engineers, by disentangling the offset vs immediate situation
that AT&T mode effectively had forever, isn't a "real benefit" in your eyes?
Looks like a benefit can only ever be something _you_ consider beneficial.
Again - may I ask that you please try to be objective, not subjective?

Jan
  
H.J. Lu Oct. 15, 2024, 7:53 p.m. UTC | #9
On Mon, Oct 14, 2024, 7:30 PM Jan Beulich <jbeulich@suse.com> wrote:

> On 14.10.2024 12:49, H.J. Lu wrote:
> > On Mon, Oct 14, 2024, 6:32 PM Jan Beulich <jbeulich@suse.com> wrote:
> >
> >> On 14.10.2024 10:13, Cui, Lili wrote:
> >>>> Subject: Re: [PATCH 2/2] x86: permit non-immediate offset operands
> with
> >> direct
> >>>> far branches
> >>>>
> >>>> On 14.10.2024 08:50, H.J. Lu wrote:
> >>>>> On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>
> >>>>>> While Intel syntax permits such already (as can be observed by there
> >>>>>> not being a need to prefix the respective operand with "offset"),
> >>>>>> AT&T syntax so far strictly insists on two immediate operands.
> >>>>>> Multiple
> >>>>>> (successive) immediate operands are somewhat problematic anyway, as
> >>>>>> it's never really clear what their order ought to be. While there's
> >>>>>> no apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ,
> >>>>>> for LCALL and LJMP we can aid programmers by permitting alternative
> >>>>>> forms, with the offset operand being a "displacement" rather than an
> >>>>>> "immediate". The order of the two operands the doesn't matter;
> >>>>>> they're distinguished by type.
> >>>>>>
> >>>>>> Mark the new templates AT&T-only; the original ones really should
> >>>>>> have been so, too. For backwards compatibility reasons we can't
> >>>>>> really correct that mistake ...
> >>>>>> ---
> >>>>>> While the proper Intel syntax operand form is sel:offset, for some
> >>>>>> reason we also support two (comma separated) operands. The ambiguity
> >>>>>> there is being left alone, as the sel:offset form is enough to avoid
> >> it.
> >>>>>>
> >>>>>
> >>>>> I don't think this is a good idea.  $ is used to denote an immediate
> >>>>> operand in AT&T syntax.
> >>>>
> >>>> Yet as with JMPABS the question is whether this really is an
> immediate.
> >>>> Just take Lili's most recent comment there: It can be looked at as a
> >>>> displacement/offset relative to the specified segment/selector. IOW
> >> like there I
> >>>> think two perspectives are possible, and - as expressed by the
> >> post-commit-
> >>>> message remark - they're already both supported in Intel syntax.
> >>>>
> >>>
> >>> I am on the same page as Hulin and H.J. I insist that offset uses "foo"
> >> and immediate
> >>> uses "$foo",
> >>
> >> And we agree here. I can only repeat that the issue here is that it is
> >> depending on personal perspective whether one considers operands to
> >> effectively any form of direct branch to be an "offset" or to be an
> >> "immediate".
> >>
> >>> even in the special case where the base is 0, conceptually it is still
> >> an offset (mov 0x12345678, %eax).  The current implementation and
> >> documentation are clear and reasonable. I believe you already know the
> >> point I want to make, so I won’t reply again.
> >>
> >> Right, but are you also at least trying to understand the point(s) I've
> >> been trying to make for quite some time now? Why do we need to prescribe
> >> to people how to write their code, when offering liberty has no negative
> >> consequences?
> >
> > Unnecessary syntax without real benefits is a negative consequence.
>
> So helping engineers, by disentangling the offset vs immediate situation
> that AT&T mode effectively had forever, isn't a "real benefit" in your
> eyes?
>

A real benefit is providing a CPU feature which can't be expressed with the
current assembler. Your new syntax may confuse programmers. People may use it
without realizing it doesn't work with older versions of the assembler or with
other assemblers.

> Looks like a benefit can only ever be something _you_ consider beneficial.
> Again - may I ask that you please try to be objective, not subjective?
>
> Jan
>
>
  
Cui, Lili Oct. 16, 2024, 1:55 a.m. UTC | #10
>>>>>> On 14.10.2024 08:50, H.J. Lu wrote:
>>>>>>> On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <mailto:jbeulich@suse.com> wrote:
>>>>>>>
>>>>>>>> While Intel syntax permits such already (as can be observed by there
>>>>>>>> not being a need to prefix the respective operand with "offset"),
>>>>>>>> AT&T syntax so far strictly insists on two immediate operands.
>>>>>>>> Multiple
>>>>>>>> (successive) immediate operands are somewhat problematic anyway, as
>>>>>>>> it's never really clear what their order ought to be. While there's
>>>>>>>> no apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ,
>>>>>>>> for LCALL and LJMP we can aid programmers by permitting alternative
>>>>>>>> forms, with the offset operand being a "displacement" rather than an
>>>>>>>> "immediate". The order of the two operands the doesn't matter;
>>>>>>>> they're distinguished by type.
>>>>>>>>
>>>>>>>> Mark the new templates AT&T-only; the original ones really should
>>>>>>>> have been so, too. For backwards compatibility reasons we can't
>>>>>>>> really correct that mistake ...
>>>>>>>> ---
>>>>>>>> While the proper Intel syntax operand form is sel:offset, for some
>>>>>>>> reason we also support two (comma separated) operands. The ambiguity
>>>>>>>> there is being left alone, as the sel:offset form is enough to avoid
>>>> it.
>>>>>>>>
>>>>>>>
>>>>>>> I don't think this is a good idea.  $ is used to denote an immediate
>>>>>>> operand in AT&T syntax.
>>>>>>
>>>>>> Yet as with JMPABS the question is whether this really is an immediate.
>>>>>> Just take Lili's most recent comment there: It can be looked at as a
>>>>>> displacement/offset relative to the specified segment/selector. IOW
>>>> like there I
>>>>>> think two perspectives are possible, and - as expressed by the
>>>> post-commit-
>>>>>> message remark - they're already both supported in Intel syntax.
>>>>>>
>>>>>
>>>>> I am on the same page as Hulin and H.J. I insist that offset uses "foo"
>>>> and immediate
>>>>> uses "$foo",
>>>>
>>>> And we agree here. I can only repeat that the issue here is that it is
>>>> depending on personal perspective whether one considers operands to
>>>> effectively any form of direct branch to be an "offset" or to be an
>>>> "immediate".
>>>>
>>>>> even in the special case where the base is 0, conceptually it is still
>>>> an offset (mov 0x12345678, %eax).  The current implementation and
>>>> documentation are clear and reasonable. I believe you already know the
>>>> point I want to make, so I won’t reply again.
>>>>
>>>> Right, but are you also at least trying to understand the point(s) I've
>>>> been trying to make for quite some time now? Why do we need to prescribe
>>>> to people how to write their code, when offering liberty has no negative
>>>> consequences?
>>> 
>>> Unnecessary syntax without real benefits is a negative consequence.
>>
>>So helping engineers, by disentangling the offset vs immediate situation
>>that AT&T mode effectively had forever, isn't a "real benefit" in your eyes?
>>
>Real benefit is to provide a CPU feature which can't be done by the current assembler.
>Your new syntax may confuse programmers.
>People may use it without realizing it doesn't work
>with older versions of assembler or other assemblers.

Yes, and we should follow the spec:

"JMPABS is a 64-bit only ISA extension, and acts as a near-direct branch with an absolute target.
The 64-bit immediate operand is treated as an absolute effective address, which is subject to canonicality
checks.

 tempRIP = <target64 from instruction>;
 IF tempRIP is not canonical:
 THEN #GP(0);
 ELSE
 RIP = tempRIP;"

Lili.
  
Jan Beulich Oct. 16, 2024, 7:05 a.m. UTC | #11
On 15.10.2024 21:53, H.J. Lu wrote:
> On Mon, Oct 14, 2024, 7:30 PM Jan Beulich <jbeulich@suse.com> wrote:
> 
>> On 14.10.2024 12:49, H.J. Lu wrote:
>>> On Mon, Oct 14, 2024, 6:32 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>
>>>> On 14.10.2024 10:13, Cui, Lili wrote:
>>>>>> Subject: Re: [PATCH 2/2] x86: permit non-immediate offset operands
>> with
>>>> direct
>>>>>> far branches
>>>>>>
>>>>>> On 14.10.2024 08:50, H.J. Lu wrote:
>>>>>>> On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>>
>>>>>>>> While Intel syntax permits such already (as can be observed by there
>>>>>>>> not being a need to prefix the respective operand with "offset"),
>>>>>>>> AT&T syntax so far strictly insists on two immediate operands.
>>>>>>>> Multiple
>>>>>>>> (successive) immediate operands are somewhat problematic anyway, as
>>>>>>>> it's never really clear what their order ought to be. While there's
>>>>>>>> no apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ,
>>>>>>>> for LCALL and LJMP we can aid programmers by permitting alternative
>>>>>>>> forms, with the offset operand being a "displacement" rather than an
>>>>>>>> "immediate". The order of the two operands the doesn't matter;
>>>>>>>> they're distinguished by type.
>>>>>>>>
>>>>>>>> Mark the new templates AT&T-only; the original ones really should
>>>>>>>> have been so, too. For backwards compatibility reasons we can't
>>>>>>>> really correct that mistake ...
>>>>>>>> ---
>>>>>>>> While the proper Intel syntax operand form is sel:offset, for some
>>>>>>>> reason we also support two (comma separated) operands. The ambiguity
>>>>>>>> there is being left alone, as the sel:offset form is enough to avoid
>>>> it.
>>>>>>>>
>>>>>>>
>>>>>>> I don't think this is a good idea.  $ is used to denote an immediate
>>>>>>> operand in AT&T syntax.
>>>>>>
>>>>>> Yet as with JMPABS the question is whether this really is an
>> immediate.
>>>>>> Just take Lili's most recent comment there: It can be looked at as a
>>>>>> displacement/offset relative to the specified segment/selector. IOW
>>>> like there I
>>>>>> think two perspectives are possible, and - as expressed by the
>>>> post-commit-
>>>>>> message remark - they're already both supported in Intel syntax.
>>>>>>
>>>>>
>>>>> I am on the same page as Hulin and H.J. I insist that offset uses "foo"
>>>> and immediate
>>>>> uses "$foo",
>>>>
>>>> And we agree here. I can only repeat that the issue here is that it is
>>>> depending on personal perspective whether one considers operands to
>>>> effectively any form of direct branch to be an "offset" or to be an
>>>> "immediate".
>>>>
>>>>> even in the special case where the base is 0, conceptually it is still
>>>> an offset (mov 0x12345678, %eax).  The current implementation and
>>>> documentation are clear and reasonable. I believe you already know the
>>>> point I want to make, so I won’t reply again.
>>>>
>>>> Right, but are you also at least trying to understand the point(s) I've
>>>> been trying to make for quite some time now? Why do we need to prescribe
>>>> to people how to write their code, when offering liberty has no negative
>>>> consequences?
>>>
>>> Unnecessary syntax without real benefits is a negative consequence.
>>
>> So helping engineers, by disentangling the offset vs immediate situation
>> that AT&T mode effectively had forever, isn't a "real benefit" in your
>> eyes?
> 
> Real benefit is to provide a CPU feature which can't be
> done by the current assembler.
> Your new syntax may confuse programmers.
> People may use it without realizing it doesn't work
> with older versions
> of assembler or
> other assemblers.

That's true for many other enhancements we've been making, including any new
ISA extension that support is being added for.

Jan
  
Jan Beulich Oct. 16, 2024, 7:11 a.m. UTC | #12
On 16.10.2024 03:55, Cui, Lili wrote:
>>>>>>> On 14.10.2024 08:50, H.J. Lu wrote:
>>>>>>>> On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <mailto:jbeulich@suse.com> wrote:
>>>>>>>>
>>>>>>>>> While Intel syntax permits such already (as can be observed by there
>>>>>>>>> not being a need to prefix the respective operand with "offset"),
>>>>>>>>> AT&T syntax so far strictly insists on two immediate operands.
>>>>>>>>> Multiple
>>>>>>>>> (successive) immediate operands are somewhat problematic anyway, as
>>>>>>>>> it's never really clear what their order ought to be. While there's
>>>>>>>>> no apparent way of dealing with this for ENTER, EXTRQ, and INSERTQ,
>>>>>>>>> for LCALL and LJMP we can aid programmers by permitting alternative
>>>>>>>>> forms, with the offset operand being a "displacement" rather than an
>>>>>>>>> "immediate". The order of the two operands the doesn't matter;
>>>>>>>>> they're distinguished by type.
>>>>>>>>>
>>>>>>>>> Mark the new templates AT&T-only; the original ones really should
>>>>>>>>> have been so, too. For backwards compatibility reasons we can't
>>>>>>>>> really correct that mistake ...
>>>>>>>>> ---
>>>>>>>>> While the proper Intel syntax operand form is sel:offset, for some
>>>>>>>>> reason we also support two (comma separated) operands. The ambiguity
>>>>>>>>> there is being left alone, as the sel:offset form is enough to avoid
>>>>> it.
>>>>>>>>>
>>>>>>>>
>>>>>>>> I don't think this is a good idea.  $ is used to denote an immediate
>>>>>>>> operand in AT&T syntax.
>>>>>>>
>>>>>>> Yet as with JMPABS the question is whether this really is an immediate.
>>>>>>> Just take Lili's most recent comment there: It can be looked at as a
>>>>>>> displacement/offset relative to the specified segment/selector. IOW
>>>>> like there I
>>>>>>> think two perspectives are possible, and - as expressed by the
>>>>> post-commit-
>>>>>>> message remark - they're already both supported in Intel syntax.
>>>>>>>
>>>>>>
>>>>>> I am on the same page as Hulin and H.J. I insist that offset uses "foo"
>>>>> and immediate
>>>>>> uses "$foo",
>>>>>
>>>>> And we agree here. I can only repeat that the issue here is that it is
>>>>> depending on personal perspective whether one considers operands to
>>>>> effectively any form of direct branch to be an "offset" or to be an
>>>>> "immediate".
>>>>>
>>>>>> even in the special case where the base is 0, conceptually it is still
>>>>> an offset (mov 0x12345678, %eax).  The current implementation and
>>>>> documentation are clear and reasonable. I believe you already know the
>>>>> point I want to make, so I won’t reply again.
>>>>>
>>>>> Right, but are you also at least trying to understand the point(s) I've
>>>>> been trying to make for quite some time now? Why do we need to prescribe
>>>>> to people how to write their code, when offering liberty has no negative
>>>>> consequences?
>>>>
>>>> Unnecessary syntax without real benefits is a negative consequence.
>>>
>>> So helping engineers, by disentangling the offset vs immediate situation
>>> that AT&T mode effectively had forever, isn't a "real benefit" in your eyes?
>>>
>> Real benefit is to provide a CPU feature which can't be done by the current assembler.
>> Your new syntax may confuse programmers.
>> People may use it without realizing it doesn't work
>> with older versions of assembler or other assemblers.
> 
> Yes, and we should follow the spec:
> 
> "JMPABS is a 64-bit only ISA extension, and acts as a near-direct branch with an absolute target.
> The 64-bit immediate operand is treated an as absolute effective address, which is subject to canonicality
> checks.

This use of "immediate" in the spec was brought up before. I can only repeat that
an almost identical use of "immediate" exists on the SDM's CALL insn page (just
to give an example): "The operand can be an immediate value, a general-purpose
register, or a memory location." Yet we treat direct CALLs as having
displacement operands. Just to also repeat (iirc not even the 1st time): How
insn encoding is described doesn't always match what operands the insn takes in
assembly sources.

Jan
  
H.J. Lu Oct. 16, 2024, 10:02 a.m. UTC | #13
On Wed, Oct 16, 2024, 3:05 PM Jan Beulich <jbeulich@suse.com> wrote:

> On 15.10.2024 21:53, H.J. Lu wrote:
> > On Mon, Oct 14, 2024, 7:30 PM Jan Beulich <jbeulich@suse.com> wrote:
> >
> >> On 14.10.2024 12:49, H.J. Lu wrote:
> >>> On Mon, Oct 14, 2024, 6:32 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>
> >>>> On 14.10.2024 10:13, Cui, Lili wrote:
> >>>>>> Subject: Re: [PATCH 2/2] x86: permit non-immediate offset operands
> >> with
> >>>> direct
> >>>>>> far branches
> >>>>>>
> >>>>>> On 14.10.2024 08:50, H.J. Lu wrote:
> >>>>>>> On Mon, Oct 14, 2024, 2:39 PM Jan Beulich <jbeulich@suse.com>
> wrote:
> >>>>>>>
> >>>>>>>> While Intel syntax permits such already (as can be observed by
> there
> >>>>>>>> not being a need to prefix the respective operand with "offset"),
> >>>>>>>> AT&T syntax so far strictly insists on two immediate operands.
> >>>>>>>> Multiple
> >>>>>>>> (successive) immediate operands are somewhat problematic anyway,
> as
> >>>>>>>> it's never really clear what their order ought to be. While
> there's
> >>>>>>>> no apparent way of dealing with this for ENTER, EXTRQ, and
> INSERTQ,
> >>>>>>>> for LCALL and LJMP we can aid programmers by permitting
> alternative
> >>>>>>>> forms, with the offset operand being a "displacement" rather than
> an
> >>>>>>>> "immediate". The order of the two operands the doesn't matter;
> >>>>>>>> they're distinguished by type.
> >>>>>>>>
> >>>>>>>> Mark the new templates AT&T-only; the original ones really should
> >>>>>>>> have been so, too. For backwards compatibility reasons we can't
> >>>>>>>> really correct that mistake ...
> >>>>>>>> ---
> >>>>>>>> While the proper Intel syntax operand form is sel:offset, for some
> >>>>>>>> reason we also support two (comma separated) operands. The
> ambiguity
> >>>>>>>> there is being left alone, as the sel:offset form is enough to
> avoid
> >>>> it.
> >>>>>>>>
> >>>>>>>
> >>>>>>> I don't think this is a good idea.  $ is used to denote an
> immediate
> >>>>>>> operand in AT&T syntax.
> >>>>>>
> >>>>>> Yet as with JMPABS the question is whether this really is an
> >> immediate.
> >>>>>> Just take Lili's most recent comment there: It can be looked at as a
> >>>>>> displacement/offset relative to the specified segment/selector. IOW
> >>>> like there I
> >>>>>> think two perspectives are possible, and - as expressed by the
> >>>> post-commit-
> >>>>>> message remark - they're already both supported in Intel syntax.
> >>>>>>
> >>>>>
> >>>>> I am on the same page as Hulin and H.J. I insist that offset uses
> "foo"
> >>>> and immediate
> >>>>> uses "$foo",
> >>>>
> >>>> And we agree here. I can only repeat that the issue here is that it is
> >>>> depending on personal perspective whether one considers operands to
> >>>> effectively any form of direct branch to be an "offset" or to be an
> >>>> "immediate".
> >>>>
> >>>>> even in the special case where the base is 0, conceptually it is
> still
> >>>> an offset (mov 0x12345678, %eax).  The current implementation and
> >>>> documentation are clear and reasonable. I believe you already know the
> >>>> point I want to make, so I won’t reply again.
> >>>>
> >>>> Right, but are you also at least trying to understand the point(s)
> I've
> >>>> been trying to make for quite some time now? Why do we need to
> prescribe
> >>>> to people how to write their code, when offering liberty has no
> negative
> >>>> consequences?
> >>>
> >>> Unnecessary syntax without real benefits is a negative consequence.
> >>
> >> So helping engineers, by disentangling the offset vs immediate situation
> >> that AT&T mode effectively had forever, isn't a "real benefit" in your
> >> eyes?
> >
> > Real benefit is to provide a CPU feature which can't be
> > done by the current assembler.
> > Your new syntax may confuse programmers.
> > People may use it without realizing it doesn't work
> > with older versions
> > of assembler or
> > other assemblers.
>
> That's true for many other enhancements we've been making, including any
> new
> ISA extension support is being added for.
>

We are discussing existing instructions.


> Jan
>
  
Jan Beulich Oct. 16, 2024, 10:04 a.m. UTC | #14
On 16.10.2024 12:02, H.J. Lu wrote:
> On Wed, Oct 16, 2024, 3:05 PM Jan Beulich <jbeulich@suse.com> wrote:
>> On 15.10.2024 21:53, H.J. Lu wrote:
>>> Real benefit is to provide a CPU feature which can't be
>>> done by the current assembler.
>>> Your new syntax may confuse programmers.
>>> People may use it without realizing it doesn't work
>>> with older versions
>>> of assembler or
>>> other assemblers.
>>
>> That's true for many other enhancements we've been making, including any
>> new
>> ISA extension support is being added for.
> 
> We are discussing existing instructions.

Sure; I mentioned new ISAs merely for completeness. Various improvements
were done to pre-existing insns, too. (Same goes for directives, where
specific forms may be usable only with new enough gas, even if the
directive itself may have been around for much longer.)

Jan
  
H.J. Lu Oct. 16, 2024, 10:14 a.m. UTC | #15
On Wed, Oct 16, 2024, 6:04 PM Jan Beulich <jbeulich@suse.com> wrote:

> On 16.10.2024 12:02, H.J. Lu wrote:
> > On Wed, Oct 16, 2024, 3:05 PM Jan Beulich <jbeulich@suse.com> wrote:
> >> On 15.10.2024 21:53, H.J. Lu wrote:
> >>> Real benefit is to provide a CPU feature which can't be
> >>> done by the current assembler.
> >>> Your new syntax may confuse programmers.
> >>> People may use it without realizing it doesn't work
> >>> with older versions
> >>> of assembler or
> >>> other assemblers.
> >>
> >> That's true for many other enhancements we've been making, including any
> >> new
> >> ISA extension support is being added for.
> >
> > We are discussing existing instructions.
>
> Sure; I mentioned new ISAs merely for completeness. Various improvements
> were done to pre-existing insns, too. (Same goes for directives, where
> specific forms may be usable only with new enough gas, even if the
> directive itself may have been around for much longer.)
>

We have been adding new features which couldn't be done before.
But this change isn't such a case.


> Jan
>
>
  
Cui, Lili Oct. 17, 2024, 2:48 a.m. UTC | #16
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Wednesday, October 16, 2024 3:11 PM
> To: Cui, Lili <lili.cui@intel.com>
> Cc: Binutils <binutils@sourceware.org>; Hu, Lin1 <lin1.hu@intel.com>; H.J. Lu
> <hjl.tools@gmail.com>
> Subject: Re: [PATCH 2/2] x86: permit non-immediate offset operands with
> direct far branches
> 
> On 16.10.2024 03:55, Cui, Lili wrote:
> >>>>>>> On 14.10.2024 08:50, H.J. Lu wrote:
> >>>>>>>> On Mon, Oct 14, 2024, 2:39 PM Jan Beulich
> <mailto:jbeulich@suse.com> wrote:
> >>>>>>>>
> >>>>>>>>> While Intel syntax permits such already (as can be observed by
> >>>>>>>>> there not being a need to prefix the respective operand with
> >>>>>>>>> "offset"), AT&T syntax so far strictly insists on two immediate
> operands.
> >>>>>>>>> Multiple
> >>>>>>>>> (successive) immediate operands are somewhat problematic
> >>>>>>>>> anyway, as it's never really clear what their order ought to
> >>>>>>>>> be. While there's no apparent way of dealing with this for
> >>>>>>>>> ENTER, EXTRQ, and INSERTQ, for LCALL and LJMP we can aid
> >>>>>>>>> programmers by permitting alternative forms, with the offset
> >>>>>>>>> operand being a "displacement" rather than an "immediate". The
> >>>>>>>>> order of the two operands the doesn't matter; they're
> distinguished by type.
> >>>>>>>>>
> >>>>>>>>> Mark the new templates AT&T-only; the original ones really
> >>>>>>>>> should have been so, too. For backwards compatibility reasons
> >>>>>>>>> we can't really correct that mistake ...
> >>>>>>>>> ---
> >>>>>>>>> While the proper Intel syntax operand form is sel:offset, for
> >>>>>>>>> some reason we also support two (comma separated) operands.
> >>>>>>>>> The ambiguity there is being left alone, as the sel:offset
> >>>>>>>>> form is enough to avoid
> >>>>> it.
> >>>>>>>>>
> >>>>>>>>
> >>>>>>>> I don't think this is a good idea.  $ is used to denote an
> >>>>>>>> immediate operand in AT&T syntax.
> >>>>>>>
> >>>>>>> Yet as with JMPABS the question is whether this really is an
> immediate.
> >>>>>>> Just take Lili's most recent comment there: It can be looked at
> >>>>>>> as a displacement/offset relative to the specified
> >>>>>>> segment/selector. IOW
> >>>>> like there I
> >>>>>>> think two perspectives are possible, and - as expressed by the
> >>>>> post-commit-
> >>>>>>> message remark - they're already both supported in Intel syntax.
> >>>>>>>
> >>>>>>
> >>>>>> I am on the same page as Hulin and H.J. I insist that offset uses "foo"
> >>>>> and immediate
> >>>>>> uses "$foo",
> >>>>>
> >>>>> And we agree here. I can only repeat that the issue here is that
> >>>>> it is depending on personal perspective whether one considers
> >>>>> operands to effectively any form of direct branch to be an
> >>>>> "offset" or to be an "immediate".
> >>>>>
> >>>>>> even in the special case where the base is 0, conceptually it is
> >>>>>> still
> >>>>> an offset (mov 0x12345678, %eax).  The current implementation and
> >>>>> documentation are clear and reasonable. I believe you already know
> >>>>> the point I want to make, so I won’t reply again.
> >>>>>
> >>>>> Right, but are you also at least trying to understand the point(s)
> >>>>> I've been trying to make for quite some time now? Why do we need
> >>>>> to prescribe to people how to write their code, when offering
> >>>>> liberty has no negative consequences?
> >>>>
> >>>> Unnecessary syntax without real benefits is a negative consequence.
> >>>
> >>> So helping engineers, by disentangling the offset vs immediate
> >>> situation that AT&T mode effectively had forever, isn't a "real benefit" in
> your eyes?
> >>>
> >> Real benefit is to provide a CPU feature which can't be done by the current
> assembler.
> >> Your new syntax may confuse programmers.
> >> People may use it without realizing it doesn't work with older
> >> versions of assembler or other assemblers.
> >
> > Yes, and we should follow the spec:
> >
> > "JMPABS is a 64-bit only ISA extension, and acts as a near-direct branch
> with an absolute target.
> > The 64-bit immediate operand is treated an as absolute effective
> > address, which is subject to canonicality checks.
> 
> This use of "immediate" in the spec was brought before. I can only repeat
> that an almost identical use of "immediate" exists on the SDM's CALL insn
> page (just to give an example): "The operand can be an immediate value, a
> general-purpose register, or a memory location." Yet we treat direct CALLs as
> having displacement operands. Just to also repeat (iirc not even the 1st time):
> How insn encoding is described doesn't always match what operands the insn
> takes in assembly sources.
> 

I think this "immediate value" means that the offset/displacement itself is an immediate value (not read from any register); we regard this immediate value as the offset/displacement operand. You can see it defined as an offset/displacement in many places in the SDM's description of CALL, so please do not take it out of context.

In the description of SDM's CALL:
Instruction Operand Encoding table:
Operand 1 --> offset
Operation --> tempRIP := RIP + tempDEST;


In the description of APX's JMPABS:
Description:
The 64-bit immediate operand is treated as an absolute effective address, which is subject to canonicality checks.
Operation --> RIP = tempRIP;

The current trunk clearly reflects this, and the reason we insist on rejecting your patch is that we sincerely find this difficult to accept. I kindly ask you to reconsider our suggestion.

Thanks,
Lili.
  

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7061,13 +7061,16 @@  i386_assemble (char *line)
     swap_operands ();
 
   /* The order of the immediates should be reversed for 2-immediates EXTRQ
-     and INSERTQ instructions.  Also UWRMSR wants its immediate to be in the
-     "canonical" place (first), despite it appearing last (in AT&T syntax, or
-     because of the swapping above) in the incoming set of operands.  */
+     and INSERTQ instructions.  Also direct far branches and UWRMSR want their
+     immediate to be in the "canonical" place (first), despite it (possibly)
+     appearing last (in AT&T syntax, or because of the swapping above) in the
+     incoming set of operands.  */
   if ((i.imm_operands == 2
        && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
-      || (t->mnem_off == MN_uwrmsr && i.imm_operands
-	  && i.operands > i.imm_operands))
+      || (((t->opcode_modifier.jump == JUMP_INTERSEGMENT
+	    && !operand_type_check (i.types[0], imm))
+	   || t->mnem_off == MN_uwrmsr)
+	  && i.imm_operands && i.operands > i.imm_operands))
       swap_2_operands (0, 1);
 
   if (i.imm_operands)
@@ -11621,6 +11624,7 @@  output_interseg_jump (void)
   int size;
   int prefix;
   int code16;
+  const expressionS *exp1;
 
   code16 = 0;
   if (flag_code == CODE_16BIT)
@@ -11659,9 +11663,11 @@  output_interseg_jump (void)
     *p++ = i.prefix[REX_PREFIX];
 
   *p++ = i.tm.base_opcode;
-  if (i.op[1].imms->X_op == O_constant)
+  exp1 = operand_type_check (i.types[0], imm) ? i.op[1].imms
+					      : i.op[1].disps;
+  if (exp1->X_op == O_constant)
     {
-      offsetT n = i.op[1].imms->X_add_number;
+      offsetT n = exp1->X_add_number;
 
       if (size == 2
 	  && !fits_in_unsigned_word (n)
@@ -11674,7 +11680,7 @@  output_interseg_jump (void)
     }
   else
     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
-		 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
+		 exp1, 0, reloc (size, 0, 0, i.reloc[1]));
 
   p += size;
   if (i.op[0].imms->X_op == O_constant)
--- a/gas/testsuite/gas/i386/jump.d
+++ b/gas/testsuite/gas/i386/jump.d
@@ -15,28 +15,32 @@  Disassembly of section .text:
 [ 	]*[a-f0-9]+:	66 ff 2c bd 00 00 00 00 	ljmpw  \*0x0\(,%edi,4\)	1c: (R_386_)?(dir)?32	xxx
 [ 	]*[a-f0-9]+:	ff 2d 00 00 00 00    	ljmp   \*0x0	22: (R_386_)?(dir)?32	xxx
 [ 	]*[a-f0-9]+:	66 ff 2d 00 00 00 00 	ljmpw  \*0x0	29: (R_386_)?(dir)?32	xxx
-[ 	]*[a-f0-9]+:	ea 00 00 00 00 34 12 	ljmp   \$0x1234,\$0x0	2e: (R_386_)?(dir)?32	xxx
-[ 	]*[a-f0-9]+:	e8 c7 ff ff ff       	call   (0x0|0 <.text>)
-[ 	]*[a-f0-9]+:	e8 ((fc|c2) ff ff ff|00 00 00 00)       	call   (0x)?(0|3a|3e)( <.text(\+0x3e)?>)?	3a: (R_386_PC)?(DISP)?32	xxx
-[ 	]*[a-f0-9]+:	ff 15 00 00 00 00    	call   \*0x0	40: (R_386_)?(dir)?32	xxx
+[ 	]*[a-f0-9]+:	ea 00 00 00 00 34 12 	ljmp   \$0x1234,\$0x0[ 	]+[a-f0-9]+: (R_386_|dir)?32	xxx
+[ 	]*[a-f0-9]+:	ea 00 00 00 00 34 12 	ljmp   \$0x1234,\$0x0[ 	]+[a-f0-9]+: (R_386_|dir)?32	xxx
+[ 	]*[a-f0-9]+:	ea 00 00 00 00 34 12 	ljmp   \$0x1234,\$0x0[ 	]+[a-f0-9]+: (R_386_|dir)?32	xxx
+[ 	]*[a-f0-9]+:	e8 b9 ff ff ff       	call   (0x0|0 <.text>)
+[ 	]*[a-f0-9]+:	e8 ((fc|b4) ff ff ff|00 00 00 00)       	call   (0x)?(0|48|4c)( <.text(\+0x4c)?>)?[ 	]+[a-f0-9]+: (R_386_PC|DISP)32	xxx
+[ 	]*[a-f0-9]+:	ff 15 00 00 00 00    	call   \*0x0[ 	]+[a-f0-9]+: (R_386_|dir)?32	xxx
 [ 	]*[a-f0-9]+:	ff d7                	call   \*%edi
 [ 	]*[a-f0-9]+:	ff 17                	call   \*\(%edi\)
-[ 	]*[a-f0-9]+:	ff 1c bd 00 00 00 00 	lcall  \*0x0\(,%edi,4\)	4b: (R_386_)?(dir)?32	xxx
-[ 	]*[a-f0-9]+:	66 ff 1c bd 00 00 00 00 	lcallw \*0x0\(,%edi,4\)	53: (R_386_)?(dir)?32	xxx
-[ 	]*[a-f0-9]+:	ff 1d 00 00 00 00    	lcall  \*0x0	59: (R_386_)?(dir)?32	xxx
-[ 	]*[a-f0-9]+:	66 ff 1d 00 00 00 00 	lcallw \*0x0	60: (R_386_)?(dir)?32	xxx
-[ 	]*[a-f0-9]+:	9a 00 00 00 00 34 12 	lcall  \$0x1234,\$0x0	65: (R_386_)?(dir)?32	xxx
+[ 	]*[a-f0-9]+:	ff 1c bd 00 00 00 00 	lcall  \*0x0\(,%edi,4\)[ 	]+[a-f0-9]+: (R_386_|dir)?32	xxx
+[ 	]*[a-f0-9]+:	66 ff 1c bd 00 00 00 00 	lcallw \*0x0\(,%edi,4\)[ 	]+[a-f0-9]+: (R_386_|dir)?32	xxx
+[ 	]*[a-f0-9]+:	ff 1d 00 00 00 00    	lcall  \*0x0[ 	]+[a-f0-9]+: (R_386_|dir)?32	xxx
+[ 	]*[a-f0-9]+:	66 ff 1d 00 00 00 00 	lcallw \*0x0[ 	]+[a-f0-9]+: (R_386_|dir)?32	xxx
+[ 	]*[a-f0-9]+:	9a 00 00 00 00 34 12 	lcall  \$0x1234,\$0x0[ 	]+[a-f0-9]+: (R_386_|dir)?32	xxx
+[ 	]*[a-f0-9]+:	9a 00 00 00 00 34 12 	lcall  \$0x1234,\$0x0[ 	]+[a-f0-9]+: (R_386_|dir)?32	xxx
+[ 	]*[a-f0-9]+:	9a 00 00 00 00 34 12 	lcall  \$0x1234,\$0x0[ 	]+[a-f0-9]+: (R_386_|dir)?32	xxx
 [ 	]*[a-f0-9]+:	66 ff 13             	callw  \*\(%ebx\)
 [ 	]*[a-f0-9]+:	ff 13                	call   \*\(%ebx\)
 [ 	]*[a-f0-9]+:	ff 1b                	lcall  \*\(%ebx\)
 [ 	]*[a-f0-9]+:	66 ff 23             	jmpw   \*\(%ebx\)
 [ 	]*[a-f0-9]+:	ff 23                	jmp    \*\(%ebx\)
 [ 	]*[a-f0-9]+:	ff 2b                	ljmp   \*\(%ebx\)
-[ 	]*[a-f0-9]+:	eb 00                	jmp    (0x)?7b( <.text(\+0x7b)?>)?
+[ 	]*[a-f0-9]+:	eb 00                	jmp    (0x)?97( <.text(\+0x97)?>)?
 [ 	]*[a-f0-9]+:	90                   	nop
-[ 	]*[a-f0-9]+:	e3 00                	jecxz  (0x)?7e( <.text(\+0x7e)?>)?
+[ 	]*[a-f0-9]+:	e3 00                	jecxz  (0x)?9a( <.text(\+0x9a)?>)?
 [ 	]*[a-f0-9]+:	90                   	nop
-[ 	]*[a-f0-9]+:	eb 00                	jmp    (0x)?81( <.text(\+0x81)?>)?
+[ 	]*[a-f0-9]+:	eb 00                	jmp    (0x)?9d( <.text(\+0x9d)?>)?
 [ 	]*[a-f0-9]+:	90                   	nop
 [ 	]*[a-f0-9]+:	9a 90 90 90 90 90 90 	lcall  \$0x9090,\$0x90909090
 [ 	]*[a-f0-9]+:	9a 90 90 90 90 90 90 	lcall  \$0x9090,\$0x90909090
--- a/gas/testsuite/gas/i386/jump.s
+++ b/gas/testsuite/gas/i386/jump.s
@@ -13,6 +13,8 @@ 
 	ljmp	*xxx
 	ljmpw	*xxx
 	ljmp	$0x1234,$xxx
+	ljmp	$0x1234,xxx
+	ljmp	xxx,$0x1234
 
 	call	1b
 	call	xxx
@@ -24,6 +26,8 @@ 
 	lcall	*xxx
 	lcallw	*xxx
 	lcall	$0x1234,$xxx
+	lcall	$0x1234,xxx
+	lcall	xxx,$0x1234
 
 	.intel_syntax noprefix
 	call	word ptr [ebx]
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -494,6 +494,7 @@  call, 0x9a, No64, JumpInterSegment|Impli
 call, 0xff/3, 0, Amd64|Modrm|JumpAbsolute|ImplicitStackOp|DefaultSize|NoSuf, { Dword|Fword|BaseIndex }
 call, 0xff/3, x64, Intel64|Modrm|JumpAbsolute|ImplicitStackOp|NoSuf, { Dword|Fword|Tbyte|BaseIndex }
 lcall, 0x9a, No64, JumpInterSegment|DefaultSize|No_bSuf|No_sSuf|No_qSuf, { Imm16, Imm16|Imm32 }
+lcall, 0x9a, No64, JumpInterSegment|DefaultSize|No_bSuf|No_sSuf|No_qSuf|ATTSyntax, { Imm16, Disp16|Disp32 }
 lcall, 0xff/3, 0, Amd64|Modrm|JumpAbsolute|DefaultSize|No_bSuf|No_sSuf|No_qSuf, { Unspecified|BaseIndex }
 lcall, 0xff/3, x64, Intel64|Modrm|JumpAbsolute|No_bSuf|No_sSuf, { Unspecified|BaseIndex }
 
@@ -507,6 +508,7 @@  jmp, 0xea, No64, JumpInterSegment|No_bSu
 jmp, 0xff/5, 0, Amd64|Modrm|JumpAbsolute|NoSuf, { Dword|Fword|BaseIndex }
 jmp, 0xff/5, x64, Intel64|Modrm|JumpAbsolute|NoSuf, { Dword|Fword|Tbyte|BaseIndex }
 ljmp, 0xea, No64, JumpInterSegment|No_bSuf|No_sSuf|No_qSuf, { Imm16, Imm16|Imm32 }
+ljmp, 0xea, No64, JumpInterSegment|No_bSuf|No_sSuf|No_qSuf|ATTSyntax, { Imm16, Disp16|Disp32 }
 ljmp, 0xff/5, 0, Amd64|Modrm|JumpAbsolute|No_bSuf|No_sSuf|No_qSuf, { Unspecified|BaseIndex }
 ljmp, 0xff/5, x64, Intel64|Modrm|JumpAbsolute|No_bSuf|No_sSuf, { Unspecified|BaseIndex }