[2/3] PR 34029: bpf: allow w regs in store insns similar to llvm

Message ID 20260330224203.3110788-3-vineet.gupta@linux.dev
State New
Headers
Series bpf gas updates |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed

Commit Message

Vineet Gupta March 30, 2026, 10:42 p.m. UTC
  |  *(u32 *)(r9 +0) = w5
|  Error: unexpected register name `w5' in expression

This syntax is allowed by llvm, athough encoding wise it is same as
the insn with rN src reg.

The original motivation was a now abondoned gcc change which would
generate these patterns.

Signed-off-by: Vineet Gupta <vineet.gupta@linux.dev>
---
 gas/testsuite/gas/bpf/mem-be-pseudoc.d | 5 ++++-
 gas/testsuite/gas/bpf/mem-pseudoc.d    | 5 ++++-
 gas/testsuite/gas/bpf/mem-pseudoc.s    | 3 +++
 include/opcode/bpf.h                   | 1 +
 opcodes/bpf-opc.c                      | 6 ++++++
 5 files changed, 18 insertions(+), 2 deletions(-)
  

Comments

Jose E. Marchesi March 31, 2026, 9:06 a.m. UTC | #1
Hi Vineet.
Thanks for the patch.

> |  *(u32 *)(r9 +0) = w5
> |  Error: unexpected register name `w5' in expression
>
> This syntax is allowed by llvm, athough encoding wise it is same as
> the insn with rN src reg.
>
> The original motivation was a now abondoned gcc change which would
> generate these patterns.
>
> Signed-off-by: Vineet Gupta <vineet.gupta@linux.dev>
> ---
>  gas/testsuite/gas/bpf/mem-be-pseudoc.d | 5 ++++-
>  gas/testsuite/gas/bpf/mem-pseudoc.d    | 5 ++++-
>  gas/testsuite/gas/bpf/mem-pseudoc.s    | 3 +++
>  include/opcode/bpf.h                   | 1 +
>  opcodes/bpf-opc.c                      | 6 ++++++
>  5 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/gas/testsuite/gas/bpf/mem-be-pseudoc.d b/gas/testsuite/gas/bpf/mem-be-pseudoc.d
> index b7715c463a24..8a0980dd3dfa 100644
> --- a/gas/testsuite/gas/bpf/mem-be-pseudoc.d
> +++ b/gas/testsuite/gas/bpf/mem-be-pseudoc.d
> @@ -30,4 +30,7 @@ Disassembly of section .text:
>    98:	89 21 7e ef 00 00 00 00 	r2=\*\(s16\*\)\(r1\+0x7eef\)
>    a0:	91 21 7e ef 00 00 00 00 	r2=\*\(s8\*\)\(r1\+0x7eef\)
>    a8:	99 21 7e ef 00 00 00 00 	r2=\*\(s64\*\)\(r1\+0x7eef\)
> -  b0:	61 21 00 00 00 00 00 00 	r2=\*\(u32\*\)\(r1\+0x0\)
> +  b0:	63 12 7e ef 00 00 00 00 	\*\(u32\*\)\(r1\+0x7eef\)=r2
> +  b8:	6b 12 7e ef 00 00 00 00 	\*\(u16\*\)\(r1\+0x7eef\)=r2
> +  c0:	73 12 7e ef 00 00 00 00 	\*\(u8\*\)\(r1\+0x7eef\)=r2
> +  c8:	61 21 00 00 00 00 00 00 	r2=\*\(u32\*\)\(r1\+0x0\)
> diff --git a/gas/testsuite/gas/bpf/mem-pseudoc.d b/gas/testsuite/gas/bpf/mem-pseudoc.d
> index b704de51f8fa..6863cd923f3a 100644
> --- a/gas/testsuite/gas/bpf/mem-pseudoc.d
> +++ b/gas/testsuite/gas/bpf/mem-pseudoc.d
> @@ -30,4 +30,7 @@ Disassembly of section .text:
>    98:	89 12 ef 7e 00 00 00 00 	r2=\*\(s16\*\)\(r1\+0x7eef\)
>    a0:	91 12 ef 7e 00 00 00 00 	r2=\*\(s8\*\)\(r1\+0x7eef\)
>    a8:	99 12 ef 7e 00 00 00 00 	r2=\*\(s64\*\)\(r1\+0x7eef\)
> -  b0:	61 12 00 00 00 00 00 00 	r2=\*\(u32\*\)\(r1\+0x0\)
> +  b0:	63 21 ef 7e 00 00 00 00 	\*\(u32\*\)\(r1\+0x7eef\)=r2
> +  b8:	6b 21 ef 7e 00 00 00 00 	\*\(u16\*\)\(r1\+0x7eef\)=r2
> +  c0:	73 21 ef 7e 00 00 00 00 	\*\(u8\*\)\(r1\+0x7eef\)=r2
> +  c8:	61 12 00 00 00 00 00 00 	r2=\*\(u32\*\)\(r1\+0x0\)
> diff --git a/gas/testsuite/gas/bpf/mem-pseudoc.s b/gas/testsuite/gas/bpf/mem-pseudoc.s
> index 199077539163..08797ae58dcf 100644
> --- a/gas/testsuite/gas/bpf/mem-pseudoc.s
> +++ b/gas/testsuite/gas/bpf/mem-pseudoc.s
> @@ -23,4 +23,7 @@
>          r2 = *(s16*)(r1+0x7eef)
>          r2 = *(s8*)(r1+0x7eef)
>          r2 = *(s64*)(r1+0x7eef)
> +	*(u32 *)(r1 + 32495) = w2
> +	*(u16 *)(r1 + 32495) = w2
> +	*(u8 *)(r1 + 32495) = w2
>  	r2 = *(u32 *)(r1 + 0)

Please add the new instructions immediately following the equivalent
load instructions, and add a comment in the test source file, noting
that these are equivalent to the rN variants.

> diff --git a/include/opcode/bpf.h b/include/opcode/bpf.h
> index e4ccd430f0ce..7befef454425 100644
> --- a/include/opcode/bpf.h
> +++ b/include/opcode/bpf.h
> @@ -198,6 +198,7 @@ enum bpf_insn_id
>    BPF_INSN_LDXSB, BPF_INSN_LDXSH, BPF_INSN_LDXSW, BPF_INSN_LDXSDW,
>    /* Generic store instructions (from register.)  */
>    BPF_INSN_STXBR, BPF_INSN_STXHR, BPF_INSN_STXWR, BPF_INSN_STXDWR,
> +  BPF_INSN_STXBW, BPF_INSN_STXHW, BPF_INSN_STXWW,
>    BPF_INSN_STXBI, BPF_INSN_STXHI, BPF_INSN_STXWI, BPF_INSN_STXDWI,
>    /* Compare-and-jump instructions (reg OP reg.)  */
>    BPF_INSN_JAR, BPF_INSN_JEQR, BPF_INSN_JGTR, BPF_INSN_JSGTR,
> diff --git a/opcodes/bpf-opc.c b/opcodes/bpf-opc.c
> index aa5493280eaa..c46ff07901c9 100644
> --- a/opcodes/bpf-opc.c
> +++ b/opcodes/bpf-opc.c
> @@ -236,6 +236,12 @@ const struct bpf_opcode bpf_opcodes[] =
>     BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_W|BPF_MODE_MEM},
>    {BPF_INSN_STXDWR, "stxdw%W[ %dr %o16 ] , %sr", "* ( u64 * ) ( %dr %o16 ) = %sr",
>     BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_DW|BPF_MODE_MEM},
> +  {BPF_INSN_STXBW, "stxb%W[ %dr %o16 ] , %sr", "* ( u8 * ) ( %dr %o16 ) = %sw",
> +   BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_B|BPF_MODE_MEM},
> +  {BPF_INSN_STXHW, "stxh%W[ %dr %o16 ] , %sr", "* ( u16 * ) ( %dr %o16 ) = %sw",
> +   BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_H|BPF_MODE_MEM},
> +  {BPF_INSN_STXWW, "stxw%W[ %dr %o16 ], %sr", "* ( u32 * ) ( %dr %o16 ) = %sw",
> +   BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_W|BPF_MODE_MEM},
>  
>    /* Generic store instructions (from 32-bit immediate.)  */
>    {BPF_INSN_STXBI, "stb%W[ %dr %o16 ] , %i32", "* ( u8 * ) ( %dr %o16 ) = %i32",


There must be no need to introduce new BPF_INSN_* entries: you are
adding new syntax to existing instructions, not new instructions.

(Note that, if you were to add new BPF_INSN_* entries, the R and I in
the BPF_INSN_* entries refer to register and immediate, respectively.
The nature of the operation is encoded in its name/mnemonic, like in
MOV32R.)

I think it would be better to avoid replication in the table by instead
defining new format tags, %dR and %sR, to denote a register written
using either r or w, "any" register format.  The disassembler would then
just print 'r' for these. WDYT?
  
Vineet Gupta March 31, 2026, 2:48 p.m. UTC | #2
On 3/31/26 2:06 AM, Jose E. Marchesi wrote:
> Hi Vineet.
> Thanks for the patch.
>
>> |  *(u32 *)(r9 +0) = w5
>> |  Error: unexpected register name `w5' in expression
>>
>> This syntax is allowed by llvm, athough encoding wise it is same as
>> the insn with rN src reg.
>>
>> The original motivation was a now abondoned gcc change which would
>> generate these patterns.
>>
>> Signed-off-by: Vineet Gupta <vineet.gupta@linux.dev>
>> ---
>>  gas/testsuite/gas/bpf/mem-be-pseudoc.d | 5 ++++-
>>  gas/testsuite/gas/bpf/mem-pseudoc.d    | 5 ++++-
>>  gas/testsuite/gas/bpf/mem-pseudoc.s    | 3 +++
>>  include/opcode/bpf.h                   | 1 +
>>  opcodes/bpf-opc.c                      | 6 ++++++
>>  5 files changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/gas/testsuite/gas/bpf/mem-be-pseudoc.d b/gas/testsuite/gas/bpf/mem-be-pseudoc.d
>> index b7715c463a24..8a0980dd3dfa 100644
>> --- a/gas/testsuite/gas/bpf/mem-be-pseudoc.d
>> +++ b/gas/testsuite/gas/bpf/mem-be-pseudoc.d
>> @@ -30,4 +30,7 @@ Disassembly of section .text:
>>    98:	89 21 7e ef 00 00 00 00 	r2=\*\(s16\*\)\(r1\+0x7eef\)
>>    a0:	91 21 7e ef 00 00 00 00 	r2=\*\(s8\*\)\(r1\+0x7eef\)
>>    a8:	99 21 7e ef 00 00 00 00 	r2=\*\(s64\*\)\(r1\+0x7eef\)
>> -  b0:	61 21 00 00 00 00 00 00 	r2=\*\(u32\*\)\(r1\+0x0\)
>> +  b0:	63 12 7e ef 00 00 00 00 	\*\(u32\*\)\(r1\+0x7eef\)=r2
>> +  b8:	6b 12 7e ef 00 00 00 00 	\*\(u16\*\)\(r1\+0x7eef\)=r2
>> +  c0:	73 12 7e ef 00 00 00 00 	\*\(u8\*\)\(r1\+0x7eef\)=r2
>> +  c8:	61 21 00 00 00 00 00 00 	r2=\*\(u32\*\)\(r1\+0x0\)
>> diff --git a/gas/testsuite/gas/bpf/mem-pseudoc.d b/gas/testsuite/gas/bpf/mem-pseudoc.d
>> index b704de51f8fa..6863cd923f3a 100644
>> --- a/gas/testsuite/gas/bpf/mem-pseudoc.d
>> +++ b/gas/testsuite/gas/bpf/mem-pseudoc.d
>> @@ -30,4 +30,7 @@ Disassembly of section .text:
>>    98:	89 12 ef 7e 00 00 00 00 	r2=\*\(s16\*\)\(r1\+0x7eef\)
>>    a0:	91 12 ef 7e 00 00 00 00 	r2=\*\(s8\*\)\(r1\+0x7eef\)
>>    a8:	99 12 ef 7e 00 00 00 00 	r2=\*\(s64\*\)\(r1\+0x7eef\)
>> -  b0:	61 12 00 00 00 00 00 00 	r2=\*\(u32\*\)\(r1\+0x0\)
>> +  b0:	63 21 ef 7e 00 00 00 00 	\*\(u32\*\)\(r1\+0x7eef\)=r2
>> +  b8:	6b 21 ef 7e 00 00 00 00 	\*\(u16\*\)\(r1\+0x7eef\)=r2
>> +  c0:	73 21 ef 7e 00 00 00 00 	\*\(u8\*\)\(r1\+0x7eef\)=r2
>> +  c8:	61 12 00 00 00 00 00 00 	r2=\*\(u32\*\)\(r1\+0x0\)
>> diff --git a/gas/testsuite/gas/bpf/mem-pseudoc.s b/gas/testsuite/gas/bpf/mem-pseudoc.s
>> index 199077539163..08797ae58dcf 100644
>> --- a/gas/testsuite/gas/bpf/mem-pseudoc.s
>> +++ b/gas/testsuite/gas/bpf/mem-pseudoc.s
>> @@ -23,4 +23,7 @@
>>          r2 = *(s16*)(r1+0x7eef)
>>          r2 = *(s8*)(r1+0x7eef)
>>          r2 = *(s64*)(r1+0x7eef)
>> +	*(u32 *)(r1 + 32495) = w2
>> +	*(u16 *)(r1 + 32495) = w2
>> +	*(u8 *)(r1 + 32495) = w2
>>  	r2 = *(u32 *)(r1 + 0)
> Please add the new instructions immediately following the equivalent
> load instructions, and add a comment in the test source file, noting
> that these are equivalent to the rN variants.

OK.

>> diff --git a/include/opcode/bpf.h b/include/opcode/bpf.h
>> index e4ccd430f0ce..7befef454425 100644
>> --- a/include/opcode/bpf.h
>> +++ b/include/opcode/bpf.h
>> @@ -198,6 +198,7 @@ enum bpf_insn_id
>>    BPF_INSN_LDXSB, BPF_INSN_LDXSH, BPF_INSN_LDXSW, BPF_INSN_LDXSDW,
>>    /* Generic store instructions (from register.)  */
>>    BPF_INSN_STXBR, BPF_INSN_STXHR, BPF_INSN_STXWR, BPF_INSN_STXDWR,
>> +  BPF_INSN_STXBW, BPF_INSN_STXHW, BPF_INSN_STXWW,
>>    BPF_INSN_STXBI, BPF_INSN_STXHI, BPF_INSN_STXWI, BPF_INSN_STXDWI,
>>    /* Compare-and-jump instructions (reg OP reg.)  */
>>    BPF_INSN_JAR, BPF_INSN_JEQR, BPF_INSN_JGTR, BPF_INSN_JSGTR,
>> diff --git a/opcodes/bpf-opc.c b/opcodes/bpf-opc.c
>> index aa5493280eaa..c46ff07901c9 100644
>> --- a/opcodes/bpf-opc.c
>> +++ b/opcodes/bpf-opc.c
>> @@ -236,6 +236,12 @@ const struct bpf_opcode bpf_opcodes[] =
>>     BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_W|BPF_MODE_MEM},
>>    {BPF_INSN_STXDWR, "stxdw%W[ %dr %o16 ] , %sr", "* ( u64 * ) ( %dr %o16 ) = %sr",
>>     BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_DW|BPF_MODE_MEM},
>> +  {BPF_INSN_STXBW, "stxb%W[ %dr %o16 ] , %sr", "* ( u8 * ) ( %dr %o16 ) = %sw",
>> +   BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_B|BPF_MODE_MEM},
>> +  {BPF_INSN_STXHW, "stxh%W[ %dr %o16 ] , %sr", "* ( u16 * ) ( %dr %o16 ) = %sw",
>> +   BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_H|BPF_MODE_MEM},
>> +  {BPF_INSN_STXWW, "stxw%W[ %dr %o16 ], %sr", "* ( u32 * ) ( %dr %o16 ) = %sw",
>> +   BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_W|BPF_MODE_MEM},
>>  
>>    /* Generic store instructions (from 32-bit immediate.)  */
>>    {BPF_INSN_STXBI, "stb%W[ %dr %o16 ] , %i32", "* ( u8 * ) ( %dr %o16 ) = %i32",
>
> There must be no need to introduce new BPF_INSN_* entries: you are
> adding new syntax to existing instructions, not new instructions.
>
> (Note that, if you were to add new BPF_INSN_* entries, the R and I in
> the BPF_INSN_* entries refer to register and immediate, respectively.
> The nature of the operation is encoded in its name/mnemonic, like in
> MOV32R.)

OK, fair point.

> I think it would be better to avoid replication in the table by instead
> defining new format tags, %dR and %sR, to denote a register written
> using either r or w, "any" register format.  The disassembler would then
> just print 'r' for these. WDYT?

Sound good. I'll respin along those line and resubmit.

Thx,
-Vineet
  

Patch

diff --git a/gas/testsuite/gas/bpf/mem-be-pseudoc.d b/gas/testsuite/gas/bpf/mem-be-pseudoc.d
index b7715c463a24..8a0980dd3dfa 100644
--- a/gas/testsuite/gas/bpf/mem-be-pseudoc.d
+++ b/gas/testsuite/gas/bpf/mem-be-pseudoc.d
@@ -30,4 +30,7 @@  Disassembly of section .text:
   98:	89 21 7e ef 00 00 00 00 	r2=\*\(s16\*\)\(r1\+0x7eef\)
   a0:	91 21 7e ef 00 00 00 00 	r2=\*\(s8\*\)\(r1\+0x7eef\)
   a8:	99 21 7e ef 00 00 00 00 	r2=\*\(s64\*\)\(r1\+0x7eef\)
-  b0:	61 21 00 00 00 00 00 00 	r2=\*\(u32\*\)\(r1\+0x0\)
+  b0:	63 12 7e ef 00 00 00 00 	\*\(u32\*\)\(r1\+0x7eef\)=r2
+  b8:	6b 12 7e ef 00 00 00 00 	\*\(u16\*\)\(r1\+0x7eef\)=r2
+  c0:	73 12 7e ef 00 00 00 00 	\*\(u8\*\)\(r1\+0x7eef\)=r2
+  c8:	61 21 00 00 00 00 00 00 	r2=\*\(u32\*\)\(r1\+0x0\)
diff --git a/gas/testsuite/gas/bpf/mem-pseudoc.d b/gas/testsuite/gas/bpf/mem-pseudoc.d
index b704de51f8fa..6863cd923f3a 100644
--- a/gas/testsuite/gas/bpf/mem-pseudoc.d
+++ b/gas/testsuite/gas/bpf/mem-pseudoc.d
@@ -30,4 +30,7 @@  Disassembly of section .text:
   98:	89 12 ef 7e 00 00 00 00 	r2=\*\(s16\*\)\(r1\+0x7eef\)
   a0:	91 12 ef 7e 00 00 00 00 	r2=\*\(s8\*\)\(r1\+0x7eef\)
   a8:	99 12 ef 7e 00 00 00 00 	r2=\*\(s64\*\)\(r1\+0x7eef\)
-  b0:	61 12 00 00 00 00 00 00 	r2=\*\(u32\*\)\(r1\+0x0\)
+  b0:	63 21 ef 7e 00 00 00 00 	\*\(u32\*\)\(r1\+0x7eef\)=r2
+  b8:	6b 21 ef 7e 00 00 00 00 	\*\(u16\*\)\(r1\+0x7eef\)=r2
+  c0:	73 21 ef 7e 00 00 00 00 	\*\(u8\*\)\(r1\+0x7eef\)=r2
+  c8:	61 12 00 00 00 00 00 00 	r2=\*\(u32\*\)\(r1\+0x0\)
diff --git a/gas/testsuite/gas/bpf/mem-pseudoc.s b/gas/testsuite/gas/bpf/mem-pseudoc.s
index 199077539163..08797ae58dcf 100644
--- a/gas/testsuite/gas/bpf/mem-pseudoc.s
+++ b/gas/testsuite/gas/bpf/mem-pseudoc.s
@@ -23,4 +23,7 @@ 
         r2 = *(s16*)(r1+0x7eef)
         r2 = *(s8*)(r1+0x7eef)
         r2 = *(s64*)(r1+0x7eef)
+	*(u32 *)(r1 + 32495) = w2
+	*(u16 *)(r1 + 32495) = w2
+	*(u8 *)(r1 + 32495) = w2
 	r2 = *(u32 *)(r1 + 0)
diff --git a/include/opcode/bpf.h b/include/opcode/bpf.h
index e4ccd430f0ce..7befef454425 100644
--- a/include/opcode/bpf.h
+++ b/include/opcode/bpf.h
@@ -198,6 +198,7 @@  enum bpf_insn_id
   BPF_INSN_LDXSB, BPF_INSN_LDXSH, BPF_INSN_LDXSW, BPF_INSN_LDXSDW,
   /* Generic store instructions (from register.)  */
   BPF_INSN_STXBR, BPF_INSN_STXHR, BPF_INSN_STXWR, BPF_INSN_STXDWR,
+  BPF_INSN_STXBW, BPF_INSN_STXHW, BPF_INSN_STXWW,
   BPF_INSN_STXBI, BPF_INSN_STXHI, BPF_INSN_STXWI, BPF_INSN_STXDWI,
   /* Compare-and-jump instructions (reg OP reg.)  */
   BPF_INSN_JAR, BPF_INSN_JEQR, BPF_INSN_JGTR, BPF_INSN_JSGTR,
diff --git a/opcodes/bpf-opc.c b/opcodes/bpf-opc.c
index aa5493280eaa..c46ff07901c9 100644
--- a/opcodes/bpf-opc.c
+++ b/opcodes/bpf-opc.c
@@ -236,6 +236,12 @@  const struct bpf_opcode bpf_opcodes[] =
    BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_W|BPF_MODE_MEM},
   {BPF_INSN_STXDWR, "stxdw%W[ %dr %o16 ] , %sr", "* ( u64 * ) ( %dr %o16 ) = %sr",
    BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_DW|BPF_MODE_MEM},
+  {BPF_INSN_STXBW, "stxb%W[ %dr %o16 ] , %sr", "* ( u8 * ) ( %dr %o16 ) = %sw",
+   BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_B|BPF_MODE_MEM},
+  {BPF_INSN_STXHW, "stxh%W[ %dr %o16 ] , %sr", "* ( u16 * ) ( %dr %o16 ) = %sw",
+   BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_H|BPF_MODE_MEM},
+  {BPF_INSN_STXWW, "stxw%W[ %dr %o16 ], %sr", "* ( u32 * ) ( %dr %o16 ) = %sw",
+   BPF_V1, BPF_CODE, BPF_CLASS_STX|BPF_SIZE_W|BPF_MODE_MEM},
 
   /* Generic store instructions (from 32-bit immediate.)  */
   {BPF_INSN_STXBI, "stb%W[ %dr %o16 ] , %i32", "* ( u8 * ) ( %dr %o16 ) = %i32",