[v2,1/2] x86/Intel: SHLD/SHRD have dual meaning
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_binutils_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_binutils_check--master-arm |
success
|
Testing passed
|
Commit Message
Since we uniformly permit D suffixes in Intel mode whenever in AT&T mode
an L suffix may be used, we need to be consistent with this.
Take the easy route, despite that still leading to an anomaly which is
also visible from the new testcase:
shld eax, ecx, 1
shld eax, ecx, cl
can mean two things with APX: SHL with a D suffix in NDD EVEX encoding,
or the traditional SHLD in legacy encoding.
Comments
Since we only report a warning for redundant Intel suffixes, we still need this patch for "shld ecx, 1". If we later reject such suffixes, we need to remember to revert the change in may_need_pass2.
Lili
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Friday, May 17, 2024 6:48 PM
> To: Binutils <binutils@sourceware.org>
> Cc: H.J. Lu <hjl.tools@gmail.com>
> Subject: [PATCH v2 1/2] x86/Intel: SHLD/SHRD have dual meaning
>
> Since we uniformly permit D suffixes in Intel mode whenever in AT&T mode an
> L suffix may be used, we need to be consistent with this.
>
> Take the easy route, despite that still leading to an anomaly which is also
> visible from the new testcase:
>
> shld eax, ecx, 1
> shld eax, ecx, cl
>
> can mean two things with APX: SHL with a D suffix in NDD EVEX encoding, or
> the traditional SHLD in legacy encoding.
>
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -5392,7 +5392,7 @@ static void init_globals (void)
>  }
>
> /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
> - parsing pass. Instead of introducing a rarely use new insn attribute this
> + parsing pass. Instead of introducing a rarely used new insn attribute this
> utilizes a common pattern between affected templates. It is deemed
> acceptable that this will lead to unnecessary pass 2 preparations in a
> limited set of cases. */
> @@ -5404,7 +5404,10 @@ static INLINE bool may_need_pass2 (const
> : (t->opcode_space == SPACE_0F
> && (t->base_opcode | 1) == 0xbf)
> || (t->opcode_space == SPACE_BASE
> - && t->base_opcode == 0x63);
> + && t->base_opcode == 0x63)
> + || (intel_syntax /* shld / shrd may mean suffixed shl / shr. */
> + && t->opcode_space == SPACE_EVEXMAP4
> + && (t->base_opcode | 8) == 0x2c);
> }
>
> #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/intel-suffix.d
> @@ -0,0 +1,34 @@
> +#objdump: -dw
> +#name: Intel syntax w/ suffixes
> +
> +.*: +file format .*
> +
> +Disassembly of section \.text:
> +0+0 <.*>:
> +[ ]*[a-f0-9]+: 0f a4 c8 01[ ]+shld \$0x1,%ecx,%eax
> +[ ]*[a-f0-9]+: 0f a5 c8[ ]+shld %cl,%ecx,%eax
> +[ ]*[a-f0-9]+: d1 e1[ ]+shl \$1,%ecx
> +[ ]*[a-f0-9]+: d3 e1[ ]+shl %cl,%ecx
> +[ ]*[a-f0-9]+: 62 f4 7c 18 d1 e1[ ]+shl \$1,%ecx,%eax
> +[ ]*[a-f0-9]+: 62 f4 7c 18 d3 e1[ ]+shl %cl,%ecx,%eax
> +[ ]*[a-f0-9]+: d1 e1[ ]+shl \$1,%ecx
> +[ ]*[a-f0-9]+: d3 e1[ ]+shl %cl,%ecx
> +[ ]*[a-f0-9]+: 62 f4 7c 18 d1 c1[ ]+rol \$1,%ecx,%eax
> +[ ]*[a-f0-9]+: 62 f4 7c 18 d3 c1[ ]+rol %cl,%ecx,%eax
> +[ ]*[a-f0-9]+: d1 c1[ ]+rol \$1,%ecx
> +[ ]*[a-f0-9]+: d3 c1[ ]+rol %cl,%ecx
> +
> +0+[0-9a-f]+ <.*>:
> +[ ]*[a-f0-9]+: 0f ac c8 01[ ]+shrd \$0x1,%ecx,%eax
> +[ ]*[a-f0-9]+: 0f ad c8[ ]+shrd %cl,%ecx,%eax
> +[ ]*[a-f0-9]+: d1 e9[ ]+shr \$1,%ecx
> +[ ]*[a-f0-9]+: d3 e9[ ]+shr %cl,%ecx
> +[ ]*[a-f0-9]+: 62 f4 7c 18 d1 f9[ ]+sar \$1,%ecx,%eax
> +[ ]*[a-f0-9]+: 62 f4 7c 18 d3 f9[ ]+sar %cl,%ecx,%eax
> +[ ]*[a-f0-9]+: d1 f9[ ]+sar \$1,%ecx
> +[ ]*[a-f0-9]+: d3 f9[ ]+sar %cl,%ecx
> +[ ]*[a-f0-9]+: 62 f4 7c 18 d1 c9[ ]+ror \$1,%ecx,%eax
> +[ ]*[a-f0-9]+: 62 f4 7c 18 d3 c9[ ]+ror %cl,%ecx,%eax
> +[ ]*[a-f0-9]+: d1 c9[ ]+ror \$1,%ecx
> +[ ]*[a-f0-9]+: d3 c9[ ]+ror %cl,%ecx
> +#pass
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/intel-suffix.s
> @@ -0,0 +1,39 @@
> + .intel_syntax noprefix
> + .text
> +left:
> + shld eax, ecx, 1
> + shld eax, ecx, cl
> +
> + shld ecx, 1
> + shld ecx, cl
> +
> + sald eax, ecx, 1
> + sald eax, ecx, cl
> +
> + sald ecx, 1
> + sald ecx, cl
> +
> + rold eax, ecx, 1
> + rold eax, ecx, cl
> +
> + rold ecx, 1
> + rold ecx, cl
> +
> +right:
> + shrd eax, ecx, 1
> + shrd eax, ecx, cl
> +
> + shrd ecx, 1
> + shrd ecx, cl
> +
> + sard eax, ecx, 1
> + sard eax, ecx, cl
> +
> + sard ecx, 1
> + sard ecx, cl
> +
> + rord eax, ecx, 1
> + rord eax, ecx, cl
> +
> + rord ecx, 1
> + rord ecx, cl
> --- a/gas/testsuite/gas/i386/x86-64.exp
> +++ b/gas/testsuite/gas/i386/x86-64.exp
> @@ -160,6 +160,7 @@ run_dump_test "x86-64-disp-intel"
> run_list_test "disp-imm-64"
> run_dump_test "intel-movs64"
> run_dump_test "intel-cmps64"
> +run_dump_test "intel-suffix"
> run_dump_test "x86-64-disp32"
> run_dump_test "rexw"
> run_list_test "x86-64-specific-reg"
@@ -5392,7 +5392,7 @@ static void init_globals (void)
}
/* Helper for md_assemble() to decide whether to prepare for a possible 2nd
- parsing pass. Instead of introducing a rarely use new insn attribute this
+ parsing pass. Instead of introducing a rarely used new insn attribute this
utilizes a common pattern between affected templates. It is deemed
acceptable that this will lead to unnecessary pass 2 preparations in a
limited set of cases. */
@@ -5404,7 +5404,10 @@ static INLINE bool may_need_pass2 (const
: (t->opcode_space == SPACE_0F
&& (t->base_opcode | 1) == 0xbf)
|| (t->opcode_space == SPACE_BASE
- && t->base_opcode == 0x63);
+ && t->base_opcode == 0x63)
+ || (intel_syntax /* shld / shrd may mean suffixed shl / shr. */
+ && t->opcode_space == SPACE_EVEXMAP4
+ && (t->base_opcode | 8) == 0x2c);
}
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
@@ -0,0 +1,34 @@
+#objdump: -dw
+#name: Intel syntax w/ suffixes
+
+.*: +file format .*
+
+Disassembly of section \.text:
+0+0 <.*>:
+[ ]*[a-f0-9]+: 0f a4 c8 01[ ]+shld \$0x1,%ecx,%eax
+[ ]*[a-f0-9]+: 0f a5 c8[ ]+shld %cl,%ecx,%eax
+[ ]*[a-f0-9]+: d1 e1[ ]+shl \$1,%ecx
+[ ]*[a-f0-9]+: d3 e1[ ]+shl %cl,%ecx
+[ ]*[a-f0-9]+: 62 f4 7c 18 d1 e1[ ]+shl \$1,%ecx,%eax
+[ ]*[a-f0-9]+: 62 f4 7c 18 d3 e1[ ]+shl %cl,%ecx,%eax
+[ ]*[a-f0-9]+: d1 e1[ ]+shl \$1,%ecx
+[ ]*[a-f0-9]+: d3 e1[ ]+shl %cl,%ecx
+[ ]*[a-f0-9]+: 62 f4 7c 18 d1 c1[ ]+rol \$1,%ecx,%eax
+[ ]*[a-f0-9]+: 62 f4 7c 18 d3 c1[ ]+rol %cl,%ecx,%eax
+[ ]*[a-f0-9]+: d1 c1[ ]+rol \$1,%ecx
+[ ]*[a-f0-9]+: d3 c1[ ]+rol %cl,%ecx
+
+0+[0-9a-f]+ <.*>:
+[ ]*[a-f0-9]+: 0f ac c8 01[ ]+shrd \$0x1,%ecx,%eax
+[ ]*[a-f0-9]+: 0f ad c8[ ]+shrd %cl,%ecx,%eax
+[ ]*[a-f0-9]+: d1 e9[ ]+shr \$1,%ecx
+[ ]*[a-f0-9]+: d3 e9[ ]+shr %cl,%ecx
+[ ]*[a-f0-9]+: 62 f4 7c 18 d1 f9[ ]+sar \$1,%ecx,%eax
+[ ]*[a-f0-9]+: 62 f4 7c 18 d3 f9[ ]+sar %cl,%ecx,%eax
+[ ]*[a-f0-9]+: d1 f9[ ]+sar \$1,%ecx
+[ ]*[a-f0-9]+: d3 f9[ ]+sar %cl,%ecx
+[ ]*[a-f0-9]+: 62 f4 7c 18 d1 c9[ ]+ror \$1,%ecx,%eax
+[ ]*[a-f0-9]+: 62 f4 7c 18 d3 c9[ ]+ror %cl,%ecx,%eax
+[ ]*[a-f0-9]+: d1 c9[ ]+ror \$1,%ecx
+[ ]*[a-f0-9]+: d3 c9[ ]+ror %cl,%ecx
+#pass
@@ -0,0 +1,39 @@
+ .intel_syntax noprefix
+ .text
+left:
+ shld eax, ecx, 1
+ shld eax, ecx, cl
+
+ shld ecx, 1
+ shld ecx, cl
+
+ sald eax, ecx, 1
+ sald eax, ecx, cl
+
+ sald ecx, 1
+ sald ecx, cl
+
+ rold eax, ecx, 1
+ rold eax, ecx, cl
+
+ rold ecx, 1
+ rold ecx, cl
+
+right:
+ shrd eax, ecx, 1
+ shrd eax, ecx, cl
+
+ shrd ecx, 1
+ shrd ecx, cl
+
+ sard eax, ecx, 1
+ sard eax, ecx, cl
+
+ sard ecx, 1
+ sard ecx, cl
+
+ rord eax, ecx, 1
+ rord eax, ecx, cl
+
+ rord ecx, 1
+ rord ecx, cl
@@ -160,6 +160,7 @@ run_dump_test "x86-64-disp-intel"
run_list_test "disp-imm-64"
run_dump_test "intel-movs64"
run_dump_test "intel-cmps64"
+run_dump_test "intel-suffix"
run_dump_test "x86-64-disp32"
run_dump_test "rexw"
run_list_test "x86-64-specific-reg"