Support Intel USER_MSR

Message ID 20231010072401.1383177-1-lin1.hu@intel.com
State New
Headers
Series Support Intel USER_MSR |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Testing passed

Commit Message

Hu, Lin1 Oct. 10, 2023, 7:24 a.m. UTC
  This patch aims to support Intel USER_MSR.

gas/ChangeLog:

	* NEWS: Support Intel USER_MSR.
	* config/tc-i386.c: Add user_msr and add VEXMAP7 in
	opc_spc and build_vex_prefix.
	(build_modrm_byte): Swap Operands for USER_MSR.
	* doc/c-i386.texi: Document .user_msr.
	* testsuite/gas/i386/i386.exp: Run USER_MSR tests.
	* testsuite/gas/i386/x86-64.exp: Ditto.
	* testsuite/gas/i386/user_msr-inval.l: New test.
	* testsuite/gas/i386/user_msr-inval.s: Ditto.
	* testsuite/gas/i386/x86-64-user_msr-intel.d: Ditto.
	* testsuite/gas/i386/x86-64-user_msr.d: Ditto.
	* testsuite/gas/i386/x86-64-user_msr.s: Ditto.

opcodes/ChangeLog:

	* i386-dis.c (get32_operand0): Add a new function
	to get operand0 that is IMM32.
	(Gq): New.
	(Id0): Ditto.
	(get_valid_dis386): Add VEX_MAP7 in VEX prefix.
	(OP_I): Handle d_0_mode.
	(d_0_mode): New.
	(REG_VEX_MAP7_F8_L_0_W_0_M_1): New.
	(MOD_VEX_MAP7_F8_L_0_W_0): Ditto.
	(PREFIX_VEX_MAP7_F8_L_0_W_0_M_1_R_0_X86_64): Ditto.
	(X86_64_VEX_MAP7_F8_L_0_W_0_M_1_R_0): Ditto.
	(VEX_MAP7): Ditto.
	(VEX_LEN_MAP7_F8): Ditto.
	(VEX_W_MAP7_F8_L_0): Ditto.
	(MOD_0F38F8): Ditto.
	(PREFIX_0F38F8_M_0): Ditto.
	(PREFIX_0F38F8_M_1_X86_64): Ditto.
	(X86_64_0F38F8_M_1): Ditto.
	(PREFIX_0F38F8): Remove.
	(prefix_table): Add PREFIX_0F38F8_M_1_X86_64.
	Remove PREFIX_0F38F8.
	(reg_table): Add REG_VEX_MAP7_F8_L_0_W_0_M_1,
	PREFIX_VEX_MAP7_F8_L_0_W_0_M_1_R_0_X86_64.
	(x86_64_table): Add X86_64_0F38F8_PREFIX_3_M_1,
	X86_64_VEX_MAP7_F8_L_0_W_0_M_1_R_0 and X86_64_0F38F8_M_1.
	(vex_table): Add VEX_MAP7.
	(vex_len_table): Add VEX_LEN_MAP7_F8,
	VEX_W_MAP7_F8_L_0.
	(mod_table): New entry for USER_MSR and
	add MOD_VEX_MAP7_F8_L_0_W_0 and MOD_0F38F8.
	* i386-gen.c (cpu_flag_init): Add CPU_USER_MSR_FLAGS and
	CPU_ANY_USER_MSR_FLAGS.
	* i386-init.h: Regenerated.
	* i386-mnem.h: Ditto.
	* i386-opc.h (SPACE_VEXMAP7): New.
	(SWAP_SOURCE_DEST): Ditto.
	(CPU_USER_MSR_FLAGS): Ditoo.
	(CPU_ANY_USER_MSR_FLAGS): Ditto.
	(i386_cpu_flags): Add cpuuser_msr.
	* i386-opc.tbl: Add USER_MSR instructions.
	* i386-tbl.h: Regenerated.
---
 gas/NEWS                                      |    2 +
 gas/config/tc-i386.c                          |   15 +
 gas/doc/c-i386.texi                           |    3 +-
 gas/testsuite/gas/i386/i386.exp               |    1 +
 gas/testsuite/gas/i386/user_msr-inval.l       |    3 +
 gas/testsuite/gas/i386/user_msr-inval.s       |    7 +
 .../gas/i386/x86-64-user_msr-intel.d          |   18 +
 gas/testsuite/gas/i386/x86-64-user_msr.d      |   18 +
 gas/testsuite/gas/i386/x86-64-user_msr.s      |   15 +
 gas/testsuite/gas/i386/x86-64.exp             |    2 +
 opcodes/i386-dis.c                            |  387 ++++-
 opcodes/i386-gen.c                            |    2 +
 opcodes/i386-init.h                           |  816 +++++------
 opcodes/i386-mnem.h                           | 1278 +++++++++--------
 opcodes/i386-opc.h                            |   10 +
 opcodes/i386-opc.tbl                          |   12 +
 opcodes/i386-tbl.h                            |  272 ++--
 17 files changed, 1698 insertions(+), 1163 deletions(-)
 create mode 100644 gas/testsuite/gas/i386/user_msr-inval.l
 create mode 100644 gas/testsuite/gas/i386/user_msr-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-user_msr-intel.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-user_msr.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-user_msr.s
  

Comments

Jan Beulich Oct. 16, 2023, 12:11 p.m. UTC | #1
On 10.10.2023 09:24, Hu, Lin1 wrote:
> @@ -3863,6 +3864,8 @@ build_vex_prefix (const insn_template *t)
>  	case SPACE_0F:
>  	case SPACE_0F38:
>  	case SPACE_0F3A:
> +	case SPACE_EVEXMAP5:
> +	case SPACE_VEXMAP7:
>  	  i.vex.bytes[0] = 0xc4;
>  	  break;

I can see the need for the latter line you add, but why the former?
(If it is needed for some reason, this is a strong hint at the description
being overly brief.)

> @@ -8752,6 +8755,18 @@ build_modrm_byte (void)
>        source = v;
>        v = tmp;
>      }
> +  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCE_DEST)
> +    {
> +      if (dest == (unsigned int) ~0)
> +	source = source ^ 1;
> +      else
> +	{
> +	  unsigned int tmp = source;
> +
> +	  source = dest;
> +	  dest = tmp;
> +	}
> +    }

Why is this needed? There's only a single register operand in both
affected insn forms (see comment below on the 2-register form).
Furthermore I think it would be easier if you "canonicalized" the
early immediate to be the 1st operand, such that for all other
purposes immediates remain first.

As a cosmetic nit: Please have a blank line ahead of the if() block
(if it needs to stay).

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/user_msr-inval.s
> @@ -0,0 +1,7 @@
> +# Check Illegal 32bit USER_MSR instructions
> +
> +	.allow_index_reg
> +	.text
> +_start:
> +	urdmsr	%r12, %r14	 #USER_MSR
> +	uwrmsr	%r12, %r14	 #USER_MSR

As per comments on earlier series: What use are the comments here?
(Applicable also again below.)

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-user_msr.s
> @@ -0,0 +1,15 @@
> +# Check 64bit USER_MSR instructions
> +
> +	.allow_index_reg
> +	.text
> +_start:
> +	urdmsr	%r14, %r12	 #USER_MSR
> +	urdmsr	$51515151, %r12	 #USER_MSR
> +	uwrmsr	%r12, %r14	 #USER_MSR
> +	uwrmsr	%r12, $51515151	 #USER_MSR
> +
> +.intel_syntax noprefix

Nit: Please indent directives.

> +	urdmsr	r12, r14	 #USER_MSR
> +	urdmsr	r12, 51515151	 #USER_MSR
> +	uwrmsr	r14, r12	 #USER_MSR
> +	uwrmsr	51515151, r12	 #USER_MSR

I think varying registers slightly more (such that each two-register
form has one low-8 and one high-8 operand, totaling to two forms each
to prove that the REX.[RB] bits are also correctly dealt with) would
be better.

Btw, what's the interaction here with APX? The legacy forms are going
to use REX2, but the VEX forms would need EVEX variants then.

> @@ -618,6 +620,8 @@ enum
>    w_mode,
>    /* double word operand  */
>    d_mode,
> +  /* double word operand 0 */
> +  d_0_mode,

Why is this needed? IOW why does d_mode not do? Or alternatively why
isn't this a name indicating that it's an unsigned 32-bit value (as
opposed to other 32-bit immediates in 64-bit mode)?

> @@ -845,6 +849,7 @@ enum
>    REG_VEX_0FAE,
>    REG_VEX_0F3849_X86_64_L_0_W_0_M_1_P_0,
>    REG_VEX_0F38F3_L_0,
> +  REG_VEX_MAP7_F8_L_0_W_0_M_1,
>  
>    REG_XOP_09_01_L_0,
>    REG_XOP_09_02_L_0,
> @@ -893,8 +898,10 @@ enum
>    MOD_0FC7_REG_6,
>    MOD_0FC7_REG_7,
>    MOD_0F38DC_PREFIX_1,
> +  MOD_0F38F8,
>  
>    MOD_VEX_0F3849_X86_64_L_0_W_0,
> +  MOD_VEX_MAP7_F8_L_0_W_0,
>  };

As before - no new mod_table[] entries please which don't have both
branches populated.

> @@ -6791,6 +6839,297 @@ static const struct dis386 vex_table[][256] = {
>      { Bad_Opcode },
>      { Bad_Opcode },
>    },
> +  /* VEX_MAP7 */
> +  {
> +    /* 00 */
> +    { Bad_Opcode },

I wonder whether adding a full new table (rather than some special case
code) is really a god use of space. Of course if you know that more of it
will be populated in the not too distant future ...

> @@ -11248,6 +11609,20 @@ get32s (instr_info *ins, bfd_vma *res)
>    return true;
>  }
>  
> +/* The function is used to get imm32, when imm32 is operand 0, and ins only has 2 operands. */
> +static bool
> +get32_operand0 (instr_info *ins, bfd_vma *res)
> +{
> +
> +  if (!fetch_code (ins->info, ins->codep + 5))
> +    return false;
> +  *res = *(ins->codep++ + 1) & (bfd_vma) 0xff;
> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 8;
> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 16;
> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 24;
> +  return true;
> +}

Instead of this (which assumes ModRM.mod == 3) I think you want to
arrange for dealing with ModRM first. We already have OP_Skip_MODRM()
for such needs, which you could use in a first "hidden" operand.

> @@ -3346,3 +3349,12 @@ erets, 0xf20f01ca, FRED|x64, NoSuf, {}
>  eretu, 0xf30f01ca, FRED|x64, NoSuf, {}
>  
>  // FRED instructions end.
> +
> +// USER_MSR instructions.
> +
> +urdmsr, 0xf20f38f8, USER_MSR|x64, Modrm|IgnoreSize|SwapSourceDest|NoSuf, { Reg64, Reg64 }

Iirc RegMem is the attribute to use here, not any new one.

> +urdmsr, 0xf2f8/0, USER_MSR|x64, Modrm|Vex128|VexMap7|VexW0|IgnoreSize|NoSuf, { Imm32S, Reg64 }

This and ...

> +uwrmsr, 0xf30f38f8, USER_MSR|x64, Modrm|IgnoreSize|NoSuf, { Reg64, Reg64 }
> +uwrmsr, 0xf3f8/0, USER_MSR|x64, Modrm|Vex128|VexMap7|VexW0|IgnoreSize|SwapSourceDest|NoSuf, { Reg64, Imm32S }

... this needs to use Imm32, not Imm32S. I understand this is going to
cause complications elsewhere, but we can't afford getting this wrong.

Also in all forms I think you don't mean IgnoreSize, but NoRex64.

Jan
  
Hu, Lin1 Oct. 18, 2023, 7:51 a.m. UTC | #2
Thanks for your review. I responded individually under each comment.
Attached is the modified version.

BRs,
Lin
-----Original Message-----
From: Jan Beulich <jbeulich@suse.com> 
Sent: Monday, October 16, 2023 8:11 PM
To: Hu, Lin1 <lin1.hu@intel.com>
Cc: Lu, Hongjiu <hongjiu.lu@intel.com>; binutils@sourceware.org
Subject: Re: [PATCH] Support Intel USER_MSR

On 10.10.2023 09:24, Hu, Lin1 wrote:
> @@ -3863,6 +3864,8 @@ build_vex_prefix (const insn_template *t)
>  	case SPACE_0F:
>  	case SPACE_0F38:
>  	case SPACE_0F3A:
> +	case SPACE_EVEXMAP5:
> +	case SPACE_VEXMAP7:
>  	  i.vex.bytes[0] = 0xc4;
>  	  break;

> I can see the need for the latter line you add, but why the former?
> (If it is needed for some reason, this is a strong hint at the description being overly brief.)

I think it's a wrong that is made by reordering patches. I have removed it.

> @@ -8752,6 +8755,18 @@ build_modrm_byte (void)
>        source = v;
>        v = tmp;
>      }
> +  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCE_DEST)
> +    {
> +      if (dest == (unsigned int) ~0)
> +	source = source ^ 1;
> +      else
> +	{
> +	  unsigned int tmp = source;
> +
> +	  source = dest;
> +	  dest = tmp;
> +	}
> +    }

> Why is this needed? There's only a single register operand in both affected insn forms (see comment below on the 2-register form).
Furthermore I think it would be easier if you "canonicalized" the early immediate to be the 1st operand, such that for all other purposes immediates remain first.

> As a cosmetic nit: Please have a blank line ahead of the if() block (if it needs to stay).

Indeed, I've only kept the part that deals with a single register. Do you mean to complain to the person who designed the insn. Unfortunately, that's impossible.

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/user_msr-inval.s
> @@ -0,0 +1,7 @@
> +# Check Illegal 32bit USER_MSR instructions
> +
> +	.allow_index_reg
> +	.text
> +_start:
> +	urdmsr	%r12, %r14	 #USER_MSR
> +	uwrmsr	%r12, %r14	 #USER_MSR

> As per comments on earlier series: What use are the comments here?
> (Applicable also again below.)

I have removed it.

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-user_msr.s
> @@ -0,0 +1,15 @@
> +# Check 64bit USER_MSR instructions
> +
> +	.allow_index_reg
> +	.text
> +_start:
> +	urdmsr	%r14, %r12	 #USER_MSR
> +	urdmsr	$51515151, %r12	 #USER_MSR
> +	uwrmsr	%r12, %r14	 #USER_MSR
> +	uwrmsr	%r12, $51515151	 #USER_MSR
> +
> +.intel_syntax noprefix

> Nit: Please indent directives.
Have removed these comments.

> +	urdmsr	r12, r14	 #USER_MSR
> +	urdmsr	r12, 51515151	 #USER_MSR
> +	uwrmsr	r14, r12	 #USER_MSR
> +	uwrmsr	51515151, r12	 #USER_MSR

> I think varying registers slightly more (such that each two-register form has one low-8 and one high-8 operand, totaling to two forms each to prove that the REX.[RB] bits are also correctly dealt with) would be better.

I have added some other tests here.

> Btw, what's the interaction here with APX? The legacy forms are going to use REX2, but the VEX forms would need EVEX variants then.

Not at the moment. If it gets added to the documentation later, I'll add its.

> @@ -618,6 +620,8 @@ enum
>    w_mode,
>    /* double word operand  */
>    d_mode,
> +  /* double word operand 0 */
> +  d_0_mode,

> Why is this needed? IOW why does d_mode not do? Or alternatively why isn't this a name indicating that it's an unsigned 32-bit value (as opposed to other 32-bit immediates in 64-bit mode)?

I want to output imm32 first, but modrm byte is behind imm32, so I need to skip modrm for the moment. If I use { "uwrmsr", { Id, Eq }, 0} Id will read modrm byte. The mode is for a imm32 not an unsigned 32-bit value, so its name doesn't indicate that it's an unsigned integer.

> @@ -845,6 +849,7 @@ enum
>    REG_VEX_0FAE,
>    REG_VEX_0F3849_X86_64_L_0_W_0_M_1_P_0,
>    REG_VEX_0F38F3_L_0,
> +  REG_VEX_MAP7_F8_L_0_W_0_M_1,
>  
>    REG_XOP_09_01_L_0,
>    REG_XOP_09_02_L_0,
> @@ -893,8 +898,10 @@ enum
>    MOD_0FC7_REG_6,
>    MOD_0FC7_REG_7,
>    MOD_0F38DC_PREFIX_1,
> +  MOD_0F38F8,
>  
>    MOD_VEX_0F3849_X86_64_L_0_W_0,
> +  MOD_VEX_MAP7_F8_L_0_W_0,
>  };

> As before - no new mod_table[] entries please which don't have both branches populated.

Have removed it.

+  {
+    /* MOD_VEX_MAP7_F8_L_0_W_0 */
+    { Bad_Opcode },
+    { REG_TABLE (REG_VEX_MAP7_F8_L_0_W_0_M_1) },
+  },

> @@ -6791,6 +6839,297 @@ static const struct dis386 vex_table[][256] = {
>      { Bad_Opcode },
>      { Bad_Opcode },
>    },
> +  /* VEX_MAP7 */
> +  {
> +    /* 00 */
> +    { Bad_Opcode },

> I wonder whether adding a full new table (rather than some special case
> code) is really a god use of space. Of course if you know that more of it will be populated in the not too distant future ...

I don't know, I'm just treating the new opcode_space MAP7 like other opcode_space. 

> @@ -11248,6 +11609,20 @@ get32s (instr_info *ins, bfd_vma *res)
>    return true;
>  }
>  
> +/* The function is used to get imm32, when imm32 is operand 0, and 
> +ins only has 2 operands. */ static bool
> +get32_operand0 (instr_info *ins, bfd_vma *res) {
> +
> +  if (!fetch_code (ins->info, ins->codep + 5))
> +    return false;
> +  *res = *(ins->codep++ + 1) & (bfd_vma) 0xff;
> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 8;
> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 16;
> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 24;
> +  return true;
> +}

> Instead of this (which assumes ModRM.mod == 3) I think you want to arrange for dealing with ModRM first. We already have OP_Skip_MODRM() for such needs, which you could use in a first "hidden" operand.

I want to output imm32 first, but modrm byte is behind imm32, so I need to skip modrm for the moment. but I can't use OP_Skip_MODRM to deal with my problem, If I use it, I should add ins->codep-- at the end of get32_operand0.

> @@ -3346,3 +3349,12 @@ erets, 0xf20f01ca, FRED|x64, NoSuf, {}  eretu, 
> 0xf30f01ca, FRED|x64, NoSuf, {}
>  
>  // FRED instructions end.
> +
> +// USER_MSR instructions.
> +
> +urdmsr, 0xf20f38f8, USER_MSR|x64, 
> +Modrm|IgnoreSize|SwapSourceDest|NoSuf, { Reg64, Reg64 }

> Iirc RegMem is the attribute to use here, not any new one.

Indeed.

> +urdmsr, 0xf2f8/0, USER_MSR|x64, 
> +Modrm|Vex128|VexMap7|VexW0|IgnoreSize|NoSuf, { Imm32S, Reg64 }

> This and ...

> +uwrmsr, 0xf30f38f8, USER_MSR|x64, Modrm|IgnoreSize|NoSuf, { Reg64, 
> +Reg64 } uwrmsr, 0xf3f8/0, USER_MSR|x64, 
> +Modrm|Vex128|VexMap7|VexW0|IgnoreSize|SwapSourceDest|NoSuf, { Reg64, 
> +Imm32S }

> ... this needs to use Imm32, not Imm32S. I understand this is going to cause complications elsewhere, but we can't afford getting this wrong.

> Also in all forms I think you don't mean IgnoreSize, but NoRex64.

Have modified them.

Jan
  
Jan Beulich Oct. 19, 2023, 8:36 a.m. UTC | #3
On 18.10.2023 09:51, Hu, Lin1 wrote:
> Thanks for your review. I responded individually under each comment.
> Attached is the modified version.

Please can you send proper new versions, to allow easy commenting?

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com> 
> Sent: Monday, October 16, 2023 8:11 PM
> 
> On 10.10.2023 09:24, Hu, Lin1 wrote:
>> @@ -8752,6 +8755,18 @@ build_modrm_byte (void)
>>        source = v;
>>        v = tmp;
>>      }
>> +  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCE_DEST)
>> +    {
>> +      if (dest == (unsigned int) ~0)
>> +	source = source ^ 1;
>> +      else
>> +	{
>> +	  unsigned int tmp = source;
>> +
>> +	  source = dest;
>> +	  dest = tmp;
>> +	}
>> +    }
> 
>> Why is this needed? There's only a single register operand in both affected insn forms (see comment below on the 2-register form).
> Furthermore I think it would be easier if you "canonicalized" the early immediate to be the 1st operand, such that for all other purposes immediates remain first.
> 
>> As a cosmetic nit: Please have a blank line ahead of the if() block (if it needs to stay).
> 
> Indeed, I've only kept the part that deals with a single register. Do you mean to complain to the person who designed the insn. Unfortunately, that's impossible.

I'm having trouble connecting your reply to what I wrote. No, I do not
mean to complain; I can certainly see why the immediate is wanted as
first (Intel) / last (AT&T) operand.

I'm also not happy about the new change to build_modrm_byte(). When
asking to "canonicalize" operands, I meant to gave this generalized,
with SWAP_SOURCE_DEST dropped completely. (This will then also save
me from complaining about a missing blank in SwapSourceDest's
#define.)

>> --- /dev/null
>> +++ b/gas/testsuite/gas/i386/x86-64-user_msr.s
>> @@ -0,0 +1,15 @@
>> +# Check 64bit USER_MSR instructions
>> +
>> +	.allow_index_reg
>> +	.text
>> +_start:
>> +	urdmsr	%r14, %r12	 #USER_MSR
>> +	urdmsr	$51515151, %r12	 #USER_MSR
>> +	uwrmsr	%r12, %r14	 #USER_MSR
>> +	uwrmsr	%r12, $51515151	 #USER_MSR
>> +
>> +.intel_syntax noprefix
> 
>> Nit: Please indent directives.
> Have removed these comments.

Neither does your reply fit here, nor did you what I've asked for
here (in addition to the earlier, wider request of dropping
meaningless comments).

>> +	urdmsr	r12, r14	 #USER_MSR
>> +	urdmsr	r12, 51515151	 #USER_MSR
>> +	uwrmsr	r14, r12	 #USER_MSR
>> +	uwrmsr	51515151, r12	 #USER_MSR
> 
>> I think varying registers slightly more (such that each two-register form has one low-8 and one high-8 operand, totaling to two forms each to prove that the REX.[RB] bits are also correctly dealt with) would be better.
> 
> I have added some other tests here.

Thanks.

>> @@ -618,6 +620,8 @@ enum
>>    w_mode,
>>    /* double word operand  */
>>    d_mode,
>> +  /* double word operand 0 */
>> +  d_0_mode,
> 
>> Why is this needed? IOW why does d_mode not do? Or alternatively why isn't this a name indicating that it's an unsigned 32-bit value (as opposed to other 32-bit immediates in 64-bit mode)?
> 
> I want to output imm32 first, but modrm byte is behind imm32, so I need to skip modrm for the moment. If I use { "uwrmsr", { Id, Eq }, 0} Id will read modrm byte. The mode is for a imm32 not an unsigned 32-bit value, so its name doesn't indicate that it's an unsigned integer.

But the immediate is an unsigned 32-bit one. Signed ones would be sign-
extended, which isn't the case here (or else the range 0x80000000 ...
0xffffffff wouldn't form valid MSR numbers).

>> @@ -845,6 +849,7 @@ enum
>>    REG_VEX_0FAE,
>>    REG_VEX_0F3849_X86_64_L_0_W_0_M_1_P_0,
>>    REG_VEX_0F38F3_L_0,
>> +  REG_VEX_MAP7_F8_L_0_W_0_M_1,
>>  
>>    REG_XOP_09_01_L_0,
>>    REG_XOP_09_02_L_0,
>> @@ -893,8 +898,10 @@ enum
>>    MOD_0FC7_REG_6,
>>    MOD_0FC7_REG_7,
>>    MOD_0F38DC_PREFIX_1,
>> +  MOD_0F38F8,
>>  
>>    MOD_VEX_0F3849_X86_64_L_0_W_0,
>> +  MOD_VEX_MAP7_F8_L_0_W_0,
>>  };
> 
>> As before - no new mod_table[] entries please which don't have both branches populated.
> 
> Have removed it.

But you're using Eq there, i.e. permitting memory operands as well.
What (I think) you want is Rq. For consistency this may then also
want using in the new PREFIX_0F38F8_M_1_X86_64 entry. But of course
you need to be careful about the collision with Nq.

>> @@ -6791,6 +6839,297 @@ static const struct dis386 vex_table[][256] = {
>>      { Bad_Opcode },
>>      { Bad_Opcode },
>>    },
>> +  /* VEX_MAP7 */
>> +  {
>> +    /* 00 */
>> +    { Bad_Opcode },
> 
>> I wonder whether adding a full new table (rather than some special case
>> code) is really a god use of space. Of course if you know that more of it will be populated in the not too distant future ...
> 
> I don't know, I'm just treating the new opcode_space MAP7 like other opcode_space. 

I'm afraid that "I don't know" is not an answer here. You can basically
take two positions: Mine (waste of space) or you justify why the extra
space used (and the extra runtime relocations added) aren't of concern.

>> @@ -11248,6 +11609,20 @@ get32s (instr_info *ins, bfd_vma *res)
>>    return true;
>>  }
>>  
>> +/* The function is used to get imm32, when imm32 is operand 0, and 
>> +ins only has 2 operands. */ static bool
>> +get32_operand0 (instr_info *ins, bfd_vma *res) {
>> +
>> +  if (!fetch_code (ins->info, ins->codep + 5))
>> +    return false;
>> +  *res = *(ins->codep++ + 1) & (bfd_vma) 0xff;
>> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 8;
>> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 16;
>> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 24;
>> +  return true;
>> +}
> 
>> Instead of this (which assumes ModRM.mod == 3) I think you want to arrange for dealing with ModRM first. We already have OP_Skip_MODRM() for such needs, which you could use in a first "hidden" operand.
> 
> I want to output imm32 first, but modrm byte is behind imm32, so I need to skip modrm for the moment. but I can't use OP_Skip_MODRM to deal with my problem, If I use it, I should add ins->codep-- at the end of get32_operand0.

get32_operand0() should go away imo; at the very least I don't agree
with special casing it being operand 0 (which, as you realize, is true
only in the textual representation, and only in Intel syntax, but in
particular not in the encoding). It may be necessary to special case
it being an unsigned immediate, but get32() already fits that purpose.

>> @@ -3346,3 +3349,12 @@ erets, 0xf20f01ca, FRED|x64, NoSuf, {}  eretu, 
>> 0xf30f01ca, FRED|x64, NoSuf, {}
>>  
>>  // FRED instructions end.
>> +
>> +// USER_MSR instructions.
>> +
>> +urdmsr, 0xf20f38f8, USER_MSR|x64, 
>> +Modrm|IgnoreSize|SwapSourceDest|NoSuf, { Reg64, Reg64 }
> 
>> Iirc RegMem is the attribute to use here, not any new one.
> 
> Indeed.
> 
>> +urdmsr, 0xf2f8/0, USER_MSR|x64, 
>> +Modrm|Vex128|VexMap7|VexW0|IgnoreSize|NoSuf, { Imm32S, Reg64 }
> 
>> This and ...
> 
>> +uwrmsr, 0xf30f38f8, USER_MSR|x64, Modrm|IgnoreSize|NoSuf, { Reg64, 
>> +Reg64 } uwrmsr, 0xf3f8/0, USER_MSR|x64, 
>> +Modrm|Vex128|VexMap7|VexW0|IgnoreSize|SwapSourceDest|NoSuf, { Reg64, 
>> +Imm32S }
> 
>> ... this needs to use Imm32, not Imm32S. I understand this is going to cause complications elsewhere, but we can't afford getting this wrong.

I see you've corrected this, but it doesn't look to me as if the
match_template() change is actually going to do the job. You can't
change i.types[] without looking at the actual values. I think this
needs dealing with in optimize_imm() and/or finalize_imm()
(irrespective of it's name more likely the former, but knock-on
adjustments to the latter may then turn out to be needed).

>> Also in all forms I think you don't mean IgnoreSize, but NoRex64.
> 
> Have modified them.

Hmm, I may have mislead you: NoRex64 is meaningless on VEX encodings.
There only the IgnoreSize needed dropping. I'm sorry.

Jan
  
Hu, Lin1 Oct. 24, 2023, 8:38 a.m. UTC | #4
Thanks for your advice. I've tentatively made some changes based on your comments, 
but it's not completely finalized yet, I'd like to consult with you for your opinion, and 
I'll send out a new patch after it's basically confirmed.

BRs,
Lin

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Thursday, October 19, 2023 4:36 PM
> To: Hu, Lin1 <lin1.hu@intel.com>
> Cc: Lu, Hongjiu <hongjiu.lu@intel.com>; binutils@sourceware.org
> Subject: Re: [PATCH] Support Intel USER_MSR
> 
> On 18.10.2023 09:51, Hu, Lin1 wrote:
> > Thanks for your review. I responded individually under each comment.
> > Attached is the modified version.
> 
> Please can you send proper new versions, to allow easy commenting?
> 
> > -----Original Message-----
> > From: Jan Beulich <jbeulich@suse.com>
> > Sent: Monday, October 16, 2023 8:11 PM
> >
> > On 10.10.2023 09:24, Hu, Lin1 wrote:
> >> @@ -8752,6 +8755,18 @@ build_modrm_byte (void)
> >>        source = v;
> >>        v = tmp;
> >>      }
> >> +  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCE_DEST)
> >> +    {
> >> +      if (dest == (unsigned int) ~0)
> >> +	source = source ^ 1;
> >> +      else
> >> +	{
> >> +	  unsigned int tmp = source;
> >> +
> >> +	  source = dest;
> >> +	  dest = tmp;
> >> +	}
> >> +    }
> >
> >> Why is this needed? There's only a single register operand in both affected
> insn forms (see comment below on the 2-register form).
> > Furthermore I think it would be easier if you "canonicalized" the early
> immediate to be the 1st operand, such that for all other purposes immediates
> remain first.
> >
> >> As a cosmetic nit: Please have a blank line ahead of the if() block (if it needs
> to stay).
> >
> > Indeed, I've only kept the part that deals with a single register. Do you mean to
> complain to the person who designed the insn. Unfortunately, that's impossible.
> 
> I'm having trouble connecting your reply to what I wrote. No, I do not mean to
> complain; I can certainly see why the immediate is wanted as first (Intel) / last
> (AT&T) operand.
> 
> I'm also not happy about the new change to build_modrm_byte(). When asking
> to "canonicalize" operands, I meant to gave this generalized, with
> SWAP_SOURCE_DEST dropped completely. (This will then also save me from
> complaining about a missing blank in SwapSourceDest's
> #define.)
> 

OK, I implemented a basic logic to handle the current situation, and included remarks.
Like:

   /* If the last operand is an immediate number (ATT), we need to modify
       the source operand accordingly. If any instructions use other immediate
       (imm8, imm16, etc.) as the last operand, we must update the constraint.  */
    if (i.types[source].bitfield.imm32 == 1)
      source--;

What's your opinion on this version?

> >> --- /dev/null
> >> +++ b/gas/testsuite/gas/i386/x86-64-user_msr.s
> >> @@ -0,0 +1,15 @@
> >> +# Check 64bit USER_MSR instructions
> >> +
> >> +	.allow_index_reg
> >> +	.text
> >> +_start:
> >> +	urdmsr	%r14, %r12	 #USER_MSR
> >> +	urdmsr	$51515151, %r12	 #USER_MSR
> >> +	uwrmsr	%r12, %r14	 #USER_MSR
> >> +	uwrmsr	%r12, $51515151	 #USER_MSR
> >> +
> >> +.intel_syntax noprefix
> >
> >> Nit: Please indent directives.
> > Have removed these comments.
> 
> Neither does your reply fit here, nor did you what I've asked for here (in addition
> to the earlier, wider request of dropping meaningless comments).
> 

Perhaps there's an issue with my interface, but it seems to me that the
instruction now begins with a \t instead of two spaces for the updated patch.

> >> +	urdmsr	r12, r14	 #USER_MSR
> >> +	urdmsr	r12, 51515151	 #USER_MSR
> >> +	uwrmsr	r14, r12	 #USER_MSR
> >> +	uwrmsr	51515151, r12	 #USER_MSR
> >
> >> I think varying registers slightly more (such that each two-register form has
> one low-8 and one high-8 operand, totaling to two forms each to prove that the
> REX.[RB] bits are also correctly dealt with) would be better.
> >
> > I have added some other tests here.
> 
> Thanks.
> 
> >> @@ -618,6 +620,8 @@ enum
> >>    w_mode,
> >>    /* double word operand  */
> >>    d_mode,
> >> +  /* double word operand 0 */
> >> +  d_0_mode,
> >
> >> Why is this needed? IOW why does d_mode not do? Or alternatively why isn't
> this a name indicating that it's an unsigned 32-bit value (as opposed to other 32-
> bit immediates in 64-bit mode)?
> >
> > I want to output imm32 first, but modrm byte is behind imm32, so I need to
> skip modrm for the moment. If I use { "uwrmsr", { Id, Eq }, 0} Id will read modrm
> byte. The mode is for a imm32 not an unsigned 32-bit value, so its name doesn't
> indicate that it's an unsigned integer.
> 
> But the immediate is an unsigned 32-bit one. Signed ones would be sign-
> extended, which isn't the case here (or else the range 0x80000000 ...
> 0xffffffff wouldn't form valid MSR numbers).

OK, you are right.

> 
> >> @@ -845,6 +849,7 @@ enum
> >>    REG_VEX_0FAE,
> >>    REG_VEX_0F3849_X86_64_L_0_W_0_M_1_P_0,
> >>    REG_VEX_0F38F3_L_0,
> >> +  REG_VEX_MAP7_F8_L_0_W_0_M_1,
> >>
> >>    REG_XOP_09_01_L_0,
> >>    REG_XOP_09_02_L_0,
> >> @@ -893,8 +898,10 @@ enum
> >>    MOD_0FC7_REG_6,
> >>    MOD_0FC7_REG_7,
> >>    MOD_0F38DC_PREFIX_1,
> >> +  MOD_0F38F8,
> >>
> >>    MOD_VEX_0F3849_X86_64_L_0_W_0,
> >> +  MOD_VEX_MAP7_F8_L_0_W_0,
> >>  };
> >
> >> As before - no new mod_table[] entries please which don't have both
> branches populated.
> >
> > Have removed it.
> 
> But you're using Eq there, i.e. permitting memory operands as well.
> What (I think) you want is Rq. For consistency this may then also want using in
> the new PREFIX_0F38F8_M_1_X86_64 entry. But of course you need to be
> careful about the collision with Nq.

I change the Nq's mode from q_mode to q_mm_mode and create Rq ( OP_R, q_mode ) for used.

> 
> >> @@ -6791,6 +6839,297 @@ static const struct dis386 vex_table[][256] = {
> >>      { Bad_Opcode },
> >>      { Bad_Opcode },
> >>    },
> >> +  /* VEX_MAP7 */
> >> +  {
> >> +    /* 00 */
> >> +    { Bad_Opcode },
> >
> >> I wonder whether adding a full new table (rather than some special
> >> case
> >> code) is really a god use of space. Of course if you know that more of it will
> be populated in the not too distant future ...
> >
> > I don't know, I'm just treating the new opcode_space MAP7 like other
> opcode_space.
> 
> I'm afraid that "I don't know" is not an answer here. You can basically take two
> positions: Mine (waste of space) or you justify why the extra space used (and the
> extra runtime relocations added) aren't of concern.
> 

OK, I will skip MAP7 table. If vex_table_index = VEX_MAP7 and index == f8
dp will be VEX_LEN_MAP7_F8. So I don't need to add a full new table untill other MAP7
instructions raise.

> >> @@ -11248,6 +11609,20 @@ get32s (instr_info *ins, bfd_vma *res)
> >>    return true;
> >>  }
> >>
> >> +/* The function is used to get imm32, when imm32 is operand 0, and
> >> +ins only has 2 operands. */ static bool
> >> +get32_operand0 (instr_info *ins, bfd_vma *res) {
> >> +
> >> +  if (!fetch_code (ins->info, ins->codep + 5))
> >> +    return false;
> >> +  *res = *(ins->codep++ + 1) & (bfd_vma) 0xff;
> >> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 8;
> >> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 16;
> >> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 24;
> >> +  return true;
> >> +}
> >
> >> Instead of this (which assumes ModRM.mod == 3) I think you want to arrange
> for dealing with ModRM first. We already have OP_Skip_MODRM() for such
> needs, which you could use in a first "hidden" operand.
> >
> > I want to output imm32 first, but modrm byte is behind imm32, so I need to
> skip modrm for the moment. but I can't use OP_Skip_MODRM to deal with my
> problem, If I use it, I should add ins->codep-- at the end of get32_operand0.
> 
> get32_operand0() should go away imo; at the very least I don't agree with
> special casing it being operand 0 (which, as you realize, is true only in the textual
> representation, and only in Intel syntax, but in particular not in the encoding). It
> may be necessary to special case it being an unsigned immediate, but get32()
> already fits that purpose.

I've thought of it so far is I can use a Fixup function like

static bool
uwrmsr_Fixup (instr_info *ins, int bytemode, int sizeflag)
{
    if (bytemode == d_mode)
      {
        if (OP_Skip_MODRM (ins, 0, sizeflag))
          {
            if (OP_I (ins, bytemode, sizeflag))
              {
                ins->codep--;
              }
              return true;
          }
      }
    return false;
}

Then the uwrmsr's unit will be { "uwrmsr",       {	{ uwrmsr_Fixup, d_mode }, Rq }, 0 }.
What‘s your opinion?

> 
> >> @@ -3346,3 +3349,12 @@ erets, 0xf20f01ca, FRED|x64, NoSuf, {}  eretu,
> >> 0xf30f01ca, FRED|x64, NoSuf, {}
> >>
> >>  // FRED instructions end.
> >> +
> >> +// USER_MSR instructions.
> >> +
> >> +urdmsr, 0xf20f38f8, USER_MSR|x64,
> >> +Modrm|IgnoreSize|SwapSourceDest|NoSuf, { Reg64, Reg64 }
> >
> >> Iirc RegMem is the attribute to use here, not any new one.
> >
> > Indeed.
> >
> >> +urdmsr, 0xf2f8/0, USER_MSR|x64,
> >> +Modrm|Vex128|VexMap7|VexW0|IgnoreSize|NoSuf, { Imm32S, Reg64 }
> >
> >> This and ...
> >
> >> +uwrmsr, 0xf30f38f8, USER_MSR|x64, Modrm|IgnoreSize|NoSuf, { Reg64,
> >> +Reg64 } uwrmsr, 0xf3f8/0, USER_MSR|x64,
> >> +Modrm|Vex128|VexMap7|VexW0|IgnoreSize|SwapSourceDest|NoSuf,
> { Reg64,
> >> +Imm32S }
> >
> >> ... this needs to use Imm32, not Imm32S. I understand this is going to cause
> complications elsewhere, but we can't afford getting this wrong.
> 
> I see you've corrected this, but it doesn't look to me as if the
> match_template() change is actually going to do the job. You can't change
> i.types[] without looking at the actual values. I think this needs dealing with in
> optimize_imm() and/or finalize_imm() (irrespective of it's name more likely the
> former, but knock-on adjustments to the latter may then turn out to be needed).
> 

OK.

> >> Also in all forms I think you don't mean IgnoreSize, but NoRex64.
> >
> > Have modified them.
> 
> Hmm, I may have mislead you: NoRex64 is meaningless on VEX encodings.
> There only the IgnoreSize needed dropping. I'm sorry.
> 
> Jan
  
Jan Beulich Oct. 24, 2023, 8:55 a.m. UTC | #5
On 24.10.2023 10:38, Hu, Lin1 wrote:
>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Thursday, October 19, 2023 4:36 PM
>>
>> On 18.10.2023 09:51, Hu, Lin1 wrote:
>>> -----Original Message-----
>>> From: Jan Beulich <jbeulich@suse.com>
>>> Sent: Monday, October 16, 2023 8:11 PM
>>>
>>> On 10.10.2023 09:24, Hu, Lin1 wrote:
>>>> @@ -8752,6 +8755,18 @@ build_modrm_byte (void)
>>>>        source = v;
>>>>        v = tmp;
>>>>      }
>>>> +  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCE_DEST)
>>>> +    {
>>>> +      if (dest == (unsigned int) ~0)
>>>> +	source = source ^ 1;
>>>> +      else
>>>> +	{
>>>> +	  unsigned int tmp = source;
>>>> +
>>>> +	  source = dest;
>>>> +	  dest = tmp;
>>>> +	}
>>>> +    }
>>>
>>>> Why is this needed? There's only a single register operand in both affected
>> insn forms (see comment below on the 2-register form).
>>> Furthermore I think it would be easier if you "canonicalized" the early
>> immediate to be the 1st operand, such that for all other purposes immediates
>> remain first.
>>>
>>>> As a cosmetic nit: Please have a blank line ahead of the if() block (if it needs
>> to stay).
>>>
>>> Indeed, I've only kept the part that deals with a single register. Do you mean to
>> complain to the person who designed the insn. Unfortunately, that's impossible.
>>
>> I'm having trouble connecting your reply to what I wrote. No, I do not mean to
>> complain; I can certainly see why the immediate is wanted as first (Intel) / last
>> (AT&T) operand.
>>
>> I'm also not happy about the new change to build_modrm_byte(). When asking
>> to "canonicalize" operands, I meant to gave this generalized, with
>> SWAP_SOURCE_DEST dropped completely. (This will then also save me from
>> complaining about a missing blank in SwapSourceDest's
>> #define.)
>>
> 
> OK, I implemented a basic logic to handle the current situation, and included remarks.
> Like:
> 
>    /* If the last operand is an immediate number (ATT), we need to modify
>        the source operand accordingly. If any instructions use other immediate
>        (imm8, imm16, etc.) as the last operand, we must update the constraint.  */
>     if (i.types[source].bitfield.imm32 == 1)
>       source--;
> 
> What's your opinion on this version?

As before - build_modrm_byte() is the wrong place to make the intended
(generalized) adjustment. You want to move the unusually placed imm
operand to its usual place as soon as you're done with matching input
against the respective template.

>>>> --- /dev/null
>>>> +++ b/gas/testsuite/gas/i386/x86-64-user_msr.s
>>>> @@ -0,0 +1,15 @@
>>>> +# Check 64bit USER_MSR instructions
>>>> +
>>>> +	.allow_index_reg
>>>> +	.text
>>>> +_start:
>>>> +	urdmsr	%r14, %r12	 #USER_MSR
>>>> +	urdmsr	$51515151, %r12	 #USER_MSR
>>>> +	uwrmsr	%r12, %r14	 #USER_MSR
>>>> +	uwrmsr	%r12, $51515151	 #USER_MSR
>>>> +
>>>> +.intel_syntax noprefix
>>>
>>>> Nit: Please indent directives.
>>> Have removed these comments.
>>
>> Neither does your reply fit here, nor did you what I've asked for here (in addition
>> to the earlier, wider request of dropping meaningless comments).
>>
> 
> Perhaps there's an issue with my interface, but it seems to me that the
> instruction now begins with a \t instead of two spaces for the updated patch.

Not that you say "instruction" when I said "directive". My comment
wasn't about any instruction, but about the ".intel_syntax noprefix"
line.

>>>> @@ -6791,6 +6839,297 @@ static const struct dis386 vex_table[][256] = {
>>>>      { Bad_Opcode },
>>>>      { Bad_Opcode },
>>>>    },
>>>> +  /* VEX_MAP7 */
>>>> +  {
>>>> +    /* 00 */
>>>> +    { Bad_Opcode },
>>>
>>>> I wonder whether adding a full new table (rather than some special
>>>> case
>>>> code) is really a god use of space. Of course if you know that more of it will
>> be populated in the not too distant future ...
>>>
>>> I don't know, I'm just treating the new opcode_space MAP7 like other
>> opcode_space.
>>
>> I'm afraid that "I don't know" is not an answer here. You can basically take two
>> positions: Mine (waste of space) or you justify why the extra space used (and the
>> extra runtime relocations added) aren't of concern.
>>
> 
> OK, I will skip MAP7 table. If vex_table_index = VEX_MAP7 and index == f8
> dp will be VEX_LEN_MAP7_F8. So I don't need to add a full new table untill other MAP7
> instructions raise.

H.J., before we go this route, what's your view here?

>>>> @@ -11248,6 +11609,20 @@ get32s (instr_info *ins, bfd_vma *res)
>>>>    return true;
>>>>  }
>>>>
>>>> +/* The function is used to get imm32, when imm32 is operand 0, and
>>>> +ins only has 2 operands. */ static bool
>>>> +get32_operand0 (instr_info *ins, bfd_vma *res) {
>>>> +
>>>> +  if (!fetch_code (ins->info, ins->codep + 5))
>>>> +    return false;
>>>> +  *res = *(ins->codep++ + 1) & (bfd_vma) 0xff;
>>>> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 8;
>>>> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 16;
>>>> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 24;
>>>> +  return true;
>>>> +}
>>>
>>>> Instead of this (which assumes ModRM.mod == 3) I think you want to arrange
>> for dealing with ModRM first. We already have OP_Skip_MODRM() for such
>> needs, which you could use in a first "hidden" operand.
>>>
>>> I want to output imm32 first, but modrm byte is behind imm32, so I need to
>> skip modrm for the moment. but I can't use OP_Skip_MODRM to deal with my
>> problem, If I use it, I should add ins->codep-- at the end of get32_operand0.
>>
>> get32_operand0() should go away imo; at the very least I don't agree with
>> special casing it being operand 0 (which, as you realize, is true only in the textual
>> representation, and only in Intel syntax, but in particular not in the encoding). It
>> may be necessary to special case it being an unsigned immediate, but get32()
>> already fits that purpose.
> 
> I've thought of it so far is I can use a Fixup function like
> 
> static bool
> uwrmsr_Fixup (instr_info *ins, int bytemode, int sizeflag)
> {
>     if (bytemode == d_mode)
>       {
>         if (OP_Skip_MODRM (ins, 0, sizeflag))
>           {
>             if (OP_I (ins, bytemode, sizeflag))
>               {
>                 ins->codep--;
>               }
>               return true;
>           }
>       }
>     return false;
> }
> 
> Then the uwrmsr's unit will be { "uwrmsr",       {	{ uwrmsr_Fixup, d_mode }, Rq }, 0 }.
> What‘s your opinion?

Hmm, not very nice, but I can't exclude it simply won't get any better.
My desire was for there to not be any new fixup function, and for
OP_Skip_MODRM to be used directly in the table entry. (In any event,
if you really need to keep this new function, please combine the three
if()-s into a single one, helping readability quite a bit.

Jan
  
Hu, Lin1 Oct. 24, 2023, 10:01 a.m. UTC | #6
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Tuesday, October 24, 2023 4:56 PM
> To: Hu, Lin1 <lin1.hu@intel.com>; Lu, Hongjiu <hongjiu.lu@intel.com>
> Cc: binutils@sourceware.org
> Subject: Re: [PATCH] Support Intel USER_MSR
> 
> On 24.10.2023 10:38, Hu, Lin1 wrote:
> >> -----Original Message-----
> >> From: Jan Beulich <jbeulich@suse.com>
> >> Sent: Thursday, October 19, 2023 4:36 PM
> >>
> >> On 18.10.2023 09:51, Hu, Lin1 wrote:
> >>> -----Original Message-----
> >>> From: Jan Beulich <jbeulich@suse.com>
> >>> Sent: Monday, October 16, 2023 8:11 PM
> >>>
> >>> On 10.10.2023 09:24, Hu, Lin1 wrote:
> >>>> @@ -8752,6 +8755,18 @@ build_modrm_byte (void)
> >>>>        source = v;
> >>>>        v = tmp;
> >>>>      }
> >>>> +  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCE_DEST)
> >>>> +    {
> >>>> +      if (dest == (unsigned int) ~0)
> >>>> +	source = source ^ 1;
> >>>> +      else
> >>>> +	{
> >>>> +	  unsigned int tmp = source;
> >>>> +
> >>>> +	  source = dest;
> >>>> +	  dest = tmp;
> >>>> +	}
> >>>> +    }
> >>>
> >>>> Why is this needed? There's only a single register operand in both
> >>>> affected
> >> insn forms (see comment below on the 2-register form).
> >>> Furthermore I think it would be easier if you "canonicalized" the
> >>> early
> >> immediate to be the 1st operand, such that for all other purposes
> >> immediates remain first.
> >>>
> >>>> As a cosmetic nit: Please have a blank line ahead of the if() block
> >>>> (if it needs
> >> to stay).
> >>>
> >>> Indeed, I've only kept the part that deals with a single register.
> >>> Do you mean to
> >> complain to the person who designed the insn. Unfortunately, that's
> impossible.
> >>
> >> I'm having trouble connecting your reply to what I wrote. No, I do
> >> not mean to complain; I can certainly see why the immediate is wanted
> >> as first (Intel) / last
> >> (AT&T) operand.
> >>
> >> I'm also not happy about the new change to build_modrm_byte(). When
> >> asking to "canonicalize" operands, I meant to gave this generalized,
> >> with SWAP_SOURCE_DEST dropped completely. (This will then also save
> >> me from complaining about a missing blank in SwapSourceDest's
> >> #define.)
> >>
> >
> > OK, I implemented a basic logic to handle the current situation, and included
> remarks.
> > Like:
> >
> >    /* If the last operand is an immediate number (ATT), we need to modify
> >        the source operand accordingly. If any instructions use other immediate
> >        (imm8, imm16, etc.) as the last operand, we must update the constraint.
> */
> >     if (i.types[source].bitfield.imm32 == 1)
> >       source--;
> >
> > What's your opinion on this version?
> 
> As before - build_modrm_byte() is the wrong place to make the intended
> (generalized) adjustment. You want to move the unusually placed imm operand
> to its usual place as soon as you're done with matching input against the
> respective template.

Oh, I get you.
> 
> >>>> --- /dev/null
> >>>> +++ b/gas/testsuite/gas/i386/x86-64-user_msr.s
> >>>> @@ -0,0 +1,15 @@
> >>>> +# Check 64bit USER_MSR instructions
> >>>> +
> >>>> +	.allow_index_reg
> >>>> +	.text
> >>>> +_start:
> >>>> +	urdmsr	%r14, %r12	 #USER_MSR
> >>>> +	urdmsr	$51515151, %r12	 #USER_MSR
> >>>> +	uwrmsr	%r12, %r14	 #USER_MSR
> >>>> +	uwrmsr	%r12, $51515151	 #USER_MSR
> >>>> +
> >>>> +.intel_syntax noprefix
> >>>
> >>>> Nit: Please indent directives.
> >>> Have removed these comments.
> >>
> >> Neither does your reply fit here, nor did you what I've asked for
> >> here (in addition to the earlier, wider request of dropping meaningless
> comments).
> >>
> >
> > Perhaps there's an issue with my interface, but it seems to me that
> > the instruction now begins with a \t instead of two spaces for the updated
> patch.
> 
> Not that you say "instruction" when I said "directive". My comment wasn't
> about any instruction, but about the ".intel_syntax noprefix"
> line.
> 

Okay, thanks for the explanation.

> >>>> @@ -6791,6 +6839,297 @@ static const struct dis386 vex_table[][256] = {
> >>>>      { Bad_Opcode },
> >>>>      { Bad_Opcode },
> >>>>    },
> >>>> +  /* VEX_MAP7 */
> >>>> +  {
> >>>> +    /* 00 */
> >>>> +    { Bad_Opcode },
> >>>
> >>>> I wonder whether adding a full new table (rather than some special
> >>>> case
> >>>> code) is really a god use of space. Of course if you know that more
> >>>> of it will
> >> be populated in the not too distant future ...
> >>>
> >>> I don't know, I'm just treating the new opcode_space MAP7 like other
> >> opcode_space.
> >>
> >> I'm afraid that "I don't know" is not an answer here. You can
> >> basically take two
> >> positions: Mine (waste of space) or you justify why the extra space
> >> used (and the extra runtime relocations added) aren't of concern.
> >>
> >
> > OK, I will skip MAP7 table. If vex_table_index = VEX_MAP7 and index ==
> > f8 dp will be VEX_LEN_MAP7_F8. So I don't need to add a full new table
> > untill other MAP7 instructions raise.
> 
> H.J., before we go this route, what's your view here?
> 

Just mention: H.J. has a sabbatical until Nov 30 2023. I'm not sure if you got his e-mail.

> >>>> @@ -11248,6 +11609,20 @@ get32s (instr_info *ins, bfd_vma *res)
> >>>>    return true;
> >>>>  }
> >>>>
> >>>> +/* The function is used to get imm32, when imm32 is operand 0, and
> >>>> +ins only has 2 operands. */ static bool
> >>>> +get32_operand0 (instr_info *ins, bfd_vma *res) {
> >>>> +
> >>>> +  if (!fetch_code (ins->info, ins->codep + 5))
> >>>> +    return false;
> >>>> +  *res = *(ins->codep++ + 1) & (bfd_vma) 0xff;
> >>>> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 8;
> >>>> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 16;
> >>>> +  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 24;
> >>>> +  return true;
> >>>> +}
> >>>
> >>>> Instead of this (which assumes ModRM.mod == 3) I think you want to
> >>>> arrange
> >> for dealing with ModRM first. We already have OP_Skip_MODRM() for
> >> such needs, which you could use in a first "hidden" operand.
> >>>
> >>> I want to output imm32 first, but modrm byte is behind imm32, so I
> >>> need to
> >> skip modrm for the moment. but I can't use OP_Skip_MODRM to deal with
> >> my problem, If I use it, I should add ins->codep-- at the end of
> get32_operand0.
> >>
> >> get32_operand0() should go away imo; at the very least I don't agree
> >> with special casing it being operand 0 (which, as you realize, is
> >> true only in the textual representation, and only in Intel syntax,
> >> but in particular not in the encoding). It may be necessary to
> >> special case it being an unsigned immediate, but get32() already fits that
> purpose.
> >
> > I've thought of it so far is I can use a Fixup function like
> >
> > static bool
> > uwrmsr_Fixup (instr_info *ins, int bytemode, int sizeflag) {
> >     if (bytemode == d_mode)
> >       {
> >         if (OP_Skip_MODRM (ins, 0, sizeflag))
> >           {
> >             if (OP_I (ins, bytemode, sizeflag))
> >               {
> >                 ins->codep--;
> >               }
> >               return true;
> >           }
> >       }
> >     return false;
> > }
> >
> > Then the uwrmsr's unit will be { "uwrmsr",       {	{ uwrmsr_Fixup, d_mode },
> Rq }, 0 }.
> > What‘s your opinion?
> 
> Hmm, not very nice, but I can't exclude it simply won't get any better.
> My desire was for there to not be any new fixup function, and for
> OP_Skip_MODRM to be used directly in the table entry. (In any event, if you
> really need to keep this new function, please combine the three if()-s into a
> single one, helping readability quite a bit.
> 

I have another idea, can I have a new function like

OP_back_codep(...)
{
	Ins->codep--;
	Return true;
}

So the uwrmsr's unit will be { "uwrmsr", { Skip_MODRM, Id, Back_Codep, Rq }, 0 }.

OK, thanks for your advices.

> Jan
  
Jan Beulich Oct. 24, 2023, 12:02 p.m. UTC | #7
On 24.10.2023 12:01, Hu, Lin1 wrote:
>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Tuesday, October 24, 2023 4:56 PM
>>
>> On 24.10.2023 10:38, Hu, Lin1 wrote:
>>> I've thought of it so far is I can use a Fixup function like
>>>
>>> static bool
>>> uwrmsr_Fixup (instr_info *ins, int bytemode, int sizeflag) {
>>>     if (bytemode == d_mode)
>>>       {
>>>         if (OP_Skip_MODRM (ins, 0, sizeflag))
>>>           {
>>>             if (OP_I (ins, bytemode, sizeflag))
>>>               {
>>>                 ins->codep--;
>>>               }
>>>               return true;
>>>           }
>>>       }
>>>     return false;
>>> }
>>>
>>> Then the uwrmsr's unit will be { "uwrmsr",       {	{ uwrmsr_Fixup, d_mode },
>> Rq }, 0 }.
>>> What‘s your opinion?
>>
>> Hmm, not very nice, but I can't exclude it simply won't get any better.
>> My desire was for there to not be any new fixup function, and for
>> OP_Skip_MODRM to be used directly in the table entry. (In any event, if you
>> really need to keep this new function, please combine the three if()-s into a
>> single one, helping readability quite a bit.
>>
> 
> I have another idea, can I have a new function like
> 
> OP_back_codep(...)
> {
> 	Ins->codep--;
> 	Return true;
> }
> 
> So the uwrmsr's unit will be { "uwrmsr", { Skip_MODRM, Id, Back_Codep, Rq }, 0 }.

Well, the main thing I dislike is the decrementing of codep, no matter where
it's put. In case you don't think you can get away without, I guess I'll try
afterwards, aiming at an incremental change then.

Jan
  
Hu, Lin1 Oct. 25, 2023, 2:01 a.m. UTC | #8
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Tuesday, October 24, 2023 8:02 PM
> To: Hu, Lin1 <lin1.hu@intel.com>
> Cc: binutils@sourceware.org; Lu, Hongjiu <hongjiu.lu@intel.com>
> Subject: Re: [PATCH] Support Intel USER_MSR
> 
> On 24.10.2023 12:01, Hu, Lin1 wrote:
> >> -----Original Message-----
> >> From: Jan Beulich <jbeulich@suse.com>
> >> Sent: Tuesday, October 24, 2023 4:56 PM
> >>
> >> On 24.10.2023 10:38, Hu, Lin1 wrote:
> >>> I've thought of it so far is I can use a Fixup function like
> >>>
> >>> static bool
> >>> uwrmsr_Fixup (instr_info *ins, int bytemode, int sizeflag) {
> >>>     if (bytemode == d_mode)
> >>>       {
> >>>         if (OP_Skip_MODRM (ins, 0, sizeflag))
> >>>           {
> >>>             if (OP_I (ins, bytemode, sizeflag))
> >>>               {
> >>>                 ins->codep--;
> >>>               }
> >>>               return true;
> >>>           }
> >>>       }
> >>>     return false;
> >>> }
> >>>
> >>> Then the uwrmsr's unit will be { "uwrmsr",       {	{ uwrmsr_Fixup,
> d_mode },
> >> Rq }, 0 }.
> >>> What‘s your opinion?
> >>
> >> Hmm, not very nice, but I can't exclude it simply won't get any better.
> >> My desire was for there to not be any new fixup function, and for
> >> OP_Skip_MODRM to be used directly in the table entry. (In any event,
> >> if you really need to keep this new function, please combine the
> >> three if()-s into a single one, helping readability quite a bit.
> >>
> >
> > I have another idea, can I have a new function like
> >
> > OP_back_codep(...)
> > {
> > 	Ins->codep--;
> > 	Return true;
> > }
> >
> > So the uwrmsr's unit will be { "uwrmsr", { Skip_MODRM, Id, Back_Codep, Rq },
> 0 }.
> 
> Well, the main thing I dislike is the decrementing of codep, no matter where it's
> put. In case you don't think you can get away without, I guess I'll try afterwards,
> aiming at an incremental change then.
> 

I have another one in mind at the moment. Can I have a bool variable in instr_info,

@@ -221,6 +221,9 @@ struct instr_info
   /* Record whether EVEX masking is used incorrectly.  */
   bool illegal_masking;

+  /* Record whether the modrm byte has been skipped.  */
+  bool has_skipped_modrm.
+
   unsigned char op_ad;

And the Skip mod/rm byte pattern will be 

@@ -11668,7 +11658,11 @@ OP_Skip_MODRM (instr_info *ins, int bytemode ATTRIBUTE_UNUSED,

   /* Skip mod/rm byte.  */
   MODRM_CHECK;
-  ins->codep++;
+  if (!ins->has_skipped_modrm)
+    {
+      ins->codep++;
+      ins->has_skipped_modrm = true;
+    }
   return true;
 }
.
This change will be applied to all other similar sections (include OP_E).

So the uwrmsr's unit will be { "uwrmsr", { Skip_MODRM, Id, Rq }, 0 }, because the codep won't increasing in Rq, I don't need the decrementing of codep.

> Jan
  
Jan Beulich Oct. 25, 2023, 8:48 a.m. UTC | #9
On 25.10.2023 04:01, Hu, Lin1 wrote:
>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Tuesday, October 24, 2023 8:02 PM
>> To: Hu, Lin1 <lin1.hu@intel.com>
>> Cc: binutils@sourceware.org; Lu, Hongjiu <hongjiu.lu@intel.com>
>> Subject: Re: [PATCH] Support Intel USER_MSR
>>
>> On 24.10.2023 12:01, Hu, Lin1 wrote:
>>>> -----Original Message-----
>>>> From: Jan Beulich <jbeulich@suse.com>
>>>> Sent: Tuesday, October 24, 2023 4:56 PM
>>>>
>>>> On 24.10.2023 10:38, Hu, Lin1 wrote:
>>>>> I've thought of it so far is I can use a Fixup function like
>>>>>
>>>>> static bool
>>>>> uwrmsr_Fixup (instr_info *ins, int bytemode, int sizeflag) {
>>>>>     if (bytemode == d_mode)
>>>>>       {
>>>>>         if (OP_Skip_MODRM (ins, 0, sizeflag))
>>>>>           {
>>>>>             if (OP_I (ins, bytemode, sizeflag))
>>>>>               {
>>>>>                 ins->codep--;
>>>>>               }
>>>>>               return true;
>>>>>           }
>>>>>       }
>>>>>     return false;
>>>>> }
>>>>>
>>>>> Then the uwrmsr's unit will be { "uwrmsr",       {	{ uwrmsr_Fixup,
>> d_mode },
>>>> Rq }, 0 }.
>>>>> What‘s your opinion?
>>>>
>>>> Hmm, not very nice, but I can't exclude it simply won't get any better.
>>>> My desire was for there to not be any new fixup function, and for
>>>> OP_Skip_MODRM to be used directly in the table entry. (In any event,
>>>> if you really need to keep this new function, please combine the
>>>> three if()-s into a single one, helping readability quite a bit.
>>>>
>>>
>>> I have another idea, can I have a new function like
>>>
>>> OP_back_codep(...)
>>> {
>>> 	Ins->codep--;
>>> 	Return true;
>>> }
>>>
>>> So the uwrmsr's unit will be { "uwrmsr", { Skip_MODRM, Id, Back_Codep, Rq },
>> 0 }.
>>
>> Well, the main thing I dislike is the decrementing of codep, no matter where it's
>> put. In case you don't think you can get away without, I guess I'll try afterwards,
>> aiming at an incremental change then.
>>
> 
> I have another one in mind at the moment. Can I have a bool variable in instr_info,
> 
> @@ -221,6 +221,9 @@ struct instr_info
>    /* Record whether EVEX masking is used incorrectly.  */
>    bool illegal_masking;
> 
> +  /* Record whether the modrm byte has been skipped.  */
> +  bool has_skipped_modrm.
> +
>    unsigned char op_ad;
> 
> And the Skip mod/rm byte pattern will be 
> 
> @@ -11668,7 +11658,11 @@ OP_Skip_MODRM (instr_info *ins, int bytemode ATTRIBUTE_UNUSED,
> 
>    /* Skip mod/rm byte.  */
>    MODRM_CHECK;
> -  ins->codep++;
> +  if (!ins->has_skipped_modrm)
> +    {
> +      ins->codep++;
> +      ins->has_skipped_modrm = true;
> +    }
>    return true;
>  }
> .
> This change will be applied to all other similar sections (include OP_E).

Yes, that's along the lines of what I had in mind.

Jan
  

Patch

diff --git a/gas/NEWS b/gas/NEWS
index 71a1269b893..ab0e7813f27 100644
--- a/gas/NEWS
+++ b/gas/NEWS
@@ -1,5 +1,7 @@ 
 -*- text -*-
 
+* Add support for Intel USER_MSR instructions.
+
 * Add support for Intel AVX10.1.
 
 * Add support for Intel PBNDKB instructions.
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 714354d5116..2a7b1329858 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1164,6 +1164,7 @@  static const arch_entry cpu_arch[] =
   VECARCH (sm4, SM4, ANY_SM4, reset),
   SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
   VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
+  SUBARCH (user_msr, USER_MSR, USER_MSR, false),
 };
 
 #undef SUBARCH
@@ -3863,6 +3864,8 @@  build_vex_prefix (const insn_template *t)
 	case SPACE_0F:
 	case SPACE_0F38:
 	case SPACE_0F3A:
+	case SPACE_EVEXMAP5:
+	case SPACE_VEXMAP7:
 	  i.vex.bytes[0] = 0xc4;
 	  break;
 	case SPACE_XOP08:
@@ -8752,6 +8755,18 @@  build_modrm_byte (void)
       source = v;
       v = tmp;
     }
+  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCE_DEST)
+    {
+      if (dest == (unsigned int) ~0)
+	source = source ^ 1;
+      else
+	{
+	  unsigned int tmp = source;
+
+	  source = dest;
+	  dest = tmp;
+	}
+    }
 
   if (v < MAX_OPERANDS)
     {
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index b04e1b00b4b..03ee980bef7 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -216,6 +216,7 @@  accept various extension mnemonics.  For example,
 @code{avx10.1/512},
 @code{avx10.1/256},
 @code{avx10.1/128},
+@code{user_msr},
 @code{amx_int8},
 @code{amx_bf16},
 @code{amx_fp16},
@@ -1650,7 +1651,7 @@  supported on the CPU specified.  The choices for @var{cpu_type} are:
 @item @samp{.cmpccxadd} @tab @samp{.wrmsrns} @tab @samp{.msrlist}
 @item @samp{.avx_ne_convert} @tab @samp{.rao_int} @tab @samp{.fred} @tab @samp{.lkgs}
 @item @samp{.avx_vnni_int16} @tab @samp{.sha512} @tab @samp{.sm3} @tab @samp{.sm4}
-@item @samp{.pbndkb}
+@item @samp{.pbndkb} @tab @samp{.user_msr}
 @item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
 @item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
 @item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index ee74bcd4615..81ce1e38b10 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -509,6 +509,7 @@  if [gas_32_check] then {
     run_dump_test "sm4"
     run_dump_test "sm4-intel"
     run_list_test "pbndkb-inval"
+    run_list_test "user_msr-inval"
     run_list_test "sg"
     run_dump_test "clzero"
     run_dump_test "invlpgb"
diff --git a/gas/testsuite/gas/i386/user_msr-inval.l b/gas/testsuite/gas/i386/user_msr-inval.l
new file mode 100644
index 00000000000..9618645bea8
--- /dev/null
+++ b/gas/testsuite/gas/i386/user_msr-inval.l
@@ -0,0 +1,3 @@ 
+.* Assembler messages:
+.*:6: Error: `urdmsr' is only supported in 64-bit mode
+.*:7: Error: `uwrmsr' is only supported in 64-bit mode
diff --git a/gas/testsuite/gas/i386/user_msr-inval.s b/gas/testsuite/gas/i386/user_msr-inval.s
new file mode 100644
index 00000000000..dba67ddfd5d
--- /dev/null
+++ b/gas/testsuite/gas/i386/user_msr-inval.s
@@ -0,0 +1,7 @@ 
+# Check Illegal 32bit USER_MSR instructions
+
+	.allow_index_reg
+	.text
+_start:
+	urdmsr	%r12, %r14	 #USER_MSR
+	uwrmsr	%r12, %r14	 #USER_MSR
diff --git a/gas/testsuite/gas/i386/x86-64-user_msr-intel.d b/gas/testsuite/gas/i386/x86-64-user_msr-intel.d
new file mode 100644
index 00000000000..fe8418b2ce0
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-user_msr-intel.d
@@ -0,0 +1,18 @@ 
+#as:
+#objdump: -dw -Mintel
+#name: x86_64 USER_MSR insns (Intel disassembly)
+#source: x86-64-user_msr.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*f2 4d 0f 38 f8 f4\s+urdmsr r12,r14
+\s*[a-f0-9]+:\s*c4 c7 7b f8 c4 0f 0f 12 03\s+urdmsr r12,0x3120f0f
+\s*[a-f0-9]+:\s*f3 4d 0f 38 f8 f4\s+uwrmsr r14,r12
+\s*[a-f0-9]+:\s*c4 c7 7a f8 c4 0f 0f 12 03\s+uwrmsr 0x3120f0f,r12
+\s*[a-f0-9]+:\s*f2 4d 0f 38 f8 f4\s+urdmsr r12,r14
+\s*[a-f0-9]+:\s*c4 c7 7b f8 c4 0f 0f 12 03\s+urdmsr r12,0x3120f0f
+\s*[a-f0-9]+:\s*f3 4d 0f 38 f8 f4\s+uwrmsr r14,r12
+\s*[a-f0-9]+:\s*c4 c7 7a f8 c4 0f 0f 12 03\s+uwrmsr 0x3120f0f,r12
diff --git a/gas/testsuite/gas/i386/x86-64-user_msr.d b/gas/testsuite/gas/i386/x86-64-user_msr.d
new file mode 100644
index 00000000000..295f80c0318
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-user_msr.d
@@ -0,0 +1,18 @@ 
+#as:
+#objdump: -dw
+#name: x86_64 USER_MSR insns
+#source: x86-64-user_msr.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*f2 4d 0f 38 f8 f4\s+urdmsr %r14,%r12
+\s*[a-f0-9]+:\s*c4 c7 7b f8 c4 0f 0f 12 03\s+urdmsr \$0x3120f0f,%r12
+\s*[a-f0-9]+:\s*f3 4d 0f 38 f8 f4\s+uwrmsr %r12,%r14
+\s*[a-f0-9]+:\s*c4 c7 7a f8 c4 0f 0f 12 03\s+uwrmsr %r12,\$0x3120f0f
+\s*[a-f0-9]+:\s*f2 4d 0f 38 f8 f4\s+urdmsr %r14,%r12
+\s*[a-f0-9]+:\s*c4 c7 7b f8 c4 0f 0f 12 03\s+urdmsr \$0x3120f0f,%r12
+\s*[a-f0-9]+:\s*f3 4d 0f 38 f8 f4\s+uwrmsr %r12,%r14
+\s*[a-f0-9]+:\s*c4 c7 7a f8 c4 0f 0f 12 03\s+uwrmsr %r12,\$0x3120f0f
diff --git a/gas/testsuite/gas/i386/x86-64-user_msr.s b/gas/testsuite/gas/i386/x86-64-user_msr.s
new file mode 100644
index 00000000000..c62c5e785bd
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-user_msr.s
@@ -0,0 +1,15 @@ 
+# Check 64bit USER_MSR instructions
+
+	.allow_index_reg
+	.text
+_start:
+	urdmsr	%r14, %r12	 #USER_MSR
+	urdmsr	$51515151, %r12	 #USER_MSR
+	uwrmsr	%r12, %r14	 #USER_MSR
+	uwrmsr	%r12, $51515151	 #USER_MSR
+
+.intel_syntax noprefix
+	urdmsr	r12, r14	 #USER_MSR
+	urdmsr	r12, 51515151	 #USER_MSR
+	uwrmsr	r14, r12	 #USER_MSR
+	uwrmsr	51515151, r12	 #USER_MSR
diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp
index 52711cdcf6f..5765d473e31 100644
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -450,6 +450,8 @@  run_dump_test "x86-64-sm4"
 run_dump_test "x86-64-sm4-intel"
 run_dump_test "x86-64-pbndkb"
 run_dump_test "x86-64-pbndkb-intel"
+run_dump_test "x86-64-user_msr"
+run_dump_test "x86-64-user_msr-intel"
 run_dump_test "x86-64-clzero"
 run_dump_test "x86-64-mwaitx-bdver4"
 run_list_test "x86-64-mwaitx-reg"
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index 87ecf0f5e23..810cd76669f 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -418,6 +418,7 @@  fetch_error (const instr_info *ins)
 #define Gv { OP_G, v_mode }
 #define Gd { OP_G, d_mode }
 #define Gdq { OP_G, dq_mode }
+#define Gq { OP_G, q_mode }
 #define Gm { OP_G, m_mode }
 #define Gva { OP_G, va_mode }
 #define Gw { OP_G, w_mode }
@@ -430,6 +431,7 @@  fetch_error (const instr_info *ins)
 #define Id { OP_I, d_mode }
 #define Iw { OP_I, w_mode }
 #define I1 { OP_I, const_1_mode }
+#define Id0 { OP_I, d_0_mode }
 #define Jb { OP_J, b_mode }
 #define Jv { OP_J, v_mode }
 #define Jdqw { OP_J, dqw_mode }
@@ -618,6 +620,8 @@  enum
   w_mode,
   /* double word operand  */
   d_mode,
+  /* double word operand 0 */
+  d_0_mode,
   /* word operand with operand swapped  */
   w_swap_mode,
   /* double word operand with operand swapped */
@@ -845,6 +849,7 @@  enum
   REG_VEX_0FAE,
   REG_VEX_0F3849_X86_64_L_0_W_0_M_1_P_0,
   REG_VEX_0F38F3_L_0,
+  REG_VEX_MAP7_F8_L_0_W_0_M_1,
 
   REG_XOP_09_01_L_0,
   REG_XOP_09_02_L_0,
@@ -893,8 +898,10 @@  enum
   MOD_0FC7_REG_6,
   MOD_0FC7_REG_7,
   MOD_0F38DC_PREFIX_1,
+  MOD_0F38F8,
 
   MOD_VEX_0F3849_X86_64_L_0_W_0,
+  MOD_VEX_MAP7_F8_L_0_W_0,
 };
 
 enum
@@ -1010,7 +1017,8 @@  enum
   PREFIX_0F38F0,
   PREFIX_0F38F1,
   PREFIX_0F38F6,
-  PREFIX_0F38F8,
+  PREFIX_0F38F8_M_0,
+  PREFIX_0F38F8_M_1_X86_64,
   PREFIX_0F38FA,
   PREFIX_0F38FB,
   PREFIX_0F38FC,
@@ -1073,6 +1081,7 @@  enum
   PREFIX_VEX_0F38F6_L_0,
   PREFIX_VEX_0F38F7_L_0,
   PREFIX_VEX_0F3AF0_L_0,
+  PREFIX_VEX_MAP7_F8_L_0_W_0_M_1_R_0_X86_64,
 
   PREFIX_EVEX_0F5B,
   PREFIX_EVEX_0F6F,
@@ -1217,6 +1226,7 @@  enum
   X86_64_0F18_REG_7_MOD_0,
   X86_64_0F24,
   X86_64_0F26,
+  X86_64_0F38F8_M_1,
   X86_64_0FC7_REG_6_MOD_3_PREFIX_1,
 
   X86_64_VEX_0F3849,
@@ -1240,6 +1250,7 @@  enum
   X86_64_VEX_0F38ED,
   X86_64_VEX_0F38EE,
   X86_64_VEX_0F38EF,
+  X86_64_VEX_MAP7_F8_L_0_W_0_M_1_R_0,
 };
 
 enum
@@ -1259,7 +1270,8 @@  enum
 {
   VEX_0F = 0,
   VEX_0F38,
-  VEX_0F3A
+  VEX_0F3A,
+  VEX_MAP7
 };
 
 enum
@@ -1350,6 +1362,7 @@  enum
   VEX_LEN_0F3ADE_W_0,
   VEX_LEN_0F3ADF,
   VEX_LEN_0F3AF0,
+  VEX_LEN_MAP7_F8,
   VEX_LEN_XOP_08_85,
   VEX_LEN_XOP_08_86,
   VEX_LEN_XOP_08_87,
@@ -1510,6 +1523,7 @@  enum
   VEX_W_0F3ACE,
   VEX_W_0F3ACF,
   VEX_W_0F3ADE,
+  VEX_W_MAP7_F8_L_0,
 
   VEX_W_XOP_08_85_L_0,
   VEX_W_XOP_08_86_L_0,
@@ -2849,6 +2863,10 @@  static const struct dis386 reg_table[][8] = {
     { "blsmskS",	{ VexGdq, Edq }, PREFIX_OPCODE },
     { "blsiS",		{ VexGdq, Edq }, PREFIX_OPCODE },
   },
+  /* REG_VEX_MAP7_F8_L_0_W_0_M_1 */
+  {
+    { X86_64_TABLE (X86_64_VEX_MAP7_F8_L_0_W_0_M_1_R_0) },
+  },
   /* REG_XOP_09_01_L_0 */
   {
     { Bad_Opcode },
@@ -3555,13 +3573,22 @@  static const struct dis386 prefix_table[][4] = {
     { Bad_Opcode },
   },
 
-  /* PREFIX_0F38F8 */
+  /* PREFIX_0F38F8_M_0 */
   {
     { Bad_Opcode },
     { "enqcmds", { Gva, M }, 0 },
     { "movdir64b", { Gva, M }, 0 },
     { "enqcmd",	{ Gva, M }, 0 },
   },
+
+  /* PREFIX_0F38F8_M_1_X86_64 */
+  {
+    { Bad_Opcode },
+    { "uwrmsr",		{ Gdq, Eq }, 0 },
+    { Bad_Opcode },
+    { "urdmsr",		{ Rdq, Gq }, 0 },
+  },
+
   /* PREFIX_0F38FA */
   {
     { Bad_Opcode },
@@ -4014,6 +4041,14 @@  static const struct dis386 prefix_table[][4] = {
     { "rorxS",		{ Gdq, Edq, Ib }, 0 },
   },
 
+  /* PREFIX_VEX_MAP7_F8_L_0_W_0_M_1_R_0_X86_64 */
+  {
+    { Bad_Opcode },
+    { "uwrmsr", { Id0, Eq }, 0 },
+    { Bad_Opcode },
+    { "urdmsr", { Eq, Id }, 0 },
+  },
+
 #include "i386-dis-evex-prefix.h"
 };
 
@@ -4322,6 +4357,12 @@  static const struct dis386 x86_64_table[][2] = {
     { "movZ",		{ Td, Em }, 0 },
   },
 
+  {
+    /* X86_64_0F38F8_M_1 */
+    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_0F38F8_M_1_X86_64) },
+  },
+
   /* X86_64_0FC7_REG_6_MOD_3_PREFIX_1 */
   {
     { Bad_Opcode },
@@ -4453,6 +4494,13 @@  static const struct dis386 x86_64_table[][2] = {
     { Bad_Opcode },
     { "cmpnlexadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
   },
+
+  /* X86_64_VEX_MAP7_F8_L_0_W_0_M_1_R_0 */
+  {
+    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_VEX_MAP7_F8_L_0_W_0_M_1_R_0_X86_64) },
+  },
+
 };
 
 static const struct dis386 three_byte_table[][256] = {
@@ -4739,7 +4787,7 @@  static const struct dis386 three_byte_table[][256] = {
     { PREFIX_TABLE (PREFIX_0F38F6) },
     { Bad_Opcode },
     /* f8 */
-    { PREFIX_TABLE (PREFIX_0F38F8) },
+    { MOD_TABLE (MOD_0F38F8) },
     { "movdiri",	{ Mdq, Gdq }, PREFIX_OPCODE },
     { PREFIX_TABLE (PREFIX_0F38FA) },
     { PREFIX_TABLE (PREFIX_0F38FB) },
@@ -6791,6 +6839,297 @@  static const struct dis386 vex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
   },
+  /* VEX_MAP7 */
+  {
+    /* 00 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 08 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 10 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 18 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 20 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 28 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 30 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 38 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 40 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 48 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 50 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 58 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 60 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 68 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 70 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 78 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 80 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 88 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 90 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 98 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* a0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* a8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* b0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* b8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* c0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* c8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* d0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* d8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* e0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* e8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* f0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* f8 */
+    { VEX_LEN_TABLE (VEX_LEN_MAP7_F8) },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+  },
 };
 
 #include "i386-dis-evex.h"
@@ -7205,6 +7544,11 @@  static const struct dis386 vex_len_table[][2] = {
     { PREFIX_TABLE (PREFIX_VEX_0F3AF0_L_0) },
   },
 
+  /* VEX_LEN_MAP7_F8 */
+  {
+    { VEX_W_TABLE (VEX_W_MAP7_F8_L_0) },
+  },
+
   /* VEX_LEN_XOP_08_85 */
   {
     { VEX_W_TABLE (VEX_W_XOP_08_85_L_0) },
@@ -7811,6 +8155,10 @@  static const struct dis386 vex_w_table[][2] = {
     /* VEX_W_0F3ADE */
     { VEX_LEN_TABLE (VEX_LEN_0F3ADE_W_0) },
   },
+  {
+    /* VEX_W_MAP7_F8_L_0 */
+    { MOD_TABLE (MOD_VEX_MAP7_F8_L_0_W_0) },
+  },
   /* VEX_W_XOP_08_85_L_0 */
   {
     { "vpmacssww", 	{ XM, Vex, EXx, XMVexI4 }, 0 },
@@ -8153,11 +8501,21 @@  static const struct dis386 mod_table[][2] = {
     { "aesenc128kl",    { XM, M }, 0 },
     { "loadiwkey",      { XM, EXx }, 0 },
   },
+  /* MOD_0F38F8 */
+  {
+    { PREFIX_TABLE (PREFIX_0F38F8_M_0) },
+    { X86_64_TABLE (X86_64_0F38F8_M_1) },
+  },
   {
     /* MOD_VEX_0F3849_X86_64_L_0_W_0 */
     { PREFIX_TABLE (PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_0) },
     { PREFIX_TABLE (PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_1) },
   },
+  {
+    /* MOD_VEX_MAP7_F8_L_0_W_0 */
+    { Bad_Opcode },
+    { REG_TABLE (REG_VEX_MAP7_F8_L_0_W_0_M_1) },
+  },
 
 #include "i386-dis-evex-mod.h"
 };
@@ -8769,6 +9127,9 @@  get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
 	case 0x3:
 	  vex_table_index = VEX_0F3A;
 	  break;
+	case 0x7:
+	  vex_table_index = VEX_MAP7;
+	  break;
 	}
       ins->codep++;
       ins->vex.w = *ins->codep & 0x80;
@@ -11248,6 +11609,20 @@  get32s (instr_info *ins, bfd_vma *res)
   return true;
 }
 
+/* The function is used to get imm32, when imm32 is operand 0, and ins only has 2 operands. */
+static bool
+get32_operand0 (instr_info *ins, bfd_vma *res)
+{
+
+  if (!fetch_code (ins->info, ins->codep + 5))
+    return false;
+  *res = *(ins->codep++ + 1) & (bfd_vma) 0xff;
+  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 8;
+  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 16;
+  *res |= (*(ins->codep++ + 1) & (bfd_vma) 0xff) << 24;
+  return true;
+}
+
 static bool
 get64 (instr_info *ins, uint64_t *res)
 {
@@ -12008,6 +12383,10 @@  OP_I (instr_info *ins, int bytemode, int sizeflag)
 	    }
 	}
       break;
+    case d_0_mode:
+      if (!get32_operand0 (ins, &op))
+	return false;
+      break;
     case const_1_mode:
       if (ins->intel_syntax)
 	oappend (ins, "1");
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index cfc5a7a6172..bb58afcbc06 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -380,6 +380,7 @@  static bitfield cpu_flags[] =
   BITFIELD (RAO_INT),
   BITFIELD (FRED),
   BITFIELD (LKGS),
+  BITFIELD (USER_MSR),
   BITFIELD (MWAITX),
   BITFIELD (CLZERO),
   BITFIELD (OSPKE),
@@ -1023,6 +1024,7 @@  process_i386_opcode_modifier (FILE *table, char *mod, unsigned int space,
     SPACE(0F3A),
     SPACE(EVEXMAP5),
     SPACE(EVEXMAP6),
+    SPACE(VEXMAP7),
     SPACE(XOP08),
     SPACE(XOP09),
     SPACE(XOP0A),
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index 149ae0e950c..a6f0d149d8c 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -223,6 +223,8 @@  enum i386_cpu
   CpuFRED,
   /* lkgs instruction required */
   CpuLKGS,
+  /* Intel USER_MSR Instruction support required.  */
+  CpuUSER_MSR,
   /* mwaitx instruction required */
   CpuMWAITX,
   /* Clzero instruction required */
@@ -471,6 +473,7 @@  typedef union i386_cpu_flags
       unsigned int cpurao_int:1;
       unsigned int cpufred:1;
       unsigned int cpulkgs:1;
+      unsigned int cpuuser_msr:1;
       unsigned int cpumwaitx:1;
       unsigned int cpuclzero:1;
       unsigned int cpuospke:1;
@@ -573,6 +576,11 @@  enum
   /* Instrucion requires that destination must be distinct from source
      registers.  */
 #define DISTINCT_DEST 9
+  /* Swap source and dest.
+     1. instructions only has 2 register operands and operand1 in MODRM.REG
+     and operand0 in MODRM.R/M.
+     2. instructions only has 2 operands and operand1 is IMM. */
+#define SWAP_SOURCE_DEST 10
   OperandConstraint,
   /* instruction ignores operand size prefix and in Intel mode ignores
      mnemonic size suffix check.  */
@@ -966,6 +974,7 @@  typedef struct insn_template
      3: 0F3A opcode prefix / space.
      5: EVEXMAP5 opcode prefix / space.
      6: EVEXMAP6 opcode prefix / space.
+     7: VEXMAP7 opcode prefix / space.
      8: XOP 08 opcode space.
      9: XOP 09 opcode space.
      A: XOP 0A opcode space.
@@ -976,6 +985,7 @@  typedef struct insn_template
 #define SPACE_0F3A	3
 #define SPACE_EVEXMAP5	5
 #define SPACE_EVEXMAP6	6
+#define SPACE_VEXMAP7	7
 #define SPACE_XOP08	8
 #define SPACE_XOP09	9
 #define SPACE_XOP0A	0xA
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index e60184ba154..75f3df6af2d 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -85,6 +85,7 @@ 
 #define RegKludge         OperandConstraint=REG_KLUDGE
 #define SwapSources       OperandConstraint=SWAP_SOURCES
 #define Ugh               OperandConstraint=UGH
+#define SwapSourceDest	  OperandConstraint=SWAP_SOURCE_DEST
 
 #define IgnoreSize	MnemonicSize=IGNORESIZE
 #define DefaultSize	MnemonicSize=DEFAULTSIZE
@@ -112,6 +113,8 @@ 
 #define EVexMap5 OpcodeSpace=SPACE_EVEXMAP5
 #define EVexMap6 OpcodeSpace=SPACE_EVEXMAP6
 
+#define VexMap7 OpcodeSpace=SPACE_VEXMAP7
+
 #define VexW0 VexW=VEXW0
 #define VexW1 VexW=VEXW1
 #define VexWIG VexW=VEXWIG
@@ -3346,3 +3349,12 @@  erets, 0xf20f01ca, FRED|x64, NoSuf, {}
 eretu, 0xf30f01ca, FRED|x64, NoSuf, {}
 
 // FRED instructions end.
+
+// USER_MSR instructions.
+
+urdmsr, 0xf20f38f8, USER_MSR|x64, Modrm|IgnoreSize|SwapSourceDest|NoSuf, { Reg64, Reg64 }
+urdmsr, 0xf2f8/0, USER_MSR|x64, Modrm|Vex128|VexMap7|VexW0|IgnoreSize|NoSuf, { Imm32S, Reg64 }
+uwrmsr, 0xf30f38f8, USER_MSR|x64, Modrm|IgnoreSize|NoSuf, { Reg64, Reg64 }
+uwrmsr, 0xf3f8/0, USER_MSR|x64, Modrm|Vex128|VexMap7|VexW0|IgnoreSize|SwapSourceDest|NoSuf, { Reg64, Imm32S }
+
+// USER_MSR instructions end.