[13/17,APX,NDD] Support APX NDD for right shift insns

Message ID 20231205022948.504790-14-hongyu.wang@intel.com
State New
Headers
Series Support Intel APX NDD |

Commit Message

Hongyu Wang Dec. 5, 2023, 2:29 a.m. UTC
  Similar to the left-shift patterns, the right-shift patterns do not need to
  omit the $1 shift count for the NDD form.

gcc/ChangeLog:

	* config/i386/i386.md (ashr<mode>3_cvt): Extend with new
	alternatives to support NDD, and adjust output templates.
	(*ashr<mode>3_1): Likewise for SI/DI mode.
	(*lshr<mode>3_1): Likewise.
	(*<insn>si3_1_zext): Likewise.
	(*ashr<mode>3_1): Likewise for QI/HI mode.
	(*lshrqi3_1): Likewise.
	(*lshrhi3_1): Likewise.
	(*<insn><mode>3_cmp): Likewise.
	(*<insn><mode>3_cconly): Likewise.
	(*ashrsi3_cvt_zext): Likewise, and use nonimmediate_operand for
	operands[1] to accept memory input for NDD alternative.
	(*highpartdisi2): Likewise.
	(*<insn>si3_cmp_zext): Likewise.
	(<insn><mode>3_carry): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-ndd.c: Add l/ashiftrt tests.
---
 gcc/config/i386/i386.md                 | 232 +++++++++++++++---------
 gcc/testsuite/gcc.target/i386/apx-ndd.c |  24 +++
 2 files changed, 166 insertions(+), 90 deletions(-)
  

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 43be1364bff..8bec8a63ba9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15803,39 +15803,45 @@  (define_mode_attr cvt_mnemonic
   [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
 
 (define_insn "ashr<mode>3_cvt"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm")
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r")
 	(ashiftrt:SWI48
-	  (match_operand:SWI48 1 "nonimmediate_operand" "*a,0")
+	  (match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm")
 	  (match_operand:QI 2 "const_int_operand")))
    (clobber (reg:CC FLAGS_REG))]
   "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
    && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands,
+			       TARGET_APX_NDD)"
   "@
    <cvt_mnemonic>
-   sar{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "imovx,ishift")
-   (set_attr "prefix_0f" "0,*")
-   (set_attr "length_immediate" "0,*")
-   (set_attr "modrm" "0,1")
+   sar{<imodesuffix>}\t{%2, %0|%0, %2}
+   sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd")
+   (set_attr "type" "imovx,ishift,ishift")
+   (set_attr "prefix_0f" "0,*,*")
+   (set_attr "length_immediate" "0,*,*")
+   (set_attr "modrm" "0,1,1")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*ashrsi3_cvt_zext"
-  [(set (match_operand:DI 0 "register_operand" "=*d,r")
+  [(set (match_operand:DI 0 "register_operand" "=*d,r,r")
 	(zero_extend:DI
-	  (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
+	  (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0,rm")
 		       (match_operand:QI 2 "const_int_operand"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && INTVAL (operands[2]) == 31
    && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands,
+			       TARGET_APX_NDD)"
   "@
    {cltd|cdq}
-   sar{l}\t{%2, %k0|%k0, %2}"
-  [(set_attr "type" "imovx,ishift")
-   (set_attr "prefix_0f" "0,*")
-   (set_attr "length_immediate" "0,*")
-   (set_attr "modrm" "0,1")
+   sar{l}\t{%2, %k0|%k0, %2}
+   sar{l}\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd")
+   (set_attr "type" "imovx,ishift,ishift")
+   (set_attr "prefix_0f" "0,*,*")
+   (set_attr "length_immediate" "0,*,*")
+   (set_attr "modrm" "0,1,1")
    (set_attr "mode" "SI")])
 
 (define_expand "@x86_shift<mode>_adj_3"
@@ -15877,13 +15883,15 @@  (define_insn "*bmi2_<insn><mode>3_1"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*ashr<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
 	(ashiftrt:SWI48
-	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
-	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands,
+			    TARGET_APX_NDD)"
 {
+  bool use_ndd = (which_alternative == 2);
   switch (get_attr_type (insn))
     {
     case TYPE_ISHIFTX:
@@ -15891,14 +15899,16 @@  (define_insn "*ashr<mode>3_1"
 
     default:
       if (operands[2] == const1_rtx
-	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+	  && !use_ndd)
 	return "sar{<imodesuffix>}\t%0";
       else
-	return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
+	return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		       : "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,bmi2")
-   (set_attr "type" "ishift,ishiftx")
+  [(set_attr "isa" "*,bmi2,apx_ndd")
+   (set_attr "type" "ishift,ishiftx,ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
@@ -15911,8 +15921,8 @@  (define_insn "*ashr<mode>3_1"
 ;; Specialization of *lshr<mode>3_1 below, extracting the SImode
 ;; highpart of a DI to be extracted, but allowing it to be clobbered.
 (define_insn_and_split "*highpartdisi2"
-  [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k") 0)
-        (lshiftrt:DI (match_operand:DI 1 "register_operand" "0,0,k")
+  [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k,r") 0)
+        (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0,k,rm")
 		     (const_int 32)))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
@@ -15931,16 +15941,20 @@  (define_insn_and_split "*highpartdisi2"
       DONE;
     }
   operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
-})
+}
+[(set_attr "isa" "*,*,*,apx_ndd")])
+
 
 (define_insn "*lshr<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k")
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
 	(lshiftrt:SWI48
-	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k")
-	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>")))
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands,
+			    TARGET_APX_NDD)"
 {
+  bool use_ndd = (which_alternative == 3);
   switch (get_attr_type (insn))
     {
     case TYPE_ISHIFTX:
@@ -15949,14 +15963,16 @@  (define_insn "*lshr<mode>3_1"
 
     default:
       if (operands[2] == const1_rtx
-	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+	  && !use_ndd)
 	return "shr{<imodesuffix>}\t%0";
       else
-	return "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
+	return use_ndd ? "shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		       : "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,bmi2,<kmov_isa>")
-   (set_attr "type" "ishift,ishiftx,msklog")
+  [(set_attr "isa" "*,bmi2,<kmov_isa>,apx_ndd")
+   (set_attr "type" "ishift,ishiftx,msklog,ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (and (match_operand 2 "const1_operand")
@@ -15989,13 +16005,15 @@  (define_insn "*bmi2_<insn>si3_1_zext"
    (set_attr "mode" "SI")])
 
 (define_insn "*<insn>si3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(zero_extend:DI
-	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
-			  (match_operand:QI 2 "nonmemory_operand" "cI,r"))))
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
+			  (match_operand:QI 2 "nonmemory_operand" "cI,r,cI"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands,
+					    TARGET_APX_NDD)"
 {
+  bool use_ndd = (which_alternative == 2);
   switch (get_attr_type (insn))
     {
     case TYPE_ISHIFTX:
@@ -16003,14 +16021,16 @@  (define_insn "*<insn>si3_1_zext"
 
     default:
       if (operands[2] == const1_rtx
-	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+	  && !use_ndd)
 	return "<shift>{l}\t%k0";
       else
-	return "<shift>{l}\t{%2, %k0|%k0, %2}";
+	return use_ndd ? "<shift>{l}\t{%2, %1, %k0|%k0, %1, %2}"
+		       : "<shift>{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set_attr "isa" "*,bmi2")
-   (set_attr "type" "ishift,ishiftx")
+  [(set_attr "isa" "*,bmi2,apx_ndd")
+   (set_attr "type" "ishift,ishiftx,ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
@@ -16033,20 +16053,25 @@  (define_split
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
 (define_insn "*ashr<mode>3_1"
-  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r")
 	(ashiftrt:SWI12
-	  (match_operand:SWI12 1 "nonimmediate_operand" "0")
-	  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+	  (match_operand:SWI12 1 "nonimmediate_operand" "0, rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands,
+			    TARGET_APX_NDD)"
 {
+  bool use_ndd = which_alternative == 1;
   if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
     return "sar{<imodesuffix>}\t%0";
   else
-    return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		   : "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "*, apx_ndd")
+   (set_attr "type" "ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
@@ -16057,29 +16082,33 @@  (define_insn "*ashr<mode>3_1"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*lshrqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand"  "=qm,?k")
+  [(set (match_operand:QI 0 "nonimmediate_operand"  "=qm,?k,r")
 	(lshiftrt:QI
-	  (match_operand:QI 1 "nonimmediate_operand" "0, k")
-	  (match_operand:QI 2 "nonmemory_operand"    "cI,Wb")))
+	  (match_operand:QI 1 "nonimmediate_operand" "0, k, rm")
+	  (match_operand:QI 2 "nonmemory_operand"    "cI,Wb,cI")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+  "ix86_binary_operator_ok (LSHIFTRT, QImode, operands,
+			    TARGET_APX_NDD)"
 {
+  bool use_ndd = (which_alternative == 2);
   switch (get_attr_type (insn))
     {
     case TYPE_ISHIFT:
       if (operands[2] == const1_rtx
-	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+	  && !use_ndd)
 	return "shr{b}\t%0";
       else
-	return "shr{b}\t{%2, %0|%0, %2}";
+	return use_ndd ? "shr{b}\t{%2, %1, %0|%0, %1, %2}"
+		       : "shr{b}\t{%2, %0|%0, %2}";
     case TYPE_MSKLOG:
       return "#";
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "isa" "*,avx512dq")
-   (set_attr "type" "ishift,msklog")
+  [(set_attr "isa" "*,avx512dq,apx_ndd")
+   (set_attr "type" "ishift,msklog,ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (and (match_operand 2 "const1_operand")
@@ -16091,29 +16120,33 @@  (define_insn "*lshrqi3_1"
    (set_attr "mode" "QI")])
 
 (define_insn "*lshrhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k")
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r")
 	(lshiftrt:HI
-	  (match_operand:HI 1 "nonimmediate_operand" "0, k")
-	  (match_operand:QI 2 "nonmemory_operand" "cI, Ww")))
+	  (match_operand:HI 1 "nonimmediate_operand" "0, k, rm")
+	  (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands,
+			    TARGET_APX_NDD)"
 {
+  bool use_ndd = (which_alternative == 2);
   switch (get_attr_type (insn))
     {
     case TYPE_ISHIFT:
       if (operands[2] == const1_rtx
-	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+	  && !use_ndd)
 	return "shr{w}\t%0";
       else
-	return "shr{w}\t{%2, %0|%0, %2}";
+	return use_ndd ? "shr{w}\t{%2, %1, %0|%0, %1, %2}"
+		       : "shr{w}\t{%2, %0|%0, %2}";
     case TYPE_MSKLOG:
       return "#";
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "isa" "*, avx512f")
-   (set_attr "type" "ishift,msklog")
+  [(set_attr "isa" "*, avx512f, apx_ndd")
+   (set_attr "type" "ishift,msklog,ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (and (match_operand 2 "const1_operand")
@@ -16166,25 +16199,30 @@  (define_insn "*<insn><mode>3_cmp"
   [(set (reg FLAGS_REG)
 	(compare
 	  (any_shiftrt:SWI
-	    (match_operand:SWI 1 "nonimmediate_operand" "0")
-	    (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+	    (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
+	    (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
 	  (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
 	(any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
   "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
     || (operands[2] == const1_rtx
 	&& TARGET_SHIFT1))
    && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
+			       TARGET_APX_NDD)"
 {
+  bool use_ndd = which_alternative == 1;
   if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
     return "<shift>{<imodesuffix>}\t%0";
   else
-    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return use_ndd ? "<shift>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		   : "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
@@ -16197,10 +16235,10 @@  (define_insn "*<insn><mode>3_cmp"
 (define_insn "*<insn>si3_cmp_zext"
   [(set (reg FLAGS_REG)
 	(compare
-	  (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
 			  (match_operand:QI 2 "const_1_to_31_operand"))
 	  (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r")
 	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT
    && (optimize_function_for_size_p (cfun)
@@ -16208,15 +16246,20 @@  (define_insn "*<insn>si3_cmp_zext"
        || (operands[2] == const1_rtx
 	   && TARGET_SHIFT1))
    && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+   && ix86_binary_operator_ok (<CODE>, SImode, operands,
+			       TARGET_APX_NDD)"
 {
+  bool use_ndd = which_alternative == 1;
   if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
     return "<shift>{l}\t%k0";
   else
-    return "<shift>{l}\t{%2, %k0|%k0, %2}";
+    return use_ndd ? "<shift>{l}\t{%2, %1, %k0|%k0, %1, %2}"
+		   : "<shift>{l}\t{%2, %k0|%k0, %2}";
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
@@ -16230,23 +16273,28 @@  (define_insn "*<insn><mode>3_cconly"
   [(set (reg FLAGS_REG)
 	(compare
 	  (any_shiftrt:SWI
-	    (match_operand:SWI 1 "register_operand" "0")
-	    (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+	    (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
+	    (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
 	  (const_int 0)))
-   (clobber (match_scratch:SWI 0 "=<r>"))]
+   (clobber (match_scratch:SWI 0 "=<r>,r"))]
   "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
     || (operands[2] == const1_rtx
 	&& TARGET_SHIFT1))
    && ix86_match_ccmode (insn, CCGOCmode)"
 {
+  bool use_ndd = which_alternative == 1;
   if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
     return "<shift>{<imodesuffix>}\t%0";
   else
-    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return use_ndd
+	   ? "<shift>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+	   : "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
@@ -16850,18 +16898,22 @@  (define_insn "rcrdi2"
 ;; Versions of sar and shr that set the carry flag.
 (define_insn "<insn><mode>3_carry"
   [(set (reg:CCC FLAGS_REG)
-	(unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "register_operand" "0")
+	(unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
 				(const_int 1))
 		     (const_int 0)] UNSPEC_CC_NE))
-   (set (match_operand:SWI48 0 "register_operand" "=r")
+   (set (match_operand:SWI48 0 "register_operand" "=r,r")
 	(any_shiftrt:SWI48 (match_dup 1) (const_int 1)))]
   ""
 {
-  if (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+  bool use_ndd = which_alternative == 1;
+  if ((TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
     return "<shift>{<imodesuffix>}\t%0";
-  return "<shift>{<imodesuffix>}\t{1, %0|%0, 1}";
+  return use_ndd ? "<shift>{<imodesuffix>}\t{$1, %1, %0|%0, %1, 1}"
+		 : "<shift>{<imodesuffix>}\t{$1, %0|%0, 1}";
 }
-  [(set_attr "type" "ishift1")
+  [(set_attr "isa" "*, apx_ndd")
+   (set_attr "type" "ishift1")
    (set (attr "length_immediate")
      (if_then_else
        (ior (match_test "TARGET_SHIFT1")
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 9951fb00a4c..239c427514a 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -2,6 +2,8 @@ 
 /* { dg-options "-mapxf -march=x86-64 -O2" } */
 /* { dg-final { scan-assembler-not "movl"} } */
 
+#include <stdint.h>
+
 #define FOO(TYPE, OP_NAME, OP)   \
 TYPE				 \
 __attribute__ ((noipa)) 	 \
@@ -132,6 +134,24 @@  FOO3 (int, shl, <<, 7)
 FOO (long, shl, <<)
 FOO3 (long, shl, <<, 7)
 
+FOO (char, sar, >>)
+FOO3 (char, sar, >>, 7)
+FOO (short, sar, >>)
+FOO3 (short, sar, >>, 7)
+FOO (int, sar, >>)
+FOO3 (int, sar, >>, 7)
+FOO (long, sar, >>)
+FOO3 (long, sar, >>, 7)
+
+FOO (uint8_t, shr, >>)
+FOO3 (uint8_t, shr, >>, 7)
+FOO (uint16_t, shr, >>)
+FOO3 (uint16_t, shr, >>, 7)
+FOO (uint32_t, shr, >>)
+FOO3 (uint32_t, shr, >>, 7)
+FOO (uint64_t, shr, >>)
+FOO3 (uint64_t, shr, >>, 7)
+
 /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
 /* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
 /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
@@ -156,3 +176,7 @@  FOO3 (long, shl, <<, 7)
 /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
 /* { dg-final { scan-assembler-times "sal(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
 /* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]*7, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "sar(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "sar(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */