[7/8] AArch64: Consolidate zero and sign extension patterns and add missing ones.

Message ID Y1+4qItMrQHbdqqD@arm.com
State Deferred
Headers
Series [1/8] middle-end: Recognize scalar reductions from bitfields and array_refs |

Commit Message

Tamar Christina Oct. 31, 2022, 11:59 a.m. UTC
  Hi All,

The target has various zero and sign extension patterns.  These, however, live
in various locations around the MD file and almost all of them are split
differently.  Because the patterns are scattered, we also ended up missing
valid extensions.  For instance, smov is almost never generated.

This change tries to make this more manageable by consolidating the patterns as
much as possible and in doing so fix the missing alternatives.

There were also some duplicate patterns.  Note that the
zero_extend<*_ONLY:mode><SD_HSDI:mode>2 patterns are nearly identical; however,
QImode lacks an alternative that the others have (it has five alternatives
rather than six), so I have left them as 3 different patterns next to each
other.

In a lot of cases the wrong iterator was used leaving out cases that should
exist.

I've also changed the masks used for zero extensions to hex instead of decimal,
as it's clearer what they do that way and it aligns better with the output of
other compilers.

This leaves the bulk of the extensions in just 3 patterns.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md
	(*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>): Changed to ...
	(*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>): ... This.
	(*aarch64_get_lane_extenddi<VS:mode>): New.
	* config/aarch64/aarch64.md (<optab>sidi2, *extendsidi2_aarch64,
	<optab>qihi2, *extendqihi2_aarch64, *zero_extendsidi2_aarch64): Remove
	duplicate patterns.
	(<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2,
	*extend<SHORT:mode><GPI:mode>2_aarch64): Remove, consolidate
	into ...
	(extend<ALLX:mode><SD_HSDI:mode>2): ... This.
	(*zero_extendqihi2_aarch64,
	*zero_extend<SHORT:mode><GPI:mode>2_aarch64): Remove, consolidate into
	...
	(zero_extend<SI_ONLY:mode><SD_HSDI:mode>2,
	zero_extend<HI_ONLY:mode><SD_HSDI:mode>2,
	zero_extend<QI_ONLY:mode><SD_HSDI:mode>2): ... These.
	(*ands<GPI:mode>_compare0): Renamed to ...
	(*ands<SD_HSDI:mode>_compare0): ... This.
	* config/aarch64/iterators.md (HI_ONLY, QI_ONLY): New.
	(short_mask): Use hex rather than dec and add SI.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/ands_3.c: Update codegen.
	* gcc.target/aarch64/sve/slp_1.c: Likewise.
	* gcc.target/aarch64/tst_5.c: Likewise.
	* gcc.target/aarch64/tst_6.c: Likewise.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8a84a8560e982b8155b18541f5504801b3330124..d0b37c4dd48aeafd3d87c90dc3270e71af5a72b9 100644




--
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8a84a8560e982b8155b18541f5504801b3330124..d0b37c4dd48aeafd3d87c90dc3270e71af5a72b9 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4237,19 +4237,34 @@ (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
 )
 
-(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
+(define_insn "*aarch64_get_lane_extenddi<VS:mode>"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(sign_extend:DI
+	  (vec_select:<VS:VEL>
+	    (match_operand:VS 1 "register_operand" "w")
+	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_SIMD"
+  {
+    operands[2] = aarch64_endian_lane_rtx (<VS:MODE>mode,
+					   INTVAL (operands[2]));
+    return "smov\\t%x0, %1.<VS:Vetype>[%2]";
+  }
+  [(set_attr "type" "neon_to_gp<VS:q>")]
+)
+
+(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(zero_extend:GPI
-	  (vec_select:<VDQQH:VEL>
-	    (match_operand:VDQQH 1 "register_operand" "w")
+	  (vec_select:<VDQV_L:VEL>
+	    (match_operand:VDQV_L 1 "register_operand" "w")
 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
   "TARGET_SIMD"
   {
-    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
+    operands[2] = aarch64_endian_lane_rtx (<VDQV_L:MODE>mode,
 					   INTVAL (operands[2]));
-    return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
+    return "umov\\t%w0, %1.<VDQV_L:Vetype>[%2]";
   }
-  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
+  [(set_attr "type" "neon_to_gp<VDQV_L:q>")]
 )
 
 ;; Lane extraction of a value, neither sign nor zero extension
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 3ea16dbc2557c6a4f37104d44a49f77f768eb53d..09ae1118371f82ca63146fceb953eb9e820d05a4 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1911,22 +1911,6 @@ (define_insn "storewb_pair<TX:mode>_<P:mode>"
 ;; Sign/Zero extension
 ;; -------------------------------------------------------------------
 
-(define_expand "<optab>sidi2"
-  [(set (match_operand:DI 0 "register_operand")
-	(ANY_EXTEND:DI (match_operand:SI 1 "nonimmediate_operand")))]
-  ""
-)
-
-(define_insn "*extendsidi2_aarch64"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-        (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
-  ""
-  "@
-   sxtw\t%0, %w1
-   ldrsw\t%0, %1"
-  [(set_attr "type" "extend,load_4")]
-)
-
 (define_insn "*load_pair_extendsidi2_aarch64"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump")))
@@ -1940,21 +1924,6 @@ (define_insn "*load_pair_extendsidi2_aarch64"
   [(set_attr "type" "load_8")]
 )
 
-(define_insn "*zero_extendsidi2_aarch64"
-  [(set (match_operand:DI 0 "register_operand" "=r,r,w,w,r,w")
-        (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
-  ""
-  "@
-   uxtw\t%0, %w1
-   ldr\t%w0, %1
-   fmov\t%s0, %w1
-   ldr\t%s0, %1
-   fmov\t%w0, %s1
-   fmov\t%s0, %s1"
-  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
-   (set_attr "arch" "*,*,fp,fp,fp,fp")]
-)
-
 (define_insn "*load_pair_zero_extendsidi2_aarch64"
   [(set (match_operand:DI 0 "register_operand" "=r,w")
 	(zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump")))
@@ -1971,61 +1940,64 @@ (define_insn "*load_pair_zero_extendsidi2_aarch64"
    (set_attr "arch" "*,fp")]
 )
 
-(define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2"
-  [(set (match_operand:GPI 0 "register_operand")
-        (ANY_EXTEND:GPI (match_operand:SHORT 1 "nonimmediate_operand")))]
-  ""
-)
-
-(define_insn "*extend<SHORT:mode><GPI:mode>2_aarch64"
-  [(set (match_operand:GPI 0 "register_operand" "=r,r,r")
-        (sign_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,w")))]
+(define_insn "extend<ALLX:mode><SD_HSDI:mode>2"
+  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,r")
+        (sign_extend:SD_HSDI
+	  (match_operand:ALLX 1 "nonimmediate_operand" "r,m,w")))]
   ""
   "@
-   sxt<SHORT:size>\t%<GPI:w>0, %w1
-   ldrs<SHORT:size>\t%<GPI:w>0, %1
-   smov\t%<GPI:w>0, %1.<SHORT:size>[0]"
+   sxt<ALLX:size>\t%<SD_HSDI:w>0, %w1
+   ldrs<ALLX:size>\t%<SD_HSDI:w>0, %1
+   smov\t%<SD_HSDI:w>0, %1.<ALLX:Vetype>[0]"
   [(set_attr "type" "extend,load_4,neon_to_gp")
    (set_attr "arch" "*,*,fp")]
 )
 
-(define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
-  [(set (match_operand:GPI 0 "register_operand" "=r,r,w,r")
-        (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m,w")))]
+(define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
+  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
+        (zero_extend:SD_HSDI
+	  (match_operand:SI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
   ""
   "@
-   and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask>
-   ldr<SHORT:size>\t%w0, %1
-   ldr\t%<SHORT:size>0, %1
-   umov\t%w0, %1.<SHORT:size>[0]"
-  [(set_attr "type" "logic_imm,load_4,f_loads,neon_to_gp")
-   (set_attr "arch" "*,*,fp,fp")]
-)
-
-(define_expand "<optab>qihi2"
-  [(set (match_operand:HI 0 "register_operand")
-        (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand")))]
-  ""
+   uxt<SI_ONLY:size>\t%<SD_HSDI:w>0, %w1
+   ldr<SI_ONLY:sizel>\t%w0, %1
+   fmov\t%<SI_ONLY:Vetype>0, %w1
+   ldr\t%<SI_ONLY:Vetype>0, %1
+   fmov\t%w0, %<SI_ONLY:Vetype>1
+   fmov\t%<SI_ONLY:Vetype>0, %<SI_ONLY:Vetype>1"
+  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
+   (set_attr "arch" "*,*,fp,fp,fp,fp")]
 )
 
-(define_insn "*extendqihi2_aarch64"
-  [(set (match_operand:HI 0 "register_operand" "=r,r")
-	(sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+(define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
+  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
+        (zero_extend:SD_HSDI
+	  (match_operand:HI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
   ""
   "@
-   sxtb\t%w0, %w1
-   ldrsb\t%w0, %1"
-  [(set_attr "type" "extend,load_4")]
+   uxt<HI_ONLY:size>\t%<SD_HSDI:w>0, %w1
+   ldr<HI_ONLY:sizel>\t%w0, %1
+   fmov\t%<HI_ONLY:Vetype>0, %w1
+   ldr\t%<HI_ONLY:Vetype>0, %1
+   umov\t%w0, %1.<HI_ONLY:Vetype>[0]
+   fmov\t%<HI_ONLY:Vetype>0, %<HI_ONLY:Vetype>1"
+  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
+   (set_attr "arch" "*,*,fp16,fp,fp,fp16")]
 )
 
-(define_insn "*zero_extendqihi2_aarch64"
-  [(set (match_operand:HI 0 "register_operand" "=r,r")
-	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+(define_insn "zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
+  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,r,w")
+        (zero_extend:SD_HSDI
+	  (match_operand:QI_ONLY 1 "nonimmediate_operand" "r,m,m,w,w")))]
   ""
   "@
-   and\t%w0, %w1, 255
-   ldrb\t%w0, %1"
-  [(set_attr "type" "logic_imm,load_4")]
+   uxt<QI_ONLY:size>\t%<SD_HSDI:w>0, %w1
+   ldr<QI_ONLY:sizel>\t%w0, %1
+   ldr\t%<QI_ONLY:Vetype>0, %1
+   umov\t%w0, %1.<QI_ONLY:Vetype>[0]
+   dup\t%<QI_ONLY:Vetype>0, %1.<QI_ONLY:Vetype>[0]"
+  [(set_attr "type" "mov_reg,load_4,f_loads,f_mrc,fmov")
+   (set_attr "arch" "*,*,fp,fp,fp")]
 )
 
 ;; -------------------------------------------------------------------
@@ -5029,15 +5001,15 @@ (define_insn "*and<mode>_compare0"
   [(set_attr "type" "alus_imm")]
 )
 
-(define_insn "*ands<GPI:mode>_compare0"
+(define_insn "*ands<SD_HSDI:mode>_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
-	 (zero_extend:GPI (match_operand:SHORT 1 "register_operand" "r"))
+	 (zero_extend:SD_HSDI (match_operand:ALLX 1 "register_operand" "r"))
 	 (const_int 0)))
-   (set (match_operand:GPI 0 "register_operand" "=r")
-	(zero_extend:GPI (match_dup 1)))]
+   (set (match_operand:SD_HSDI 0 "register_operand" "=r")
+	(zero_extend:SD_HSDI (match_dup 1)))]
   ""
-  "ands\\t%<GPI:w>0, %<GPI:w>1, <short_mask>"
+  "ands\\t%<SD_HSDI:w>0, %<SD_HSDI:w>1, <ALLX:short_mask>"
   [(set_attr "type" "alus_imm")]
 )
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 1df09f7fe2eb35aed96113476541e0faa5393551..e904407b2169e589b7007ff966b2d9347a6d0fd2 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -41,6 +41,8 @@ (define_mode_iterator SHORT [QI HI])
 ;; Iterators for single modes, for "@" patterns.
 (define_mode_iterator SI_ONLY [SI])
 (define_mode_iterator DI_ONLY [DI])
+(define_mode_iterator HI_ONLY [HI])
+(define_mode_iterator QI_ONLY [QI])
 
 ;; Iterator for all integer modes (up to 64-bit)
 (define_mode_iterator ALLI [QI HI SI DI])
@@ -1033,7 +1035,7 @@ (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
 ;; For width of fp registers in fcvt instruction
 (define_mode_attr fpw [(DI "s") (SI "d")])
 
-(define_mode_attr short_mask [(HI "65535") (QI "255")])
+(define_mode_attr short_mask [(SI "0xffffffff") (HI "0xffff") (QI "0xff")])
 
 ;; For constraints used in scalar immediate vector moves
 (define_mode_attr hq [(HI "h") (QI "q")])
diff --git a/gcc/testsuite/gcc.target/aarch64/ands_3.c b/gcc/testsuite/gcc.target/aarch64/ands_3.c
index 42cb7f0f0bc86a4aceb09851c31eb2e888d93403..421aa5cea7a51ad810cc9c5653a149cb21bb871c 100644
--- a/gcc/testsuite/gcc.target/aarch64/ands_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/ands_3.c
@@ -9,4 +9,4 @@ f9 (unsigned char x, int y)
   return x;
 }
 
-/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[ \t\]*(x|w)\[0-9\]+,\[ \t\]*255" } } */
+/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[ \t\]*(x|w)\[0-9\]+,\[ \t\]*0xff" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
index 8e35e0b574d49913b43c7d8d4f4ba75f127f42e9..03288976b3397cdbe0e822f94f2a6448d9fa9a52 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
@@ -51,7 +51,6 @@ TEST_ALL (VEC_PERM)
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
 /* { dg-final { scan-assembler-not {\tldr} } } */
-/* { dg-final { scan-assembler-times {\tstr} 2 } } */
-/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {\tins\tv[0-9]+\.h\[1\], v[0-9]+\.h\[0\]} 1 } } */
 
 /* { dg-final { scan-assembler-not {\tuqdec} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/tst_5.c b/gcc/testsuite/gcc.target/aarch64/tst_5.c
index 0de40a6c47a7d63c1b7a81aeba438a096c0041b8..19034cd74ed07ea4d670c25d9ab3d1cff805a483 100644
--- a/gcc/testsuite/gcc.target/aarch64/tst_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/tst_5.c
@@ -4,7 +4,7 @@
 int
 f255 (int x)
 {
-  if (x & 255)
+  if (x & 0xff)
     return 1;
   return x;
 }
@@ -12,10 +12,10 @@ f255 (int x)
 int
 f65535 (int x)
 {
-  if (x & 65535)
+  if (x & 0xffff)
     return 1;
   return x;
 }
 
-/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*255" } } */
-/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*65535" } } */
+/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xff" } } */
+/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xffff" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/tst_6.c b/gcc/testsuite/gcc.target/aarch64/tst_6.c
index f15ec114c391fed79cc43b7740fde83fb3d4ea53..1c047cfae214b60e5bf003e6781a277202fcc588 100644
--- a/gcc/testsuite/gcc.target/aarch64/tst_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/tst_6.c
@@ -7,4 +7,4 @@ foo (long x)
    return ((short) x != 0) ? x : 1;
 }
 
-/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*65535" } } */
+/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xffff" } } */
  

Comments

Tamar Christina Nov. 30, 2022, 4:28 a.m. UTC | #1
Ping.

> -----Original Message-----
> From: Tamar Christina <tamar.christina@arm.com>
> Sent: Monday, October 31, 2022 12:00 PM
> To: gcc-patches@gcc.gnu.org
> Cc: nd <nd@arm.com>; Richard Earnshaw <Richard.Earnshaw@arm.com>;
> Marcus Shawcroft <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov
> <Kyrylo.Tkachov@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Subject: [PATCH 7/8]AArch64: Consolidate zero and sign extension patterns
> and add missing ones.
> 
> Hi All,
> 
> The target has various zero and sign extension patterns.  These however live
> in various locations around the MD file and almost all of them are split
> differently.  Due to the various patterns we also ended up missing valid
> extensions.  For instance smov is almost never generated.
> 
> This change tries to make this more manageable by consolidating the
> patterns as much as possible and in doing so fix the missing alternatives.
> 
> There were also some duplicate patterns.  Note that the
> zero_extend<*_ONLY:mode><SD_HSDI:mode>2  patterns are nearly
> identical however QImode lacks an alternative that the others don't have, so
> I have left them as
> 3 different patterns next to each other.
> 
> In a lot of cases the wrong iterator was used leaving out cases that should
> exist.
> 
> I've also changed the masks used for zero extensions to hex instead of
> decimal as it's more clear what they do that way, and aligns better with
> output of other compilers.
> 
> This leave the bulk of the extensions in just 3 patterns.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* config/aarch64/aarch64-simd.md
> 	(*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>):
> Changed to ...
> 	(*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>): ...
> This.
> 	(*aarch64_get_lane_extenddi<VS:mode>): New.
> 	* config/aarch64/aarch64.md (<optab>sidi2, *extendsidi2_aarch64,
> 	<optab>qihi2, *extendqihi2_aarch64, *zero_extendsidi2_aarch64):
> Remove
> 	duplicate patterns.
> 	(<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2,
> 	*extend<SHORT:mode><GPI:mode>2_aarch64): Remove,
> consolidate
> 	into ...
> 	(extend<ALLX:mode><SD_HSDI:mode>2): ... This.
> 	(*zero_extendqihi2_aarch64,
> 	*zero_extend<SHORT:mode><GPI:mode>2_aarch64): Remove,
> consolidate into
> 	...
> 	(zero_extend<SI_ONLY:mode><SD_HSDI:mode>2,
> 	zero_extend<HI_ONLY:mode><SD_HSDI:mode>2,
> 	zero_extend<QI_ONLY:mode><SD_HSDI:mode>2):
> 	(*ands<GPI:mode>_compare0): Renamed to ...
> 	(*ands<SD_HSDI:mode>_compare0): ... This.
> 	* config/aarch64/iterators.md (HI_ONLY, QI_ONLY): New.
> 	(short_mask): Use hex rather than dec and add SI.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/aarch64/ands_3.c: Update codegen.
> 	* gcc.target/aarch64/sve/slp_1.c: Likewise.
> 	* gcc.target/aarch64/tst_5.c: Likewise.
> 	* gcc.target/aarch64/tst_6.c: Likewise.
> 
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-simd.md
> b/gcc/config/aarch64/aarch64-simd.md
> index
> 8a84a8560e982b8155b18541f5504801b3330124..d0b37c4dd48aeafd3d87c90dc
> 3270e71af5a72b9 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -4237,19 +4237,34 @@ (define_insn
> "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
>    [(set_attr "type" "neon_to_gp<VDQQH:q>")]
>  )
> 
> -(define_insn
> "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
> +(define_insn "*aarch64_get_lane_extenddi<VS:mode>"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> +	(sign_extend:DI
> +	  (vec_select:<VS:VEL>
> +	    (match_operand:VS 1 "register_operand" "w")
> +	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
> +  "TARGET_SIMD"
> +  {
> +    operands[2] = aarch64_endian_lane_rtx (<VS:MODE>mode,
> +					   INTVAL (operands[2]));
> +    return "smov\\t%x0, %1.<VS:Vetype>[%2]";
> +  }
> +  [(set_attr "type" "neon_to_gp<VS:q>")]
> +)
> +
> +(define_insn
> "*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>  	(zero_extend:GPI
> -	  (vec_select:<VDQQH:VEL>
> -	    (match_operand:VDQQH 1 "register_operand" "w")
> +	  (vec_select:<VDQV_L:VEL>
> +	    (match_operand:VDQV_L 1 "register_operand" "w")
>  	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
>    "TARGET_SIMD"
>    {
> -    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
> +    operands[2] = aarch64_endian_lane_rtx (<VDQV_L:MODE>mode,
>  					   INTVAL (operands[2]));
> -    return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
> +    return "umov\\t%w0, %1.<VDQV_L:Vetype>[%2]";
>    }
> -  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
> +  [(set_attr "type" "neon_to_gp<VDQV_L:q>")]
>  )
> 
>  ;; Lane extraction of a value, neither sign nor zero extension diff --git
> a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index
> 3ea16dbc2557c6a4f37104d44a49f77f768eb53d..09ae1118371f82ca63146fceb9
> 53eb9e820d05a4 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1911,22 +1911,6 @@ (define_insn
> "storewb_pair<TX:mode>_<P:mode>"
>  ;; Sign/Zero extension
>  ;; -------------------------------------------------------------------
> 
> -(define_expand "<optab>sidi2"
> -  [(set (match_operand:DI 0 "register_operand")
> -	(ANY_EXTEND:DI (match_operand:SI 1 "nonimmediate_operand")))]
> -  ""
> -)
> -
> -(define_insn "*extendsidi2_aarch64"
> -  [(set (match_operand:DI 0 "register_operand" "=r,r")
> -        (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand"
> "r,m")))]
> -  ""
> -  "@
> -   sxtw\t%0, %w1
> -   ldrsw\t%0, %1"
> -  [(set_attr "type" "extend,load_4")]
> -)
> -
>  (define_insn "*load_pair_extendsidi2_aarch64"
>    [(set (match_operand:DI 0 "register_operand" "=r")
>  	(sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand"
> "Ump"))) @@ -1940,21 +1924,6 @@ (define_insn
> "*load_pair_extendsidi2_aarch64"
>    [(set_attr "type" "load_8")]
>  )
> 
> -(define_insn "*zero_extendsidi2_aarch64"
> -  [(set (match_operand:DI 0 "register_operand" "=r,r,w,w,r,w")
> -        (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand"
> "r,m,r,m,w,w")))]
> -  ""
> -  "@
> -   uxtw\t%0, %w1
> -   ldr\t%w0, %1
> -   fmov\t%s0, %w1
> -   ldr\t%s0, %1
> -   fmov\t%w0, %s1
> -   fmov\t%s0, %s1"
> -  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
> -   (set_attr "arch" "*,*,fp,fp,fp,fp")]
> -)
> -
>  (define_insn "*load_pair_zero_extendsidi2_aarch64"
>    [(set (match_operand:DI 0 "register_operand" "=r,w")
>  	(zero_extend:DI (match_operand:SI 1
> "aarch64_mem_pair_operand" "Ump,Ump"))) @@ -1971,61 +1940,64 @@
> (define_insn "*load_pair_zero_extendsidi2_aarch64"
>     (set_attr "arch" "*,fp")]
>  )
> 
> -(define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2"
> -  [(set (match_operand:GPI 0 "register_operand")
> -        (ANY_EXTEND:GPI (match_operand:SHORT 1
> "nonimmediate_operand")))]
> -  ""
> -)
> -
> -(define_insn "*extend<SHORT:mode><GPI:mode>2_aarch64"
> -  [(set (match_operand:GPI 0 "register_operand" "=r,r,r")
> -        (sign_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand"
> "r,m,w")))]
> +(define_insn "extend<ALLX:mode><SD_HSDI:mode>2"
> +  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,r")
> +        (sign_extend:SD_HSDI
> +	  (match_operand:ALLX 1 "nonimmediate_operand" "r,m,w")))]
>    ""
>    "@
> -   sxt<SHORT:size>\t%<GPI:w>0, %w1
> -   ldrs<SHORT:size>\t%<GPI:w>0, %1
> -   smov\t%<GPI:w>0, %1.<SHORT:size>[0]"
> +   sxt<ALLX:size>\t%<SD_HSDI:w>0, %w1
> +   ldrs<ALLX:size>\t%<SD_HSDI:w>0, %1
> +   smov\t%<SD_HSDI:w>0, %1.<ALLX:Vetype>[0]"
>    [(set_attr "type" "extend,load_4,neon_to_gp")
>     (set_attr "arch" "*,*,fp")]
>  )
> 
> -(define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
> -  [(set (match_operand:GPI 0 "register_operand" "=r,r,w,r")
> -        (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand"
> "r,m,m,w")))]
> +(define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
> +  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
> +        (zero_extend:SD_HSDI
> +	  (match_operand:SI_ONLY 1 "nonimmediate_operand"
> "r,m,r,m,w,w")))]
>    ""
>    "@
> -   and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask>
> -   ldr<SHORT:size>\t%w0, %1
> -   ldr\t%<SHORT:size>0, %1
> -   umov\t%w0, %1.<SHORT:size>[0]"
> -  [(set_attr "type" "logic_imm,load_4,f_loads,neon_to_gp")
> -   (set_attr "arch" "*,*,fp,fp")]
> -)
> -
> -(define_expand "<optab>qihi2"
> -  [(set (match_operand:HI 0 "register_operand")
> -        (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand")))]
> -  ""
> +   uxt<SI_ONLY:size>\t%<SD_HSDI:w>0, %w1
> +   ldr<SI_ONLY:sizel>\t%w0, %1
> +   fmov\t%<SI_ONLY:Vetype>0, %w1
> +   ldr\t%<SI_ONLY:Vetype>0, %1
> +   fmov\t%w0, %<SI_ONLY:Vetype>1
> +   fmov\t%<SI_ONLY:Vetype>0, %<SI_ONLY:Vetype>1"
> +  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
> +   (set_attr "arch" "*,*,fp,fp,fp,fp")]
>  )
> 
> -(define_insn "*extendqihi2_aarch64"
> -  [(set (match_operand:HI 0 "register_operand" "=r,r")
> -	(sign_extend:HI (match_operand:QI 1 "nonimmediate_operand"
> "r,m")))]
> +(define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
> +  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
> +        (zero_extend:SD_HSDI
> +	  (match_operand:HI_ONLY 1 "nonimmediate_operand"
> "r,m,r,m,w,w")))]
>    ""
>    "@
> -   sxtb\t%w0, %w1
> -   ldrsb\t%w0, %1"
> -  [(set_attr "type" "extend,load_4")]
> +   uxt<HI_ONLY:size>\t%<SD_HSDI:w>0, %w1
> +   ldr<HI_ONLY:sizel>\t%w0, %1
> +   fmov\t%<HI_ONLY:Vetype>0, %w1
> +   ldr\t%<HI_ONLY:Vetype>0, %1
> +   umov\t%w0, %1.<HI_ONLY:Vetype>[0]
> +   fmov\t%<HI_ONLY:Vetype>0, %<HI_ONLY:Vetype>1"
> +  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
> +   (set_attr "arch" "*,*,fp16,fp,fp,fp16")]
>  )
> 
> -(define_insn "*zero_extendqihi2_aarch64"
> -  [(set (match_operand:HI 0 "register_operand" "=r,r")
> -	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand"
> "r,m")))]
> +(define_insn "zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
> +  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,r,w")
> +        (zero_extend:SD_HSDI
> +	  (match_operand:QI_ONLY 1 "nonimmediate_operand"
> "r,m,m,w,w")))]
>    ""
>    "@
> -   and\t%w0, %w1, 255
> -   ldrb\t%w0, %1"
> -  [(set_attr "type" "logic_imm,load_4")]
> +   uxt<QI_ONLY:size>\t%<SD_HSDI:w>0, %w1
> +   ldr<QI_ONLY:sizel>\t%w0, %1
> +   ldr\t%<QI_ONLY:Vetype>0, %1
> +   umov\t%w0, %1.<QI_ONLY:Vetype>[0]
> +   dup\t%<QI_ONLY:Vetype>0, %1.<QI_ONLY:Vetype>[0]"
> +  [(set_attr "type" "mov_reg,load_4,f_loads,f_mrc,fmov")
> +   (set_attr "arch" "*,*,fp,fp,fp")]
>  )
> 
>  ;; -------------------------------------------------------------------
> @@ -5029,15 +5001,15 @@ (define_insn "*and<mode>_compare0"
>    [(set_attr "type" "alus_imm")]
>  )
> 
> -(define_insn "*ands<GPI:mode>_compare0"
> +(define_insn "*ands<SD_HSDI:mode>_compare0"
>    [(set (reg:CC_NZ CC_REGNUM)
>  	(compare:CC_NZ
> -	 (zero_extend:GPI (match_operand:SHORT 1 "register_operand"
> "r"))
> +	 (zero_extend:SD_HSDI (match_operand:ALLX 1 "register_operand"
> "r"))
>  	 (const_int 0)))
> -   (set (match_operand:GPI 0 "register_operand" "=r")
> -	(zero_extend:GPI (match_dup 1)))]
> +   (set (match_operand:SD_HSDI 0 "register_operand" "=r")
> +	(zero_extend:SD_HSDI (match_dup 1)))]
>    ""
> -  "ands\\t%<GPI:w>0, %<GPI:w>1, <short_mask>"
> +  "ands\\t%<SD_HSDI:w>0, %<SD_HSDI:w>1, <ALLX:short_mask>"
>    [(set_attr "type" "alus_imm")]
>  )
> 
> diff --git a/gcc/config/aarch64/iterators.md
> b/gcc/config/aarch64/iterators.md index
> 1df09f7fe2eb35aed96113476541e0faa5393551..e904407b2169e589b7007ff966
> b2d9347a6d0fd2 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -41,6 +41,8 @@ (define_mode_iterator SHORT [QI HI])  ;; Iterators for
> single modes, for "@" patterns.
>  (define_mode_iterator SI_ONLY [SI])
>  (define_mode_iterator DI_ONLY [DI])
> +(define_mode_iterator HI_ONLY [HI])
> +(define_mode_iterator QI_ONLY [QI])
> 
>  ;; Iterator for all integer modes (up to 64-bit)  (define_mode_iterator ALLI
> [QI HI SI DI]) @@ -1033,7 +1035,7 @@ (define_mode_attr w2 [(HF "x") (SF
> "x") (DF "w")])  ;; For width of fp registers in fcvt instruction
> (define_mode_attr fpw [(DI "s") (SI "d")])
> 
> -(define_mode_attr short_mask [(HI "65535") (QI "255")])
> +(define_mode_attr short_mask [(SI "0xffffffff") (HI "0xffff") (QI
> +"0xff")])
> 
>  ;; For constraints used in scalar immediate vector moves  (define_mode_attr
> hq [(HI "h") (QI "q")]) diff --git a/gcc/testsuite/gcc.target/aarch64/ands_3.c
> b/gcc/testsuite/gcc.target/aarch64/ands_3.c
> index
> 42cb7f0f0bc86a4aceb09851c31eb2e888d93403..421aa5cea7a51ad810cc9c5653
> a149cb21bb871c 100644
> --- a/gcc/testsuite/gcc.target/aarch64/ands_3.c
> +++ b/gcc/testsuite/gcc.target/aarch64/ands_3.c
> @@ -9,4 +9,4 @@ f9 (unsigned char x, int y)
>    return x;
>  }
> 
> -/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[ \t\]*(x|w)\[0-9\]+,\[
> \t\]*255" } } */
> +/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[
> +\t\]*(x|w)\[0-9\]+,\[ \t\]*0xff" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> index
> 8e35e0b574d49913b43c7d8d4f4ba75f127f42e9..03288976b3397cdbe0e822f94
> f2a6448d9fa9a52 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> @@ -51,7 +51,6 @@ TEST_ALL (VEC_PERM)
>  /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
>  /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
>  /* { dg-final { scan-assembler-not {\tldr} } } */
> -/* { dg-final { scan-assembler-times {\tstr} 2 } } */
> -/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */
> +/* { dg-final { scan-assembler-times {\tins\tv[0-9]+\.h\[1\],
> +v[0-9]+\.h\[0\]} 1 } } */
> 
>  /* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git
> a/gcc/testsuite/gcc.target/aarch64/tst_5.c
> b/gcc/testsuite/gcc.target/aarch64/tst_5.c
> index
> 0de40a6c47a7d63c1b7a81aeba438a096c0041b8..19034cd74ed07ea4d670c25d
> 9ab3d1cff805a483 100644
> --- a/gcc/testsuite/gcc.target/aarch64/tst_5.c
> +++ b/gcc/testsuite/gcc.target/aarch64/tst_5.c
> @@ -4,7 +4,7 @@
>  int
>  f255 (int x)
>  {
> -  if (x & 255)
> +  if (x & 0xff)
>      return 1;
>    return x;
>  }
> @@ -12,10 +12,10 @@ f255 (int x)
>  int
>  f65535 (int x)
>  {
> -  if (x & 65535)
> +  if (x & 0xffff)
>      return 1;
>    return x;
>  }
> 
> -/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*255" } } */
> -/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*65535" } } */
> +/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xff" } } */
> +/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xffff" } }
> +*/
> diff --git a/gcc/testsuite/gcc.target/aarch64/tst_6.c
> b/gcc/testsuite/gcc.target/aarch64/tst_6.c
> index
> f15ec114c391fed79cc43b7740fde83fb3d4ea53..1c047cfae214b60e5bf003e678
> 1a277202fcc588 100644
> --- a/gcc/testsuite/gcc.target/aarch64/tst_6.c
> +++ b/gcc/testsuite/gcc.target/aarch64/tst_6.c
> @@ -7,4 +7,4 @@ foo (long x)
>     return ((short) x != 0) ? x : 1;
>  }
> 
> -/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*65535" } } */
> +/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xffff" } }
> +*/
> 
> 
> 
> 
> --
  
Richard Sandiford Dec. 6, 2022, 3:59 p.m. UTC | #2
Tamar Christina via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Hi All,
>
> The target has various zero and sign extension patterns.  These however live in
> various locations around the MD file and almost all of them are split
> differently.  Due to the various patterns we also ended up missing valid
> extensions.  For instance smov is almost never generated.
>
> This change tries to make this more manageable by consolidating the patterns as
> much as possible and in doing so fix the missing alternatives.
>
> There were also some duplicate patterns.  Note that the
> zero_extend<*_ONLY:mode><SD_HSDI:mode>2  patterns are nearly identical however
> QImode lacks an alternative that the others don't have, so I have left them as
> 3 different patterns next to each other.
>
> In a lot of cases the wrong iterator was used leaving out cases that should
> exist.
>
> I've also changed the masks used for zero extensions to hex instead of decimal
> as it's more clear what they do that way, and aligns better with output of
> other compilers.
>
> This leave the bulk of the extensions in just 3 patterns.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64-simd.md
> 	(*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>): Changed to ...
> 	(*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>): ... This.
> 	(*aarch64_get_lane_extenddi<VS:mode>): New.
> 	* config/aarch64/aarch64.md (<optab>sidi2, *extendsidi2_aarch64,
> 	<optab>qihi2, *extendqihi2_aarch64, *zero_extendsidi2_aarch64): Remove
> 	duplicate patterns.
> 	(<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2,
> 	*extend<SHORT:mode><GPI:mode>2_aarch64): Remove, consolidate
> 	into ...
> 	(extend<ALLX:mode><SD_HSDI:mode>2): ... This.
> 	(*zero_extendqihi2_aarch64,
> 	*zero_extend<SHORT:mode><GPI:mode>2_aarch64): Remove, consolidate into
> 	...
> 	(zero_extend<SI_ONLY:mode><SD_HSDI:mode>2,
> 	zero_extend<HI_ONLY:mode><SD_HSDI:mode>2,
> 	zero_extend<QI_ONLY:mode><SD_HSDI:mode>2):
> 	(*ands<GPI:mode>_compare0): Renamed to ...
> 	(*ands<SD_HSDI:mode>_compare0): ... This.
> 	* config/aarch64/iterators.md (HI_ONLY, QI_ONLY): New.
> 	(short_mask): Use hex rather than dec and add SI.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/ands_3.c: Update codegen.
> 	* gcc.target/aarch64/sve/slp_1.c: Likewise.
> 	* gcc.target/aarch64/tst_5.c: Likewise.
> 	* gcc.target/aarch64/tst_6.c: Likewise.
>
> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 8a84a8560e982b8155b18541f5504801b3330124..d0b37c4dd48aeafd3d87c90dc3270e71af5a72b9 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -4237,19 +4237,34 @@ (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
>    [(set_attr "type" "neon_to_gp<VDQQH:q>")]
>  )
>  
> -(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
> +(define_insn "*aarch64_get_lane_extenddi<VS:mode>"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> +	(sign_extend:DI
> +	  (vec_select:<VS:VEL>
> +	    (match_operand:VS 1 "register_operand" "w")
> +	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
> +  "TARGET_SIMD"
> +  {
> +    operands[2] = aarch64_endian_lane_rtx (<VS:MODE>mode,
> +					   INTVAL (operands[2]));
> +    return "smov\\t%x0, %1.<VS:Vetype>[%2]";
> +  }
> +  [(set_attr "type" "neon_to_gp<VS:q>")]
> +)
> +
> +(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>  	(zero_extend:GPI
> -	  (vec_select:<VDQQH:VEL>
> -	    (match_operand:VDQQH 1 "register_operand" "w")
> +	  (vec_select:<VDQV_L:VEL>
> +	    (match_operand:VDQV_L 1 "register_operand" "w")
>  	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
>    "TARGET_SIMD"
>    {
> -    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
> +    operands[2] = aarch64_endian_lane_rtx (<VDQV_L:MODE>mode,
>  					   INTVAL (operands[2]));
> -    return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
> +    return "umov\\t%w0, %1.<VDQV_L:Vetype>[%2]";
>    }
> -  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
> +  [(set_attr "type" "neon_to_gp<VDQV_L:q>")]
>  )

Do you have any tests for the extra SI sign-extends?

I think it'd be better to use a consistent style here: either have a single
pattern for all source modes (like you do with the zero_extends) or have
a separate extend-to-DI-only pattern for SI inputs (like you do with the
sign_extends).

If we go with the single-pattern approach, then as per the reviews
in other patches that came after this patch was posted, it'd be good
to compile out the invalid extend-SI-to-SI cases, e.g. using a condition
based on <elem_bits> or whatever (extended to Advanced SIMD modes).

Same comments for the other patterns: would be good to compile-out
invalid cases.  E.g. in particular:

> -(define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
> -  [(set (match_operand:GPI 0 "register_operand" "=r,r,w,r")
> -        (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m,w")))]
> +(define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
> +  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
> +        (zero_extend:SD_HSDI
> +	  (match_operand:SI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]

It doesn't really make conceptual sense to define SI extensions to HI or SI.
This can just be a single pattern, with no iterators.  It might be easier
to write the HI and QI iterators in the same style.

I guess one reason you might have done this is because a later patch
added "@" to the names, but it looked like that use could use paradoxical
subregs instead.  Even if we do want to generate extensions directly in
future, it's probably better to use the optabs interface, since it
coerces the operands to the predicates.

Thanks,
Richard

>    ""
>    "@
> -   and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask>
> -   ldr<SHORT:size>\t%w0, %1
> -   ldr\t%<SHORT:size>0, %1
> -   umov\t%w0, %1.<SHORT:size>[0]"
> -  [(set_attr "type" "logic_imm,load_4,f_loads,neon_to_gp")
> -   (set_attr "arch" "*,*,fp,fp")]
> -)
> -
> -(define_expand "<optab>qihi2"
> -  [(set (match_operand:HI 0 "register_operand")
> -        (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand")))]
> -  ""
> +   uxt<SI_ONLY:size>\t%<SD_HSDI:w>0, %w1
> +   ldr<SI_ONLY:sizel>\t%w0, %1
> +   fmov\t%<SI_ONLY:Vetype>0, %w1
> +   ldr\t%<SI_ONLY:Vetype>0, %1
> +   fmov\t%w0, %<SI_ONLY:Vetype>1
> +   fmov\t%<SI_ONLY:Vetype>0, %<SI_ONLY:Vetype>1"
> +  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
> +   (set_attr "arch" "*,*,fp,fp,fp,fp")]
>  )
>  
> -(define_insn "*extendqihi2_aarch64"
> -  [(set (match_operand:HI 0 "register_operand" "=r,r")
> -	(sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
> +(define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
> +  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
> +        (zero_extend:SD_HSDI
> +	  (match_operand:HI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
>    ""
>    "@
> -   sxtb\t%w0, %w1
> -   ldrsb\t%w0, %1"
> -  [(set_attr "type" "extend,load_4")]
> +   uxt<HI_ONLY:size>\t%<SD_HSDI:w>0, %w1
> +   ldr<HI_ONLY:sizel>\t%w0, %1
> +   fmov\t%<HI_ONLY:Vetype>0, %w1
> +   ldr\t%<HI_ONLY:Vetype>0, %1
> +   umov\t%w0, %1.<HI_ONLY:Vetype>[0]
> +   fmov\t%<HI_ONLY:Vetype>0, %<HI_ONLY:Vetype>1"
> +  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
> +   (set_attr "arch" "*,*,fp16,fp,fp,fp16")]
>  )
>  
> -(define_insn "*zero_extendqihi2_aarch64"
> -  [(set (match_operand:HI 0 "register_operand" "=r,r")
> -	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
> +(define_insn "zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
> +  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,r,w")
> +        (zero_extend:SD_HSDI
> +	  (match_operand:QI_ONLY 1 "nonimmediate_operand" "r,m,m,w,w")))]
>    ""
>    "@
> -   and\t%w0, %w1, 255
> -   ldrb\t%w0, %1"
> -  [(set_attr "type" "logic_imm,load_4")]
> +   uxt<QI_ONLY:size>\t%<SD_HSDI:w>0, %w1
> +   ldr<QI_ONLY:sizel>\t%w0, %1
> +   ldr\t%<QI_ONLY:Vetype>0, %1
> +   umov\t%w0, %1.<QI_ONLY:Vetype>[0]
> +   dup\t%<QI_ONLY:Vetype>0, %1.<QI_ONLY:Vetype>[0]"
> +  [(set_attr "type" "mov_reg,load_4,f_loads,f_mrc,fmov")
> +   (set_attr "arch" "*,*,fp,fp,fp")]
>  )
>  
>  ;; -------------------------------------------------------------------
> @@ -5029,15 +5001,15 @@ (define_insn "*and<mode>_compare0"
>    [(set_attr "type" "alus_imm")]
>  )
>  
> -(define_insn "*ands<GPI:mode>_compare0"
> +(define_insn "*ands<SD_HSDI:mode>_compare0"
>    [(set (reg:CC_NZ CC_REGNUM)
>  	(compare:CC_NZ
> -	 (zero_extend:GPI (match_operand:SHORT 1 "register_operand" "r"))
> +	 (zero_extend:SD_HSDI (match_operand:ALLX 1 "register_operand" "r"))
>  	 (const_int 0)))
> -   (set (match_operand:GPI 0 "register_operand" "=r")
> -	(zero_extend:GPI (match_dup 1)))]
> +   (set (match_operand:SD_HSDI 0 "register_operand" "=r")
> +	(zero_extend:SD_HSDI (match_dup 1)))]
>    ""
> -  "ands\\t%<GPI:w>0, %<GPI:w>1, <short_mask>"
> +  "ands\\t%<SD_HSDI:w>0, %<SD_HSDI:w>1, <ALLX:short_mask>"
>    [(set_attr "type" "alus_imm")]
>  )
>  
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 1df09f7fe2eb35aed96113476541e0faa5393551..e904407b2169e589b7007ff966b2d9347a6d0fd2 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -41,6 +41,8 @@ (define_mode_iterator SHORT [QI HI])
>  ;; Iterators for single modes, for "@" patterns.
>  (define_mode_iterator SI_ONLY [SI])
>  (define_mode_iterator DI_ONLY [DI])
> +(define_mode_iterator HI_ONLY [HI])
> +(define_mode_iterator QI_ONLY [QI])
>  
>  ;; Iterator for all integer modes (up to 64-bit)
>  (define_mode_iterator ALLI [QI HI SI DI])
> @@ -1033,7 +1035,7 @@ (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
>  ;; For width of fp registers in fcvt instruction
>  (define_mode_attr fpw [(DI "s") (SI "d")])
>  
> -(define_mode_attr short_mask [(HI "65535") (QI "255")])
> +(define_mode_attr short_mask [(SI "0xffffffff") (HI "0xffff") (QI "0xff")])
>  
>  ;; For constraints used in scalar immediate vector moves
>  (define_mode_attr hq [(HI "h") (QI "q")])
> diff --git a/gcc/testsuite/gcc.target/aarch64/ands_3.c b/gcc/testsuite/gcc.target/aarch64/ands_3.c
> index 42cb7f0f0bc86a4aceb09851c31eb2e888d93403..421aa5cea7a51ad810cc9c5653a149cb21bb871c 100644
> --- a/gcc/testsuite/gcc.target/aarch64/ands_3.c
> +++ b/gcc/testsuite/gcc.target/aarch64/ands_3.c
> @@ -9,4 +9,4 @@ f9 (unsigned char x, int y)
>    return x;
>  }
>  
> -/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[ \t\]*(x|w)\[0-9\]+,\[ \t\]*255" } } */
> +/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[ \t\]*(x|w)\[0-9\]+,\[ \t\]*0xff" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> index 8e35e0b574d49913b43c7d8d4f4ba75f127f42e9..03288976b3397cdbe0e822f94f2a6448d9fa9a52 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> @@ -51,7 +51,6 @@ TEST_ALL (VEC_PERM)
>  /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
>  /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
>  /* { dg-final { scan-assembler-not {\tldr} } } */
> -/* { dg-final { scan-assembler-times {\tstr} 2 } } */
> -/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */
> +/* { dg-final { scan-assembler-times {\tins\tv[0-9]+\.h\[1\], v[0-9]+\.h\[0\]} 1 } } */
>  
>  /* { dg-final { scan-assembler-not {\tuqdec} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/tst_5.c b/gcc/testsuite/gcc.target/aarch64/tst_5.c
> index 0de40a6c47a7d63c1b7a81aeba438a096c0041b8..19034cd74ed07ea4d670c25d9ab3d1cff805a483 100644
> --- a/gcc/testsuite/gcc.target/aarch64/tst_5.c
> +++ b/gcc/testsuite/gcc.target/aarch64/tst_5.c
> @@ -4,7 +4,7 @@
>  int
>  f255 (int x)
>  {
> -  if (x & 255)
> +  if (x & 0xff)
>      return 1;
>    return x;
>  }
> @@ -12,10 +12,10 @@ f255 (int x)
>  int
>  f65535 (int x)
>  {
> -  if (x & 65535)
> +  if (x & 0xffff)
>      return 1;
>    return x;
>  }
>  
> -/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*255" } } */
> -/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*65535" } } */
> +/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xff" } } */
> +/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xffff" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/tst_6.c b/gcc/testsuite/gcc.target/aarch64/tst_6.c
> index f15ec114c391fed79cc43b7740fde83fb3d4ea53..1c047cfae214b60e5bf003e6781a277202fcc588 100644
> --- a/gcc/testsuite/gcc.target/aarch64/tst_6.c
> +++ b/gcc/testsuite/gcc.target/aarch64/tst_6.c
> @@ -7,4 +7,4 @@ foo (long x)
>     return ((short) x != 0) ? x : 1;
>  }
>  
> -/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*65535" } } */
> +/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xffff" } } */
  

Patch

--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4237,19 +4237,34 @@  (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
 )
 
-(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
+(define_insn "*aarch64_get_lane_extenddi<VS:mode>"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(sign_extend:DI
+	  (vec_select:<VS:VEL>
+	    (match_operand:VS 1 "register_operand" "w")
+	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_SIMD"
+  {
+    operands[2] = aarch64_endian_lane_rtx (<VS:MODE>mode,
+					   INTVAL (operands[2]));
+    return "smov\\t%x0, %1.<VS:Vetype>[%2]";
+  }
+  [(set_attr "type" "neon_to_gp<VS:q>")]
+)
+
+(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(zero_extend:GPI
-	  (vec_select:<VDQQH:VEL>
-	    (match_operand:VDQQH 1 "register_operand" "w")
+	  (vec_select:<VDQV_L:VEL>
+	    (match_operand:VDQV_L 1 "register_operand" "w")
 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
   "TARGET_SIMD"
   {
-    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
+    operands[2] = aarch64_endian_lane_rtx (<VDQV_L:MODE>mode,
 					   INTVAL (operands[2]));
-    return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
+    return "umov\\t%w0, %1.<VDQV_L:Vetype>[%2]";
   }
-  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
+  [(set_attr "type" "neon_to_gp<VDQV_L:q>")]
 )
 
 ;; Lane extraction of a value, neither sign nor zero extension
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 3ea16dbc2557c6a4f37104d44a49f77f768eb53d..09ae1118371f82ca63146fceb953eb9e820d05a4 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1911,22 +1911,6 @@  (define_insn "storewb_pair<TX:mode>_<P:mode>"
 ;; Sign/Zero extension
 ;; -------------------------------------------------------------------
 
-(define_expand "<optab>sidi2"
-  [(set (match_operand:DI 0 "register_operand")
-	(ANY_EXTEND:DI (match_operand:SI 1 "nonimmediate_operand")))]
-  ""
-)
-
-(define_insn "*extendsidi2_aarch64"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-        (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
-  ""
-  "@
-   sxtw\t%0, %w1
-   ldrsw\t%0, %1"
-  [(set_attr "type" "extend,load_4")]
-)
-
 (define_insn "*load_pair_extendsidi2_aarch64"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump")))
@@ -1940,21 +1924,6 @@  (define_insn "*load_pair_extendsidi2_aarch64"
   [(set_attr "type" "load_8")]
 )
 
-(define_insn "*zero_extendsidi2_aarch64"
-  [(set (match_operand:DI 0 "register_operand" "=r,r,w,w,r,w")
-        (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
-  ""
-  "@
-   uxtw\t%0, %w1
-   ldr\t%w0, %1
-   fmov\t%s0, %w1
-   ldr\t%s0, %1
-   fmov\t%w0, %s1
-   fmov\t%s0, %s1"
-  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
-   (set_attr "arch" "*,*,fp,fp,fp,fp")]
-)
-
 (define_insn "*load_pair_zero_extendsidi2_aarch64"
   [(set (match_operand:DI 0 "register_operand" "=r,w")
 	(zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump")))
@@ -1971,61 +1940,64 @@  (define_insn "*load_pair_zero_extendsidi2_aarch64"
    (set_attr "arch" "*,fp")]
 )
 
-(define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2"
-  [(set (match_operand:GPI 0 "register_operand")
-        (ANY_EXTEND:GPI (match_operand:SHORT 1 "nonimmediate_operand")))]
-  ""
-)
-
-(define_insn "*extend<SHORT:mode><GPI:mode>2_aarch64"
-  [(set (match_operand:GPI 0 "register_operand" "=r,r,r")
-        (sign_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,w")))]
+(define_insn "extend<ALLX:mode><SD_HSDI:mode>2"
+  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,r")
+        (sign_extend:SD_HSDI
+	  (match_operand:ALLX 1 "nonimmediate_operand" "r,m,w")))]
   ""
   "@
-   sxt<SHORT:size>\t%<GPI:w>0, %w1
-   ldrs<SHORT:size>\t%<GPI:w>0, %1
-   smov\t%<GPI:w>0, %1.<SHORT:size>[0]"
+   sxt<ALLX:size>\t%<SD_HSDI:w>0, %w1
+   ldrs<ALLX:size>\t%<SD_HSDI:w>0, %1
+   smov\t%<SD_HSDI:w>0, %1.<ALLX:Vetype>[0]"
   [(set_attr "type" "extend,load_4,neon_to_gp")
    (set_attr "arch" "*,*,fp")]
 )
 
-(define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
-  [(set (match_operand:GPI 0 "register_operand" "=r,r,w,r")
-        (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m,w")))]
+(define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
+  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
+        (zero_extend:SD_HSDI
+	  (match_operand:SI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
   ""
   "@
-   and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask>
-   ldr<SHORT:size>\t%w0, %1
-   ldr\t%<SHORT:size>0, %1
-   umov\t%w0, %1.<SHORT:size>[0]"
-  [(set_attr "type" "logic_imm,load_4,f_loads,neon_to_gp")
-   (set_attr "arch" "*,*,fp,fp")]
-)
-
-(define_expand "<optab>qihi2"
-  [(set (match_operand:HI 0 "register_operand")
-        (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand")))]
-  ""
+   uxt<SI_ONLY:size>\t%<SD_HSDI:w>0, %w1
+   ldr<SI_ONLY:sizel>\t%w0, %1
+   fmov\t%<SI_ONLY:Vetype>0, %w1
+   ldr\t%<SI_ONLY:Vetype>0, %1
+   fmov\t%w0, %<SI_ONLY:Vetype>1
+   fmov\t%<SI_ONLY:Vetype>0, %<SI_ONLY:Vetype>1"
+  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
+   (set_attr "arch" "*,*,fp,fp,fp,fp")]
 )
 
-(define_insn "*extendqihi2_aarch64"
-  [(set (match_operand:HI 0 "register_operand" "=r,r")
-	(sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+(define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
+  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
+        (zero_extend:SD_HSDI
+	  (match_operand:HI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
   ""
   "@
-   sxtb\t%w0, %w1
-   ldrsb\t%w0, %1"
-  [(set_attr "type" "extend,load_4")]
+   uxt<HI_ONLY:size>\t%<SD_HSDI:w>0, %w1
+   ldr<HI_ONLY:sizel>\t%w0, %1
+   fmov\t%<HI_ONLY:Vetype>0, %w1
+   ldr\t%<HI_ONLY:Vetype>0, %1
+   umov\t%w0, %1.<HI_ONLY:Vetype>[0]
+   fmov\t%<HI_ONLY:Vetype>0, %<HI_ONLY:Vetype>1"
+  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
+   (set_attr "arch" "*,*,fp16,fp,fp,fp16")]
 )
 
-(define_insn "*zero_extendqihi2_aarch64"
-  [(set (match_operand:HI 0 "register_operand" "=r,r")
-	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+(define_insn "zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
+  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,r,w")
+        (zero_extend:SD_HSDI
+	  (match_operand:QI_ONLY 1 "nonimmediate_operand" "r,m,m,w,w")))]
   ""
   "@
-   and\t%w0, %w1, 255
-   ldrb\t%w0, %1"
-  [(set_attr "type" "logic_imm,load_4")]
+   uxt<QI_ONLY:size>\t%<SD_HSDI:w>0, %w1
+   ldr<QI_ONLY:sizel>\t%w0, %1
+   ldr\t%<QI_ONLY:Vetype>0, %1
+   umov\t%w0, %1.<QI_ONLY:Vetype>[0]
+   dup\t%<QI_ONLY:Vetype>0, %1.<QI_ONLY:Vetype>[0]"
+  [(set_attr "type" "mov_reg,load_4,f_loads,f_mrc,fmov")
+   (set_attr "arch" "*,*,fp,fp,fp")]
 )
 
 ;; -------------------------------------------------------------------
@@ -5029,15 +5001,15 @@  (define_insn "*and<mode>_compare0"
   [(set_attr "type" "alus_imm")]
 )
 
-(define_insn "*ands<GPI:mode>_compare0"
+(define_insn "*ands<SD_HSDI:mode>_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
-	 (zero_extend:GPI (match_operand:SHORT 1 "register_operand" "r"))
+	 (zero_extend:SD_HSDI (match_operand:ALLX 1 "register_operand" "r"))
 	 (const_int 0)))
-   (set (match_operand:GPI 0 "register_operand" "=r")
-	(zero_extend:GPI (match_dup 1)))]
+   (set (match_operand:SD_HSDI 0 "register_operand" "=r")
+	(zero_extend:SD_HSDI (match_dup 1)))]
   ""
-  "ands\\t%<GPI:w>0, %<GPI:w>1, <short_mask>"
+  "ands\\t%<SD_HSDI:w>0, %<SD_HSDI:w>1, <ALLX:short_mask>"
   [(set_attr "type" "alus_imm")]
 )
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 1df09f7fe2eb35aed96113476541e0faa5393551..e904407b2169e589b7007ff966b2d9347a6d0fd2 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -41,6 +41,8 @@  (define_mode_iterator SHORT [QI HI])
 ;; Iterators for single modes, for "@" patterns.
 (define_mode_iterator SI_ONLY [SI])
 (define_mode_iterator DI_ONLY [DI])
+(define_mode_iterator HI_ONLY [HI])
+(define_mode_iterator QI_ONLY [QI])
 
 ;; Iterator for all integer modes (up to 64-bit)
 (define_mode_iterator ALLI [QI HI SI DI])
@@ -1033,7 +1035,7 @@  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
 ;; For width of fp registers in fcvt instruction
 (define_mode_attr fpw [(DI "s") (SI "d")])
 
-(define_mode_attr short_mask [(HI "65535") (QI "255")])
+(define_mode_attr short_mask [(SI "0xffffffff") (HI "0xffff") (QI "0xff")])
 
 ;; For constraints used in scalar immediate vector moves
 (define_mode_attr hq [(HI "h") (QI "q")])
diff --git a/gcc/testsuite/gcc.target/aarch64/ands_3.c b/gcc/testsuite/gcc.target/aarch64/ands_3.c
index 42cb7f0f0bc86a4aceb09851c31eb2e888d93403..421aa5cea7a51ad810cc9c5653a149cb21bb871c 100644
--- a/gcc/testsuite/gcc.target/aarch64/ands_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/ands_3.c
@@ -9,4 +9,4 @@  f9 (unsigned char x, int y)
   return x;
 }
 
-/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[ \t\]*(x|w)\[0-9\]+,\[ \t\]*255" } } */
+/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[ \t\]*(x|w)\[0-9\]+,\[ \t\]*0xff" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
index 8e35e0b574d49913b43c7d8d4f4ba75f127f42e9..03288976b3397cdbe0e822f94f2a6448d9fa9a52 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
@@ -51,7 +51,6 @@  TEST_ALL (VEC_PERM)
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
 /* { dg-final { scan-assembler-not {\tldr} } } */
-/* { dg-final { scan-assembler-times {\tstr} 2 } } */
-/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {\tins\tv[0-9]+\.h\[1\], v[0-9]+\.h\[0\]} 1 } } */
 
 /* { dg-final { scan-assembler-not {\tuqdec} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/tst_5.c b/gcc/testsuite/gcc.target/aarch64/tst_5.c
index 0de40a6c47a7d63c1b7a81aeba438a096c0041b8..19034cd74ed07ea4d670c25d9ab3d1cff805a483 100644
--- a/gcc/testsuite/gcc.target/aarch64/tst_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/tst_5.c
@@ -4,7 +4,7 @@ 
 int
 f255 (int x)
 {
-  if (x & 255)
+  if (x & 0xff)
     return 1;
   return x;
 }
@@ -12,10 +12,10 @@  f255 (int x)
 int
 f65535 (int x)
 {
-  if (x & 65535)
+  if (x & 0xffff)
     return 1;
   return x;
 }
 
-/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*255" } } */
-/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*65535" } } */
+/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xff" } } */
+/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xffff" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/tst_6.c b/gcc/testsuite/gcc.target/aarch64/tst_6.c
index f15ec114c391fed79cc43b7740fde83fb3d4ea53..1c047cfae214b60e5bf003e6781a277202fcc588 100644
--- a/gcc/testsuite/gcc.target/aarch64/tst_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/tst_6.c
@@ -7,4 +7,4 @@  foo (long x)
    return ((short) x != 0) ? x : 1;
 }
 
-/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*65535" } } */
+/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xffff" } } */