[4/8] AArch64 aarch64: Implement widening reduction patterns

Message ID Y1+4UYIESInTYiGq@arm.com
State Dropped
Headers
Series [1/8] middle-end: Recognize scalar reductions from bitfields and array_refs |

Commit Message

Tamar Christina Oct. 31, 2022, 11:58 a.m. UTC
  Hi All,

This implements the new widening reduction optab in the backend.
Instead of introducing a duplicate definition for the same thing I have
renamed the intrinsics definitions to use the same optab.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-simd-builtins.def (saddlv, uaddlv): Rename to
	reduc_splus_widen_scal_ and reduc_uplus_widen_scal_ respectively.
	* config/aarch64/aarch64-simd.md (aarch64_<su>addlv<mode>): Renamed to
	...
	(reduc_<su>plus_widen_scal_<mode>): ... This.
	* config/aarch64/arm_neon.h (vaddlv_s8, vaddlv_s16, vaddlv_u8,
	vaddlv_u16, vaddlvq_s8, vaddlvq_s16, vaddlvq_s32, vaddlvq_u8,
	vaddlvq_u16, vaddlvq_u32, vaddlv_s32, vaddlv_u32): Use it.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index cf46b31627b84476a25762ffc708fd84a4086e43..a4b21e1495c5699d8557a4bcb9e73ef98ae60b35 100644




--
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index cf46b31627b84476a25762ffc708fd84a4086e43..a4b21e1495c5699d8557a4bcb9e73ef98ae60b35 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -190,9 +190,9 @@
   BUILTIN_VDQV_L (UNOP, saddlp, 0, NONE)
   BUILTIN_VDQV_L (UNOPU, uaddlp, 0, NONE)
 
-  /* Implemented by aarch64_<su>addlv<mode>.  */
-  BUILTIN_VDQV_L (UNOP, saddlv, 0, NONE)
-  BUILTIN_VDQV_L (UNOPU, uaddlv, 0, NONE)
+  /* Implemented by reduc_<su>plus_widen_scal_<mode>.  */
+  BUILTIN_VDQV_L (UNOP, reduc_splus_widen_scal_, 10, NONE)
+  BUILTIN_VDQV_L (UNOPU, reduc_uplus_widen_scal_, 10, NONE)
 
   /* Implemented by aarch64_<su>abd<mode>.  */
   BUILTIN_VDQ_BHSI (BINOP, sabd, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index cf8c094bd4b76981cef2dd5dd7b8e6be0d56101f..25aed74f8cf939562ed65a578fe32ca76605b58a 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3455,7 +3455,7 @@ (define_expand "reduc_plus_scal_v4sf"
   DONE;
 })
 
-(define_insn "aarch64_<su>addlv<mode>"
+(define_insn "reduc_<su>plus_widen_scal_<mode>"
  [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
        (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
 		    USADDLV))]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index cf6af728ca99dae1cb6ab647466cfec32f7e913e..7b2c4c016191bcd6c3e075d27810faedb23854b7 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -3664,70 +3664,70 @@ __extension__ extern __inline int16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_s8 (int8x8_t __a)
 {
-  return __builtin_aarch64_saddlvv8qi (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v8qi (__a);
 }
 
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_s16 (int16x4_t __a)
 {
-  return __builtin_aarch64_saddlvv4hi (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v4hi (__a);
 }
 
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_u8 (uint8x8_t __a)
 {
-  return __builtin_aarch64_uaddlvv8qi_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v8qi_uu (__a);
 }
 
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_u16 (uint16x4_t __a)
 {
-  return __builtin_aarch64_uaddlvv4hi_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v4hi_uu (__a);
 }
 
 __extension__ extern __inline int16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_s8 (int8x16_t __a)
 {
-  return __builtin_aarch64_saddlvv16qi (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v16qi (__a);
 }
 
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_s16 (int16x8_t __a)
 {
-  return __builtin_aarch64_saddlvv8hi (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v8hi (__a);
 }
 
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_s32 (int32x4_t __a)
 {
-  return __builtin_aarch64_saddlvv4si (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v4si (__a);
 }
 
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_u8 (uint8x16_t __a)
 {
-  return __builtin_aarch64_uaddlvv16qi_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v16qi_uu (__a);
 }
 
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_u16 (uint16x8_t __a)
 {
-  return __builtin_aarch64_uaddlvv8hi_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v8hi_uu (__a);
 }
 
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_u32 (uint32x4_t __a)
 {
-  return __builtin_aarch64_uaddlvv4si_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v4si_uu (__a);
 }
 
 __extension__ extern __inline float32x2_t
@@ -6461,14 +6461,14 @@ __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_s32 (int32x2_t __a)
 {
-  return __builtin_aarch64_saddlvv2si (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v2si (__a);
 }
 
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_u32 (uint32x2_t __a)
 {
-  return __builtin_aarch64_uaddlvv2si_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v2si_uu (__a);
 }
 
 __extension__ extern __inline int16x4_t
  

Comments

Richard Sandiford Nov. 1, 2022, 2:41 p.m. UTC | #1
Tamar Christina <tamar.christina@arm.com> writes:
> Hi All,
>
> This implements the new widening reduction optab in the backend.
> Instead of introducing a duplicate definition for the same thing I have
> renamed the intrinsics defintions to use the same optab.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64-simd-builtins.def (saddlv, uaddlv): Rename to
> 	reduc_splus_widen_scal_ and reduc_uplus_widen_scal_ respectively.
> 	* config/aarch64/aarch64-simd.md (aarch64_<su>addlv<mode>): Renamed to
> 	...
> 	(reduc_<su>plus_widen_scal_<mode>): ... This.
> 	* config/aarch64/arm_neon.h (vaddlv_s8, vaddlv_s16, vaddlv_u8,
> 	vaddlv_u16, vaddlvq_s8, vaddlvq_s16, vaddlvq_s32, vaddlvq_u8,
> 	vaddlvq_u16, vaddlvq_u32, vaddlv_s32, vaddlv_u32): Use it.

OK, thanks.

Richard

> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
> index cf46b31627b84476a25762ffc708fd84a4086e43..a4b21e1495c5699d8557a4bcb9e73ef98ae60b35 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> @@ -190,9 +190,9 @@
>    BUILTIN_VDQV_L (UNOP, saddlp, 0, NONE)
>    BUILTIN_VDQV_L (UNOPU, uaddlp, 0, NONE)
>  
> -  /* Implemented by aarch64_<su>addlv<mode>.  */
> -  BUILTIN_VDQV_L (UNOP, saddlv, 0, NONE)
> -  BUILTIN_VDQV_L (UNOPU, uaddlv, 0, NONE)
> +  /* Implemented by reduc_<su>plus_widen_scal_<mode>.  */
> +  BUILTIN_VDQV_L (UNOP, reduc_splus_widen_scal_, 10, NONE)
> +  BUILTIN_VDQV_L (UNOPU, reduc_uplus_widen_scal_, 10, NONE)
>  
>    /* Implemented by aarch64_<su>abd<mode>.  */
>    BUILTIN_VDQ_BHSI (BINOP, sabd, 0, NONE)
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index cf8c094bd4b76981cef2dd5dd7b8e6be0d56101f..25aed74f8cf939562ed65a578fe32ca76605b58a 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -3455,7 +3455,7 @@ (define_expand "reduc_plus_scal_v4sf"
>    DONE;
>  })
>  
> -(define_insn "aarch64_<su>addlv<mode>"
> +(define_insn "reduc_<su>plus_widen_scal_<mode>"
>   [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
>         (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
>  		    USADDLV))]
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index cf6af728ca99dae1cb6ab647466cfec32f7e913e..7b2c4c016191bcd6c3e075d27810faedb23854b7 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -3664,70 +3664,70 @@ __extension__ extern __inline int16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlv_s8 (int8x8_t __a)
>  {
> -  return __builtin_aarch64_saddlvv8qi (__a);
> +  return __builtin_aarch64_reduc_splus_widen_scal_v8qi (__a);
>  }
>  
>  __extension__ extern __inline int32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlv_s16 (int16x4_t __a)
>  {
> -  return __builtin_aarch64_saddlvv4hi (__a);
> +  return __builtin_aarch64_reduc_splus_widen_scal_v4hi (__a);
>  }
>  
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlv_u8 (uint8x8_t __a)
>  {
> -  return __builtin_aarch64_uaddlvv8qi_uu (__a);
> +  return __builtin_aarch64_reduc_uplus_widen_scal_v8qi_uu (__a);
>  }
>  
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlv_u16 (uint16x4_t __a)
>  {
> -  return __builtin_aarch64_uaddlvv4hi_uu (__a);
> +  return __builtin_aarch64_reduc_uplus_widen_scal_v4hi_uu (__a);
>  }
>  
>  __extension__ extern __inline int16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlvq_s8 (int8x16_t __a)
>  {
> -  return __builtin_aarch64_saddlvv16qi (__a);
> +  return __builtin_aarch64_reduc_splus_widen_scal_v16qi (__a);
>  }
>  
>  __extension__ extern __inline int32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlvq_s16 (int16x8_t __a)
>  {
> -  return __builtin_aarch64_saddlvv8hi (__a);
> +  return __builtin_aarch64_reduc_splus_widen_scal_v8hi (__a);
>  }
>  
>  __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlvq_s32 (int32x4_t __a)
>  {
> -  return __builtin_aarch64_saddlvv4si (__a);
> +  return __builtin_aarch64_reduc_splus_widen_scal_v4si (__a);
>  }
>  
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlvq_u8 (uint8x16_t __a)
>  {
> -  return __builtin_aarch64_uaddlvv16qi_uu (__a);
> +  return __builtin_aarch64_reduc_uplus_widen_scal_v16qi_uu (__a);
>  }
>  
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlvq_u16 (uint16x8_t __a)
>  {
> -  return __builtin_aarch64_uaddlvv8hi_uu (__a);
> +  return __builtin_aarch64_reduc_uplus_widen_scal_v8hi_uu (__a);
>  }
>  
>  __extension__ extern __inline uint64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlvq_u32 (uint32x4_t __a)
>  {
> -  return __builtin_aarch64_uaddlvv4si_uu (__a);
> +  return __builtin_aarch64_reduc_uplus_widen_scal_v4si_uu (__a);
>  }
>  
>  __extension__ extern __inline float32x2_t
> @@ -6461,14 +6461,14 @@ __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlv_s32 (int32x2_t __a)
>  {
> -  return __builtin_aarch64_saddlvv2si (__a);
> +  return __builtin_aarch64_reduc_splus_widen_scal_v2si (__a);
>  }
>  
>  __extension__ extern __inline uint64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlv_u32 (uint32x2_t __a)
>  {
> -  return __builtin_aarch64_uaddlvv2si_uu (__a);
> +  return __builtin_aarch64_reduc_uplus_widen_scal_v2si_uu (__a);
>  }
>  
>  __extension__ extern __inline int16x4_t
  

Patch

--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -190,9 +190,9 @@ 
   BUILTIN_VDQV_L (UNOP, saddlp, 0, NONE)
   BUILTIN_VDQV_L (UNOPU, uaddlp, 0, NONE)
 
-  /* Implemented by aarch64_<su>addlv<mode>.  */
-  BUILTIN_VDQV_L (UNOP, saddlv, 0, NONE)
-  BUILTIN_VDQV_L (UNOPU, uaddlv, 0, NONE)
+  /* Implemented by reduc_<su>plus_widen_scal_<mode>.  */
+  BUILTIN_VDQV_L (UNOP, reduc_splus_widen_scal_, 10, NONE)
+  BUILTIN_VDQV_L (UNOPU, reduc_uplus_widen_scal_, 10, NONE)
 
   /* Implemented by aarch64_<su>abd<mode>.  */
   BUILTIN_VDQ_BHSI (BINOP, sabd, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index cf8c094bd4b76981cef2dd5dd7b8e6be0d56101f..25aed74f8cf939562ed65a578fe32ca76605b58a 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3455,7 +3455,7 @@  (define_expand "reduc_plus_scal_v4sf"
   DONE;
 })
 
-(define_insn "aarch64_<su>addlv<mode>"
+(define_insn "reduc_<su>plus_widen_scal_<mode>"
  [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
        (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
 		    USADDLV))]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index cf6af728ca99dae1cb6ab647466cfec32f7e913e..7b2c4c016191bcd6c3e075d27810faedb23854b7 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -3664,70 +3664,70 @@  __extension__ extern __inline int16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_s8 (int8x8_t __a)
 {
-  return __builtin_aarch64_saddlvv8qi (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v8qi (__a);
 }
 
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_s16 (int16x4_t __a)
 {
-  return __builtin_aarch64_saddlvv4hi (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v4hi (__a);
 }
 
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_u8 (uint8x8_t __a)
 {
-  return __builtin_aarch64_uaddlvv8qi_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v8qi_uu (__a);
 }
 
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_u16 (uint16x4_t __a)
 {
-  return __builtin_aarch64_uaddlvv4hi_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v4hi_uu (__a);
 }
 
 __extension__ extern __inline int16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_s8 (int8x16_t __a)
 {
-  return __builtin_aarch64_saddlvv16qi (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v16qi (__a);
 }
 
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_s16 (int16x8_t __a)
 {
-  return __builtin_aarch64_saddlvv8hi (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v8hi (__a);
 }
 
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_s32 (int32x4_t __a)
 {
-  return __builtin_aarch64_saddlvv4si (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v4si (__a);
 }
 
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_u8 (uint8x16_t __a)
 {
-  return __builtin_aarch64_uaddlvv16qi_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v16qi_uu (__a);
 }
 
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_u16 (uint16x8_t __a)
 {
-  return __builtin_aarch64_uaddlvv8hi_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v8hi_uu (__a);
 }
 
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_u32 (uint32x4_t __a)
 {
-  return __builtin_aarch64_uaddlvv4si_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v4si_uu (__a);
 }
 
 __extension__ extern __inline float32x2_t
@@ -6461,14 +6461,14 @@  __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_s32 (int32x2_t __a)
 {
-  return __builtin_aarch64_saddlvv2si (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v2si (__a);
 }
 
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_u32 (uint32x2_t __a)
 {
-  return __builtin_aarch64_uaddlvv2si_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v2si_uu (__a);
 }
 
 __extension__ extern __inline int16x4_t