s390: Implement vec_set with vec_merge and, vec_duplicate.

Message ID 307ee6d9-7b40-a97e-8157-637073148543@linux.ibm.com
State New
Headers
Series s390: Implement vec_set with vec_merge and, vec_duplicate. |

Commit Message

Robin Dapp Aug. 12, 2022, 2:48 p.m. UTC
  Hi,

similar to other backends this patch implements vec_set via
vec_merge and vec_duplicate instead of an unspec.  This opens up
more possibilites to combine instructions.

Bootstrapped and regtested. No regressions.

Is it OK?

Regards
 Robin

gcc/ChangeLog:

	* config/s390/s390.md: Implement vec_set with vec_merge and
	vec_duplicate.
	* config/s390/vector.md: Likewise.
	* config/s390/vx-builtins.md: Likewise.
	* config/s390/s390.cc (s390_expand_vec_init): Emit new pattern.
	(print_operand_address): New output modifier.
	(print_operand): New output modifier.
---

   "vsce<V_HW_2:bhfgq>\t%v0,%O2(%v1,%R2),%3"
@@ -1858,14 +1861,18 @@ (define_expand "vec_ld2f"
    ; necessary since all elements of the vector will be set anyway.
    ; This is just to make it explicit to the data flow framework.
    (set (match_dup 2) (match_dup 3))
-   (set (match_dup 2) (unspec:V4SF [(match_operand:SF 1
"memory_operand" "")
-				    (const_int 0)
-				    (match_dup 2)]
-				    UNSPEC_VEC_SET))
-   (set (match_dup 2) (unspec:V4SF [(match_dup 4)
-				    (const_int 2)
-				    (match_dup 2)]
-				    UNSPEC_VEC_SET))
+   (set (match_dup 2)
+	(vec_merge:V4SF
+	  (vec_duplicate:V4SF
+	    (match_operand:SF 1 "memory_operand" ""))
+	  (match_dup 2)
+	  (const_int 1)))
+   (set (match_dup 2)
+	(vec_merge:V4SF
+	  (vec_duplicate:V4SF
+	    (match_dup 4))
+	  (match_dup 2)
+	  (const_int 4)))
    (set (match_operand:V2DF 0 "register_operand" "")
 	(unspec:V2DF [(match_dup 2)] UNSPEC_VEC_VFLL))]
   "TARGET_VX"
@@ -2303,26 +2310,29 @@ (define_insn_and_split "*eltswap<mode>_emu"
 ; vlebrh, vlebrf, vlebrg
 (define_insn "*vec_set_bswap_elem<mode>"
   [(set (match_operand:V_HW_HSD                                     0
"register_operand" "=v")
-	(unspec:V_HW_HSD [(bswap:<non_vec> (match_operand:<non_vec> 1
"memory_operand"    "R"))
-		                           (match_operand:SI        2
"const_int_operand" "C")
-					   (match_operand:V_HW_HSD  3 "register_operand"  "0")]
-		  UNSPEC_VEC_SET))]
-  "TARGET_VXE2 && UINTVAL (operands[2]) < GET_MODE_NUNITS
(<V_HW_HSD:MODE>mode)"
-  "vlebr<bhfgq>\t%v0,%1,%2"
+      (vec_merge:V_HW_HSD
+	(vec_duplicate:V_HW_HSD
+	 (bswap:<non_vec>
+	  (match_operand:<non_vec>    1 "memory_operand"	  "R")))
+	(match_operand:V_HW_HSD	      3 "register_operand"	  "0")
+	(match_operand:SI	      2 "const_int_operand"	  "C")))]
+  "TARGET_VXE2 && exact_log2 (UINTVAL (operands[2])) < GET_MODE_NUNITS
(<V_HW_HSD:MODE>mode)"
+  "vlebr<bhfgq>\t%v0,%1,%p2"
   [(set_attr "op_type" "VRX")])

 ; vec_revb (vec_insert (*a, vec_revb (b), 1))      set-element-bswap-1.c
 ; vlebrh, vlebrf, vlebrg
 (define_insn "*vec_set_bswap_vec<mode>"
   [(set (match_operand:V_HW_HSD                                     0
"register_operand"       "=v")
-	(bswap:V_HW_HSD
-	 (unspec:V_HW_HSD [(match_operand:<non_vec>                 1
"memory_operand"          "R")
-		           (match_operand:SI                        2
"const_int_operand"       "C")
-			   (bswap:V_HW_HSD (match_operand:V_HW_HSD  3 "register_operand"
   "0"))]
-			  UNSPEC_VEC_SET)))
-   (use (match_operand:V16QI                                        4
"permute_pattern_operand" "X"))]
-  "TARGET_VXE2 && UINTVAL (operands[2]) < GET_MODE_NUNITS
(<V_HW_HSD:MODE>mode)"
-  "vlebr<bhfgq>\t%v0,%1,%2"
+    (bswap:V_HW_HSD
+	(vec_merge:V_HW_HSD
+	  (vec_duplicate:V_HW_HSD
+	    (match_operand:<non_vec>  1 "memory_operand"            "R"))
+	  (match_operand:V_HW_HSD     3 "register_operand"          "0")
+	  (match_operand:SI           2 "const_int_operand"         "C"))))
+  (use (match_operand:V16QI           4 "permute_pattern_operand"   "X"))]
+  "TARGET_VXE2 && exact_log2 (UINTVAL (operands[2])) < GET_MODE_NUNITS
(<V_HW_HSD:MODE>mode)"
+  "vlebr<bhfgq>\t%v0,%1,%p2"
   [(set_attr "op_type" "VRX")])

 ; *a = vec_extract (vec_revb (b), 1);              get-element-bswap-3.c
  

Comments

Andreas Krebbel Aug. 16, 2022, 12:24 p.m. UTC | #1
On 8/12/22 16:48, Robin Dapp wrote:
> Hi,
> 
> similar to other backends this patch implements vec_set via
> vec_merge and vec_duplicate instead of an unspec.  This opens up
> more possibilites to combine instructions.
> 
> Bootstrapped and regtested. No regressions.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
> 	* config/s390/s390.md: Implement vec_set with vec_merge and
> 	vec_duplicate.
> 	* config/s390/vector.md: Likewise.
> 	* config/s390/vx-builtins.md: Likewise.
> 	* config/s390/s390.cc (s390_expand_vec_init): Emit new pattern.
> 	(print_operand_address): New output modifier.
> 	(print_operand): New output modifier.

The way you handle the element selector doesn't look right to me. It appears to be an index if it is
a CONST_INT and a bitmask otherwise. I don't think it is legal to change operand semantics like this
depending on the operand type. This would break e.g. if LRA would decide to load the immediate index
in a register.

Couldn't you make the shift part of the RTX instead and have the parameter always as an index?

Bye,

Andreas

> ---
> 
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index c86b26933d7a..ff89fb83360a 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -7073,11 +7073,10 @@ s390_expand_vec_init (rtx target, rtx vals)
>        if (!general_operand (elem, GET_MODE (elem)))
>  	elem = force_reg (inner_mode, elem);
> 
> -      emit_insn (gen_rtx_SET (target,
> -			      gen_rtx_UNSPEC (mode,
> -					      gen_rtvec (3, elem,
> -							 GEN_INT (i), target),
> -					      UNSPEC_VEC_SET)));
> +      emit_insn
> +	(gen_rtx_SET
> +	 (target, gen_rtx_VEC_MERGE
> +	  (mode, gen_rtx_VEC_DUPLICATE (mode, elem), target, GEN_INT (1 << i))));
>      }
>  }
> 
> @@ -8057,6 +8056,8 @@ print_operand_address (FILE *file, rtx addr)
>      'S': print S-type memory reference (base+displacement).
>      'Y': print address style operand without index (e.g. shift count or
> setmem
>  	 operand).
> +    'P': print address-style operand without index but with the offset as
> +	 if it were specified by a 'p' format flag.
> 
>      'b': print integer X as if it's an unsigned byte.
>      'c': print integer X as if it's an signed byte.
> @@ -8068,6 +8069,7 @@ print_operand_address (FILE *file, rtx addr)
>      'k': print the first nonzero SImode part of X.
>      'm': print the first SImode part unequal to -1 of X.
>      'o': print integer X as if it's an unsigned 32bit word.
> +    'p': print N such that 2^N == X (X must be a power of 2 and const int).
>      's': "start" of contiguous bitmask X in either DImode or vector
> inner mode.
>      't': CONST_INT: "start" of contiguous bitmask X in SImode.
>  	 CONST_VECTOR: Generate a bitmask for vgbm instruction.
> @@ -8237,6 +8239,16 @@ print_operand (FILE *file, rtx x, int code)
>        print_shift_count_operand (file, x);
>        return;
> 
> +    case 'P':
> +      if (CONST_INT_P (x))
> +	{
> +	  ival = exact_log2 (INTVAL (x));
> +	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
> +	}
> +      else
> +	print_shift_count_operand (file, x);
> +      return;
> +
>      case 'K':
>        /* Append @PLT to both local and non-local symbols in order to
> support
>  	 Linux Kernel livepatching: patches contain individual functions and
> @@ -8321,6 +8333,9 @@ print_operand (FILE *file, rtx x, int code)
>  	case 'o':
>  	  ival &= 0xffffffff;
>  	  break;
> +	case 'p':
> +	  ival = exact_log2 (INTVAL (x));
> +	  break;
>  	case 'e': case 'f':
>  	case 's': case 't':
>  	  {
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index f37d8fd33a15..a82db4c624fa 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -183,7 +183,6 @@ (define_c_enum "unspec" [
>     UNSPEC_VEC_GFMSUM_128
>     UNSPEC_VEC_GFMSUM_ACCUM
>     UNSPEC_VEC_GFMSUM_ACCUM_128
> -   UNSPEC_VEC_SET
> 
>     UNSPEC_VEC_VSUMG
>     UNSPEC_VEC_VSUMQ
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index c50451a8326c..bde3a39db3d4 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -467,12 +467,17 @@ (define_insn "mov<mode>"
>  ; vec_set is supposed to *modify* an existing vector so operand 0 is
>  ; duplicated as input operand.
>  (define_expand "vec_set<mode>"
> -  [(set (match_operand:V                    0 "register_operand"  "")
> -	(unspec:V [(match_operand:<non_vec> 1 "general_operand"   "")
> -		   (match_operand:SI        2 "nonmemory_operand" "")
> -		   (match_dup 0)]
> -		   UNSPEC_VEC_SET))]
> -  "TARGET_VX")
> +  [(set (match_operand:V	     0 "register_operand" "")
> +	(vec_merge:V
> +	  (vec_duplicate:V
> +	    (match_operand:<non_vec> 1 "general_operand" ""))
> +	  (match_dup 0)
> +	  (match_operand:SI	     2 "nonmemory_operand")))]
> +  ""
> +{
> +  if (CONST_INT_P (operands[2]))
> +    operands[2] = GEN_INT (1 << INTVAL (operands[2]));
> +})
> 
>  ; FIXME: Support also vector mode operands for 1
>  ; FIXME: A target memory operand seems to be useful otherwise we end
> @@ -480,28 +485,31 @@ (define_expand "vec_set<mode>"
>  ; that itself?
>  ; vlvgb, vlvgh, vlvgf, vlvgg, vleb, vleh, vlef, vleg, vleib, vleih,
> vleif, vleig
>  (define_insn "*vec_set<mode>"
> -  [(set (match_operand:V                    0 "register_operand"  "=v,v,v")
> -	(unspec:V [(match_operand:<non_vec> 1 "general_operand"    "d,R,K")
> -		   (match_operand:SI        2 "nonmemory_operand" "an,I,I")
> -		   (match_operand:V         3 "register_operand"   "0,0,0")]
> -		  UNSPEC_VEC_SET))]
> +  [(set (match_operand:V             0 "register_operand" "=v,v,v")
> +	(vec_merge:V
> +	  (vec_duplicate:V
> +	    (match_operand:<non_vec> 1 "general_operand" "d,R,K"))
> +	  (match_operand:V           3 "register_operand" "0,0,0")
> +	  (match_operand:SI          2 "nonmemory_operand" "an,I,I")))]
>    "TARGET_VX
>     && (!CONST_INT_P (operands[2])
> -       || UINTVAL (operands[2]) < GET_MODE_NUNITS (<V:MODE>mode))"
> +       || exact_log2 (UINTVAL (operands[2])) < (GET_MODE_NUNITS
> (<V:MODE>mode)))"
>    "@
> -   vlvg<bhfgq>\t%v0,%1,%Y2
> -   vle<bhfgq>\t%v0,%1,%2
> -   vlei<bhfgq>\t%v0,%1,%2"
> +   vlvg<bhfgq>\t%v0,%1,%P2
> +   vle<bhfgq>\t%v0,%1,%p2
> +   vlei<bhfgq>\t%v0,%1,%p2"
>    [(set_attr "op_type" "VRS,VRX,VRI")])
> 
>  ; vlvgb, vlvgh, vlvgf, vlvgg
>  (define_insn "*vec_set<mode>_plus"
> -  [(set (match_operand:V                      0 "register_operand" "=v")
> -	(unspec:V [(match_operand:<non_vec>   1 "general_operand"   "d")
> -		   (plus:SI (match_operand:SI 2 "register_operand"  "a")
> -			    (match_operand:SI 4 "const_int_operand" "n"))
> -		   (match_operand:V           3 "register_operand"  "0")]
> -		  UNSPEC_VEC_SET))]
> +  [(set (match_operand:V             0 "register_operand" "=v")
> +	(vec_merge:V
> +	  (vec_duplicate:V
> +	    (match_operand:<non_vec> 1 "general_operand" "d"))
> +	  (match_operand:V           3 "register_operand" "0")
> +	  (plus:SI
> +	   (match_operand:SI         2 "register_operand"  "a")
> +	   (match_operand:SI         4 "const_int_operand" "n"))))]
>    "TARGET_VX"
>    "vlvg<bhfgq>\t%v0,%1,%Y4(%2)"
>    [(set_attr "op_type" "VRS")])
> @@ -575,7 +583,7 @@ (define_insn "*vec_splat<mode>"
>  	  (match_operand:V_128_NOSINGLE 1 "register_operand"  "v")
>  	  (parallel
>  	   [(match_operand:QI 2 "const_mask_operand" "C")]))))]
> -  "TARGET_VX && UINTVAL (operands[2]) < GET_MODE_NUNITS (<MODE>mode)"
> +  "TARGET_VX && exact_log2 (UINTVAL (operands[2])) < GET_MODE_NUNITS
> (<MODE>mode)"
>    "vrep<bhfgq>\t%v0,%v1,%2"
>    [(set_attr "op_type" "VRI")])
> 
> @@ -678,13 +686,18 @@ (define_split
>  	(vec_duplicate:V_128_NOSINGLE (match_operand:<non_vec> 1
> "register_operand" "")))]
>    "TARGET_VX && GENERAL_REG_P (operands[1])"
>    [(set (match_dup 0)
> -	(unspec:V_128_NOSINGLE [(match_dup 1) (match_dup 2) (match_dup 0)]
> UNSPEC_VEC_SET))
> +    (vec_merge:V_128_NOSINGLE
> +      (vec_duplicate:V_128_NOSINGLE
> +	(match_dup 1))
> +      (match_dup 0)
> +      (match_dup 2)))
>     (set (match_dup 0)
>  	(vec_duplicate:V_128_NOSINGLE
>  	 (vec_select:<non_vec>
> -	  (match_dup 0) (parallel [(match_dup 2)]))))]
> +	  (match_dup 0) (parallel [(match_dup 3)]))))]
>  {
> -  operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
> +  operands[2] = GEN_INT (1 << (GET_MODE_NUNITS (<MODE>mode) - 1));
> +  operands[3] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
>  })
> 
>  (define_predicate "vcond_comparison_operator"
> @@ -1136,9 +1149,12 @@ (define_expand "popcountv8hi2_vx"
>     (set (match_dup 3) (match_dup 2))
>     ; Generate the shift count operand in a VR (8->byte 7)
>     (set (match_dup 4) (match_dup 5))
> -   (set (match_dup 4) (unspec:V16QI [(const_int 8)
> -				     (const_int 7)
> -				     (match_dup 4)] UNSPEC_VEC_SET))
> +   (set (match_dup 4)
> +      (vec_merge:V16QI
> +	(vec_duplicate:V16QI
> +	 (const_int 8))
> +	(match_dup 4)
> +	(const_int 128)))
>     ; Vector shift right logical by one byte
>     (set (match_dup 3)
>  	(unspec:V16QI [(match_dup 3) (match_dup 4)] UNSPEC_VEC_SRLB))
> @@ -1339,10 +1355,11 @@ (define_insn "*vec_slb<mode>"
>  ; this means it is a left shift on BE targets!
>  (define_expand "vec_shr_<mode>"
>    [(set (match_dup 3)
> -	(unspec:V16QI [(match_operand:SI 2 "const_shift_by_byte_operand" "")
> -		   (const_int 7)
> -		   (match_dup 3)]
> -		   UNSPEC_VEC_SET))
> +	(vec_merge:V16QI
> +	  (vec_duplicate:V16QI
> +	    (match_operand:SI 2 "const_shift_by_byte_operand" ""))
> +	  (match_dup 3)
> +	  (const_int 128)))
>     (set (match_operand:V_128 0 "register_operand" "")
>  	(unspec:V_128 [(match_operand:V_128 1 "register_operand" "")
>  		    (match_dup 3)]
> diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
> index 01dfde438af5..2bdd694112a1 100644
> --- a/gcc/config/s390/vx-builtins.md
> +++ b/gcc/config/s390/vx-builtins.md
> @@ -150,22 +150,25 @@ (define_expand "vec_splats<mode>"
>  	(vec_duplicate:VEC_HW (match_operand:<non_vec> 1 "general_operand"  "")))]
>    "TARGET_VX")
> 
> +
>  (define_expand "vec_insert<mode>"
> -  [(set (match_operand:VEC_HW                    0 "register_operand" "")
> -	(unspec:VEC_HW [(match_operand:<non_vec> 2 "register_operand" "")
> -			(match_operand:SI        3 "nonmemory_operand" "")
> -			(match_operand:VEC_HW    1 "register_operand" "")]
> -		       UNSPEC_VEC_SET))]
> +  [(set (match_operand:VEC_HW        0 "register_operand" "")
> +	(vec_merge:VEC_HW
> +	  (vec_duplicate:VEC_HW
> +	    (match_operand:<non_vec> 2 "register_operand" ""))
> +	  (match_operand:VEC_HW      1 "register_operand" "")
> +	  (match_operand:SI          3 "nonmemory_operand" "")))]
>    "TARGET_VX"
>    "")
> 
>  ; This is vec_set + modulo arithmetic on the element selector (op 2)
>  (define_expand "vec_promote<mode>"
> -  [(set (match_operand:VEC_HW                    0 "register_operand" "")
> -	(unspec:VEC_HW [(match_operand:<non_vec> 1 "register_operand" "")
> -			(match_operand:SI        2 "nonmemory_operand" "")
> -			(match_dup 0)]
> -		       UNSPEC_VEC_SET))]
> +  [(set (match_operand:VEC_HW        0 "register_operand" "")
> +	(vec_merge:VEC_HW
> +	  (vec_duplicate:VEC_HW
> +	    (match_operand:<non_vec> 1 "register_operand" ""))
> +	  (match_dup 0)
> +	  (match_operand:SI          2 "nonmemory_operand" "")))]
>    "TARGET_VX"
>    "")
> 
> @@ -457,11 +460,11 @@ (define_insn "vec_scatter_element<V_HW_2:mode>_SI"
>    [(set (mem:<non_vec>
>  	 (plus:SI (subreg:SI
>  		   (vec_select:<non_vec_int>
> -		    (match_operand:V_HW_2 1 "register_operand"   "v")
> -		    (parallel [(match_operand:QI     3 "const_mask_operand" "C")])) 4)
> -	  (match_operand:SI               2 "address_operand"   "ZQ")))
> +		    (match_operand:<TOINTVEC>	  1 "register_operand"   "v")
> +		    (parallel [(match_operand:QI  3 "const_mask_operand" "C")])) 4)
> +	  (match_operand:SI			  2 "address_operand"   "ZQ")))
>      (vec_select:<non_vec>
> -     (match_operand:V_HW_2                0 "register_operand" "v")
> +     (match_operand:V_HW_2			  0 "register_operand" "v")
>       (parallel [(match_dup 3)])))]
>    "TARGET_VX && !TARGET_64BIT && UINTVAL (operands[3]) <
> GET_MODE_NUNITS (<V_HW_2:MODE>mode)"
>    "vsce<V_HW_2:bhfgq>\t%v0,%O2(%v1,%R2),%3"
> @@ -1858,14 +1861,18 @@ (define_expand "vec_ld2f"
>     ; necessary since all elements of the vector will be set anyway.
>     ; This is just to make it explicit to the data flow framework.
>     (set (match_dup 2) (match_dup 3))
> -   (set (match_dup 2) (unspec:V4SF [(match_operand:SF 1
> "memory_operand" "")
> -				    (const_int 0)
> -				    (match_dup 2)]
> -				    UNSPEC_VEC_SET))
> -   (set (match_dup 2) (unspec:V4SF [(match_dup 4)
> -				    (const_int 2)
> -				    (match_dup 2)]
> -				    UNSPEC_VEC_SET))
> +   (set (match_dup 2)
> +	(vec_merge:V4SF
> +	  (vec_duplicate:V4SF
> +	    (match_operand:SF 1 "memory_operand" ""))
> +	  (match_dup 2)
> +	  (const_int 1)))
> +   (set (match_dup 2)
> +	(vec_merge:V4SF
> +	  (vec_duplicate:V4SF
> +	    (match_dup 4))
> +	  (match_dup 2)
> +	  (const_int 4)))
>     (set (match_operand:V2DF 0 "register_operand" "")
>  	(unspec:V2DF [(match_dup 2)] UNSPEC_VEC_VFLL))]
>    "TARGET_VX"
> @@ -2303,26 +2310,29 @@ (define_insn_and_split "*eltswap<mode>_emu"
>  ; vlebrh, vlebrf, vlebrg
>  (define_insn "*vec_set_bswap_elem<mode>"
>    [(set (match_operand:V_HW_HSD                                     0
> "register_operand" "=v")
> -	(unspec:V_HW_HSD [(bswap:<non_vec> (match_operand:<non_vec> 1
> "memory_operand"    "R"))
> -		                           (match_operand:SI        2
> "const_int_operand" "C")
> -					   (match_operand:V_HW_HSD  3 "register_operand"  "0")]
> -		  UNSPEC_VEC_SET))]
> -  "TARGET_VXE2 && UINTVAL (operands[2]) < GET_MODE_NUNITS
> (<V_HW_HSD:MODE>mode)"
> -  "vlebr<bhfgq>\t%v0,%1,%2"
> +      (vec_merge:V_HW_HSD
> +	(vec_duplicate:V_HW_HSD
> +	 (bswap:<non_vec>
> +	  (match_operand:<non_vec>    1 "memory_operand"	  "R")))
> +	(match_operand:V_HW_HSD	      3 "register_operand"	  "0")
> +	(match_operand:SI	      2 "const_int_operand"	  "C")))]
> +  "TARGET_VXE2 && exact_log2 (UINTVAL (operands[2])) < GET_MODE_NUNITS
> (<V_HW_HSD:MODE>mode)"
> +  "vlebr<bhfgq>\t%v0,%1,%p2"
>    [(set_attr "op_type" "VRX")])
> 
>  ; vec_revb (vec_insert (*a, vec_revb (b), 1))      set-element-bswap-1.c
>  ; vlebrh, vlebrf, vlebrg
>  (define_insn "*vec_set_bswap_vec<mode>"
>    [(set (match_operand:V_HW_HSD                                     0
> "register_operand"       "=v")
> -	(bswap:V_HW_HSD
> -	 (unspec:V_HW_HSD [(match_operand:<non_vec>                 1
> "memory_operand"          "R")
> -		           (match_operand:SI                        2
> "const_int_operand"       "C")
> -			   (bswap:V_HW_HSD (match_operand:V_HW_HSD  3 "register_operand"
>    "0"))]
> -			  UNSPEC_VEC_SET)))
> -   (use (match_operand:V16QI                                        4
> "permute_pattern_operand" "X"))]
> -  "TARGET_VXE2 && UINTVAL (operands[2]) < GET_MODE_NUNITS
> (<V_HW_HSD:MODE>mode)"
> -  "vlebr<bhfgq>\t%v0,%1,%2"
> +    (bswap:V_HW_HSD
> +	(vec_merge:V_HW_HSD
> +	  (vec_duplicate:V_HW_HSD
> +	    (match_operand:<non_vec>  1 "memory_operand"            "R"))
> +	  (match_operand:V_HW_HSD     3 "register_operand"          "0")
> +	  (match_operand:SI           2 "const_int_operand"         "C"))))
> +  (use (match_operand:V16QI           4 "permute_pattern_operand"   "X"))]
> +  "TARGET_VXE2 && exact_log2 (UINTVAL (operands[2])) < GET_MODE_NUNITS
> (<V_HW_HSD:MODE>mode)"
> +  "vlebr<bhfgq>\t%v0,%1,%p2"
>    [(set_attr "op_type" "VRX")])
> 
>  ; *a = vec_extract (vec_revb (b), 1);              get-element-bswap-3.c
  

Patch

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index c86b26933d7a..ff89fb83360a 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -7073,11 +7073,10 @@  s390_expand_vec_init (rtx target, rtx vals)
       if (!general_operand (elem, GET_MODE (elem)))
 	elem = force_reg (inner_mode, elem);

-      emit_insn (gen_rtx_SET (target,
-			      gen_rtx_UNSPEC (mode,
-					      gen_rtvec (3, elem,
-							 GEN_INT (i), target),
-					      UNSPEC_VEC_SET)));
+      emit_insn
+	(gen_rtx_SET
+	 (target, gen_rtx_VEC_MERGE
+	  (mode, gen_rtx_VEC_DUPLICATE (mode, elem), target, GEN_INT (1 << i))));
     }
 }

@@ -8057,6 +8056,8 @@  print_operand_address (FILE *file, rtx addr)
     'S': print S-type memory reference (base+displacement).
     'Y': print address style operand without index (e.g. shift count or
setmem
 	 operand).
+    'P': print address-style operand without index but with the offset as
+	 if it were specified by a 'p' format flag.

     'b': print integer X as if it's an unsigned byte.
     'c': print integer X as if it's an signed byte.
@@ -8068,6 +8069,7 @@  print_operand_address (FILE *file, rtx addr)
     'k': print the first nonzero SImode part of X.
     'm': print the first SImode part unequal to -1 of X.
     'o': print integer X as if it's an unsigned 32bit word.
+    'p': print N such that 2^N == X (X must be a power of 2 and const int).
     's': "start" of contiguous bitmask X in either DImode or vector
inner mode.
     't': CONST_INT: "start" of contiguous bitmask X in SImode.
 	 CONST_VECTOR: Generate a bitmask for vgbm instruction.
@@ -8237,6 +8239,16 @@  print_operand (FILE *file, rtx x, int code)
       print_shift_count_operand (file, x);
       return;

+    case 'P':
+      if (CONST_INT_P (x))
+	{
+	  ival = exact_log2 (INTVAL (x));
+	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
+	}
+      else
+	print_shift_count_operand (file, x);
+      return;
+
     case 'K':
       /* Append @PLT to both local and non-local symbols in order to
support
 	 Linux Kernel livepatching: patches contain individual functions and
@@ -8321,6 +8333,9 @@  print_operand (FILE *file, rtx x, int code)
 	case 'o':
 	  ival &= 0xffffffff;
 	  break;
+	case 'p':
+	  ival = exact_log2 (INTVAL (x));
+	  break;
 	case 'e': case 'f':
 	case 's': case 't':
 	  {
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index f37d8fd33a15..a82db4c624fa 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -183,7 +183,6 @@  (define_c_enum "unspec" [
    UNSPEC_VEC_GFMSUM_128
    UNSPEC_VEC_GFMSUM_ACCUM
    UNSPEC_VEC_GFMSUM_ACCUM_128
-   UNSPEC_VEC_SET

    UNSPEC_VEC_VSUMG
    UNSPEC_VEC_VSUMQ
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index c50451a8326c..bde3a39db3d4 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -467,12 +467,17 @@  (define_insn "mov<mode>"
 ; vec_set is supposed to *modify* an existing vector so operand 0 is
 ; duplicated as input operand.
 (define_expand "vec_set<mode>"
-  [(set (match_operand:V                    0 "register_operand"  "")
-	(unspec:V [(match_operand:<non_vec> 1 "general_operand"   "")
-		   (match_operand:SI        2 "nonmemory_operand" "")
-		   (match_dup 0)]
-		   UNSPEC_VEC_SET))]
-  "TARGET_VX")
+  [(set (match_operand:V	     0 "register_operand" "")
+	(vec_merge:V
+	  (vec_duplicate:V
+	    (match_operand:<non_vec> 1 "general_operand" ""))
+	  (match_dup 0)
+	  (match_operand:SI	     2 "nonmemory_operand")))]
+  ""
+{
+  if (CONST_INT_P (operands[2]))
+    operands[2] = GEN_INT (1 << INTVAL (operands[2]));
+})

 ; FIXME: Support also vector mode operands for 1
 ; FIXME: A target memory operand seems to be useful otherwise we end
@@ -480,28 +485,31 @@  (define_expand "vec_set<mode>"
 ; that itself?
 ; vlvgb, vlvgh, vlvgf, vlvgg, vleb, vleh, vlef, vleg, vleib, vleih,
vleif, vleig
 (define_insn "*vec_set<mode>"
-  [(set (match_operand:V                    0 "register_operand"  "=v,v,v")
-	(unspec:V [(match_operand:<non_vec> 1 "general_operand"    "d,R,K")
-		   (match_operand:SI        2 "nonmemory_operand" "an,I,I")
-		   (match_operand:V         3 "register_operand"   "0,0,0")]
-		  UNSPEC_VEC_SET))]
+  [(set (match_operand:V             0 "register_operand" "=v,v,v")
+	(vec_merge:V
+	  (vec_duplicate:V
+	    (match_operand:<non_vec> 1 "general_operand" "d,R,K"))
+	  (match_operand:V           3 "register_operand" "0,0,0")
+	  (match_operand:SI          2 "nonmemory_operand" "an,I,I")))]
   "TARGET_VX
    && (!CONST_INT_P (operands[2])
-       || UINTVAL (operands[2]) < GET_MODE_NUNITS (<V:MODE>mode))"
+       || exact_log2 (UINTVAL (operands[2])) < (GET_MODE_NUNITS
(<V:MODE>mode)))"
   "@
-   vlvg<bhfgq>\t%v0,%1,%Y2
-   vle<bhfgq>\t%v0,%1,%2
-   vlei<bhfgq>\t%v0,%1,%2"
+   vlvg<bhfgq>\t%v0,%1,%P2
+   vle<bhfgq>\t%v0,%1,%p2
+   vlei<bhfgq>\t%v0,%1,%p2"
   [(set_attr "op_type" "VRS,VRX,VRI")])

 ; vlvgb, vlvgh, vlvgf, vlvgg
 (define_insn "*vec_set<mode>_plus"
-  [(set (match_operand:V                      0 "register_operand" "=v")
-	(unspec:V [(match_operand:<non_vec>   1 "general_operand"   "d")
-		   (plus:SI (match_operand:SI 2 "register_operand"  "a")
-			    (match_operand:SI 4 "const_int_operand" "n"))
-		   (match_operand:V           3 "register_operand"  "0")]
-		  UNSPEC_VEC_SET))]
+  [(set (match_operand:V             0 "register_operand" "=v")
+	(vec_merge:V
+	  (vec_duplicate:V
+	    (match_operand:<non_vec> 1 "general_operand" "d"))
+	  (match_operand:V           3 "register_operand" "0")
+	  (plus:SI
+	   (match_operand:SI         2 "register_operand"  "a")
+	   (match_operand:SI         4 "const_int_operand" "n"))))]
   "TARGET_VX"
   "vlvg<bhfgq>\t%v0,%1,%Y4(%2)"
   [(set_attr "op_type" "VRS")])
@@ -575,7 +583,7 @@  (define_insn "*vec_splat<mode>"
 	  (match_operand:V_128_NOSINGLE 1 "register_operand"  "v")
 	  (parallel
 	   [(match_operand:QI 2 "const_mask_operand" "C")]))))]
-  "TARGET_VX && UINTVAL (operands[2]) < GET_MODE_NUNITS (<MODE>mode)"
+  "TARGET_VX && exact_log2 (UINTVAL (operands[2])) < GET_MODE_NUNITS
(<MODE>mode)"
   "vrep<bhfgq>\t%v0,%v1,%2"
   [(set_attr "op_type" "VRI")])

@@ -678,13 +686,18 @@  (define_split
 	(vec_duplicate:V_128_NOSINGLE (match_operand:<non_vec> 1
"register_operand" "")))]
   "TARGET_VX && GENERAL_REG_P (operands[1])"
   [(set (match_dup 0)
-	(unspec:V_128_NOSINGLE [(match_dup 1) (match_dup 2) (match_dup 0)]
UNSPEC_VEC_SET))
+    (vec_merge:V_128_NOSINGLE
+      (vec_duplicate:V_128_NOSINGLE
+	(match_dup 1))
+      (match_dup 0)
+      (match_dup 2)))
    (set (match_dup 0)
 	(vec_duplicate:V_128_NOSINGLE
 	 (vec_select:<non_vec>
-	  (match_dup 0) (parallel [(match_dup 2)]))))]
+	  (match_dup 0) (parallel [(match_dup 3)]))))]
 {
-  operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
+  operands[2] = GEN_INT (1 << (GET_MODE_NUNITS (<MODE>mode) - 1));
+  operands[3] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
 })

 (define_predicate "vcond_comparison_operator"
@@ -1136,9 +1149,12 @@  (define_expand "popcountv8hi2_vx"
    (set (match_dup 3) (match_dup 2))
    ; Generate the shift count operand in a VR (8->byte 7)
    (set (match_dup 4) (match_dup 5))
-   (set (match_dup 4) (unspec:V16QI [(const_int 8)
-				     (const_int 7)
-				     (match_dup 4)] UNSPEC_VEC_SET))
+   (set (match_dup 4)
+      (vec_merge:V16QI
+	(vec_duplicate:V16QI
+	 (const_int 8))
+	(match_dup 4)
+	(const_int 128)))
    ; Vector shift right logical by one byte
    (set (match_dup 3)
 	(unspec:V16QI [(match_dup 3) (match_dup 4)] UNSPEC_VEC_SRLB))
@@ -1339,10 +1355,11 @@  (define_insn "*vec_slb<mode>"
 ; this means it is a left shift on BE targets!
 (define_expand "vec_shr_<mode>"
   [(set (match_dup 3)
-	(unspec:V16QI [(match_operand:SI 2 "const_shift_by_byte_operand" "")
-		   (const_int 7)
-		   (match_dup 3)]
-		   UNSPEC_VEC_SET))
+	(vec_merge:V16QI
+	  (vec_duplicate:V16QI
+	    (match_operand:SI 2 "const_shift_by_byte_operand" ""))
+	  (match_dup 3)
+	  (const_int 128)))
    (set (match_operand:V_128 0 "register_operand" "")
 	(unspec:V_128 [(match_operand:V_128 1 "register_operand" "")
 		    (match_dup 3)]
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 01dfde438af5..2bdd694112a1 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -150,22 +150,25 @@  (define_expand "vec_splats<mode>"
 	(vec_duplicate:VEC_HW (match_operand:<non_vec> 1 "general_operand"  "")))]
   "TARGET_VX")

+
 (define_expand "vec_insert<mode>"
-  [(set (match_operand:VEC_HW                    0 "register_operand" "")
-	(unspec:VEC_HW [(match_operand:<non_vec> 2 "register_operand" "")
-			(match_operand:SI        3 "nonmemory_operand" "")
-			(match_operand:VEC_HW    1 "register_operand" "")]
-		       UNSPEC_VEC_SET))]
+  [(set (match_operand:VEC_HW        0 "register_operand" "")
+	(vec_merge:VEC_HW
+	  (vec_duplicate:VEC_HW
+	    (match_operand:<non_vec> 2 "register_operand" ""))
+	  (match_operand:VEC_HW      1 "register_operand" "")
+	  (match_operand:SI          3 "nonmemory_operand" "")))]
   "TARGET_VX"
   "")

 ; This is vec_set + modulo arithmetic on the element selector (op 2)
 (define_expand "vec_promote<mode>"
-  [(set (match_operand:VEC_HW                    0 "register_operand" "")
-	(unspec:VEC_HW [(match_operand:<non_vec> 1 "register_operand" "")
-			(match_operand:SI        2 "nonmemory_operand" "")
-			(match_dup 0)]
-		       UNSPEC_VEC_SET))]
+  [(set (match_operand:VEC_HW        0 "register_operand" "")
+	(vec_merge:VEC_HW
+	  (vec_duplicate:VEC_HW
+	    (match_operand:<non_vec> 1 "register_operand" ""))
+	  (match_dup 0)
+	  (match_operand:SI          2 "nonmemory_operand" "")))]
   "TARGET_VX"
   "")

@@ -457,11 +460,11 @@  (define_insn "vec_scatter_element<V_HW_2:mode>_SI"
   [(set (mem:<non_vec>
 	 (plus:SI (subreg:SI
 		   (vec_select:<non_vec_int>
-		    (match_operand:V_HW_2 1 "register_operand"   "v")
-		    (parallel [(match_operand:QI     3 "const_mask_operand" "C")])) 4)
-	  (match_operand:SI               2 "address_operand"   "ZQ")))
+		    (match_operand:<TOINTVEC>	  1 "register_operand"   "v")
+		    (parallel [(match_operand:QI  3 "const_mask_operand" "C")])) 4)
+	  (match_operand:SI			  2 "address_operand"   "ZQ")))
     (vec_select:<non_vec>
-     (match_operand:V_HW_2                0 "register_operand" "v")
+     (match_operand:V_HW_2			  0 "register_operand" "v")
      (parallel [(match_dup 3)])))]
   "TARGET_VX && !TARGET_64BIT && UINTVAL (operands[3]) <
GET_MODE_NUNITS (<V_HW_2:MODE>mode)"