i386: Introduce V2QImode vectorized shifts [PR103861]
Commit Message
Add V2QImode shift operations and split them into a pair of synthesized
QImode operations on the high and low bytes of an integer register.
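Also robustify the V2QImode arithmetic (negate, plus/minus) split patterns
by making their split conditions explicit: the splits now check
!TARGET_PARTIAL_REG_STALL (or optimizing for size), and the SSE negate
split additionally checks TARGET_SSE2.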
2022-01-13 Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog:
PR target/103861
* config/i386/i386.md (*ashlqi_ext<mode>_2): New insn pattern.
(*<any_shiftrt:insn>qi_ext<mode>_2): Ditto.
* config/i386/mmx.md (<any_shift:insn>v2qi3):
New insn_and_split pattern.
gcc/testsuite/ChangeLog:
PR target/103861
* gcc.target/i386/pr103861.c (shl,ashr,lshr): New tests.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Pushed to master.
Uros.
@@ -12413,6 +12413,54 @@
(const_string "*")))
(set_attr "mode" "<MODE>")])
+;; Left shift of the QImode value extracted from bits 8..15 of a register,
+;; written back to the same 8-bit field.  Operand 1 is tied to operand 0
+;; ("0" constraint) and the insn condition additionally requires both to
+;; be the same rtx; operand 2 is the shift count.  The flags register is
+;; clobbered.
+;;
+;; A shift by one is emitted as "add %h0, %h0" when TARGET_DOUBLE_WITH_ADD
+;; selects the "alu" type; otherwise "sal" is used, in its count-less form
+;; when the count is 1 and TARGET_SHIFT1 or size optimization applies.
+(define_insn "*ashlqi_ext<mode>_2"
+ [(set (zero_extract:SWI248
+ (match_operand:SWI248 0 "register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (subreg:SWI248
+ (ashift:QI
+ (subreg:QI
+ (zero_extract:SWI248
+ (match_operand:SWI248 1 "register_operand" "0")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 2 "nonmemory_operand" "cI")) 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ rtx_equal_p (operands[0], operands[1])"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ /* The "alu" type is only selected for a shift count of 1 (see the
+ "type" attribute below), so doubling via add is equivalent. */
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{b}\t%h0, %h0";
+
+ default:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{b}\t%h0";
+ else
+ return "sal{b}\t{%2, %h0|%h0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
+ (match_operand 2 "const1_operand"))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ ;; No immediate is encoded for the add form or for the count-less sal form.
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (eq_attr "type" "alu")
+ (and (eq_attr "type" "ishift")
+ (and (match_operand 2 "const1_operand")
+ (ior (match_test "TARGET_SHIFT1")
+ (match_test "optimize_function_for_size_p (cfun)")))))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "QI")])
+
;; See comment above `ashl<mode>3' about how this works.
(define_expand "<insn><mode>3"
@@ -13143,6 +13191,39 @@
(const_string "0")
(const_string "*")))
(set_attr "mode" "<MODE>")])
+
+;; Right shifts (one insn for each code of the any_shiftrt iterator) of the
+;; QImode value extracted from bits 8..15 of a register, written back to
+;; the same 8-bit field.  Operand 1 is tied to operand 0 ("0" constraint)
+;; and the insn condition additionally requires both to be the same rtx;
+;; operand 2 is the shift count.  The flags register is clobbered.
+(define_insn "*<insn>qi_ext<mode>_2"
+ [(set (zero_extract:SWI248
+ (match_operand:SWI248 0 "register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (subreg:SWI248
+ (any_shiftrt:QI
+ (subreg:QI
+ (zero_extract:SWI248
+ (match_operand:SWI248 1 "register_operand" "0")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 2 "nonmemory_operand" "cI")) 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ rtx_equal_p (operands[0], operands[1])"
+{
+ /* Use the count-less form for a shift by one when TARGET_SHIFT1 is set
+ or the function is optimized for size. */
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "<shift>{b}\t%h0";
+ else
+ return "<shift>{b}\t{%2, %h0|%h0, %2}";
+}
+ [(set_attr "type" "ishift")
+ ;; The count-less form carries no immediate; the condition mirrors the
+ ;; one used in the output code above.
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (match_operand 2 "const1_operand")
+ (ior (match_test "TARGET_SHIFT1")
+ (match_test "optimize_function_for_size_p (cfun)")))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "QI")])
;; Rotate instructions
@@ -1657,7 +1657,8 @@
(neg:V2QI
(match_operand:V2QI 1 "general_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
- "reload_completed"
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && reload_completed"
[(parallel
[(set (strict_low_part (match_dup 0))
(neg:QI (match_dup 1)))
@@ -1683,7 +1684,8 @@
(neg:V2QI
(match_operand:V2QI 1 "sse_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
- "reload_completed"
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && TARGET_SSE2 && reload_completed"
[(set (match_dup 0) (match_dup 2))
(set (match_dup 0)
(minus:V16QI (match_dup 0) (match_dup 1)))]
@@ -1757,7 +1759,8 @@
(match_operand:V2QI 1 "general_reg_operand")
(match_operand:V2QI 2 "general_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
- "reload_completed"
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && reload_completed"
[(parallel
[(set (strict_low_part (match_dup 0))
(plusminus:QI (match_dup 1) (match_dup 2)))
@@ -1790,7 +1793,8 @@
(match_operand:V2QI 1 "sse_reg_operand")
(match_operand:V2QI 2 "sse_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_SSE2 && reload_completed"
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && TARGET_SSE2 && reload_completed"
[(set (match_dup 0)
(plusminus:V16QI (match_dup 1) (match_dup 2)))]
{
@@ -2387,6 +2391,38 @@
(const_string "0")))
(set_attr "mode" "TI")])
+;; V2QImode shift by a scalar QImode count, one pattern for each code of
+;; the any_shift iterator.  The insn is available when
+;; !TARGET_PARTIAL_REG_STALL or when optimizing for size, and its output
+;; template is "#", so it is always split.  After reload it becomes two
+;; QImode shifts on the same register: one on bits 8..15 (accessed through
+;; an HImode zero_extract) and one on the low byte (strict_low_part of the
+;; QImode lowpart).  Both resulting insns clobber the flags register.
+(define_insn_and_split "<insn>v2qi3"
+ [(set (match_operand:V2QI 0 "register_operand" "=Q")
+ (any_shift:V2QI
+ (match_operand:V2QI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "cI")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (zero_extract:HI (match_dup 3) (const_int 8) (const_int 8))
+ (subreg:HI
+ (any_shift:QI
+ (subreg:QI
+ (zero_extract:HI (match_dup 4)
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_dup 2)) 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (strict_low_part (match_dup 0))
+ (any_shift:QI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ /* Build the HImode views (operands 3 and 4) first: operands 0 and 1
+ are still V2QImode here and are overwritten with their QImode
+ lowparts immediately afterwards. */
+ operands[4] = lowpart_subreg (HImode, operands[1], V2QImode);
+ operands[3] = lowpart_subreg (HImode, operands[0], V2QImode);
+ operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
+ operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
+}
+ [(set_attr "type" "multi")
+ (set_attr "mode" "QI")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral comparisons
@@ -3,6 +3,7 @@
/* { dg-options "-O2 -dp" } */
typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2)));
__v2qi and (__v2qi a, __v2qi b) { return a & b; };
@@ -20,4 +21,10 @@ __v2qi minus (__v2qi a, __v2qi b) { return a - b; };
__v2qi neg (__v2qi a) { return -a; };
+/* Shift tests: a two-element byte vector shifted by a scalar count.  */
+__v2qi shl (__v2qi a, int b) { return a << b; };
+
+/* Right shift of the plain-char vector type; presumably exercises the
+   arithmetic shift pattern (assumes plain char is signed on the target).  */
+__v2qi ashr (__v2qi a, int b) { return a >> b; };
+
+/* Right shift of the unsigned-char vector type.  */
+__v2qu lshr (__v2qu a, int b) { return a >> b; };
+
/* { dg-final { scan-assembler-not "insvhi" } } */