i386: Introduce V2QImode vectorized logic [PR103861]

Message ID CAFULd4bZTu5miELBtvwGWWSZX9z-xEvs4bCuC9ySD0C5VXYDNw@mail.gmail.com
State Committed
Commit 708b87dcb6e48cb48d170a4b3625088995377a5c
Headers
Series i386: Introduce V2QImode vectorized logic [PR103861]

Commit Message

Uros Bizjak Jan. 4, 2022, 6:46 p.m. UTC
  Add V2QImode logic operations with SSE and GP registers and split
them to V4QImode SSE instructions or SImode GP instructions.

The patch also fixes PR target/103900.

2022-01-04  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

    PR target/103861
    * config/i386/mmx.md (one_cmplv2qi2): New insn pattern.
    (one_cmplv2qi2 splitters): New post-reload splitters.
    (*andnotv2qi3): New insn pattern.
    (andnotv2qi3 splitters): New post-reload splitters.
    (<any_logic:code>v2qi3): New insn pattern.
    (<any_logic:code>v2qi3 splitters): New post-reload splitters.

gcc/testsuite/ChangeLog:

    PR target/103861
    * gcc.target/i386/warn-vect-op-2.c: Adjust warnings.
    * gcc.target/i386/pr103900.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
  

Patch

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 5b33d3cfc1c..fc8ec5e4d49 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2745,6 +2745,45 @@ 
   "TARGET_SSE2"
   "operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));")
 
+(define_insn "one_cmplv2qi2"
+  [(set (match_operand:V2QI 0 "register_operand" "=r,&x,&v")
+	(not:V2QI
+	  (match_operand:V2QI 1 "register_operand" "0,x,v")))]
+  ""
+  "#"
+  [(set_attr "isa" "*,sse2,avx512vl")
+   (set_attr "type" "negnot,sselog,sselog")
+   (set_attr "mode" "SI,TI,TI")])
+
+(define_split
+  [(set (match_operand:V2QI 0 "general_reg_operand")
+	(not:V2QI
+	  (match_operand:V2QI 1 "general_reg_operand")))]
+  "reload_completed"
+  [(set (match_dup 0)
+	(not:SI (match_dup 1)))]
+{
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+  [(set (match_operand:V2QI 0 "sse_reg_operand")
+	(not:V2QI
+	  (match_operand:V2QI 1 "sse_reg_operand")))]
+  "TARGET_SSE2 && reload_completed"
+  [(set (match_dup 0)
+	(xor:V4QI
+	  (match_dup 0) (match_dup 1)))]
+{
+  emit_insn
+   (gen_rtx_SET (gen_rtx_REG (V16QImode, REGNO (operands[0])),
+		 CONSTM1_RTX (V16QImode)));
+
+  operands[1] = gen_lowpart (V4QImode, operands[1]);
+  operands[0] = gen_lowpart (V4QImode, operands[0]);
+})
+
 (define_insn "mmx_andnot<mode>3"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
 	(and:MMXMODEI
@@ -2775,6 +2814,69 @@ 
    (set_attr "type" "sselog")
    (set_attr "mode" "TI")])
 
+(define_insn "*andnotv2qi3"
+  [(set (match_operand:V2QI 0 "register_operand" "=&r,r,x,x,v")
+        (and:V2QI
+	  (not:V2QI (match_operand:V2QI 1 "register_operand" "0,r,0,x,v"))
+	  (match_operand:V2QI 2 "register_operand" "r,r,x,x,v")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  [(set_attr "isa" "*,bmi,sse2_noavx,avx,avx512vl")
+   (set_attr "type" "alu,bitmanip,sselog,sselog,sselog")
+   (set_attr "mode" "SI,SI,TI,TI,TI")])
+
+(define_split
+  [(set (match_operand:V2QI 0 "general_reg_operand")
+        (and:V2QI
+	  (not:V2QI (match_operand:V2QI 1 "general_reg_operand"))
+	  (match_operand:V2QI 2 "general_reg_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI && reload_completed"
+  [(parallel
+     [(set (match_dup 0)
+	   (and:SI (not:SI (match_dup 1)) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[2] = gen_lowpart (SImode, operands[2]);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+  [(set (match_operand:V2QI 0 "general_reg_operand")
+        (and:V2QI
+	  (not:V2QI (match_operand:V2QI 1 "general_reg_operand"))
+	  (match_operand:V2QI 2 "general_reg_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_BMI && reload_completed"
+  [(set (match_dup 0)
+        (not:SI (match_dup 1)))
+   (parallel
+     [(set (match_dup 0)
+	   (and:SI (match_dup 0) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[2] = gen_lowpart (SImode, operands[2]);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+  [(set (match_operand:V2QI 0 "sse_reg_operand")
+        (and:V2QI
+	  (not:V2QI (match_operand:V2QI 1 "sse_reg_operand"))
+	  (match_operand:V2QI 2 "sse_reg_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_SSE2 && reload_completed"
+  [(set (match_dup 0)
+	(and:V4QI (not:V4QI (match_dup 1)) (match_dup 2)))]
+{
+  operands[2] = gen_lowpart (V4QImode, operands[2]);
+  operands[1] = gen_lowpart (V4QImode, operands[1]);
+  operands[0] = gen_lowpart (V4QImode, operands[0]);
+})
+
 (define_expand "mmx_<code><mode>3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
 	(any_logic:MMXMODEI
@@ -2821,6 +2923,50 @@ 
    (set_attr "type" "sselog")
    (set_attr "mode" "TI")])
 
+(define_insn "<code>v2qi3"
+  [(set (match_operand:V2QI 0 "register_operand" "=r,x,x,v")
+        (any_logic:V2QI
+	  (match_operand:V2QI 1 "register_operand" "%0,0,x,v")
+	  (match_operand:V2QI 2 "register_operand" "r,x,x,v")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
+   (set_attr "type" "alu,sselog,sselog,sselog")
+   (set_attr "mode" "SI,TI,TI,TI")])
+
+(define_split
+  [(set (match_operand:V2QI 0 "general_reg_operand")
+        (any_logic:V2QI
+	  (match_operand:V2QI 1 "general_reg_operand")
+	  (match_operand:V2QI 2 "general_reg_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_logic:SI (match_dup 1) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[2] = gen_lowpart (SImode, operands[2]);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+  [(set (match_operand:V2QI 0 "sse_reg_operand")
+        (any_logic:V2QI
+	  (match_operand:V2QI 1 "sse_reg_operand")
+	  (match_operand:V2QI 2 "sse_reg_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_SSE2 && reload_completed"
+  [(set (match_dup 0)
+	(any_logic:V4QI (match_dup 1) (match_dup 2)))]
+{
+  operands[2] = gen_lowpart (V4QImode, operands[2]);
+  operands[1] = gen_lowpart (V4QImode, operands[1]);
+  operands[0] = gen_lowpart (V4QImode, operands[0]);
+})
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel integral element swizzling
diff --git a/gcc/testsuite/gcc.target/i386/pr103900.c b/gcc/testsuite/gcc.target/i386/pr103900.c
new file mode 100644
index 00000000000..8793b492a05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103900.c
@@ -0,0 +1,25 @@ 
+/* PR target/103900 */
+/* { dg-do compile } */
+/* { dg-options "-O -fno-tree-dce -fno-tree-dse" } */
+
+typedef unsigned char __attribute__((__vector_size__(2))) T;
+typedef unsigned char __attribute__((__vector_size__(32))) U;
+typedef int __attribute__((__vector_size__(64))) V;
+typedef int __attribute__((__vector_size__(32))) W;
+T foo0_v128u8_0;
+U foo0_v256u8_0;
+T foo0_v16u16_0;
+int foo0_v128u64_0, foo0_v512u64_0;
+
+void
+foo0() {
+  V v512u128_0;
+  T v16u8_0;
+  foo0_v128u64_0 += (short)v16u8_0;
+  T v16u8_1 = ~__builtin_shufflevector(foo0_v128u8_0, foo0_v256u8_0, 0, 5);
+  W v256u128_1;
+  V v512u8_r =
+      foo0_v512u64_0 + v512u128_0;
+  (union {U b;}){}.b + (U)v256u128_1;
+  T v16u8_r = v16u8_0 + v16u8_1 + foo0_v16u16_0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c b/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c
index 5e378b6bd04..4560f7070bb 100644
--- a/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c
+++ b/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c
@@ -14,7 +14,7 @@  int main (int argc, char *argv[])
     v0 + v1,              /* { dg-warning "expanded piecewise" }  */
     v0 - v1,              /* { dg-warning "expanded piecewise" }  */
     v0 > v1,              /* { dg-warning "expanded piecewise" }  */
-    v0 & v1,              /* { dg-warning "expanded in parallel" }  */
+    v0 & v1,              /* { dg-warning "expanded piecewise" }  */
     __builtin_shuffle (v0, v1),        /* { dg-warning "expanded piecewise" }  */
     __builtin_shuffle (v0, v1, v1)     /* { dg-warning "expanded piecewise" }  */
   };