i386: Improve and optimize ix86_expand_sse_movcc

Message ID CAFULd4Z1HHXco2oAAsT6axM6surA6o08n12_2=L7Mq0bC=pyJA@mail.gmail.com
State New
Headers
Series i386: Improve and optimize ix86_expand_sse_movcc

Commit Message

Uros Bizjak Jan. 15, 2022, 8:01 p.m. UTC
  Modernize ix86_expand_sse_movcc to use the expand_simple_{unop,binop}
infrastructure, which avoids manual handling of output registers.
Also fix a couple of inconsistent vector_all_ones_operand usages,
break up a couple of unnecessary else-if chains, eliminate common
subexpressions, and make some general code simplifications.

2022-01-15  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

    * config/i386/i386-expand.c (ix86_expand_sse_movcc): Use
    expand_simple_unop and expand_simple_binop instead of manually
    constructing NOT, AND and IOR RTXes.  Use vector_all_ones_operand
    consistently.  Eliminate common subexpressions and simplify code.
    * config/i386/sse.md (<any_logic:code><MODEF:mode>3): New expander.
    (<any_logic:code><MODEF:mode>3): Make public.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
  

Patch

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index c740d6e5c04..138580da96e 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -3781,6 +3781,7 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 {
   machine_mode mode = GET_MODE (dest);
   machine_mode cmpmode = GET_MODE (cmp);
+  rtx x;
 
   /* Simplify trivial VEC_COND_EXPR to avoid ICE in pr97506.  */
   if (rtx_equal_p (op_true, op_false))
@@ -3789,8 +3790,6 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
       return;
     }
 
-  rtx t2, t3, x;
-
   /* If we have an integer mask and FP value then we need
      to cast mask to FP mode.  */
   if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
@@ -3813,12 +3812,14 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 		  ? force_reg (mode, op_false) : op_false);
       if (op_true == CONST0_RTX (mode))
 	{
-	  rtx n = gen_reg_rtx (cmpmode);
 	  if (cmpmode == E_DImode && !TARGET_64BIT)
-	    emit_insn (gen_knotdi (n, cmp));
+	    {
+	      x = gen_reg_rtx (cmpmode);
+	      emit_insn (gen_knotdi (x, cmp));
+	    }
 	  else
-	    emit_insn (gen_rtx_SET (n, gen_rtx_fmt_e (NOT, cmpmode, cmp)));
-	  cmp = n;
+	    x = expand_simple_unop (cmpmode, NOT, cmp, NULL, 1);
+	  cmp = x;
 	  /* Reverse op_true op_false.  */
 	  std::swap (op_true, op_false);
 	}
@@ -3826,22 +3827,24 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
       if (mode == HFmode)
 	emit_insn (gen_movhf_mask (dest, op_true, op_false, cmp));
       else
-	{
-	  rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
-	  emit_insn (gen_rtx_SET (dest, vec_merge));
-	}
+	emit_insn (gen_rtx_SET (dest,
+				gen_rtx_VEC_MERGE (mode,
+						   op_true, op_false, cmp)));
       return;
     }
-  else if (vector_all_ones_operand (op_true, mode)
-	   && op_false == CONST0_RTX (mode))
+
+  if (vector_all_ones_operand (op_true, mode)
+      && op_false == CONST0_RTX (mode))
     {
-      emit_insn (gen_rtx_SET (dest, cmp));
+      emit_move_insn (dest, cmp);
       return;
     }
   else if (op_false == CONST0_RTX (mode))
     {
-      op_true = force_reg (mode, op_true);
-      ix86_emit_vec_binop (AND, mode, dest, cmp, op_true);
+      x = expand_simple_binop (mode, AND, cmp, op_true,
+			       dest, 1, OPTAB_DIRECT);
+      if (x != dest)
+	emit_move_insn (dest, x);
       return;
     }
   else if (op_true == CONST0_RTX (mode))
@@ -3851,13 +3854,16 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
       ix86_emit_vec_binop (AND, mode, dest, x, op_false);
       return;
     }
-  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
+  else if (vector_all_ones_operand (op_true, mode))
     {
-      op_false = force_reg (mode, op_false);
-      ix86_emit_vec_binop (IOR, mode, dest, cmp, op_false);
+      x = expand_simple_binop (mode, IOR, cmp, op_false,
+			       dest, 1, OPTAB_DIRECT);
+      if (x != dest)
+	emit_move_insn (dest, x);
       return;
     }
-  else if (TARGET_XOP)
+
+  if (TARGET_XOP)
     {
       op_true = force_reg (mode, op_true);
 
@@ -3865,16 +3871,17 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	  || !nonimmediate_operand (op_false, mode))
 	op_false = force_reg (mode, op_false);
 
-      emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
-							  op_true,
-							  op_false)));
+      emit_insn (gen_rtx_SET (dest,
+			      gen_rtx_IF_THEN_ELSE (mode, cmp,
+						    op_true, op_false)));
       return;
     }
 
   rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
-  rtx d = dest;
+  machine_mode blend_mode = mode;
 
-  if (!vector_operand (op_true, mode))
+  if (GET_MODE_SIZE (mode) < 16
+      || !vector_operand (op_true, mode))
     op_true = force_reg (mode, op_true);
 
   op_false = force_reg (mode, op_false);
@@ -3883,10 +3890,7 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
     {
     case E_V2SFmode:
       if (TARGET_SSE4_1)
-	{
-	  gen = gen_mmx_blendvps;
-	  op_true = force_reg (mode, op_true);
-	}
+	gen = gen_mmx_blendvps;
       break;
     case E_V4SFmode:
       if (TARGET_SSE4_1)
@@ -3898,54 +3902,32 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
       break;
     case E_SFmode:
       if (TARGET_SSE4_1)
-	{
-	  gen = gen_sse4_1_blendvss;
-	  op_true = force_reg (mode, op_true);
-	}
+	gen = gen_sse4_1_blendvss;
       break;
     case E_DFmode:
       if (TARGET_SSE4_1)
-	{
-	  gen = gen_sse4_1_blendvsd;
-	  op_true = force_reg (mode, op_true);
-	}
+	gen = gen_sse4_1_blendvsd;
       break;
     case E_V8QImode:
     case E_V4HImode:
     case E_V2SImode:
       if (TARGET_SSE4_1)
 	{
-	  op_true = force_reg (mode, op_true);
-
 	  gen = gen_mmx_pblendvb_v8qi;
-	  if (mode != V8QImode)
-	    d = gen_reg_rtx (V8QImode);
-	  op_false = gen_lowpart (V8QImode, op_false);
-	  op_true = gen_lowpart (V8QImode, op_true);
-	  cmp = gen_lowpart (V8QImode, cmp);
+	  blend_mode = V8QImode;
 	}
       break;
     case E_V4QImode:
     case E_V2HImode:
       if (TARGET_SSE4_1)
 	{
-	  op_true = force_reg (mode, op_true);
-
 	  gen = gen_mmx_pblendvb_v4qi;
-	  if (mode != V4QImode)
-	    d = gen_reg_rtx (V4QImode);
-	  op_false = gen_lowpart (V4QImode, op_false);
-	  op_true = gen_lowpart (V4QImode, op_true);
-	  cmp = gen_lowpart (V4QImode, cmp);
+	  blend_mode = V4QImode;
 	}
       break;
     case E_V2QImode:
       if (TARGET_SSE4_1)
-	{
-	  op_true = force_reg (mode, op_true);
-
-	  gen = gen_mmx_pblendvb_v2qi;
-	}
+	gen = gen_mmx_pblendvb_v2qi;
       break;
     case E_V16QImode:
     case E_V8HImode:
@@ -3955,11 +3937,7 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
       if (TARGET_SSE4_1)
 	{
 	  gen = gen_sse4_1_pblendvb;
-	  if (mode != V16QImode)
-	    d = gen_reg_rtx (V16QImode);
-	  op_false = gen_lowpart (V16QImode, op_false);
-	  op_true = gen_lowpart (V16QImode, op_true);
-	  cmp = gen_lowpart (V16QImode, cmp);
+	  blend_mode = V16QImode;
 	}
       break;
     case E_V8SFmode:
@@ -3978,11 +3956,7 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
       if (TARGET_AVX2)
 	{
 	  gen = gen_avx2_pblendvb;
-	  if (mode != V32QImode)
-	    d = gen_reg_rtx (V32QImode);
-	  op_false = gen_lowpart (V32QImode, op_false);
-	  op_true = gen_lowpart (V32QImode, op_true);
-	  cmp = gen_lowpart (V32QImode, cmp);
+	  blend_mode = V32QImode;
 	}
       break;
 
@@ -4014,26 +3988,36 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 
   if (gen != NULL)
     {
-      emit_insn (gen (d, op_false, op_true, cmp));
-      if (d != dest)
-	emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
+      if (blend_mode == mode)
+	x = dest;
+      else
+	{
+	  x = gen_reg_rtx (blend_mode);
+	  op_false = gen_lowpart (blend_mode, op_false);
+	  op_true = gen_lowpart (blend_mode, op_true);
+	  cmp = gen_lowpart (blend_mode, cmp);
+	}
+
+      emit_insn (gen (x, op_false, op_true, cmp));
+
+      if (x != dest)
+	emit_move_insn (dest, gen_lowpart (mode, x));
     }
   else
     {
-      op_true = force_reg (mode, op_true);
-
-      t2 = gen_reg_rtx (mode);
-      if (optimize)
-	t3 = gen_reg_rtx (mode);
-      else
-	t3 = dest;
+      rtx t2, t3;
 
-      ix86_emit_vec_binop (AND, mode, t2, op_true, cmp);
+      t2 = expand_simple_binop (mode, AND, op_true, cmp,
+				NULL, 1, OPTAB_DIRECT);
 
+      t3 = gen_reg_rtx (mode);
       x = gen_rtx_NOT (mode, cmp);
       ix86_emit_vec_binop (AND, mode, t3, x, op_false);
 
-      ix86_emit_vec_binop (IOR, mode, dest, t3, t2);
+      x = expand_simple_binop (mode, IOR, t3, t2,
+			       dest, 1, OPTAB_DIRECT);
+      if (x != dest)
+	emit_move_insn (dest, x);
     }
 }
 
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0864748875e..50dc5da9a38 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4956,7 +4956,7 @@ 
 	      ]
 	      (const_string "TI")))])
 
-(define_insn "*<code><mode>3"
+(define_insn "<code><mode>3"
   [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
 	(any_logic:MODEF
 	  (match_operand:MODEF 1 "register_operand" "%0,x,v,v")