PR middle-end/95126: Expand small const structs as immediate constants.

Message ID 00d801d82b58$bccc1f40$36645dc0$@nextmovesoftware.com
State New
Headers
Series PR middle-end/95126: Expand small const structs as immediate constants.

Commit Message

Roger Sayle Feb. 26, 2022, 9:35 p.m. UTC
  This patch resolves PR middle-end/95126, which is a code quality regression,
by teaching the RTL expander to emit small const structs/unions as integer
immediate constants.

The motivating example from the bugzilla PR is:

struct small{ short a,b; signed char c; };
extern int func(struct small X);
void call_func(void)
{
    static struct small const s = { 1, 2, 0 };
    func(s);
}

which on x86_64 is currently compiled to:

call_func:
        movzwl  s.0+2(%rip), %eax
        movzwl  s.0(%rip), %edx
        movzwl  s.0+4(%rip), %edi
        salq    $16, %rax
        orq     %rdx, %rax
        salq    $32, %rdi
        orq     %rax, %rdi
        jmp     func

but with this patch is now optimized to:

call_func:
        movl    $131073, %edi
        jmp     func


This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check with no new failures.  Ok for mainline?


2022-02-26  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	PR middle-end/95126
	* calls.cc (load_register_parameters): When loading a suitable
	immediate_const_ctor_p VAR_DECL into a single word_mode register,
	construct it directly in a pseudo rather than read it (by parts)
	from memory.
	* expr.cc (int_expr_size): Make tree argument a const_tree.
	(immediate_const_ctor_p): Helper predicate.  Return true for
	simple constructors that may be materialized in a register.
	(expand_expr_real_1) [VAR_DECL]: When expanding a constant
	VAR_DECL with a suitable immediate_const_ctor_p constructor
	use store_constructor to materialize it directly in a pseudo.
	* expr.h (immediate_const_ctor_p): Prototype here.
	* varasm.cc (initializer_constant_valid_for_bitfield_p): Change
	VALUE argument from tree to const_tree.
	* varasm.h (initializer_constant_valid_for_bitfield_p): Update
	prototype.

gcc/testsuite/ChangeLog
	PR middle-end/95126
	* gcc.target/i386/pr95126-m32-1.c: New test case.
	* gcc.target/i386/pr95126-m32-2.c: New test case.
	* gcc.target/i386/pr95126-m32-3.c: New test case.
	* gcc.target/i386/pr95126-m32-4.c: New test case.
	* gcc.target/i386/pr95126-m64-1.c: New test case.
	* gcc.target/i386/pr95126-m64-2.c: New test case.
	* gcc.target/i386/pr95126-m64-3.c: New test case.
	* gcc.target/i386/pr95126-m64-4.c: New test case.


Thanks in advance,
Roger
--
  

Comments

Jeff Law June 1, 2022, 9:16 p.m. UTC | #1
On 2/26/2022 2:35 PM, Roger Sayle wrote:
> This patch resolves PR middle-end/95126 which is a code quality regression,
> by teaching the RTL expander to emit small const structs/unions as integer
> immediate constants.
>
> The motivating example from the bugzilla PR is:
>
> struct small{ short a,b; signed char c; };
> extern int func(struct small X);
> void call_func(void)
> {
>      static struct small const s = { 1, 2, 0 };
>      func(s);
> }
>
> which on x86_64 is currently compiled to:
>
> call_func:
>          movzwl  s.0+2(%rip), %eax
>          movzwl  s.0(%rip), %edx
>          movzwl  s.0+4(%rip), %edi
>          salq    $16, %rax
>          orq     %rdx, %rax
>          salq    $32, %rdi
>          orq     %rax, %rdi
>          jmp     func
>
> but with this patch is now optimized to:
>
> call_func:
>          movl    $131073, %edi
>          jmp     func
>
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check with no new failures.  Ok for mainline?
>
>
> 2022-02-26  Roger Sayle  <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
> 	PR middle-end/95126
> 	* calls.cc (load_register_parameters): When loading a suitable
> 	immediate_const_ctor_p VAR_DECL into a single word_mode register,
> 	construct it directly in a pseudo rather than read it (by parts)
> 	from memory.
> 	* expr.cc (int_expr_size): Make tree argument a const_tree.
> 	(immediate_const_ctor_p): Helper predicate.  Return true for
> 	simple constructors that may be materialized in a register.
> 	(expand_expr_real_1) [VAR_DECL]: When expanding a constant
> 	VAR_DECL with a suitable immediate_const_ctor_p constructor
> 	use store_constructor to materialize it directly in a pseudo.
> 	* expr.h (immediate_const_ctor_p): Prototype here.
> 	* varasm.cc (initializer_constant_valid_for_bitfield_p): Change
> 	VALUE argument from tree to const_tree.
> 	* varasm.h (initializer_constant_valid_for_bitfield_p): Update
> 	prototype.
>
> gcc/testsuite/ChangeLog
> 	PR middle-end/95126
> 	* gcc.target/i386/pr95126-m32-1.c: New test case.
> 	* gcc.target/i386/pr95126-m32-2.c: New test case.
> 	* gcc.target/i386/pr95126-m32-3.c: New test case.
> 	* gcc.target/i386/pr95126-m32-4.c: New test case.
> 	* gcc.target/i386/pr95126-m64-1.c: New test case.
> 	* gcc.target/i386/pr95126-m64-2.c: New test case.
> 	* gcc.target/i386/pr95126-m64-3.c: New test case.
> 	* gcc.target/i386/pr95126-m64-4.c: New test case.
OK after a fresh bootstrap & regression test.  Sorry for the long wait.

jeff
  

Patch

diff --git a/gcc/calls.cc b/gcc/calls.cc
index e64a937..3417005 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -2093,7 +2093,8 @@  load_register_parameters (struct arg_data *args, int num_actuals,
 	  poly_int64 size = 0;
 	  HOST_WIDE_INT const_size = 0;
 	  rtx_insn *before_arg = get_last_insn ();
-	  tree type = TREE_TYPE (args[i].tree_value);
+	  tree tree_value = args[i].tree_value;
+	  tree type = TREE_TYPE (tree_value);
 	  if (RECORD_OR_UNION_TYPE_P (type) && TYPE_TRANSPARENT_AGGR (type))
 	    type = TREE_TYPE (first_field (type));
 	  /* Set non-negative if we must move a word at a time, even if
@@ -2170,6 +2171,24 @@  load_register_parameters (struct arg_data *args, int num_actuals,
 	      emit_move_insn (gen_rtx_REG (word_mode, REGNO (reg) + j),
 			      args[i].aligned_regs[j]);
 
+	  /* If we need a single register and the source is a constant
+	     VAR_DECL with a simple constructor, expand that constructor
+	     via a pseudo rather than read from (possibly misaligned)
+	     memory.  PR middle-end/95126.  */
+	  else if (nregs == 1
+		   && partial == 0
+		   && !args[i].pass_on_stack
+		   && VAR_P (tree_value)
+		   && TREE_READONLY (tree_value)
+		   && !TREE_SIDE_EFFECTS (tree_value)
+		   && immediate_const_ctor_p (DECL_INITIAL (tree_value)))
+	    {
+	      rtx target = gen_reg_rtx (word_mode);
+	      rtx x = expand_expr (DECL_INITIAL (tree_value),
+				   target, word_mode, EXPAND_NORMAL);
+	      reg = gen_rtx_REG (word_mode, REGNO (reg));
+	      emit_move_insn (reg, x);
+	    }
 	  else if (partial == 0 || args[i].pass_on_stack)
 	    {
 	      /* SIZE and CONST_SIZE are 0 for partial arguments and
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 35e4029..b6d8d8c 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -99,7 +99,7 @@  static void do_tablejump (rtx, machine_mode, rtx, rtx, rtx,
 			  profile_probability);
 static rtx const_vector_from_tree (tree);
 static tree tree_expr_size (const_tree);
-static HOST_WIDE_INT int_expr_size (tree);
+static HOST_WIDE_INT int_expr_size (const_tree);
 static void convert_mode_scalar (rtx, rtx, int);
 
 
@@ -4866,7 +4866,22 @@  emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
 		    return false;
 		}
 	    }
-	  emit_block_move (target, xinner, size, BLOCK_OP_CALL_PARM);
+
+	  /* If source is a constant VAR_DECL with a simple constructor,
+	     store the constructor to the stack instead of moving it.  */
+	  const_tree decl;
+	  if (partial == 0
+	      && MEM_P (xinner)
+	      && SYMBOL_REF_P (XEXP (xinner, 0))
+	      && (decl = SYMBOL_REF_DECL (XEXP (xinner, 0))) != NULL_TREE
+	      && VAR_P (decl)
+	      && TREE_READONLY (decl)
+	      && !TREE_SIDE_EFFECTS (decl)
+	      && immediate_const_ctor_p (DECL_INITIAL (decl), 2))
+	    store_constructor (DECL_INITIAL (decl), target, 0,
+			       int_expr_size (DECL_INITIAL (decl)), false);
+	  else
+	    emit_block_move (target, xinner, size, BLOCK_OP_CALL_PARM);
 	}
     }
   else if (partial > 0)
@@ -6575,6 +6590,25 @@  categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
 				     p_init_elts, p_complete);
 }
 
+/* Return true if constructor CTOR is simple enough to be materialized
+   in an integer mode register.  Limit the size to WORDS words, which
+   is 1 by default.  */
+
+bool
+immediate_const_ctor_p (const_tree ctor, unsigned int words)
+{
+  /* Allow function to be called with a VAR_DECL's DECL_INITIAL.  */
+  if (!ctor || TREE_CODE (ctor) != CONSTRUCTOR)
+    return false;
+
+  return TREE_CONSTANT (ctor)
+	 && !TREE_ADDRESSABLE (ctor)
+	 && CONSTRUCTOR_NELTS (ctor)
+	 && TREE_CODE (TREE_TYPE (ctor)) != ARRAY_TYPE
+	 && int_expr_size (ctor) <= words * UNITS_PER_WORD
+	 && initializer_constant_valid_for_bitfield_p (ctor);
+}
+
 /* TYPE is initialized by a constructor with NUM_ELTS elements, the last
    of which had type LAST_TYPE.  Each element was itself a complete
    initializer, in the sense that every meaningful byte was explicitly
@@ -10534,6 +10568,21 @@  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 	  if (temp)
 	    return temp;
 	}
+      /* Expand const VAR_DECLs with CONSTRUCTOR initializers that
+	 have scalar integer modes to a reg via store_constructor.  */
+      if (TREE_READONLY (exp)
+	  && !TREE_SIDE_EFFECTS (exp)
+	  && (modifier == EXPAND_NORMAL || modifier == EXPAND_STACK_PARM)
+	  && immediate_const_ctor_p (DECL_INITIAL (exp))
+	  && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (exp)))
+	  && crtl->emit.regno_pointer_align_length
+	  && !target)
+	{
+	  target = gen_reg_rtx (TYPE_MODE (TREE_TYPE (exp)));
+	  store_constructor (DECL_INITIAL (exp), target, 0,
+			     int_expr_size (DECL_INITIAL (exp)), false);
+	  return target;
+	}
       /* ... fall through ...  */
 
     case PARM_DECL:
@@ -13128,7 +13177,7 @@  expr_size (tree exp)
    if the size can vary or is larger than an integer.  */
 
 static HOST_WIDE_INT
-int_expr_size (tree exp)
+int_expr_size (const_tree exp)
 {
   tree size;
 
diff --git a/gcc/expr.h b/gcc/expr.h
index 7e5cf49..d777c28 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -338,6 +338,7 @@  extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
 extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
 				      HOST_WIDE_INT *, HOST_WIDE_INT *,
 				      bool *);
+extern bool immediate_const_ctor_p (const_tree, unsigned int words = 1);
 
 extern void expand_operands (tree, tree, rtx, rtx*, rtx*,
 			     enum expand_modifier);
diff --git a/gcc/varasm.cc b/gcc/varasm.cc
index d3d9daf..a00f11d 100644
--- a/gcc/varasm.cc
+++ b/gcc/varasm.cc
@@ -5069,7 +5069,7 @@  initializer_constant_valid_p (tree value, tree endtype, bool reverse)
    an element of a "constant" initializer.  */
 
 bool
-initializer_constant_valid_for_bitfield_p (tree value)
+initializer_constant_valid_for_bitfield_p (const_tree value)
 {
   /* For bitfields we support integer constants or possibly nested aggregates
      of such.  */
@@ -5078,7 +5078,7 @@  initializer_constant_valid_for_bitfield_p (tree value)
     case CONSTRUCTOR:
       {
 	unsigned HOST_WIDE_INT idx;
-	tree elt;
+	const_tree elt;
 
 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (value), idx, elt)
 	  if (!initializer_constant_valid_for_bitfield_p (elt))
diff --git a/gcc/varasm.h b/gcc/varasm.h
index d5d8c4e..fc6c753 100644
--- a/gcc/varasm.h
+++ b/gcc/varasm.h
@@ -65,7 +65,7 @@  extern tree initializer_constant_valid_p (tree, tree, bool = false);
 /* Return true if VALUE is a valid constant-valued expression
    for use in initializing a static bit-field; one that can be
    an element of a "constant" initializer.  */
-extern bool initializer_constant_valid_for_bitfield_p (tree);
+extern bool initializer_constant_valid_for_bitfield_p (const_tree);
 
 /* Whether a constructor CTOR is a valid static constant initializer if all
    its elements are.  This used to be internal to initializer_constant_valid_p
diff --git a/gcc/testsuite/gcc.target/i386/pr95126-m32-1.c b/gcc/testsuite/gcc.target/i386/pr95126-m32-1.c
new file mode 100644
index 0000000..1d6acd6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95126-m32-1.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2" } */
+
+struct small{ short a,b; signed char c; };
+
+void call_func(void)
+{
+    extern int func(struct small X);
+    static struct small const s = { 1,2,0 };
+    func(s);
+}
+
+/* { dg-final { scan-assembler "movl\[ \\t]*\\\$" } } */
+/* { dg-final { scan-assembler "movb\[ \\t]*\\\$0, " } } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/pr95126-m32-2.c b/gcc/testsuite/gcc.target/i386/pr95126-m32-2.c
new file mode 100644
index 0000000..b46be9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95126-m32-2.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2" } */
+
+struct small{ short a,b; signed char c; };
+static const struct small s = { 1,2,0 };
+extern int func(struct small X);
+
+void call_func(void)
+{
+  func(s);
+}
+
+/* { dg-final { scan-assembler "movl\[ \\t]*\\\$" } } */
+/* { dg-final { scan-assembler "movb\[ \\t]*\\\$0, " } } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/pr95126-m32-3.c b/gcc/testsuite/gcc.target/i386/pr95126-m32-3.c
new file mode 100644
index 0000000..cc2fe94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95126-m32-3.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2" } */
+
+struct small{ short a; };
+
+void call_func(void)
+{
+    extern int func(struct small X);
+    static struct small const s = { 2 };
+    func(s);
+}
+
+/* { dg-final { scan-assembler "pushl\[ \\t]*\\\$2" } } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/pr95126-m32-4.c b/gcc/testsuite/gcc.target/i386/pr95126-m32-4.c
new file mode 100644
index 0000000..e829335
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95126-m32-4.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2" } */
+
+struct small{ short a,b; };
+
+void call_func(void)
+{
+    extern int func(struct small X);
+    static struct small const s = { 1,2 };
+    func(s);
+}
+
+/* { dg-final { scan-assembler "pushl\[ \\t]*\\\$131073" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95126-m64-1.c b/gcc/testsuite/gcc.target/i386/pr95126-m64-1.c
new file mode 100644
index 0000000..d5c6dded
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95126-m64-1.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+struct small{ short a,b; signed char c; };
+
+void call_func(void)
+{
+    extern int func(struct small X);
+    static struct small const s = { 1,2,0 };
+    func(s);
+}
+
+/* { dg-final { scan-assembler "movl\[ \\t]*\\\$131073, " } } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+/* { dg-final { scan-assembler-not "salq" } } */
+/* { dg-final { scan-assembler-not "orq" } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/pr95126-m64-2.c b/gcc/testsuite/gcc.target/i386/pr95126-m64-2.c
new file mode 100644
index 0000000..0230ffc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95126-m64-2.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+struct small{ short a,b; signed char c; };
+static const struct small s = { 1,2,0 };
+extern int func(struct small X);
+
+void call_func(void)
+{
+  func(s);
+}
+
+/* { dg-final { scan-assembler "movl\[ \\t]*\\\$131073, " } } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+/* { dg-final { scan-assembler-not "salq" } } */
+/* { dg-final { scan-assembler-not "orq" } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/pr95126-m64-3.c b/gcc/testsuite/gcc.target/i386/pr95126-m64-3.c
new file mode 100644
index 0000000..25afe3a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95126-m64-3.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+struct small{ short a; };
+
+void call_func(void)
+{
+    extern int func(struct small X);
+    static struct small const s = { 2 };
+    func(s);
+}
+
+/* { dg-final { scan-assembler "movl\[ \\t]*\\\$2, " } } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95126-m64-4.c b/gcc/testsuite/gcc.target/i386/pr95126-m64-4.c
new file mode 100644
index 0000000..71c7908
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95126-m64-4.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+struct small{ short a,b; };
+
+void call_func(void)
+{
+    extern int func(struct small X);
+    static struct small const s = { 1,2 };
+    func(s);
+}
+
+/* { dg-final { scan-assembler "movl\[ \\t]*\\\$131073, " } } */