[committed] hppa: Add support for 32-bit hppa targets in muldi3 expander

Message ID 4641577f-91ad-82a7-e381-62a0497bc092@bell.net
State Committed
Headers
Series [committed] hppa: Add support for 32-bit hppa targets in muldi3 expander |

Commit Message

John David Anglin Oct. 13, 2021, 4:04 p.m. UTC
  This patch allows inlining 64-bit hardware multiplication on 32-bit hppa targets
instead of using __muldi3 from libgcc.  This should improve performance at the expense of
a slight increase in code size.

We need this because I am testing a change to build libgcc with software float and integer
multiplication.

Tested on hppa2.0w-hp-hpux11.11, hppa64-hp-hpux11.11 and hppa-unknown-linux-gnu.  Committed to
all active branches.

Dave
---

Add support for 32-bit hppa targets in muldi3 expander

2021-10-13  John David Anglin  <danglin@gcc.gnu.org>

gcc/ChangeLog:

	* config/pa/pa.md (muldi3): Add support for inlining 64-bit
	multiplication on 32-bit PA 1.1 and 2.0 targets.
  

Patch

diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index b314f96de35..10623dd6fdb 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -5374,32 +5374,38 @@ 
    [(set (match_operand:DI 0 "register_operand" "")
          (mult:DI (match_operand:DI 1 "register_operand" "")
  		 (match_operand:DI 2 "register_operand" "")))]
-  "TARGET_64BIT && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT"
+  "! optimize_size
+   && TARGET_PA_11
+   && ! TARGET_DISABLE_FPREGS
+   && ! TARGET_SOFT_FLOAT"
    "
  {
    rtx low_product = gen_reg_rtx (DImode);
    rtx cross_product1 = gen_reg_rtx (DImode);
    rtx cross_product2 = gen_reg_rtx (DImode);
-  rtx cross_scratch = gen_reg_rtx (DImode);
-  rtx cross_product = gen_reg_rtx (DImode);
    rtx op1l, op1r, op2l, op2r;
-  rtx op1shifted, op2shifted;
-
-  op1shifted = gen_reg_rtx (DImode);
-  op2shifted = gen_reg_rtx (DImode);
-  op1l = gen_reg_rtx (SImode);
-  op1r = gen_reg_rtx (SImode);
-  op2l = gen_reg_rtx (SImode);
-  op2r = gen_reg_rtx (SImode);
-
-  emit_move_insn (op1shifted, gen_rtx_LSHIFTRT (DImode, operands[1],
-						GEN_INT (32)));
-  emit_move_insn (op2shifted, gen_rtx_LSHIFTRT (DImode, operands[2],
-						GEN_INT (32)));
-  op1r = force_reg (SImode, gen_rtx_SUBREG (SImode, operands[1], 4));
-  op2r = force_reg (SImode, gen_rtx_SUBREG (SImode, operands[2], 4));
-  op1l = force_reg (SImode, gen_rtx_SUBREG (SImode, op1shifted, 4));
-  op2l = force_reg (SImode, gen_rtx_SUBREG (SImode, op2shifted, 4));
+
+  if (TARGET_64BIT)
+    {
+      rtx op1shifted = gen_reg_rtx (DImode);
+      rtx op2shifted = gen_reg_rtx (DImode);
+
+      emit_move_insn (op1shifted, gen_rtx_LSHIFTRT (DImode, operands[1],
+						    GEN_INT (32)));
+      emit_move_insn (op2shifted, gen_rtx_LSHIFTRT (DImode, operands[2],
+						    GEN_INT (32)));
+      op1r = force_reg (SImode, gen_rtx_SUBREG (SImode, operands[1], 4));
+      op2r = force_reg (SImode, gen_rtx_SUBREG (SImode, operands[2], 4));
+      op1l = force_reg (SImode, gen_rtx_SUBREG (SImode, op1shifted, 4));
+      op2l = force_reg (SImode, gen_rtx_SUBREG (SImode, op2shifted, 4));
+    }
+  else
+    {
+      op1r = force_reg (SImode, gen_lowpart (SImode, operands[1]));
+      op2r = force_reg (SImode, gen_lowpart (SImode, operands[2]));
+      op1l = force_reg (SImode, gen_highpart (SImode, operands[1]));
+      op2l = force_reg (SImode, gen_highpart (SImode, operands[2]));
+    }

    /* Emit multiplies for the cross products.  */
    emit_insn (gen_umulsidi3 (cross_product1, op2r, op1l));
@@ -5408,13 +5414,35 @@ 
    /* Emit a multiply for the low sub-word.  */
    emit_insn (gen_umulsidi3 (low_product, copy_rtx (op2r), copy_rtx (op1r)));

-  /* Sum the cross products and shift them into proper position.  */
-  emit_insn (gen_adddi3 (cross_scratch, cross_product1, cross_product2));
-  emit_insn (gen_ashldi3 (cross_product, cross_scratch, GEN_INT (32)));
+  if (TARGET_64BIT)
+    {
+      rtx cross_scratch = gen_reg_rtx (DImode);
+      rtx cross_product = gen_reg_rtx (DImode);

-  /* Add the cross product to the low product and store the result
-     into the output operand .  */
-  emit_insn (gen_adddi3 (operands[0], cross_product, low_product));
+      /* Sum the cross products and shift them into proper position.  */
+      emit_insn (gen_adddi3 (cross_scratch, cross_product1, cross_product2));
+      emit_insn (gen_ashldi3 (cross_product, cross_scratch, GEN_INT (32)));
+
+      /* Add the cross product to the low product and store the result
+	 into the output operand .  */
+      emit_insn (gen_adddi3 (operands[0], cross_product, low_product));
+    }
+  else
+    {
+      rtx cross_scratch = gen_reg_rtx (SImode);
+
+      /* Sum cross products.  */
+      emit_move_insn (cross_scratch,
+		      gen_rtx_PLUS (SImode,
+				    gen_lowpart (SImode, cross_product1),
+				    gen_lowpart (SImode, cross_product2)));
+      emit_move_insn (gen_lowpart (SImode, operands[0]),
+		      gen_lowpart (SImode, low_product));
+      emit_move_insn (gen_highpart (SImode, operands[0]),
+		      gen_rtx_PLUS (SImode,
+				    gen_highpart (SImode, low_product),
+				    cross_scratch));
+    }
    DONE;
  }")