i386: Add V2SFmode DIV insn pattern [PR95046, PR103797]
Commit Message
Implement V2SFmode division with the V4SFmode "DIVPS X,Y" instruction,
using [y0, y1, 1.0f, 1.0f] as the divisor so that the two unused upper
elements never divide by zero.
2021-12-24 Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog:
PR target/95046
PR target/103797
* config/i386/mmx.md (divv2sf3): New instruction pattern.
gcc/testsuite/ChangeLog:
PR target/95046
PR target/103797
* gcc.target/i386/pr95046-1.c (test_div): Add.
(dg-options): Add -mno-recip.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Pushed to master.
Uros.
@@ -523,6 +523,26 @@
(set_attr "prefix" "*,orig,vex")
(set_attr "mode" "V2SF,V4SF,V4SF")])
+(define_expand "divv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (div:V2SF (match_operand:V2SF 1 "register_operand")
+ (match_operand:V2SF 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
+{ /* Expand the V2SF division as a V4SF divide on widened operands.  */
+ rtx op0 = lowpart_subreg (V4SFmode, operands[0],
+ GET_MODE (operands[0])); /* V4SF lowpart view of the destination.  */
+ rtx op1 = lowpart_subreg (V4SFmode, operands[1],
+ GET_MODE (operands[1])); /* V4SF lowpart view of the dividend.  */
+ rtx op2 = gen_rtx_VEC_CONCAT (V4SFmode, operands[2],
+ force_reg (V2SFmode, CONST1_RTX (V2SFmode))); /* [y0, y1, 1.0f, 1.0f].  */
+ rtx tmp = gen_reg_rtx (V4SFmode);
+ /* Set a new V4SF register to the padded divisor; its 1.0f upper half avoids division by zero.  */
+ emit_insn (gen_rtx_SET (tmp, op2));
+ /* Emit the V4SF divide; the V2SF result is the low part of op0.  */
+ emit_insn (gen_divv4sf3 (op0, op1, tmp));
+ DONE; /* The insns emitted above are the whole expansion.  */
+})
+
(define_expand "mmx_<code>v2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(smaxmin:V2SF
@@ -1,6 +1,6 @@
/* PR target/95046 */
/* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O3 -ffast-math -msse2" } */
+/* { dg-options "-O3 -ffast-math -msse2 -mno-recip" } */
float r[2], a[2], b[2];
@@ -32,6 +32,15 @@ test_mult (void)
/* { dg-final { scan-assembler "\tv?mulps" } } */
+void
+test_div (void)
+{
+ for (int i = 0; i < 2; i++)
+ r[i] = a[i] / b[i]; /* Element-wise float divide of a by b into r; the scan below expects (v)divps.  */
+}
+
+/* { dg-final { scan-assembler "\tv?divps" } } */
+
void
test_min (void)
{