[committed] i386: Implement .SAT_TRUNC for unsigned integers

Message ID CAFULd4YS=YpMxEuGgUU4CjOh4+y_-v9=NwGj+HPTLcwWEwjrdw@mail.gmail.com
State New
Headers
Series [committed] i386: Implement .SAT_TRUNC for unsigned integers |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 warning Patch is already merged
linaro-tcwg-bot/tcwg_gcc_check--master-arm warning Patch is already merged

Commit Message

Uros Bizjak July 9, 2024, 3:41 p.m. UTC
  The following testcase:

unsigned short foo (unsigned int x)
{
  _Bool overflow = x > (unsigned int)(unsigned short)(-1);
  return ((unsigned short)x | (unsigned short)-overflow);
}

currently compiles (-O2) to:

foo:
    xorl    %eax, %eax
    cmpl    $65535, %edi
    seta    %al
    negl    %eax
    orl    %edi, %eax
    ret

We can expand through ustrunc{m}{n}2 optab to use carry flag from the
comparison and generate code using SBB:

foo:
    movl    $65535, %eax
    cmpl    %edi, %eax
    sbbl    %eax, %eax
    orl     %edi, %eax
    ret

or CMOV instruction:

foo:
    movl    $65535, %eax
    cmpl    %edi, %eax
    cmovnc    %edi, %eax
    ret

gcc/ChangeLog:

    * config/i386/i386.md (@cmp<mode>_1): Use SWI mode iterator.
    (ustruncdi<mode>2): New expander.
    (ustruncsi<mode>2): Ditto.
    (ustrunchiqi2): Ditto.

gcc/testsuite/ChangeLog:

    * gcc.target/i386/sattrunc-1.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
  

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 214cb2e239a..e2f30695d70 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1533,8 +1533,8 @@  (define_insn "@ccmp<mode>"
 
 (define_expand "@cmp<mode>_1"
   [(set (reg:CC FLAGS_REG)
-	(compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
-		    (match_operand:SWI48 1 "<general_operand>")))])
+	(compare:CC (match_operand:SWI 0 "nonimmediate_operand")
+		    (match_operand:SWI 1 "<general_operand>")))])	;; SWI, presumably so gen_cmphi_1 (used by ustrunchiqi2) exists.
 
 (define_mode_iterator SWI1248_AVX512BWDQ_64
   [(QI "TARGET_AVX512DQ") HI
@@ -9981,6 +9981,114 @@  (define_expand "ussub<mode>3"
   DONE;
 })
 
+(define_expand "ustruncdi<mode>2"	;; Unsigned saturating truncation of DI.
+  [(set (match_operand:SWI124 0 "register_operand")
+	(us_truncate:DI (match_operand:DI 1 "nonimmediate_operand")))]
+  "TARGET_64BIT"
+{
+  rtx op1 = force_reg (DImode, operands[1]);
+  rtx sat = force_reg (DImode, GEN_INT (GET_MODE_MASK (<MODE>mode)));
+  rtx dst;
+  /* SAT - OP1 borrows (sets CF) iff OP1 > SAT, i.e. OP1 must saturate.  */
+  emit_insn (gen_cmpdi_1 (sat, op1));
+
+  if (TARGET_CMOVE)
+    {
+      rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+			     const0_rtx);
+      /* CF clear: OP1 fits, keep its low part.  CF set: deliver SAT.  */
+      dst = force_reg (<MODE>mode, operands[0]);
+      emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
+			      gen_lowpart (SImode, op1),
+			      gen_lowpart (SImode, sat)));
+    }
+  else
+    {
+      rtx msk = gen_reg_rtx (<MODE>mode);
+      /* MSK = CF ? -1 : 0; IOR-ing it in forces all ones on overflow.  */
+      emit_insn (gen_x86_mov<mode>cc_0_m1_neg (msk));
+      dst = expand_simple_binop (<MODE>mode, IOR,
+				 gen_lowpart (<MODE>mode, op1), msk,
+				 operands[0], 1, OPTAB_WIDEN);
+    }
+
+  if (!rtx_equal_p (dst, operands[0]))
+    emit_move_insn (operands[0], dst);
+  DONE;
+})
+
+(define_expand "ustruncsi<mode>2"	;; Unsigned saturating truncation of SI.
+  [(set (match_operand:SWI12 0 "register_operand")
+	(us_truncate:SI (match_operand:SI 1 "nonimmediate_operand")))]
+  ""
+{
+  rtx op1 = force_reg (SImode, operands[1]);
+  rtx sat = force_reg (SImode, GEN_INT (GET_MODE_MASK (<MODE>mode)));
+  rtx dst;
+  /* SAT - OP1 borrows (sets CF) iff OP1 > SAT, i.e. OP1 must saturate.  */
+  emit_insn (gen_cmpsi_1 (sat, op1));
+
+  if (TARGET_CMOVE)
+    {
+      rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+			     const0_rtx);
+      /* CF clear: OP1 fits, keep its low part.  CF set: deliver SAT.  */
+      dst = force_reg (<MODE>mode, operands[0]);
+      emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
+			      gen_lowpart (SImode, op1),
+			      gen_lowpart (SImode, sat)));
+    }
+  else
+    {
+      rtx msk = gen_reg_rtx (<MODE>mode);
+      /* MSK = CF ? -1 : 0; IOR-ing it in forces all ones on overflow.  */
+      emit_insn (gen_x86_mov<mode>cc_0_m1_neg (msk));
+      dst = expand_simple_binop (<MODE>mode, IOR,
+				 gen_lowpart (<MODE>mode, op1), msk,
+				 operands[0], 1, OPTAB_WIDEN);
+    }
+
+  if (!rtx_equal_p (dst, operands[0]))
+    emit_move_insn (operands[0], dst);
+  DONE;
+})
+
+(define_expand "ustrunchiqi2"	;; Unsigned saturating truncation HI -> QI.
+  [(set (match_operand:QI 0 "register_operand")
+	(us_truncate:HI (match_operand:HI 1 "nonimmediate_operand")))]
+  ""
+{
+  rtx op1 = force_reg (HImode, operands[1]);
+  rtx sat = force_reg (HImode, GEN_INT (GET_MODE_MASK (QImode)));
+  rtx dst;
+  /* SAT - OP1 borrows (sets CF) iff OP1 > SAT, i.e. OP1 must saturate.  */
+  emit_insn (gen_cmphi_1 (sat, op1));
+
+  if (TARGET_CMOVE)
+    {
+      rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+			     const0_rtx);
+      /* CF clear: OP1 fits, keep its low part.  CF set: deliver SAT.  */
+      dst = force_reg (QImode, operands[0]);
+      emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
+			      gen_lowpart (SImode, op1),
+			      gen_lowpart (SImode, sat)));
+    }
+  else
+    {
+      rtx msk = gen_reg_rtx (QImode);
+      /* MSK = CF ? -1 : 0; IOR-ing it in forces all ones on overflow.  */
+      emit_insn (gen_x86_movqicc_0_m1_neg (msk));
+      dst = expand_simple_binop (QImode, IOR,
+				 gen_lowpart (QImode, op1), msk,
+				 operands[0], 1, OPTAB_WIDEN);
+    }
+
+  if (!rtx_equal_p (dst, operands[0]))
+    emit_move_insn (operands[0], dst);
+  DONE;
+})
+
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "<insn>xf3"
diff --git a/gcc/testsuite/gcc.target/i386/sattrunc-1.c b/gcc/testsuite/gcc.target/i386/sattrunc-1.c
new file mode 100644
index 00000000000..b1116a836dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sattrunc-1.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "sbb|cmov" 6 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "sbb|cmov" 3 { target ia32 } } } */
+/* Counts above: presumably one SBB or CMOV per function defined below.  */
+#include <stdint.h>
+/* NT function (WT x): yields (NT) x, or all ones when x exceeds NT's max.  */
+#define DEF_SAT_U_TRUNC(WT, NT)			\
+NT sat_u_truc_##WT##_to_##NT (WT x)		\
+{						\
+  _Bool overflow = x > (WT)(NT)(-1);		\
+  return (NT)x | (NT)-overflow;			\
+}
+/* 64-bit sources are only instantiated when __x86_64__ is defined.  */
+#ifdef __x86_64__
+DEF_SAT_U_TRUNC(uint64_t, uint32_t)
+DEF_SAT_U_TRUNC(uint64_t, uint16_t)
+DEF_SAT_U_TRUNC(uint64_t, uint8_t)
+#endif
+
+DEF_SAT_U_TRUNC(uint32_t, uint16_t)
+DEF_SAT_U_TRUNC(uint32_t, uint8_t)
+
+DEF_SAT_U_TRUNC(uint16_t, uint8_t)