x86_64: Add insn patterns for V1TI mode logic operations.

Message ID 002f01d7c715$1cc96400$565c2c00$@nextmovesoftware.com
State New
Series x86_64: Add insn patterns for V1TI mode logic operations.

Commit Message

Roger Sayle Oct. 22, 2021, 7:19 a.m. UTC
  On x86_64, V1TI mode holds a 128-bit integer value in a (vector) SSE
register (where regular TI mode uses a pair of 64-bit general-purpose
scalar registers).  This patch improves the implementation of AND, IOR,
XOR and NOT on these values.
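
As a minimal illustration of the two representations (my sketch, not part of
the patch), the same 128-bit payload can be declared either way:

typedef unsigned __int128 ti;                                           /* TImode: two 64-bit GPRs */
typedef unsigned __int128 v1ti __attribute__ ((__vector_size__ (16)));  /* V1TImode: one %xmm register */

ti   and_ti   (ti x, ti y)     { return x & y; }  /* typically two andq's, one per 64-bit half */
v1ti and_v1ti (v1ti x, v1ti y) { return x & y; }  /* ideally a single pand */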

The benefit is demonstrated by the following simple test program:

typedef unsigned __int128 v1ti __attribute__ ((__vector_size__ (16)));
v1ti and(v1ti x, v1ti y) { return x & y; }
v1ti ior(v1ti x, v1ti y) { return x | y; }
v1ti xor(v1ti x, v1ti y) { return x ^ y; }
v1ti not(v1ti x) { return ~x; }

For these functions, GCC currently generates the rather large sequences:

and:    movdqa  %xmm0, %xmm2
        movq    %xmm1, %rdx
        movq    %xmm0, %rax
        andq    %rdx, %rax
        movhlps %xmm2, %xmm3
        movhlps %xmm1, %xmm4
        movq    %rax, %xmm0
        movq    %xmm4, %rdx
        movq    %xmm3, %rax
        andq    %rdx, %rax
        movq    %rax, %xmm5
        punpcklqdq      %xmm5, %xmm0
        ret

ior:    movdqa  %xmm0, %xmm2
        movq    %xmm1, %rdx
        movq    %xmm0, %rax
        orq     %rdx, %rax
        movhlps %xmm2, %xmm3
        movhlps %xmm1, %xmm4
        movq    %rax, %xmm0
        movq    %xmm4, %rdx
        movq    %xmm3, %rax
        orq     %rdx, %rax
        movq    %rax, %xmm5
        punpcklqdq      %xmm5, %xmm0
        ret

xor:    movdqa  %xmm0, %xmm2
        movq    %xmm1, %rdx
        movq    %xmm0, %rax
        xorq    %rdx, %rax
        movhlps %xmm2, %xmm3
        movhlps %xmm1, %xmm4
        movq    %rax, %xmm0
        movq    %xmm4, %rdx
        movq    %xmm3, %rax
        xorq    %rdx, %rax
        movq    %rax, %xmm5
        punpcklqdq      %xmm5, %xmm0
        ret

not:    movdqa  %xmm0, %xmm1
        movq    %xmm0, %rax
        notq    %rax
        movhlps %xmm1, %xmm2
        movq    %rax, %xmm0
        movq    %xmm2, %rax
        notq    %rax
        movq    %rax, %xmm3
        punpcklqdq      %xmm3, %xmm0
        ret


With this patch, we now generate the much more efficient code:

and:    pand    %xmm1, %xmm0
        ret

ior:    por     %xmm1, %xmm0
        ret

xor:    pxor    %xmm1, %xmm0
        ret

not:    pcmpeqd %xmm1, %xmm1
        pxor    %xmm1, %xmm0
        ret
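
The two instructions for "not" reflect that SSE has no vector one's-complement
instruction: pcmpeqd of a register with itself materializes an all-ones mask,
and pxor with all-ones performs the complement.  The same idea written with
SSE2 intrinsics (an illustrative sketch only, not part of the patch):

#include <emmintrin.h>

/* ~x on a 128-bit SSE value: build an all-ones mask by comparing a
   register with itself, then XOR with it.  */
static __m128i not128 (__m128i x)
{
  __m128i ones = _mm_cmpeq_epi32 (x, x);  /* pcmpeqd: all bits set */
  return _mm_xor_si128 (x, ones);         /* pxor with all-ones == NOT */
}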


For my first few attempts at this patch I tried adding V1TI to the
existing VI and VI12_AVX_512F mode iterators, but those iterators have
dependencies on other iterators (and attributes), and so on, until
everything ties itself into a knot, as V1TI mode isn't really a
first-class vector mode on x86_64.  Hence I ultimately opted to use
simple stand-alone patterns (as used by the existing TF mode support).

This patch has been tested on x86_64-pc-linux-gnu with "make bootstrap"
and "make -k check" with no new failures.  Ok for mainline?


2021-10-22  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* config/i386/sse.md (<any_logic>v1ti3): New define_insn to
	implement V1TImode AND, IOR and XOR on TARGET_SSE2 (and above).
	(one_cmplv1ti2): New define_expand.

gcc/testsuite/ChangeLog
	* gcc.target/i386/sse2-v1ti-logic.c: New test case.
	* gcc.target/i386/sse2-v1ti-logic-2.c: New test case.

Thanks in advance,
Roger
--

gcc/testsuite/gcc.target/i386/sse2-v1ti-logic.c:

/* { dg-do compile { target int128 } } */
/* { dg-options "-O2 -msse2" } */
/* { dg-require-effective-target sse2 } */

typedef unsigned __int128 v1ti __attribute__ ((__vector_size__ (16)));

v1ti and(v1ti x, v1ti y)
{
  return x & y;
}

v1ti ior(v1ti x, v1ti y)
{
  return x | y;
}

v1ti xor(v1ti x, v1ti y)
{
  return x ^ y;
}

v1ti not(v1ti x)
{
  return ~x;
}

/* { dg-final { scan-assembler "pand" } } */
/* { dg-final { scan-assembler "por" } } */
/* { dg-final { scan-assembler-times "pxor" 2 } } */

gcc/testsuite/gcc.target/i386/sse2-v1ti-logic-2.c:

/* { dg-do compile { target int128 } } */
/* { dg-options "-O2 -msse2" } */
/* { dg-require-effective-target sse2 } */

typedef unsigned __int128 v1ti __attribute__ ((__vector_size__ (16)));

v1ti x;
v1ti y;
v1ti z;

void and2()
{
  x &= y;
}

void and3()
{
  x = y & z;
}

void ior2()
{
  x |= y;
}

void ior3()
{
  x = y | z;
}


void xor2()
{
  x ^= y;
}

void xor3()
{
  x = y ^ z;
}

void not1()
{
  x = ~x;
}

void not2()
{
  x = ~y;
}

/* { dg-final { scan-assembler-times "pand" 2 } } */
/* { dg-final { scan-assembler-times "por" 2 } } */
/* { dg-final { scan-assembler-times "pxor" 4 } } */
  

Comments

Uros Bizjak Oct. 22, 2021, 2:53 p.m. UTC | #1
On Fri, Oct 22, 2021 at 9:19 AM Roger Sayle <roger@nextmovesoftware.com> wrote:
> [...]
>
> gcc/testsuite/ChangeLog
>         * gcc.target/i386/sse2-v1ti-logic.c: New test case.
>         * gcc.target/i386/sse2-v1ti-logic-2.c: New test case.

There is no need for

/* { dg-require-effective-target sse2 } */

for compile tests. The compilation does not reach the assembler.
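
For reference, the dg directives in both tests would then reduce to just
(a sketch, assuming only that line is dropped):

/* { dg-do compile { target int128 } } */
/* { dg-options "-O2 -msse2" } */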

OK with the above change.

BTW: You can add testcases to the main patch with "git add <filename>"
and then create the patch with "git diff HEAD".

Thanks,
Uros.
  

Patch

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index fbf056b..f37c5c0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16268,6 +16268,31 @@ 
 	      ]
 	      (const_string "<sseinsnmode>")))])
 
+(define_insn "<code>v1ti3"
+  [(set (match_operand:V1TI 0 "register_operand" "=x,x,v")
+	(any_logic:V1TI
+	  (match_operand:V1TI 1 "register_operand" "%0,x,v")
+	  (match_operand:V1TI 2 "vector_operand" "xBm,xm,vm")))]
+  "TARGET_SSE2"
+  "@
+   p<logic>\t{%2, %0|%0, %2}
+   vp<logic>\t{%2, %1, %0|%0, %1, %2}
+   vp<logic>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx,avx")
+   (set_attr "prefix" "orig,vex,evex")
+   (set_attr "prefix_data16" "1,*,*")
+   (set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+(define_expand "one_cmplv1ti2"
+  [(set (match_operand:V1TI 0 "register_operand")
+	(xor:V1TI (match_operand:V1TI 1 "register_operand")
+		  (match_dup 2)))]
+  "TARGET_SSE2"
+{
+  operands[2] = force_reg (V1TImode, CONSTM1_RTX (V1TImode));
+})
+
 (define_mode_iterator AVX512ZEXTMASK
   [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])