@@ -6157,12 +6157,52 @@ ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
}
}
+/* Expand move of V1TI mode register X to a new TI mode register. */
+static rtx
+ix86_expand_v1ti_to_ti (rtx x)
+{
+ rtx result = gen_reg_rtx (TImode);
+ emit_move_insn (result, gen_lowpart (TImode, x));
+ return result;
+}
+
+/* Expand move of TI mode register X to a new V1TI mode register. */
+static rtx
+ix86_expand_ti_to_v1ti (rtx x)
+{
+ rtx result = gen_reg_rtx (V1TImode);
+ if (TARGET_SSE2)
+ {
+ rtx lo = gen_lowpart (DImode, x);
+ rtx hi = gen_highpart (DImode, x);
+ rtx tmp = gen_reg_rtx (V2DImode);
+ emit_insn (gen_vec_concatv2di (tmp, lo, hi));
+ emit_move_insn (result, gen_lowpart (V1TImode, tmp));
+ }
+ else
+ emit_move_insn (result, gen_lowpart (V1TImode, x));
+ return result;
+}
+
/* Expand V1TI mode shift (of rtx_code CODE) by constant. */
-void ix86_expand_v1ti_shift (enum rtx_code code, rtx operands[])
+void
+ix86_expand_v1ti_shift (enum rtx_code code, rtx operands[])
{
- HOST_WIDE_INT bits = INTVAL (operands[2]) & 127;
rtx op1 = force_reg (V1TImode, operands[1]);
+ if (!CONST_INT_P (operands[2]))
+ {
+ rtx tmp1 = ix86_expand_v1ti_to_ti (op1);
+ rtx tmp2 = gen_reg_rtx (TImode);
+ emit_insn (code == ASHIFT ? gen_ashlti3 (tmp2, tmp1, operands[2])
+ : gen_lshrti3 (tmp2, tmp1, operands[2]));
+ rtx tmp3 = ix86_expand_ti_to_v1ti (tmp2);
+ emit_move_insn (operands[0], tmp3);
+ return;
+ }
+
+ HOST_WIDE_INT bits = INTVAL (operands[2]) & 127;
+
if (bits == 0)
{
emit_move_insn (operands[0], op1);
@@ -6173,7 +6213,7 @@ void ix86_expand_v1ti_shift (enum rtx_code code, rtx operands[])
{
rtx tmp = gen_reg_rtx (V1TImode);
if (code == ASHIFT)
- emit_insn (gen_sse2_ashlv1ti3 (tmp, op1, GEN_INT (bits)));
+ emit_insn (gen_sse2_ashlv1ti3 (tmp, op1, GEN_INT (bits)));
else
emit_insn (gen_sse2_lshrv1ti3 (tmp, op1, GEN_INT (bits)));
emit_move_insn (operands[0], tmp);
@@ -6228,11 +6268,24 @@ void ix86_expand_v1ti_shift (enum rtx_code code, rtx operands[])
}
/* Expand V1TI mode rotate (of rtx_code CODE) by constant. */
-void ix86_expand_v1ti_rotate (enum rtx_code code, rtx operands[])
+void
+ix86_expand_v1ti_rotate (enum rtx_code code, rtx operands[])
{
- HOST_WIDE_INT bits = INTVAL (operands[2]) & 127;
rtx op1 = force_reg (V1TImode, operands[1]);
+ if (!CONST_INT_P (operands[2]))
+ {
+ rtx tmp1 = ix86_expand_v1ti_to_ti (op1);
+ rtx tmp2 = gen_reg_rtx (TImode);
+ emit_insn (code == ROTATE ? gen_rotlti3 (tmp2, tmp1, operands[2])
+ : gen_rotrti3 (tmp2, tmp1, operands[2]));
+ rtx tmp3 = ix86_expand_ti_to_v1ti (tmp2);
+ emit_move_insn (operands[0], tmp3);
+ return;
+ }
+
+ HOST_WIDE_INT bits = INTVAL (operands[2]) & 127;
+
if (bits == 0)
{
emit_move_insn (operands[0], op1);
@@ -6320,6 +6373,469 @@ void ix86_expand_v1ti_rotate (enum rtx_code code, rtx operands[])
emit_move_insn (operands[0], tmp4);
}
+/* Expand V1TI mode ashiftrt by constant or variable amount.  */
+void
+ix86_expand_v1ti_ashiftrt (rtx operands[])
+{
+ rtx op1 = force_reg (V1TImode, operands[1]);
+
+ if (!CONST_INT_P (operands[2]))
+ {
+ rtx tmp1 = ix86_expand_v1ti_to_ti (op1);
+ rtx tmp2 = gen_reg_rtx (TImode);
+ emit_insn (gen_ashrti3 (tmp2, tmp1, operands[2]));
+ rtx tmp3 = ix86_expand_ti_to_v1ti (tmp2);
+ emit_move_insn (operands[0], tmp3);
+ return;
+ }
+
+ HOST_WIDE_INT bits = INTVAL (operands[2]) & 127;
+
+ if (bits == 0)
+ {
+ emit_move_insn (operands[0], op1);
+ return;
+ }
+
+ if (bits == 127)
+ {
+ /* Two operations. */
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp1, gen_lowpart (V4SImode, op1));
+ emit_insn (gen_sse2_pshufd (tmp2, tmp1, GEN_INT (0xff)));
+
+ rtx tmp3 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_ashrv4si3 (tmp3, tmp2, GEN_INT (31)));
+
+ rtx tmp4 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp4, gen_lowpart (V1TImode, tmp3));
+ emit_move_insn (operands[0], tmp4);
+ return;
+ }
+
+ if (bits == 64)
+ {
+ /* Three operations. */
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp1, gen_lowpart (V4SImode, op1));
+ emit_insn (gen_sse2_pshufd (tmp2, tmp1, GEN_INT (0xff)));
+
+ rtx tmp3 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_ashrv4si3 (tmp3, tmp2, GEN_INT (31)));
+
+ rtx tmp4 = gen_reg_rtx (V2DImode);
+ rtx tmp5 = gen_reg_rtx (V2DImode);
+ rtx tmp6 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp4, gen_lowpart (V2DImode, tmp1));
+ emit_move_insn (tmp5, gen_lowpart (V2DImode, tmp3));
+ emit_insn (gen_vec_interleave_highv2di (tmp6, tmp4, tmp5));
+
+ rtx tmp7 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp7, gen_lowpart (V1TImode, tmp6));
+ emit_move_insn (operands[0], tmp7);
+ return;
+ }
+
+ if (bits == 96)
+ {
+ /* Three operations. */
+ rtx tmp3 = gen_reg_rtx (V2DImode);
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp1, gen_lowpart (V4SImode, op1));
+ emit_insn (gen_ashrv4si3 (tmp2, tmp1, GEN_INT (31)));
+
+ rtx tmp4 = gen_reg_rtx (V2DImode);
+ rtx tmp5 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp3, gen_lowpart (V2DImode, tmp1));
+ emit_move_insn (tmp4, gen_lowpart (V2DImode, tmp2));
+ emit_insn (gen_vec_interleave_highv2di (tmp5, tmp3, tmp4));
+
+ rtx tmp6 = gen_reg_rtx (V4SImode);
+ rtx tmp7 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp6, gen_lowpart (V4SImode, tmp5));
+ emit_insn (gen_sse2_pshufd (tmp7, tmp6, GEN_INT (0xfd)));
+
+ rtx tmp8 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp8, gen_lowpart (V1TImode, tmp7));
+ emit_move_insn (operands[0], tmp8);
+ return;
+ }
+
+ if (TARGET_AVX2 || TARGET_SSE4_1)
+ {
+ /* Three operations. */
+ if (bits == 32)
+ {
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp1, gen_lowpart (V4SImode, op1));
+ emit_insn (gen_ashrv4si3 (tmp2, tmp1, GEN_INT (31)));
+
+ rtx tmp3 = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp3, op1, GEN_INT (32)));
+
+ if (TARGET_AVX2)
+ {
+ rtx tmp4 = gen_reg_rtx (V4SImode);
+ rtx tmp5 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp4, gen_lowpart (V4SImode, tmp3));
+ emit_insn (gen_avx2_pblenddv4si (tmp5, tmp2, tmp4,
+ GEN_INT (7)));
+
+ rtx tmp6 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp6, gen_lowpart (V1TImode, tmp5));
+ emit_move_insn (operands[0], tmp6);
+ }
+ else
+ {
+ rtx tmp4 = gen_reg_rtx (V8HImode);
+ rtx tmp5 = gen_reg_rtx (V8HImode);
+ rtx tmp6 = gen_reg_rtx (V8HImode);
+ emit_move_insn (tmp4, gen_lowpart (V8HImode, tmp2));
+ emit_move_insn (tmp5, gen_lowpart (V8HImode, tmp3));
+ emit_insn (gen_sse4_1_pblendw (tmp6, tmp4, tmp5,
+ GEN_INT (0x3f)));
+
+ rtx tmp7 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp7, gen_lowpart (V1TImode, tmp6));
+ emit_move_insn (operands[0], tmp7);
+ }
+ return;
+ }
+
+ /* Three operations. */
+ if (bits == 8 || bits == 16 || bits == 24)
+ {
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp1, gen_lowpart (V4SImode, op1));
+ emit_insn (gen_ashrv4si3 (tmp2, tmp1, GEN_INT (bits)));
+
+ rtx tmp3 = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp3, op1, GEN_INT (bits)));
+
+ if (TARGET_AVX2)
+ {
+ rtx tmp4 = gen_reg_rtx (V4SImode);
+ rtx tmp5 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp4, gen_lowpart (V4SImode, tmp3));
+ emit_insn (gen_avx2_pblenddv4si (tmp5, tmp2, tmp4,
+ GEN_INT (7)));
+
+ rtx tmp6 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp6, gen_lowpart (V1TImode, tmp5));
+ emit_move_insn (operands[0], tmp6);
+ }
+ else
+ {
+ rtx tmp4 = gen_reg_rtx (V8HImode);
+ rtx tmp5 = gen_reg_rtx (V8HImode);
+ rtx tmp6 = gen_reg_rtx (V8HImode);
+ emit_move_insn (tmp4, gen_lowpart (V8HImode, tmp2));
+ emit_move_insn (tmp5, gen_lowpart (V8HImode, tmp3));
+ emit_insn (gen_sse4_1_pblendw (tmp6, tmp4, tmp5,
+ GEN_INT (0x3f)));
+
+ rtx tmp7 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp7, gen_lowpart (V1TImode, tmp6));
+ emit_move_insn (operands[0], tmp7);
+ }
+ return;
+ }
+ }
+
+ if (bits > 96)
+ {
+ /* Four operations. */
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp1, gen_lowpart (V4SImode, op1));
+ emit_insn (gen_ashrv4si3 (tmp2, tmp1, GEN_INT (bits - 96)));
+
+ rtx tmp3 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_ashrv4si3 (tmp3, tmp1, GEN_INT (31)));
+
+ rtx tmp4 = gen_reg_rtx (V2DImode);
+ rtx tmp5 = gen_reg_rtx (V2DImode);
+ rtx tmp6 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp4, gen_lowpart (V2DImode, tmp2));
+ emit_move_insn (tmp5, gen_lowpart (V2DImode, tmp3));
+ emit_insn (gen_vec_interleave_highv2di (tmp6, tmp4, tmp5));
+
+ rtx tmp7 = gen_reg_rtx (V4SImode);
+ rtx tmp8 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp7, gen_lowpart (V4SImode, tmp6));
+ emit_insn (gen_sse2_pshufd (tmp8, tmp7, GEN_INT (0xfd)));
+
+ rtx tmp9 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp9, gen_lowpart (V1TImode, tmp8));
+ emit_move_insn (operands[0], tmp9);
+ return;
+ }
+
+ if (TARGET_SSE4_1 && (bits == 48 || bits == 80))
+ {
+ /* Four operations. */
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp1, gen_lowpart (V4SImode, op1));
+ emit_insn (gen_sse2_pshufd (tmp2, tmp1, GEN_INT (0xff)));
+
+ rtx tmp3 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_ashrv4si3 (tmp3, tmp2, GEN_INT (31)));
+
+ rtx tmp4 = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp4, op1, GEN_INT (bits)));
+
+ rtx tmp5 = gen_reg_rtx (V8HImode);
+ rtx tmp6 = gen_reg_rtx (V8HImode);
+ rtx tmp7 = gen_reg_rtx (V8HImode);
+ emit_move_insn (tmp5, gen_lowpart (V8HImode, tmp3));
+ emit_move_insn (tmp6, gen_lowpart (V8HImode, tmp4));
+ emit_insn (gen_sse4_1_pblendw (tmp7, tmp5, tmp6,
+ GEN_INT (bits == 48 ? 0x1f : 0x07)));
+
+ rtx tmp8 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp8, gen_lowpart (V1TImode, tmp7));
+ emit_move_insn (operands[0], tmp8);
+ return;
+ }
+
+ if ((bits & 7) == 0)
+ {
+ /* Five operations. */
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp1, gen_lowpart (V4SImode, op1));
+ emit_insn (gen_sse2_pshufd (tmp2, tmp1, GEN_INT (0xff)));
+
+ rtx tmp3 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_ashrv4si3 (tmp3, tmp2, GEN_INT (31)));
+
+ rtx tmp4 = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp4, op1, GEN_INT (bits)));
+
+ rtx tmp5 = gen_reg_rtx (V1TImode);
+ rtx tmp6 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp5, gen_lowpart (V1TImode, tmp3));
+ emit_insn (gen_sse2_ashlv1ti3 (tmp6, tmp5, GEN_INT (128 - bits)));
+
+ rtx tmp7 = gen_reg_rtx (V2DImode);
+ rtx tmp8 = gen_reg_rtx (V2DImode);
+ rtx tmp9 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp7, gen_lowpart (V2DImode, tmp4));
+ emit_move_insn (tmp8, gen_lowpart (V2DImode, tmp6));
+ emit_insn (gen_iorv2di3 (tmp9, tmp7, tmp8));
+
+ rtx tmp10 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp10, gen_lowpart (V1TImode, tmp9));
+ emit_move_insn (operands[0], tmp10);
+ return;
+ }
+
+ if (TARGET_AVX2 && bits < 32)
+ {
+ /* Six operations. */
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp1, gen_lowpart (V4SImode, op1));
+ emit_insn (gen_ashrv4si3 (tmp2, tmp1, GEN_INT (bits)));
+
+ rtx tmp3 = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp3, op1, GEN_INT (64)));
+
+ rtx tmp4 = gen_reg_rtx (V2DImode);
+ rtx tmp5 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp4, gen_lowpart (V2DImode, op1));
+ emit_insn (gen_lshrv2di3 (tmp5, tmp4, GEN_INT (bits)));
+
+ rtx tmp6 = gen_reg_rtx (V2DImode);
+ rtx tmp7 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp6, gen_lowpart (V2DImode, tmp3));
+ emit_insn (gen_ashlv2di3 (tmp7, tmp6, GEN_INT (64 - bits)));
+
+ rtx tmp8 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_iorv2di3 (tmp8, tmp5, tmp7));
+
+ rtx tmp9 = gen_reg_rtx (V4SImode);
+ rtx tmp10 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp9, gen_lowpart (V4SImode, tmp8));
+ emit_insn (gen_avx2_pblenddv4si (tmp10, tmp2, tmp9, GEN_INT (7)));
+
+ rtx tmp11 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp11, gen_lowpart (V1TImode, tmp10));
+ emit_move_insn (operands[0], tmp11);
+ return;
+ }
+
+ if (TARGET_SSE4_1 && bits < 15)
+ {
+ /* Six operations. */
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp1, gen_lowpart (V4SImode, op1));
+ emit_insn (gen_ashrv4si3 (tmp2, tmp1, GEN_INT (bits)));
+
+ rtx tmp3 = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp3, op1, GEN_INT (64)));
+
+ rtx tmp4 = gen_reg_rtx (V2DImode);
+ rtx tmp5 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp4, gen_lowpart (V2DImode, op1));
+ emit_insn (gen_lshrv2di3 (tmp5, tmp4, GEN_INT (bits)));
+
+ rtx tmp6 = gen_reg_rtx (V2DImode);
+ rtx tmp7 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp6, gen_lowpart (V2DImode, tmp3));
+ emit_insn (gen_ashlv2di3 (tmp7, tmp6, GEN_INT (64 - bits)));
+
+ rtx tmp8 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_iorv2di3 (tmp8, tmp5, tmp7));
+
+ rtx tmp9 = gen_reg_rtx (V8HImode);
+ rtx tmp10 = gen_reg_rtx (V8HImode);
+ rtx tmp11 = gen_reg_rtx (V8HImode);
+ emit_move_insn (tmp9, gen_lowpart (V8HImode, tmp2));
+ emit_move_insn (tmp10, gen_lowpart (V8HImode, tmp8));
+ emit_insn (gen_sse4_1_pblendw (tmp11, tmp9, tmp10, GEN_INT (0x3f)));
+
+ rtx tmp12 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp12, gen_lowpart (V1TImode, tmp11));
+ emit_move_insn (operands[0], tmp12);
+ return;
+ }
+
+ if (bits == 1)
+ {
+ /* Eight operations. */
+ rtx tmp1 = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp1, op1, GEN_INT (64)));
+
+ rtx tmp2 = gen_reg_rtx (V2DImode);
+ rtx tmp3 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp2, gen_lowpart (V2DImode, op1));
+ emit_insn (gen_lshrv2di3 (tmp3, tmp2, GEN_INT (1)));
+
+ rtx tmp4 = gen_reg_rtx (V2DImode);
+ rtx tmp5 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp4, gen_lowpart (V2DImode, tmp1));
+ emit_insn (gen_ashlv2di3 (tmp5, tmp4, GEN_INT (63)));
+
+ rtx tmp6 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_iorv2di3 (tmp6, tmp3, tmp5));
+
+ rtx tmp7 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_lshrv2di3 (tmp7, tmp2, GEN_INT (63)));
+
+ rtx tmp8 = gen_reg_rtx (V4SImode);
+ rtx tmp9 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp8, gen_lowpart (V4SImode, tmp7));
+ emit_insn (gen_sse2_pshufd (tmp9, tmp8, GEN_INT (0xbf)));
+
+ rtx tmp10 = gen_reg_rtx (V2DImode);
+ rtx tmp11 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp10, gen_lowpart (V2DImode, tmp9));
+ emit_insn (gen_ashlv2di3 (tmp11, tmp10, GEN_INT (31)));
+
+ rtx tmp12 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_iorv2di3 (tmp12, tmp6, tmp11));
+
+ rtx tmp13 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp13, gen_lowpart (V1TImode, tmp12));
+ emit_move_insn (operands[0], tmp13);
+ return;
+ }
+
+ if (bits > 64)
+ {
+ /* Eight operations. */
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp1, gen_lowpart (V4SImode, op1));
+ emit_insn (gen_sse2_pshufd (tmp2, tmp1, GEN_INT (0xff)));
+
+ rtx tmp3 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_ashrv4si3 (tmp3, tmp2, GEN_INT (31)));
+
+ rtx tmp4 = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp4, op1, GEN_INT (64)));
+
+ rtx tmp5 = gen_reg_rtx (V2DImode);
+ rtx tmp6 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp5, gen_lowpart (V2DImode, tmp4));
+ emit_insn (gen_lshrv2di3 (tmp6, tmp5, GEN_INT (bits - 64)));
+
+ rtx tmp7 = gen_reg_rtx (V1TImode);
+ rtx tmp8 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp7, gen_lowpart (V1TImode, tmp3));
+ emit_insn (gen_sse2_ashlv1ti3 (tmp8, tmp7, GEN_INT (64)));
+
+ rtx tmp9 = gen_reg_rtx (V2DImode);
+ rtx tmp10 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp9, gen_lowpart (V2DImode, tmp3));
+ emit_insn (gen_ashlv2di3 (tmp10, tmp9, GEN_INT (128 - bits)));
+
+ rtx tmp11 = gen_reg_rtx (V2DImode);
+ rtx tmp12 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp11, gen_lowpart (V2DImode, tmp8));
+ emit_insn (gen_iorv2di3 (tmp12, tmp10, tmp11));
+
+ rtx tmp13 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_iorv2di3 (tmp13, tmp6, tmp12));
+
+ rtx tmp14 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp14, gen_lowpart (V1TImode, tmp13));
+ emit_move_insn (operands[0], tmp14);
+ }
+ else
+ {
+ /* Nine operations. */
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp1, gen_lowpart (V4SImode, op1));
+ emit_insn (gen_sse2_pshufd (tmp2, tmp1, GEN_INT (0xff)));
+
+ rtx tmp3 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_ashrv4si3 (tmp3, tmp2, GEN_INT (31)));
+
+ rtx tmp4 = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp4, op1, GEN_INT (64)));
+
+ rtx tmp5 = gen_reg_rtx (V2DImode);
+ rtx tmp6 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp5, gen_lowpart (V2DImode, op1));
+ emit_insn (gen_lshrv2di3 (tmp6, tmp5, GEN_INT (bits)));
+
+ rtx tmp7 = gen_reg_rtx (V2DImode);
+ rtx tmp8 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp7, gen_lowpart (V2DImode, tmp4));
+ emit_insn (gen_ashlv2di3 (tmp8, tmp7, GEN_INT (64 - bits)));
+
+ rtx tmp9 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_iorv2di3 (tmp9, tmp6, tmp8));
+
+ rtx tmp10 = gen_reg_rtx (V1TImode);
+ rtx tmp11 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp10, gen_lowpart (V1TImode, tmp3));
+ emit_insn (gen_sse2_ashlv1ti3 (tmp11, tmp10, GEN_INT (64)));
+
+ rtx tmp12 = gen_reg_rtx (V2DImode);
+ rtx tmp13 = gen_reg_rtx (V2DImode);
+ emit_move_insn (tmp12, gen_lowpart (V2DImode, tmp11));
+ emit_insn (gen_ashlv2di3 (tmp13, tmp12, GEN_INT (64 - bits)));
+
+ rtx tmp14 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_iorv2di3 (tmp14, tmp9, tmp13));
+
+ rtx tmp15 = gen_reg_rtx (V1TImode);
+ emit_move_insn (tmp15, gen_lowpart (V1TImode, tmp14));
+ emit_move_insn (operands[0], tmp15);
+ }
+}
+
/* Return mode for the memcpy/memset loop counter. Prefer SImode over
DImode for constant loop counts. */
@@ -161,6 +161,7 @@ extern void ix86_split_ashr (rtx *, rtx, machine_mode);
extern void ix86_split_lshr (rtx *, rtx, machine_mode);
extern void ix86_expand_v1ti_shift (enum rtx_code, rtx[]);
extern void ix86_expand_v1ti_rotate (enum rtx_code, rtx[]);
+extern void ix86_expand_v1ti_ashiftrt (rtx[]);
extern rtx ix86_find_base_term (rtx);
extern bool ix86_check_movabs (rtx, int);
extern bool ix86_check_no_addr_space (rtx);
@@ -15079,8 +15079,8 @@
[(set (match_operand:V1TI 0 "register_operand")
(ashift:V1TI
(match_operand:V1TI 1 "register_operand")
- (match_operand:SI 2 "const_int_operand")))]
- "TARGET_SSE2"
+ (match_operand:QI 2 "general_operand")))]
+ "TARGET_SSE2 && TARGET_64BIT"
{
ix86_expand_v1ti_shift (ASHIFT, operands);
DONE;
@@ -15090,19 +15090,30 @@
[(set (match_operand:V1TI 0 "register_operand")
(lshiftrt:V1TI
(match_operand:V1TI 1 "register_operand")
- (match_operand:SI 2 "const_int_operand")))]
- "TARGET_SSE2"
+ (match_operand:QI 2 "general_operand")))]
+ "TARGET_SSE2 && TARGET_64BIT"
{
ix86_expand_v1ti_shift (LSHIFTRT, operands);
DONE;
})
+(define_expand "ashrv1ti3"
+ [(set (match_operand:V1TI 0 "register_operand")
+ (ashiftrt:V1TI
+ (match_operand:V1TI 1 "register_operand")
+ (match_operand:QI 2 "general_operand")))]
+ "TARGET_SSE2 && TARGET_64BIT"
+{
+ ix86_expand_v1ti_ashiftrt (operands);
+ DONE;
+})
+
(define_expand "rotlv1ti3"
[(set (match_operand:V1TI 0 "register_operand")
(rotate:V1TI
(match_operand:V1TI 1 "register_operand")
- (match_operand:SI 2 "const_int_operand")))]
- "TARGET_SSE2"
+ (match_operand:QI 2 "const_int_operand")))]
+ "TARGET_SSE2 && TARGET_64BIT"
{
ix86_expand_v1ti_rotate (ROTATE, operands);
DONE;
@@ -15112,8 +15123,8 @@
[(set (match_operand:V1TI 0 "register_operand")
(rotatert:V1TI
(match_operand:V1TI 1 "register_operand")
- (match_operand:SI 2 "const_int_operand")))]
- "TARGET_SSE2"
+ (match_operand:QI 2 "const_int_operand")))]
+ "TARGET_SSE2 && TARGET_64BIT"
{
ix86_expand_v1ti_rotate (ROTATERT, operands);
DONE;
new file mode 100644
@@ -0,0 +1,167 @@
+/* { dg-do run { target int128 } } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+typedef __int128 v1ti __attribute__ ((__vector_size__ (16)));
+typedef __int128 ti;
+
+ti ashr(ti x, unsigned int i) { return x >> i; }
+
+v1ti ashr_1(v1ti x) { return x >> 1; }
+v1ti ashr_2(v1ti x) { return x >> 2; }
+v1ti ashr_7(v1ti x) { return x >> 7; }
+v1ti ashr_8(v1ti x) { return x >> 8; }
+v1ti ashr_9(v1ti x) { return x >> 9; }
+v1ti ashr_15(v1ti x) { return x >> 15; }
+v1ti ashr_16(v1ti x) { return x >> 16; }
+v1ti ashr_17(v1ti x) { return x >> 17; }
+v1ti ashr_23(v1ti x) { return x >> 23; }
+v1ti ashr_24(v1ti x) { return x >> 24; }
+v1ti ashr_25(v1ti x) { return x >> 25; }
+v1ti ashr_31(v1ti x) { return x >> 31; }
+v1ti ashr_32(v1ti x) { return x >> 32; }
+v1ti ashr_33(v1ti x) { return x >> 33; }
+v1ti ashr_47(v1ti x) { return x >> 47; }
+v1ti ashr_48(v1ti x) { return x >> 48; }
+v1ti ashr_49(v1ti x) { return x >> 49; }
+v1ti ashr_63(v1ti x) { return x >> 63; }
+v1ti ashr_64(v1ti x) { return x >> 64; }
+v1ti ashr_65(v1ti x) { return x >> 65; }
+v1ti ashr_72(v1ti x) { return x >> 72; }
+v1ti ashr_79(v1ti x) { return x >> 79; }
+v1ti ashr_80(v1ti x) { return x >> 80; }
+v1ti ashr_81(v1ti x) { return x >> 81; }
+v1ti ashr_95(v1ti x) { return x >> 95; }
+v1ti ashr_96(v1ti x) { return x >> 96; }
+v1ti ashr_97(v1ti x) { return x >> 97; }
+v1ti ashr_111(v1ti x) { return x >> 111; }
+v1ti ashr_112(v1ti x) { return x >> 112; }
+v1ti ashr_113(v1ti x) { return x >> 113; }
+v1ti ashr_119(v1ti x) { return x >> 119; }
+v1ti ashr_120(v1ti x) { return x >> 120; }
+v1ti ashr_121(v1ti x) { return x >> 121; }
+v1ti ashr_126(v1ti x) { return x >> 126; }
+v1ti ashr_127(v1ti x) { return x >> 127; }
+
+typedef v1ti (*fun)(v1ti);
+
+struct {
+ unsigned int i;
+ fun ashr;
+} table[35] = {
+ { 1, ashr_1 },
+ { 2, ashr_2 },
+ { 7, ashr_7 },
+ { 8, ashr_8 },
+ { 9, ashr_9 },
+ { 15, ashr_15 },
+ { 16, ashr_16 },
+ { 17, ashr_17 },
+ { 23, ashr_23 },
+ { 24, ashr_24 },
+ { 25, ashr_25 },
+ { 31, ashr_31 },
+ { 32, ashr_32 },
+ { 33, ashr_33 },
+ { 47, ashr_47 },
+ { 48, ashr_48 },
+ { 49, ashr_49 },
+ { 63, ashr_63 },
+ { 64, ashr_64 },
+ { 65, ashr_65 },
+ { 72, ashr_72 },
+ { 79, ashr_79 },
+ { 80, ashr_80 },
+ { 81, ashr_81 },
+ { 95, ashr_95 },
+ { 96, ashr_96 },
+ { 97, ashr_97 },
+ { 111, ashr_111 },
+ { 112, ashr_112 },
+ { 113, ashr_113 },
+ { 119, ashr_119 },
+ { 120, ashr_120 },
+ { 121, ashr_121 },
+ { 126, ashr_126 },
+ { 127, ashr_127 }
+};
+
+void test(ti x)
+{
+ unsigned int i;
+ v1ti t = (v1ti)x;
+
+ for (i=0; i<(sizeof(table)/sizeof(table[0])); i++) {
+ if ((ti)(*table[i].ashr)(t) != ashr(x,table[i].i))
+ __builtin_abort();
+ }
+}
+
+int main()
+{
+ ti x;
+
+ x = ((ti)0x0011223344556677ull)<<64 | 0x8899aabbccddeeffull;
+ test(x);
+ x = ((ti)0xffeeddccbbaa9988ull)<<64 | 0x7766554433221100ull;
+ test(x);
+ x = ((ti)0x0123456789abcdefull)<<64 | 0x0123456789abcdefull;
+ test(x);
+ x = ((ti)0xfedcba9876543210ull)<<64 | 0xfedcba9876543210ull;
+ test(x);
+ x = ((ti)0x0123456789abcdefull)<<64 | 0xfedcba9876543210ull;
+ test(x);
+ x = ((ti)0xfedcba9876543210ull)<<64 | 0x0123456789abcdefull;
+ test(x);
+ x = 0;
+ test(x);
+ x = 0xffffffffffffffffull;
+ test(x);
+ x = ((ti)0xffffffffffffffffull)<<64;
+ test(x);
+ x = ((ti)0xffffffffffffffffull)<<64 | 0xffffffffffffffffull;
+ test(x);
+ x = ((ti)0x5a5a5a5a5a5a5a5aull)<<64 | 0x5a5a5a5a5a5a5a5aull;
+ test(x);
+ x = ((ti)0xa5a5a5a5a5a5a5a5ull)<<64 | 0xa5a5a5a5a5a5a5a5ull;
+ test(x);
+ x = 0xffull;
+ test(x);
+ x = 0xff00ull;
+ test(x);
+ x = 0xff0000ull;
+ test(x);
+ x = 0xff000000ull;
+ test(x);
+ x = 0xff00000000ull;
+ test(x);
+ x = 0xff0000000000ull;
+ test(x);
+ x = 0xff000000000000ull;
+ test(x);
+ x = 0xff00000000000000ull;
+ test(x);
+ x = ((ti)0xffull)<<64;
+ test(x);
+ x = ((ti)0xff00ull)<<64;
+ test(x);
+ x = ((ti)0xff0000ull)<<64;
+ test(x);
+ x = ((ti)0xff000000ull)<<64;
+ test(x);
+ x = ((ti)0xff00000000ull)<<64;
+ test(x);
+ x = ((ti)0xff0000000000ull)<<64;
+ test(x);
+ x = ((ti)0xff000000000000ull)<<64;
+ test(x);
+ x = ((ti)0xff00000000000000ull)<<64;
+ test(x);
+ x = 0xdeadbeefcafebabeull;
+ test(x);
+ x = ((ti)0xdeadbeefcafebabeull)<<64;
+ test(x);
+
+ return 0;
+}
+
new file mode 100644
@@ -0,0 +1,166 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mavx2" } */
+
+typedef __int128 v1ti __attribute__ ((__vector_size__ (16)));
+typedef __int128 ti;
+
+ti ashr(ti x, unsigned int i) { return x >> i; }
+
+v1ti ashr_1(v1ti x) { return x >> 1; }
+v1ti ashr_2(v1ti x) { return x >> 2; }
+v1ti ashr_7(v1ti x) { return x >> 7; }
+v1ti ashr_8(v1ti x) { return x >> 8; }
+v1ti ashr_9(v1ti x) { return x >> 9; }
+v1ti ashr_15(v1ti x) { return x >> 15; }
+v1ti ashr_16(v1ti x) { return x >> 16; }
+v1ti ashr_17(v1ti x) { return x >> 17; }
+v1ti ashr_23(v1ti x) { return x >> 23; }
+v1ti ashr_24(v1ti x) { return x >> 24; }
+v1ti ashr_25(v1ti x) { return x >> 25; }
+v1ti ashr_31(v1ti x) { return x >> 31; }
+v1ti ashr_32(v1ti x) { return x >> 32; }
+v1ti ashr_33(v1ti x) { return x >> 33; }
+v1ti ashr_47(v1ti x) { return x >> 47; }
+v1ti ashr_48(v1ti x) { return x >> 48; }
+v1ti ashr_49(v1ti x) { return x >> 49; }
+v1ti ashr_63(v1ti x) { return x >> 63; }
+v1ti ashr_64(v1ti x) { return x >> 64; }
+v1ti ashr_65(v1ti x) { return x >> 65; }
+v1ti ashr_72(v1ti x) { return x >> 72; }
+v1ti ashr_79(v1ti x) { return x >> 79; }
+v1ti ashr_80(v1ti x) { return x >> 80; }
+v1ti ashr_81(v1ti x) { return x >> 81; }
+v1ti ashr_95(v1ti x) { return x >> 95; }
+v1ti ashr_96(v1ti x) { return x >> 96; }
+v1ti ashr_97(v1ti x) { return x >> 97; }
+v1ti ashr_111(v1ti x) { return x >> 111; }
+v1ti ashr_112(v1ti x) { return x >> 112; }
+v1ti ashr_113(v1ti x) { return x >> 113; }
+v1ti ashr_119(v1ti x) { return x >> 119; }
+v1ti ashr_120(v1ti x) { return x >> 120; }
+v1ti ashr_121(v1ti x) { return x >> 121; }
+v1ti ashr_126(v1ti x) { return x >> 126; }
+v1ti ashr_127(v1ti x) { return x >> 127; }
+
+typedef v1ti (*fun)(v1ti);
+
+struct {
+ unsigned int i;
+ fun ashr;
+} table[35] = {
+ { 1, ashr_1 },
+ { 2, ashr_2 },
+ { 7, ashr_7 },
+ { 8, ashr_8 },
+ { 9, ashr_9 },
+ { 15, ashr_15 },
+ { 16, ashr_16 },
+ { 17, ashr_17 },
+ { 23, ashr_23 },
+ { 24, ashr_24 },
+ { 25, ashr_25 },
+ { 31, ashr_31 },
+ { 32, ashr_32 },
+ { 33, ashr_33 },
+ { 47, ashr_47 },
+ { 48, ashr_48 },
+ { 49, ashr_49 },
+ { 63, ashr_63 },
+ { 64, ashr_64 },
+ { 65, ashr_65 },
+ { 72, ashr_72 },
+ { 79, ashr_79 },
+ { 80, ashr_80 },
+ { 81, ashr_81 },
+ { 95, ashr_95 },
+ { 96, ashr_96 },
+ { 97, ashr_97 },
+ { 111, ashr_111 },
+ { 112, ashr_112 },
+ { 113, ashr_113 },
+ { 119, ashr_119 },
+ { 120, ashr_120 },
+ { 121, ashr_121 },
+ { 126, ashr_126 },
+ { 127, ashr_127 }
+};
+
+void test(ti x)
+{
+ unsigned int i;
+ v1ti t = (v1ti)x;
+
+ for (i=0; i<(sizeof(table)/sizeof(table[0])); i++) {
+ if ((ti)(*table[i].ashr)(t) != ashr(x,table[i].i))
+ __builtin_abort();
+ }
+}
+
+int main()
+{
+ ti x;
+
+ x = ((ti)0x0011223344556677ull)<<64 | 0x8899aabbccddeeffull;
+ test(x);
+ x = ((ti)0xffeeddccbbaa9988ull)<<64 | 0x7766554433221100ull;
+ test(x);
+ x = ((ti)0x0123456789abcdefull)<<64 | 0x0123456789abcdefull;
+ test(x);
+ x = ((ti)0xfedcba9876543210ull)<<64 | 0xfedcba9876543210ull;
+ test(x);
+ x = ((ti)0x0123456789abcdefull)<<64 | 0xfedcba9876543210ull;
+ test(x);
+ x = ((ti)0xfedcba9876543210ull)<<64 | 0x0123456789abcdefull;
+ test(x);
+ x = 0;
+ test(x);
+ x = 0xffffffffffffffffull;
+ test(x);
+ x = ((ti)0xffffffffffffffffull)<<64;
+ test(x);
+ x = ((ti)0xffffffffffffffffull)<<64 | 0xffffffffffffffffull;
+ test(x);
+ x = ((ti)0x5a5a5a5a5a5a5a5aull)<<64 | 0x5a5a5a5a5a5a5a5aull;
+ test(x);
+ x = ((ti)0xa5a5a5a5a5a5a5a5ull)<<64 | 0xa5a5a5a5a5a5a5a5ull;
+ test(x);
+ x = 0xffull;
+ test(x);
+ x = 0xff00ull;
+ test(x);
+ x = 0xff0000ull;
+ test(x);
+ x = 0xff000000ull;
+ test(x);
+ x = 0xff00000000ull;
+ test(x);
+ x = 0xff0000000000ull;
+ test(x);
+ x = 0xff000000000000ull;
+ test(x);
+ x = 0xff00000000000000ull;
+ test(x);
+ x = ((ti)0xffull)<<64;
+ test(x);
+ x = ((ti)0xff00ull)<<64;
+ test(x);
+ x = ((ti)0xff0000ull)<<64;
+ test(x);
+ x = ((ti)0xff000000ull)<<64;
+ test(x);
+ x = ((ti)0xff00000000ull)<<64;
+ test(x);
+ x = ((ti)0xff0000000000ull)<<64;
+ test(x);
+ x = ((ti)0xff000000000000ull)<<64;
+ test(x);
+ x = ((ti)0xff00000000000000ull)<<64;
+ test(x);
+ x = 0xdeadbeefcafebabeull;
+ test(x);
+ x = ((ti)0xdeadbeefcafebabeull)<<64;
+ test(x);
+
+ return 0;
+}
+
new file mode 100644
@@ -0,0 +1,166 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -msse4.1" } */
+
+typedef __int128 v1ti __attribute__ ((__vector_size__ (16)));
+typedef __int128 ti;
+
+ti ashr(ti x, unsigned int i) { return x >> i; }
+
+v1ti ashr_1(v1ti x) { return x >> 1; }
+v1ti ashr_2(v1ti x) { return x >> 2; }
+v1ti ashr_7(v1ti x) { return x >> 7; }
+v1ti ashr_8(v1ti x) { return x >> 8; }
+v1ti ashr_9(v1ti x) { return x >> 9; }
+v1ti ashr_15(v1ti x) { return x >> 15; }
+v1ti ashr_16(v1ti x) { return x >> 16; }
+v1ti ashr_17(v1ti x) { return x >> 17; }
+v1ti ashr_23(v1ti x) { return x >> 23; }
+v1ti ashr_24(v1ti x) { return x >> 24; }
+v1ti ashr_25(v1ti x) { return x >> 25; }
+v1ti ashr_31(v1ti x) { return x >> 31; }
+v1ti ashr_32(v1ti x) { return x >> 32; }
+v1ti ashr_33(v1ti x) { return x >> 33; }
+v1ti ashr_47(v1ti x) { return x >> 47; }
+v1ti ashr_48(v1ti x) { return x >> 48; }
+v1ti ashr_49(v1ti x) { return x >> 49; }
+v1ti ashr_63(v1ti x) { return x >> 63; }
+v1ti ashr_64(v1ti x) { return x >> 64; }
+v1ti ashr_65(v1ti x) { return x >> 65; }
+v1ti ashr_72(v1ti x) { return x >> 72; }
+v1ti ashr_79(v1ti x) { return x >> 79; }
+v1ti ashr_80(v1ti x) { return x >> 80; }
+v1ti ashr_81(v1ti x) { return x >> 81; }
+v1ti ashr_95(v1ti x) { return x >> 95; }
+v1ti ashr_96(v1ti x) { return x >> 96; }
+v1ti ashr_97(v1ti x) { return x >> 97; }
+v1ti ashr_111(v1ti x) { return x >> 111; }
+v1ti ashr_112(v1ti x) { return x >> 112; }
+v1ti ashr_113(v1ti x) { return x >> 113; }
+v1ti ashr_119(v1ti x) { return x >> 119; }
+v1ti ashr_120(v1ti x) { return x >> 120; }
+v1ti ashr_121(v1ti x) { return x >> 121; }
+v1ti ashr_126(v1ti x) { return x >> 126; }
+v1ti ashr_127(v1ti x) { return x >> 127; }
+
+typedef v1ti (*fun)(v1ti);
+
+struct {
+ unsigned int i;
+ fun ashr;
+} table[35] = {
+ { 1, ashr_1 },
+ { 2, ashr_2 },
+ { 7, ashr_7 },
+ { 8, ashr_8 },
+ { 9, ashr_9 },
+ { 15, ashr_15 },
+ { 16, ashr_16 },
+ { 17, ashr_17 },
+ { 23, ashr_23 },
+ { 24, ashr_24 },
+ { 25, ashr_25 },
+ { 31, ashr_31 },
+ { 32, ashr_32 },
+ { 33, ashr_33 },
+ { 47, ashr_47 },
+ { 48, ashr_48 },
+ { 49, ashr_49 },
+ { 63, ashr_63 },
+ { 64, ashr_64 },
+ { 65, ashr_65 },
+ { 72, ashr_72 },
+ { 79, ashr_79 },
+ { 80, ashr_80 },
+ { 81, ashr_81 },
+ { 95, ashr_95 },
+ { 96, ashr_96 },
+ { 97, ashr_97 },
+ { 111, ashr_111 },
+ { 112, ashr_112 },
+ { 113, ashr_113 },
+ { 119, ashr_119 },
+ { 120, ashr_120 },
+ { 121, ashr_121 },
+ { 126, ashr_126 },
+ { 127, ashr_127 }
+};
+
+void test(ti x)
+{
+ unsigned int i;
+ v1ti t = (v1ti)x;
+
+ for (i=0; i<(sizeof(table)/sizeof(table[0])); i++) {
+ if ((ti)(*table[i].ashr)(t) != ashr(x,table[i].i))
+ __builtin_abort();
+ }
+}
+
+int main()
+{
+ ti x;
+
+ x = ((ti)0x0011223344556677ull)<<64 | 0x8899aabbccddeeffull;
+ test(x);
+ x = ((ti)0xffeeddccbbaa9988ull)<<64 | 0x7766554433221100ull;
+ test(x);
+ x = ((ti)0x0123456789abcdefull)<<64 | 0x0123456789abcdefull;
+ test(x);
+ x = ((ti)0xfedcba9876543210ull)<<64 | 0xfedcba9876543210ull;
+ test(x);
+ x = ((ti)0x0123456789abcdefull)<<64 | 0xfedcba9876543210ull;
+ test(x);
+ x = ((ti)0xfedcba9876543210ull)<<64 | 0x0123456789abcdefull;
+ test(x);
+ x = 0;
+ test(x);
+ x = 0xffffffffffffffffull;
+ test(x);
+ x = ((ti)0xffffffffffffffffull)<<64;
+ test(x);
+ x = ((ti)0xffffffffffffffffull)<<64 | 0xffffffffffffffffull;
+ test(x);
+ x = ((ti)0x5a5a5a5a5a5a5a5aull)<<64 | 0x5a5a5a5a5a5a5a5aull;
+ test(x);
+ x = ((ti)0xa5a5a5a5a5a5a5a5ull)<<64 | 0xa5a5a5a5a5a5a5a5ull;
+ test(x);
+ x = 0xffull;
+ test(x);
+ x = 0xff00ull;
+ test(x);
+ x = 0xff0000ull;
+ test(x);
+ x = 0xff000000ull;
+ test(x);
+ x = 0xff00000000ull;
+ test(x);
+ x = 0xff0000000000ull;
+ test(x);
+ x = 0xff000000000000ull;
+ test(x);
+ x = 0xff00000000000000ull;
+ test(x);
+ x = ((ti)0xffull)<<64;
+ test(x);
+ x = ((ti)0xff00ull)<<64;
+ test(x);
+ x = ((ti)0xff0000ull)<<64;
+ test(x);
+ x = ((ti)0xff000000ull)<<64;
+ test(x);
+ x = ((ti)0xff00000000ull)<<64;
+ test(x);
+ x = ((ti)0xff0000000000ull)<<64;
+ test(x);
+ x = ((ti)0xff000000000000ull)<<64;
+ test(x);
+ x = ((ti)0xff00000000000000ull)<<64;
+ test(x);
+ x = 0xdeadbeefcafebabeull;
+ test(x);
+ x = ((ti)0xdeadbeefcafebabeull)<<64;
+ test(x);
+
+ return 0;
+}
+
new file mode 100644
@@ -0,0 +1,13 @@
+/* PR target/102986 */
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef unsigned __int128 uv1ti __attribute__ ((__vector_size__ (16)));
+typedef __int128 sv1ti __attribute__ ((__vector_size__ (16)));
+
+uv1ti ashl(uv1ti x, unsigned int i) { return x << i; }
+uv1ti lshr(uv1ti x, unsigned int i) { return x >> i; }
+sv1ti ashr(sv1ti x, unsigned int i) { return x >> i; }
+uv1ti rotr(uv1ti x, unsigned int i) { return (x >> i) | (x << (128-i)); }
+uv1ti rotl(uv1ti x, unsigned int i) { return (x << i) | (x >> (128-i)); }
+
new file mode 100644
@@ -0,0 +1,113 @@
+/* PR target/102986 */
+/* { dg-do run { target int128 } } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+typedef unsigned __int128 uv1ti __attribute__ ((__vector_size__ (16)));
+typedef __int128 sv1ti __attribute__ ((__vector_size__ (16)));
+typedef __int128 v1ti __attribute__ ((__vector_size__ (16)));
+
+typedef unsigned __int128 uti;
+typedef __int128 sti;
+typedef __int128 ti;
+
+uv1ti ashl_v1ti(uv1ti x, unsigned int i) { return x << i; }
+uv1ti lshr_v1ti(uv1ti x, unsigned int i) { return x >> i; }
+sv1ti ashr_v1ti(sv1ti x, unsigned int i) { return x >> i; }
+uv1ti rotr_v1ti(uv1ti x, unsigned int i) { return (x >> i) | (x << (128-i)); }
+uv1ti rotl_v1ti(uv1ti x, unsigned int i) { return (x << i) | (x >> (128-i)); }
+
+uti ashl_ti(uti x, unsigned int i) { return x << i; }
+uti lshr_ti(uti x, unsigned int i) { return x >> i; }
+sti ashr_ti(sti x, unsigned int i) { return x >> i; }
+uti rotr_ti(uti x, unsigned int i) { return (x >> i) | (x << (128-i)); }
+uti rotl_ti(uti x, unsigned int i) { return (x << i) | (x >> (128-i)); }
+
+void test(ti x)
+{
+ unsigned int i;
+ uv1ti ut = (uv1ti)x;
+ sv1ti st = (sv1ti)x;
+
+ for (i=0; i<128; i++) {
+ if ((ti)ashl_v1ti(ut,i) != (ti)ashl_ti(x,i))
+ __builtin_abort();
+ if ((ti)lshr_v1ti(ut,i) != (ti)lshr_ti(x,i))
+ __builtin_abort();
+ if ((ti)ashr_v1ti(st,i) != (ti)ashr_ti(x,i))
+ __builtin_abort();
+ if ((ti)rotr_v1ti(ut,i) != (ti)rotr_ti(x,i))
+ __builtin_abort();
+ if ((ti)rotl_v1ti(ut,i) != (ti)rotl_ti(x,i))
+ __builtin_abort();
+ }
+}
+
+int main()
+{
+ ti x;
+
+ x = ((ti)0x0011223344556677ull)<<64 | 0x8899aabbccddeeffull;
+ test(x);
+ x = ((ti)0xffeeddccbbaa9988ull)<<64 | 0x7766554433221100ull;
+ test(x);
+ x = ((ti)0x0123456789abcdefull)<<64 | 0x0123456789abcdefull;
+ test(x);
+ x = ((ti)0xfedcba9876543210ull)<<64 | 0xfedcba9876543210ull;
+ test(x);
+ x = ((ti)0x0123456789abcdefull)<<64 | 0xfedcba9876543210ull;
+ test(x);
+ x = ((ti)0xfedcba9876543210ull)<<64 | 0x0123456789abcdefull;
+ test(x);
+ x = 0;
+ test(x);
+ x = 0xffffffffffffffffull;
+ test(x);
+ x = ((ti)0xffffffffffffffffull)<<64;
+ test(x);
+ x = ((ti)0xffffffffffffffffull)<<64 | 0xffffffffffffffffull;
+ test(x);
+ x = ((ti)0x5a5a5a5a5a5a5a5aull)<<64 | 0x5a5a5a5a5a5a5a5aull;
+ test(x);
+ x = ((ti)0xa5a5a5a5a5a5a5a5ull)<<64 | 0xa5a5a5a5a5a5a5a5ull;
+ test(x);
+ x = 0xffull;
+ test(x);
+ x = 0xff00ull;
+ test(x);
+ x = 0xff0000ull;
+ test(x);
+ x = 0xff000000ull;
+ test(x);
+ x = 0xff00000000ull;
+ test(x);
+ x = 0xff0000000000ull;
+ test(x);
+ x = 0xff000000000000ull;
+ test(x);
+ x = 0xff00000000000000ull;
+ test(x);
+ x = ((ti)0xffull)<<64;
+ test(x);
+ x = ((ti)0xff00ull)<<64;
+ test(x);
+ x = ((ti)0xff0000ull)<<64;
+ test(x);
+ x = ((ti)0xff000000ull)<<64;
+ test(x);
+ x = ((ti)0xff00000000ull)<<64;
+ test(x);
+ x = ((ti)0xff0000000000ull)<<64;
+ test(x);
+ x = ((ti)0xff000000000000ull)<<64;
+ test(x);
+ x = ((ti)0xff00000000000000ull)<<64;
+ test(x);
+ x = 0xdeadbeefcafebabeull;
+ test(x);
+ x = ((ti)0xdeadbeefcafebabeull)<<64;
+ test(x);
+
+ return 0;
+}
+