@@ -44,5 +44,5 @@ void foo_i2(dcmlx4_t dst[], const dcmlx_t src[], int n)
/* { dg-final { scan-tree-dump "Detected interleaving store of size 16" "vect" } } */
/* We're not able to peel & apply re-aligning to make accesses well-aligned for !vect_hw_misalign,
but we could by peeling the stores for alignment and applying re-aligning loads. */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { ! vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { ! vect_hw_misalign } } } } */
/* { dg-final { scan-tree-dump-not "gap of 6 elements" "vect" } } */
@@ -13,7 +13,8 @@ main1 ()
unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
float out[N*8];
- /* Different operations - not SLPable. */
+ /* Different operations - we SLP the store and split the group into two
+ single-lane branches. */
for (i = 0; i < N*4; i++)
{
out[i*2] = ((float) in[i*2] * 2 + 6) ;
@@ -44,4 +45,4 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { vect_uintfloat_cvt && vect_strided2 } && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { { vect_uintfloat_cvt && vect_strided2 } && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
@@ -40,6 +40,10 @@ main1 ()
out[i*8 + 3] = b3 - 1;
out[i*8 + 4] = b4 - 8;
out[i*8 + 5] = b5 - 7;
+ /* Due to the use in the ia[i] store we keep the feeding expression
+ in the form ((in[i*8 + 6] + 11) * 3 - 3) while other expressions
+ got associated as, for example, (in[i*8 + 5] * 4 + 33). That
+ causes SLP discovery to fail. */
out[i*8 + 6] = b6 - 3;
out[i*8 + 7] = b7 - 7;
@@ -76,5 +80,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { vect_strided8 && {! vect_load_lanes } } && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { { vect_strided8 && {! vect_load_lanes } } && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */
@@ -12,6 +12,7 @@ main1 ()
unsigned short out[N*8], out2[N*8], b0, b1, b2, b3, b4, a0, a1, a2, a3, b5;
unsigned short in[N*8];
+#pragma GCC novector
for (i = 0; i < N*8; i++)
{
in[i] = i;
@@ -202,18 +203,8 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_strided4 || vect_extract_even_odd } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided4 || vect_extract_even_odd } } } } } */
-/* Some targets can vectorize the second of the three main loops using
- hybrid SLP. For 128-bit vectors, the required 4->3 permutations are:
-
- { 0, 1, 2, 4, 5, 6, 8, 9 }
- { 2, 4, 5, 6, 8, 9, 10, 12 }
- { 5, 6, 8, 9, 10, 12, 13, 14 }
-
- Not all vect_perm targets support that, and it's a bit too specific to have
- its own effective-target selector, so we just test targets directly. */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { powerpc64*-*-* s390*-*-* loongarch*-*-* } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided4 && { ! { powerpc64*-*-* s390*-*-* loongarch*-*-* } } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided4 } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { vect_strided4 || vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided4 || vect_extract_even_odd } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
@@ -125,4 +125,4 @@ main ()
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
@@ -41,4 +41,4 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { ! vect_load_lanes } xfail { ! vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_load_lanes } xfail { ! vect_hw_misalign } } } } */
@@ -36,6 +36,5 @@ f3 (int *restrict y, int *restrict x, int *restrict indices)
}
}
-/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect } } */
/* { dg-final { scan-tree-dump "different gather base" vect { target { ! vect_gather_load_ifn } } } } */
/* { dg-final { scan-tree-dump "different gather scale" vect { target { ! vect_gather_load_ifn } } } } */
@@ -25,4 +25,5 @@ matrix_mul (byte *in, byte *out, int size)
}
}
-/* { dg-final { scan-assembler "palignr" } } */
+/* We are no longer using hybrid SLP. */
+/* { dg-final { scan-assembler "palignr" { xfail *-*-* } } } */