[pushed] aarch64: Adjust tests after fix for PR102659
Commit Message
After the fix for PR102659, the vectoriser can no longer group
conditional accesses of the form:
    for (int i = 0; i < n; ++i)
      if (...)
        ...a[i * 2] + a[i * 2 + 1]...;
on LP64 targets. It has to treat them as two independent
gathers instead.
This was causing failures in the sve mask_struct*.c tests.
The tests weren't really testing that int iterators could
be used, so this patch switches to pointer-sized iterators
instead.
Tested on aarch64-linux-gnu & pushed.
Richard
gcc/testsuite/
* gcc.target/aarch64/sve/mask_struct_load_1.c: Use intptr_t
iterators instead of int iterators.
* gcc.target/aarch64/sve/mask_struct_load_2.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_3.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_4.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_5.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_6.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_7.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_8.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_1.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_2.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_3.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_4.c: Likewise.
---
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_1.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_2.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_3.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_4.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_5.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_6.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_7.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_8.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_1.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_2.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_4.c | 6 +++---
12 files changed, 25 insertions(+), 25 deletions(-)
Comments
On Thu, Feb 3, 2022 at 11:52 AM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> After the fix for PR102659, the vectoriser can no longer group
> conditional accesses of the form:
>
> for (int i = 0; i < n; ++i)
> if (...)
> ...a[i * 2] + a[i * 2 + 1]...;
>
> on LP64 targets. It has to treat them as two independent
> gathers instead.
Hmm, that's unfortunate. Can you file an enhancement bug report?
How does using intptr_t help? i * 2 can still overflow with large n,
just as it can with 'int' on ILP32. So I guess this is the old issue
of transforming (uint64)(i * 2 + 1) to (uint64)(i * 2) + 1UL?
> This was causing failures in the sve mask_struct*.c tests.
> The tests weren't really testing that int iterators could
> be used, so this patch switches to pointer-sized iterators
> instead.
>
> Tested on aarch64-linux-gnu & pushed.
>
> Richard
>
>
> gcc/testsuite/
> * gcc.target/aarch64/sve/mask_struct_load_1.c: Use intptr_t
> iterators instead of int iterators.
> * gcc.target/aarch64/sve/mask_struct_load_2.c: Likewise.
> * gcc.target/aarch64/sve/mask_struct_load_3.c: Likewise.
> * gcc.target/aarch64/sve/mask_struct_load_4.c: Likewise.
> * gcc.target/aarch64/sve/mask_struct_load_5.c: Likewise.
> * gcc.target/aarch64/sve/mask_struct_load_6.c: Likewise.
> * gcc.target/aarch64/sve/mask_struct_load_7.c: Likewise.
> * gcc.target/aarch64/sve/mask_struct_load_8.c: Likewise.
> * gcc.target/aarch64/sve/mask_struct_store_1.c: Likewise.
> * gcc.target/aarch64/sve/mask_struct_store_2.c: Likewise.
> * gcc.target/aarch64/sve/mask_struct_store_3.c: Likewise.
> * gcc.target/aarch64/sve/mask_struct_store_4.c: Likewise.
> ---
> gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_1.c | 4 ++--
> gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_2.c | 4 ++--
> gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_3.c | 4 ++--
> gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_4.c | 4 ++--
> gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_5.c | 4 ++--
> gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_6.c | 4 ++--
> gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_7.c | 4 ++--
> gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_8.c | 4 ++--
> gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_1.c | 4 ++--
> gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_2.c | 4 ++--
> gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c | 4 ++--
> gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_4.c | 6 +++---
> 12 files changed, 25 insertions(+), 25 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_1.c
> index 03b2b93df07..450fbb887e3 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_1.c
> @@ -6,9 +6,9 @@
> #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
> void __attribute__ ((noinline, noclone)) \
> NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
> - MASKTYPE *__restrict cond, int n) \
> + MASKTYPE *__restrict cond, intptr_t n) \
> { \
> - for (int i = 0; i < n; ++i) \
> + for (intptr_t i = 0; i < n; ++i) \
> if (cond[i]) \
> dest[i] = src[i * 2] + src[i * 2 + 1]; \
> }
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_2.c
> index 87ac3178be0..499abd7c99a 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_2.c
> @@ -6,9 +6,9 @@
> #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
> void __attribute__ ((noinline, noclone)) \
> NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
> - MASKTYPE *__restrict cond, int n) \
> + MASKTYPE *__restrict cond, intptr_t n) \
> { \
> - for (int i = 0; i < n; ++i) \
> + for (intptr_t i = 0; i < n; ++i) \
> if (cond[i]) \
> dest[i] = (src[i * 3] \
> + src[i * 3 + 1] \
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_3.c
> index 54806f93ad9..a5ce0716322 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_3.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_3.c
> @@ -6,9 +6,9 @@
> #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
> void __attribute__ ((noinline, noclone)) \
> NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
> - MASKTYPE *__restrict cond, int n) \
> + MASKTYPE *__restrict cond, intptr_t n) \
> { \
> - for (int i = 0; i < n; ++i) \
> + for (intptr_t i = 0; i < n; ++i) \
> if (cond[i]) \
> dest[i] = (src[i * 4] \
> + src[i * 4 + 1] \
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_4.c
> index 4c73004f68d..bdfea964f88 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_4.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_4.c
> @@ -6,9 +6,9 @@
> #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
> void __attribute__ ((noinline, noclone)) \
> NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
> - MASKTYPE *__restrict cond, int n) \
> + MASKTYPE *__restrict cond, intptr_t n) \
> { \
> - for (int i = 0; i < n; ++i) \
> + for (intptr_t i = 0; i < n; ++i) \
> if (cond[i]) \
> dest[i] = src[i * 3] + src[i * 3 + 2]; \
> }
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_5.c
> index 2a33ee81d1a..299955a8a93 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_5.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_5.c
> @@ -6,9 +6,9 @@
> #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
> void __attribute__ ((noinline, noclone)) \
> NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
> - MASKTYPE *__restrict cond, int n) \
> + MASKTYPE *__restrict cond, intptr_t n) \
> { \
> - for (int i = 0; i < n; ++i) \
> + for (intptr_t i = 0; i < n; ++i) \
> if (cond[i]) \
> dest[i] = src[i * 4] + src[i * 4 + 3]; \
> }
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_6.c
> index d4542eca0f7..999c7b52422 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_6.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_6.c
> @@ -6,9 +6,9 @@
> #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
> void __attribute__ ((noinline, noclone)) \
> NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
> - MASKTYPE *__restrict cond, int n) \
> + MASKTYPE *__restrict cond, intptr_t n) \
> { \
> - for (int i = 0; i < n; ++i) \
> + for (intptr_t i = 0; i < n; ++i) \
> if (cond[i]) \
> dest[i] = src[i * 2]; \
> }
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_7.c
> index 9d26e15c0d9..57d71ed460a 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_7.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_7.c
> @@ -6,9 +6,9 @@
> #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
> void __attribute__ ((noinline, noclone)) \
> NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
> - MASKTYPE *__restrict cond, int n) \
> + MASKTYPE *__restrict cond, intptr_t n) \
> { \
> - for (int i = 0; i < n; ++i) \
> + for (intptr_t i = 0; i < n; ++i) \
> if (cond[i]) \
> dest[i] = src[i * 3] + src[i * 3 + 1]; \
> }
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_8.c
> index 17fd4c0a079..1ead3ba695e 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_8.c
> @@ -6,9 +6,9 @@
> #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
> void __attribute__ ((noinline, noclone)) \
> NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
> - MASKTYPE *__restrict cond, int n) \
> + MASKTYPE *__restrict cond, intptr_t n) \
> { \
> - for (int i = 0; i < n; ++i) \
> + for (intptr_t i = 0; i < n; ++i) \
> if (cond[i]) \
> dest[i] = src[i * 4] + src[i * 4 + 2]; \
> }
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_1.c
> index a75a694f9c3..39fae68a5d0 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_1.c
> @@ -6,9 +6,9 @@
> #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
> void __attribute__ ((noinline, noclone)) \
> NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
> - MASKTYPE *__restrict cond, INTYPE bias, int n) \
> + MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
> { \
> - for (int i = 0; i < n; ++i) \
> + for (intptr_t i = 0; i < n; ++i) \
> { \
> INTYPE value = src[i] + bias; \
> if (cond[i]) \
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_2.c
> index 0fd35f2ff52..0fe9b993402 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_2.c
> @@ -6,9 +6,9 @@
> #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
> void __attribute__ ((noinline, noclone)) \
> NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
> - MASKTYPE *__restrict cond, INTYPE bias, int n) \
> + MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
> { \
> - for (int i = 0; i < n; ++i) \
> + for (intptr_t i = 0; i < n; ++i) \
> { \
> INTYPE value = src[i] + bias; \
> if (cond[i]) \
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c
> index 1765d54a483..a9430870eee 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c
> @@ -6,9 +6,9 @@
> #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
> void __attribute__ ((noinline, noclone)) \
> NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
> - MASKTYPE *__restrict cond, INTYPE bias, int n) \
> + MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
> { \
> - for (int i = 0; i < n; ++i) \
> + for (intptr_t i = 0; i < n; ++i) \
> { \
> INTYPE value = src[i] + bias; \
> if (cond[i]) \
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_4.c
> index 59e9ee49c4a..d3f0216406d 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_4.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_4.c
> @@ -6,15 +6,15 @@
> #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
> void __attribute__ ((noinline, noclone)) \
> NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
> - MASKTYPE *__restrict cond, int n) \
> + MASKTYPE *__restrict cond, intptr_t n) \
> { \
> - for (int i = 0; i < n; ++i) \
> + for (intptr_t i = 0; i < n; ++i) \
> { \
> if (cond[i] < 8) \
> dest[i * 2] = src[i]; \
> if (cond[i] > 2) \
> dest[i * 2 + 1] = src[i]; \
> - } \
> + } \
> }
>
> #define TEST2(NAME, OUTTYPE, INTYPE) \
> --
> 2.25.1
>
Richard Biener <richard.guenther@gmail.com> writes:
> On Thu, Feb 3, 2022 at 11:52 AM Richard Sandiford via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
>>
>> After the fix for PR102659, the vectoriser can no longer group
>> conditional accesses of the form:
>>
>> for (int i = 0; i < n; ++i)
>> if (...)
>> ...a[i * 2] + a[i * 2 + 1]...;
>>
>> on LP64 targets. It has to treat them as two independent
>> gathers instead.
>
> Hmm, that's unfortunate. Can you file an enhancement bug report?
OK, filed as PR104368.
> How does using intptr_t help? i * 2 can still overflow with large n,
> just as it can with 'int' on ILP32. So I guess this is the old issue
> of transforming (uint64)(i * 2 + 1) to (uint64)(i * 2) + 1UL?
That does happen, but I'm not sure that it's the main problem.
SCEV analysis seems to fail for the a[i * 2] access too.
With ints the &a[i * 2] calculation is:
    _45 = (unsigned int) i_26;
    _46 = _45 * 2;
    _5 = (int) _46;
    _6 = (long unsigned int) _5;
    _7 = _6 * 4;
    _48 = _47 + _7;
and the &a[i * 2 + 1] calculation is:
    _10 = _6 + 1;
    _11 = _10 * 4;
    _51 = _11 + _47;
With intptr_ts the &a[i * 2] calculation is:
    i.0_1 = (long unsigned int) i_23;
    _5 = i.0_1 * 8;
    _40 = _39 + _5;
and the &a[i * 2 + 1] calculation is:
    _8 = _5 + 4;
    _43 = _8 + _39;
which looks correct.
If the intptr_t i * 2 wraps then a &a[(uintptr_t)i * 2] IV will still
behave correctly, so the {a, +, 8} SCEV still seems accurate. The int
i * 2 would instead wrap at 32 bits, so &a[(unsigned)i * 2] isn't
linear in any meaningful sense.
I don't know if the wrapping intptr_t SCEV leads to well-formed gimple
though. Are pointer IVs assumed not to overflow? If so, I guess we
might still be introducing UB for some intptr_t cases (although not
this one AFAICT, since any wrapping cases would be UB in the source too).
Thanks,
Richard
@@ -6,9 +6,9 @@
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, intptr_t n) \
{ \
- for (int i = 0; i < n; ++i) \
+ for (intptr_t i = 0; i < n; ++i) \
if (cond[i]) \
dest[i] = src[i * 2] + src[i * 2 + 1]; \
}
@@ -6,9 +6,9 @@
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, intptr_t n) \
{ \
- for (int i = 0; i < n; ++i) \
+ for (intptr_t i = 0; i < n; ++i) \
if (cond[i]) \
dest[i] = (src[i * 3] \
+ src[i * 3 + 1] \
@@ -6,9 +6,9 @@
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, intptr_t n) \
{ \
- for (int i = 0; i < n; ++i) \
+ for (intptr_t i = 0; i < n; ++i) \
if (cond[i]) \
dest[i] = (src[i * 4] \
+ src[i * 4 + 1] \
@@ -6,9 +6,9 @@
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, intptr_t n) \
{ \
- for (int i = 0; i < n; ++i) \
+ for (intptr_t i = 0; i < n; ++i) \
if (cond[i]) \
dest[i] = src[i * 3] + src[i * 3 + 2]; \
}
@@ -6,9 +6,9 @@
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, intptr_t n) \
{ \
- for (int i = 0; i < n; ++i) \
+ for (intptr_t i = 0; i < n; ++i) \
if (cond[i]) \
dest[i] = src[i * 4] + src[i * 4 + 3]; \
}
@@ -6,9 +6,9 @@
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, intptr_t n) \
{ \
- for (int i = 0; i < n; ++i) \
+ for (intptr_t i = 0; i < n; ++i) \
if (cond[i]) \
dest[i] = src[i * 2]; \
}
@@ -6,9 +6,9 @@
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, intptr_t n) \
{ \
- for (int i = 0; i < n; ++i) \
+ for (intptr_t i = 0; i < n; ++i) \
if (cond[i]) \
dest[i] = src[i * 3] + src[i * 3 + 1]; \
}
@@ -6,9 +6,9 @@
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, intptr_t n) \
{ \
- for (int i = 0; i < n; ++i) \
+ for (intptr_t i = 0; i < n; ++i) \
if (cond[i]) \
dest[i] = src[i * 4] + src[i * 4 + 2]; \
}
@@ -6,9 +6,9 @@
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, INTYPE bias, int n) \
+ MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
{ \
- for (int i = 0; i < n; ++i) \
+ for (intptr_t i = 0; i < n; ++i) \
{ \
INTYPE value = src[i] + bias; \
if (cond[i]) \
@@ -6,9 +6,9 @@
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, INTYPE bias, int n) \
+ MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
{ \
- for (int i = 0; i < n; ++i) \
+ for (intptr_t i = 0; i < n; ++i) \
{ \
INTYPE value = src[i] + bias; \
if (cond[i]) \
@@ -6,9 +6,9 @@
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, INTYPE bias, int n) \
+ MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
{ \
- for (int i = 0; i < n; ++i) \
+ for (intptr_t i = 0; i < n; ++i) \
{ \
INTYPE value = src[i] + bias; \
if (cond[i]) \
@@ -6,15 +6,15 @@
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, intptr_t n) \
{ \
- for (int i = 0; i < n; ++i) \
+ for (intptr_t i = 0; i < n; ++i) \
{ \
if (cond[i] < 8) \
dest[i * 2] = src[i]; \
if (cond[i] > 2) \
dest[i * 2 + 1] = src[i]; \
- } \
+ } \
}
#define TEST2(NAME, OUTTYPE, INTYPE) \