@@ -10,24 +10,25 @@
#include "avx10-helper.h"
#include <stdint.h>
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
static void
CALC (_Float16 *res_ref, float *src1, float *src2)
{
float fp32;
int i;
- for (i = 0; i < SIZE_RES / 2; i++)
+ for (i = 0; i < SIZE / 2; i++)
{
fp32 = (float) 2 * i + 7 + i * 0.5;
res_ref[i] = fp32;
src2[i] = fp32;
}
- for (i = SIZE_RES / 2; i < SIZE_RES; i++)
+ for (i = SIZE / 2; i < SIZE; i++)
{
fp32 = (float)2 * i + 7 + i * 0.5;
res_ref[i] = fp32;
- src1[i - (SIZE_RES / 2)] = fp32;
+ src1[i - (SIZE / 2)] = fp32;
}
}
@@ -35,17 +36,27 @@ void
TEST (void)
{
int i;
- UNION_TYPE (AVX512F_LEN, h) res1;
+ UNION_TYPE (AVX512F_LEN, h) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, ) src1, src2;
- _Float16 res_ref[SIZE_RES];
- float fp32;
-
- for (i = 0; i < SIZE_RES; i++)
- res1.a[i] = 5;
-
+ MASK_TYPE mask = MASK_VALUE;
+ _Float16 res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
CALC (res_ref, src1.a, src2.a);
-
+
res1.x = INTRINSIC (_cvtx2ps_ph) (src1.x, src2.x);
if (UNION_CHECK (AVX512F_LEN, h) (res1, res_ref))
abort ();
+
+ res2.x = INTRINSIC (_mask_cvtx2ps_ph) (res2.x, mask, src1.x, src2.x);
+ MASK_MERGE (h) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, h) (res2, res_ref))
+ abort ();
+
+ res3.x = INTRINSIC (_maskz_cvtx2ps_ph) (mask, src1.x, src2.x);
+ MASK_ZERO (h) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, h) (res3, res_ref))
+ abort ();
}
@@ -15,6 +15,9 @@
#define SRC_F16 (AVX512F_LEN / 16)
#define DST_F8_I8 (AVX512F_LEN_HALF / 8)
#define DST_F16 (AVX512F_LEN_HALF / 16)
+#define SIZE SRC_F16
+
+#include "avx512f-mask-type.h"
void
CALC (unsigned char *r, char *src1, _Float16 *src2)
@@ -39,9 +42,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, i_b) src1;
UNION_TYPE (AVX512F_LEN, h) src2;
+ MASK_TYPE mask = MASK_VALUE;
unsigned char res_ref[DST_F8_I8];
sign = 1;
@@ -51,9 +55,22 @@ TEST (void)
sign = -sign;
}
- res.x = INTRINSIC (_cvtbiasph_pbf8) (src1.x, src2.x);
- CALC(res_ref, src1.a, src2.a);
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ CALC (res_ref, src1.a, src2.a);
+
+ res1.x = INTRINSIC (_cvtbiasph_pbf8) (src1.x, src2.x);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref))
+ abort ();
+
+ res2.x = INTRINSIC (_mask_cvtbiasph_pbf8) (res2.x, mask, src1.x, src2.x);
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref))
+ abort ();
- if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+ res3.x = INTRINSIC (_maskz_cvtbiasph_pbf8) (mask, src1.x, src2.x);
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
}
@@ -15,6 +15,9 @@
#define SRC_F16 (AVX512F_LEN / 16)
#define DST_F8_I8 (AVX512F_LEN_HALF / 8)
#define DST_F16 (AVX512F_LEN_HALF / 16)
+#define SIZE SRC_F16
+
+#include "avx512f-mask-type.h"
void
CALC (unsigned char *r, char *src1, _Float16 *src2)
@@ -39,9 +42,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, i_b) src1;
UNION_TYPE (AVX512F_LEN, h) src2;
+ MASK_TYPE mask = MASK_VALUE;
unsigned char res_ref[DST_F8_I8];
sign = 1;
@@ -51,9 +55,23 @@ TEST (void)
sign = -sign;
}
- res.x = INTRINSIC (_cvtbiassph_pbf8) (src1.x, src2.x);
- CALC(res_ref, src1.a, src2.a);
-
- if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ CALC (res_ref, src1.a, src2.a);
+
+
+ res1.x = INTRINSIC (_cvtbiassph_pbf8) (src1.x, src2.x);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref))
+ abort ();
+
+ res2.x = INTRINSIC (_mask_cvtbiassph_pbf8) (res2.x, mask, src1.x, src2.x);
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref))
+ abort ();
+
+ res3.x = INTRINSIC (_maskz_cvtbiassph_pbf8) (mask, src1.x, src2.x);
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
}
@@ -15,6 +15,9 @@
#define SRC_F16 (AVX512F_LEN / 16)
#define DST_F8_I8 (AVX512F_LEN_HALF / 8)
#define DST_F16 (AVX512F_LEN_HALF / 16)
+#define SIZE SRC_F16
+
+#include "avx512f-mask-type.h"
void
CALC (unsigned char *r, char *src1, _Float16 *src2)
@@ -39,9 +42,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, i_b) src1;
UNION_TYPE (AVX512F_LEN, h) src2;
+ MASK_TYPE mask = MASK_VALUE;
unsigned char res_ref[DST_F8_I8];
sign = 1;
@@ -51,9 +55,22 @@ TEST (void)
sign = -sign;
}
- res.x = INTRINSIC (_cvtbiasph_phf8) (src1.x, src2.x);
- CALC(res_ref, src1.a, src2.a);
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ CALC (res_ref, src1.a, src2.a);
+
+ res1.x = INTRINSIC (_cvtbiasph_phf8) (src1.x, src2.x);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref))
+ abort ();
+
+ res2.x = INTRINSIC (_mask_cvtbiasph_phf8) (res2.x, mask, src1.x, src2.x);
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref))
+ abort ();
- if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+ res3.x = INTRINSIC (_maskz_cvtbiasph_phf8) (mask, src1.x, src2.x);
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
}
@@ -15,6 +15,9 @@
#define SRC_F16 (AVX512F_LEN / 16)
#define DST_F8_I8 (AVX512F_LEN_HALF / 8)
#define DST_F16 (AVX512F_LEN_HALF / 16)
+#define SIZE SRC_F16
+
+#include "avx512f-mask-type.h"
void
CALC (unsigned char *r, char *src1, _Float16 *src2)
@@ -39,9 +42,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, i_b) src1;
UNION_TYPE (AVX512F_LEN, h) src2;
+ MASK_TYPE mask = MASK_VALUE;
unsigned char res_ref[DST_F8_I8];
sign = 1;
@@ -51,9 +55,22 @@ TEST (void)
sign = -sign;
}
- res.x = INTRINSIC (_cvtbiassph_phf8) (src1.x, src2.x);
- CALC(res_ref, src1.a, src2.a);
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ CALC (res_ref, src1.a, src2.a);
+
+ res1.x = INTRINSIC (_cvtbiassph_phf8) (src1.x, src2.x);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref))
+ abort ();
+
+ res2.x = INTRINSIC (_mask_cvtbiassph_phf8) (res2.x, mask, src1.x, src2.x);
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref))
+ abort ();
- if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+ res3.x = INTRINSIC (_maskz_cvtbiassph_phf8) (mask, src1.x, src2.x);
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
}
@@ -12,13 +12,14 @@
#include "fp8-helper.h"
#define SIZE_SRC (AVX512F_LEN_HALF / 8)
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
CALC (_Float16 *r, unsigned char *s)
{
int i;
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
r[i] = convert_hf8_to_fp16(s[i]);
}
@@ -26,9 +27,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN, h) res;
+ UNION_TYPE (AVX512F_LEN, h) res1, res2, res3;
UNION_TYPE (AVX512F_LEN_HALF, i_b) src;
- _Float16 res_ref[SIZE_RES];
+ MASK_TYPE mask = MASK_VALUE;
+ _Float16 res_ref[SIZE];
sign = 1;
for (i = 0; i < SIZE_SRC; i++)
@@ -37,9 +39,22 @@ TEST (void)
sign = -sign;
}
- res.x = INTRINSIC (_cvthf8_ph) (src.x);
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
CALC(res_ref, src.a);
- if (UNION_ROUGH_CHECK (AVX512F_LEN, h) (res, res_ref, 0.0009765625))
+ res1.x = INTRINSIC (_cvthf8_ph) (src.x);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, h) (res1, res_ref, 0.0009765625))
+ abort ();
+
+ res2.x = INTRINSIC (_mask_cvthf8_ph) (res2.x, mask, src.x);
+ MASK_MERGE (h) (res_ref, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, h) (res2, res_ref, 0.0009765625))
+ abort ();
+
+ res3.x = INTRINSIC (_maskz_cvthf8_ph) (mask, src.x);
+ MASK_ZERO (h) (res_ref, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, h) (res3, res_ref, 0.0009765625))
abort ();
}
@@ -12,7 +12,8 @@
#include "fp8-helper.h"
#define SIZE_SRC (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN / 8)
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
void
CALC (unsigned char *r, _Float16 *s1, _Float16 *s2)
@@ -24,7 +25,7 @@ CALC (unsigned char *r, _Float16 *s1, _Float16 *s2)
hf8_bf8 = 1;
saturate = 0;
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
r[i] = 0;
if (i < SIZE_SRC)
@@ -45,9 +46,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN, i_b) res;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, h) src1, src2;
- unsigned char res_ref[SIZE_RES];
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
sign = 1;
for (i = 0; i < SIZE_SRC; i++)
@@ -57,9 +59,22 @@ TEST (void)
sign = -sign;
}
-  res.x = INTRINSIC (_cvtne2ph_pbf8) (src1.x, src2.x);
+  for (i = 0; i < SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
CALC(res_ref, src1.a, src2.a);
+  res1.x = INTRINSIC (_cvtne2ph_pbf8) (src1.x, src2.x);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+    abort ();
+
+  res2.x = INTRINSIC (_mask_cvtne2ph_pbf8) (res2.x, mask, src1.x, src2.x);
+  MASK_MERGE (i_b) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+    abort ();
+
+  res3.x = INTRINSIC (_maskz_cvtne2ph_pbf8) (mask, src1.x, src2.x);
+  MASK_ZERO (i_b) (res_ref, mask, SIZE);
-  if (UNION_CHECK (AVX512F_LEN, i_b) (res, res_ref))
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
abort ();
}
@@ -12,7 +12,8 @@
#include "fp8-helper.h"
#define SIZE_SRC (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN / 8)
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
void
CALC (unsigned char *r, _Float16 *s1, _Float16 *s2)
@@ -24,7 +25,7 @@ CALC (unsigned char *r, _Float16 *s1, _Float16 *s2)
hf8_bf8 = 1;
saturate = 1;
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
r[i] = 0;
if (i < SIZE_SRC)
@@ -45,9 +46,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN, i_b) res;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, h) src1, src2;
- unsigned char res_ref[SIZE_RES];
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
sign = 1;
for (i = 0; i < SIZE_SRC; i++)
@@ -57,9 +59,22 @@ TEST (void)
sign = -sign;
}
-  res.x = INTRINSIC (_cvtnes2ph_pbf8) (src1.x, src2.x);
+  for (i = 0; i < SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
CALC(res_ref, src1.a, src2.a);
+  res1.x = INTRINSIC (_cvtnes2ph_pbf8) (src1.x, src2.x);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+    abort ();
+
+  res2.x = INTRINSIC (_mask_cvtnes2ph_pbf8) (res2.x, mask, src1.x, src2.x);
+  MASK_MERGE (i_b) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+    abort ();
+
+  res3.x = INTRINSIC (_maskz_cvtnes2ph_pbf8) (mask, src1.x, src2.x);
+  MASK_ZERO (i_b) (res_ref, mask, SIZE);
-  if (UNION_CHECK (AVX512F_LEN, i_b) (res, res_ref))
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
abort ();
}
@@ -12,7 +12,8 @@
#include "fp8-helper.h"
#define SIZE_SRC (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN / 8)
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
void
CALC (unsigned char *r, _Float16 *s1, _Float16 *s2)
@@ -24,7 +25,7 @@ CALC (unsigned char *r, _Float16 *s1, _Float16 *s2)
hf8_bf8 = 0;
saturate = 0;
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
r[i] = 0;
if (i < SIZE_SRC)
@@ -45,9 +46,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN, i_b) res;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, h) src1, src2;
- unsigned char res_ref[SIZE_RES];
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
sign = 1;
for (i = 0; i < SIZE_SRC; i++)
@@ -57,9 +59,22 @@ TEST (void)
sign = -sign;
}
-  res.x = INTRINSIC (_cvtne2ph_phf8) (src1.x, src2.x);
+  for (i = 0; i < SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
CALC(res_ref, src1.a, src2.a);
+  res1.x = INTRINSIC (_cvtne2ph_phf8) (src1.x, src2.x);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+    abort ();
+
+  res2.x = INTRINSIC (_mask_cvtne2ph_phf8) (res2.x, mask, src1.x, src2.x);
+  MASK_MERGE (i_b) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+    abort ();
+
+  res3.x = INTRINSIC (_maskz_cvtne2ph_phf8) (mask, src1.x, src2.x);
+  MASK_ZERO (i_b) (res_ref, mask, SIZE);
-  if (UNION_CHECK (AVX512F_LEN, i_b) (res, res_ref))
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
abort ();
}
@@ -12,7 +12,8 @@
#include "fp8-helper.h"
#define SIZE_SRC (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN / 8)
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
void
CALC (unsigned char *r, _Float16 *s1, _Float16 *s2)
@@ -24,7 +25,7 @@ CALC (unsigned char *r, _Float16 *s1, _Float16 *s2)
hf8_bf8 = 0;
saturate = 1;
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
r[i] = 0;
if (i < SIZE_SRC)
@@ -45,9 +46,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN, i_b) res;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, h) src1, src2;
- unsigned char res_ref[SIZE_RES];
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
sign = 1;
for (i = 0; i < SIZE_SRC; i++)
@@ -57,9 +59,22 @@ TEST (void)
sign *= -1;
}
-  res.x = INTRINSIC (_cvtnes2ph_phf8) (src1.x, src2.x);
+  for (i = 0; i < SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
CALC(res_ref, src1.a, src2.a);
+  res1.x = INTRINSIC (_cvtnes2ph_phf8) (src1.x, src2.x);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+    abort ();
+
+  res2.x = INTRINSIC (_mask_cvtnes2ph_phf8) (res2.x, mask, src1.x, src2.x);
+  MASK_MERGE (i_b) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+    abort ();
+
+  res3.x = INTRINSIC (_maskz_cvtnes2ph_phf8) (mask, src1.x, src2.x);
+  MASK_ZERO (i_b) (res_ref, mask, SIZE);
-  if (UNION_CHECK (AVX512F_LEN, i_b) (res, res_ref))
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
abort ();
}
@@ -6,15 +6,14 @@
#define AVX10_2
#define AVX10_2_512
#define AVX10_512BIT
-#define AVX512F_LEN 512
-#define AVX512F_LEN_HALF 256
#endif
#include "avx10-helper.h"
#include "fp8-helper.h"
#define SIZE_SRC (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN_HALF / 8)
+#define SIZE (AVX512F_LEN_HALF / 8)
+#include "avx512f-mask-type.h"
void
CALC (unsigned char *r, _Float16 *s)
@@ -24,7 +23,7 @@ CALC (unsigned char *r, _Float16 *s)
hf8_bf8 = 1;
saturate = 0;
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
r[i] = 0;
if (i < SIZE_SRC)
@@ -39,9 +38,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, h) src;
- unsigned char res_ref[SIZE_RES];
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
sign = 1;
for (i = 0; i < SIZE_SRC; i++)
@@ -50,9 +50,22 @@ TEST (void)
sign = -sign;
}
-  res.x = INTRINSIC (_cvtneph_pbf8) (src.x);
+  for (i = 0; i < SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
CALC(res_ref, src.a);
-
+  
+  res1.x = INTRINSIC (_cvtneph_pbf8) (src.x);
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref))
+    abort ();
+
+  res2.x = INTRINSIC (_mask_cvtneph_pbf8) (res2.x, mask, src.x);
+  MASK_MERGE (i_b) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref))
+    abort ();
+
+  res3.x = INTRINSIC (_maskz_cvtneph_pbf8) (mask, src.x);
+  MASK_ZERO (i_b) (res_ref, mask, SIZE);
-  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
}
@@ -12,7 +12,8 @@
#include "fp8-helper.h"
#define SIZE_SRC (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN_HALF / 8)
+#define SIZE (AVX512F_LEN_HALF / 8)
+#include "avx512f-mask-type.h"
void
CALC (unsigned char *r, _Float16 *s)
@@ -22,7 +23,7 @@ CALC (unsigned char *r, _Float16 *s)
hf8_bf8 = 1;
saturate = 1;
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
r[i] = 0;
if (i < SIZE_SRC)
@@ -37,9 +38,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, h) src;
- unsigned char res_ref[SIZE_RES];
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
sign = 1;
for (i = 0; i < SIZE_SRC; i++)
@@ -48,9 +50,22 @@ TEST (void)
sign = -sign;
}
-  res.x = INTRINSIC (_cvtnesph_pbf8) (src.x);
+  for (i = 0; i < SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
CALC(res_ref, src.a);
-
+  
+  res1.x = INTRINSIC (_cvtnesph_pbf8) (src.x);
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref))
+    abort ();
+
+  res2.x = INTRINSIC (_mask_cvtnesph_pbf8) (res2.x, mask, src.x);
+  MASK_MERGE (i_b) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref))
+    abort ();
+
+  res3.x = INTRINSIC (_maskz_cvtnesph_pbf8) (mask, src.x);
+  MASK_ZERO (i_b) (res_ref, mask, SIZE);
-  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
}
@@ -12,7 +12,8 @@
#include "fp8-helper.h"
#define SIZE_SRC (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN_HALF / 8)
+#define SIZE (AVX512F_LEN_HALF / 8)
+#include "avx512f-mask-type.h"
void
CALC (unsigned char *r, _Float16 *s)
@@ -22,7 +23,7 @@ CALC (unsigned char *r, _Float16 *s)
hf8_bf8 = 0;
saturate = 0;
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
r[i] = 0;
if (i < SIZE_SRC)
@@ -37,9 +38,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, h) src;
- unsigned char res_ref[SIZE_RES];
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
sign = 1;
for (i = 0; i < SIZE_SRC; i++)
@@ -48,9 +50,22 @@ TEST (void)
sign = -sign;
}
-  res.x = INTRINSIC (_cvtneph_phf8) (src.x);
+  for (i = 0; i < SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
CALC(res_ref, src.a);
-
+  
+  res1.x = INTRINSIC (_cvtneph_phf8) (src.x);
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref))
+    abort ();
+
+  res2.x = INTRINSIC (_mask_cvtneph_phf8) (res2.x, mask, src.x);
+  MASK_MERGE (i_b) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref))
+    abort ();
+
+  res3.x = INTRINSIC (_maskz_cvtneph_phf8) (mask, src.x);
+  MASK_ZERO (i_b) (res_ref, mask, SIZE);
-  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
}
@@ -12,7 +12,8 @@
#include "fp8-helper.h"
#define SIZE_SRC (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN_HALF / 8)
+#define SIZE (AVX512F_LEN_HALF / 8)
+#include "avx512f-mask-type.h"
void
CALC (unsigned char *r, _Float16 *s)
@@ -22,7 +23,7 @@ CALC (unsigned char *r, _Float16 *s)
hf8_bf8 = 0;
saturate = 1;
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
r[i] = 0;
if (i < SIZE_SRC)
@@ -37,9 +38,10 @@ void
TEST (void)
{
int i,sign;
- UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
UNION_TYPE (AVX512F_LEN, h) src;
- unsigned char res_ref[SIZE_RES];
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
sign = 1;
for (i = 0; i < SIZE_SRC; i++)
@@ -48,9 +50,22 @@ TEST (void)
sign = -sign;
}
-  res.x = INTRINSIC (_cvtnesph_phf8) (src.x);
+  for (i = 0; i < SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
CALC(res_ref, src.a);
-
+  
+  res1.x = INTRINSIC (_cvtnesph_phf8) (src.x);
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref))
+    abort ();
+
+  res2.x = INTRINSIC (_mask_cvtnesph_phf8) (res2.x, mask, src.x);
+  MASK_MERGE (i_b) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref))
+    abort ();
+
+  res3.x = INTRINSIC (_maskz_cvtnesph_phf8) (mask, src.x);
+  MASK_ZERO (i_b) (res_ref, mask, SIZE);
-  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
}
@@ -97,7 +97,7 @@ MAKE_MASK_ZERO(bf16_bf, __bf16)
/* Function which calculates result. */
#define CALC EVAL(calc_, AVX512F_LEN,)
-#if !defined(AVX512VL) || defined(AVX10_512)
+#if !defined(AVX512VL) || defined(AVX10_512BIT)
#define AVX512F_LEN 512
#define AVX512F_LEN_HALF 256
#endif