#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <math.h>
#include "tests/malloc.h"
/* Short aliases for the basic scalar types used throughout this test. */
typedef unsigned char      UChar;
typedef unsigned int       UInt;
typedef unsigned long      UWord;
typedef unsigned long long ULong;
typedef double             Double;
typedef float              Float;

/* Non-zero when the address _ptr is a multiple of 32 bytes. */
#define IS_32_ALIGNED(_ptr) (((UWord)(_ptr) & 0x1F) == 0)

/* 16 bytes of vector data, viewable as bytes, 32-bit words, floats or
   doubles. */
typedef union {
   UChar  u8[16];
   UInt   u32[4];
   Float  f32[4];
   Double f64[2];
} XMM;

/* 32 bytes of vector data, viewable as bytes, 32-bit words or two XMM
   halves. */
typedef union {
   UChar u8[32];
   UInt  u32[8];
   XMM   xmm[2];
} YMM;

/* Working set of one test: four register images followed by one memory
   operand image. */
typedef struct {
   YMM r1;
   YMM r2;
   YMM r3;
   YMM r4;
   YMM m;
} Block;
/* Print 32-bit float lane `idx` of `vec`.
   A normal value is written as its four raw bytes in hex, starting with
   the highest-addressed byte of the lane.  Infinities, zeroes, NaNs and
   everything else (subnormals) are written as a bracketed tag instead;
   all tags except the NaN one carry the sign character. */
void showFloat ( XMM* vec, int idx )
{
   const Float value = vec->f32[idx];
   const char  sign  = signbit(value) ? '-' : ' ';
   const int   kind  = fpclassify(value);

   if (kind == FP_NORMAL) {
      const int lowest = idx * 4;
      int byte = lowest + 3;
      while (byte >= lowest) {
         printf("%02x", (UInt)vec->u8[byte]);
         byte--;
      }
   } else if (kind == FP_INFINITE) {
      printf ("[ %cINF ]", sign);
   } else if (kind == FP_ZERO) {
      printf ("[%cZERO ]", sign);
   } else if (kind == FP_NAN) {
      printf ("[ NAN ]");
   } else {
      printf ("[%cSUBNR]", sign);
   }
}
/* Print 64-bit double lane `idx` of `vec`.
   A normal value is written as its eight raw bytes in hex, starting with
   the highest-addressed byte of the lane.  Infinities, zeroes, NaNs and
   everything else (subnormals) are written as a bracketed tag instead;
   all tags except the NaN one carry the sign character. */
void showDouble ( XMM* vec, int idx )
{
   const Double value = vec->f64[idx];
   const char   sign  = signbit(value) ? '-' : ' ';
   const int    kind  = fpclassify(value);

   if (kind == FP_NORMAL) {
      const int lowest = idx * 8;
      int byte = lowest + 7;
      while (byte >= lowest) {
         printf("%02x", (UInt)vec->u8[byte]);
         byte--;
      }
   } else if (kind == FP_INFINITE) {
      printf ("[ %cINF ]", sign);
   } else if (kind == FP_ZERO) {
      printf ("[ %cZERO ]", sign);
   } else if (kind == FP_NAN) {
      printf ("[ NAN ]");
   } else {
      printf ("[ %cSUBNORMAL ]", sign);
   }
}
/* Print all lanes of `vec`, highest lane first, separated by '.'.
   With a non-zero `isDouble` the vector is shown as two doubles,
   otherwise as four floats. */
void showXMM ( XMM* vec, int isDouble )
{
   int lane;

   if (isDouble) {
      for (lane = 1; lane >= 0; lane--) {
         showDouble ( vec, lane );
         if (lane > 0)
            printf (".");
      }
      return;
   }

   for (lane = 3; lane >= 0; lane--) {
      showFloat ( vec, lane );
      if (lane > 0)
         printf (".");
   }
}
/* Print a 32-byte vector as its upper XMM half, a '.', then its lower XMM
   half.  `vec` must be 32-byte aligned; this is asserted. */
void showYMM ( YMM* vec, int isDouble )
{
   int half;

   assert(IS_32_ALIGNED(vec));

   for (half = 1; half >= 0; half--) {
      showXMM ( &vec->xmm[half], isDouble );
      if (half == 1)
         printf(".");
   }
}
/* Dump a whole Block: a heading line containing `msg`, then one labelled
   line for each of r1, r2, r3, r4 and m.  `isDouble` selects the lane
   format used by showYMM. */
void showBlock ( char* msg, Block* block, int isDouble )
{
   const char* const labels[5] = { "r1: ", "r2: ", "r3: ", "r4: ", " m: " };
   YMM* const fields[5] = {
      &block->r1, &block->r2, &block->r3, &block->r4, &block->m
   };
   int k;

   printf(" %s\n", msg);
   for (k = 0; k < 5; k++) {
      printf("%s", labels[k]);
      showYMM(fields[k], isDouble);
      printf("\n");
   }
}
/* Table of special doubles, filled in by init_special_values() with the
   results of plusZero(), minusZero(), plusOne(), minusOne(), plusInf(),
   minusInf(), plusNaN(), minusNaN(), plusDenorm() and minusDenorm(), in
   that order.  specialFBlock() and specialDBlock() read from it. */
static Double special_values[10];
/* Return -d.  Marked noinline so the negation stays a real call. */
static __attribute__((noinline))
Double negate ( Double d )
{
   const Double flipped = -d;
   return flipped;
}
/* Return x divided by y.  Marked noinline so the division stays a real
   call. */
static __attribute__((noinline))
Double divf64 ( Double x, Double y )
{
   const Double quotient = x / y;
   return quotient;
}
/* Return the constant 0.0. */
static __attribute__((noinline))
Double plusZero ( void )
{
   const Double zero = 0.0;
   return zero;
}
/* Return the negation of plusZero(). */
static __attribute__((noinline))
Double minusZero ( void )
{
   const Double positive = plusZero();
   return negate(positive);
}
/* Return the constant 1.0. */
static __attribute__((noinline))
Double plusOne ( void )
{
   const Double one = 1.0;
   return one;
}
/* Return the negation of plusOne(). */
static __attribute__((noinline))
Double minusOne ( void )
{
   const Double positive = plusOne();
   return negate(positive);
}
/* Return the value of the constant expression 1.0 / 0.0. */
static __attribute__((noinline))
Double plusInf ( void )
{
   const Double inf = 1.0 / 0.0;
   return inf;
}
/* Return the negation of plusInf(). */
static __attribute__((noinline))
Double minusInf ( void )
{
   const Double positive = plusInf();
   return negate(positive);
}
/* Return plusInf() divided by plusInf(), with the division routed through
   divf64(). */
static __attribute__((noinline))
Double plusNaN ( void )
{
   const Double numerator   = plusInf();
   const Double denominator = plusInf();
   return divf64(numerator, denominator);
}
/* Return the negation of plusNaN(). */
static __attribute__((noinline))
Double minusNaN ( void )
{
   const Double nan_value = plusNaN();
   return negate(nan_value);
}
/* Return the value of the constant expression 1.23e-315 / 1e3, used as the
   table's positive denormal entry. */
static __attribute__((noinline))
Double plusDenorm ( void )
{
   const Double tiny = 1.23e-315 / 1e3;
   return tiny;
}
/* Return the negation of plusDenorm(). */
static __attribute__((noinline))
Double minusDenorm ( void )
{
   const Double positive = plusDenorm();
   return negate(positive);
}
static void init_special_values ( void )
{
special_values[0] = plusZero();
special_values[1] = minusZero();
special_values[2] = plusOne();
special_values[3] = minusOne();
special_values[4] = plusInf();
special_values[5] = minusInf();
special_values[6] = plusNaN();
special_values[7] = minusNaN();
special_values[8] = plusDenorm();
special_values[9] = minusDenorm();
}
/* Treat the whole Block as an array of floats and fill it by cycling
   through special_values, each entry converted to Float. */
void specialFBlock ( Block* b )
{
   Float* const lanes = (Float*)b;
   const size_t count = sizeof(Block) / sizeof(Float);
   size_t k = 0;

   while (k < count) {
      lanes[k] = (Float) special_values[k % 10];
      k++;
   }
}
/* Treat the whole Block as an array of doubles and fill it by cycling
   through special_values. */
void specialDBlock ( Block* b )
{
   Double* const lanes = (Double*)b;
   const size_t count = sizeof(Block) / sizeof(Double);
   size_t k = 0;

   while (k < count) {
      lanes[k] = special_values[k % 10];
      k++;
   }
}
UChar randUChar ( void )
{
static UInt seed = 80021;
seed = 1103515245 * seed + 12345;
return (seed >> 17) & 0xFF;
}
/* Overwrite every byte of the Block, in address order, with successive
   values from randUChar(). */
void randBlock ( Block* b )
{
   UChar* cursor = (UChar*)b;
   UChar* const limit = cursor + sizeof(Block);

   while (cursor != limit) {
      *cursor = randUChar();
      cursor++;
   }
}
/* Set every byte of the Block to the value 1. */
void oneBlock ( Block* b )
{
   UChar* cursor = (UChar*)b;
   UChar* const limit = cursor + sizeof(Block);

   while (cursor != limit) {
      *cursor = 1;
      cursor++;
   }
}
/* GEN_test(_name, _instr, _isD) defines a noinline function
      void test_<_name>(const char *n, Block *b)
   that runs one instruction under test on the data in *b.

   The function prints "<_name> <n>", dumps the block ("before"), and then
   executes an asm sequence that:
     - loads b->r1, r2, r3, r4 (offsets 0, 32, 64, 96) into ymm7, ymm8,
       ymm6 and ymm9 respectively,
     - points r14 at offset 128, i.e. at b->m,
     - executes _instr (an asm template string; registers are written with
       a doubled '%' because it is pasted into an extended-asm template),
     - stores ymm7, ymm8, ymm6, ymm9 back to the same four offsets.
   Finally it dumps the block again ("after") and prints a blank line.

   _isD is forwarded to showBlock to choose double or float display.
   The moves are vmovdqa, so b is expected to be 32-byte aligned; showYMM
   asserts this as well. */
#define GEN_test(_name, _instr, _isD) \
__attribute__ ((noinline)) void \
test_##_name ( const char *n, Block* b) \
{ \
printf("%s %s\n", #_name, n); \
showBlock("before", b, _isD); \
__asm__ __volatile__( \
"vmovdqa 0(%0),%%ymm7" "\n\t" \
"vmovdqa 32(%0),%%ymm8" "\n\t" \
"vmovdqa 64(%0),%%ymm6" "\n\t" \
"vmovdqa 96(%0),%%ymm9" "\n\t" \
"leaq 128(%0),%%r14" "\n\t" \
_instr "\n\t" \
"vmovdqa %%ymm7, 0(%0)" "\n\t" \
"vmovdqa %%ymm8, 32(%0)" "\n\t" \
"vmovdqa %%ymm6, 64(%0)" "\n\t" \
"vmovdqa %%ymm9, 96(%0)" "\n\t" \
: /*OUT*/ \
: /*IN*/"r"(b) \
: /*TRASH*/"xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
); \
showBlock("after", b, _isD); \
printf("\n"); \
}
/* All these defines do the same thing (and someone with stronger
preprocessor-fu could probably express things much more compactly).
Each generates 4 different functions to test 4 variants of an
fma4 instruction: one with all 4 operands in registers, one where
the output register is also one of the input registers, and
two versions where a different input is a memory location.
The xmm variants create 128-bit versions, the ymm variants 256-bit ones. */
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for the
   128-bit vfmaddpd: an all-register form, a form where destination xmm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFMADDPD_xmm(_name) \
GEN_test(_name##_xmm, \
"vfmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_src_dst, \
"vfmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
GEN_test(_name##_xmm_mem1, \
"vfmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_mem2, \
"vfmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFMADDPD_xmm(VFMADDPD)
/* Generates test_<_name>_ymm, _ymm_src_dst, _ymm_mem1 and _ymm_mem2 for the
   256-bit vfmaddpd: an all-register form, a form where destination ymm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFMADDPD_ymm(_name) \
GEN_test(_name##_ymm, \
"vfmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
GEN_test(_name##_ymm_src_dst, \
"vfmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
GEN_test(_name##_ymm_mem1, \
"vfmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
GEN_test(_name##_ymm_mem2, \
"vfmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFMADDPD_ymm(VFMADDPD)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for the
   128-bit vfmaddps: an all-register form, a form where destination xmm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFMADDPS_xmm(_name) \
GEN_test(_name##_xmm, \
"vfmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_src_dst, \
"vfmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
GEN_test(_name##_xmm_mem1, \
"vfmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_mem2, \
"vfmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFMADDPS_xmm(VFMADDPS)
/* Generates test_<_name>_ymm, _ymm_src_dst, _ymm_mem1 and _ymm_mem2 for the
   256-bit vfmaddps: an all-register form, a form where destination ymm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFMADDPS_ymm(_name) \
GEN_test(_name##_ymm, \
"vfmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
GEN_test(_name##_ymm_src_dst, \
"vfmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
GEN_test(_name##_ymm_mem1, \
"vfmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
GEN_test(_name##_ymm_mem2, \
"vfmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
GEN_test_VFMADDPS_ymm(VFMADDPS)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for
   vfmaddsd on xmm registers: an all-register form, a form where destination
   xmm9 is also a source, and two forms that read one source from memory at
   (%r14).  The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFMADDSD_xmm(_name) \
GEN_test(_name##_xmm, \
"vfmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_src_dst, \
"vfmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
GEN_test(_name##_xmm_mem1, \
"vfmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_mem2, \
"vfmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFMADDSD_xmm(VFMADDSD)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for
   vfmaddss on xmm registers: an all-register form, a form where destination
   xmm9 is also a source, and two forms that read one source from memory at
   (%r14).  The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFMADDSS_xmm(_name) \
GEN_test(_name##_xmm, \
"vfmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_src_dst, \
"vfmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
GEN_test(_name##_xmm_mem1, \
"vfmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_mem2, \
"vfmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFMADDSS_xmm(VFMADDSS)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for the
   128-bit vfmaddsubpd: an all-register form, a form where destination xmm9
   is also a source, and two forms that read one source from memory at
   (%r14).  The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFMADDSUBPD_xmm(_name) \
GEN_test(_name##_xmm, \
"vfmaddsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_src_dst, \
"vfmaddsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
GEN_test(_name##_xmm_mem1, \
"vfmaddsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_mem2, \
"vfmaddsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFMADDSUBPD_xmm(VFMADDSUBPD)
/* Generates test_<_name>_ymm, _ymm_src_dst, _ymm_mem1 and _ymm_mem2 for the
   256-bit vfmaddsubpd: an all-register form, a form where destination ymm9
   is also a source, and two forms that read one source from memory at
   (%r14).  The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFMADDSUBPD_ymm(_name) \
GEN_test(_name##_ymm, \
"vfmaddsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
GEN_test(_name##_ymm_src_dst, \
"vfmaddsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
GEN_test(_name##_ymm_mem1, \
"vfmaddsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
GEN_test(_name##_ymm_mem2, \
"vfmaddsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFMADDSUBPD_ymm(VFMADDSUBPD)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for the
   128-bit vfmaddsubps: an all-register form, a form where destination xmm9
   is also a source, and two forms that read one source from memory at
   (%r14).  The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFMADDSUBPS_xmm(_name) \
GEN_test(_name##_xmm, \
"vfmaddsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_src_dst, \
"vfmaddsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
GEN_test(_name##_xmm_mem1, \
"vfmaddsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_mem2, \
"vfmaddsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFMADDSUBPS_xmm(VFMADDSUBPS)
/* Generates test_<_name>_ymm, _ymm_src_dst, _ymm_mem1 and _ymm_mem2 for the
   256-bit vfmaddsubps: an all-register form, a form where destination ymm9
   is also a source, and two forms that read one source from memory at
   (%r14).  The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFMADDSUBPS_ymm(_name) \
GEN_test(_name##_ymm, \
"vfmaddsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
GEN_test(_name##_ymm_src_dst, \
"vfmaddsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
GEN_test(_name##_ymm_mem1, \
"vfmaddsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
GEN_test(_name##_ymm_mem2, \
"vfmaddsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
GEN_test_VFMADDSUBPS_ymm(VFMADDSUBPS)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for the
   128-bit vfmsubaddpd: an all-register form, a form where destination xmm9
   is also a source, and two forms that read one source from memory at
   (%r14).  The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFMSUBADDPD_xmm(_name) \
GEN_test(_name##_xmm, \
"vfmsubaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_src_dst, \
"vfmsubaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
GEN_test(_name##_xmm_mem1, \
"vfmsubaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_mem2, \
"vfmsubaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFMSUBADDPD_xmm(VFMSUBADDPD)
/* Generates test_<_name>_ymm, _ymm_src_dst, _ymm_mem1 and _ymm_mem2 for the
   256-bit vfmsubaddpd: an all-register form, a form where destination ymm9
   is also a source, and two forms that read one source from memory at
   (%r14).  The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFMSUBADDPD_ymm(_name) \
GEN_test(_name##_ymm, \
"vfmsubaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
GEN_test(_name##_ymm_src_dst, \
"vfmsubaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
GEN_test(_name##_ymm_mem1, \
"vfmsubaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
GEN_test(_name##_ymm_mem2, \
"vfmsubaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFMSUBADDPD_ymm(VFMSUBADDPD)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for the
   128-bit vfmsubaddps: an all-register form, a form where destination xmm9
   is also a source, and two forms that read one source from memory at
   (%r14).  The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFMSUBADDPS_xmm(_name) \
GEN_test(_name##_xmm, \
"vfmsubaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_src_dst, \
"vfmsubaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
GEN_test(_name##_xmm_mem1, \
"vfmsubaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_mem2, \
"vfmsubaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFMSUBADDPS_xmm(VFMSUBADDPS)
/* Generates test_<_name>_ymm, _ymm_src_dst, _ymm_mem1 and _ymm_mem2 for the
   256-bit vfmsubaddps: an all-register form, a form where destination ymm9
   is also a source, and two forms that read one source from memory at
   (%r14).  The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFMSUBADDPS_ymm(_name) \
GEN_test(_name##_ymm, \
"vfmsubaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
GEN_test(_name##_ymm_src_dst, \
"vfmsubaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
GEN_test(_name##_ymm_mem1, \
"vfmsubaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
GEN_test(_name##_ymm_mem2, \
"vfmsubaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
GEN_test_VFMSUBADDPS_ymm(VFMSUBADDPS)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for the
   128-bit vfmsubpd: an all-register form, a form where destination xmm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFMSUBPD_xmm(_name) \
GEN_test(_name##_xmm, \
"vfmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_src_dst, \
"vfmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
GEN_test(_name##_xmm_mem1, \
"vfmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_mem2, \
"vfmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFMSUBPD_xmm(VFMSUBPD)
/* Generates test_<_name>_ymm, _ymm_src_dst, _ymm_mem1 and _ymm_mem2 for the
   256-bit vfmsubpd: an all-register form, a form where destination ymm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFMSUBPD_ymm(_name) \
GEN_test(_name##_ymm, \
"vfmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
GEN_test(_name##_ymm_src_dst, \
"vfmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
GEN_test(_name##_ymm_mem1, \
"vfmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
GEN_test(_name##_ymm_mem2, \
"vfmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFMSUBPD_ymm(VFMSUBPD)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for the
   128-bit vfmsubps: an all-register form, a form where destination xmm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFMSUBPS_xmm(_name) \
GEN_test(_name##_xmm, \
"vfmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_src_dst, \
"vfmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
GEN_test(_name##_xmm_mem1, \
"vfmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_mem2, \
"vfmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFMSUBPS_xmm(VFMSUBPS)
/* Generates test_<_name>_ymm, _ymm_src_dst, _ymm_mem1 and _ymm_mem2 for the
   256-bit vfmsubps: an all-register form, a form where destination ymm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFMSUBPS_ymm(_name) \
GEN_test(_name##_ymm, \
"vfmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
GEN_test(_name##_ymm_src_dst, \
"vfmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
GEN_test(_name##_ymm_mem1, \
"vfmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
GEN_test(_name##_ymm_mem2, \
"vfmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
GEN_test_VFMSUBPS_ymm(VFMSUBPS)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for
   vfmsubsd on xmm registers: an all-register form, a form where destination
   xmm9 is also a source, and two forms that read one source from memory at
   (%r14).  The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFMSUBSD_xmm(_name) \
GEN_test(_name##_xmm, \
"vfmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_src_dst, \
"vfmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
GEN_test(_name##_xmm_mem1, \
"vfmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_mem2, \
"vfmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFMSUBSD_xmm(VFMSUBSD)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for
   vfmsubss on xmm registers: an all-register form, a form where destination
   xmm9 is also a source, and two forms that read one source from memory at
   (%r14).  The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFMSUBSS_xmm(_name) \
GEN_test(_name##_xmm, \
"vfmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_src_dst, \
"vfmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
GEN_test(_name##_xmm_mem1, \
"vfmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_mem2, \
"vfmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFMSUBSS_xmm(VFMSUBSS)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for the
   128-bit vfnmaddpd: an all-register form, a form where destination xmm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFNMADDPD_xmm(_name) \
GEN_test(_name##_xmm, \
"vfnmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_src_dst, \
"vfnmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
GEN_test(_name##_xmm_mem1, \
"vfnmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_mem2, \
"vfnmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFNMADDPD_xmm(VFNMADDPD)
/* Generates test_<_name>_ymm, _ymm_src_dst, _ymm_mem1 and _ymm_mem2 for the
   256-bit vfnmaddpd: an all-register form, a form where destination ymm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFNMADDPD_ymm(_name) \
GEN_test(_name##_ymm, \
"vfnmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
GEN_test(_name##_ymm_src_dst, \
"vfnmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
GEN_test(_name##_ymm_mem1, \
"vfnmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
GEN_test(_name##_ymm_mem2, \
"vfnmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFNMADDPD_ymm(VFNMADDPD)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for the
   128-bit vfnmaddps: an all-register form, a form where destination xmm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFNMADDPS_xmm(_name) \
GEN_test(_name##_xmm, \
"vfnmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_src_dst, \
"vfnmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
GEN_test(_name##_xmm_mem1, \
"vfnmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_mem2, \
"vfnmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFNMADDPS_xmm(VFNMADDPS)
/* Generates test_<_name>_ymm, _ymm_src_dst, _ymm_mem1 and _ymm_mem2 for the
   256-bit vfnmaddps: an all-register form, a form where destination ymm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFNMADDPS_ymm(_name) \
GEN_test(_name##_ymm, \
"vfnmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
GEN_test(_name##_ymm_src_dst, \
"vfnmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
GEN_test(_name##_ymm_mem1, \
"vfnmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
GEN_test(_name##_ymm_mem2, \
"vfnmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
GEN_test_VFNMADDPS_ymm(VFNMADDPS)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for
   vfnmaddsd on xmm registers: an all-register form, a form where
   destination xmm9 is also a source, and two forms that read one source
   from memory at (%r14).  The trailing 1 makes the blocks print as
   doubles. */
#define GEN_test_VFNMADDSD_xmm(_name) \
GEN_test(_name##_xmm, \
"vfnmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_src_dst, \
"vfnmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
GEN_test(_name##_xmm_mem1, \
"vfnmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_mem2, \
"vfnmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFNMADDSD_xmm(VFNMADDSD)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for
   vfnmaddss on xmm registers: an all-register form, a form where
   destination xmm9 is also a source, and two forms that read one source
   from memory at (%r14).  The trailing 0 makes the blocks print as
   floats. */
#define GEN_test_VFNMADDSS_xmm(_name) \
GEN_test(_name##_xmm, \
"vfnmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_src_dst, \
"vfnmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
GEN_test(_name##_xmm_mem1, \
"vfnmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_mem2, \
"vfnmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFNMADDSS_xmm(VFNMADDSS)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for the
   128-bit vfnmsubpd: an all-register form, a form where destination xmm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFNMSUBPD_xmm(_name) \
GEN_test(_name##_xmm, \
"vfnmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_src_dst, \
"vfnmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
GEN_test(_name##_xmm_mem1, \
"vfnmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_mem2, \
"vfnmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFNMSUBPD_xmm(VFNMSUBPD)
/* Generates test_<_name>_ymm, _ymm_src_dst, _ymm_mem1 and _ymm_mem2 for the
   256-bit vfnmsubpd: an all-register form, a form where destination ymm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 1 makes the blocks print as doubles. */
#define GEN_test_VFNMSUBPD_ymm(_name) \
GEN_test(_name##_ymm, \
"vfnmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
GEN_test(_name##_ymm_src_dst, \
"vfnmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
GEN_test(_name##_ymm_mem1, \
"vfnmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
GEN_test(_name##_ymm_mem2, \
"vfnmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFNMSUBPD_ymm(VFNMSUBPD)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for the
   128-bit vfnmsubps: an all-register form, a form where destination xmm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFNMSUBPS_xmm(_name) \
GEN_test(_name##_xmm, \
"vfnmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_src_dst, \
"vfnmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
GEN_test(_name##_xmm_mem1, \
"vfnmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_mem2, \
"vfnmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFNMSUBPS_xmm(VFNMSUBPS)
/* Generates test_<_name>_ymm, _ymm_src_dst, _ymm_mem1 and _ymm_mem2 for the
   256-bit vfnmsubps: an all-register form, a form where destination ymm9 is
   also a source, and two forms that read one source from memory at (%r14).
   The trailing 0 makes the blocks print as floats. */
#define GEN_test_VFNMSUBPS_ymm(_name) \
GEN_test(_name##_ymm, \
"vfnmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
GEN_test(_name##_ymm_src_dst, \
"vfnmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
GEN_test(_name##_ymm_mem1, \
"vfnmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
GEN_test(_name##_ymm_mem2, \
"vfnmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
GEN_test_VFNMSUBPS_ymm(VFNMSUBPS)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for
   vfnmsubsd on xmm registers: an all-register form, a form where
   destination xmm9 is also a source, and two forms that read one source
   from memory at (%r14).  The trailing 1 makes the blocks print as
   doubles. */
#define GEN_test_VFNMSUBSD_xmm(_name) \
GEN_test(_name##_xmm, \
"vfnmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_src_dst, \
"vfnmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
GEN_test(_name##_xmm_mem1, \
"vfnmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
GEN_test(_name##_xmm_mem2, \
"vfnmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFNMSUBSD_xmm(VFNMSUBSD)
/* Generates test_<_name>_xmm, _xmm_src_dst, _xmm_mem1 and _xmm_mem2 for
   vfnmsubss on xmm registers: an all-register form, a form where
   destination xmm9 is also a source, and two forms that read one source
   from memory at (%r14).  The trailing 0 makes the blocks print as
   floats. */
#define GEN_test_VFNMSUBSS_xmm(_name) \
GEN_test(_name##_xmm, \
"vfnmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_src_dst, \
"vfnmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
GEN_test(_name##_xmm_mem1, \
"vfnmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
GEN_test(_name##_xmm_mem2, \
"vfnmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFNMSUBSS_xmm(VFNMSUBSS)
/* Calls the generated function test_<_name>_<_sub>, passing _bname as the
   label it prints and _block as the data to operate on.
   Expands to a plain call expression with no trailing semicolon, so the
   caller terminates it like any other statement. */
#define DO_test_block(_name, _sub, _bname, _block) \
   test_##_name##_##_sub((_bname), (_block))
/* Runs test_<_name>_<_sub> three times on one freshly allocated Block:
   first filled with 1-bytes ("ones"), then with the special-value table
   ("specialD" when _isD is non-zero, "specialF" otherwise), then with
   pseudo-random bytes ("rand").  Each run starts from the state the
   previous fill left behind, fully overwritten by the next fill.
   The Block comes from memalign32() (declared in tests/malloc.h; presumably
   returns 32-byte aligned storage - confirm there) and is freed at the end.
   Wrapped in do { } while (0) so the macro behaves as a single statement
   and can safely be followed by ';' in any context. */
#define DO_test(_name, _sub, _isD) do { \
   Block* b = memalign32(sizeof(Block)); \
   assert(b != NULL); \
   oneBlock(b); \
   DO_test_block(_name, _sub, "ones", b); \
   if (_isD) { \
      specialDBlock(b); \
      DO_test_block(_name, _sub, "specialD", b); \
   } else { \
      specialFBlock(b); \
      DO_test_block(_name, _sub, "specialF", b); \
   } \
   randBlock(b); \
   DO_test_block(_name, _sub, "rand", b); \
   free(b); \
} while (0)
/* Runs the four xmm variants of instruction _name in a fixed order:
   all-register, src_dst, mem1, mem2.  _isD is passed through to DO_test.
   Wrapped in do { } while (0) so the four runs form one statement; the
   caller supplies the terminating semicolon. */
#define DO_tests_xmm(_name,_isD) do { \
   DO_test(_name, xmm, _isD); \
   DO_test(_name, xmm_src_dst, _isD); \
   DO_test(_name, xmm_mem1, _isD); \
   DO_test(_name, xmm_mem2, _isD); \
} while (0)
/* Runs the four ymm variants of instruction _name in a fixed order:
   all-register, src_dst, mem1, mem2.  _isD is passed through to DO_test.
   Wrapped in do { } while (0) so the four runs form one statement; the
   caller supplies the terminating semicolon. */
#define DO_tests_ymm(_name,_isD) do { \
   DO_test(_name, ymm, _isD); \
   DO_test(_name, ymm_src_dst, _isD); \
   DO_test(_name, ymm_mem1, _isD); \
   DO_test(_name, ymm_mem2, _isD); \
} while (0)
/* Test driver: fills the special-value table, then runs every xmm test
   group in a fixed order.  The second argument of each DO_tests_xmm call
   selects double (1) or float (0) data and display.  Always returns 0. */
int main ( void )
{
init_special_values();
// 128-bit (xmm) variants
DO_tests_xmm(VFMADDPD, 1);
DO_tests_xmm(VFMADDPS, 0);
DO_tests_xmm(VFMADDSD, 1);
DO_tests_xmm(VFMADDSS, 0);
DO_tests_xmm(VFMADDSUBPD, 1);
DO_tests_xmm(VFMADDSUBPS, 0);
DO_tests_xmm(VFMSUBADDPD, 1);
DO_tests_xmm(VFMSUBADDPS, 0);
DO_tests_xmm(VFMSUBPD, 1);
DO_tests_xmm(VFMSUBPS, 0);
DO_tests_xmm(VFMSUBSD, 1);
DO_tests_xmm(VFMSUBSS, 0);
DO_tests_xmm(VFNMADDPD, 1);
DO_tests_xmm(VFNMADDPS, 0);
DO_tests_xmm(VFNMADDSD, 1);
DO_tests_xmm(VFNMADDSS, 0);
DO_tests_xmm(VFNMSUBPD, 1);
DO_tests_xmm(VFNMSUBPS, 0);
DO_tests_xmm(VFNMSUBSD, 1);
DO_tests_xmm(VFNMSUBSS, 0);
// 256-bit (ymm) variants
// NOTE(review): the ymm runs below are commented out although the test
// functions are still generated; the reason is not recorded here - confirm
// before re-enabling.
/*
DO_tests_ymm(VFMADDPD, 1);
DO_tests_ymm(VFMADDPS, 0);
DO_tests_ymm(VFMADDSUBPD, 1);
DO_tests_ymm(VFMADDSUBPS, 0);
DO_tests_ymm(VFMSUBADDPD, 1);
DO_tests_ymm(VFMSUBADDPS, 0);
DO_tests_ymm(VFMSUBPD, 1);
DO_tests_ymm(VFMSUBPS, 0);
DO_tests_ymm(VFNMADDPD, 1);
DO_tests_ymm(VFNMADDPS, 0);
DO_tests_ymm(VFNMSUBPD, 1);
DO_tests_ymm(VFNMSUBPS, 0);
*/
return 0;
}