#include <stdio.h> #include <stdlib.h> #include <assert.h> #include <math.h> #include "tests/malloc.h" typedef unsigned char UChar; typedef unsigned int UInt; typedef unsigned long int UWord; typedef unsigned long long int ULong; typedef double Double; typedef float Float; #define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr))) typedef union { UChar u8[16]; UInt u32[4]; Float f32[4]; Double f64[2]; } XMM; typedef union { UChar u8[32]; UInt u32[8]; XMM xmm[2]; } YMM; typedef struct { YMM r1; YMM r2; YMM r3; YMM r4; YMM m; } Block; void showFloat ( XMM* vec, int idx ) { Float f = vec->f32[idx]; int neg = signbit (f); char sign = neg != 0 ? '-' : ' '; switch (fpclassify (f)) { case FP_NORMAL: { for (int i = idx * 4 + 3; i >= idx * 4; i--) printf("%02x", (UInt)vec->u8[i]); break; } case FP_INFINITE: { printf ("[ %cINF ]", sign); break; } case FP_ZERO: { printf ("[%cZERO ]", sign); break; } case FP_NAN: { printf ("[ NAN ]"); break; } default: { printf ("[%cSUBNR]", sign); break; } } } void showDouble ( XMM* vec, int idx ) { Double d = vec->f64[idx]; int neg = signbit (d); char sign = neg != 0 ? '-' : ' '; switch (fpclassify (d)) { case FP_NORMAL: { for (int i = idx * 8 + 7; i >= idx * 8; i--) printf("%02x", (UInt)vec->u8[i]); break; } case FP_INFINITE: { printf ("[ %cINF ]", sign); break; } case FP_ZERO: { printf ("[ %cZERO ]", sign); break; } case FP_NAN: { printf ("[ NAN ]"); break; } default: { printf ("[ %cSUBNORMAL ]", sign); break; } } } void showXMM ( XMM* vec, int isDouble ) { if (isDouble) { showDouble ( vec, 1 ); printf ("."); showDouble ( vec, 0 ); } else { showFloat ( vec, 3 ); printf ("."); showFloat ( vec, 2 ); printf ("."); showFloat ( vec, 1 ); printf ("."); showFloat ( vec, 0 ); } } void showYMM ( YMM* vec, int isDouble ) { assert(IS_32_ALIGNED(vec)); showXMM ( &vec->xmm[1], isDouble ); printf("."); showXMM ( &vec->xmm[0], isDouble ); } void showBlock ( char* msg, Block* block, int isDouble ) { printf(" %s\n", msg); printf("r1: "); showYMM(&block->r1, isDouble); printf("\n"); printf("r2: "); showYMM(&block->r2, isDouble); printf("\n"); printf("r3: "); showYMM(&block->r3, isDouble); printf("\n"); printf("r4: "); showYMM(&block->r4, isDouble); printf("\n"); printf(" m: "); showYMM(&block->m, isDouble); printf("\n"); } static Double special_values[10]; static __attribute__((noinline)) Double negate ( Double d ) { return -d; } static __attribute__((noinline)) Double divf64 ( Double x, Double y ) { return x/y; } static __attribute__((noinline)) Double plusZero ( void ) { return 0.0; } static __attribute__((noinline)) Double minusZero ( void ) { return negate(plusZero()); } static __attribute__((noinline)) Double plusOne ( void ) { return 1.0; } static __attribute__((noinline)) Double minusOne ( void ) { return negate(plusOne()); } static __attribute__((noinline)) Double plusInf ( void ) { return 1.0 / 0.0; } static __attribute__((noinline)) Double minusInf ( void ) { return negate(plusInf()); } static __attribute__((noinline)) Double plusNaN ( void ) { return divf64(plusInf(),plusInf()); } static __attribute__((noinline)) Double minusNaN ( void ) { return negate(plusNaN()); } static __attribute__((noinline)) Double plusDenorm ( void ) { return 1.23e-315 / 1e3; } static __attribute__((noinline)) Double minusDenorm ( void ) { return negate(plusDenorm()); } static void init_special_values ( void ) { special_values[0] = plusZero(); special_values[1] = minusZero(); special_values[2] = plusOne(); special_values[3] = minusOne(); special_values[4] = plusInf(); special_values[5] = minusInf(); special_values[6] = plusNaN(); special_values[7] = minusNaN(); special_values[8] = plusDenorm(); special_values[9] = minusDenorm(); } void specialFBlock ( Block* b ) { int i; Float* p = (Float*)b; for (i = 0; i < sizeof(Block) / sizeof(Float); i++) p[i] = (Float) special_values[i % 10]; } void specialDBlock ( Block* b ) { int i; Double* p = (Double*)b; for (i = 0; i < sizeof(Block) / sizeof(Double); i++) p[i] = special_values[i % 10]; } UChar randUChar ( void ) { static UInt seed = 80021; seed = 1103515245 * seed + 12345; return (seed >> 17) & 0xFF; } void randBlock ( Block* b ) { int i; UChar* p = (UChar*)b; for (i = 0; i < sizeof(Block); i++) p[i] = randUChar(); } void oneBlock ( Block* b ) { int i; UChar* p = (UChar*)b; for (i = 0; i < sizeof(Block); i++) p[i] = 1; } #define GEN_test(_name, _instr, _isD) \ __attribute__ ((noinline)) void \ test_##_name ( const char *n, Block* b) \ { \ printf("%s %s\n", #_name, n); \ showBlock("before", b, _isD); \ __asm__ __volatile__( \ "vmovdqa 0(%0),%%ymm7" "\n\t" \ "vmovdqa 32(%0),%%ymm8" "\n\t" \ "vmovdqa 64(%0),%%ymm6" "\n\t" \ "vmovdqa 96(%0),%%ymm9" "\n\t" \ "leaq 128(%0),%%r14" "\n\t" \ _instr "\n\t" \ "vmovdqa %%ymm7, 0(%0)" "\n\t" \ "vmovdqa %%ymm8, 32(%0)" "\n\t" \ "vmovdqa %%ymm6, 64(%0)" "\n\t" \ "vmovdqa %%ymm9, 96(%0)" "\n\t" \ : /*OUT*/ \ : /*IN*/"r"(b) \ : /*TRASH*/"xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \ ); \ showBlock("after", b, _isD); \ printf("\n"); \ } /* All these defines do the same thing (and someone with stronger preprocessor foo could probably express things much smaller). They generate 4 different functions to test 4 variants of an fma4 instruction. One with as input 4 registers, one where the output register is also one of the input registers and two versions where different inputs are a memory location. The xmm variants create 128 versions, the ymm variants 256. */ #define GEN_test_VFMADDPD_xmm(_name) \ GEN_test(_name##_xmm, \ "vfmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_src_dst, \ "vfmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ GEN_test(_name##_xmm_mem1, \ "vfmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_mem2, \ "vfmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); GEN_test_VFMADDPD_xmm(VFMADDPD) #define GEN_test_VFMADDPD_ymm(_name) \ GEN_test(_name##_ymm, \ "vfmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ GEN_test(_name##_ymm_src_dst, \ "vfmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ GEN_test(_name##_ymm_mem1, \ "vfmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ GEN_test(_name##_ymm_mem2, \ "vfmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); GEN_test_VFMADDPD_ymm(VFMADDPD) #define GEN_test_VFMADDPS_xmm(_name) \ GEN_test(_name##_xmm, \ "vfmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_src_dst, \ "vfmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ GEN_test(_name##_xmm_mem1, \ "vfmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_mem2, \ "vfmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); GEN_test_VFMADDPS_xmm(VFMADDPS) #define GEN_test_VFMADDPS_ymm(_name) \ GEN_test(_name##_ymm, \ "vfmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ GEN_test(_name##_ymm_src_dst, \ "vfmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ GEN_test(_name##_ymm_mem1, \ "vfmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ GEN_test(_name##_ymm_mem2, \ "vfmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); GEN_test_VFMADDPS_ymm(VFMADDPS) #define GEN_test_VFMADDSD_xmm(_name) \ GEN_test(_name##_xmm, \ "vfmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_src_dst, \ "vfmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ GEN_test(_name##_xmm_mem1, \ "vfmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_mem2, \ "vfmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); GEN_test_VFMADDSD_xmm(VFMADDSD) #define GEN_test_VFMADDSS_xmm(_name) \ GEN_test(_name##_xmm, \ "vfmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_src_dst, \ "vfmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ GEN_test(_name##_xmm_mem1, \ "vfmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_mem2, \ "vfmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); GEN_test_VFMADDSS_xmm(VFMADDSS) #define GEN_test_VFMADDSUBPD_xmm(_name) \ GEN_test(_name##_xmm, \ "vfmaddsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_src_dst, \ "vfmaddsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ GEN_test(_name##_xmm_mem1, \ "vfmaddsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_mem2, \ "vfmaddsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); GEN_test_VFMADDSUBPD_xmm(VFMADDSUBPD) #define GEN_test_VFMADDSUBPD_ymm(_name) \ GEN_test(_name##_ymm, \ "vfmaddsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ GEN_test(_name##_ymm_src_dst, \ "vfmaddsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ GEN_test(_name##_ymm_mem1, \ "vfmaddsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ GEN_test(_name##_ymm_mem2, \ "vfmaddsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); GEN_test_VFMADDSUBPD_ymm(VFMADDSUBPD) #define GEN_test_VFMADDSUBPS_xmm(_name) \ GEN_test(_name##_xmm, \ "vfmaddsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_src_dst, \ "vfmaddsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ GEN_test(_name##_xmm_mem1, \ "vfmaddsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_mem2, \ "vfmaddsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); GEN_test_VFMADDSUBPS_xmm(VFMADDSUBPS) #define GEN_test_VFMADDSUBPS_ymm(_name) \ GEN_test(_name##_ymm, \ "vfmaddsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ GEN_test(_name##_ymm_src_dst, \ "vfmaddsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ GEN_test(_name##_ymm_mem1, \ "vfmaddsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ GEN_test(_name##_ymm_mem2, \ "vfmaddsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); GEN_test_VFMADDSUBPS_ymm(VFMADDSUBPS) #define GEN_test_VFMSUBADDPD_xmm(_name) \ GEN_test(_name##_xmm, \ "vfmsubaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_src_dst, \ "vfmsubaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ GEN_test(_name##_xmm_mem1, \ "vfmsubaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_mem2, \ "vfmsubaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); GEN_test_VFMSUBADDPD_xmm(VFMSUBADDPD) #define GEN_test_VFMSUBADDPD_ymm(_name) \ GEN_test(_name##_ymm, \ "vfmsubaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ GEN_test(_name##_ymm_src_dst, \ "vfmsubaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ GEN_test(_name##_ymm_mem1, \ "vfmsubaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ GEN_test(_name##_ymm_mem2, \ "vfmsubaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); GEN_test_VFMSUBADDPD_ymm(VFMSUBADDPD) #define GEN_test_VFMSUBADDPS_xmm(_name) \ GEN_test(_name##_xmm, \ "vfmsubaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_src_dst, \ "vfmsubaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ GEN_test(_name##_xmm_mem1, \ "vfmsubaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_mem2, \ "vfmsubaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); GEN_test_VFMSUBADDPS_xmm(VFMSUBADDPS) #define GEN_test_VFMSUBADDPS_ymm(_name) \ GEN_test(_name##_ymm, \ "vfmsubaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ GEN_test(_name##_ymm_src_dst, \ "vfmsubaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ GEN_test(_name##_ymm_mem1, \ "vfmsubaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ GEN_test(_name##_ymm_mem2, \ "vfmsubaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); GEN_test_VFMSUBADDPS_ymm(VFMSUBADDPS) #define GEN_test_VFMSUBPD_xmm(_name) \ GEN_test(_name##_xmm, \ "vfmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_src_dst, \ "vfmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ GEN_test(_name##_xmm_mem1, \ "vfmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_mem2, \ "vfmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); GEN_test_VFMSUBPD_xmm(VFMSUBPD) #define GEN_test_VFMSUBPD_ymm(_name) \ GEN_test(_name##_ymm, \ "vfmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ GEN_test(_name##_ymm_src_dst, \ "vfmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ GEN_test(_name##_ymm_mem1, \ "vfmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ GEN_test(_name##_ymm_mem2, \ "vfmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); GEN_test_VFMSUBPD_ymm(VFMSUBPD) #define GEN_test_VFMSUBPS_xmm(_name) \ GEN_test(_name##_xmm, \ "vfmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_src_dst, \ "vfmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ GEN_test(_name##_xmm_mem1, \ "vfmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_mem2, \ "vfmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); GEN_test_VFMSUBPS_xmm(VFMSUBPS) #define GEN_test_VFMSUBPS_ymm(_name) \ GEN_test(_name##_ymm, \ "vfmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ GEN_test(_name##_ymm_src_dst, \ "vfmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ GEN_test(_name##_ymm_mem1, \ "vfmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ GEN_test(_name##_ymm_mem2, \ "vfmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); GEN_test_VFMSUBPS_ymm(VFMSUBPS) #define GEN_test_VFMSUBSD_xmm(_name) \ GEN_test(_name##_xmm, \ "vfmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_src_dst, \ "vfmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ GEN_test(_name##_xmm_mem1, \ "vfmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_mem2, \ "vfmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); GEN_test_VFMSUBSD_xmm(VFMSUBSD) #define GEN_test_VFMSUBSS_xmm(_name) \ GEN_test(_name##_xmm, \ "vfmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_src_dst, \ "vfmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ GEN_test(_name##_xmm_mem1, \ "vfmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_mem2, \ "vfmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); GEN_test_VFMSUBSS_xmm(VFMSUBSS) #define GEN_test_VFNMADDPD_xmm(_name) \ GEN_test(_name##_xmm, \ "vfnmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_src_dst, \ "vfnmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ GEN_test(_name##_xmm_mem1, \ "vfnmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_mem2, \ "vfnmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); GEN_test_VFNMADDPD_xmm(VFNMADDPD) #define GEN_test_VFNMADDPD_ymm(_name) \ GEN_test(_name##_ymm, \ "vfnmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ GEN_test(_name##_ymm_src_dst, \ "vfnmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ GEN_test(_name##_ymm_mem1, \ "vfnmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ GEN_test(_name##_ymm_mem2, \ "vfnmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); GEN_test_VFNMADDPD_ymm(VFNMADDPD) #define GEN_test_VFNMADDPS_xmm(_name) \ GEN_test(_name##_xmm, \ "vfnmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_src_dst, \ "vfnmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ GEN_test(_name##_xmm_mem1, \ "vfnmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_mem2, \ "vfnmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); GEN_test_VFNMADDPS_xmm(VFNMADDPS) #define GEN_test_VFNMADDPS_ymm(_name) \ GEN_test(_name##_ymm, \ "vfnmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ GEN_test(_name##_ymm_src_dst, \ "vfnmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ GEN_test(_name##_ymm_mem1, \ "vfnmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ GEN_test(_name##_ymm_mem2, \ "vfnmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); GEN_test_VFNMADDPS_ymm(VFNMADDPS) #define GEN_test_VFNMADDSD_xmm(_name) \ GEN_test(_name##_xmm, \ "vfnmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_src_dst, \ "vfnmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ GEN_test(_name##_xmm_mem1, \ "vfnmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_mem2, \ "vfnmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); GEN_test_VFNMADDSD_xmm(VFNMADDSD) #define GEN_test_VFNMADDSS_xmm(_name) \ GEN_test(_name##_xmm, \ "vfnmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_src_dst, \ "vfnmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ GEN_test(_name##_xmm_mem1, \ "vfnmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_mem2, \ "vfnmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); GEN_test_VFNMADDSS_xmm(VFNMADDSS) #define GEN_test_VFNMSUBPD_xmm(_name) \ GEN_test(_name##_xmm, \ "vfnmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_src_dst, \ "vfnmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ GEN_test(_name##_xmm_mem1, \ "vfnmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_mem2, \ "vfnmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); GEN_test_VFNMSUBPD_xmm(VFNMSUBPD) #define GEN_test_VFNMSUBPD_ymm(_name) \ GEN_test(_name##_ymm, \ "vfnmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ GEN_test(_name##_ymm_src_dst, \ "vfnmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ GEN_test(_name##_ymm_mem1, \ "vfnmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ GEN_test(_name##_ymm_mem2, \ "vfnmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); GEN_test_VFNMSUBPD_ymm(VFNMSUBPD) #define GEN_test_VFNMSUBPS_xmm(_name) \ GEN_test(_name##_xmm, \ "vfnmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_src_dst, \ "vfnmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ GEN_test(_name##_xmm_mem1, \ "vfnmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_mem2, \ "vfnmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); GEN_test_VFNMSUBPS_xmm(VFNMSUBPS) #define GEN_test_VFNMSUBPS_ymm(_name) \ GEN_test(_name##_ymm, \ "vfnmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ GEN_test(_name##_ymm_src_dst, \ "vfnmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ GEN_test(_name##_ymm_mem1, \ "vfnmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ GEN_test(_name##_ymm_mem2, \ "vfnmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); GEN_test_VFNMSUBPS_ymm(VFNMSUBPS) #define GEN_test_VFNMSUBSD_xmm(_name) \ GEN_test(_name##_xmm, \ "vfnmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_src_dst, \ "vfnmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ GEN_test(_name##_xmm_mem1, \ "vfnmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ GEN_test(_name##_xmm_mem2, \ "vfnmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); GEN_test_VFNMSUBSD_xmm(VFNMSUBSD) #define GEN_test_VFNMSUBSS_xmm(_name) \ GEN_test(_name##_xmm, \ "vfnmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_src_dst, \ "vfnmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ GEN_test(_name##_xmm_mem1, \ "vfnmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ GEN_test(_name##_xmm_mem2, \ "vfnmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); GEN_test_VFNMSUBSS_xmm(VFNMSUBSS) #define DO_test_block(_name, _sub, _bname, _block) \ test_##_name##_##_sub(_bname,_block); #define DO_test(_name, _sub, _isD) { \ Block* b = memalign32(sizeof(Block)); \ oneBlock(b); \ DO_test_block(_name, _sub, "ones", b); \ if (_isD) { \ specialDBlock(b); \ DO_test_block(_name, _sub, "specialD", b); \ } else { \ specialFBlock(b); \ DO_test_block(_name, _sub, "specialF", b); \ } \ randBlock(b); \ DO_test_block(_name, _sub, "rand", b); \ free(b); \ } #define DO_tests_xmm(_name,_isD) \ DO_test(_name, xmm, _isD); \ DO_test(_name, xmm_src_dst, _isD); \ DO_test(_name, xmm_mem1, _isD); \ DO_test(_name, xmm_mem2, _isD); #define DO_tests_ymm(_name,_isD) \ DO_test(_name, ymm, _isD); \ DO_test(_name, ymm_src_dst, _isD); \ DO_test(_name, ymm_mem1, _isD); \ DO_test(_name, ymm_mem2, _isD); int main ( void ) { init_special_values(); // 128 DO_tests_xmm(VFMADDPD, 1); DO_tests_xmm(VFMADDPS, 0); DO_tests_xmm(VFMADDSD, 1); DO_tests_xmm(VFMADDSS, 0); DO_tests_xmm(VFMADDSUBPD, 1); DO_tests_xmm(VFMADDSUBPS, 0); DO_tests_xmm(VFMSUBADDPD, 1); DO_tests_xmm(VFMSUBADDPS, 0); DO_tests_xmm(VFMSUBPD, 1); DO_tests_xmm(VFMSUBPS, 0); DO_tests_xmm(VFMSUBSD, 1); DO_tests_xmm(VFMSUBSS, 0); DO_tests_xmm(VFNMADDPD, 1); DO_tests_xmm(VFNMADDPS, 0); DO_tests_xmm(VFNMADDSD, 1); DO_tests_xmm(VFNMADDSS, 0); DO_tests_xmm(VFNMSUBPD, 1); DO_tests_xmm(VFNMSUBPS, 0); DO_tests_xmm(VFNMSUBSD, 1); DO_tests_xmm(VFNMSUBSS, 0); // 256 /* DO_tests_ymm(VFMADDPD, 1); DO_tests_ymm(VFMADDPS, 0); DO_tests_ymm(VFMADDSUBPD, 1); DO_tests_ymm(VFMADDSUBPS, 0); DO_tests_ymm(VFMSUBADDPD, 1); DO_tests_ymm(VFMSUBADDPS, 0); DO_tests_ymm(VFMSUBPD, 1); DO_tests_ymm(VFMSUBPS, 0); DO_tests_ymm(VFNMADDPD, 1); DO_tests_ymm(VFNMADDPS, 0); DO_tests_ymm(VFNMSUBPD, 1); DO_tests_ymm(VFNMSUBPS, 0); */ return 0; }