/* Copyright (C) 2013 IBM
Authors: Carl Love <carll@us.ibm.com>
Maynard Johnson <maynardj@us.ibm.com>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file COPYING.
This program is based heavily on the test_isa_2_06_part*.c source files.
*/
#include <stdio.h>
#ifdef HAS_ISA_2_07
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <altivec.h>
#include <math.h>
/* HWord_t is an unsigned integer as wide as a general purpose register:
 * 32 bits unless __powerpc64__ is defined, 64 bits otherwise.
 */
#ifndef __powerpc64__
typedef uint32_t HWord_t;
#else
typedef uint64_t HWord_t;
#endif /* __powerpc64__ */
/* isLE is 1 when VGP_ppc64le_linux is defined and 0 otherwise.  The tests
 * use it to pick the byte offset of a given element inside a vector.
 */
#ifdef VGP_ppc64le_linux
#define isLE 1
#else
#define isLE 0
#endif
/* Global register variables bound to GPRs r14-r17 and FPRs fr14-fr17.
 * In this file r14 and r15 carry the two address operands of the VSX
 * load/store wrappers.
 */
register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");
static volatile unsigned int cond_reg;
/* Values stored in the Bool typedef declared at the end of this group. */
#define True 1
#define False 0
/* Clobber list naming every condition register field. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"
/* SET_CR / SET_XER: write _arg to CR (mtcr) or XER (mtxer).
 * Note that both expansions already end in a semicolon.
 */
#define SET_CR(_arg) \
__asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR );
#define SET_XER(_arg) \
__asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );
/* GET_CR / GET_XER: read CR (mfcr) or XER (mfxer) into _lval. */
#define GET_CR(_lval) \
__asm__ __volatile__ ("mfcr %0" : "=b"(_lval) )
#define GET_XER(_lval) \
__asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )
/* Read CR and XER in one statement. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
/* Convenience forms that write zero to CR, XER, or both. */
#define SET_CR_ZERO \
SET_CR(0)
#define SET_XER_ZERO \
SET_XER(0)
#define SET_CR_XER_ZERO \
do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
/* Issue mtfsf with field mask 0xFF and a 0.0 source operand. */
#define SET_FPSCR_ZERO \
do { double _d = 0.0; \
__asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
} while (0)
/* Boolean type used by the test helpers; holds True or False. */
typedef unsigned char Bool;
/* These functions below that construct a table of floating point
* values were lifted from none/tests/ppc32/jm-insns.c.
*/
/* AB_DPRINTF: printf-style trace to stderr, compiled in only when
 * DEBUG_ARGS_BUILD is defined; otherwise it expands to an empty statement.
 */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif
/* Assemble a double-precision bit pattern from its three fields and store
 * it at farg.
 *
 *   farg : destination; must have room for one double
 *   s    : sign bit (0 or 1), placed in bit 63
 *   _exp : biased exponent, placed starting at bit 52
 *   mant : fraction field, OR-ed into the low bits unchanged
 *
 * The pattern is moved with memcpy instead of being written through a
 * casted pointer, so the store does not depend on pointer-type punning and
 * no floating-point operation can alter the bits (e.g. of a signalling NaN).
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
   double value;

   memcpy(&value, &bits, sizeof value);
   memcpy(farg, &value, sizeof value);
   AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
              s, _exp, (unsigned long long)mant,
              (unsigned long long)bits, value);
}
/* Assemble a single-precision bit pattern from its three fields and store
 * it at farg.
 *
 *   farg : destination; must have room for one float
 *   s    : sign bit (0 or 1), placed in bit 31
 *   _exp : biased exponent, placed starting at bit 23
 *   mant : fraction field, OR-ed into the low bits unchanged
 *
 * The pattern is moved with memcpy rather than stored through a casted
 * pointer, so the exact bits (including signalling NaNs) reach memory
 * without relying on pointer-type punning.
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
   float value;

   memcpy(&value, &bits, sizeof value);
   memcpy(farg, &value, sizeof value);
}
/* One two-operand test case: each member is an index into spec_fargs
 * selecting the value used for the A and B source operands respectively.
 */
typedef struct fp_test_args {
int fra_idx;
int frb_idx;
} fp_test_args_t;
/* Number of entries in the tables below; set by build_special_fargs_table(). */
static int nb_special_fargs;
/* Double-precision test values; allocated by build_special_fargs_table(). */
static double * spec_fargs;
/* Single-precision counterparts of spec_fargs, index for index. */
static float * spec_sp_fargs;
/* Build the tables of floating point test values (spec_fargs and
 * spec_sp_fargs) and set nb_special_fargs.  The tables are built once;
 * later calls return immediately.
 *
 * Double precision layout:
 *   Sign     : 1 bit
 *   Exponent : 11 bits, 0 to ((1 << 11) - 1)
 *   Mantissa : 52 bits
 *   Special values:
 *     +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
 *     -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
 *     +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
 *     -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
 *     +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
 *     -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
 *     +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
 *     -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
 *
 * Single precision layout:
 *   Sign     : 1 bit
 *   Exponent : 8 bits
 *   Mantissa : 23 bits
 *     +0.0      : 0 0x00 0x000000 => 0x00000000
 *     -0.0      : 1 0x00 0x000000 => 0x80000000
 *     +infinity : 0 0xFF 0x000000 => 0x7F800000
 *     -infinity : 1 0xFF 0x000000 => 0xFF800000
 *     +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
 *     -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
 *     +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
 *     -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
 *
 * If either table cannot be allocated the program reports the failure on
 * stderr and exits with status 1.
 */
static void build_special_fargs_table(void)
{
   /* Sign / biased exponent / fraction of every double-precision entry,
    * in table order.
    */
   static const struct {
      int      sign;
      uint16_t expo;
      uint64_t mant;
   } dp_values[] = {
      { 0, 0x3fd, 0x8000000000000ULL },   /* #0  */
      { 0, 0x404, 0xf000000000000ULL },   /* #1  */
      { 0, 0x001, 0x8000000b77501ULL },   /* #2  */
      { 0, 0x7fe, 0x800000000051bULL },   /* #3  */
      { 0, 0x012, 0x3214569900000ULL },   /* #4  */
      { 0, 0x000, 0x0000000000000ULL },   /* #5  +0.0 */
      { 1, 0x000, 0x0000000000000ULL },   /* #6  -0.0 */
      { 0, 0x7FF, 0x0000000000000ULL },   /* #7  +infinity */
      { 1, 0x7FF, 0x0000000000000ULL },   /* #8  -infinity */
      { 0, 0x7FF, 0x7FFFFFFFFFFFFULL },   /* #9  +SNaN */
      { 1, 0x7FF, 0x7FFFFFFFFFFFFULL },   /* #10 -SNaN */
      { 0, 0x7FF, 0x8000000000000ULL },   /* #11 +QNaN */
      { 1, 0x7FF, 0x8000000000000ULL },   /* #12 -QNaN */
      { 1, 0x000, 0x8340000078000ULL },   /* #13 denormalized value */
      { 1, 0x40d, 0x0650f5a07b353ULL },   /* #14 negative finite number */
      { 0, 0x412, 0x32585a9900000ULL },   /* #15 positive finite numbers ... */
      { 0, 0x413, 0x82511a2000000ULL },   /* #16 */
      { 0, 0x403, 0x12ef5a9300000ULL },   /* #17 */
      { 0, 0x405, 0x14bf5d2300000ULL },   /* #18 */
      { 0, 0x409, 0x76bf982440000ULL },   /* #19 */
   };
   enum {
      NUM_VALUES   = sizeof dp_values / sizeof dp_values[0],
      POS_SNAN_IDX = 9,
      NEG_SNAN_IDX = 10
   };
   int j;

   if (spec_fargs)
      return;

   spec_fargs = malloc( NUM_VALUES * sizeof *spec_fargs );
   spec_sp_fargs = malloc( NUM_VALUES * sizeof *spec_sp_fargs );
   if (spec_fargs == NULL || spec_sp_fargs == NULL) {
      fprintf(stderr, "ERROR: unable to allocate the special FP argument tables\n");
      exit(1);
   }

   for (j = 0; j < NUM_VALUES; j++)
      register_farg(&spec_fargs[j],
                    dp_values[j].sign, dp_values[j].expo, dp_values[j].mant);

   /* The single-precision table is the double-precision one converted
    * entry by entry, except for the two SNaNs: when the source is a SNaN
    * it is converted to a QNaN before rounding to single precision, so a
    * plain conversion cannot produce a single-precision SNaN.  Those two
    * slots get their bits set explicitly.
    */
   for (j = 0; j < NUM_VALUES; j++) {
      if (j == POS_SNAN_IDX || j == NEG_SNAN_IDX)
         register_sp_farg(&spec_sp_fargs[j], dp_values[j].sign, 0xff, 0x3FFFFF);
      else
         spec_sp_fargs[j] = spec_fargs[j];
   }

   nb_special_fargs = NUM_VALUES;
}
/* 16-byte aligned scratch storage (eight words); used as the base address
 * of the store tests in ldst_tests.
 */
static unsigned int vstg[] __attribute__ ((aligned (16))) = { 0, 0, 0,0,
0, 0, 0, 0 };
/* 16-byte aligned integer word inputs: eight words, i.e. two vectors. */
static unsigned int viargs[] __attribute__ ((aligned (16))) = { 0x80000001,
0x89abcdef,
0x00112233,
0x74556677,
0x00001abb,
0x00000001,
0x31929394,
0xa1a2a3a4,
};
/* Element count of viargs, and the number of 4-word vectors it holds. */
#define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
#define NUM_VIARGS_VECS (NUM_VIARGS_INTS/4)
/* 16-byte aligned doubleword inputs for the integer conversion tests. */
static unsigned long long vdargs[] __attribute__ ((aligned (16))) = {
0x0102030405060708ULL,
0x090A0B0C0E0D0E0FULL,
0xF1F2F3F4F5F6F7F8ULL,
0xF9FAFBFCFEFDFEFFULL
};
/* Element count of vdargs, and the number of 2-doubleword vectors it holds. */
#define NUM_VDARGS_INTS (sizeof vdargs/sizeof vdargs[0])
#define NUM_VDARGS_VECS (NUM_VDARGS_INTS/2)
/* Every test entry point takes no arguments and returns nothing; inputs and
 * outputs travel through file-scope variables.
 */
typedef void (*test_func_t)(void);
/* One row of the top-level table: a test category and its printed title. */
struct test_table
{
test_func_t test_category;
char * name;
};
/* Operand/result precision of a test.  IS_DP_RESULT() below treats
 * SINGLE_TEST and DOUBLE_TEST as producing a doubleword result.
 */
typedef enum {
SINGLE_TEST,
SINGLE_TEST_SINGLE_RES,
DOUBLE_TEST,
DOUBLE_TEST_SINGLE_RES
} precision_type_t;
#define IS_DP_RESULT(x) ((x == SINGLE_TEST) || (x == DOUBLE_TEST))
/* Kind of floating point test; selects how a driver sets up operands and
 * reports results.
 */
typedef enum {
VX_FP_SMAS, // multiply add single precision result
VX_FP_SMSS, // multiply sub single precision result
VX_FP_SNMAS, // negative multiply add single precision result
VX_FP_SNMSS, // negative multiply sub single precision result
VX_FP_OTHER,
VX_CONV_WORD,
VX_ESTIMATE,
VX_CONV_TO_SINGLE,
VX_CONV_TO_DOUBLE,
VX_SCALAR_CONV_TO_WORD,
VX_SCALAR_SP_TO_VECTOR_SP,
VX_DEFAULT
} vx_fp_test_type;
/* Kind of load/store test; test_ldst() routes VSX_STORE rows to the store
 * driver and everything else to the load driver.
 */
typedef enum {
VSX_LOAD = 1,
VSX_LOAD_SPLAT,
VSX_STORE,
} vsx_ldst_type;
/* Logical operation tag carried by a logic test row. */
typedef enum {
VSX_AND = 1,
VSX_NAND,
VSX_ANDC,
VSX_OR,
VSX_ORC,
VSX_NOR,
VSX_XOR,
VSX_EQV,
} vsx_log_op;
/* Two-operand FP test group: the instruction wrapper, its name, the list of
 * operand index pairs, how many of them to run, and the test kind.
 */
struct vx_fp_test1
{
test_func_t test_func;
const char *name;
fp_test_args_t * targs;
int num_tests;
vx_fp_test_type test_type;
};
/* Load/store test: base_addr and offset are the values the drivers place
 * in r14 and r15 before calling test_func.
 */
struct ldst_test
{
test_func_t test_func;
const char *name;
precision_type_t precision;
void * base_addr;
uint32_t offset;
vsx_ldst_type type;
};
/* One-operand FP test group; op is the text printed in front of the
 * operand (e.g. "sqrt").
 */
struct vx_fp_test2
{
test_func_t test_func;
const char *name;
fp_test_args_t * targs;
int num_tests;
precision_type_t precision;
vx_fp_test_type test_type;
const char * op;
};
struct xs_conv_test
{
test_func_t test_func;
const char *name;
int num_tests;
};
/* Minimal test row: wrapper and name only. */
struct simple_test
{
test_func_t test_func;
const char *name;
};
/* Logic test row: wrapper, name and operation tag. */
struct vsx_logic_test
{
test_func_t test_func;
const char *name;
vsx_log_op op;
};
typedef struct vsx_logic_test logic_test_t;
typedef struct ldst_test ldst_test_t;
/* Note: xs_conv_test_t aliases struct simple_test, not struct xs_conv_test. */
typedef struct simple_test xs_conv_test_t;
typedef struct vx_fp_test1 vx_fp_test_basic_t;
typedef struct vx_fp_test2 vx_fp_test2_t;
typedef struct test_table test_table_t;
/* Operand vectors shared by all instruction wrappers: vec_inA and vec_inB
 * are the sources, vec_out receives the result (and is also an input for
 * the wrappers that declare it read-write).
 */
static vector unsigned int vec_out, vec_inA, vec_inB;
/* Execute xscvdpspn: source operand vec_inB, result written to vec_out. */
static void test_xscvdpspn(void)
{
__asm__ __volatile__ ("xscvdpspn %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
/* Execute xscvspdpn: source operand vec_inB, result written to vec_out. */
static void test_xscvspdpn(void)
{
__asm__ __volatile__ ("xscvspdpn %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
/* Selects the instruction form used by the multiply-add/sub wrappers:
 * non-zero picks the "asp" mnemonic, zero picks the "msp" mnemonic.
 */
static int do_asp;
/* Execute xsmaddasp or xsmaddmsp (chosen by do_asp) on vec_inA and vec_inB.
 * vec_out is a read-write operand: it supplies the third source and
 * receives the result.
 */
static void test_xsmadds(void)
{
if (do_asp)
__asm__ __volatile__ ("xsmaddasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
else
__asm__ __volatile__ ("xsmaddmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}
/* Execute xsmsubasp or xsmsubmsp (chosen by do_asp) on vec_inA and vec_inB.
 * vec_out is a read-write operand: third source in, result out.
 */
static void test_xsmsubs(void)
{
if (do_asp)
__asm__ __volatile__ ("xsmsubasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
else
__asm__ __volatile__ ("xsmsubmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}
/* Execute xscvsxdsp: source operand vec_inB, result written to vec_out. */
static void test_xscvsxdsp (void)
{
__asm__ __volatile__ ("xscvsxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
/* Execute xscvuxdsp: source operand vec_inB, result written to vec_out. */
static void test_xscvuxdsp (void)
{
__asm__ __volatile__ ("xscvuxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
/* Execute xsnmaddasp or xsnmaddmsp (chosen by do_asp) on vec_inA and
 * vec_inB.  vec_out is a read-write operand: third source in, result out.
 */
static void test_xsnmadds(void)
{
if (do_asp)
__asm__ __volatile__ ("xsnmaddasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
else
__asm__ __volatile__ ("xsnmaddmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}
/* Execute xsnmsubasp or xsnmsubmsp (chosen by do_asp) on vec_inA and
 * vec_inB.  vec_out is a read-write operand: third source in, result out.
 */
static void test_xsnmsubs(void)
{
if (do_asp)
__asm__ __volatile__ ("xsnmsubasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
else
__asm__ __volatile__ ("xsnmsubmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}
/* Execute stxsspx: stores from vec_inA using r14 and r15 as the two
 * address operands.  Callers load r14/r15 before invoking this.
 */
static void test_stxsspx(void)
{
__asm__ __volatile__ ("stxsspx %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
}
/* Execute stxsiwx: stores from vec_inA using r14 and r15 as the two
 * address operands.  Callers load r14/r15 before invoking this.
 */
static void test_stxsiwx(void)
{
__asm__ __volatile__ ("stxsiwx %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
}
/* Execute lxsiwax: loads into vec_out using r14 and r15 as the two
 * address operands.  Callers load r14/r15 before invoking this.
 */
static void test_lxsiwax(void)
{
__asm__ __volatile__ ("lxsiwax %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
}
/* Execute lxsiwzx: loads into vec_out using r14 and r15 as the two
 * address operands.  Callers load r14/r15 before invoking this.
 */
static void test_lxsiwzx(void)
{
__asm__ __volatile__ ("lxsiwzx %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
}
/* Execute lxsspx: loads into vec_out using r14 and r15 as the two
 * address operands.  Callers load r14/r15 before invoking this.
 */
static void test_lxsspx(void)
{
__asm__ __volatile__ ("lxsspx %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
}
/* Execute xssqrtsp: source operand vec_inB, result written to vec_out. */
static void test_xssqrtsp(void)
{
__asm__ __volatile__ ("xssqrtsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
/* Execute xsrsqrtesp: source operand vec_inB, result written to vec_out. */
static void test_xsrsqrtesp(void)
{
__asm__ __volatile__ ("xsrsqrtesp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
/* Three argument instructions */
/* Execute xxleqv: sources vec_inA and vec_inB, result written to vec_out. */
static void test_xxleqv(void)
{
__asm__ __volatile__ ("xxleqv %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}
/* Execute xxlorc: sources vec_inA and vec_inB, result written to vec_out. */
static void test_xxlorc(void)
{
__asm__ __volatile__ ("xxlorc %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}
/* Execute xxlnand: sources vec_inA and vec_inB, result written to vec_out. */
static void test_xxlnand(void)
{
__asm__ __volatile__ ("xxlnand %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}
/* Execute xsaddsp: sources vec_inA and vec_inB, result written to vec_out. */
static void test_xsaddsp(void)
{
__asm__ __volatile__ ("xsaddsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
}
/* Execute xssubsp: sources vec_inA and vec_inB, result written to vec_out. */
static void test_xssubsp(void)
{
__asm__ __volatile__ ("xssubsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
}
/* Execute xsdivsp: sources vec_inA and vec_inB, result written to vec_out. */
static void test_xsdivsp(void)
{
__asm__ __volatile__ ("xsdivsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
}
/* Execute xsmulsp: sources vec_inA and vec_inB, result written to vec_out. */
static void test_xsmulsp(void)
{
__asm__ __volatile__ ("xsmulsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}
/* Execute xsresp: source operand vec_inB, result written to vec_out. */
static void test_xsresp(void)
{
__asm__ __volatile__ ("xsresp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
/* Execute xsrsp: source operand vec_inB, result written to vec_out. */
static void test_xsrsp(void)
{
__asm__ __volatile__ ("xsrsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
/* Operand pairs for the two-operand FP tests.  Each entry is
 * {fra_idx, frb_idx}: indices into spec_fargs for the A and B sources.
 * The array holds 128 entries; the test tables in this file that reference
 * it ask for the first 64 only, and entries 64..127 repeat entries 0..63.
 * NOTE(review): the {4, 1} entry breaks the otherwise regular B-index
 * pattern (8, 14, 6, 5, 4, 7, 9, 11) -- presumably deliberate; changing it
 * would change the test output.
 */
fp_test_args_t vx_math_tests[] = {
{8, 8},
{8, 14},
{8, 6},
{8, 5},
{8, 4},
{8, 7},
{8, 9},
{8, 11},
{14, 8},
{14, 14},
{14, 6},
{14, 5},
{14, 4},
{14, 7},
{14, 9},
{14, 11},
{6, 8},
{6, 14},
{6, 6},
{6, 5},
{6, 4},
{6, 7},
{6, 9},
{6, 11},
{5, 8},
{5, 14},
{5, 6},
{5, 5},
{5, 4},
{5, 7},
{5, 9},
{5, 11},
{4, 8},
{4, 14},
{4, 6},
{4, 5},
{4, 1},
{4, 7},
{4, 9},
{4, 11},
{7, 8},
{7, 14},
{7, 6},
{7, 5},
{7, 4},
{7, 7},
{7, 9},
{7, 11},
{10, 8},
{10, 14},
{10, 6},
{10, 5},
{10, 4},
{10, 7},
{10, 9},
{10, 11},
{12, 8},
{12, 14},
{12, 6},
{12, 5},
{12, 4},
{12, 7},
{12, 9},
{12, 11},
{8, 8},
{8, 14},
{8, 6},
{8, 5},
{8, 4},
{8, 7},
{8, 9},
{8, 11},
{14, 8},
{14, 14},
{14, 6},
{14, 5},
{14, 4},
{14, 7},
{14, 9},
{14, 11},
{6, 8},
{6, 14},
{6, 6},
{6, 5},
{6, 4},
{6, 7},
{6, 9},
{6, 11},
{5, 8},
{5, 14},
{5, 6},
{5, 5},
{5, 4},
{5, 7},
{5, 9},
{5, 11},
{4, 8},
{4, 14},
{4, 6},
{4, 5},
{4, 1},
{4, 7},
{4, 9},
{4, 11},
{7, 8},
{7, 14},
{7, 6},
{7, 5},
{7, 4},
{7, 7},
{7, 9},
{7, 11},
{10, 8},
{10, 14},
{10, 6},
{10, 5},
{10, 4},
{10, 7},
{10, 9},
{10, 11},
{12, 8},
{12, 14},
{12, 6},
{12, 5},
{12, 4},
{12, 7},
{12, 9},
{12, 11}
};
// These are all double precision inputs with double word outputs (mostly converted to single precision)
// Table driven by test_vx_fp_ops(): each row runs the first 64 pairs of
// vx_math_tests.  The multiply-add/sub rows are run twice, once per
// instruction form; the list ends at the row whose test_func is NULL.
static vx_fp_test_basic_t vx_fp_tests[] = {
{ &test_xsmadds, "xsmadd", vx_math_tests, 64, VX_FP_SMAS},
{ &test_xsmsubs, "xsmsub", vx_math_tests, 64, VX_FP_SMSS},
{ &test_xsmulsp, "xsmulsp", vx_math_tests, 64, VX_FP_OTHER},
{ &test_xsdivsp, "xsdivsp", vx_math_tests, 64, VX_FP_OTHER},
{ &test_xsnmadds, "xsnmadd", vx_math_tests, 64, VX_FP_SNMAS},
{ &test_xsnmsubs, "xsnmsub", vx_math_tests, 64, VX_FP_SNMSS},
{ NULL, NULL, NULL, 0, 0 }
};
/* Table driven by test_vsx_one_fp_arg().  targs is NULL for every row: the
 * driver feeds table entries 0..num_tests-1 directly as the operand.  The
 * last string is the label printed in front of the operand.  The list ends
 * at the row whose test_func is NULL.
 */
static vx_fp_test2_t
vsx_one_fp_arg_tests[] = {
{ &test_xscvdpspn, "xscvdpspn", NULL, 20, DOUBLE_TEST_SINGLE_RES, VX_SCALAR_SP_TO_VECTOR_SP, "conv"},
{ &test_xscvspdpn, "xscvspdpn", NULL, 20, SINGLE_TEST, VX_DEFAULT, "conv"},
{ &test_xsresp, "xsresp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
{ &test_xsrsp, "xsrsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "round"},
{ &test_xsrsqrtesp, "xsrsqrtesp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/sqrt"},
{ &test_xssqrtsp, "xssqrtsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
{ NULL, NULL, NULL, 0, 0, 0, NULL}
};
// These are all double precision inputs with double word outputs (mostly converted to single precision)
// Table driven by test_vsx_two_fp_arg(): each row runs the first 64 pairs
// of vx_math_tests; the list ends at the row whose test_func is NULL.
static vx_fp_test_basic_t
vx_simple_scalar_fp_tests[] = {
{ &test_xssubsp, "xssubsp", vx_math_tests, 64, VX_DEFAULT},
{ &test_xsaddsp, "xsaddsp", vx_math_tests, 64, VX_DEFAULT},
{ NULL, NULL, NULL, 0 , 0}
};
/* Table driven by test_ldst().  Store rows write into vstg; the integer
 * load rows read from viargs.  A NULL base address (the lxsspx row) makes
 * the load driver read from the single-precision table instead.  The list
 * ends at the row whose test_func is NULL.
 */
static ldst_test_t
ldst_tests[] = {
{ &test_stxsspx, "stxsspx", DOUBLE_TEST_SINGLE_RES, vstg, 0, VSX_STORE },
{ &test_stxsiwx, "stxsiwx", SINGLE_TEST_SINGLE_RES, vstg, 4, VSX_STORE },
{ &test_lxsiwax, "lxsiwax", SINGLE_TEST, viargs, 0, VSX_LOAD },
{ &test_lxsiwzx, "lxsiwzx", SINGLE_TEST, viargs, 4, VSX_LOAD },
{ &test_lxsspx, "lxsspx", SINGLE_TEST, NULL, 0, VSX_LOAD },
{ NULL, NULL, 0, NULL, 0, 0 } };
/* Table driven by test_xs_conv_ops(); every row is run against each
 * element of vdargs.  The list ends at the row whose test_func is NULL.
 */
static xs_conv_test_t
xs_conv_tests[] = {
{ &test_xscvsxdsp, "xscvsxdsp"},
{ &test_xscvuxdsp, "xscvuxdsp"},
{ NULL, NULL}
};
/* Table driven by test_vsx_logic().  The list ends at the row whose
 * test_func is NULL (its op member is left to default initialization).
 */
static logic_test_t
logic_tests[] = {
{ &test_xxleqv, "xxleqv", VSX_EQV },
{ &test_xxlorc, "xxlorc", VSX_ORC },
{ &test_xxlnand, "xxlnand", VSX_NAND },
{ NULL, NULL}
};
/* Decide whether the estimate currently held in vec_out is acceptable for
 * the source value spec_fargs[idx].
 *
 *   is_rsqrte      : True to validate a reciprocal square root estimate,
 *                    False to validate a plain reciprocal estimate
 *   idx            : index of the source operand in spec_fargs
 *   output_vec_idx : doubleword index of the result inside vec_out
 *
 * Returns True when the result is acceptable, False otherwise.
 */
Bool check_reciprocal_estimate(Bool is_rsqrte, int idx, int output_vec_idx)
{
   /* NOTE:
    * This function has been verified only with the xsresp and xsrsqrtesp
    * instructions.
    *
    * Technically, the number of bits of precision for xsresp and xsrsqrtesp
    * is 14 bits (14 = log2 16384).  However, the VEX emulation of these
    * instructions does an actual reciprocal calculation versus estimation,
    * so the answer we get back from valgrind can easily differ from the
    * estimate in the lower bits (within the 14 bits of precision) and the
    * estimate may still be within expected tolerances.  On top of that, we
    * can't count on these estimates always being the same across
    * implementations.  For example, with the fre[s] instruction (which
    * should be correct to within one part in 256 -- i.e., 8 bits of
    * precision) . . . When approximating the value 1.0111_1111_1111, one
    * implementation could return 1.0111_1111_0000 and another
    * implementation could return 1.1000_0000_0000.  Both estimates meet the
    * 1/256 accuracy requirement, but share only a single bit in common.
    *
    * The upshot is we can't validate the VEX output for these instructions
    * by comparing against stored bit patterns.  We must check that the
    * result is within expected tolerances.
    */

   /* A mask to be used for validation as a last resort.  It keeps the
    * sign, the 8 exponent bits and the top 8 fraction bits of a
    * single-precision value, for the reasons discussed above.
    */
#define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFF8000
   const unsigned long long sign_mask = 0x8000000000000000ULL;
   unsigned long long src_bits, res_bits;
   double src_dp, res_dp, recip_divisor;
   float div_result, calc_diff_tmp;
   Bool src_is_negative, res_is_negative;

   /* Fetch source and result as raw bits with memcpy, so no object is
    * accessed through a pointer of an unrelated type.
    */
   src_dp = spec_fargs[idx];
   memcpy(&src_bits, &src_dp, sizeof src_bits);
   memcpy(&res_bits,
          (const unsigned char *)&vec_out + (size_t)output_vec_idx * sizeof res_bits,
          sizeof res_bits);
   memcpy(&res_dp, &res_bits, sizeof res_dp);
   src_is_negative = (src_bits & sign_mask) ? True : False;
   res_is_negative = (res_bits & sign_mask) ? True : False;

   /* Rules common to both instructions.  The int results of the
    * classification macros are normalised to True/False before being
    * narrowed to Bool.
    */
   if (isnan(src_dp))
      return isnan(res_dp) ? True : False;
   if (fpclassify(src_dp) == FP_ZERO)
      return isinf(res_dp) ? True : False;
   if (!src_is_negative && isinf(src_dp))
      return (!res_is_negative && (fpclassify(res_dp) == FP_ZERO)) ? True : False;
   if (is_rsqrte) {
      if (src_is_negative)
         return isnan(res_dp) ? True : False;
   } else if (src_is_negative && isinf(src_dp)) {
      return (res_is_negative && (fpclassify(res_dp) == FP_ZERO)) ? True : False;
   }

   recip_divisor = is_rsqrte ? sqrt(src_dp) : src_dp;

   /* The instructions handled by this function take a double precision
    * input, perform a reciprocal estimate in double-precision, round
    * the result to single precision and store into the destination
    * register in double precision format.  So, to check the result
    * for accuracy, we use float (single precision) values.
    */
   div_result = 1.0/recip_divisor;
   calc_diff_tmp = recip_divisor * 16384.0;

   if (isnormal(calc_diff_tmp)) {
      float calc_diff = fabs(1.0/calc_diff_tmp);
      float real_diff = fabs((float)res_dp - div_result);
#if FRES_DEBUG
      unsigned int dv, rd, cd;
      memcpy(&dv, &div_result, sizeof dv);
      memcpy(&rd, &real_diff, sizeof rd);
      memcpy(&cd, &calc_diff, sizeof cd);
      printf("\n\t {computed div_result: %08x; real_diff: %08x; calc_diff: %08x}\n",
             dv, rd, cd);
#endif
      return (( res_dp == div_result ) || ( real_diff <= calc_diff )) ? True : False;
   }

   /* Unable to compute theoretical difference, so we fall back to masking
    * out un-precise bits.
    */
   {
      float res_sp = (float)res_dp;
      unsigned int res_sp_bits, div_result_bits;

      memcpy(&res_sp_bits, &res_sp, sizeof res_sp_bits);
      memcpy(&div_result_bits, &div_result, sizeof div_result_bits);
#if FRES_DEBUG
      {
         unsigned int calc_diff_tmp_bits;
         memcpy(&calc_diff_tmp_bits, &calc_diff_tmp, sizeof calc_diff_tmp_bits);
         printf("Unable to compute theoretical difference, so we fall back to masking\n");
         printf("\tcalc_diff_tmp: %08x; div_result: %08x; vector result (sp): %08x\n",
                calc_diff_tmp_bits, div_result_bits, res_sp_bits);
      }
#endif
      return ((res_sp_bits & VSX_RECIP_ESTIMATE_MASK_SP)
              == (div_result_bits & VSX_RECIP_ESTIMATE_MASK_SP)) ? True : False;
   }
}
/* Run every group in vx_fp_tests and print one line per operand pair.
 *
 * Groups of type VX_FP_OTHER are plain two-operand instructions and are run
 * once.  The multiply-add/sub groups take a third source operand and are
 * run twice: first the "asp" form (do_asp = 1), then the "msp" form
 * (do_asp = 0).  An unknown test type is reported and terminates the
 * program with status 1.
 *
 * The printed name lives in a fixed local buffer written with snprintf, so
 * there is no allocation that can fail and no unbounded string copy.
 */
static void test_vx_fp_ops(void)
{
   test_func_t func;
   int k;
   char test_name[20];
   /* Doubleword element 0 sits 8 bytes into the vector on little-endian. */
   const size_t dw0_offset = isLE ? 8 : 0;
   unsigned char *vecA_ptr = (unsigned char *)&vec_inA + dw0_offset;
   unsigned char *vecB_ptr = (unsigned char *)&vec_inB + dw0_offset;
   unsigned char *vecOut_ptr = (unsigned char *)&vec_out + dw0_offset;

   build_special_fargs_table();

   for (k = 0; (func = vx_fp_tests[k].test_func); k++) {
      vx_fp_test_basic_t test_group = vx_fp_tests[k];
      vx_fp_test_type test_type = test_group.test_type;
      const char *base_name;
      int num_passes, pass;

      switch (test_type) {
      case VX_FP_SMAS:
         base_name = "xsmadd";
         num_passes = 2;
         break;
      case VX_FP_SMSS:
         base_name = "xsmsub";
         num_passes = 2;
         break;
      case VX_FP_SNMAS:
         base_name = "xsnmadd";
         num_passes = 2;
         break;
      case VX_FP_SNMSS:
         base_name = "xsnmsub";
         num_passes = 2;
         break;
      case VX_FP_OTHER:
         base_name = test_group.name;
         num_passes = 1;
         break;
      default:
         printf("ERROR: Invalid VX FP test type %d\n", test_type);
         exit(1);
      }

      for (pass = 0; pass < num_passes; pass++) {
         const int three_src = (test_type != VX_FP_OTHER);
         /* First pass of a three-source group exercises the "asp" form. */
         const int a_form = three_src && (pass == 0);
         int i;

         if (three_src) {
            do_asp = a_form;
            snprintf(test_name, sizeof test_name, "%s%s",
                     base_name, a_form ? "asp" : "msp");
         } else {
            snprintf(test_name, sizeof test_name, "%s", base_name);
         }

         for (i = 0; i < test_group.num_tests; i++) {
            fp_test_args_t aTest = test_group.targs[i];
            const double *src_a = &spec_fargs[aTest.fra_idx];
            const double *src_b = &spec_fargs[aTest.frb_idx];
            unsigned long long fra_bits, frb_bits, result_bits;
            unsigned long long vsr_XT = 0;

            memcpy(&fra_bits, src_a, sizeof fra_bits);
            memcpy(&frb_bits, src_b, sizeof frb_bits);

            // Only need to copy one doubleword into each vector's element 0
            memcpy(vecA_ptr, src_a, 8);
            memcpy(vecB_ptr, src_b, 8);
            // clear vec_out
            memset(&vec_out, 0, sizeof vec_out);

            if (three_src) {
               /* A third source argument is needed, and it is passed in
                * element 0 of VSX[XT] -- i.e., vec_out.  For the "msp"
                * form VSX[XT] holds src3 and VSX[XB] holds src2; for the
                * "asp" form VSX[XT] holds src2 and VSX[XB] holds src3.
                * The operand tables carry only two inputs, so spec_fargs
                * elements 4 and 14 (alternating) serve as the third one.
                * Swapping src2 and src3 between the two forms lets both
                * use the same input data.
                */
               const double *extra = &spec_fargs[(i % 2) ? 4 : 14];

               if (a_form) {
                  memcpy(vecOut_ptr, src_b, 8);   // src2 <= VSX[XT]
                  memcpy(vecB_ptr, extra, 8);     // src3 <= VSX[XB]
                  memcpy(&frb_bits, extra, sizeof frb_bits);
               } else {
                  // src2 is already in VSX[XB]
                  memcpy(vecOut_ptr, extra, 8);   // src3 <= VSX[XT]
               }
               memcpy(&vsr_XT, vecOut_ptr, sizeof vsr_XT);
            }

            (*func)();
            memcpy(&result_bits, vecOut_ptr, sizeof result_bits);

            if (three_src)
               printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
                       test_name, vsr_XT, fra_bits, frb_bits, result_bits );
            else
               printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name,
                      fra_bits, frb_bits, result_bits);
         }
         printf( "\n" );
      }
   }
   printf( "\n" );
}
static void test_vsx_one_fp_arg(void)
{
test_func_t func;
int k;
void * vecB_void_ptr;
k = 0;
build_special_fargs_table();
while ((func = vsx_one_fp_arg_tests[k].test_func)) {
int idx, i;
unsigned long long *dst_dp;
unsigned int * dst_sp;
vx_fp_test2_t test_group = vsx_one_fp_arg_tests[k];
/* size of source operands */
Bool dp = ((test_group.precision == DOUBLE_TEST) ||
(test_group.precision == DOUBLE_TEST_SINGLE_RES)) ? True : False;
/* size of result */
Bool dp_res = IS_DP_RESULT(test_group.precision);
Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
vecB_void_ptr = (void *)&vec_inB;
if (isLE) {
vecB_void_ptr += dp? 8 : 12;
}
for (i = 0; i < test_group.num_tests; i++) {
unsigned int * pv;
void * inB;
pv = (unsigned int *)&vec_out;
// clear vec_out
for (idx = 0; idx < 4; idx++, pv++)
*pv = 0;
if (dp) {
int vec_out_idx;
unsigned long long * frB_dp;
if (isLE)
vec_out_idx = dp_res ? 1 : 3;
else
vec_out_idx = 0;
if (test_group.test_type == VX_SCALAR_SP_TO_VECTOR_SP) {
/* Take a single-precision value stored in double word element 0
* of src in double-precision format and convert to single-
* precision and store in word element 0 of dst.
*/
double input = spec_sp_fargs[i];
memcpy(vecB_void_ptr, (void *)&input, 8);
} else {
inB = (void *)&spec_fargs[i];
// copy double precision FP into input vector element 0
memcpy(vecB_void_ptr, inB, 8);
}
// execute test insn
(*func)();
if (dp_res)
dst_dp = (unsigned long long *) &vec_out;
else
dst_sp = (unsigned int *) &vec_out;
printf("#%d: %s ", i, test_group.name);
frB_dp = (unsigned long long *)&spec_fargs[i];
printf("%s(%016llx)", test_group.op, *frB_dp);
if (test_group.test_type == VX_ESTIMATE)
{
Bool res;
res = check_reciprocal_estimate(is_sqrt, i, vec_out_idx);
printf(" ==> %s)", res ? "PASS" : "FAIL");
} else if (dp_res) {
printf(" = %016llx", dst_dp[vec_out_idx]);
} else {
printf(" = %08x", dst_sp[vec_out_idx]);
}
printf("\n");
} else { // single precision test type
int vec_out_idx;
if (isLE)
vec_out_idx = dp_res ? 1 : 3;
else
vec_out_idx = 0;
// Clear input vector
pv = (unsigned int *)&vec_inB;
for (idx = 0; idx < 4; idx++, pv++)
*pv = 0;
inB = (void *)&spec_sp_fargs[i];
// copy single precision FP into input vector element i
memcpy(vecB_void_ptr, inB, 4);
// execute test insn
(*func)();
if (dp_res)
dst_dp = (unsigned long long *) &vec_out;
else
dst_sp = (unsigned int *) &vec_out;
// print result
printf("#%d: %s ", i, test_group.name);
printf("%s(%08x)", test_group.op, *((unsigned int *)&spec_sp_fargs[i]));
if (dp_res)
printf(" = %016llx", dst_dp[vec_out_idx]);
else
printf(" = %08x", dst_sp[vec_out_idx]);
printf("\n");
}
}
k++;
printf( "\n" );
}
}
/* Run every group in vx_simple_scalar_fp_tests.  Only instructions with two
 * double precision input arguments are supported.  vec_out is cleared once
 * per group, before its first operand pair.
 */
static void test_vsx_two_fp_arg(void)
{
   test_func_t func;
   int k;
   /* Doubleword element 0 sits 8 bytes into the vector on little-endian. */
   const size_t dw0_offset = isLE ? 8 : 0;
   unsigned char *vecA_ptr = (unsigned char *)&vec_inA + dw0_offset;
   unsigned char *vecB_ptr = (unsigned char *)&vec_inB + dw0_offset;
   unsigned char *result_ptr = (unsigned char *)&vec_out + dw0_offset;

   build_special_fargs_table();

   for (k = 0; (func = vx_simple_scalar_fp_tests[k].test_func); k++) {
      vx_fp_test_basic_t test_group = vx_simple_scalar_fp_tests[k];
      int i;

      // clear vec_out
      memset(&vec_out, 0, sizeof vec_out);

      for (i = 0; i < test_group.num_tests; i++) {
         fp_test_args_t aTest = test_group.targs[i];
         unsigned long long fra_bits, frb_bits, result_bits;

         memcpy(&fra_bits, &spec_fargs[aTest.fra_idx], sizeof fra_bits);
         memcpy(&frb_bits, &spec_fargs[aTest.frb_idx], sizeof frb_bits);

         // Only need to copy one doubleword into each vector's element 0
         memcpy(vecA_ptr, &fra_bits, 8);
         memcpy(vecB_ptr, &frb_bits, 8);

         (*func)();

         memcpy(&result_bits, result_ptr, sizeof result_bits);
         printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
                fra_bits, frb_bits, result_bits);
      }
      printf( "\n" );
   }
}
/* Run one store test on every third entry of the single-precision table.
 *
 * This function handles the following cases:
 *   1) Single precision value stored in double-precision
 *      floating-point format in doubleword element 0 of src VSX register
 *   2) Integer word value stored in word element 1 of src VSX register
 *      (precision == SINGLE_TEST_SINGLE_RES)
 *
 * The source element lives at a different byte offset inside vec_inA
 * depending on endianness:
 *   big-endian    : doubleword element 0 at byte 0, word element 1 at byte 4
 *   little-endian : doubleword element 0 and word element 1 both at byte 8
 * The offset is assigned on every path, so the input is always copied to
 * a valid location inside vec_inA.
 */
static void _do_store_test (ldst_test_t storeTest)
{
   test_func_t func = storeTest.test_func;
   unsigned int *pv = (unsigned int *) storeTest.base_addr;
   unsigned int *dst32;
   unsigned int i, idx;
   unsigned char *vecA_ptr = (unsigned char *)&vec_inA;
   const Bool word_source =
      (storeTest.precision == SINGLE_TEST_SINGLE_RES) ? True : False;

   if (isLE)
      vecA_ptr += 8;
   else if (word_source)
      vecA_ptr += 4;

   /* The store wrappers take their address operands from r14 and r15. */
   r14 = (HWord_t) storeTest.base_addr;
   r15 = (HWord_t) storeTest.offset;

   /* test some of the pre-defined single precision values */
   for (i = 0; i < (unsigned int)nb_special_fargs; i+=3) {
      // clear out storage destination
      for (idx = 0; idx < 4; idx++)
         pv[idx] = 0;

      printf( "%s:", storeTest.name );
      if (word_source) {
         unsigned int arg_bits;

         memcpy(&arg_bits, &spec_sp_fargs[i], sizeof arg_bits);
         memcpy(vecA_ptr, &arg_bits, sizeof arg_bits);
         printf(" %08x ==> ", arg_bits);
      } else {
         /* Widen to double: the instruction expects the single-precision
          * value in double-precision format.
          */
         double input = spec_sp_fargs[i];
         unsigned long long input_bits;

         memcpy(&input_bits, &input, sizeof input_bits);
         memcpy(vecA_ptr, &input_bits, sizeof input_bits);
         printf(" %016llx ==> ", input_bits);
      }

      // execute test insn
      (*func)();

      dst32 = (unsigned int *)storeTest.base_addr
              + (storeTest.offset / sizeof(int));
      printf( "%08x\n", *dst32);
   }
   printf("\n");
}
/* Run one load test and print "<name>: <source word> ==> <result>" per
 * iteration.
 *
 * With a NULL base address (lxsspx) the sources are the pre-defined single
 * precision values, visiting every third entry; otherwise the sources are
 * the integer words of viargs, visiting every entry.  r14 receives the
 * address of the current element and r15 the byte offset, so the word that
 * is actually loaded is the one at element index + offset/sizeof(int).
 */
static void _do_load_test(ldst_test_t loadTest)
{
   test_func_t func = loadTest.test_func;
   const size_t word_offset = loadTest.offset / sizeof(int);
   const Bool sp_source = (loadTest.base_addr == NULL) ? True : False;
   const unsigned int step = sp_source ? 3 : 1;
   unsigned long long * dst_dp;
   unsigned int i;
   int num_loops;

   r15 = (HWord_t) loadTest.offset;

   /* Stop early enough that the offset never indexes past the table. */
   if (sp_source)
      num_loops = nb_special_fargs - word_offset;
   else
      num_loops = NUM_VIARGS_INTS - word_offset;

   for (i = 0; i < num_loops; i += step) {
      unsigned int shown;
      HWord_t element_addr;

      if (sp_source) {
         memcpy(&shown, &spec_sp_fargs[i + word_offset], sizeof shown);
         element_addr = (HWord_t)&spec_sp_fargs[i];
      } else {
         shown = viargs[i + word_offset];
         element_addr = (HWord_t)&viargs[i];
      }

      printf( "%s:", loadTest.name );
      printf(" %08x ==> ", shown);
      r14 = element_addr;

      // execute test insn
      (*func)();

      dst_dp = (unsigned long long *) &vec_out;
      if (isLE)
         dst_dp++;
      printf("%016llx\n", *dst_dp);
   }
   printf("\n");
}
/* Run every row of ldst_tests: store rows go to _do_store_test(), all other
 * rows to _do_load_test().  A blank line is printed after each row.
 *
 * The drivers read the special FP value tables, so they are built here
 * first (a no-op when an earlier test category has already built them);
 * this keeps the category correct regardless of the order tests run in.
 */
static void test_ldst(void)
{
   int k;

   build_special_fargs_table();

   for (k = 0; ldst_tests[k].test_func; k++) {
      if (ldst_tests[k].type == VSX_STORE)
         _do_store_test(ldst_tests[k]);
      else
         _do_load_test(ldst_tests[k]);
      printf("\n");
   }
}
/* Run every row of xs_conv_tests against each doubleword of vdargs and
 * print the source and the resulting doubleword.
 */
static void test_xs_conv_ops(void)
{
   test_func_t func;
   int k;
   /* Doubleword element 0 sits 8 bytes into the vector on little-endian. */
   const size_t dw0_offset = isLE ? 8 : 0;
   unsigned char *vecB_ptr = (unsigned char *)&vec_inB + dw0_offset;
   unsigned char *result_ptr = (unsigned char *)&vec_out + dw0_offset;

   build_special_fargs_table();

   for (k = 0; (func = xs_conv_tests[k].test_func); k++) {
      xs_conv_test_t test_group = xs_conv_tests[k];
      int i;

      for (i = 0; i < NUM_VDARGS_INTS; i++) {
         unsigned long long result_bits;

         memcpy(vecB_ptr, &vdargs[i], 8);
         // clear vec_out
         memset(&vec_out, 0, sizeof vec_out);

         (*func)();

         memcpy(&result_bits, result_ptr, sizeof result_bits);
         printf("#%d: %s %016llx => %016llx\n", i, test_group.name, vdargs[i], result_bits);
      }
      printf("\n");
   }
   printf( "\n" );
}
/* Run every row of logic_tests and print both input vectors and the result.
 * Both source operands are loaded from the same four words of viargs, and
 * the printed "#" number is the row index in logic_tests.
 *
 * NOTE(review): the inner loop steps by 4 but is bounded by
 * NUM_VIARGS_VECS, so with the eight-word viargs it executes for i == 0
 * only -- presumably the second vector was meant to be covered too.  Left
 * as is because changing it would change the test output.
 */
static void test_vsx_logic(void)
{
   test_func_t func;
   int k;

   for (k = 0; (func = logic_tests[k].test_func); k++) {
      logic_test_t aTest = logic_tests[k];
      int i;

      for (i = 0; i <= NUM_VIARGS_VECS; i+=4) {
         unsigned int *inA = &viargs[i];
         unsigned int *inB = &viargs[i];
         unsigned int *dst = (unsigned int *) &vec_out;

         memcpy(&vec_inA, inA, sizeof(vector unsigned int));
         memcpy(&vec_inB, inB, sizeof(vector unsigned int));
         // clear vec_out
         memset(&vec_out, 0, sizeof vec_out);

         // execute test insn
         (*func)();

         printf( "#%d: %10s ", k, aTest.name);
         printf( " (%08x %08x %08x %08x, ", inA[0], inA[1], inA[2], inA[3]);
         printf( " %08x %08x %08x %08x)", inB[0], inB[1], inB[2], inB[3]);
         printf(" ==> %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
      }
   }
   printf( "\n" );
}
//----------------------------------------------------------
/* Top-level table walked by main(): each row pairs a test category with the
 * title printed before it runs.  The list ends at the row whose
 * test_category is NULL.
 */
static test_table_t all_tests[] = {
{ &test_vx_fp_ops,
"Test VSX floating point instructions"},
{ &test_vsx_one_fp_arg,
"Test VSX vector and scalar single argument instructions"} ,
{ &test_vsx_logic,
"Test VSX logic instructions" },
{ &test_xs_conv_ops,
"Test VSX scalar integer conversion instructions" },
{ &test_ldst,
"Test VSX load/store dp to sp instructions" },
{ &test_vsx_two_fp_arg,
"Test VSX vector and scalar two argument instructions"} ,
{ NULL, NULL }
};
#endif
/* Program entry point.  When built with HAS_ISA_2_07, print the title of
 * each test category in all_tests and run it, in table order; otherwise
 * print a notice that ISA 2.07 is not supported.  Always returns 0.
 * The command line arguments are not used.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_ISA_2_07
   int i;

   for (i = 0; all_tests[i].test_category != NULL; i++) {
      test_table_t aTest = all_tests[i];

      printf( "%s\n", aTest.name );
      (*aTest.test_category)();
   }
#else
   printf("NO ISA 2.07 SUPPORT\n");
#endif
   return 0;
}