/* Copyright (C) 2011 IBM Author: Maynard Johnson <maynardj@us.ibm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. The GNU General Public License is contained in the file COPYING. */ #ifdef HAS_VSX #include <stdio.h> #include <stdint.h> #include <stdlib.h> #include <string.h> #include <malloc.h> #include <altivec.h> #ifndef __powerpc64__ typedef uint32_t HWord_t; #else typedef uint64_t HWord_t; #endif /* __powerpc64__ */ #ifdef VGP_ppc64le_linux #define isLE 1 #else #define isLE 0 #endif register HWord_t r14 __asm__ ("r14"); register HWord_t r15 __asm__ ("r15"); register HWord_t r16 __asm__ ("r16"); register HWord_t r17 __asm__ ("r17"); register double f14 __asm__ ("fr14"); register double f15 __asm__ ("fr15"); register double f16 __asm__ ("fr16"); register double f17 __asm__ ("fr17"); static volatile unsigned int cond_reg; #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7" #define SET_CR(_arg) \ __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR ); #define SET_XER(_arg) \ __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" ); #define GET_CR(_lval) \ __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) ) #define GET_XER(_lval) \ __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) ) #define GET_CR_XER(_lval_cr,_lval_xer) \ do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0) #define SET_CR_ZERO \ SET_CR(0) #define SET_XER_ZERO \ SET_XER(0) #define SET_CR_XER_ZERO \ do { SET_CR_ZERO; SET_XER_ZERO; } while (0) #define SET_FPSCR_ZERO \ do { double _d = 0.0; \ __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \ } while (0) typedef void (*test_func_t)(void); typedef struct ldst_test ldst_test_t; typedef struct vsx_logic_test logic_test_t; typedef struct xs_conv_test xs_conv_test_t; typedef struct p7_fp_test fp_test_t; typedef struct vx_fp_test vx_fp_test_t; typedef struct vsx_move_test move_test_t; typedef struct vsx_permute_test permute_test_t; typedef struct test_table test_table_t; static double *fargs = NULL; static int nb_fargs; /* These functions below that construct a table of floating point * values were lifted from none/tests/ppc32/jm-insns.c. */ #if defined (DEBUG_ARGS_BUILD) #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0) #else #define AB_DPRINTF(fmt, args...) do { } while (0) #endif static inline void register_farg (void *farg, int s, uint16_t _exp, uint64_t mant) { uint64_t tmp; tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant; *(uint64_t *)farg = tmp; AB_DPRINTF("%d %03x %013llx => %016llx %0e\n", s, _exp, mant, *(uint64_t *)farg, *(double *)farg); } static void build_fargs_table(void) /* * Double precision: * Sign goes from zero to one (1 bit) * Exponent goes from 0 to ((1 << 12) - 1) (11 bits) * Mantissa goes from 1 to ((1 << 52) - 1) (52 bits) * + special values: * +0.0 : 0 0x000 0x0000000000000 => 0x0000000000000000 * -0.0 : 1 0x000 0x0000000000000 => 0x8000000000000000 * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000 * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000 * +QNaN : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000 * -QNaN : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000 * +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF * -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF * (8 values) * * Single precision * Sign: 1 bit * Exponent: 8 bits * Mantissa: 23 bits * +0.0 : 0 0x00 0x000000 => 0x00000000 * -0.0 : 1 0x00 0x000000 => 0x80000000 * +infinity : 0 0xFF 0x000000 => 0x7F800000 * -infinity : 1 0xFF 0x000000 => 0xFF800000 * +QNaN : 0 0xFF 0x400000 => 0x7FC00000 * -QNaN : 1 0xFF 0x400000 => 0xFFC00000 * +SNaN : 0 0xFF 0x3FFFFF => 0x7FBFFFFF * -SNaN : 1 0xFF 0x3FFFFF => 0xFFBFFFFF */ { uint64_t mant; uint16_t _exp, e1; int s; int i=0; if (nb_fargs) return; fargs = malloc( 16 * sizeof(double) ); for (s = 0; s < 2; s++) { for (e1 = 0x001;; e1 = ((e1 + 1) << 13) + 7) { if (e1 >= 0x400) e1 = 0x3fe; _exp = e1; for (mant = 0x0000000000001ULL; mant < (1ULL << 52); /* Add 'random' bits */ mant = ((mant + 0x4A6) << 29) + 0x359) { register_farg( &fargs[i++], s, _exp, mant ); } if (e1 == 0x3fe) break; } } // add a few smaller values to fargs . . . s = 0; _exp = 0x002; mant = 0x0000000000b01ULL; register_farg(&fargs[i++], s, _exp, mant); _exp = 0x000; mant = 0x00000203f0b3dULL; register_farg(&fargs[i++], s, _exp, mant); mant = 0x00000005a203dULL; register_farg(&fargs[i++], s, _exp, mant); s = 1; _exp = 0x002; mant = 0x0000000000b01ULL; register_farg(&fargs[i++], s, _exp, mant); _exp = 0x000; mant = 0x00000203f0b3dULL; register_farg(&fargs[i++], s, _exp, mant); nb_fargs = i; } typedef struct fp_test_args { int fra_idx; int frb_idx; int cr_flags; } fp_test_args_t; fp_test_args_t ftdiv_tests[] = { {0, 1, 0x8}, {9, 1, 0xa}, {1, 12, 0xa}, {0, 2, 0xa}, {1, 3, 0xa}, {3, 0, 0xa}, {0, 3, 0xa}, {4, 0, 0xa}, {7, 1, 0xe}, {8, 1, 0xe}, {1, 7, 0xe}, {0, 13, 0xe}, {5, 5, 0xe}, {5, 6, 0xe}, }; fp_test_args_t xscmpX_tests[] = { {8, 8, 0x2}, {8, 14, 0x8}, {8, 6, 0x8}, {8, 5, 0x8}, {8, 4, 0x8}, {8, 7, 0x8}, {8, 9, 0x1}, {8, 11, 0x1}, {14, 8, 0x4}, {14, 14, 0x2}, {14, 6, 0x8}, {14, 5, 0x8}, {14, 4, 0x8}, {14, 7, 0x8}, {14, 9, 0x1}, {14, 11, 0x1}, {6, 8, 0x4}, {6, 14, 0x4}, {6, 6, 0x2}, {6, 5, 0x2}, {6, 4, 0x8}, {6, 7, 0x8}, {6, 9, 0x1}, {6, 11, 0x1}, {5, 8, 0x4}, {5, 14, 0x4}, {5, 6, 0x2}, {5, 5, 0x2}, {5, 4, 0x8}, {5, 7, 0x8}, {5, 9, 0x1}, {5, 11, 0x1}, {4, 8, 0x4}, {4, 14, 0x4}, {4, 6, 0x4}, {4, 5, 0x4}, {4, 1, 0x8}, {4, 7, 0x8}, {4, 9, 0x1}, {4, 11, 0x1}, {7, 8, 0x4}, {7, 14, 0x4}, {7, 6, 0x4}, {7, 5, 0x4}, {7, 4, 0x4}, {7, 7, 0x2}, {7, 9, 0x1}, {7, 11, 0x1}, {10, 8, 0x1}, {10, 14, 0x1}, {10, 6, 0x1}, {10, 5, 0x1}, {10, 4, 0x1}, {10, 7, 0x1}, {10, 9, 0x1}, {10, 11, 0x1}, {12, 8, 0x1}, {12, 14, 0x1}, {12, 6, 0x1}, {12, 5, 0x1}, {12, 4, 0x1}, {12, 7, 0x1}, {12, 9, 0x1}, {12, 11, 0x1}, }; fp_test_args_t xsadddp_tests[] = { {8, 8, 0x0}, {8, 14, 0x0}, {8, 6, 0x0}, {8, 5, 0x0}, {8, 4, 0x0}, {8, 7, 0x0}, {8, 9, 0x0}, {8, 11, 0x0}, {14, 8, 0x0}, {14, 14, 0x0}, {14, 6, 0x0}, {14, 5, 0x0}, {14, 4, 0x0}, {14, 7, 0x0}, {14, 9, 0x0}, {14, 11, 0x0}, {6, 8, 0x0}, {6, 14, 0x0}, {6, 6, 0x0}, {6, 5, 0x0}, {6, 4, 0x0}, {6, 7, 0x0}, {6, 9, 0x0}, {6, 11, 0x0}, {5, 8, 0x0}, {5, 14, 0x0}, {5, 6, 0x0}, {5, 5, 0x0}, {5, 4, 0x0}, {5, 7, 0x0}, {5, 9, 0x0}, {5, 11, 0x0}, {4, 8, 0x0}, {4, 14, 0x0}, {4, 6, 0x0}, {4, 5, 0x0}, {4, 1, 0x0}, {4, 7, 0x0}, {4, 9, 0x0}, {4, 11, 0x0}, {7, 8, 0x0}, {7, 14, 0x0}, {7, 6, 0x0}, {7, 5, 0x0}, {7, 4, 0x0}, {7, 7, 0x0}, {7, 9, 0x0}, {7, 11, 0x0}, {10, 8, 0x0}, {10, 14, 0x0}, {10, 6, 0x0}, {10, 5, 0x0}, {10, 4, 0x0}, {10, 7, 0x0}, {10, 9, 0x0}, {10, 11, 0x0}, {12, 8, 0x0}, {12, 14, 0x0}, {12, 6, 0x0}, {12, 5, 0x0}, {12, 4, 0x0}, {12, 7, 0x0}, {12, 9, 0x0}, {12, 11, 0x0}, }; fp_test_args_t xsdivdp_tests[] = { {8, 8, 0x0}, {8, 14, 0x0}, {8, 6, 0x0}, {8, 5, 0x0}, {8, 4, 0x0}, {8, 7, 0x0}, {8, 9, 0x0}, {8, 11, 0x0}, {14, 8, 0x0}, {14, 14, 0x0}, {14, 6, 0x0}, {14, 5, 0x0}, {14, 4, 0x0}, {14, 7, 0x0}, {14, 9, 0x0}, {14, 11, 0x0}, {6, 8, 0x0}, {6, 14, 0x0}, {6, 6, 0x0}, {6, 5, 0x0}, {6, 4, 0x0}, {6, 7, 0x0}, {6, 9, 0x0}, {6, 11, 0x0}, {5, 8, 0x0}, {5, 14, 0x0}, {5, 6, 0x0}, {5, 5, 0x0}, {5, 4, 0x0}, {5, 7, 0x0}, {5, 9, 0x0}, {5, 11, 0x0}, {4, 8, 0x0}, {4, 14, 0x0}, {4, 6, 0x0}, {4, 5, 0x0}, {4, 1, 0x0}, {4, 7, 0x0}, {4, 9, 0x0}, {4, 11, 0x0}, {7, 8, 0x0}, {7, 14, 0x0}, {7, 6, 0x0}, {7, 5, 0x0}, {7, 4, 0x0}, {7, 7, 0x0}, {7, 9, 0x0}, {7, 11, 0x0}, {10, 8, 0x0}, {10, 14, 0x0}, {10, 6, 0x0}, {10, 5, 0x0}, {10, 4, 0x0}, {10, 7, 0x0}, {10, 9, 0x0}, {10, 11, 0x0}, {12, 8, 0x0}, {12, 14, 0x0}, {12, 6, 0x0}, {12, 5, 0x0}, {12, 4, 0x0}, {12, 7, 0x0}, {12, 9, 0x0}, {12, 11, 0x0}, }; fp_test_args_t xsmaddXdp_tests[] = { {8, 8, 0x0}, {8, 14, 0x0}, {8, 6, 0x0}, {8, 5, 0x0}, {8, 4, 0x0}, {8, 7, 0x0}, {8, 9, 0x0}, {8, 11, 0x0}, {14, 8, 0x0}, {14, 14, 0x0}, {14, 6, 0x0}, {14, 5, 0x0}, {14, 4, 0x0}, {14, 7, 0x0}, {14, 9, 0x0}, {14, 11, 0x0}, {6, 8, 0x0}, {6, 14, 0x0}, {6, 6, 0x0}, {6, 5, 0x0}, {6, 4, 0x0}, {6, 7, 0x0}, {6, 9, 0x0}, {6, 11, 0x0}, {5, 8, 0x0}, {5, 14, 0x0}, {5, 6, 0x0}, {5, 5, 0x0}, {5, 4, 0x0}, {5, 7, 0x0}, {5, 9, 0x0}, {5, 11, 0x0}, {4, 8, 0x0}, {4, 14, 0x0}, {4, 6, 0x0}, {4, 5, 0x0}, {4, 1, 0x0}, {4, 7, 0x0}, {4, 9, 0x0}, {4, 11, 0x0}, {7, 8, 0x0}, {7, 14, 0x0}, {7, 6, 0x0}, {7, 5, 0x0}, {7, 4, 0x0}, {7, 7, 0x0}, {7, 9, 0x0}, {7, 11, 0x0}, {10, 8, 0x0}, {10, 14, 0x0}, {10, 6, 0x0}, {10, 5, 0x0}, {10, 4, 0x0}, {10, 7, 0x0}, {10, 9, 0x0}, {10, 11, 0x0}, {12, 8, 0x0}, {12, 14, 0x0}, {12, 6, 0x0}, {12, 5, 0x0}, {12, 4, 0x0}, {12, 7, 0x0}, {12, 9, 0x0}, {12, 11, 0x0}, }; fp_test_args_t xsmsubXdp_tests[] = { {8, 8, 0x0}, {8, 14, 0x0}, {8, 6, 0x0}, {8, 5, 0x0}, {8, 4, 0x0}, {8, 7, 0x0}, {8, 9, 0x0}, {8, 11, 0x0}, {14, 8, 0x0}, {14, 14, 0x0}, {14, 6, 0x0}, {14, 5, 0x0}, {14, 4, 0x0}, {14, 7, 0x0}, {14, 9, 0x0}, {14, 11, 0x0}, {6, 8, 0x0}, {6, 14, 0x0}, {6, 6, 0x0}, {6, 5, 0x0}, {6, 4, 0x0}, {6, 7, 0x0}, {6, 9, 0x0}, {6, 11, 0x0}, {5, 8, 0x0}, {5, 14, 0x0}, {5, 6, 0x0}, {5, 5, 0x0}, {5, 4, 0x0}, {5, 7, 0x0}, {5, 9, 0x0}, {5, 11, 0x0}, {4, 8, 0x0}, {4, 14, 0x0}, {4, 6, 0x0}, {4, 5, 0x0}, {4, 1, 0x0}, {4, 7, 0x0}, {4, 9, 0x0}, {4, 11, 0x0}, {7, 8, 0x0}, {7, 14, 0x0}, {7, 6, 0x0}, {7, 5, 0x0}, {7, 4, 0x0}, {7, 7, 0x0}, {7, 9, 0x0}, {7, 11, 0x0}, {10, 8, 0x0}, {10, 14, 0x0}, {10, 6, 0x0}, {10, 5, 0x0}, {10, 4, 0x0}, {10, 7, 0x0}, {10, 9, 0x0}, {10, 11, 0x0}, {12, 8, 0x0}, {12, 14, 0x0}, {12, 6, 0x0}, {12, 5, 0x0}, {12, 4, 0x0}, {12, 7, 0x0}, {12, 9, 0x0}, {12, 11, 0x0}, }; fp_test_args_t xsnmaddXdp_tests[] = { {8, 8, 0x0}, {8, 14, 0x0}, {8, 6, 0x0}, {8, 5, 0x0}, {8, 4, 0x0}, {8, 7, 0x0}, {8, 9, 0x0}, {8, 11, 0x0}, {14, 8, 0x0}, {14, 14, 0x0}, {14, 6, 0x0}, {14, 5, 0x0}, {14, 4, 0x0}, {14, 7, 0x0}, {14, 9, 0x0}, {14, 11, 0x0}, {6, 8, 0x0}, {6, 14, 0x0}, {6, 6, 0x0}, {6, 5, 0x0}, {6, 4, 0x0}, {6, 7, 0x0}, {6, 9, 0x0}, {6, 11, 0x0}, {5, 8, 0x0}, {5, 14, 0x0}, {5, 6, 0x0}, {5, 5, 0x0}, {5, 4, 0x0}, {5, 7, 0x0}, {5, 9, 0x0}, {5, 11, 0x0}, {4, 8, 0x0}, {4, 14, 0x0}, {4, 6, 0x0}, {4, 5, 0x0}, {4, 1, 0x0}, {4, 7, 0x0}, {4, 9, 0x0}, {4, 11, 0x0}, {7, 8, 0x0}, {7, 14, 0x0}, {7, 6, 0x0}, {7, 5, 0x0}, {7, 4, 0x0}, {7, 7, 0x0}, {7, 9, 0x0}, {7, 11, 0x0}, {10, 8, 0x0}, {10, 14, 0x0}, {10, 6, 0x0}, {10, 5, 0x0}, {10, 4, 0x0}, {10, 7, 0x0}, {10, 9, 0x0}, {10, 11, 0x0}, {12, 8, 0x0}, {12, 14, 0x0}, {12, 6, 0x0}, {12, 5, 0x0}, {12, 4, 0x0}, {12, 7, 0x0}, {12, 9, 0x0}, {12, 11, 0x0}, }; fp_test_args_t xsmuldp_tests[] = { {8, 8, 0x0}, {8, 14, 0x0}, {8, 6, 0x0}, {8, 5, 0x0}, {8, 4, 0x0}, {8, 7, 0x0}, {8, 9, 0x0}, {8, 11, 0x0}, {14, 8, 0x0}, {14, 14, 0x0}, {14, 6, 0x0}, {14, 5, 0x0}, {14, 4, 0x0}, {14, 7, 0x0}, {14, 9, 0x0}, {14, 11, 0x0}, {6, 8, 0x0}, {6, 14, 0x0}, {6, 6, 0x0}, {6, 5, 0x0}, {6, 4, 0x0}, {6, 7, 0x0}, {6, 9, 0x0}, {6, 11, 0x0}, {5, 8, 0x0}, {5, 14, 0x0}, {5, 6, 0x0}, {5, 5, 0x0}, {5, 4, 0x0}, {5, 7, 0x0}, {5, 9, 0x0}, {5, 11, 0x0}, {4, 8, 0x0}, {4, 14, 0x0}, {4, 6, 0x0}, {4, 5, 0x0}, {4, 1, 0x0}, {4, 7, 0x0}, {4, 9, 0x0}, {4, 11, 0x0}, {7, 8, 0x0}, {7, 14, 0x0}, {7, 6, 0x0}, {7, 5, 0x0}, {7, 4, 0x0}, {7, 7, 0x0}, {7, 9, 0x0}, {7, 11, 0x0}, {10, 8, 0x0}, {10, 14, 0x0}, {10, 6, 0x0}, {10, 5, 0x0}, {10, 4, 0x0}, {10, 7, 0x0}, {10, 9, 0x0}, {10, 11, 0x0}, {12, 8, 0x0}, {12, 14, 0x0}, {12, 6, 0x0}, {12, 5, 0x0}, {12, 4, 0x0}, {12, 7, 0x0}, {12, 9, 0x0}, {12, 11, 0x0}, }; fp_test_args_t xssubdp_tests[] = { {8, 8, 0x0}, {8, 14, 0x0}, {8, 6, 0x0}, {8, 5, 0x0}, {8, 4, 0x0}, {8, 7, 0x0}, {8, 9, 0x0}, {8, 11, 0x0}, {14, 8, 0x0}, {14, 14, 0x0}, {14, 6, 0x0}, {14, 5, 0x0}, {14, 4, 0x0}, {14, 7, 0x0}, {14, 9, 0x0}, {14, 11, 0x0}, {6, 8, 0x0}, {6, 14, 0x0}, {6, 6, 0x0}, {6, 5, 0x0}, {6, 4, 0x0}, {6, 7, 0x0}, {6, 9, 0x0}, {6, 11, 0x0}, {5, 8, 0x0}, {5, 14, 0x0}, {5, 6, 0x0}, {5, 5, 0x0}, {5, 4, 0x0}, {5, 7, 0x0}, {5, 9, 0x0}, {5, 11, 0x0}, {4, 8, 0x0}, {4, 14, 0x0}, {4, 6, 0x0}, {4, 5, 0x0}, {4, 1, 0x0}, {4, 7, 0x0}, {4, 9, 0x0}, {4, 11, 0x0}, {7, 8, 0x0}, {7, 14, 0x0}, {7, 6, 0x0}, {7, 5, 0x0}, {7, 4, 0x0}, {7, 7, 0x0}, {7, 9, 0x0}, {7, 11, 0x0}, {10, 8, 0x0}, {10, 14, 0x0}, {10, 6, 0x0}, {10, 5, 0x0}, {10, 4, 0x0}, {10, 7, 0x0}, {10, 9, 0x0}, {10, 11, 0x0}, {12, 8, 0x0}, {12, 14, 0x0}, {12, 6, 0x0}, {12, 5, 0x0}, {12, 4, 0x0}, {12, 7, 0x0}, {12, 9, 0x0}, {12, 11, 0x0}, }; static int nb_special_fargs; static double * spec_fargs; static void build_special_fargs_table(void) { /* The special floating point values created below are for * use in the ftdiv tests for setting the fe_flag and fg_flag, * but they can also be used for other tests (e.g., xscmpudp). * * Note that fl_flag is 'always '1' on ppc64 Linux. * Entry Sign Exp fraction Special value 0 0 3fd 0x8000000000000ULL Positive finite number 1 0 404 0xf000000000000ULL ... 2 0 001 0x8000000b77501ULL ... 3 0 7fe 0x800000000051bULL ... 4 0 012 0x3214569900000ULL ... 5 0 000 0x0000000000000ULL +0.0 (+zero) 6 1 000 0x0000000000000ULL -0.0 (-zero) 7 0 7ff 0x0000000000000ULL +infinity 8 1 7ff 0x0000000000000ULL -infinity 9 0 7ff 0x7FFFFFFFFFFFFULL +SNaN 10 1 7ff 0x7FFFFFFFFFFFFULL -SNaN 11 0 7ff 0x8000000000000ULL +QNaN 12 1 7ff 0x8000000000000ULL -QNaN 13 1 000 0x8340000078000ULL Denormalized val (zero exp and non-zero fraction) 14 1 40d 0x0650f5a07b353ULL Negative finite number */ uint64_t mant; uint16_t _exp; int s; int i = 0; if (spec_fargs) return; spec_fargs = malloc( 16 * sizeof(double) ); // #0 s = 0; _exp = 0x3fd; mant = 0x8000000000000ULL; register_farg(&spec_fargs[i++], s, _exp, mant); // #1 s = 0; _exp = 0x404; mant = 0xf000000000000ULL; register_farg(&spec_fargs[i++], s, _exp, mant); /* None of the ftdiv tests succeed. * FRA = value #0; FRB = value #1 * ea_ = -2; e_b = 5 * fl_flag || fg_flag || fe_flag = 100 */ /************************************************* * fe_flag tests * *************************************************/ /* fe_flag <- 1 if FRA is a NaN * FRA = value #9; FRB = value #1 * e_a = 1024; e_b = 5 * fl_flag || fg_flag || fe_flag = 101 */ /* fe_flag <- 1 if FRB is a NaN * FRA = value #1; FRB = value #12 * e_a = 5; e_b = 1024 * fl_flag || fg_flag || fe_flag = 101 */ /* fe_flag <- 1 if e_b <= -1022 * FRA = value #0; FRB = value #2 * e_a = -2; e_b = -1022 * fl_flag || fg_flag || fe_flag = 101 * */ // #2 s = 0; _exp = 0x001; mant = 0x8000000b77501ULL; register_farg(&spec_fargs[i++], s, _exp, mant); /* fe_flag <- 1 if e_b >= 1021 * FRA = value #1; FRB = value #3 * e_a = 5; e_b = 1023 * fl_flag || fg_flag || fe_flag = 101 */ // #3 s = 0; _exp = 0x7fe; mant = 0x800000000051bULL; register_farg(&spec_fargs[i++], s, _exp, mant); /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023 * Let FRA = value #3 and FRB be value #0. * e_a = 1023; e_b = -2 * fl_flag || fg_flag || fe_flag = 101 */ /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023 * Let FRA = value #0 above and FRB be value #3 above * e_a = -2; e_b = 1023 * fl_flag || fg_flag || fe_flag = 101 */ /* fe_flag <- 1 if FRA != 0 && e_a <= -970 * Let FRA = value #4 and FRB be value #0 * e_a = -1005; e_b = -2 * fl_flag || fg_flag || fe_flag = 101 */ // #4 s = 0; _exp = 0x012; mant = 0x3214569900000ULL; register_farg(&spec_fargs[i++], s, _exp, mant); /************************************************* * fg_flag tests * *************************************************/ /* fg_flag <- 1 if FRA is an Infinity * NOTE: FRA = Inf also sets fe_flag * Do two tests, using values #7 and #8 (+/- Inf) for FRA. * Test 1: * Let FRA be value #7 and FRB be value #1 * e_a = 1024; e_b = 5 * fl_flag || fg_flag || fe_flag = 111 * * Test 2: * Let FRA be value #8 and FRB be value #1 * e_a = 1024; e_b = 5 * fl_flag || fg_flag || fe_flag = 111 * */ /* fg_flag <- 1 if FRB is an Infinity * NOTE: FRB = Inf also sets fe_flag * Let FRA be value #1 and FRB be value #7 * e_a = 5; e_b = 1024 * fl_flag || fg_flag || fe_flag = 111 */ /* fg_flag <- 1 if FRB is denormalized * NOTE: e_b < -1022 ==> fe_flag <- 1 * Let FRA be value #0 and FRB be value #13 * e_a = -2; e_b = -1023 * fl_flag || fg_flag || fe_flag = 111 */ /* fg_flag <- 1 if FRB is +zero * NOTE: FRA = Inf also sets fe_flag * Let FRA = val #5; FRB = val #5 * ea_ = -1023; e_b = -1023 * fl_flag || fg_flag || fe_flag = 111 */ /* fg_flag <- 1 if FRB is -zero * NOTE: FRA = Inf also sets fe_flag * Let FRA = val #5; FRB = val #6 * ea_ = -1023; e_b = -1023 * fl_flag || fg_flag || fe_flag = 111 */ /* Special values */ /* +0.0 : 0 0x000 0x0000000000000 */ // #5 s = 0; _exp = 0x000; mant = 0x0000000000000ULL; register_farg(&spec_fargs[i++], s, _exp, mant); /* -0.0 : 1 0x000 0x0000000000000 */ // #6 s = 1; _exp = 0x000; mant = 0x0000000000000ULL; register_farg(&spec_fargs[i++], s, _exp, mant); /* +infinity : 0 0x7FF 0x0000000000000 */ // #7 s = 0; _exp = 0x7FF; mant = 0x0000000000000ULL; register_farg(&spec_fargs[i++], s, _exp, mant); /* -infinity : 1 0x7FF 0x0000000000000 */ // #8 s = 1; _exp = 0x7FF; mant = 0x0000000000000ULL; register_farg(&spec_fargs[i++], s, _exp, mant); /* +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF */ // #9 s = 0; _exp = 0x7FF; mant = 0x7FFFFFFFFFFFFULL; register_farg(&spec_fargs[i++], s, _exp, mant); /* -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF */ // #10 s = 1; _exp = 0x7FF; mant = 0x7FFFFFFFFFFFFULL; register_farg(&spec_fargs[i++], s, _exp, mant); /* +QNaN : 0 0x7FF 0x8000000000000 */ // #11 s = 0; _exp = 0x7FF; mant = 0x8000000000000ULL; register_farg(&spec_fargs[i++], s, _exp, mant); /* -QNaN : 1 0x7FF 0x8000000000000 */ // #12 s = 1; _exp = 0x7FF; mant = 0x8000000000000ULL; register_farg(&spec_fargs[i++], s, _exp, mant); /* denormalized value */ // #13 s = 1; _exp = 0x000; mant = 0x8340000078000ULL; register_farg(&spec_fargs[i++], s, _exp, mant); /* Negative finite number */ // #14 s = 1; _exp = 0x40d; mant = 0x0650f5a07b353ULL; register_farg(&spec_fargs[i++], s, _exp, mant); nb_special_fargs = i; } struct test_table { test_func_t test_category; char * name; }; struct p7_fp_test { test_func_t test_func; const char *name; int single; // 1=single precision result; 0=double precision result }; typedef enum { VX_FP_CMP, VX_FP_SMA, VX_FP_SMS, VX_FP_SNMA, VX_FP_OTHER } vx_fp_test_type; struct vx_fp_test { test_func_t test_func; const char *name; fp_test_args_t * targs; int num_tests; vx_fp_test_type test_type; }; struct xs_conv_test { test_func_t test_func; const char *name; int num_tests; }; typedef enum { VSX_LOAD =1, VSX_LOAD_SPLAT, VSX_STORE } vsx_ldst_type; struct ldst_test { test_func_t test_func; const char *name; void * base_addr; uint32_t offset; int num_words_to_process; vsx_ldst_type type; }; typedef enum { VSX_AND = 1, VSX_XOR, VSX_ANDC, VSX_OR, VSX_NOR } vsx_log_op; struct vsx_logic_test { test_func_t test_func; const char *name; vsx_log_op op; }; struct vsx_move_test { test_func_t test_func; const char *name; }; struct vsx_permute_test { test_func_t test_func; const char *name; unsigned int xa[4]; unsigned int xb[4]; }; static vector unsigned int vec_out, vec_inA, vec_inB; static void test_lxsdx(void) { __asm__ __volatile__ ("lxsdx %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15)); } static void test_lxvd2x(void) { __asm__ __volatile__ ("lxvd2x %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15)); } static void test_lxvdsx(void) { __asm__ __volatile__ ("lxvdsx %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15)); } static void test_lxvw4x(void) { __asm__ __volatile__ ("lxvw4x %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15)); } static void test_stxsdx(void) { __asm__ __volatile__ ("stxsdx %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15)); } static void test_stxvd2x(void) { __asm__ __volatile__ ("stxvd2x %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15)); } static void test_stxvw4x(void) { __asm__ __volatile__ ("stxvw4x %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15)); } static void test_xxlxor(void) { __asm__ __volatile__ ("xxlxor %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxlor(void) { __asm__ __volatile__ ("xxlor %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxlnor(void) { __asm__ __volatile__ ("xxlnor %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxland(void) { __asm__ __volatile__ ("xxland %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxlandc(void) { __asm__ __volatile__ ("xxlandc %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxmrghw(void) { __asm__ __volatile__ ("xxmrghw %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxmrglw(void) { __asm__ __volatile__ ("xxmrglw %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxpermdi_00(void) { __asm__ __volatile__ ("xxpermdi %x0, %x1, %x2, 0x0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxpermdi_01(void) { __asm__ __volatile__ ("xxpermdi %x0, %x1, %x2, 0x1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxpermdi_10(void) { __asm__ __volatile__ ("xxpermdi %x0, %x1, %x2, 0x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxpermdi_11(void) { __asm__ __volatile__ ("xxpermdi %x0, %x1, %x2, 0x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxsldwi_0(void) { __asm__ __volatile__ ("xxsldwi %x0, %x1, %x2, 0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxsldwi_1(void) { __asm__ __volatile__ ("xxsldwi %x0, %x1, %x2, 1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxsldwi_2(void) { __asm__ __volatile__ ("xxsldwi %x0, %x1, %x2, 2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xxsldwi_3(void) { __asm__ __volatile__ ("xxsldwi %x0, %x1, %x2, 3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_fcfids (void) { __asm__ __volatile__ ("fcfids %0, %1" : "=f" (f17): "d" (f14)); } static void test_fcfidus (void) { __asm__ __volatile__ ("fcfidus %0, %1" : "=f" (f17): "d" (f14)); } static void test_fcfidu (void) { __asm__ __volatile__ ("fcfidu %0, %1" : "=f" (f17): "d" (f14)); } static void test_xsabsdp (void) { __asm__ __volatile__ ("xsabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); } static void test_xscpsgndp (void) { __asm__ __volatile__ ("xscpsgndp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xsnabsdp (void) { __asm__ __volatile__ ("xsnabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); } static void test_xsnegdp (void) { __asm__ __volatile__ ("xsnegdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); } static int do_cmpudp; static void test_xscmp (void) { if (do_cmpudp) __asm__ __volatile__ ("xscmpudp cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB)); else __asm__ __volatile__ ("xscmpodp cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB)); } static void test_xsadddp(void) { __asm__ __volatile__ ("xsadddp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xsdivdp(void) { __asm__ __volatile__ ("xsdivdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static int do_adp; static void test_xsmadd(void) { if (do_adp) __asm__ __volatile__ ("xsmaddadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); else __asm__ __volatile__ ("xsmaddmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xsmsub(void) { if (do_adp) __asm__ __volatile__ ("xsmsubadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); else __asm__ __volatile__ ("xsmsubmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xsnmadd(void) { if (do_adp) __asm__ __volatile__ ("xsnmaddadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); else __asm__ __volatile__ ("xsnmaddmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xsmuldp(void) { __asm__ __volatile__ ("xsmuldp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xssubdp(void) { __asm__ __volatile__ ("xssubdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); } static void test_xscvdpsxds (void) { __asm__ __volatile__ ("xscvdpsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); } static void test_xscvsxddp (void) { __asm__ __volatile__ ("xscvsxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); } static void test_xscvuxddp (void) { __asm__ __volatile__ ("xscvuxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); } static unsigned int vstg[] __attribute__ ((aligned (16))) = { 0, 0, 0,0, 0, 0, 0, 0 }; #define NUM_VSTG_INTS (sizeof vstg/sizeof vstg[0]) #define NUM_VSTG_VECS (NUM_VSTG_INTS/4) static unsigned int viargs[] __attribute__ ((aligned (16))) = { 0x01234567, 0x89abcdef, 0x00112233, 0x44556677, 0x8899aabb, 0x91929394, 0xa1a2a3a4, 0xb1b2b3b4, 0xc1c2c3c4, 0xd1d2d3d4, 0x7a6b5d3e }; #define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0]) #define NUM_VIARGS_VECS (NUM_VIARGS_INTS/4) static ldst_test_t ldst_tests[] = { { &test_lxsdx, "lxsdx", viargs, 0, 2, VSX_LOAD }, { &test_lxsdx, "lxsdx", viargs, 4, 2, VSX_LOAD }, { &test_lxvd2x, "lxvd2x", viargs, 0, 4, VSX_LOAD }, { &test_lxvd2x, "lxvd2x", viargs, 4, 4, VSX_LOAD }, { &test_lxvdsx, "lxvdsx", viargs, 0, 4, VSX_LOAD_SPLAT }, { &test_lxvdsx, "lxvdsx", viargs, 4, 4, VSX_LOAD_SPLAT }, { &test_lxvw4x, "lxvw4x", viargs, 0, 4, VSX_LOAD }, { &test_lxvw4x, "lxvw4x", viargs, 4, 4, VSX_LOAD }, { &test_stxsdx, "stxsdx", vstg, 0, 2, VSX_STORE }, { &test_stxsdx, "stxsdx", vstg, 4, 2, VSX_STORE }, { &test_stxvd2x, "stxvd2x", vstg, 0, 4, VSX_STORE }, { &test_stxvd2x, "stxvd2x", vstg, 4, 4, VSX_STORE }, { &test_stxvw4x, "stxvw4x", vstg, 0, 4, VSX_STORE }, { &test_stxvw4x, "stxvw4x", vstg, 4, 4, VSX_STORE }, { NULL, NULL, NULL, 0, 0, 0 } }; static logic_test_t logic_tests[] = { { &test_xxlxor, "xxlxor", VSX_XOR }, { &test_xxlor, "xxlor", VSX_OR } , { &test_xxlnor, "xxlnor", VSX_NOR }, { &test_xxland, "xxland", VSX_AND }, { &test_xxlandc, "xxlandc", VSX_ANDC }, { NULL, NULL, 0}}; static move_test_t move_tests[] = { { &test_xsabsdp, "xsabsdp" }, { &test_xscpsgndp, "xscpsgndp" }, { &test_xsnabsdp, "xsnabsdp" }, { &test_xsnegdp, "xsnegdp" }, { NULL, NULL } }; static permute_test_t permute_tests[] = { { &test_xxmrghw, "xxmrghw", { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ }, { &test_xxmrghw, "xxmrghw", { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff }, /* XA input */ { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XB input */ }, { &test_xxmrglw, "xxmrglw", { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ }, { &test_xxmrglw, "xxmrglw", { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff}, /* XA input */ { 0x11111111, 0x22222222, 0x33333333, 0x44444444}, /* XB input */ }, { &test_xxpermdi_00, "xxpermdi DM=00", { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ }, { &test_xxpermdi_01, "xxpermdi DM=01", { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ }, { &test_xxpermdi_10, "xxpermdi DM=10", { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ }, { &test_xxpermdi_11, "xxpermdi DM=11", { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ }, { &test_xxsldwi_0, "xxsldwi SHW=0", { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ }, { &test_xxsldwi_1, "xxsldwi SHW=1", { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ }, { &test_xxsldwi_2, "xxsldwi SHW=2", { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ }, { &test_xxsldwi_3, "xxsldwi SHW=3", { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ }, { NULL, NULL } }; static fp_test_t fp_tests[] = { { &test_fcfids, "fcfids", 1 }, { &test_fcfidus, "fcfidus", 1 }, { &test_fcfidu, "fcfidu", 1 }, { NULL, NULL, 0 }, }; static vx_fp_test_t vx_fp_tests[] = { { &test_xscmp, "xscmp", xscmpX_tests, 64, VX_FP_CMP}, { &test_xsadddp, "xsadddp", xsadddp_tests, 64, VX_FP_OTHER}, { &test_xsdivdp, "xsdivdp", xsdivdp_tests, 64, VX_FP_OTHER}, { &test_xsmadd, "xsmadd", xsmaddXdp_tests, 64, VX_FP_SMA}, { &test_xsmsub, "xsmsub", xsmsubXdp_tests, 64, VX_FP_SMS}, { &test_xsnmadd, "xsnmadd", xsnmaddXdp_tests, 64, VX_FP_SNMA}, { & test_xsmuldp, "xsmuldp", xsmuldp_tests, 64, VX_FP_OTHER}, { & test_xssubdp, "xssubdp", xssubdp_tests, 64, VX_FP_OTHER}, { NULL, NULL, NULL, 0, 0 } }; static xs_conv_test_t xs_conv_tests[] = { { &test_xscvdpsxds, "xscvdpsxds", 15}, { &test_xscvsxddp, "xscvsxddp", 15}, { &test_xscvuxddp, "xscvuxddp", 15}, { NULL, NULL, 0} }; #ifdef __powerpc64__ static void test_ldbrx(void) { int i; HWord_t reg_out; unsigned char * byteIn, * byteOut; r14 = (HWord_t)viargs; // Just try the instruction an arbitrary number of times at different r15 offsets. for (i = 0; i < 3; i++) { int j, k; reg_out = 0; r15 = i * 4; __asm__ __volatile__ ("ldbrx %0, %1, %2" : "=r" (reg_out): "b" (r14),"r" (r15)); byteIn = ((unsigned char *)(r14 + r15)); byteOut = (unsigned char *)®_out; printf("ldbrx:"); for (k = 0; k < 8; k++) { printf( " %02x", (byteIn[k])); } printf(" (reverse) =>"); for (j = 0; j < 8; j++) { printf( " %02x", (byteOut[j])); } printf("\n"); } printf( "\n" ); } static void test_popcntd(void) { uint64_t res; unsigned long long src = 0x9182736405504536ULL; r14 = src; __asm__ __volatile__ ("popcntd %0, %1" : "=r" (res): "r" (r14)); printf("popcntd: 0x%llx => %d\n", src, (int)res); printf( "\n" ); } #endif static void test_lfiwzx(void) { unsigned int i; unsigned int * src; uint64_t reg_out; r14 = (HWord_t)viargs; // Just try the instruction an arbitrary number of times at different r15 offsets. for (i = 0; i < 3; i++) { reg_out = 0; r15 = i * 4; __asm__ __volatile__ ("lfiwzx %0, %1, %2" : "=d" (reg_out): "b" (r14),"r" (r15)); src = ((unsigned int *)(r14 + r15)); printf("lfiwzx: %u => %llu.00\n", *src, (unsigned long long)reg_out); } printf( "\n" ); } static void test_vx_fp_ops(void) { test_func_t func; int k; char * test_name = (char *)malloc(20); k = 0; build_special_fargs_table(); while ((func = vx_fp_tests[k].test_func)) { int i, condreg, repeat = 0; unsigned int flags; unsigned long long * frap, * frbp, * dst; vx_fp_test_t test_group = vx_fp_tests[k]; vx_fp_test_type test_type = test_group.test_type; switch (test_type) { case VX_FP_CMP: strcpy(test_name, "xscmp"); if (!repeat) { repeat = 1; strcat(test_name, "udp"); do_cmpudp = 1; } break; case VX_FP_SMA: case VX_FP_SMS: case VX_FP_SNMA: if (test_type == VX_FP_SMA) strcpy(test_name, "xsmadd"); else if (test_type == VX_FP_SMS) strcpy(test_name, "xsmsub"); else strcpy(test_name, "xsnmadd"); if (!repeat) { repeat = 1; strcat(test_name, "adp"); do_adp = 1; } break; case VX_FP_OTHER: strcpy(test_name, test_group.name); break; default: printf("ERROR: Invalid VX FP test type %d\n", test_type); exit(1); } again: for (i = 0; i < test_group.num_tests; i++) { unsigned int * inA, * inB, * pv; double * dpA = (double *)&vec_inA; double * dpB = (double *)&vec_inB; double * dpT = (double *)&vec_out; fp_test_args_t aTest = test_group.targs[i]; inA = (unsigned int *)&spec_fargs[aTest.fra_idx]; inB = (unsigned int *)&spec_fargs[aTest.frb_idx]; frap = (unsigned long long *)&spec_fargs[aTest.fra_idx]; frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx]; // Only need to copy one doubleword into each vector's element 0 if (isLE) { // With LE, vector element 0 is the second doubleword from the left memset(dpA, 0, 8); memset(dpB, 0, 8); dpA++; dpB++; } memcpy(dpA, inA, 8); memcpy(dpB, inB, 8); switch (test_type) { case VX_FP_CMP: SET_FPSCR_ZERO; SET_CR_XER_ZERO; (*func)(); GET_CR(flags); condreg = (flags & 0x0f000000) >> 24; printf("#%d: %s %016llx <=> %016llx ? %x (CRx)\n", i, test_name, *frap, *frbp, condreg); // printf("\tFRA: %e; FRB: %e\n", spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx]); if ( condreg != aTest.cr_flags) { printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, condreg); } break; case VX_FP_SMA: case VX_FP_SMS: case VX_FP_SNMA: case VX_FP_OTHER: { int idx; unsigned long long vsr_XT; pv = (unsigned int *)&vec_out; // clear vec_out for (idx = 0; idx < 4; idx++, pv++) *pv = 0; if (test_type != VX_FP_OTHER) { /* Then we need a third src argument, which is stored in element 0 of * VSX[XT] -- i.e., vec_out. For the xs<ZZZ>mdp cases, VSX[XT] holds * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds * src2 and VSX[XB] holds src3. The fp_test_args_t that holds the test * data (input args) contain only two inputs, so I arbitrarily * use spec_fargs elements 4 and 14 (alternating) for the third source * argument. We can use the same input data for a given pair of * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus * the expected result should be the same. */ int extra_arg_idx; if (i % 2) extra_arg_idx = 4; else extra_arg_idx = 14; if (repeat) { /* We're on the first time through of one of the VX_FP_SMx * test types, meaning we're testing a xs<ZZZ>adp case, thus we * have to swap inputs as described above: * src2 <= VSX[XT] * src3 <= VSX[XB] */ if (isLE) dpT++; memcpy(dpT, inB, 8); // src2 memcpy(dpB, &spec_fargs[extra_arg_idx], 8); //src3 frbp = (unsigned long long *)&spec_fargs[extra_arg_idx]; } else { // Don't need to init src2, as it's done before the switch() if (isLE) dpT++; memcpy(dpT, &spec_fargs[extra_arg_idx], 8); //src3 } memcpy(&vsr_XT, dpT, 8); } (*func)(); dst = (unsigned long long *) &vec_out; if (isLE) dst++; if (test_type == VX_FP_OTHER) printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name, *frap, *frbp, *dst); else printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i, test_name, vsr_XT, *frap, *frbp, *dst ); /* { // Debug code. Keep this block commented out except when debugging. double result, expected; memcpy(&result, dst, 8); memcpy(&expected, &aTest.dp_bin_result, 8); printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n", spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx], expected, result ); } */ break; } } } printf( "\n" ); if (repeat) { repeat = 0; switch (test_type) { case VX_FP_CMP: strcpy(test_name, "xscmp"); strcat(test_name, "odp"); do_cmpudp = 0; break; case VX_FP_SMA: case VX_FP_SMS: case VX_FP_SNMA: if (test_type == VX_FP_SMA) strcpy(test_name, "xsmadd"); else if (test_type == VX_FP_SMS) strcpy(test_name, "xsmsub"); else strcpy(test_name, "xsnmadd"); strcat(test_name, "mdp"); do_adp = 0; break; case VX_FP_OTHER: break; } goto again; } k++; } printf( "\n" ); free(test_name); } static void test_xs_conv_ops(void) { test_func_t func; int k = 0; double * dpB = (double *)&vec_inB; if (isLE) { memset(dpB, 0, 8); dpB++; } build_special_fargs_table(); while ((func = xs_conv_tests[k].test_func)) { int i; unsigned long long * frbp, * dst; xs_conv_test_t test_group = xs_conv_tests[k]; for (i = 0; i < test_group.num_tests; i++) { unsigned int * inB, * pv; int idx; inB = (unsigned int *)&spec_fargs[i]; frbp = (unsigned long long *)&spec_fargs[i]; memcpy(dpB, inB, 8); pv = (unsigned int *)&vec_out; // clear vec_out for (idx = 0; idx < 4; idx++, pv++) *pv = 0; (*func)(); dst = (unsigned long long *) &vec_out; if (isLE) dst++; printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp, *dst); } k++; printf("\n"); } printf( "\n" ); } static void do_load_test(ldst_test_t loadTest) { test_func_t func; unsigned int *src, *dst; int splat = loadTest.type == VSX_LOAD_SPLAT ? 1: 0; int i, j, m, k; i = j = 0; func = loadTest.test_func; for (i = 0, r14 = (HWord_t) loadTest.base_addr; i < NUM_VIARGS_VECS; i++) { int again; j = 0; r14 += i * 16; do { unsigned int * pv = (unsigned int *)&vec_out; int idx; // clear vec_out for (idx = 0; idx < 4; idx++, pv+=idx) *pv = 0; again = 0; r15 = j; // execute test insn (*func)(); src = (unsigned int*) (((unsigned char *)r14) + j); dst = (unsigned int*) &vec_out; printf( "%s:", loadTest.name); for (m = 0; m < loadTest.num_words_to_process; m++) { printf( " %08x", src[splat ? m % 2 : m]); } printf( " =>"); m = 0; k = loadTest.num_words_to_process; if (isLE) { if (loadTest.num_words_to_process == 2) { m = 2; k += 2; } } for (; m < k; m++) { printf( " %08x", dst[m]); } printf("\n"); if (j == 0 && loadTest.offset) { again = 1; j += loadTest.offset; } } while (again); } } static void do_store_test ( ldst_test_t storeTest ) { test_func_t func; unsigned int *src, *dst; int m; func = storeTest.test_func; r14 = (HWord_t) storeTest.base_addr; r15 = (HWord_t) storeTest.offset; unsigned int * pv = (unsigned int *) storeTest.base_addr; int idx; // clear out storage destination for (idx = 0; idx < 4; idx++, pv += idx) *pv = 0; memcpy(&vec_inA, &viargs[0], sizeof(vector unsigned char)); // execute test insn (*func)(); src = &viargs[0]; dst = (unsigned int*) (((unsigned char *) r14) + storeTest.offset); printf( "%s:", storeTest.name ); for (m = 0; m < storeTest.num_words_to_process; m++) { printf( " %08x", src[m] ); } printf( " =>" ); for (m = 0; m < storeTest.num_words_to_process; m++) { printf( " %08x", dst[m] ); } printf( "\n" ); } static void test_ldst(void) { int k = 0; while (ldst_tests[k].test_func) { if (ldst_tests[k].type == VSX_STORE) do_store_test(ldst_tests[k]); else do_load_test(ldst_tests[k]); k++; printf("\n"); } } static void test_ftdiv(void) { int i, num_tests, crx; unsigned int flags; unsigned long long * frap, * frbp; build_special_fargs_table(); num_tests = sizeof ftdiv_tests/sizeof ftdiv_tests[0]; for (i = 0; i < num_tests; i++) { fp_test_args_t aTest = ftdiv_tests[i]; f14 = spec_fargs[aTest.fra_idx]; f15 = spec_fargs[aTest.frb_idx]; frap = (unsigned long long *)&spec_fargs[aTest.fra_idx]; frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx]; SET_FPSCR_ZERO; SET_CR_XER_ZERO; __asm__ __volatile__ ("ftdiv cr1, %0, %1" : : "d" (f14), "d" (f15)); GET_CR(flags); crx = (flags & 0x0f000000) >> 24; printf( "ftdiv: %016llx <=> %016llx ? %x (CRx)\n", *frap, *frbp, crx); // printf("\tFRA: %e; FRB: %e\n", f14, f15); if ( crx != aTest.cr_flags) { printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, crx); } } printf( "\n" ); } static void test_p7_fpops ( void ) { int k = 0; test_func_t func; build_fargs_table(); while ((func = fp_tests[k].test_func)) { float res; double resd; unsigned long long u0; int i; int res32 = strcmp(fp_tests[k].name, "fcfidu"); for (i = 0; i < nb_fargs; i++) { u0 = *(unsigned long long *) (&fargs[i]); f14 = fargs[i]; (*func)(); if (res32) { res = f17; printf( "%s %016llx => (raw sp) %08x)", fp_tests[k].name, u0, *((unsigned int *)&res)); } else { resd = f17; printf( "%s %016llx => (raw sp) %016llx)", fp_tests[k].name, u0, *(unsigned long long *)(&resd)); } printf( "\n" ); } k++; printf( "\n" ); } } static void test_vsx_logic(void) { logic_test_t aTest; test_func_t func; int k; k = 0; while ((func = logic_tests[k].test_func)) { unsigned int * pv; int startA, startB; unsigned int * inA, * inB, * dst; int idx, i; startA = 0; aTest = logic_tests[k]; for (i = 0; i <= (NUM_VIARGS_INTS - (NUM_VIARGS_VECS * sizeof(int))); i++, startA++) { startB = startA + 4; pv = (unsigned int *)&vec_out; inA = &viargs[startA]; inB = &viargs[startB]; memcpy(&vec_inA, inA, sizeof(vector unsigned char)); memcpy(&vec_inB, inB, sizeof(vector unsigned char)); // clear vec_out for (idx = 0; idx < 4; idx++, pv++) *pv = 0; // execute test insn (*func)(); dst = (unsigned int*) &vec_out; printf( "%s:", aTest.name); printf( " %08x %08x %08x %08x %s", inA[0], inA[1], inA[2], inA[3], aTest.name); printf( " %08x %08x %08x %08x", inB[0], inB[1], inB[2], inB[3]); printf(" => %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]); } k++; } printf( "\n" ); } static vector unsigned long long vec_args[] __attribute__ ((aligned (16))) = { { 0x0123456789abcdefULL, 0x0011223344556677ULL}, { 0x8899aabb19293942ULL, 0xa1a2a3a4b1b2b3b4ULL}, { 0xc1c2c3c4d1d2d3d4ULL, 0x7a6b5d3efc032778ULL} }; #define NUM_VEC_ARGS_LONGS (sizeof vec_args/sizeof vec_args[0]) static void test_move_ops (void) { move_test_t aTest; test_func_t func; int k; k = 0; while ((func = move_tests[k].test_func)) { unsigned int * pv; int startA, startB; unsigned long long * inA, * inB, * dst; int use_vecA = (strcmp(move_tests[k].name, "xscpsgndp") == 0); int idx; inA = NULL; aTest = move_tests[k]; for (startB = 0; startB < NUM_VEC_ARGS_LONGS; startB++) { inB = (unsigned long long *)&vec_args[startB]; memcpy(&vec_inB, inB, sizeof(vector unsigned char)); if (isLE) inB++; startA = 0; repeat: if (use_vecA) { inA = (unsigned long long *)&vec_args[startA]; memcpy(&vec_inA, inA, sizeof(vector unsigned char)); startA++; } pv = (unsigned int *)&vec_out; // clear vec_out for (idx = 0; idx < 4; idx++, pv++) *pv = 0; // execute test insn (*func)(); dst = (unsigned long long *) &vec_out; if (isLE) { dst++; inA++; } printf( "%s:", aTest.name); if (use_vecA) printf( " X[A]: %016llx ", *inA); printf( " X[B]: %016llx", *inB); printf(" => %016llx\n", *dst); if (use_vecA && startA < NUM_VEC_ARGS_LONGS) goto repeat; } k++; printf( "\n" ); } } static void test_permute_ops (void) { permute_test_t *aTest; unsigned int *dst = (unsigned int *) &vec_out; for (aTest = &(permute_tests[0]); aTest->test_func != NULL; aTest++) { /* Grab test input and clear output vector. */ memcpy(&vec_inA, aTest->xa, sizeof(vec_inA)); memcpy(&vec_inB, aTest->xb, sizeof(vec_inB)); memset(dst, 0, sizeof(vec_out)); /* execute test insn */ aTest->test_func(); printf( "%s:\n", aTest->name); printf( " XA[%08x,%08x,%08x,%08x]\n", aTest->xa[0], aTest->xa[1], aTest->xa[2], aTest->xa[3]); printf( " XB[%08x,%08x,%08x,%08x]\n", aTest->xb[0], aTest->xb[1], aTest->xb[2], aTest->xb[3]); printf( " => XT[%08x,%08x,%08x,%08x]\n", dst[0], dst[1], dst[2], dst[3]); } printf( "\n" ); } static test_table_t all_tests[] = { { &test_ldst, "Test VSX load/store instructions" }, { &test_vsx_logic, "Test VSX logic instructions" }, #ifdef __powerpc64__ { &test_ldbrx, "Test ldbrx instruction" }, { &test_popcntd, "Test popcntd instruction" }, #endif { &test_lfiwzx, "Test lfiwzx instruction" }, { &test_p7_fpops, "Test P7 floating point convert instructions"}, { &test_ftdiv, "Test ftdiv instruction" }, { &test_move_ops, "Test VSX move instructions"}, { &test_permute_ops, "Test VSX permute instructions"}, { &test_vx_fp_ops, "Test VSX floating point instructions"}, { &test_xs_conv_ops, "Test VSX scalar integer conversion instructions" }, { NULL, NULL } }; #endif // HAS_VSX int main(int argc, char *argv[]) { #ifdef HAS_VSX test_table_t aTest; test_func_t func; int i = 0; while ((func = all_tests[i].test_category)) { aTest = all_tests[i]; printf( "%s\n", aTest.name ); (*func)(); i++; } #endif // HAS _VSX return 0; }