/*  Copyright (C) 2012 IBM

 Author: Maynard Johnson <maynardj@us.ibm.com>
         Carl Love <carll@us.ibm.com>

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License as
 published by the Free Software Foundation; either version 2 of the
 License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 02111-1307, USA.

 The GNU General Public License is contained in the file COPYING.
 */

/* Exercises the DFP shift (dscri/dscli and quad forms) and conversion
 * (dctdp, drsp, dctfix, dcffix and quad forms) instructions under every
 * rounding mode and prints the raw result bits.  All of the test code is
 * compiled only when HAS_DFP is defined; otherwise main() just returns 0.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <elf.h>
#include <link.h>

#define PPC_FEATURE_HAS_VSX 0x00000080 /* Vector Scalar Extension. */

#if defined(HAS_DFP)

/* Operands and results are exchanged with the inline asm through these
 * global register variables: f14 (and f15 for quad tests) hold the input,
 * f18 (and f19 for quad tests) receive the result.
 */
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");
register double f18 __asm__ ("fr18");
register double f19 __asm__ ("fr19");

typedef unsigned char Bool;
#define True 1
#define False 0

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)

#define GET_FPSCR(_arg) \
   __asm__ __volatile__ ("mffs %0"  : "=f"(_arg) )

#define SET_FPSCR_DRN \
   __asm__ __volatile__ ("mtfsf  1, %0, 0, 1" :  : "f"(f14) )

/* Shift amounts exercised by the shift tests. */
#define SH_0  0
#define SH_1  1
#define SH_2  15
#define SH_3  63

#define NUM_RND_MODES  8
#define CONDREG_MASK  0x0f000000
#define CONDREG_SHIFT 24

/* Environment vector handed to main(); used to locate the aux vector. */
static char ** my_envp;

/* Reinterpret a 64-bit pattern as a double.  memcpy is used instead of a
 * pointer cast so the conversion does not violate the aliasing rules.
 */
static inline double bits_to_double(unsigned long long bits)
{
   double d;

   memcpy(&d, &bits, sizeof d);
   return d;
}

/* Return the raw 64-bit pattern stored in a double. */
static inline unsigned long long double_to_bits(double d)
{
   unsigned long long bits;

   memcpy(&bits, &d, sizeof bits);
   return bits;
}

/* Return a pointer just past the NULL that terminates the environment
 * vector, which is where the aux vector is expected to start.  Returns NULL
 * if my_envp has not been set.
 */
static inline char** __auxv_find(void)
{
   char **result = my_envp;

   if (result == NULL)
      return NULL;

   /* Scan over the env vector looking for the ending NULL */
   while (*result != NULL)
      ++result;

   /* Bump the pointer one more step, which should be the auxv. */
   return ++result;
}

/* Look up the AT_HWCAP entry of the aux vector.  A non-zero value is cached
 * in a static so the vector is scanned only once; 0 is returned if no entry
 * is found or the aux vector cannot be located.
 */
static unsigned long fetch_at_hwcap(void)
{
   static unsigned long auxv_hwcap = 0;
   int i;
   ElfW(auxv_t) * auxv_buf = NULL;

   if (auxv_hwcap)
      return auxv_hwcap;

   auxv_buf = (ElfW(auxv_t)*) __auxv_find();
   if (auxv_buf == NULL)
      return 0;

   for (i = 0; auxv_buf[i].a_type != AT_NULL; i++) {
      if (auxv_buf[i].a_type == AT_HWCAP) {
         auxv_hwcap = auxv_buf[i].a_un.a_val;
         break;
      }
   }

   return auxv_hwcap;
}

/* Returns 1 if the PPC_FEATURE_HAS_VSX bit is set in AT_HWCAP, else 0. */
int get_vsx(void)
{
   /* Check to see if the AUX vector has the bit set indicating the HW
    * supports the vsx instructions.  This implies the processor is
    * at least a POWER 7.
    */
   unsigned long hwcap = fetch_at_hwcap();

   return ((hwcap & PPC_FEATURE_HAS_VSX) == PPC_FEATURE_HAS_VSX) ? 1 : 0;
}

/* The assembly-level instructions being tested */

/* The shift amount is passed with an "i" (immediate) constraint, so every
 * supported amount needs its own asm statement.  This macro expands to one
 * such switch case: f14 is the source, f18 the destination.
 */
#define DFP_SHIFT_CASE(insn, sh)                                      \
   case sh:                                                           \
      __asm__ __volatile__ (insn " %0, %1, %2"                        \
                            : "=f" (f18) : "f" (f14), "i" (sh));      \
      break

static void _test_dscri (int shift)
{
   switch(shift) {
   DFP_SHIFT_CASE("dscri", SH_0);
   DFP_SHIFT_CASE("dscri", SH_1);
   DFP_SHIFT_CASE("dscri", SH_2);
   DFP_SHIFT_CASE("dscri", SH_3);
   default:
      printf(" dscri, unsupported shift case %d\n", shift);
   }
}

static void _test_dscli (int shift)
{
   switch(shift) {
   DFP_SHIFT_CASE("dscli", SH_0);
   DFP_SHIFT_CASE("dscli", SH_1);
   DFP_SHIFT_CASE("dscli", SH_2);
   DFP_SHIFT_CASE("dscli", SH_3);
   default:
      printf(" dscli, unsupported shift case %d\n", shift);
   }
}

static void _test_dctdp (void)
{
   __asm__ __volatile__ ("dctdp  %0, %1" : "=f" (f18) : "f" (f14));
}

static void _test_drsp (void)
{
   __asm__ __volatile__ ("drsp  %0, %1" : "=f" (f18) : "f" (f14));
}

static void _test_dctfix (void)
{
   __asm__ __volatile__ ("dctfix  %0, %1" : "=f" (f18) : "f" (f14));
}

/* Power 7 and newer processors support this instruction */
static void _test_dcffix (void)
{
   __asm__ __volatile__ ("dcffix  %0, %1" : "=f" (f18) : "f" (f14));
}

static void _test_dscriq (int shift)
{
   switch(shift) {
   DFP_SHIFT_CASE("dscriq", SH_0);
   DFP_SHIFT_CASE("dscriq", SH_1);
   DFP_SHIFT_CASE("dscriq", SH_2);
   DFP_SHIFT_CASE("dscriq", SH_3);
   default:
      printf(" dscriq, unsupported shift case %d\n", shift);
   }
}

static void _test_dscliq (int shift)
{
   switch(shift) {
   DFP_SHIFT_CASE("dscliq", SH_0);
   DFP_SHIFT_CASE("dscliq", SH_1);
   DFP_SHIFT_CASE("dscliq", SH_2);
   DFP_SHIFT_CASE("dscliq", SH_3);
   default:
      printf(" dscliq, unsupported shift case %d\n", shift);
   }
}

static void _test_dctqpq (void)
{
   __asm__ __volatile__ ("dctqpq  %0, %1" : "=f" (f18) : "f" (f14));
}

static void _test_dctfixq (void)
{
   __asm__ __volatile__ ("dctfixq  %0, %1" : "=f" (f18) : "f" (f14));
}

static void _test_drdpq (void)
{
   __asm__ __volatile__ ("drdpq  %0, %1" : "=f" (f18) : "f" (f14));
}

static void _test_dcffixq (void)
{
   __asm__ __volatile__ ("dcffixq  %0, %1" : "=f" (f18) : "f" (f14));
}

/* test_func_t is deliberately left without a prototype: the test tables store
 * both the no-argument conversion helpers and the int-argument shift helpers
 * in the same field.
 */
typedef void (*test_func_t)();
typedef void (*test_func_main_t)(int);
typedef void (*test_func_shift_t)(int);

typedef struct test_table
{
   test_func_main_t test_category;
   char * name;
} test_table_t;

/* Quad-precision inputs, stored as consecutive (first, second) 64-bit
 * halves; entry n occupies elements 2n and 2n+1.
 */
static unsigned long long dfp128_vals[] = {
   /* Some finite numbers */
   0x2207c00000000000ULL, 0x0000000000000e50ULL,
   0x2f07c00000000000ULL, 0x000000000014c000ULL, /* large number */
   0xa207c00000000000ULL, 0x00000000000000e0ULL,
   0x2206c00000000000ULL, 0x00000000000000cfULL,
   0xa205c00000000000ULL, 0x000000010a395bcfULL,
   0x6209400000fd0000ULL, 0x00253f1f534acdd4ULL, /* a small number */
   0x000400000089b000ULL, 0x0a6000d000000049ULL, /* very small number */
   /* flavors of zero */
   0x2208000000000000ULL, 0x0000000000000000ULL,
   0xa208000000000000ULL, 0x0000000000000000ULL, /* negative */
   0xa248000000000000ULL, 0x0000000000000000ULL,
   /* flavors of NAN */
   0x7c00000000000000ULL, 0x0000000000000000ULL, /* quiet */
   0xfc00000000000000ULL, 0xc00100035b007700ULL,
   0x7e00000000000000ULL, 0xfe000000d0e0a0d0ULL, /* signaling */
   /* flavors of Infinity */
   0x7800000000000000ULL, 0x0000000000000000ULL,
   0xf800000000000000ULL, 0x0000000000000000ULL, /* negative */
   0xf900000000000000ULL, 0x0000000000000000ULL
};

/* 64-bit integer inputs for dcffix/dcffixq. */
static unsigned long long int64_vals[] = {
   /* I64 values */
   0x0ULL,                /* zero */
   0x1ULL,                /* one */
   0xffffffffffffffffULL, /* minus one */
   0x2386f26fc0ffffULL,   /* 9999999999999999 */
   0xffdc790d903f0001ULL, /* -9999999999999999 */
   0x462d53c8abac0ULL,    /* 1234567890124567 */
   0xfffb9d2ac3754540ULL, /* -1234567890124567 */
};

/* Long-precision inputs. */
static unsigned long long dfp64_vals[] = {
   /* various finite numbers */
   0x2234000000000e50ULL,
   0x223400000014c000ULL,
   0xa2340000000000e0ULL, /* negative */
   0x22240000000000cfULL,
   0xa21400010a395bcfULL, /* negative */
   0x6e4d3f1f534acdd4ULL, /* large number */
   0x000400000089b000ULL, /* very small number */
   /* flavors of zero */
   0x2238000000000000ULL,
   0xa238000000000000ULL,
   0x4248000000000000ULL,
   /* flavors of NAN */
   0x7e34000000000111ULL,
   0xfe000000d0e0a0d0ULL, /* signaling */
   0xfc00000000000000ULL, /* quiet */
   /* flavors of Infinity */
   0x7800000000000000ULL,
   0xf800000000000000ULL, /* negative */
   0x7a34000000000000ULL,
};

typedef struct dfp_test_args {
   int fra_idx;
   int frb_idx;
} dfp_test_args_t;

/* Indices into int64_vals to be used with dfp_dcffix_dcffixq_tests */
static dfp_test_args_t int64_args_x1[] = {
   /* {int64 input val, unused } */
   {0, 0},
   {1, 0},
   {2, 0},
   {3, 0},
   {4, 0},
   {5, 0},
   {6, 0},
};

/* Index into dfp64_vals or dfp128_vals, paired with a shift amount, to be
 * used with dfp_two_arg_tests
 */
static dfp_test_args_t dfp_2args_x1[] = {
   /* {dfp_arg, shift_arg} */
   {0, SH_0},
   {0, SH_1},
   {0, SH_2},
   {0, SH_3},
   {5, SH_0},
   {5, SH_1},
   {5, SH_2},
   {5, SH_3},
   {6, SH_0},
   {6, SH_1},
   {6, SH_2},
   {6, SH_3},
   {7, SH_0},
   {7, SH_1},
   {7, SH_2},
   {7, SH_3},
   {10, SH_0},
   {10, SH_1},
   {10, SH_2},
   {10, SH_3},
   {13, SH_0},
   {13, SH_1},
   {13, SH_2},
   {13, SH_3},
};

/* Indices into dfp64_vals or dfp128_vals to be used with dfp_one_arg_tests */
static dfp_test_args_t dfp_1args_x1[] = {
   /* {dfp_arg, unused} */
   {0, 0},
   {1, 0},
   {2, 0},
   {3, 0},
   {4, 0},
   {5, 0},
   {6, 0},
   {7, 0},
   {8, 0},
   {9, 0},
   {10, 0},
   {11, 0},
   {12, 0},
   {13, 0},
   {14, 0},
};

typedef enum {
   LONG_TEST,
   QUAD_TEST
} precision_type_t;

typedef struct dfp_test
{
   test_func_t test_func;
   const char * name;
   dfp_test_args_t * targs;
   int num_tests;
   precision_type_t precision;
   const char * op;
   Bool cr_supported;
} dfp_test_t;

/* Every table below ends with an all-zero entry; the drivers stop at the
 * first NULL test_func.  num_tests is derived from the argument array so it
 * cannot drift out of sync with the number of entries.
 */

/* The dcffix and dcffixq tests are a little different in that they both take
 * an I64 input.
 */
static dfp_test_t
dfp_dcffix_dcffixq_tests[] = {
   { &_test_dcffixq, "dcffixq", int64_args_x1, (int)ARRAY_SIZE(int64_args_x1),
     QUAD_TEST, "I64S->D128", True},
   /* Power 7 instruction */
   { &_test_dcffix,  "dcffix",  int64_args_x1, (int)ARRAY_SIZE(int64_args_x1),
     LONG_TEST, "I64S->D64",  True},
   { NULL, NULL, NULL, 0, LONG_TEST, NULL, False}
};

static dfp_test_t
dfp_one_arg_tests[] = {
   { &_test_dctdp,   "dctdp",   dfp_1args_x1, (int)ARRAY_SIZE(dfp_1args_x1),
     LONG_TEST, "D32->D64",   True},
   { &_test_drsp,    "drsp",    dfp_1args_x1, (int)ARRAY_SIZE(dfp_1args_x1),
     LONG_TEST, "D64->D32",   True},
   { &_test_dctfix,  "dctfix",  dfp_1args_x1, (int)ARRAY_SIZE(dfp_1args_x1),
     LONG_TEST, "D64->I64S",  True},
   { &_test_dctqpq,  "dctqpq",  dfp_1args_x1, (int)ARRAY_SIZE(dfp_1args_x1),
     QUAD_TEST, "D64->D128",  True},
   { &_test_dctfixq, "dctfixq", dfp_1args_x1, (int)ARRAY_SIZE(dfp_1args_x1),
     QUAD_TEST, "D128->I64S", True},
   { &_test_drdpq,   "drdpq",   dfp_1args_x1, (int)ARRAY_SIZE(dfp_1args_x1),
     QUAD_TEST, "D128->D64",  True},
   { NULL, NULL, NULL, 0, LONG_TEST, NULL, False}
};

static dfp_test_t
dfp_two_arg_tests[] = {
   { &_test_dscri,  "dscri",  dfp_2args_x1, (int)ARRAY_SIZE(dfp_2args_x1),
     LONG_TEST, ">>", True},
   { &_test_dscli,  "dscli",  dfp_2args_x1, (int)ARRAY_SIZE(dfp_2args_x1),
     LONG_TEST, "<<", True},
   { &_test_dscriq, "dscriq", dfp_2args_x1, (int)ARRAY_SIZE(dfp_2args_x1),
     QUAD_TEST, ">>", True},
   { &_test_dscliq, "dscliq", dfp_2args_x1, (int)ARRAY_SIZE(dfp_2args_x1),
     QUAD_TEST, "<<", True},
   { NULL, NULL, NULL, 0, LONG_TEST, NULL, False}
};

/* Install rnd_mode as the rounding mode used by the tests: read the FPSCR
 * with mffs, clear bits 32-35 of the 64-bit image, OR in rnd_mode shifted
 * left by 32, and write the field back with SET_FPSCR_DRN.
 *
 * Clobbers the global register variable f14, so it must be called before the
 * operand for the instruction under test is loaded.
 */
void set_rounding_mode(unsigned long long rnd_mode)
{
   double fpscr;
   unsigned long long fpscr_bits;

   __asm__ __volatile__ ("mffs %0" : "=f"(f14));
   fpscr = f14;

   fpscr_bits = double_to_bits(fpscr);
   fpscr_bits &= 0xFFFFFFF0FFFFFFFFULL;
   fpscr_bits |= (rnd_mode << 32);

   f14 = bits_to_double(fpscr_bits);
   SET_FPSCR_DRN;
}

/* Load input number idx for the given precision into the input register(s).
 *
 * The first 64 bits are stored through hi and placed in f14.  For QUAD_TEST
 * the second 64 bits are stored through lo and placed in f15; for LONG_TEST
 * *lo and f15 are left untouched.
 */
static void load_dfp_operand(precision_type_t precision, int idx,
                             unsigned long long *hi, unsigned long long *lo)
{
   if (precision == LONG_TEST) {
      *hi = dfp64_vals[idx];
   } else {
      *hi = dfp128_vals[idx * 2];
      *lo = dfp128_vals[(idx * 2) + 1];
   }

   f14 = bits_to_double(*hi);
   if (precision == QUAD_TEST)
      f15 = bits_to_double(*lo);
}

/* Run every entry of dfp_one_arg_tests under each rounding mode and print
 * the input and result bit patterns.  The parameter is ignored; it exists so
 * the function fits test_func_main_t.
 */
static void test_dfp_one_arg_ops(int unused)
{
   test_func_t func;
   unsigned long long u0, u0x = 0;
   double res;
   unsigned long round_mode;
   int k = 0;

   while ((func = dfp_one_arg_tests[k].test_func)) {
      int i;

      for (round_mode = 0; round_mode < NUM_RND_MODES; round_mode++) {
         /* Do each test with each of the possible rounding modes */
         dfp_test_t test_group = dfp_one_arg_tests[k];

         printf("\ntest with rounding mode %lu \n", round_mode);
         /* The set_rounding_mode() uses the global value f14.  Call the
          * function before setting up the test for the specific instruction
          * to avoid conflicts using f14.
          */
         set_rounding_mode(round_mode);

         for (i = 0; i < test_group.num_tests; i++) {
            load_dfp_operand(test_group.precision,
                             test_group.targs[i].fra_idx, &u0, &u0x);

            (*func)();
            res = f18;

            printf("%s %016llx", test_group.name, u0);

            if (test_group.precision == LONG_TEST) {
               printf(" %s  => %016llx",
                      test_group.op, double_to_bits(res));
            } else {
               double resx = f19;
               printf(" %016llx %s ==> %016llx %016llx",
                      u0x, test_group.op,
                      double_to_bits(res), double_to_bits(resx));
            }
            printf("\n");
         }
      }

      k++;
      printf( "\n" );
   }
}

/* Shift instructions: first argument is the DFP source, second argument
 * is 6 bit shift amount.
 *
 * Run every entry of dfp_two_arg_tests under each rounding mode and print the
 * input, shift amount and result bit patterns.  The parameter is ignored.
 */
static void test_dfp_two_arg_ops(int unused)
{
   test_func_shift_t func;
   unsigned long long u0, u0x = 0;
   unsigned int shift_by;
   double res;
   unsigned long round_mode;
   int k = 0;

   while ((func = (test_func_shift_t)dfp_two_arg_tests[k].test_func)) {
      int i;

      for (round_mode = 0; round_mode < NUM_RND_MODES; round_mode++) {
         /* Do each test with each of the possible rounding modes */
         dfp_test_t test_group = dfp_two_arg_tests[k];

         printf("\ntest with rounding mode %lu \n", round_mode);

         /* The set_rounding_mode() uses the global value f14.  Call the
          * function before setting up the test for the specific instruction
          * to avoid conflicts using f14.
          */
         set_rounding_mode(round_mode);

         for (i = 0; i < test_group.num_tests; i++) {
            shift_by = test_group.targs[i].frb_idx;
            load_dfp_operand(test_group.precision,
                             test_group.targs[i].fra_idx, &u0, &u0x);

            (*func)(shift_by);
            res = f18;

            printf("%s %016llx", test_group.name, u0);

            /* Compare against LONG_TEST explicitly: LONG_TEST is 0, so a
             * bare truth test on precision would select the wrong format.
             */
            if (test_group.precision == LONG_TEST) {
               printf(" %s %-3d => %016llx",
                      test_group.op, shift_by, double_to_bits(res));
            } else {
               double resx = f19;
               printf(" %016llx %s %-3d ==> %016llx %016llx",
                      u0x, test_group.op, shift_by,
                      double_to_bits(res), double_to_bits(resx));
            }
            printf("\n" );
         }
      }

      k++;
      printf( "\n" );
   }
}

/* Run dcffixq and dcffix on every int64_vals input under each rounding mode.
 * dcffix is skipped when has_vsx is zero.
 */
static void test_dcffix_dcffixq(int has_vsx)
{
   test_func_t func;
   unsigned long long u0;
   double res;
   unsigned int round_mode;
   int k = 0;

   while ((func = dfp_dcffix_dcffixq_tests[k].test_func)) {
      int i;

      if ((!has_vsx) && (!strcmp("dcffix", dfp_dcffix_dcffixq_tests[k].name))) {
         k++;
         /* The test instruction is dcffix it is supported on POWER 7
          * and newer processors.  Skip if not POWER 7 or newer.
          */
         continue;
      }

      for (round_mode = 0; round_mode < NUM_RND_MODES; round_mode++) {
         /* Do each test with each of the possible rounding modes */
         dfp_test_t test_group = dfp_dcffix_dcffixq_tests[k];

         printf("\ntest with rounding mode %u \n", round_mode);

         /* The set_rounding_mode() uses the global value f14.  Call the
          * function before setting up the test for the specific instruction
          * to avoid conflicts using f14.
          */
         set_rounding_mode(round_mode);

         for (i = 0; i < test_group.num_tests; i++) {
            /* The instructions take I64 inputs */
            u0 = int64_vals[test_group.targs[i].fra_idx];
            f14 = bits_to_double(u0);

            (*func)();
            res = f18;

            printf("%s %016llx", test_group.name, u0);

            /* Only the quad form produces a second result word in f19. */
            if (test_group.precision == LONG_TEST) {
               printf(" %s  => %016llx",
                      test_group.op, double_to_bits(res));
            } else {
               double resx = f19;
               printf(" %s ==> %016llx %016llx",
                      test_group.op,
                      double_to_bits(res), double_to_bits(resx));
            }
            printf("\n" );
         }
      }

      k++;
      printf( "\n" );
   }
}

/* Test categories run by main(), terminated by a NULL entry. */
static test_table_t
all_tests[] =
{
   { &test_dfp_one_arg_ops,
     "Test DFP fomat conversion instructions" },
   { &test_dfp_two_arg_ops,
     "Test DFP shift instructions" },
   { test_dcffix_dcffixq,
     "Test DCFFIX and DCFFIXQ instructions" },
   { NULL, NULL }
};
#endif // HAS_DFP

/* Run every test category in all_tests, passing each the VSX capability flag.
 * Always returns 0; without HAS_DFP nothing is run.
 */
int main(int argc, char ** argv, char ** envp)
{
#if defined(HAS_DFP)
   test_func_main_t func;
   int i = 0, has_vsx;

   /* If the processor has the VSX functionality then it is POWER 7
    * or newer.
    */
   my_envp = envp;
   has_vsx = get_vsx();

   while ((func = all_tests[i].test_category)) {
      printf( "%s\n", all_tests[i].name );
      (*func)(has_vsx);
      i++;
   }

#endif // HAS_DFP
   return 0;
}