/* C++ program  |  299 lines  |  12.54 KB */

#include <stdio.h>

#define N 256

/* Table of 64-bit values used as instruction operands and as the initial
   memory image for the atomic tests. */
unsigned long long reg_val_double[N];

/* Fill reg_val_double[] with a fixed, reproducible sequence: element 0 is the
   seed 19650218 and every following element is
   1812433253 * (prev ^ (prev >> 30)) + index,
   evaluated in unsigned long long arithmetic. */
void init_reg_val_double()
{
   unsigned long long prev = 19650218UL & 0xffffffffUL;
   int idx = 0;

   reg_val_double[idx] = prev;
   while (++idx < N) {
      prev = 1812433253UL * (prev ^ (prev >> 30)) + idx;
      reg_val_double[idx] = prev;
   }
}


/* Scratch copy of reg_val_double[]. The atomic instructions modify the memory
   they are given, so the tests hand them this copy and the original table
   keeps its values. */
unsigned long long reg_val_double_copy[N];

/* Overwrite reg_val_double_copy[] with the current contents of
   reg_val_double[]. */
void copy_reg_val_double()
{
   int idx = N;

   while (idx-- > 0) {
      reg_val_double_copy[idx] = reg_val_double[idx];
   }
}

/* TEST1_32/64 macros are used for the load atomic increment/decrement/set/clear
   instructions. After executing an instruction, both the value returned in the
   register and the addressed memory location are printed.

   1: Move arguments (base address and offset) to registers
   2: Add offset and base address to make absolute address
   3: Execute instruction
   4: Move result from register ($t3)
   5: Load memory data ('lw' for 32bit instructions and 'ld' for 64bit ones)

   instruction: mnemonic as a string literal; it is pasted into the asm
                template and also printed
   offset:      byte offset added to mem (printed with %x)
   mem:         base address of the memory the instruction operates on

   The asm body of TEST1_32 overwrites $t0, $t1 and $t3. The clobber list is
   written with register numbers; with the n32/n64 register names $t0..$t3
   are $12..$15, so "$15" has to be listed next to "$12" and "$13".
   Otherwise the compiler may keep a live value in $t3 across the asm.
*/
#define TEST1_32(instruction, offset, mem)                   \
{                                                            \
   unsigned long out = 0;                                    \
   unsigned long res_mem = 0;                                \
   __asm__ volatile(                                         \
     "move         $t0, %2"        "\n\t"                    \
     "move         $t1, %3"        "\n\t"                    \
     "daddu        $t0, $t1, $t0"  "\n\t"                    \
     instruction " $t3, ($t0)"     "\n\t"                    \
     "move         %0,  $t3"       "\n\t"                    \
     "lw           %1,  0($t0)"    "\n\t"                    \
     : "=&r" (out), "=&r" (res_mem)                          \
     : "r" (mem), "r" (offset)                               \
     : "$12", "$13", "$15", "cc", "memory"                   \
     );                                                      \
   printf("%s :: offset: 0x%x, out: 0x%lx, result:0x%lx\n",  \
          instruction, offset, out, res_mem);                \
}

/* 64-bit variant of the load atomic increment/decrement/set/clear test:
   computes mem + offset in $t0, executes "instruction $t3, ($t0)", then
   prints the value left in $t3 and the doubleword re-read from memory
   with 'ld'.

   instruction: mnemonic as a string literal; it is pasted into the asm
                template and also printed
   offset:      byte offset added to mem (printed with %x)
   mem:         base address of the memory the instruction operates on

   The asm body overwrites $t0, $t1 and $t3. The clobber list is written with
   register numbers; with the n32/n64 register names $t0..$t3 are $12..$15,
   so "$15" has to be listed next to "$12" and "$13". Otherwise the compiler
   may keep a live value in $t3 across the asm.
*/
#define TEST1_64(instruction, offset, mem)                    \
{                                                             \
   unsigned long out = 0;                                     \
   unsigned long res_mem = 0;                                 \
   __asm__ volatile(                                          \
     "move         $t0, %2"        "\n\t"                     \
     "move         $t1, %3"        "\n\t"                     \
     "daddu        $t0, $t1, $t0"  "\n\t"                     \
     instruction " $t3, ($t0)"     "\n\t"                     \
     "move         %0,  $t3"       "\n\t"                     \
     "ld           %1,  0($t0)"    "\n\t"                     \
     : "=&r" (out), "=&r" (res_mem)                           \
     : "r" (mem), "r" (offset)                                \
     : "$12", "$13", "$15", "cc", "memory"                    \
     );                                                       \
   printf("%s :: offset: 0x%x, out: 0x%lx, result: 0x%lx\n",  \
          instruction, offset, out, res_mem);                 \
}

/* TEST2 macro is used for the pop/dpop/baddu instructions. For each
   instruction the macro performs the following operations:

   1: Move arguments to registers ($t1 and $t2)
   2: Execute instruction
   3: Move result from register ($t3)

   instruction: complete assembly statement as a string literal, e.g.
                "baddu $t3, $t1, $t2"; it has to leave its result in $t3
   RSVal:       value placed in $t1
   RTVal:       value placed in $t2

   The asm body overwrites $t1, $t2 and $t3 and does not touch $t0. The
   clobber list is written with register numbers; with the n32/n64 register
   names $t0..$t3 are $12..$15, so the registers to declare are "$13", "$14"
   and "$15". Otherwise the compiler may keep a live value (for example an
   operand it prints afterwards) in $t2 or $t3 across the asm.
*/
#define TEST2(instruction, RSVal, RTVal)                               \
{                                                                      \
   unsigned long out;                                                  \
   __asm__ volatile(                                                   \
      "move $t1, %1"  "\n\t"                                           \
      "move $t2, %2"  "\n\t"                                           \
      instruction     "\n\t"                                           \
      "move %0, $t3"  "\n\t"                                           \
      : "=&r" (out)                                                    \
      : "r" (RSVal), "r" (RTVal)                                       \
      : "$13", "$14", "$15", "cc", "memory"                            \
        );                                                             \
   printf("%s :: rd 0x%lx, rs 0x%llx, rt 0x%llx\n",                    \
          instruction, out, (long long) (RSVal), (long long) (RTVal)); \
}

/* TEST3 macro drives the store atomic add (word and doubleword) instructions.
   Sequence carried out by the asm body:

   1: Move base address and offset to registers
   2: Add them to form the absolute address
   3: Load the memory contents before the operation (printed as memPre)
   4: Move the addend to $t2 and execute the instruction
   5: Load the memory contents after the operation (printed as mem)

   Both loads are done with 'ld'.

   instruction: mnemonic as a string literal; it is pasted into the asm
                template and also printed
   offset:      byte offset added to mem
   mem:         base address of the memory the instruction operates on
   value:       addend handed to the instruction (printed with %llx)
*/
#define TEST3(instruction, offset, mem, value)                      \
{                                                                   \
   unsigned long mem_after = 0;                                     \
   unsigned long mem_before = 0;                                    \
   __asm__ volatile(                                                \
      "move         $t0, %2"        "\n\t"                          \
      "move         $t1, %3"        "\n\t"                          \
      "daddu        $t0, $t1, $t0"  "\n\t"                          \
      "ld           %1,  0($t0)"    "\n\t"                          \
      "move         $t2, %4"        "\n\t"                          \
      instruction " $t2, ($t0)"     "\n\t"                          \
      "ld           %0,  0($t0)"    "\n\t"                          \
      : "=&r" (mem_after), "=&r" (mem_before)                       \
      : "r" (mem) , "r" (offset), "r" (value)                       \
      : "$12", "$13", "$14", "cc", "memory"                         \
   );                                                               \
   printf("%s :: value: 0x%llx, memPre: 0x%lx, mem: 0x%lx\n",       \
          instruction, value, mem_before, mem_after);               \
}

/* TEST4_32/64 is used for load atomic add/swap instructions. Following
   operations are performed by the macro for each instruction:

   1: Move arguments to registers.
   2: Add offset and base address to make absolute address.
   3: Execute instruction; $t1, which still holds the offset, is passed as
      the rt operand.
   4: Move result from register $t3.
   5: Load memory data ('lw' for 32bit instruction and 'ld' for 64bit).

   instruction: mnemonic as a string literal; it is pasted into the asm
                template and also printed
   offset:      byte offset added to mem (printed with %x)
   mem:         base address of the memory the instruction operates on

   The asm body of TEST4_32 overwrites $t0, $t1 and $t3. The clobber list is
   written with register numbers; with the n32/n64 register names $t0..$t3
   are $12..$15, so "$15" has to be listed next to "$12" and "$13".
   Otherwise the compiler may keep a live value in $t3 across the asm.
*/
#define TEST4_32(instruction, offset, mem)                   \
{                                                            \
   unsigned long out = 0;                                    \
   unsigned long res_mem = 0;                                \
   __asm__ volatile(                                         \
      "move         $t0, %2"          "\n\t"                 \
      "move         $t1, %3"          "\n\t"                 \
      "daddu        $t0, $t0, $t1"    "\n\t"                 \
      instruction " $t3, ($t0), $t1"  "\n\t"                 \
      "move         %0,  $t3"         "\n\t"                 \
      "lw           %1,  0($t0)"      "\n\t"                 \
      : "=&r" (out), "=&r" (res_mem)                         \
      : "r" (mem), "r" (offset)                              \
      : "$12", "$13", "$15", "cc", "memory"                  \
     );                                                      \
   printf("%s :: offset: 0x%x, out: 0x%lx, result:0x%lx\n",  \
          instruction, offset, out, res_mem);                \
}

/* 64-bit variant of the load atomic add/swap test: computes mem + offset in
   $t0, executes "instruction $t3, ($t0), $t1" with the offset still in $t1,
   then prints the value left in $t3 and the doubleword re-read from memory
   with 'ld'.

   instruction: mnemonic as a string literal; it is pasted into the asm
                template and also printed
   offset:      byte offset added to mem (printed with %x)
   mem:         base address of the memory the instruction operates on

   The asm body overwrites $t0, $t1 and $t3. The clobber list is written with
   register numbers; with the n32/n64 register names $t0..$t3 are $12..$15,
   so "$15" has to be listed next to "$12" and "$13". Otherwise the compiler
   may keep a live value in $t3 across the asm.
*/
#define TEST4_64(instruction, offset, mem)                    \
{                                                             \
   unsigned long out = 0;                                     \
   unsigned long res_mem = 0;                                 \
   __asm__ volatile(                                          \
      "move         $t0, %2"          "\n\t"                  \
      "move         $t1, %3"          "\n\t"                  \
      "daddu        $t0, $t0,   $t1"  "\n\t"                  \
      instruction " $t3, ($t0), $t1"  "\n\t"                  \
      "move         %0,  $t3"         "\n\t"                  \
      "ld           %1,  0($t0)"      "\n\t"                  \
      : "=&r" (out), "=&r" (res_mem)                          \
      : "r" (mem), "r" (offset)                               \
      : "$12", "$13", "$15", "cc", "memory"                   \
     );                                                       \
   printf("%s :: offset: 0x%x, out: 0x%lx, result: 0x%lx\n",  \
          instruction, offset, out, res_mem);                 \
}

/* Test groups run by main(), listed in the order they are executed. */
typedef enum {
   BADDU,   /* unsigned byte add */
   POP,     /* count ones in a word */
   DPOP,    /* count ones in a doubleword */
   SAA,     /* atomic add word */
   SAAD,    /* atomic add doubleword */
   LAA,     /* load atomic add word */
   LAAD,    /* load atomic add doubleword */
   LAW,     /* load atomic swap word */
   LAWD,    /* load atomic swap doubleword */
   LAI,     /* load atomic increment word */
   LAID,    /* load atomic increment doubleword */
   LAD,     /* load atomic decrement word */
   LADD,    /* load atomic decrement doubleword */
   LAS,     /* load atomic set word */
   LASD,    /* load atomic set doubleword */
   LAC,     /* load atomic clear word */
   LACD     /* load atomic clear doubleword */
} cvm_op;

/* Runs every test group once, in cvm_op order, printing one line per executed
   instruction. Each atomic group first restores reg_val_double_copy[] so that
   it starts from the same memory image. When _MIPS_ARCH_OCTEON2 is not set,
   nothing is executed. Always returns 0. */
int main()
{
#if (_MIPS_ARCH_OCTEON2)
   int rs_idx, rt_idx;   /* indices into reg_val_double[] */
   int off;              /* byte offset into reg_val_double_copy[] */
   cvm_op op;

   init_reg_val_double();

   for (op = BADDU; op <= LACD; op++) {
      switch (op) {
         case BADDU:
            /* Unsigned Byte Add - BADDU rd, rs, rt; Cavium OCTEON */
            for (rs_idx = 4; rs_idx < N; rs_idx += 4) {
               for (rt_idx = 4; rt_idx < N; rt_idx += 4) {
                  TEST2("baddu $t3, $t1, $t2", reg_val_double[rs_idx],
                                               reg_val_double[rt_idx]);
               }
            }
            break;

         case POP:
            /* Count Ones in a Word - POP */
            for (rs_idx = 4; rs_idx < N; rs_idx += 4) {
               TEST2("pop $t3, $t1", reg_val_double[rs_idx], 0);
            }
            break;

         case DPOP:
            /* Count Ones in a Doubleword - DPOP */
            for (rs_idx = 8; rs_idx < N; rs_idx += 8) {
               TEST2("dpop $t3, $t1", reg_val_double[rs_idx], 0);
            }
            break;

         case SAA:
            /* Atomic Add Word - saa rt, (base). */
            copy_reg_val_double();
            for (off = 4; off < N; off += 4) {
               TEST3("saa", off, reg_val_double_copy, reg_val_double[off]);
            }
            break;

         case SAAD:
            /* Atomic Add Double - saad rt, (base). */
            copy_reg_val_double();
            for (off = 8; off < N; off += 8) {
               TEST3("saad", off, reg_val_double_copy, reg_val_double[off]);
            }
            break;

         case LAA:
            /* Load Atomic Add Word - laa rd, (base), rt. */
            copy_reg_val_double();
            for (off = 4; off < N; off += 4) {
               TEST4_32("laa", off, reg_val_double_copy);
            }
            break;

         case LAAD:
            /* Load Atomic Add Double - laad rd, (base), rt */
            copy_reg_val_double();
            for (off = 8; off < N; off += 8) {
               TEST4_64("laad ", off, reg_val_double_copy);
            }
            break;

         case LAW:
            /* Load Atomic Swap Word - law rd, (base), rt */
            copy_reg_val_double();
            for (off = 4; off < N; off += 4) {
               TEST4_32("law", off, reg_val_double_copy);
            }
            break;

         case LAWD:
            /* Load Atomic Swap Double - lawd rd, (base), rt */
            copy_reg_val_double();
            for (off = 8; off < N; off += 8) {
               TEST4_64("lawd", off, reg_val_double_copy);
            }
            break;

         case LAI:
            /* Load Atomic Increment Word - lai rd, (base) */
            copy_reg_val_double();
            for (off = 4; off < N; off += 4) {
               TEST1_32("lai", off, reg_val_double_copy);
            }
            break;

         case LAID:
            /* Load Atomic Increment Double - laid rd, (base) */
            copy_reg_val_double();
            for (off = 8; off < N; off += 8) {
               TEST1_64("laid ", off, reg_val_double_copy);
            }
            break;

         case LAD:
            /* Load Atomic Decrement Word - lad rd, (base) */
            copy_reg_val_double();
            for (off = 4; off < N; off += 4) {
               TEST1_32("lad", off, reg_val_double_copy);
            }
            break;

         case LADD:
            /* Load Atomic Decrement Double - ladd rd, (base) */
            copy_reg_val_double();
            for (off = 8; off < N; off += 8) {
               TEST1_64("ladd", off, reg_val_double_copy);
            }
            break;

         case LAS:
            /* Load Atomic Set Word - las rd, (base) */
            copy_reg_val_double();
            for (off = 4; off < N; off += 4) {
               TEST1_32("las", off, reg_val_double_copy);
            }
            break;

         case LASD:
            /* Load Atomic Set Double - lasd rd, (base) */
            copy_reg_val_double();
            for (off = 8; off < N; off += 8) {
               TEST1_64("lasd", off, reg_val_double_copy);
            }
            break;

         case LAC:
            /* Load Atomic Clear Word - lac rd, (base) */
            copy_reg_val_double();
            for (off = 4; off < N; off += 4) {
               TEST1_32("lac", off, reg_val_double_copy);
            }
            break;

         case LACD:
            /* Load Atomic Clear Double - lacd rd, (base) */
            copy_reg_val_double();
            for (off = 8; off < N; off += 8) {
               TEST1_64("lacd", off, reg_val_double_copy);
            }
            break;

         default:
            printf("Nothing to be executed \n");
            break;
      }
   }
#endif
   return 0;
}