#include <stdio.h>
#define N 256
unsigned long long reg_val_double[N];

/* Fill reg_val_double[] with deterministic pseudo-random test patterns,
   using the MT19937-style seeding recurrence with seed 19650218. */
void init_reg_val_double()
{
   const unsigned long seed = 19650218UL;
   int idx;

   reg_val_double[0] = seed & 0xffffffffUL;
   for (idx = 1; idx < N; idx++) {
      unsigned long long prev = reg_val_double[idx - 1];
      reg_val_double[idx] = 1812433253UL * (prev ^ (prev >> 30)) + idx;
   }
}
/* Snapshot of the original patterns: the atomic add/store instructions under
   test modify memory in place, so the tests operate on this copy instead. */
unsigned long long reg_val_double_copy[N];

/* Refresh reg_val_double_copy[] from the pristine reg_val_double[]. */
void copy_reg_val_double()
{
   int idx;
   for (idx = 0; idx < N; idx++)
      reg_val_double_copy[idx] = reg_val_double[idx];
}
/* TEST1_32/64 macro is used in load atomic increment/decrement/set/clear
instructions. After executing each instruction we must check both memory
location and register value.
1: Move arguments (offset and base address) to registers
2: Add offset and base address to make absolute address
3: Execute instruction
4: Move result from register ($t3)
5: Load memory data ('lw' for the 32-bit instructions, 'ld' for the 64-bit ones)
*/
/* TEST1_32 drives a 32-bit load-atomic inc/dec/set/clear instruction
   (lai/lad/las/lac): forms the absolute address in $t0, executes the
   instruction (old value returned in $t3), then re-reads the word so both
   the register result and the memory location can be printed.
   FIX: the asm writes $t0, $t1 and $t3.  Under the register numbering this
   file itself uses (TEST3 clobbers "$12".."$14" for $t0..$t2), $t3 is $15,
   which was missing from the clobber list — the compiler was free to keep a
   live value there across the asm.  "$15" is now clobbered too. */
#define TEST1_32(instruction, offset,mem) \
{ \
unsigned long out = 0; \
unsigned long res_mem = 0; \
__asm__ volatile( \
"move $t0, %2" "\n\t" \
"move $t1, %3" "\n\t" \
"daddu $t0, $t1, $t0" "\n\t" \
instruction " $t3, ($t0)" "\n\t" \
"move %0, $t3" "\n\t" \
"lw %1, 0($t0)" "\n\t" \
: "=&r" (out), "=&r"(res_mem) \
: "r" (mem) , "r" (offset) \
: "$12", "$13", "$15", "cc", "memory" \
); \
printf("%s :: offset: 0x%x, out: 0x%lx, result:0x%lx\n", \
instruction, offset, out, res_mem); \
}
/* TEST1_64: doubleword variant of TEST1_32 (laid/ladd/lasd/lacd); identical
   sequence but re-reads memory with 'ld' instead of 'lw'.
   FIX: add the missing "$15" ($t3, per the $tN -> $(12+N) numbering TEST3
   already uses) to the clobber list — the asm writes $t3 as the result
   register. */
#define TEST1_64(instruction, offset,mem) \
{ \
unsigned long out = 0; \
unsigned long res_mem = 0; \
__asm__ volatile( \
"move $t0, %2" "\n\t" \
"move $t1, %3" "\n\t" \
"daddu $t0, $t1, $t0" "\n\t" \
instruction " $t3, ($t0)" "\n\t" \
"move %0, $t3" "\n\t" \
"ld %1, 0($t0)" "\n\t" \
: "=&r" (out), "=&r"(res_mem) \
: "r" (mem) , "r" (offset) \
: "$12", "$13", "$15", "cc", "memory" \
); \
printf("%s :: offset: 0x%x, out: 0x%lx, result: 0x%lx\n", \
instruction, offset, out, res_mem); \
}
/* The TEST2 macro is used for the pop/dpop/baddu instructions. After executing
each instruction the macro performs the following operations:
1: Move arguments to registers
2: Execute instruction
3: Move the result out of register $t3
*/
/* TEST2 drives a 3-register ALU instruction (pop/dpop/baddu): loads the
   operands into $t1/$t2, executes the instruction (the instruction string
   names $t3 as destination), and copies $t3 into 'out' for printing.
   FIXES:
   - the asm writes $t1, $t2 and $t3 — i.e. $13..$15 under the numbering
     this file uses in TEST3 — but only "$12", "$13" were clobbered; "$14"
     and "$15" are now listed as well ("$12" kept, harmless).
   - 'out' is zero-initialized for consistency with every other TEST macro. */
#define TEST2(instruction, RSVal, RTVal) \
{ \
unsigned long out = 0; \
__asm__ volatile( \
"move $t1, %1" "\n\t" \
"move $t2, %2" "\n\t" \
instruction "\n\t" \
"move %0, $t3" "\n\t" \
: "=&r" (out) \
: "r" (RSVal), "r" (RTVal) \
: "$12", "$13", "$14", "$15", "cc", "memory" \
); \
printf("%s :: rd 0x%lx, rs 0x%llx, rt 0x%llx\n", \
instruction, out, (long long) RSVal, (long long) RTVal); \
}
/* TEST3 macro is used for store atomic add and store atomic add doubleword
instructions. Following operations are performed by the test macro:
1: Move arguments to the register
2: Add offset and base address to make absolute address
3: Execute instruction
4: Load memory data
*/
/* NOTE(review): the clobbers "$12".."$14" assume the numbering where
   $t0..$t2 map to $12..$14 (n64-style) — confirm against the toolchain's
   ABI/register naming. */
#define TEST3(instruction, offset, mem, value) \
{ \
unsigned long out = 0; \
unsigned long outPre = 0; \
__asm__ volatile( \
"move $t0, %2" "\n\t" \
"move $t1, %3" "\n\t" \
"daddu $t0, $t1, $t0" "\n\t" /* absolute address = base + offset */ \
"ld %1, 0($t0)" "\n\t" /* memory value BEFORE the atomic store */ \
"move $t2, %4" "\n\t" /* $t2 = addend */ \
instruction " $t2, ($t0)" "\n\t" /* saa/saad: atomically add $t2 to memory */ \
"ld %0, 0($t0)" "\n\t" /* memory value AFTER */ \
: "=&r" (out), "=&r" (outPre) \
: "r" (mem) , "r" (offset), "r" (value) \
: "$12", "$13", "$14", "cc", "memory" \
); \
printf("%s :: value: 0x%llx, memPre: 0x%lx, mem: 0x%lx\n", \
instruction, value, outPre, out); \
}
/* TEST4_32/64 is used for load atomic add/swap instructions. Following
operations are performed by macro after execution of each instruction:
1: Move arguments to register.
2: Add offset and base address to make absolute address.
3: Execute instruction.
4: Move result to register.
5: Load memory data ('lw' for 32bit instruction and 'ld' for 64bit).
*/
/* TEST4_32 drives a 32-bit load-atomic add/swap instruction (laa/law):
   forms the absolute address in $t0, executes "instr $t3, ($t0), $t1"
   (note $t1 — still holding the offset — doubles as the rt operand), then
   re-reads the word with 'lw' to show the post-instruction memory value.
   FIX: the asm writes $t0, $t1 and $t3; "$15" ($t3 under the $tN ->
   $(12+N) numbering TEST3 uses) was missing from the clobber list and is
   now included. */
#define TEST4_32(instruction, offset, mem) \
{ \
unsigned long out = 0; \
unsigned long res_mem = 0; \
__asm__ volatile( \
"move $t0, %2" "\n\t" \
"move $t1, %3" "\n\t" \
"daddu $t0, $t0, $t1" "\n\t" \
instruction " $t3, ($t0), $t1" "\n\t" \
"move %0, $t3" "\n\t" \
"lw %1, 0($t0)" "\n\t" \
: "=&r" (out), "=&r"(res_mem) \
: "r" (mem) , "r" (offset) \
: "$12", "$13", "$15", "cc", "memory" \
); \
printf("%s :: offset: 0x%x, out: 0x%lx, result:0x%lx\n", \
instruction, offset, out, res_mem); \
}
/* TEST4_64: doubleword variant of TEST4_32 (laad/lawd); identical sequence
   but re-reads memory with 'ld'.
   FIX: add the missing "$15" ($t3) clobber, matching the register
   numbering convention TEST3 already uses. */
#define TEST4_64(instruction, offset, mem) \
{ \
unsigned long out = 0; \
unsigned long res_mem = 0; \
__asm__ volatile( \
"move $t0, %2" "\n\t" \
"move $t1, %3" "\n\t" \
"daddu $t0, $t0, $t1" "\n\t" \
instruction " $t3, ($t0), $t1" "\n\t" \
"move %0, $t3" "\n\t" \
"ld %1, 0($t0)" "\n\t" \
: "=&r" (out), "=&r"(res_mem) \
: "r" (mem) , "r" (offset) \
: "$12", "$13", "$15", "cc", "memory" \
); \
printf("%s :: offset: 0x%x, out: 0x%lx, result: 0x%lx\n", \
instruction, offset, out, res_mem); \
}
/* Cavium OCTEON opcodes exercised by main().  The driver loop iterates
   "op = BADDU; op <= LACD; op++", so BADDU must stay first and LACD last —
   do not reorder. */
typedef enum {
BADDU, POP, DPOP, SAA, SAAD, LAA, LAAD, LAW, LAWD, LAI, LAID, LAD, LADD,
LAS, LASD, LAC, LACD
} cvm_op;
int main()
{
#if (_MIPS_ARCH_OCTEON2)
init_reg_val_double();
int i,j;
cvm_op op;
for (op = BADDU; op <= LACD; op++) {
switch(op){
/* Unsigned Byte Add - BADDU rd, rs, rt; Cavium OCTEON */
case BADDU: {
for(i = 4; i < N; i += 4)
for(j = 4; j < N; j += 4)
TEST2("baddu $t3, $t1, $t2", reg_val_double[i],
reg_val_double[j]);
break;
}
case POP: { /* Count Ones in a Word - POP */
for(j = 4; j < N; j += 4)
TEST2("pop $t3, $t1", reg_val_double[j], 0);
break;
}
case DPOP: { /* Count Ones in a Doubleword - DPOP */
for(j = 8; j < N; j += 8)
TEST2("dpop $t3, $t1", reg_val_double[j], 0);
break;
}
case SAA: { /* Atomic Add Word - saa rt, (base). */
copy_reg_val_double();
for(j = 4; j < N; j += 4)
TEST3("saa", j, reg_val_double_copy, reg_val_double[j]);
break;
}
case SAAD: { /* Atomic Add Double - saad rt, (base). */
copy_reg_val_double();
for(j = 8; j < N; j += 8)
TEST3("saad", j, reg_val_double_copy, reg_val_double[j]);
break;
}
case LAA: { /* Load Atomic Add Word - laa rd, (base), rt. */
copy_reg_val_double();
for(j = 4; j < N; j += 4)
TEST4_32("laa", j, reg_val_double_copy);
break;
}
case LAAD: { /* Load Atomic Add Double - laad rd, (base), rt */
copy_reg_val_double();
for(j = 8; j < N; j += 8)
TEST4_64("laad ", j, reg_val_double_copy);
break;
}
case LAW: { /* Load Atomic Swap Word - law rd, (base), rt */
copy_reg_val_double();
for(j = 4; j < N; j += 4)
TEST4_32("law", j, reg_val_double_copy);
break;
}
case LAWD: { /* Load Atomic Swap Double - lawd rd, (base), rt */
copy_reg_val_double();
for(j = 8; j < N; j += 8)
TEST4_64("lawd", j, reg_val_double_copy);
break;
}
case LAI: { /* Load Atomic Increment Word - lai rd, (base) */
copy_reg_val_double();
for(i = 4; i < N; i += 4)
TEST1_32("lai", i, reg_val_double_copy);
break;
}
case LAID: { /* Load Atomic Increment Double - laid rd, (base) */
copy_reg_val_double();
for(i = 8; i < N; i += 8)
TEST1_64("laid ", i, reg_val_double_copy);
break;
}
case LAD: { /* Load Atomic Decrement Word - lad rd, (base) */
copy_reg_val_double();
for(i = 4; i < N; i += 4)
TEST1_32("lad", i, reg_val_double_copy);
break;
}
case LADD: { /* Load Atomic Decrement Double - ladd rd, (base) */
copy_reg_val_double();
for(i = 8; i < N; i += 8)
TEST1_64("ladd",i, reg_val_double_copy);
break;
}
case LAS:{ /* Load Atomic Set Word - las rd, (base) */
copy_reg_val_double();
for(i = 4; i < N; i += 4)
TEST1_32("las",i, reg_val_double_copy);
break;
}
case LASD:{ /* Load Atomic Set Word - lasd rd, (base) */
copy_reg_val_double();
for(i = 8; i < N; i += 8)
TEST1_64("lasd",i, reg_val_double_copy);
break;
}
case LAC: { /* Load Atomic Clear Word - lac rd, (base) */
copy_reg_val_double();
for(i = 4; i < N; i += 4)
TEST1_32("lac",i, reg_val_double_copy);
break;
}
case LACD: { /* Load Atomic Clear Double - lacd rd, (base) */
copy_reg_val_double();
for(i = 8; i < N; i += 8)
TEST1_64("lacd",i, reg_val_double_copy);
break;
}
default:
printf("Nothing to be executed \n");
}
}
#endif
return 0;
}