/* This is an example of a program which does cavium atomic memory operations between two processes which share a page. This test is based on : memcheck/tests/atomic_incs.c */ #include <stdlib.h> #include <stdio.h> #include <string.h> #include <assert.h> #include <unistd.h> #include <sys/wait.h> #include "tests/sys_mman.h" #define N 19 #define NNN 3456987 // Number of repetition. /* Expected values */ int p1_expd[N] = { 2156643710, 2156643710, 3456986, 6913974, 4288053322, 0, 4294967295, 6913974, 21777111, 3456986, 2153186724, 6913974, 21777111, 4294967295, 4288053323, // Test 14 4288053322, 4273190185, // Test 16 0, 0 }; // Test 18 long long int p2_expd[N] = { 12633614303292, 12633614303292, 3555751, 6913974, -6913974, 0, -1, 6913974, 23901514779351, 3456986, 11950752204196, 6913974, 23901514779351, -1, -6913973, // Test 15 -6913974, -23901514779351, // Test 17 0, 0 }; // Test 19 #define IS_8_ALIGNED(_ptr) (0 == (((unsigned long)(_ptr)) & 7)) __attribute__((noinline)) void atomic_saa ( int* p, int n ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "saa $t2, ($t1)" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2" ); #endif } __attribute__((noinline)) void atomic_saad ( long long int* p, int n ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "saad $t2, ($t1)" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2" ); #endif } __attribute__((noinline)) void atomic_laa ( int* p, int n ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "laa $t3, ($t1), $t2" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2" ); #endif } __attribute__((noinline)) void atomic_laad ( long long int* p, int n ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "laad $t3, ($t1), $t2" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2", "t3" ); #endif } __attribute__((noinline)) void atomic_law ( int* p, int n ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "law $t3, ($t1), $t2" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2" ); #endif } __attribute__((noinline)) void atomic_lawd ( long long int* p, int n ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "lawd $t3, ($t1), $t2" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2", "t3" ); #endif } __attribute__((noinline)) void atomic_lai ( int* p ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "lai $t2, ($t1)" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2" ); #endif } __attribute__((noinline)) void atomic_laid ( long long int* p ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "laid $t2, ($t1)" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2" ); #endif } __attribute__((noinline)) void atomic_lad ( int* p ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "lad $t2, ($t1)" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2" ); #endif } __attribute__((noinline)) void atomic_ladd ( long long int* p ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "ladd $t2, ($t1)" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2" ); #endif } __attribute__((noinline)) void atomic_lac ( int* p ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "lac $t2, ($t1)" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2" ); #endif } __attribute__((noinline)) void atomic_lacd ( long long int* p ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "lacd $t2, ($t1)" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2" ); #endif } __attribute__((noinline)) void atomic_las ( int* p ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "las $t2, ($t1)" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2" ); #endif } __attribute__((noinline)) void atomic_lasd ( long long int* p ) { #if (_MIPS_ARCH_OCTEON2) unsigned long block[2] = { (unsigned long)p }; __asm__ __volatile__( "move $t0, %0" "\n\t" "ld $t1, 0($t0)" "\n\t" // p "ld $t2, 8($t0)" "\n\t" // n "lasd $t2, ($t1)" "\n\t" : /*out*/ : /*in*/ "r"(&block[0]) : /*trash*/ "memory", "t0", "t1", "t2" ); #endif } #define TRIOP_AND_SAA(instruction, base1, base2, n) \ { \ __asm__ __volatile__( \ instruction" $t0, (%0), %2" "\n\t" \ "saa $t0, (%1)" "\n\t" \ : /*out*/ \ : /*in*/ "r"(base1), "r"(base2), "r"(n) \ : /*trash*/ "memory", "t0" \ ); \ } #define TRIOP_AND_SAAD(instruction, base1, base2, n) \ { \ __asm__ __volatile__( \ instruction" $t0, (%0), %2" "\n\t" \ "saad $t0, (%1)" "\n\t" \ : /*out*/ \ : /*in*/ "r"(base1), "r"(base2), "r"(n) \ : /*trash*/ "memory", "t0" \ ); \ } #define BINOP_AND_SAA(instruction, base1, base2) \ { \ __asm__ __volatile__( \ instruction" $t0, (%0)" "\n\t" \ "saa $t0, (%1)" "\n\t" \ : /*out*/ \ : /*in*/ "r"(base1), "r"(base2) \ : /*trash*/ "memory", "t0" \ ); \ } #define BINOP_AND_SAAD(instruction, base1, base2) \ { \ __asm__ __volatile__( \ instruction" $t0, (%0)" "\n\t" \ "saad $t0, (%1)" "\n\t" \ : /*out*/ \ : /*in*/ "r"(base1), "r"(base2) \ : /*trash*/ "memory", "t0" \ ); \ } int main ( int argc, char** argv ) { #if (_MIPS_ARCH_OCTEON2) int i, status; char* page[N]; int* p1[N]; long long int* p2[N]; pid_t child, pc2; for (i = 0; i < N; i++) { page[i] = mmap( 0, sysconf(_SC_PAGESIZE), PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0 ); if (page[i] == MAP_FAILED) { perror("mmap failed"); exit(1); } p1[i] = (int*)(page[i] + 0); p2[i] = (long long int*)(page[i] + 256); assert( IS_8_ALIGNED(p1[i]) ); assert( IS_8_ALIGNED(p2[i]) ); memset(page[i], 0, 1024); memset(page[i], 0, 1024); *p1[i] = 0; *p2[i] = 0; } child = fork(); if (child == -1) { perror("fork() failed\n"); return 1; } if (child == 0) { /* --- CHILD --- */ for (i = 0; i < NNN; i++) { atomic_saa(p1[0], i); atomic_saad(p2[0], i + 98765 ); /* ensure we hit the upper 32 bits */ atomic_laa(p1[1], i); atomic_laad(p2[1], i + 98765 ); /* ensure we hit the upper 32 bits */ atomic_law(p1[2], i); atomic_lawd(p2[2], i + 98765 ); /* ensure we hit the upper 32 bits */ atomic_lai(p1[3]); atomic_laid(p2[3]); atomic_lad(p1[4]); atomic_ladd(p2[4]); atomic_lac(p1[5]); atomic_lacd(p2[5]); atomic_las(p1[6]); atomic_lasd(p2[6]); TRIOP_AND_SAA("laa ", p1[7], p1[8], 1) TRIOP_AND_SAAD("laad ", p2[7], p2[8], 1) TRIOP_AND_SAA("law ", p1[9], p1[10], i) TRIOP_AND_SAAD("lawd ", p2[9], p2[10], i) BINOP_AND_SAA("lai ", p1[11], p1[12]) BINOP_AND_SAAD("laid ", p2[11], p2[12]) BINOP_AND_SAA("las ", p1[13], p1[14]) BINOP_AND_SAAD("lasd ", p2[13], p2[14]) BINOP_AND_SAA("lad ", p1[15], p1[16]) BINOP_AND_SAAD("ladd ", p2[15], p2[16]) BINOP_AND_SAA("lac ", p1[17], p1[18]) BINOP_AND_SAAD("lacd ", p2[17], p2[18]) } return 1; /* NOTREACHED */ } /* --- PARENT --- */ for (i = 0; i < NNN; i++) { atomic_saa(p1[0], i); atomic_saad(p2[0], i + 98765); /* ensure we hit the upper 32 bits */ atomic_laa(p1[1], i); atomic_laad(p2[1], i + 98765); /* ensure we hit the upper 32 bits */ atomic_law(p1[2], i); atomic_lawd(p2[2], i + 98765 ); /* ensure we hit the upper 32 bits */ atomic_lai(p1[3]); atomic_laid(p2[3]); atomic_lad(p1[4]); atomic_ladd(p2[4]); atomic_lac(p1[5]); atomic_lacd(p2[5]); atomic_las(p1[6]); atomic_lasd(p2[6]); TRIOP_AND_SAA("laa ", p1[7], p1[8], 1) TRIOP_AND_SAAD("laad ", p2[7], p2[8], 1) TRIOP_AND_SAA("law ", p1[9], p1[10], i) TRIOP_AND_SAAD("lawd ", p2[9], p2[10], i) BINOP_AND_SAA("lai ", p1[11], p1[12]) BINOP_AND_SAAD("laid ", p2[11], p2[12]) BINOP_AND_SAA("las ", p1[13], p1[14]) BINOP_AND_SAAD("lasd ", p2[13], p2[14]) BINOP_AND_SAA("lad ", p1[15], p1[16]) BINOP_AND_SAAD("ladd ", p2[15], p2[16]) BINOP_AND_SAA("lac ", p1[17], p1[18]) BINOP_AND_SAAD("lacd ", p2[17], p2[18]) } pc2 = waitpid(child, &status, 0); assert(pc2 == child); /* assert that child finished normally */ assert(WIFEXITED(status)); printf("Store Atomic Add: 32 bit %u, 64 bit %lld\n", *p1[0], *p2[0]); printf("Load Atomic Add: 32 bit %u, 64 bit %lld\n", *p1[1], *p2[1]); printf("Load Atomic Swap: 32 bit %u, 64 bit %lld\n", *p1[2], *p2[2]); printf("Load Atomic Increment: 32 bit %u, 64 bit %lld\n", *p1[3], *p2[3]); printf("Load Atomic Decrement: 32 bit %u, 64 bit %lld\n", *p1[4], *p2[4]); printf("Load Atomic Clear: 32 bit %u, 64 bit %lld\n", *p1[5], *p2[5]); printf("Load Atomic Set: 32 bit %u, 64 bit %lld\n", *p1[6], *p2[6]); printf("laa and saa: base1: %u, base2: %u\n", *p1[7], *p1[8]); printf("laad and saad: base1: %lld, base2: %lld\n", *p2[7], *p2[8]); printf("law and saa: base1: %u, base2: %u\n", *p1[9], *p1[10]); printf("lawd and saad: base1: %lld, base2: %lld\n", *p2[9], *p2[10]); printf("lai and saa: base1: %u, base2: %u\n", *p1[11], *p1[12]); printf("laid and saad: base1: %lld, base2: %lld\n", *p2[11], *p2[12]); printf("las and saa: base1: %u, base2: %u\n", *p1[13], *p1[14]); printf("lasd and saad: base1: %lld, base2: %lld\n", *p2[13], *p2[14]); printf("lad and saa: base1: %u, base2: %u\n", *p1[15], *p1[16]); printf("ladd and saad: base1: %lld, base2: %lld\n", *p2[15], *p2[16]); printf("lac and saa: base1: %u, base2: %u\n", *p1[17], *p1[18]); printf("lacd and saad: base1: %lld, base2: %lld\n", *p2[17], *p2[18]); for (i = 0; i < N; i++) { if (p1_expd[i] == *p1[i] && p2_expd[i] == *p2[i]) { printf("PASS %d\n", i+1); } else { printf("FAIL %d -- see source code for expected values\n", i+1); } } printf("parent exits\n"); #endif return 0; }