/* C++ program  |  1144 lines  |  34.43 KB */


#include <stdio.h>

typedef  unsigned long long int  ULong;
typedef  unsigned int            UInt;

/* Exercise the 64-bit ANDN instruction.
 *
 * The instruction runs twice, each time into a destination register that
 * was first filled with 0x5555555555555555: once with arg1 supplied in a
 * register and once with arg1 supplied as a memory operand.
 *
 *   flags  receives RFLAGS after the first run, masked with 0x8d5
 *          (the OF, SF, ZF, AF, PF and CF bits)
 *   res    receives the destination register after the first run
 *
 * If the second run produces a different result or different masked
 * flags, "Difference between r and m variants" is printed.
 */
__attribute__((noinline))
void do_andn64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg1, ULong arg2 )
{
  ULong val_r, efl_r;   /* outcome of the register-operand run */
  ULong val_m, efl_m;   /* outcome of the memory-operand run */

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "andn %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_r), [fl] "=r" (efl_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;
  *flags = efl_r & 0x8d5;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "andn %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_m), [fl] "=r" (efl_m)
    : [s1] "m" (arg1), [s2] "r" (arg2)
    : "cc"
  );

  /* Both runs must agree on the value and on the six status bits. */
  if (!(*res == val_m && *flags == (efl_m & 0x8d5))) {
    printf ("Difference between r and m variants\n");
  }
}

/* Exercise the 32-bit ANDN instruction.
 *
 * The instruction runs twice, each time after the full 64-bit destination
 * register was filled with 0x5555555555555555: once with arg1 supplied in
 * a register and once with arg1 supplied as a memory operand.  The
 * destination is written through its 32-bit name (%k), but all 64 bits of
 * the register are reported.
 *
 *   flags  receives RFLAGS after the first run, masked with 0x8d5
 *          (the OF, SF, ZF, AF, PF and CF bits)
 *   res    receives the 64-bit destination register after the first run
 *
 * If the second run produces a different result or different masked
 * flags, "Difference between r and m variants" is printed.
 */
__attribute__((noinline))
void do_andn32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg1, UInt arg2 )
{
  ULong val_r, efl_r;   /* outcome of the register-operand run */
  ULong val_m, efl_m;   /* outcome of the memory-operand run */

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "andn %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_r), [fl] "=r" (efl_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;
  *flags = efl_r & 0x8d5;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "andn %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_m), [fl] "=r" (efl_m)
    : [s1] "m" (arg1), [s2] "r" (arg2)
    : "cc"
  );

  /* Both runs must agree on the value and on the six status bits. */
  if (!(*res == val_m && *flags == (efl_m & 0x8d5))) {
    printf ("Difference between r and m variants\n");
  }
}


/* Exercise the 64-bit MULX instruction and check it leaves flags alone.
 *
 * arg1 is loaded into RDX and MULX is run twice: with arg2 supplied in a
 * register, then with arg2 supplied as a memory operand.  Both destination
 * registers are filled with 0x5555555555555555 before each run.
 *
 * In the first run the six status bits selected by 0x8d5 (OF, SF, ZF, AF,
 * PF, CF) are inverted in RFLAGS beforehand, and inverted again after the
 * post-instruction value has been recorded.  In the second run RFLAGS is
 * simply recorded before and after the instruction.
 *
 *   res1  receives the last MULX operand from the first run (presumably
 *         the high half of the product -- confirm against the ISA manual)
 *   res2  receives the middle MULX operand from the first run (presumably
 *         the low half)
 *
 * Prints "Difference between r and m variants" if the runs disagree, and
 * "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_mulx64 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2,
                 ULong arg1, ULong arg2 )
{
  ULong out0_r, out1_r, pre_r, post_r;   /* register-operand run */
  ULong out0_m, out1_m, pre_m, post_m;   /* memory-operand run */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[d0]\n\t"
    "movabsq $0x5555555555555555, %[d1]\n\t"
    "movq %[s1], %%rdx\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "mulx %[s2], %[d1], %[d0]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [d0] "=&r" (out0_r), [d1] "=&r" (out1_r),
      [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [s1] "g" (arg1), [s2] "r" (arg2)
    : "cc", "rdx"
  );
  *res1 = out0_r;
  *res2 = out1_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[d0]\n\t"
    "movabsq $0x5555555555555555, %[d1]\n\t"
    "movq %[s1], %%rdx\n\t"
    "pushfq\n\t"
    "popq %[pre]\n\t"
    "mulx %[s2], %[d1], %[d0]\n\t"
    "pushfq\n\t"
    "popq %[post]\n"
    : [d0] "=&r" (out0_m), [d1] "=&r" (out1_m),
      [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [s1] "g" (arg1), [s2] "m" (arg2)
    : "cc", "rdx"
  );

  if (!(*res1 == out0_m && *res2 == out1_m)) {
    printf ("Difference between r and m variants\n");
  }

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}

/* Exercise the 32-bit MULX instruction and check it leaves flags alone.
 *
 * arg1 is loaded into EDX and MULX is run twice: with arg2 supplied in a
 * register, then with arg2 supplied as a memory operand.  Both 64-bit
 * destination registers are filled with 0x5555555555555555 before each
 * run and are written through their 32-bit names (%k); all 64 bits of
 * each register are reported.
 *
 * In the first run the six status bits selected by 0x8d5 (OF, SF, ZF, AF,
 * PF, CF) are inverted in RFLAGS beforehand, and inverted again after the
 * post-instruction value has been recorded.  In the second run RFLAGS is
 * simply recorded before and after the instruction.
 *
 *   res1  receives the last MULX operand from the first run (presumably
 *         the high half of the product -- confirm against the ISA manual)
 *   res2  receives the middle MULX operand from the first run (presumably
 *         the low half)
 *
 * Prints "Difference between r and m variants" if the runs disagree, and
 * "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_mulx32 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2,
                 UInt arg1, UInt arg2 )
{
  ULong out0_r, out1_r, pre_r, post_r;   /* register-operand run */
  ULong out0_m, out1_m, pre_m, post_m;   /* memory-operand run */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[d0]\n\t"
    "movabsq $0x5555555555555555, %[d1]\n\t"
    "movl %[s1], %%edx\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "mulx %[s2], %k[d1], %k[d0]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [d0] "=&r" (out0_r), [d1] "=&r" (out1_r),
      [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [s1] "g" (arg1), [s2] "r" (arg2)
    : "cc", "rdx"
  );
  *res1 = out0_r;
  *res2 = out1_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[d0]\n\t"
    "movabsq $0x5555555555555555, %[d1]\n\t"
    "movl %[s1], %%edx\n\t"
    "pushfq\n\t"
    "popq %[pre]\n\t"
    "mulx %[s2], %k[d1], %k[d0]\n\t"
    "pushfq\n\t"
    "popq %[post]\n"
    : [d0] "=&r" (out0_m), [d1] "=&r" (out1_m),
      [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [s1] "g" (arg1), [s2] "m" (arg2)
    : "cc", "rdx"
  );

  if (!(*res1 == out0_m && *res2 == out1_m)) {
    printf ("Difference between r and m variants\n");
  }

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}


/* Exercise the 64-bit SARX instruction and check it leaves flags alone.
 *
 * Two runs are made: with arg2 supplied in a register, then with arg2
 * supplied as a memory operand (arg1 is always in a register).  Before
 * each run the six status bits selected by 0x8d5 (OF, SF, ZF, AF, PF, CF)
 * are inverted in RFLAGS and that value is recorded; after the
 * instruction RFLAGS is recorded again and the same bits are inverted
 * once more before RFLAGS is reloaded.
 *
 *   res  receives the destination register after the first run; the
 *        register is filled with 0x5555555555555555 before each run
 *
 * Prints "Difference between r and m variants" if the two results differ,
 * and "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_sarx64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
{
  ULong val_r, pre_r, post_r;   /* register-operand run */
  ULong val_m, pre_m, post_m;   /* memory-operand run */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "sarx %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_r), [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "sarx %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_m), [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [s1] "r" (arg1), [s2] "m" (arg2)
    : "cc"
  );

  if (!(*res == val_m)) {
    printf ("Difference between r and m variants\n");
  }

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}

/* Exercise the 32-bit SARX instruction and check it leaves flags alone.
 *
 * Two runs are made: with arg2 supplied in a register, then with arg2
 * supplied as a memory operand (arg1 is always in a register).  Before
 * each run the six status bits selected by 0x8d5 (OF, SF, ZF, AF, PF, CF)
 * are inverted in RFLAGS and that value is recorded; after the
 * instruction RFLAGS is recorded again and the same bits are inverted
 * once more before RFLAGS is reloaded.
 *
 *   res  receives the full 64-bit destination register after the first
 *        run; the register is filled with 0x5555555555555555 before each
 *        run and then written through its 32-bit name (%k)
 *
 * Prints "Difference between r and m variants" if the two results differ,
 * and "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_sarx32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
{
  ULong val_r, pre_r, post_r;   /* register-operand run */
  ULong val_m, pre_m, post_m;   /* memory-operand run */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "sarx %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_r), [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "sarx %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_m), [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [s1] "r" (arg1), [s2] "m" (arg2)
    : "cc"
  );

  if (!(*res == val_m)) {
    printf ("Difference between r and m variants\n");
  }

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}


/* Exercise the 64-bit SHLX instruction and check it leaves flags alone.
 *
 * Two runs are made: with arg2 supplied in a register, then with arg2
 * supplied as a memory operand (arg1 is always in a register).  Before
 * each run the six status bits selected by 0x8d5 (OF, SF, ZF, AF, PF, CF)
 * are inverted in RFLAGS and that value is recorded; after the
 * instruction RFLAGS is recorded again and the same bits are inverted
 * once more before RFLAGS is reloaded.
 *
 *   res  receives the destination register after the first run; the
 *        register is filled with 0x5555555555555555 before each run
 *
 * Prints "Difference between r and m variants" if the two results differ,
 * and "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_shlx64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
{
  ULong val_r, pre_r, post_r;   /* register-operand run */
  ULong val_m, pre_m, post_m;   /* memory-operand run */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "shlx %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_r), [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "shlx %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_m), [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [s1] "r" (arg1), [s2] "m" (arg2)
    : "cc"
  );

  if (!(*res == val_m)) {
    printf ("Difference between r and m variants\n");
  }

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}

/* Exercise the 32-bit SHLX instruction and check it leaves flags alone.
 *
 * Two runs are made: with arg2 supplied in a register, then with arg2
 * supplied as a memory operand (arg1 is always in a register).  Before
 * each run the six status bits selected by 0x8d5 (OF, SF, ZF, AF, PF, CF)
 * are inverted in RFLAGS and that value is recorded; after the
 * instruction RFLAGS is recorded again and the same bits are inverted
 * once more before RFLAGS is reloaded.
 *
 *   res  receives the full 64-bit destination register after the first
 *        run; the register is filled with 0x5555555555555555 before each
 *        run and then written through its 32-bit name (%k)
 *
 * Prints "Difference between r and m variants" if the two results differ,
 * and "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_shlx32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
{
  ULong val_r, pre_r, post_r;   /* register-operand run */
  ULong val_m, pre_m, post_m;   /* memory-operand run */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "shlx %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_r), [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "shlx %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_m), [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [s1] "r" (arg1), [s2] "m" (arg2)
    : "cc"
  );

  if (!(*res == val_m)) {
    printf ("Difference between r and m variants\n");
  }

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}


/* Exercise the 64-bit SHRX instruction and check it leaves flags alone.
 *
 * Two runs are made: with arg2 supplied in a register, then with arg2
 * supplied as a memory operand (arg1 is always in a register).  Before
 * each run the six status bits selected by 0x8d5 (OF, SF, ZF, AF, PF, CF)
 * are inverted in RFLAGS and that value is recorded; after the
 * instruction RFLAGS is recorded again and the same bits are inverted
 * once more before RFLAGS is reloaded.
 *
 *   res  receives the destination register after the first run; the
 *        register is filled with 0x5555555555555555 before each run
 *
 * Prints "Difference between r and m variants" if the two results differ,
 * and "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_shrx64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
{
  ULong val_r, pre_r, post_r;   /* register-operand run */
  ULong val_m, pre_m, post_m;   /* memory-operand run */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "shrx %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_r), [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "shrx %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_m), [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [s1] "r" (arg1), [s2] "m" (arg2)
    : "cc"
  );

  if (!(*res == val_m)) {
    printf ("Difference between r and m variants\n");
  }

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}

/* Exercise the 32-bit SHRX instruction and check it leaves flags alone.
 *
 * Two runs are made: with arg2 supplied in a register, then with arg2
 * supplied as a memory operand (arg1 is always in a register).  Before
 * each run the six status bits selected by 0x8d5 (OF, SF, ZF, AF, PF, CF)
 * are inverted in RFLAGS and that value is recorded; after the
 * instruction RFLAGS is recorded again and the same bits are inverted
 * once more before RFLAGS is reloaded.
 *
 *   res  receives the full 64-bit destination register after the first
 *        run; the register is filled with 0x5555555555555555 before each
 *        run and then written through its 32-bit name (%k)
 *
 * Prints "Difference between r and m variants" if the two results differ,
 * and "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_shrx32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
{
  ULong val_r, pre_r, post_r;   /* register-operand run */
  ULong val_m, pre_m, post_m;   /* memory-operand run */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "shrx %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_r), [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "shrx %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_m), [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [s1] "r" (arg1), [s2] "m" (arg2)
    : "cc"
  );

  if (!(*res == val_m)) {
    printf ("Difference between r and m variants\n");
  }

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}


/* Exercise the 64-bit RORX instruction and check it leaves flags alone.
 *
 * Two different rotations of arg are performed: an immediate count of 12
 * with arg in a register, and an immediate count of 67 with arg supplied
 * as a memory operand.  (67 is larger than the operand width; presumably
 * this is meant to exercise how the count is reduced -- confirm against
 * the ISA manual.)  Before each run the six status bits selected by 0x8d5
 * (OF, SF, ZF, AF, PF, CF) are inverted in RFLAGS and that value is
 * recorded; after the instruction RFLAGS is recorded again and the same
 * bits are inverted once more before RFLAGS is reloaded.
 *
 *   res1  receives the destination register of the count-12 run
 *   res2  receives the destination register of the count-67 run
 *
 * The destination is filled with 0x5555555555555555 before each run.
 * Prints "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_rorx64 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2, ULong arg )
{
  ULong val_r, pre_r, post_r;   /* register-operand run, count 12 */
  ULong val_m, pre_m, post_m;   /* memory-operand run, count 67 */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "rorx $12, %[src], %[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_r), [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [src] "r" (arg)
    : "cc"
  );
  *res1 = val_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "rorx $67, %[src], %[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_m), [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [src] "m" (arg)
    : "cc"
  );
  *res2 = val_m;

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}

/* Exercise the 32-bit RORX instruction and check it leaves flags alone.
 *
 * Two different rotations of arg are performed: an immediate count of 12
 * with arg in a register, and an immediate count of 67 with arg supplied
 * as a memory operand.  (67 is larger than the operand width; presumably
 * this is meant to exercise how the count is reduced -- confirm against
 * the ISA manual.)  Before each run the six status bits selected by 0x8d5
 * (OF, SF, ZF, AF, PF, CF) are inverted in RFLAGS and that value is
 * recorded; after the instruction RFLAGS is recorded again and the same
 * bits are inverted once more before RFLAGS is reloaded.
 *
 *   res1  receives the 64-bit destination register of the count-12 run
 *   res2  receives the 64-bit destination register of the count-67 run
 *
 * The destination is filled with 0x5555555555555555 before each run and
 * then written through its 32-bit name (%k).
 * Prints "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_rorx32 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2, UInt arg )
{
  ULong val_r, pre_r, post_r;   /* register-operand run, count 12 */
  ULong val_m, pre_m, post_m;   /* memory-operand run, count 67 */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "rorx $12, %[src], %k[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_r), [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [src] "r" (arg)
    : "cc"
  );
  *res1 = val_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "rorx $67, %[src], %k[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_m), [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [src] "m" (arg)
    : "cc"
  );
  *res2 = val_m;

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}


/* Exercise the 64-bit BLSI instruction.
 *
 * The instruction runs twice, each time into a destination register that
 * was first filled with 0x5555555555555555: once with arg supplied in a
 * register and once with arg supplied as a memory operand.
 *
 *   flags  receives RFLAGS after the first run, masked with 0x8d5
 *          (the OF, SF, ZF, AF, PF and CF bits)
 *   res    receives the destination register after the first run
 *
 * If the second run produces a different result or different masked
 * flags, "Difference between r and m variants" is printed.
 */
__attribute__((noinline))
void do_blsi64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
{
  ULong val_r, efl_r;   /* outcome of the register-operand run */
  ULong val_m, efl_m;   /* outcome of the memory-operand run */

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "blsi %[src], %[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_r), [fl] "=&r" (efl_r)
    : [src] "r" (arg)
    : "cc"
  );
  *res = val_r;
  *flags = efl_r & 0x8d5;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "blsi %[src], %[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_m), [fl] "=&r" (efl_m)
    : [src] "m" (arg)
    : "cc"
  );

  /* Both runs must agree on the value and on the six status bits. */
  if (!(*res == val_m && *flags == (efl_m & 0x8d5))) {
    printf ("Difference between r and m variants\n");
  }
}

/* Exercise the 32-bit BLSI instruction.
 *
 * The instruction runs twice, each time after the full 64-bit destination
 * register was filled with 0x5555555555555555: once with arg supplied in
 * a register and once with arg supplied as a memory operand.  The
 * destination is written through its 32-bit name (%k), but all 64 bits of
 * the register are reported.
 *
 *   flags  receives RFLAGS after the first run, masked with 0x8d5
 *          (the OF, SF, ZF, AF, PF and CF bits)
 *   res    receives the 64-bit destination register after the first run
 *
 * If the second run produces a different result or different masked
 * flags, "Difference between r and m variants" is printed.
 */
__attribute__((noinline))
void do_blsi32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg )
{
  ULong val_r, efl_r;   /* outcome of the register-operand run */
  ULong val_m, efl_m;   /* outcome of the memory-operand run */

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "blsi %[src], %k[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_r), [fl] "=&r" (efl_r)
    : [src] "r" (arg)
    : "cc"
  );
  *res = val_r;
  *flags = efl_r & 0x8d5;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "blsi %[src], %k[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_m), [fl] "=&r" (efl_m)
    : [src] "m" (arg)
    : "cc"
  );

  /* Both runs must agree on the value and on the six status bits. */
  if (!(*res == val_m && *flags == (efl_m & 0x8d5))) {
    printf ("Difference between r and m variants\n");
  }
}


/* Exercise the 64-bit BLSMSK instruction.
 *
 * The instruction runs twice, each time into a destination register that
 * was first filled with 0x5555555555555555: once with arg supplied in a
 * register and once with arg supplied as a memory operand.
 *
 *   flags  receives RFLAGS after the first run, masked with 0x8d5
 *          (the OF, SF, ZF, AF, PF and CF bits)
 *   res    receives the destination register after the first run
 *
 * If the second run produces a different result or different masked
 * flags, "Difference between r and m variants" is printed.
 */
__attribute__((noinline))
void do_blsmsk64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
{
  ULong val_r, efl_r;   /* outcome of the register-operand run */
  ULong val_m, efl_m;   /* outcome of the memory-operand run */

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "blsmsk %[src], %[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_r), [fl] "=&r" (efl_r)
    : [src] "r" (arg)
    : "cc"
  );
  *res = val_r;
  *flags = efl_r & 0x8d5;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "blsmsk %[src], %[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_m), [fl] "=&r" (efl_m)
    : [src] "m" (arg)
    : "cc"
  );

  /* Both runs must agree on the value and on the six status bits. */
  if (!(*res == val_m && *flags == (efl_m & 0x8d5))) {
    printf ("Difference between r and m variants\n");
  }
}

/* Exercise the 32-bit BLSMSK instruction.
 *
 * The instruction runs twice, each time after the full 64-bit destination
 * register was filled with 0x5555555555555555: once with arg supplied in
 * a register and once with arg supplied as a memory operand.  The
 * destination is written through its 32-bit name (%k), but all 64 bits of
 * the register are reported.
 *
 *   flags  receives RFLAGS after the first run, masked with 0x8d5
 *          (the OF, SF, ZF, AF, PF and CF bits)
 *   res    receives the 64-bit destination register after the first run
 *
 * If the second run produces a different result or different masked
 * flags, "Difference between r and m variants" is printed.
 */
__attribute__((noinline))
void do_blsmsk32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg )
{
  ULong val_r, efl_r;   /* outcome of the register-operand run */
  ULong val_m, efl_m;   /* outcome of the memory-operand run */

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "blsmsk %[src], %k[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_r), [fl] "=&r" (efl_r)
    : [src] "r" (arg)
    : "cc"
  );
  *res = val_r;
  *flags = efl_r & 0x8d5;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "blsmsk %[src], %k[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_m), [fl] "=&r" (efl_m)
    : [src] "m" (arg)
    : "cc"
  );

  /* Both runs must agree on the value and on the six status bits. */
  if (!(*res == val_m && *flags == (efl_m & 0x8d5))) {
    printf ("Difference between r and m variants\n");
  }
}


/* Exercise the 64-bit BLSR instruction.
 *
 * The instruction runs twice, each time into a destination register that
 * was first filled with 0x5555555555555555: once with arg supplied in a
 * register and once with arg supplied as a memory operand.
 *
 *   flags  receives RFLAGS after the first run, masked with 0x8d5
 *          (the OF, SF, ZF, AF, PF and CF bits)
 *   res    receives the destination register after the first run
 *
 * If the second run produces a different result or different masked
 * flags, "Difference between r and m variants" is printed.
 */
__attribute__((noinline))
void do_blsr64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
{
  ULong val_r, efl_r;   /* outcome of the register-operand run */
  ULong val_m, efl_m;   /* outcome of the memory-operand run */

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "blsr %[src], %[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_r), [fl] "=&r" (efl_r)
    : [src] "r" (arg)
    : "cc"
  );
  *res = val_r;
  *flags = efl_r & 0x8d5;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "blsr %[src], %[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_m), [fl] "=&r" (efl_m)
    : [src] "m" (arg)
    : "cc"
  );

  /* Both runs must agree on the value and on the six status bits. */
  if (!(*res == val_m && *flags == (efl_m & 0x8d5))) {
    printf ("Difference between r and m variants\n");
  }
}

/* Exercise the 32-bit BLSR instruction.
 *
 * The instruction runs twice, each time after the full 64-bit destination
 * register was filled with 0x5555555555555555: once with arg supplied in
 * a register and once with arg supplied as a memory operand.  The
 * destination is written through its 32-bit name (%k), but all 64 bits of
 * the register are reported.
 *
 *   flags  receives RFLAGS after the first run, masked with 0x8d5
 *          (the OF, SF, ZF, AF, PF and CF bits)
 *   res    receives the 64-bit destination register after the first run
 *
 * If the second run produces a different result or different masked
 * flags, "Difference between r and m variants" is printed.
 */
__attribute__((noinline))
void do_blsr32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg )
{
  ULong val_r, efl_r;   /* outcome of the register-operand run */
  ULong val_m, efl_m;   /* outcome of the memory-operand run */

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "blsr %[src], %k[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_r), [fl] "=&r" (efl_r)
    : [src] "r" (arg)
    : "cc"
  );
  *res = val_r;
  *flags = efl_r & 0x8d5;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "blsr %[src], %k[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_m), [fl] "=&r" (efl_m)
    : [src] "m" (arg)
    : "cc"
  );

  /* Both runs must agree on the value and on the six status bits. */
  if (!(*res == val_m && *flags == (efl_m & 0x8d5))) {
    printf ("Difference between r and m variants\n");
  }
}


/* Exercise the 64-bit BEXTR instruction.
 *
 * The instruction runs twice, each time into a destination register that
 * was first filled with 0x5555555555555555: once with arg2 supplied in a
 * register and once with arg2 supplied as a memory operand (arg1 is
 * always in a register).
 *
 *   flags  receives RFLAGS after the first run, masked with 0x8d5
 *          (the OF, SF, ZF, AF, PF and CF bits)
 *   res    receives the destination register after the first run
 *
 * If the second run produces a different result or different masked
 * flags, "Difference between r and m variants" is printed.
 */
__attribute__((noinline))
void do_bextr64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
                  ULong arg1, ULong arg2 )
{
  ULong val_r, efl_r;   /* outcome of the register-operand run */
  ULong val_m, efl_m;   /* outcome of the memory-operand run */

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "bextr %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_r), [fl] "=&r" (efl_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;
  *flags = efl_r & 0x8d5;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "bextr %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_m), [fl] "=&r" (efl_m)
    : [s1] "r" (arg1), [s2] "m" (arg2)
    : "cc"
  );

  /* Both runs must agree on the value and on the six status bits. */
  if (!(*res == val_m && *flags == (efl_m & 0x8d5))) {
    printf ("Difference between r and m variants\n");
  }
}

/* Exercise the 32-bit BEXTR instruction.
 *
 * The instruction runs twice, each time after the full 64-bit destination
 * register was filled with 0x5555555555555555: once with arg2 supplied in
 * a register and once with arg2 supplied as a memory operand (arg1 is
 * always in a register).  The destination is written through its 32-bit
 * name (%k), but all 64 bits of the register are reported.
 *
 *   flags  receives RFLAGS after the first run, masked with 0x8d5
 *          (the OF, SF, ZF, AF, PF and CF bits)
 *   res    receives the 64-bit destination register after the first run
 *
 * If the second run produces a different result or different masked
 * flags, "Difference between r and m variants" is printed.
 */
__attribute__((noinline))
void do_bextr32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
                  UInt arg1, UInt arg2 )
{
  ULong val_r, efl_r;   /* outcome of the register-operand run */
  ULong val_m, efl_m;   /* outcome of the memory-operand run */

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "bextr %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_r), [fl] "=&r" (efl_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;
  *flags = efl_r & 0x8d5;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "bextr %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_m), [fl] "=&r" (efl_m)
    : [s1] "r" (arg1), [s2] "m" (arg2)
    : "cc"
  );

  /* Both runs must agree on the value and on the six status bits. */
  if (!(*res == val_m && *flags == (efl_m & 0x8d5))) {
    printf ("Difference between r and m variants\n");
  }
}


/* Exercise the 64-bit BZHI instruction.
 *
 * The instruction runs twice, each time into a destination register that
 * was first filled with 0x5555555555555555: once with arg2 supplied in a
 * register and once with arg2 supplied as a memory operand (arg1 is
 * always in a register).
 *
 *   flags  receives RFLAGS after the first run, masked with 0x8d5
 *          (the OF, SF, ZF, AF, PF and CF bits)
 *   res    receives the destination register after the first run
 *
 * If the second run produces a different result or different masked
 * flags, "Difference between r and m variants" is printed.
 */
__attribute__((noinline))
void do_bzhi64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
                 ULong arg1, ULong arg2 )
{
  ULong val_r, efl_r;   /* outcome of the register-operand run */
  ULong val_m, efl_m;   /* outcome of the memory-operand run */

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "bzhi %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_r), [fl] "=&r" (efl_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;
  *flags = efl_r & 0x8d5;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "bzhi %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_m), [fl] "=&r" (efl_m)
    : [s1] "r" (arg1), [s2] "m" (arg2)
    : "cc"
  );

  /* Both runs must agree on the value and on the six status bits. */
  if (!(*res == val_m && *flags == (efl_m & 0x8d5))) {
    printf ("Difference between r and m variants\n");
  }
}

/* Exercise the 32-bit BZHI instruction.
 *
 * The instruction runs twice, each time after the full 64-bit destination
 * register was filled with 0x5555555555555555: once with arg2 supplied in
 * a register and once with arg2 supplied as a memory operand (arg1 is
 * always in a register).  The destination is written through its 32-bit
 * name (%k), but all 64 bits of the register are reported.
 *
 *   flags  receives RFLAGS after the first run, masked with 0x8d5
 *          (the OF, SF, ZF, AF, PF and CF bits)
 *   res    receives the 64-bit destination register after the first run
 *
 * If the second run produces a different result or different masked
 * flags, "Difference between r and m variants" is printed.
 */
__attribute__((noinline))
void do_bzhi32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
                 UInt arg1, UInt arg2 )
{
  ULong val_r, efl_r;   /* outcome of the register-operand run */
  ULong val_m, efl_m;   /* outcome of the memory-operand run */

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "bzhi %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_r), [fl] "=&r" (efl_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;
  *flags = efl_r & 0x8d5;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "bzhi %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "popq %[fl]\n"
    : [dst] "=&r" (val_m), [fl] "=&r" (efl_m)
    : [s1] "r" (arg1), [s2] "m" (arg2)
    : "cc"
  );

  /* Both runs must agree on the value and on the six status bits. */
  if (!(*res == val_m && *flags == (efl_m & 0x8d5))) {
    printf ("Difference between r and m variants\n");
  }
}


/* Exercise the 64-bit PDEP instruction and check it leaves flags alone.
 *
 * Two runs are made: with arg1 supplied in a register, then with arg1
 * supplied as a memory operand (arg2 is always in a register).  Before
 * each run the six status bits selected by 0x8d5 (OF, SF, ZF, AF, PF, CF)
 * are inverted in RFLAGS and that value is recorded; after the
 * instruction RFLAGS is recorded again and the same bits are inverted
 * once more before RFLAGS is reloaded.
 *
 *   res  receives the destination register after the first run; the
 *        register is filled with 0x5555555555555555 before each run
 *
 * Prints "Difference between r and m variants" if the two results differ,
 * and "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_pdep64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
{
  ULong val_r, pre_r, post_r;   /* register-operand run */
  ULong val_m, pre_m, post_m;   /* memory-operand run */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "pdep %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_r), [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "pdep %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_m), [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [s1] "m" (arg1), [s2] "r" (arg2)
    : "cc"
  );

  if (!(*res == val_m)) {
    printf ("Difference between r and m variants\n");
  }

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}

/* Exercise the 32-bit PDEP instruction and check it leaves flags alone.
 *
 * Two runs are made: with arg1 supplied in a register, then with arg1
 * supplied as a memory operand (arg2 is always in a register).  Before
 * each run the six status bits selected by 0x8d5 (OF, SF, ZF, AF, PF, CF)
 * are inverted in RFLAGS and that value is recorded; after the
 * instruction RFLAGS is recorded again and the same bits are inverted
 * once more before RFLAGS is reloaded.
 *
 *   res  receives the full 64-bit destination register after the first
 *        run; the register is filled with 0x5555555555555555 before each
 *        run and then written through its 32-bit name (%k)
 *
 * Prints "Difference between r and m variants" if the two results differ,
 * and "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_pdep32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
{
  ULong val_r, pre_r, post_r;   /* register-operand run */
  ULong val_m, pre_m, post_m;   /* memory-operand run */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "pdep %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_r), [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "pdep %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_m), [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [s1] "m" (arg1), [s2] "r" (arg2)
    : "cc"
  );

  if (!(*res == val_m)) {
    printf ("Difference between r and m variants\n");
  }

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}


/* Exercise the 64-bit PEXT instruction and check it leaves flags alone.
 *
 * Two runs are made: with arg1 supplied in a register, then with arg1
 * supplied as a memory operand (arg2 is always in a register).  Before
 * each run the six status bits selected by 0x8d5 (OF, SF, ZF, AF, PF, CF)
 * are inverted in RFLAGS and that value is recorded; after the
 * instruction RFLAGS is recorded again and the same bits are inverted
 * once more before RFLAGS is reloaded.
 *
 *   res  receives the destination register after the first run; the
 *        register is filled with 0x5555555555555555 before each run
 *
 * Prints "Difference between r and m variants" if the two results differ,
 * and "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_pext64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
{
  ULong val_r, pre_r, post_r;   /* register-operand run */
  ULong val_m, pre_m, post_m;   /* memory-operand run */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "pext %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_r), [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "pext %[s1], %[s2], %[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_m), [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [s1] "m" (arg1), [s2] "r" (arg2)
    : "cc"
  );

  if (!(*res == val_m)) {
    printf ("Difference between r and m variants\n");
  }

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}

/* Exercise the 32-bit PEXT instruction and check it leaves flags alone.
 *
 * Two runs are made: with arg1 supplied in a register, then with arg1
 * supplied as a memory operand (arg2 is always in a register).  Before
 * each run the six status bits selected by 0x8d5 (OF, SF, ZF, AF, PF, CF)
 * are inverted in RFLAGS and that value is recorded; after the
 * instruction RFLAGS is recorded again and the same bits are inverted
 * once more before RFLAGS is reloaded.
 *
 *   res  receives the full 64-bit destination register after the first
 *        run; the register is filled with 0x5555555555555555 before each
 *        run and then written through its 32-bit name (%k)
 *
 * Prints "Difference between r and m variants" if the two results differ,
 * and "Flags changed" if any of the six bits differs across either run.
 */
__attribute__((noinline))
void do_pext32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
{
  ULong val_r, pre_r, post_r;   /* register-operand run */
  ULong val_m, pre_m, post_m;   /* memory-operand run */
  ULong drift;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "pext %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_r), [pre] "=&r" (pre_r), [post] "=r" (post_r)
    : [s1] "r" (arg1), [s2] "r" (arg2)
    : "cc"
  );
  *res = val_r;

  __asm__ __volatile__(
    "movabsq $0x5555555555555555, %[dst]\n\t"
    "pushfq\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "movq (%%rsp), %[pre]\n\t"
    "popfq\n\t"
    "pext %[s1], %[s2], %k[dst]\n\t"
    "pushfq\n\t"
    "movq (%%rsp), %[post]\n\t"
    "xorq $0x8d5, (%%rsp)\n\t"
    "popfq\n"
    : [dst] "=&r" (val_m), [pre] "=&r" (pre_m), [post] "=r" (post_m)
    : [s1] "m" (arg1), [s2] "r" (arg2)
    : "cc"
  );

  if (!(*res == val_m)) {
    printf ("Difference between r and m variants\n");
  }

  /* Any status bit that differs between "before" and "after" shows up here. */
  drift = (pre_r ^ post_r) | (pre_m ^ post_m);
  if ((drift & 0x8d5) != 0) {
    printf ("Flags changed\n");
  }
}


/* Derive the next test operand from the current one.  The value shrinks
   on every step and a value of 1 maps to 0, which is what ends each of
   the loops in main. */
static ULong next_word ( ULong w )
{
   return ((w >> 2) | (w >> 1)) + (w / 17ULL);
}

/* Test driver.  For every instruction helper in turn, restart from the
   same seed operand(s), call the helper, print the operands and what the
   helper reported, and step the operand(s) with next_word.  Each loop
   stops right after the iteration in which the first operand is zero, so
   the all-zero first operand is tested and printed too.

   The 32-bit helpers take UInt parameters, so the 64-bit operands are
   implicitly converted at the call; the printed operands are still the
   full 64-bit values.  Always returns 0. */
int main ( void )
{
   const ULong seed1 = 0xFEDC192837475675ULL;
   const ULong seed2 = 0x57657438291CDEF0ULL;
   ULong w1, w2;
   ULong res, res1, res2;
   UInt  flags;

   /* ---- andn ---- */
   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_andn64(&flags, &res, w1, w2);
      printf("andn64 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
      if (w1 == 0) break;
   }

   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_andn32(&flags, &res, w1, w2);
      printf("andn32 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
      if (w1 == 0) break;
   }

   /* ---- mulx ---- */
   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_mulx64(&res1, &res2, w1, w2);
      printf("mulx64 %016llx %016llx -> %016llx %016llx\n", w1, w2, res1, res2);
      if (w1 == 0) break;
   }

   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_mulx32(&res1, &res2, w1, w2);
      printf("mulx32 %016llx %016llx -> %016llx %016llx\n", w1, w2, res1, res2);
      if (w1 == 0) break;
   }

   /* ---- sarx ---- */
   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_sarx64(&res, w1, w2);
      printf("sarx64 %016llx %016llx -> %016llx\n", w1, w2, res);
      if (w1 == 0) break;
   }

   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_sarx32(&res, w1, w2);
      printf("sarx32 %016llx %016llx -> %016llx\n", w1, w2, res);
      if (w1 == 0) break;
   }

   /* ---- shlx ---- */
   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_shlx64(&res, w1, w2);
      printf("shlx64 %016llx %016llx -> %016llx\n", w1, w2, res);
      if (w1 == 0) break;
   }

   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_shlx32(&res, w1, w2);
      printf("shlx32 %016llx %016llx -> %016llx\n", w1, w2, res);
      if (w1 == 0) break;
   }

   /* ---- shrx ---- */
   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_shrx64(&res, w1, w2);
      printf("shrx64 %016llx %016llx -> %016llx\n", w1, w2, res);
      if (w1 == 0) break;
   }

   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_shrx32(&res, w1, w2);
      printf("shrx32 %016llx %016llx -> %016llx\n", w1, w2, res);
      if (w1 == 0) break;
   }

   /* ---- rorx (single operand) ---- */
   for (w1 = seed1; ; w1 = next_word(w1)) {
      do_rorx64(&res1, &res2, w1);
      printf("rorx64 %016llx -> %016llx %016llx\n", w1, res1, res2);
      if (w1 == 0) break;
   }

   for (w1 = seed1; ; w1 = next_word(w1)) {
      do_rorx32(&res1, &res2, w1);
      printf("rorx32 %016llx -> %016llx %016llx\n", w1, res1, res2);
      if (w1 == 0) break;
   }

   /* ---- blsi (single operand) ---- */
   for (w1 = seed1; ; w1 = next_word(w1)) {
      do_blsi64(&flags, &res, w1);
      printf("blsi64 %016llx -> %016llx %04x\n", w1, res, flags);
      if (w1 == 0) break;
   }

   for (w1 = seed1; ; w1 = next_word(w1)) {
      do_blsi32(&flags, &res, w1);
      printf("blsi32 %016llx -> %016llx %04x\n", w1, res, flags);
      if (w1 == 0) break;
   }

   /* ---- blsmsk (single operand) ---- */
   for (w1 = seed1; ; w1 = next_word(w1)) {
      do_blsmsk64(&flags, &res, w1);
      printf("blsmsk64 %016llx -> %016llx %04x\n", w1, res, flags);
      if (w1 == 0) break;
   }

   for (w1 = seed1; ; w1 = next_word(w1)) {
      do_blsmsk32(&flags, &res, w1);
      printf("blsmsk32 %016llx -> %016llx %04x\n", w1, res, flags);
      if (w1 == 0) break;
   }

   /* ---- blsr (single operand) ---- */
   for (w1 = seed1; ; w1 = next_word(w1)) {
      do_blsr64(&flags, &res, w1);
      printf("blsr64 %016llx -> %016llx %04x\n", w1, res, flags);
      if (w1 == 0) break;
   }

   for (w1 = seed1; ; w1 = next_word(w1)) {
      do_blsr32(&flags, &res, w1);
      printf("blsr32 %016llx -> %016llx %04x\n", w1, res, flags);
      if (w1 == 0) break;
   }

   /* ---- bextr ---- */
   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_bextr64(&flags, &res, w1, w2);
      printf("bextr64 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
      if (w1 == 0) break;
   }

   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_bextr32(&flags, &res, w1, w2);
      printf("bextr32 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
      if (w1 == 0) break;
   }

   /* ---- bzhi ---- */
   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_bzhi64(&flags, &res, w1, w2);
      printf("bzhi64 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
      if (w1 == 0) break;
   }

   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_bzhi32(&flags, &res, w1, w2);
      printf("bzhi32 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
      if (w1 == 0) break;
   }

   /* ---- pdep ---- */
   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_pdep64(&res, w1, w2);
      printf("pdep64 %016llx %016llx -> %016llx\n", w1, w2, res);
      if (w1 == 0) break;
   }

   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_pdep32(&res, w1, w2);
      printf("pdep32 %016llx %016llx -> %016llx\n", w1, w2, res);
      if (w1 == 0) break;
   }

   /* ---- pext ---- */
   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_pext64(&res, w1, w2);
      printf("pext64 %016llx %016llx -> %016llx\n", w1, w2, res);
      if (w1 == 0) break;
   }

   for (w1 = seed1, w2 = seed2; ; w1 = next_word(w1), w2 = next_word(w2)) {
      do_pext32(&res, w1, w2);
      printf("pext32 %016llx %016llx -> %016llx\n", w1, w2, res);
      if (w1 == 0) break;
   }

   return 0;
}