/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

/* Short aliases for the basic C integer types used throughout this
   file. */
typedef  unsigned int   UInt;
typedef  signed int     Int;
typedef  unsigned char  UChar;
typedef  signed char    Char;
typedef  unsigned long long int ULong;
/* Boolean type, stored as a single unsigned byte; False and True are
   its two canonical values. */
typedef  UChar          Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* A 16-byte vector value.  The same storage can be viewed either as
   16 bytes (uChar) or as four UInts (uInt). */
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;

/* Bit positions of the O, S, Z, A, C and P flags inside a flags
   word.  Taken together the six bits form the value 0x8D5, which is
   the mask the h_* helpers apply to the flags captured by pushfq. */
#define SHIFT_O   11
#define SHIFT_S   7
#define SHIFT_Z   6
#define SHIFT_A   4
#define SHIFT_C   0
#define SHIFT_P   2

/* Single-bit masks corresponding to the positions above.  Note that
   these are unsigned long long constants. */
#define MASK_O    (1ULL << SHIFT_O)
#define MASK_S    (1ULL << SHIFT_S)
#define MASK_Z    (1ULL << SHIFT_Z)
#define MASK_A    (1ULL << SHIFT_A)
#define MASK_C    (1ULL << SHIFT_C)
#define MASK_P    (1ULL << SHIFT_P)


/* Count the leading (most significant) zero bits of a 32-bit value.

   Returns a value in 0 .. 32; the result is 32 when x is zero.

   Implemented as a binary search using only unsigned arithmetic, so
   the result does not depend on how the compiler right-shifts
   negative signed values or converts out-of-range unsigned values to
   signed ones. */
UInt clz32 ( UInt x )
{
   UInt n = 0;

   if (x == 0)
      return 32;

   /* At each step: if the top half of the remaining window is empty,
      account for its width and slide the lower half up. */
   if ((x & 0xFFFF0000U) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000U) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000U) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000U) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000U) == 0) { n +=  1; }

   return n;
}

/* Count the trailing (least significant) zero bits of a 32-bit
   value.  Returns 32 when x is zero. */
UInt ctz32 ( UInt x )
{
   /* ~x & (x-1) has a 1 in exactly those positions that lie below
      the lowest set bit of x (all 32 positions if x is zero), so the
      number of trailing zeroes of x is the number of 1 bits in it,
      i.e. 32 minus its leading-zero count. */
   UInt belowLowest = ~x & (x - 1);
   UInt leading     = clz32(belowLowest);
   return 32 - leading;
}

/* Build a 16-byte vector from a 16-character hex "summary" string.

   Each character is one hex digit (either case); the digit's value is
   replicated into both nibbles of a byte, so 'a' becomes 0xAA and '0'
   becomes 0x00.  The string is read right to left: the last character
   fills dst->uChar[0] and the first fills dst->uChar[15].

   Asserts that the string is exactly 16 characters long and contains
   only hex digits. */
void expand ( V128* dst, char* summary )
{
   Int pos;
   assert( strlen(summary) == 16 );
   for (pos = 15; pos >= 0; pos--) {
      UChar ch     = summary[pos];
      UChar nibble = 0;

      if (ch >= 'a' && ch <= 'f')
         nibble = ch - 'a' + 10;
      else if (ch >= 'A' && ch <= 'F')
         nibble = ch - 'A' + 10;
      else if (ch >= '0' && ch <= '9')
         nibble = ch - '0';
      else
         assert(0);

      assert(nibble < 16);
      /* 0x11 * n duplicates the nibble n into both halves of the byte */
      dst->uChar[15 - pos] = (UChar)(nibble * 0x11);
   }
}

/* Run one comparison case and print the outcome.

   which   two-character label for the immediate under test
   h_fn    first implementation to call
   s_fn    second implementation to call
   summL   16-hex-digit summary of the left operand (see expand)
   summR   16-hex-digit summary of the right operand

   Both functions receive the same expanded operands.  The line
   printed shows both results and ends in "!!!!" when they differ. */
void try_istri ( char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 char* summL, char* summR )
{
   V128        vecL, vecR;
   UInt        fromH, fromS;
   const char* verdict;

   assert(strlen(which) == 2);

   expand(&vecL, summL);
   expand(&vecR, summR);

   fromH = h_fn(&vecL, &vecR);
   fromS = s_fn(&vecL, &vecR);

   verdict = (fromH != fromS) ? "!!!!" : "";
   printf("istri %s  %s %s -> %08x %08x %s\n",
          which, summL, summR, fromH, fromS, verdict);
}

/* Compute the 16-bit "zero mask" of a vector: bit i of the result is
   1 exactly when byte i of *arg is zero.  Bits 31:16 are always 0. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt mask = 0;
   Int  k;
   /* Walk from the top byte down, shifting earlier bits up as we go,
      so that byte k ends up at bit position k. */
   for (k = 15; k >= 0; k--) {
      mask <<= 1;
      if (arg->uChar[k] == 0)
         mask |= 1;
   }
   return mask;
}

//////////////////////////////////////////////////////////
//                                                      //
//                       GENERAL                        //
//                                                      //
//////////////////////////////////////////////////////////


/* Final stage shared by all the I-format (index-producing) cases.

   Takes the partial result intRes1 of a pcmpXstrX operation, applies
   the polarity selected by pol, and from the resulting 16-bit value
   derives:

     - the index to be returned in ECX, written to the first 32 bits
       of *resV: the position of the most significant 1 bit if idx is
       1, of the least significant 1 bit if idx is 0, or 16 if there
       is no 1 bit at all;

     - the new OSZACP flags, written to *resOSZACP.

   pol must be in 0 .. 3 and idx in 0 .. 1.  validL is only consulted
   for pol == 3.
*/
static
void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
                                    /*OUT*/UInt* resOSZACP,
                                    UInt intRes1,
                                    UInt zmaskL, UInt zmaskR,
                                    UInt validL,
                                    UInt pol, UInt idx )
{
   UInt polarised = 0;
   UInt ecx;
   UInt flags = 0;

   assert((pol >> 2) == 0);
   assert((idx >> 1) == 0);

   /* Apply polarity: 0 and 2 pass the value through, 1 inverts every
      bit, 3 inverts only the bits selected by validL. */
   if (pol == 0 || pol == 2)
      polarised = intRes1;
   else if (pol == 1)
      polarised = ~intRes1;
   else if (pol == 3)
      polarised = intRes1 ^ validL;
   polarised &= 0xFFFF;

   /* Index for ECX. */
   if (polarised == 0)
      ecx = 16;                         // nothing set
   else if (idx)
      ecx = 31 - clz32(polarised);      // most significant 1 bit
   else
      ecx = ctz32(polarised);           // least significant 1 bit

   resV->uInt[0] = ecx;

   /* Flags, common to all ISTRI and ISTRM cases.  A and P stay 0. */
   if (polarised != 0) flags |= MASK_C; // C == 0 iff result is all zero
   if (zmaskL != 0)    flags |= MASK_Z; // Z == 1 iff any byte of argL is 0
   if (zmaskR != 0)    flags |= MASK_S; // S == 1 iff any byte of argR is 0
   if (polarised & 1)  flags |= MASK_O; // O == bit 0 of the result
   *resOSZACP = flags;
}


/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
   variants.

   For xSTRI variants, the new ECX value is placed in the 32 bits
   pointed to by *resV.  For xSTRM variants, the result is a 128 bit
   value and is placed at *resV in the obvious way.

   For all variants, the new OSZACP value is placed at *resOSZACP.

   argLV and argRV are the vector args.  The caller must prepare a
   16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
   must be 1 for each zero byte of the respective arg.  For ESTRx
   variants this is derived from the explicit length indication, and
   must be 0 in all places except at the bit index corresponding to
   the valid length (0 .. 16).  If the valid length is 16 then the
   mask must be all zeroes.  In all cases, bits 31:16 must be zero.

   imm8 is the original immediate from the instruction.  isSTRM
   indicates whether this is a xSTRM or xSTRI variant, which controls
   how much of *resV is written.

   If the given imm8 case can be handled, the return value is True.
   If not, False is returned, and neither *resV nor *resOSZACP are
   altered.
*/

Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
                     /*OUT*/UInt* resOSZACP,
                     V128* argLV,  V128* argRV,
                     UInt zmaskL, UInt zmaskR,
                     UInt imm8,   Bool isSTRM )
{
   /* Sanity-check the inputs: imm8 must fit in 7 bits and both zero
      masks must fit in 16 bits. */
   assert(imm8 < 0x80);
   assert((zmaskL >> 16) == 0);
   assert((zmaskR >> 16) == 0);

   /* Explicitly reject any imm8 values that haven't been validated,
      even if they would probably work.  Life is too short to have
      unvalidated cases in the code base. */
   switch (imm8) {
      case 0x00: case 0x02:
      case 0x08: case 0x0A: case 0x0C: case 0x0E:
      case 0x10: case 0x12: case 0x14:
      case 0x18: case 0x1A:
      case 0x30:            case 0x34:
      case 0x38: case 0x3A:
      case 0x40: case 0x42: case 0x44: case 0x46:
                 case 0x4A:
                 case 0x62:
      case 0x70: case 0x72:
         break;
      default:
         return False;
   }

   /* Split the immediate into its control fields. */
   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask

   /* Each section below handles one aggregation function on byte
      data.  All of them require !isSTRM, so when isSTRM is True no
      section matches and False is returned at the end.

      In every section, validL/validR are computed as
      ~(zmask | -zmask).  (zmask | -zmask) has the lowest set bit of
      zmask and every bit above it set, so the complement keeps
      exactly the bit positions below the first zero byte.  When
      zmask is 0 the result is all ones, including bits 31:16. */

   /*----------------------------------------*/
   /*-- strcmp on byte data                --*/
   /*----------------------------------------*/

   if (agg == 2/*equal each, aka strcmp*/
       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
       && !isSTRM) {
      Int    i;
      UChar* argL = (UChar*)argLV;
      UChar* argR = (UChar*)argRV;
      UInt boolResII = 0;
      // Walk from byte 15 down to byte 0, shifting as we go, so that
      // bit i of boolResII ends up as 1 iff argL[i] == argR[i].
      for (i = 15; i >= 0; i--) {
         UChar cL  = argL[i];
         UChar cR  = argR[i];
         boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
      }
      UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
      UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))

      // do invalidation, common to all equal-each cases
      UInt intRes1
         = (boolResII & validL & validR)  // if both valid, use cmpres
           | (~ (validL | validR));       // if both invalid, force 1
                                          // else force 0
      intRes1 &= 0xFFFF;

      // generate I-format output
      pcmpXstrX_WRK_gen_output_fmt_I(
         resV, resOSZACP,
         intRes1, zmaskL, zmaskR, validL, pol, idx
      );

      return True;
   }

   /*----------------------------------------*/
   /*-- set membership on byte data        --*/
   /*----------------------------------------*/

   if (agg == 0/*equal any, aka find chars in a set*/
       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
       && !isSTRM) {
      /* argL: the string,  argR: charset */
      UInt   si, ci;
      UChar* argL    = (UChar*)argLV;
      UChar* argR    = (UChar*)argRV;
      UInt   boolRes = 0;
      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))

      // For each valid string byte, set its result bit iff the byte
      // equals some valid charset byte.  Bits for positions at or
      // beyond the end of the string are left at 0.
      for (si = 0; si < 16; si++) {
         if ((validL & (1 << si)) == 0)
            // run off the end of the string.
            break;
         UInt m = 0;
         for (ci = 0; ci < 16; ci++) {
            // stop at the end of the charset
            if ((validR & (1 << ci)) == 0) break;
            if (argR[ci] == argL[si]) { m = 1; break; }
         }
         boolRes |= (m << si);
      }

      // boolRes is "pre-invalidated"
      UInt intRes1 = boolRes & 0xFFFF;
   
      // generate I-format output
      pcmpXstrX_WRK_gen_output_fmt_I(
         resV, resOSZACP,
         intRes1, zmaskL, zmaskR, validL, pol, idx
      );

      return True;
   }

   /*----------------------------------------*/
   /*-- substring search on byte data      --*/
   /*----------------------------------------*/

   if (agg == 3/*equal ordered, aka substring search*/
       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
       && !isSTRM) {

      /* argL: haystack,  argR: needle */
      UInt   ni, hi;
      UChar* argL    = (UChar*)argLV;
      UChar* argR    = (UChar*)argRV;
      UInt   boolRes = 0;
      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
      // For each haystack offset hi, m stays 1 unless some valid
      // needle byte mismatches the haystack byte at hi+ni.  The
      // comparison stops early at the end of the needle or when
      // hi+ni would run past byte 15, so a needle that only partly
      // fits still counts as a match.
      for (hi = 0; hi < 16; hi++) {
         UInt m = 1;
         for (ni = 0; ni < 16; ni++) {
            if ((validR & (1 << ni)) == 0) break;
            UInt i = ni + hi;
            if (i >= 16) break;
            if (argL[i] != argR[ni]) { m = 0; break; }
         }
         // Note the ordering: the result bit for hi is recorded
         // before the validity test below, so the first invalid
         // haystack position still gets its bit computed.
         boolRes |= (m << hi);
         if ((validL & (1 << hi)) == 0)
            // run off the end of the haystack
            break;
      }

      // boolRes is "pre-invalidated"
      UInt intRes1 = boolRes & 0xFFFF;

      // generate I-format output
      pcmpXstrX_WRK_gen_output_fmt_I(
         resV, resOSZACP,
         intRes1, zmaskL, zmaskR, validL, pol, idx
      );

      return True;
   }

   /*----------------------------------------*/
   /*-- ranges, unsigned byte data         --*/
   /*----------------------------------------*/

   if (agg == 1/*ranges*/
       && fmt == 0/*ub*/
       && !isSTRM) {

      /* argL: string,  argR: range-pairs */
      UInt   ri, si;
      UChar* argL    = (UChar*)argLV;
      UChar* argR    = (UChar*)argRV;
      UInt   boolRes = 0;
      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
      // For each valid string byte, set its result bit iff the byte
      // lies within some (lo, hi) pair taken from consecutive bytes
      // of argR.  Comparisons are unsigned.
      for (si = 0; si < 16; si++) {
         if ((validL & (1 << si)) == 0)
            // run off the end of the string
            break;
         UInt m = 0;
         for (ri = 0; ri < 16; ri += 2) {
            // a pair is only usable if both of its bytes are valid
            if ((validR & (3 << ri)) != (3 << ri)) break;
            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { 
               m = 1; break;
            }
         }
         boolRes |= (m << si);
      }

      // boolRes is "pre-invalidated"
      UInt intRes1 = boolRes & 0xFFFF;

      // generate I-format output
      pcmpXstrX_WRK_gen_output_fmt_I(
         resV, resOSZACP,
         intRes1, zmaskL, zmaskR, validL, pol, idx
      );

      return True;
   }

   /*----------------------------------------*/
   /*-- ranges, signed byte data           --*/
   /*----------------------------------------*/

   if (agg == 1/*ranges*/
       && fmt == 2/*sb*/
       && !isSTRM) {

      /* argL: string,  argR: range-pairs */
      UInt   ri, si;
      // Same algorithm as the unsigned case above; the only
      // difference is that the bytes are viewed as signed chars, so
      // the range comparisons are signed.
      Char*  argL    = (Char*)argLV;
      Char*  argR    = (Char*)argRV;
      UInt   boolRes = 0;
      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
      for (si = 0; si < 16; si++) {
         if ((validL & (1 << si)) == 0)
            // run off the end of the string
            break;
         UInt m = 0;
         for (ri = 0; ri < 16; ri += 2) {
            // a pair is only usable if both of its bytes are valid
            if ((validR & (3 << ri)) != (3 << ri)) break;
            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { 
               m = 1; break;
            }
         }
         boolRes |= (m << si);
      }

      // boolRes is "pre-invalidated"
      UInt intRes1 = boolRes & 0xFFFF;

      // generate I-format output
      pcmpXstrX_WRK_gen_output_fmt_I(
         resV, resOSZACP,
         intRes1, zmaskL, zmaskR, validL, pol, idx
      );

      return True;
   }

   /* No section above matched this (imm8, isSTRM) combination. */
   return False;
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_4A                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Execute a real "pcmpistri $0x4A" instruction on the two operands
   and return its observable results packed into one word: the flags
   captured afterwards, masked with 0x8D5 (the O, S, Z, A, C and P
   bit positions), in bits 16 and up, and the low 16 bits of RCX in
   bits 15:0. */
UInt h_pcmpistri_4A ( V128* argL, V128* argR )
{
   /* Place both operands in one contiguous buffer so the asm needs
      only a single base pointer: argL at offset 0, argR at 16. */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   /* The stack pointer is lowered by 1024 bytes around the sequence
      and restored afterwards -- presumably so that the pushfq does
      not overwrite anything the compiler keeps just below the stack
      pointer (TODO confirm).  argL is loaded into xmm2 and argR into
      xmm11; after the instruction the flags are read via
      pushfq/popq into rdx and the index result is taken from rcx. */
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x4A,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Evaluate the C model (pcmpXstrX_WRK) for immediate 0x4A on the two
   operands, deriving both zero masks from the operand bytes.  The
   result is packed as (flags << 16) | index, where index is the first
   32 bits of the model's vector result.  Asserts that the model
   accepts this immediate. */
UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
{
   V128 modelOut;
   UInt modelFlags;
   UInt maskL = zmask_from_V128(argLU);
   UInt maskR = zmask_from_V128(argRU);
   Bool handled = pcmpXstrX_WRK( &modelOut, &modelFlags,
                                 argLU, argRU, maskL, maskR,
                                 0x4A, False/*!isSTRM*/ );
   assert(handled);
   return (modelFlags << 16) | modelOut.uInt[0];
}

void istri_4A ( void )
{
   char* wot = "4A";
   UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
   UInt(*s)(V128*,V128*) = s_pcmpistri_4A;

   try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 

   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 
   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 
   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_3A                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Execute a real "pcmpistri $0x3A" instruction on the two operands
   and return its observable results packed into one word: the flags
   captured afterwards, masked with 0x8D5 (the O, S, Z, A, C and P
   bit positions), in bits 16 and up, and the low 16 bits of RCX in
   bits 15:0. */
UInt h_pcmpistri_3A ( V128* argL, V128* argR )
{
   /* Place both operands in one contiguous buffer so the asm needs
      only a single base pointer: argL at offset 0, argR at 16. */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   /* The stack pointer is lowered by 1024 bytes around the sequence
      and restored afterwards -- presumably so that the pushfq does
      not overwrite anything the compiler keeps just below the stack
      pointer (TODO confirm).  argL is loaded into xmm2 and argR into
      xmm11; after the instruction the flags are read via
      pushfq/popq into rdx and the index result is taken from rcx. */
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x3A,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Evaluate the C model (pcmpXstrX_WRK) for immediate 0x3A on the two
   operands, deriving both zero masks from the operand bytes.  The
   result is packed as (flags << 16) | index, where index is the first
   32 bits of the model's vector result.  Asserts that the model
   accepts this immediate. */
UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
{
   V128 modelOut;
   UInt modelFlags;
   UInt maskL = zmask_from_V128(argLU);
   UInt maskR = zmask_from_V128(argRU);
   Bool handled = pcmpXstrX_WRK( &modelOut, &modelFlags,
                                 argLU, argRU, maskL, maskR,
                                 0x3A, False/*!isSTRM*/ );
   assert(handled);
   return (modelFlags << 16) | modelOut.uInt[0];
}

void istri_3A ( void )
{
   char* wot = "3A";
   UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
   UInt(*s)(V128*,V128*) = s_pcmpistri_3A;

   try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 

   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 
   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 
   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_0C                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Execute a real "pcmpistri $0x0C" instruction on the two operands
   and return its observable results packed into one word: the flags
   captured afterwards, masked with 0x8D5 (the O, S, Z, A, C and P
   bit positions), in bits 16 and up, and the low 16 bits of RCX in
   bits 15:0.

   As in the other h_pcmpistri_* helpers, the stack pointer is moved
   down by 1024 bytes for the duration of the sequence.  The pushfq
   stores 8 bytes just below the stack pointer; on x86-64 the compiler
   is entitled to keep live data in that area (the ABI "red zone"), so
   pushing without first moving the stack pointer could silently
   corrupt this function's own locals or spills. */
__attribute__((noinline))
UInt h_pcmpistri_0C ( V128* argL, V128* argR )
{
   /* Place both operands in one contiguous buffer so the asm needs
      only a single base pointer: argL at offset 0, argR at 16. */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x0C,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Evaluate the C model (pcmpXstrX_WRK) for immediate 0x0C on the two
   operands, deriving both zero masks from the operand bytes.  The
   result is packed as (flags << 16) | index, where index is the first
   32 bits of the model's vector result.  Asserts that the model
   accepts this immediate. */
UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
{
   V128 modelOut;
   UInt modelFlags;
   UInt maskL = zmask_from_V128(argLU);
   UInt maskR = zmask_from_V128(argRU);
   Bool handled = pcmpXstrX_WRK( &modelOut, &modelFlags,
                                 argLU, argRU, maskL, maskR,
                                 0x0C, False/*!isSTRM*/ );
   assert(handled);
   return (modelFlags << 16) | modelOut.uInt[0];
}

void istri_0C ( void )
{
   char* wot = "0C";
   UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
   UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
   
   try_istri(wot,h,s, "111111111abcde11", "00000000000abcde"); 

   try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde"); 

   try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde"); 
   try_istri(wot,h,s, "11111111111abcde", "00000000000abcde"); 
   try_istri(wot,h,s, "111111111111abcd", "00000000000abcde"); 

   try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde"); 

   try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde"); 

   try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde"); 

   try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde"); 

   try_istri(wot,h,s, "1111111111111234", "0000000000000000"); 
   try_istri(wot,h,s, "1111111111111234", "0000000000000001"); 
   try_istri(wot,h,s, "1111111111111234", "0000000000000011"); 

   try_istri(wot,h,s, "1111111111111234", "1111111111111234"); 
   try_istri(wot,h,s, "a111111111111111", "000000000000000a"); 
   try_istri(wot,h,s, "b111111111111111", "000000000000000a"); 

   try_istri(wot,h,s, "b111111111111111", "0000000000000000");
   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
   try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_08                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Execute a real "pcmpistri $0x08" instruction on the two operands
   and return its observable results packed into one word: the flags
   captured afterwards, masked with 0x8D5 (the O, S, Z, A, C and P
   bit positions), in bits 16 and up, and the low 16 bits of RCX in
   bits 15:0. */
UInt h_pcmpistri_08 ( V128* argL, V128* argR )
{
   /* Place both operands in one contiguous buffer so the asm needs
      only a single base pointer: argL at offset 0, argR at 16. */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   /* The stack pointer is lowered by 1024 bytes around the sequence
      and restored afterwards -- presumably so that the pushfq does
      not overwrite anything the compiler keeps just below the stack
      pointer (TODO confirm).  argL is loaded into xmm2 and argR into
      xmm11; after the instruction the flags are read via
      pushfq/popq into rdx and the index result is taken from rcx. */
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x08,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Evaluate the C model (pcmpXstrX_WRK) for immediate 0x08 on the two
   operands, deriving both zero masks from the operand bytes.  The
   result is packed as (flags << 16) | index, where index is the first
   32 bits of the model's vector result.  Asserts that the model
   accepts this immediate. */
UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
{
   V128 modelOut;
   UInt modelFlags;
   UInt maskL = zmask_from_V128(argLU);
   UInt maskR = zmask_from_V128(argRU);
   Bool handled = pcmpXstrX_WRK( &modelOut, &modelFlags,
                                 argLU, argRU, maskL, maskR,
                                 0x08, False/*!isSTRM*/ );
   assert(handled);
   return (modelFlags << 16) | modelOut.uInt[0];
}

void istri_08 ( void )
{
   char* wot = "08";
   UInt(*h)(V128*,V128*) = h_pcmpistri_08;
   UInt(*s)(V128*,V128*) = s_pcmpistri_08;

   try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 

   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 
   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 
   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_18                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Execute a real "pcmpistri $0x18" instruction on the two operands
   and return its observable results packed into one word: the flags
   captured afterwards, masked with 0x8D5 (the O, S, Z, A, C and P
   bit positions), in bits 16 and up, and the low 16 bits of RCX in
   bits 15:0. */
UInt h_pcmpistri_18 ( V128* argL, V128* argR )
{
   /* Place both operands in one contiguous buffer so the asm needs
      only a single base pointer: argL at offset 0, argR at 16. */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   /* The stack pointer is lowered by 1024 bytes around the sequence
      and restored afterwards -- presumably so that the pushfq does
      not overwrite anything the compiler keeps just below the stack
      pointer (TODO confirm).  argL is loaded into xmm2 and argR into
      xmm11; after the instruction the flags are read via
      pushfq/popq into rdx and the index result is taken from rcx. */
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x18,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Evaluate the C model (pcmpXstrX_WRK) for immediate 0x18 on the two
   operands, deriving both zero masks from the operand bytes.  The
   result is packed as (flags << 16) | index, where index is the first
   32 bits of the model's vector result.  Asserts that the model
   accepts this immediate. */
UInt s_pcmpistri_18 ( V128* argLU, V128* argRU )
{
   V128 modelOut;
   UInt modelFlags;
   UInt maskL = zmask_from_V128(argLU);
   UInt maskR = zmask_from_V128(argRU);
   Bool handled = pcmpXstrX_WRK( &modelOut, &modelFlags,
                                 argLU, argRU, maskL, maskR,
                                 0x18, False/*!isSTRM*/ );
   assert(handled);
   return (modelFlags << 16) | modelOut.uInt[0];
}

void istri_18 ( void )
{
   char* wot = "18";
   UInt(*h)(V128*,V128*) = h_pcmpistri_18;
   UInt(*s)(V128*,V128*) = s_pcmpistri_18;

   try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 

   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 
   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 
   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_1A                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Execute a real "pcmpistri $0x1A" instruction on the two operands
   and return its observable results packed into one word: the flags
   captured afterwards, masked with 0x8D5 (the O, S, Z, A, C and P
   bit positions), in bits 16 and up, and the low 16 bits of RCX in
   bits 15:0. */
UInt h_pcmpistri_1A ( V128* argL, V128* argR )
{
   /* Place both operands in one contiguous buffer so the asm needs
      only a single base pointer: argL at offset 0, argR at 16. */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   /* The stack pointer is lowered by 1024 bytes around the sequence
      and restored afterwards -- presumably so that the pushfq does
      not overwrite anything the compiler keeps just below the stack
      pointer (TODO confirm).  argL is loaded into xmm2 and argR into
      xmm11; after the instruction the flags are read via
      pushfq/popq into rdx and the index result is taken from rcx. */
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x1A,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Evaluate the C model (pcmpXstrX_WRK) for immediate 0x1A on the two
   operands, deriving both zero masks from the operand bytes.  The
   result is packed as (flags << 16) | index, where index is the first
   32 bits of the model's vector result.  Asserts that the model
   accepts this immediate. */
UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
{
   V128 modelOut;
   UInt modelFlags;
   UInt maskL = zmask_from_V128(argLU);
   UInt maskR = zmask_from_V128(argRU);
   Bool handled = pcmpXstrX_WRK( &modelOut, &modelFlags,
                                 argLU, argRU, maskL, maskR,
                                 0x1A, False/*!isSTRM*/ );
   assert(handled);
   return (modelFlags << 16) | modelOut.uInt[0];
}

/* Feeds every operand pair below, in order, to try_istri together with
   the hardware (h_) and simulated (s_) versions of the 0x1A variant. */
void istri_1A ( void )
{
   /* { left, right } operand summaries, 16 hex digits each. */
   static char* const cases[][2] = {
      { "0000000000000000", "0000000000000000" },

      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa" },

      { "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a" },

      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

      { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa" },
      { "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa" },

      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

      { "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa" },
      { "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa" },

      { "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa" },
      { "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa" },

      { "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa" },
      { "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa" },

      { "0000000000000000", "aaaaaaaa0aaaaaaa" },
      { "8000000000000000", "aaaaaaaa0aaaaaaa" },
      { "0000000000000001", "aaaaaaaa0aaaaaaa" },

      { "0000000000000000", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "0000000000000000" }
   };
   size_t k;

   for (k = 0; k < sizeof cases / sizeof cases[0]; k++) {
      try_istri("1A", h_pcmpistri_1A, s_pcmpistri_1A,
                cases[k][0], cases[k][1]);
   }
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_02                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Executes "pcmpistri $0x02" on the real CPU, with *argL loaded into
   %xmm2 and *argR into %xmm11.  The result packs the O,S,Z,A,C,P bits
   of RFLAGS (mask 0x8D5) shifted up by 16, above the low 16 bits of
   %rcx as left by the instruction.
   (A disabled pcmpistrm + "movd %xmm0, %ecx" alternative used to sit
   next to the pcmpistri line as a debugging aid.) */
UInt h_pcmpistri_02 ( V128* argL, V128* argR )
{
   /* Both operands side by side, so one base register reaches them. */
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* Lower %rsp a long way first, so that the pushfq below lands
         well clear of anything kept just beneath the stack pointer. */
      "subq      $1024,  %%rsp"                 "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x02,  %%xmm2, %%xmm11"       "\n\t"
      /* Read RFLAGS by bouncing it through the stack into %rdx. */
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,  %[idx]"                "\n\t"
      "movq      %%rdx,  %[flg]"                "\n\t"
      "addq      $1024,  %%rsp"                 "\n\t"
      : /*out*/ [idx] "=r"(rcxOut), [flg] "=r"(rflagsOut)
      : /*in*/  [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   rflagsOut &= 0x8D5;
   return (rflagsOut << 16) | (rcxOut & 0xFFFF);
}

/* Reference result for "pcmpistri $0x02", obtained from pcmpXstrX_WRK
   instead of from the CPU.  Laid out like the value returned by
   h_pcmpistri_02: the worker's flag word shifted up by 16, OR'd with
   word 0 of its V128 result. */
UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
{
   V128 outV;
   UInt outFlags;
   Bool succeeded;

   succeeded = pcmpXstrX_WRK(
                  &outV, &outFlags,
                  argLU, argRU,
                  zmask_from_V128(argLU), zmask_from_V128(argRU),
                  0x02,
                  False /* not the STRM form */
               );
   /* The worker is expected to report success here. */
   assert(succeeded);

   return (outFlags << 16) | outV.uInt[0];
}

/* Feeds every operand pair below, in order, to try_istri together with
   the hardware (h_) and simulated (s_) versions of the 0x02 variant. */
void istri_02 ( void )
{
   /* { left, right } operand summaries, 16 hex digits each. */
   static char* const cases[][2] = {
      { "abcdacbdabcdabcd", "000000000000000a" },
      { "abcdabcdabcdabcd", "000000000000000b" },
      { "abcdabcdabcdabcd", "00000000000000ab" },
      { "abcdabc0abcdabcd", "000000000000abcd" },

      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "0bcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcda0cd", "000000000000abcd" },
      { "abcdabcdabcdab0d", "000000000000abcd" },
      { "abcdabcdabcdabc0", "000000000000abcd" },

      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000a0cd" },
      { "abcdabcdabcdabcd", "000000000000ab0d" },
      { "abcdabcdabcdabcd", "000000000000abc0" },

      { "0000000000000000", "0000000000000000" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

      { "0000abcdabcdabcd", "000000000000abcd" },
      { "0000abcdabcdabcd", "000000000000dcba" },
      { "0000abcdabcdabcd", "000000000000bbbb" },
      { "0000abcdabcdabcd", "000000000000baba" },

      { "0000abcdabcdabcd", "00000000000baba0" },

      { "0ddc0ffeebadf00d", "00000000cafebabe" },
      { "0ddc0ffeebadfeed", "00000000cafebabe" }
   };
   size_t k;

   for (k = 0; k < sizeof cases / sizeof cases[0]; k++) {
      try_istri("02", h_pcmpistri_02, s_pcmpistri_02,
                cases[k][0], cases[k][1]);
   }
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_12                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Executes "pcmpistri $0x12" on the real CPU, with *argL loaded into
   %xmm2 and *argR into %xmm11.  The result packs the O,S,Z,A,C,P bits
   of RFLAGS (mask 0x8D5) shifted up by 16, above the low 16 bits of
   %rcx as left by the instruction.
   (A disabled pcmpistrm + "movd %xmm0, %ecx" alternative used to sit
   next to the pcmpistri line as a debugging aid.) */
UInt h_pcmpistri_12 ( V128* argL, V128* argR )
{
   /* Both operands side by side, so one base register reaches them. */
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* Lower %rsp a long way first, so that the pushfq below lands
         well clear of anything kept just beneath the stack pointer. */
      "subq      $1024,  %%rsp"                 "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x12,  %%xmm2, %%xmm11"       "\n\t"
      /* Read RFLAGS by bouncing it through the stack into %rdx. */
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,  %[idx]"                "\n\t"
      "movq      %%rdx,  %[flg]"                "\n\t"
      "addq      $1024,  %%rsp"                 "\n\t"
      : /*out*/ [idx] "=r"(rcxOut), [flg] "=r"(rflagsOut)
      : /*in*/  [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   rflagsOut &= 0x8D5;
   return (rflagsOut << 16) | (rcxOut & 0xFFFF);
}

/* Reference result for "pcmpistri $0x12", obtained from pcmpXstrX_WRK
   instead of from the CPU.  Laid out like the value returned by
   h_pcmpistri_12: the worker's flag word shifted up by 16, OR'd with
   word 0 of its V128 result. */
UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
{
   V128 outV;
   UInt outFlags;
   Bool succeeded;

   succeeded = pcmpXstrX_WRK(
                  &outV, &outFlags,
                  argLU, argRU,
                  zmask_from_V128(argLU), zmask_from_V128(argRU),
                  0x12,
                  False /* not the STRM form */
               );
   /* The worker is expected to report success here. */
   assert(succeeded);

   return (outFlags << 16) | outV.uInt[0];
}

/* Feeds every operand pair below, in order, to try_istri together with
   the hardware (h_) and simulated (s_) versions of the 0x12 variant. */
void istri_12 ( void )
{
   /* { left, right } operand summaries, 16 hex digits each. */
   static char* const cases[][2] = {
      { "abcdacbdabcdabcd", "000000000000000a" },
      { "abcdabcdabcdabcd", "000000000000000b" },
      { "abcdabcdabcdabcd", "00000000000000ab" },
      { "abcdabc0abcdabcd", "000000000000abcd" },

      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "0bcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcda0cd", "000000000000abcd" },
      { "abcdabcdabcdab0d", "000000000000abcd" },
      { "abcdabcdabcdabc0", "000000000000abcd" },

      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000a0cd" },
      { "abcdabcdabcdabcd", "000000000000ab0d" },
      { "abcdabcdabcdabcd", "000000000000abc0" },

      { "0000000000000000", "0000000000000000" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

      { "0000abcdabcdabcd", "000000000000abcd" },
      { "0000abcdabcdabcd", "000000000000dcba" },
      { "0000abcdabcdabcd", "000000000000bbbb" },
      { "0000abcdabcdabcd", "000000000000baba" },

      { "0000abcdabcdabcd", "00000000000baba0" },

      { "0ddc0ffeebadf00d", "00000000cafebabe" },
      { "0ddc0ffeebadfeed", "00000000cafebabe" }
   };
   size_t k;

   for (k = 0; k < sizeof cases / sizeof cases[0]; k++) {
      try_istri("12", h_pcmpistri_12, s_pcmpistri_12,
                cases[k][0], cases[k][1]);
   }
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_44                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Executes "pcmpistri $0x44" on the real CPU, with *argL loaded into
   %xmm2 and *argR into %xmm11.  The result packs the O,S,Z,A,C,P bits
   of RFLAGS (mask 0x8D5) shifted up by 16, above the low 16 bits of
   %rcx as left by the instruction.
   (A disabled pcmpistrm + "movd %xmm0, %ecx" alternative used to sit
   next to the pcmpistri line as a debugging aid.) */
UInt h_pcmpistri_44 ( V128* argL, V128* argR )
{
   /* Both operands side by side, so one base register reaches them. */
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* Lower %rsp a long way first, so that the pushfq below lands
         well clear of anything kept just beneath the stack pointer. */
      "subq      $1024,  %%rsp"                 "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x44,  %%xmm2, %%xmm11"       "\n\t"
      /* Read RFLAGS by bouncing it through the stack into %rdx. */
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,  %[idx]"                "\n\t"
      "movq      %%rdx,  %[flg]"                "\n\t"
      "addq      $1024,  %%rsp"                 "\n\t"
      : /*out*/ [idx] "=r"(rcxOut), [flg] "=r"(rflagsOut)
      : /*in*/  [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   rflagsOut &= 0x8D5;
   return (rflagsOut << 16) | (rcxOut & 0xFFFF);
}

/* Reference result for "pcmpistri $0x44", obtained from pcmpXstrX_WRK
   instead of from the CPU.  Laid out like the value returned by
   h_pcmpistri_44: the worker's flag word shifted up by 16, OR'd with
   word 0 of its V128 result. */
UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
{
   V128 outV;
   UInt outFlags;
   Bool succeeded;

   succeeded = pcmpXstrX_WRK(
                  &outV, &outFlags,
                  argLU, argRU,
                  zmask_from_V128(argLU), zmask_from_V128(argRU),
                  0x44,
                  False /* not the STRM form */
               );
   /* The worker is expected to report success here. */
   assert(succeeded);

   return (outFlags << 16) | outV.uInt[0];
}

/* Feeds every operand pair below, in order, to try_istri together with
   the hardware (h_) and simulated (s_) versions of the 0x44 variant. */
void istri_44 ( void )
{
   /* { left, right } operand summaries, 16 hex digits each. */
   static char* const cases[][2] = {
      { "aaaabbbbccccdddd", "00000000000000bc" },
      { "aaaabbbbccccdddd", "00000000000000cb" },
      { "baaabbbbccccdddd", "00000000000000cb" },
      { "baaabbbbccccdddc", "00000000000000cb" },

      { "bbbbbbbbbbbbbbbb", "00000000000000cb" },
      { "bbbbbbbb0bbbbbbb", "00000000000000cb" },
      { "bbbbbbbbbbbbbb0b", "00000000000000cb" },
      { "bbbbbbbbbbbbbbb0", "00000000000000cb" },
      { "0000000000000000", "00000000000000cb" },

      { "0000000000000000", "0000000000000000" },

      { "bbbbbbbbbbbbbbbb", "00000000000000cb" },
      { "bbbbbbbbbbbbbbbb", "000000000000000b" },
      { "b4b4b4b4b4b4b4b4", "00000000000062cb" },

      { "b4b4b4b4b4b4b4b4", "00000000000002cb" },
      { "b4b4b4b4b4b4b4b4", "00000000000000cb" },
      { "b4b4b4b4b4b4b4b4", "000000000000000b" },

      { "0123456789abcdef", "000000fecb975421" },
      { "123456789abcdef1", "000000fecb975421" },

      { "0123456789abcdef", "00000000dca86532" },
      { "123456789abcdef1", "00000000dca86532" },

      { "163887ec041a9b72", "fcd75adb9b3e895a" },
      { "fc937cbfbf53f8e2", "0d136bcb024d3fb7" },
      { "2ca34182c29a82ab", "302ebd646775ab54" },
      { "3f2987608c11be6f", "a9ecb661f8e0a8cb" }
   };
   size_t k;

   for (k = 0; k < sizeof cases / sizeof cases[0]; k++) {
      try_istri("44", h_pcmpistri_44, s_pcmpistri_44,
                cases[k][0], cases[k][1]);
   }
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_00                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Executes "pcmpistri $0x00" on the real CPU, with *argL loaded into
   %xmm2 and *argR into %xmm11.  The result packs the O,S,Z,A,C,P bits
   of RFLAGS (mask 0x8D5) shifted up by 16, above the low 16 bits of
   %rcx as left by the instruction.
   (A disabled pcmpistrm + "movd %xmm0, %ecx" alternative used to sit
   next to the pcmpistri line as a debugging aid.) */
UInt h_pcmpistri_00 ( V128* argL, V128* argR )
{
   /* Both operands side by side, so one base register reaches them. */
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* Lower %rsp a long way first, so that the pushfq below lands
         well clear of anything kept just beneath the stack pointer. */
      "subq      $1024,  %%rsp"                 "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x00,  %%xmm2, %%xmm11"       "\n\t"
      /* Read RFLAGS by bouncing it through the stack into %rdx. */
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,  %[idx]"                "\n\t"
      "movq      %%rdx,  %[flg]"                "\n\t"
      "addq      $1024,  %%rsp"                 "\n\t"
      : /*out*/ [idx] "=r"(rcxOut), [flg] "=r"(rflagsOut)
      : /*in*/  [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   rflagsOut &= 0x8D5;
   return (rflagsOut << 16) | (rcxOut & 0xFFFF);
}

/* Reference result for "pcmpistri $0x00", obtained from pcmpXstrX_WRK
   instead of from the CPU.  Laid out like the value returned by
   h_pcmpistri_00: the worker's flag word shifted up by 16, OR'd with
   word 0 of its V128 result. */
UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
{
   V128 outV;
   UInt outFlags;
   Bool succeeded;

   succeeded = pcmpXstrX_WRK(
                  &outV, &outFlags,
                  argLU, argRU,
                  zmask_from_V128(argLU), zmask_from_V128(argRU),
                  0x00,
                  False /* not the STRM form */
               );
   /* The worker is expected to report success here. */
   assert(succeeded);

   return (outFlags << 16) | outV.uInt[0];
}

/* Feeds every operand pair below, in order, to try_istri together with
   the hardware (h_) and simulated (s_) versions of the 0x00 variant. */
void istri_00 ( void )
{
   /* { left, right } operand summaries, 16 hex digits each. */
   static char* const cases[][2] = {
      { "abcdacbdabcdabcd", "000000000000000a" },
      { "abcdabcdabcdabcd", "000000000000000b" },
      { "abcdabcdabcdabcd", "00000000000000ab" },
      { "abcdabc0abcdabcd", "000000000000abcd" },

      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "0bcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcda0cd", "000000000000abcd" },
      { "abcdabcdabcdab0d", "000000000000abcd" },
      { "abcdabcdabcdabc0", "000000000000abcd" },

      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000a0cd" },
      { "abcdabcdabcdabcd", "000000000000ab0d" },
      { "abcdabcdabcdabcd", "000000000000abc0" },

      { "0000000000000000", "0000000000000000" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

      { "0000abcdabcdabcd", "000000000000abcd" },
      { "0000abcdabcdabcd", "000000000000dcba" },
      { "0000abcdabcdabcd", "000000000000bbbb" },
      { "0000abcdabcdabcd", "000000000000baba" },

      { "0000abcdabcdabcd", "00000000000baba0" },

      { "0ddc0ffeebadf00d", "00000000cafebabe" },
      { "0ddc0ffeebadfeed", "00000000cafebabe" }
   };
   size_t k;

   for (k = 0; k < sizeof cases / sizeof cases[0]; k++) {
      try_istri("00", h_pcmpistri_00, s_pcmpistri_00,
                cases[k][0], cases[k][1]);
   }
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_38                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Executes "pcmpistri $0x38" on the real CPU, with *argL loaded into
   %xmm2 and *argR into %xmm11.  The result packs the O,S,Z,A,C,P bits
   of RFLAGS (mask 0x8D5) shifted up by 16, above the low 16 bits of
   %rcx as left by the instruction. */
UInt h_pcmpistri_38 ( V128* argL, V128* argR )
{
   /* Both operands side by side, so one base register reaches them. */
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* Lower %rsp a long way first, so that the pushfq below lands
         well clear of anything kept just beneath the stack pointer. */
      "subq      $1024,  %%rsp"                 "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x38,  %%xmm2, %%xmm11"       "\n\t"
      /* Read RFLAGS by bouncing it through the stack into %rdx. */
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,  %[idx]"                "\n\t"
      "movq      %%rdx,  %[flg]"                "\n\t"
      "addq      $1024,  %%rsp"                 "\n\t"
      : /*out*/ [idx] "=r"(rcxOut), [flg] "=r"(rflagsOut)
      : /*in*/  [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   rflagsOut &= 0x8D5;
   return (rflagsOut << 16) | (rcxOut & 0xFFFF);
}

/* Reference result for "pcmpistri $0x38", obtained from pcmpXstrX_WRK
   instead of from the CPU.  Laid out like the value returned by
   h_pcmpistri_38: the worker's flag word shifted up by 16, OR'd with
   word 0 of its V128 result. */
UInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
{
   V128 outV;
   UInt outFlags;
   Bool succeeded;

   succeeded = pcmpXstrX_WRK(
                  &outV, &outFlags,
                  argLU, argRU,
                  zmask_from_V128(argLU), zmask_from_V128(argRU),
                  0x38,
                  False /* not the STRM form */
               );
   /* The worker is expected to report success here. */
   assert(succeeded);

   return (outFlags << 16) | outV.uInt[0];
}

/* Feeds every operand pair below, in order, to try_istri together with
   the hardware (h_) and simulated (s_) versions of the 0x38 variant. */
void istri_38 ( void )
{
   /* { left, right } operand summaries, 16 hex digits each. */
   static char* const cases[][2] = {
      { "0000000000000000", "0000000000000000" },

      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa" },

      { "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a" },

      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

      { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa" },
      { "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa" },

      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

      { "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa" },
      { "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa" },

      { "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa" },
      { "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa" },

      { "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa" },
      { "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa" },

      { "0000000000000000", "aaaaaaaa0aaaaaaa" },
      { "8000000000000000", "aaaaaaaa0aaaaaaa" },
      { "0000000000000001", "aaaaaaaa0aaaaaaa" },

      { "0000000000000000", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "0000000000000000" }
   };
   size_t k;

   for (k = 0; k < sizeof cases / sizeof cases[0]; k++) {
      try_istri("38", h_pcmpistri_38, s_pcmpistri_38,
                cases[k][0], cases[k][1]);
   }
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_46                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Executes "pcmpistri $0x46" on the real CPU, with *argL loaded into
   %xmm2 and *argR into %xmm11.  The result packs the O,S,Z,A,C,P bits
   of RFLAGS (mask 0x8D5) shifted up by 16, above the low 16 bits of
   %rcx as left by the instruction. */
UInt h_pcmpistri_46 ( V128* argL, V128* argR )
{
   /* Both operands side by side, so one base register reaches them. */
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* Lower %rsp a long way first, so that the pushfq below lands
         well clear of anything kept just beneath the stack pointer. */
      "subq      $1024,  %%rsp"                 "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x46,  %%xmm2, %%xmm11"       "\n\t"
      /* Read RFLAGS by bouncing it through the stack into %rdx. */
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,  %[idx]"                "\n\t"
      "movq      %%rdx,  %[flg]"                "\n\t"
      "addq      $1024,  %%rsp"                 "\n\t"
      : /*out*/ [idx] "=r"(rcxOut), [flg] "=r"(rflagsOut)
      : /*in*/  [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   rflagsOut &= 0x8D5;
   return (rflagsOut << 16) | (rcxOut & 0xFFFF);
}

/* Reference result for "pcmpistri $0x46", obtained from pcmpXstrX_WRK
   instead of from the CPU.  Laid out like the value returned by
   h_pcmpistri_46: the worker's flag word shifted up by 16, OR'd with
   word 0 of its V128 result. */
UInt s_pcmpistri_46 ( V128* argLU, V128* argRU )
{
   V128 outV;
   UInt outFlags;
   Bool succeeded;

   succeeded = pcmpXstrX_WRK(
                  &outV, &outFlags,
                  argLU, argRU,
                  zmask_from_V128(argLU), zmask_from_V128(argRU),
                  0x46,
                  False /* not the STRM form */
               );
   /* The worker is expected to report success here. */
   assert(succeeded);

   return (outFlags << 16) | outV.uInt[0];
}

/* Feeds every operand pair below, in order, to try_istri together with
   the hardware (h_) and simulated (s_) versions of the 0x46 variant. */
void istri_46 ( void )
{
   /* { left, right } operand summaries, 16 hex digits each. */
   static char* const cases[][2] = {
      { "aaaabbbbccccdddd", "00000000000000bc" },
      { "aaaabbbbccccdddd", "00000000000000cb" },
      { "baaabbbbccccdddd", "00000000000000cb" },
      { "baaabbbbccccdddc", "00000000000000cb" },

      { "bbbbbbbbbbbbbbbb", "00000000000000cb" },
      { "bbbbbbbb0bbbbbbb", "00000000000000cb" },
      { "bbbbbbbbbbbbbb0b", "00000000000000cb" },
      { "bbbbbbbbbbbbbbb0", "00000000000000cb" },
      { "0000000000000000", "00000000000000cb" },

      { "0000000000000000", "0000000000000000" },

      { "bbbbbbbbbbbbbbbb", "00000000000000cb" },
      { "bbbbbbbbbbbbbbbb", "000000000000000b" },
      { "b4b4b4b4b4b4b4b4", "00000000000062cb" },

      { "b4b4b4b4b4b4b4b4", "00000000000002cb" },
      { "b4b4b4b4b4b4b4b4", "00000000000000cb" },
      { "b4b4b4b4b4b4b4b4", "000000000000000b" },

      { "0123456789abcdef", "000000fecb975421" },
      { "123456789abcdef1", "000000fecb975421" },

      { "0123456789abcdef", "00000000dca86532" },
      { "123456789abcdef1", "00000000dca86532" },

      { "163887ec041a9b72", "fcd75adb9b3e895a" },
      { "fc937cbfbf53f8e2", "0d136bcb024d3fb7" },
      { "2ca34182c29a82ab", "302ebd646775ab54" },
      { "3f2987608c11be6f", "a9ecb661f8e0a8cb" }
   };
   size_t k;

   for (k = 0; k < sizeof cases / sizeof cases[0]; k++) {
      try_istri("46", h_pcmpistri_46, s_pcmpistri_46,
                cases[k][0], cases[k][1]);
   }
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_30                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Executes "pcmpistri $0x30" on the real CPU, with *argL loaded into
   %xmm2 and *argR into %xmm11.  The result packs the O,S,Z,A,C,P bits
   of RFLAGS (mask 0x8D5) shifted up by 16, above the low 16 bits of
   %rcx as left by the instruction. */
UInt h_pcmpistri_30 ( V128* argL, V128* argR )
{
   /* Both operands side by side, so one base register reaches them. */
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* Lower %rsp a long way first, so that the pushfq below lands
         well clear of anything kept just beneath the stack pointer. */
      "subq      $1024,  %%rsp"                 "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x30,  %%xmm2, %%xmm11"       "\n\t"
      /* Read RFLAGS by bouncing it through the stack into %rdx. */
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,  %[idx]"                "\n\t"
      "movq      %%rdx,  %[flg]"                "\n\t"
      "addq      $1024,  %%rsp"                 "\n\t"
      : /*out*/ [idx] "=r"(rcxOut), [flg] "=r"(rflagsOut)
      : /*in*/  [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   rflagsOut &= 0x8D5;
   return (rflagsOut << 16) | (rcxOut & 0xFFFF);
}

/* Reference result for "pcmpistri $0x30", obtained from pcmpXstrX_WRK
   instead of from the CPU.  Laid out like the value returned by
   h_pcmpistri_30: the worker's flag word shifted up by 16, OR'd with
   word 0 of its V128 result. */
UInt s_pcmpistri_30 ( V128* argLU, V128* argRU )
{
   V128 outV;
   UInt outFlags;
   Bool succeeded;

   succeeded = pcmpXstrX_WRK(
                  &outV, &outFlags,
                  argLU, argRU,
                  zmask_from_V128(argLU), zmask_from_V128(argRU),
                  0x30,
                  False /* not the STRM form */
               );
   /* The worker is expected to report success here. */
   assert(succeeded);

   return (outFlags << 16) | outV.uInt[0];
}

/* Feeds every operand pair below, in order, to try_istri together with
   the hardware (h_) and simulated (s_) versions of the 0x30 variant. */
void istri_30 ( void )
{
   /* { left, right } operand summaries, 16 hex digits each. */
   static char* const cases[][2] = {
      { "abcdacbdabcdabcd", "000000000000000a" },
      { "abcdabcdabcdabcd", "000000000000000b" },
      { "abcdabcdabcdabcd", "00000000000000ab" },
      { "abcdabc0abcdabcd", "000000000000abcd" },

      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "0bcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcda0cd", "000000000000abcd" },
      { "abcdabcdabcdab0d", "000000000000abcd" },
      { "abcdabcdabcdabc0", "000000000000abcd" },

      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000a0cd" },
      { "abcdabcdabcdabcd", "000000000000ab0d" },
      { "abcdabcdabcdabcd", "000000000000abc0" },

      { "0000000000000000", "0000000000000000" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

      { "0000abcdabcdabcd", "000000000000abcd" },
      { "0000abcdabcdabcd", "000000000000dcba" },
      { "0000abcdabcdabcd", "000000000000bbbb" },
      { "0000abcdabcdabcd", "000000000000baba" },

      { "0000abcdabcdabcd", "00000000000baba0" },

      { "0ddc0ffeebadf00d", "00000000cafebabe" },
      { "0ddc0ffeebadfeed", "00000000cafebabe" }
   };
   size_t k;

   for (k = 0; k < sizeof cases / sizeof cases[0]; k++) {
      try_istri("30", h_pcmpistri_30, s_pcmpistri_30,
                cases[k][0], cases[k][1]);
   }
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_40                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Executes "pcmpistri $0x40" on the real CPU, with *argL loaded into
   %xmm2 and *argR into %xmm11.  The result packs the O,S,Z,A,C,P bits
   of RFLAGS (mask 0x8D5) shifted up by 16, above the low 16 bits of
   %rcx as left by the instruction. */
UInt h_pcmpistri_40 ( V128* argL, V128* argR )
{
   /* Both operands side by side, so one base register reaches them. */
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* Lower %rsp a long way first, so that the pushfq below lands
         well clear of anything kept just beneath the stack pointer. */
      "subq      $1024,  %%rsp"                 "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x40,  %%xmm2, %%xmm11"       "\n\t"
      /* Read RFLAGS by bouncing it through the stack into %rdx. */
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,  %[idx]"                "\n\t"
      "movq      %%rdx,  %[flg]"                "\n\t"
      "addq      $1024,  %%rsp"                 "\n\t"
      : /*out*/ [idx] "=r"(rcxOut), [flg] "=r"(rflagsOut)
      : /*in*/  [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   rflagsOut &= 0x8D5;
   return (rflagsOut << 16) | (rcxOut & 0xFFFF);
}

/* Reference result for "pcmpistri $0x40", obtained from pcmpXstrX_WRK
   instead of from the CPU.  Laid out like the value returned by
   h_pcmpistri_40: the worker's flag word shifted up by 16, OR'd with
   word 0 of its V128 result. */
UInt s_pcmpistri_40 ( V128* argLU, V128* argRU )
{
   V128 outV;
   UInt outFlags;
   Bool succeeded;

   succeeded = pcmpXstrX_WRK(
                  &outV, &outFlags,
                  argLU, argRU,
                  zmask_from_V128(argLU), zmask_from_V128(argRU),
                  0x40,
                  False /* not the STRM form */
               );
   /* The worker is expected to report success here. */
   assert(succeeded);

   return (outFlags << 16) | outV.uInt[0];
}

/* Feeds every operand pair below, in order, to try_istri together with
   the hardware (h_) and simulated (s_) versions of the 0x40 variant. */
void istri_40 ( void )
{
   /* { left, right } operand summaries, 16 hex digits each. */
   static char* const cases[][2] = {
      { "abcdacbdabcdabcd", "000000000000000a" },
      { "abcdabcdabcdabcd", "000000000000000b" },
      { "abcdabcdabcdabcd", "00000000000000ab" },
      { "abcdabc0abcdabcd", "000000000000abcd" },

      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "0bcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcda0cd", "000000000000abcd" },
      { "abcdabcdabcdab0d", "000000000000abcd" },
      { "abcdabcdabcdabc0", "000000000000abcd" },

      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000a0cd" },
      { "abcdabcdabcdabcd", "000000000000ab0d" },
      { "abcdabcdabcdabcd", "000000000000abc0" },

      { "0000000000000000", "0000000000000000" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

      { "0000abcdabcdabcd", "000000000000abcd" },
      { "0000abcdabcdabcd", "000000000000dcba" },
      { "0000abcdabcdabcd", "000000000000bbbb" },
      { "0000abcdabcdabcd", "000000000000baba" },

      { "0000abcdabcdabcd", "00000000000baba0" },

      { "0ddc0ffeebadf00d", "00000000cafebabe" },
      { "0ddc0ffeebadfeed", "00000000cafebabe" }
   };
   size_t k;

   for (k = 0; k < sizeof cases / sizeof cases[0]; k++) {
      try_istri("40", h_pcmpistri_40, s_pcmpistri_40,
                cases[k][0], cases[k][1]);
   }
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_42                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Executes "pcmpistri $0x42" on the real CPU, with *argL loaded into
   %xmm2 and *argR into %xmm11.  The result packs the O,S,Z,A,C,P bits
   of RFLAGS (mask 0x8D5) shifted up by 16, above the low 16 bits of
   %rcx as left by the instruction. */
UInt h_pcmpistri_42 ( V128* argL, V128* argR )
{
   /* Both operands side by side, so one base register reaches them. */
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* Lower %rsp a long way first, so that the pushfq below lands
         well clear of anything kept just beneath the stack pointer. */
      "subq      $1024,  %%rsp"                 "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x42,  %%xmm2, %%xmm11"       "\n\t"
      /* Read RFLAGS by bouncing it through the stack into %rdx. */
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,  %[idx]"                "\n\t"
      "movq      %%rdx,  %[flg]"                "\n\t"
      "addq      $1024,  %%rsp"                 "\n\t"
      : /*out*/ [idx] "=r"(rcxOut), [flg] "=r"(rflagsOut)
      : /*in*/  [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   rflagsOut &= 0x8D5;
   return (rflagsOut << 16) | (rcxOut & 0xFFFF);
}

/* Reference result for "pcmpistri $0x42", obtained from pcmpXstrX_WRK
   instead of from the CPU.  Laid out like the value returned by
   h_pcmpistri_42: the worker's flag word shifted up by 16, OR'd with
   word 0 of its V128 result. */
UInt s_pcmpistri_42 ( V128* argLU, V128* argRU )
{
   V128 outV;
   UInt outFlags;
   Bool succeeded;

   succeeded = pcmpXstrX_WRK(
                  &outV, &outFlags,
                  argLU, argRU,
                  zmask_from_V128(argLU), zmask_from_V128(argRU),
                  0x42,
                  False /* not the STRM form */
               );
   /* The worker is expected to report success here. */
   assert(succeeded);

   return (outFlags << 16) | outV.uInt[0];
}

/* Feeds every operand pair below, in order, to try_istri together with
   the hardware (h_) and simulated (s_) versions of the 0x42 variant. */
void istri_42 ( void )
{
   /* { left, right } operand summaries, 16 hex digits each. */
   static char* const cases[][2] = {
      { "abcdacbdabcdabcd", "000000000000000a" },
      { "abcdabcdabcdabcd", "000000000000000b" },
      { "abcdabcdabcdabcd", "00000000000000ab" },
      { "abcdabc0abcdabcd", "000000000000abcd" },

      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "0bcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcda0cd", "000000000000abcd" },
      { "abcdabcdabcdab0d", "000000000000abcd" },
      { "abcdabcdabcdabc0", "000000000000abcd" },

      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000a0cd" },
      { "abcdabcdabcdabcd", "000000000000ab0d" },
      { "abcdabcdabcdabcd", "000000000000abc0" },

      { "0000000000000000", "0000000000000000" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

      { "0000abcdabcdabcd", "000000000000abcd" },
      { "0000abcdabcdabcd", "000000000000dcba" },
      { "0000abcdabcdabcd", "000000000000bbbb" },
      { "0000abcdabcdabcd", "000000000000baba" },

      { "0000abcdabcdabcd", "00000000000baba0" },

      { "0ddc0ffeebadf00d", "00000000cafebabe" },
      { "0ddc0ffeebadfeed", "00000000cafebabe" }
   };
   size_t k;

   for (k = 0; k < sizeof cases / sizeof cases[0]; k++) {
      try_istri("42", h_pcmpistri_42, s_pcmpistri_42,
                cases[k][0], cases[k][1]);
   }
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_0E                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Executes "pcmpistri $0x0E" on the real CPU, with *argL loaded into
   %xmm2 and *argR into %xmm11.  The result packs the O,S,Z,A,C,P bits
   of RFLAGS (mask 0x8D5) shifted up by 16, above the low 16 bits of
   %rcx as left by the instruction.

   As in the other h_pcmpistri_* helpers in this file, %rsp is moved
   down by 1024 bytes around the instruction sequence.  Without that,
   the pushfq would store into the 128 bytes just below %rsp (the
   x86-64 SysV red zone), which the compiler is free to use for this
   function's own locals. */
__attribute__((noinline))
UInt h_pcmpistri_0E ( V128* argL, V128* argR )
{
   /* Both operands in one array so a single base register (%2)
      addresses them at offsets 0 and 16. */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      /* Step well past the red zone before touching the stack. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x0E,  %%xmm2, %%xmm11"   "\n\t"
      /* Capture RFLAGS via the stack into %rdx. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      /* Put %rsp back; this alters the flags, which are listed as
         clobbered ("cc") and have already been saved in %rdx. */
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 selects bits 11,7,6,4,2,0 of RFLAGS: O,S,Z,A,P,C. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Reference result for "pcmpistri $0x0E", obtained from pcmpXstrX_WRK
   instead of from the CPU.  Laid out like the value returned by
   h_pcmpistri_0E: the worker's flag word shifted up by 16, OR'd with
   word 0 of its V128 result. */
UInt s_pcmpistri_0E ( V128* argLU, V128* argRU )
{
   V128 outV;
   UInt outFlags;
   Bool succeeded;

   succeeded = pcmpXstrX_WRK(
                  &outV, &outFlags,
                  argLU, argRU,
                  zmask_from_V128(argLU), zmask_from_V128(argRU),
                  0x0E,
                  False /* not the STRM form */
               );
   /* The worker is expected to report success here. */
   assert(succeeded);

   return (outFlags << 16) | outV.uInt[0];
}

void istri_0E ( void )
{
   char* wot = "0E";
   UInt(*h)(V128*,V128*) = h_pcmpistri_0E;
   UInt(*s)(V128*,V128*) = s_pcmpistri_0E;
   
   try_istri(wot,h,s, "111111111abcde11", "00000000000abcde"); 

   try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde"); 

   try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde"); 
   try_istri(wot,h,s, "11111111111abcde", "00000000000abcde"); 
   try_istri(wot,h,s, "111111111111abcd", "00000000000abcde"); 

   try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde"); 

   try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde"); 

   try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde"); 

   try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde"); 
   try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde"); 

   try_istri(wot,h,s, "1111111111111234", "0000000000000000"); 
   try_istri(wot,h,s, "1111111111111234", "0000000000000001"); 
   try_istri(wot,h,s, "1111111111111234", "0000000000000011"); 

   try_istri(wot,h,s, "1111111111111234", "1111111111111234"); 
   try_istri(wot,h,s, "a111111111111111", "000000000000000a"); 
   try_istri(wot,h,s, "b111111111111111", "000000000000000a"); 

   try_istri(wot,h,s, "b111111111111111", "0000000000000000");
   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
   try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_34                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware reference for imm8 = 0x34: runs the real PCMPISTRI
   instruction on private copies of *argL (loaded into %xmm2) and
   *argR (loaded into %xmm11).

   Returns a packed word: bits 15:0 hold the low 16 bits of %rcx as left
   by the instruction; the RFLAGS bits selected by 0x8D5 (bit positions
   11, 7, 6, 4, 2 and 0, i.e. the SHIFT_O/S/Z/A/P/C positions defined at
   the top of this file) are placed 16 bits higher. */
UInt h_pcmpistri_34 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxAfter, rflagsAfter;

   /* Both vectors sit back to back so one base register reaches them. */
   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* %rsp is lowered around the pushfq/popq pair, presumably so the
         push cannot land on compiler-owned data just below %rsp. */
      "subq      $1024,      %%rsp"             "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x34,      %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,      %[idx]"            "\n\t"
      "movq      %%rdx,      %[flg]"            "\n\t"
      "addq      $1024,      %%rsp"             "\n\t"
      : [idx] "=r"(rcxAfter), [flg] "=r"(rflagsAfter)
      : [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsAfter & 0x8D5) << 16) | (rcxAfter & 0xFFFF);
}

/* Counterpart of h_pcmpistri_34 that does not execute the instruction:
   it calls pcmpXstrX_WRK (defined elsewhere in this file) with control
   byte 0x34 and isSTRM == False, passing zero-masks computed from both
   operands by zmask_from_V128.

   The result uses the same packing as h_pcmpistri_34: the flags word
   produced by pcmpXstrX_WRK is shifted left by 16 and OR-ed with the
   first 32-bit lane of the result vector.  Asserts if pcmpXstrX_WRK
   reports failure. */
UInt s_pcmpistri_34 ( V128* argLU, V128* argRU )
{
   V128 outVec;
   UInt outFlags;
   Bool handled;

   handled = pcmpXstrX_WRK( &outVec, &outFlags, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            0x34, False/*!isSTRM*/ );
   assert(handled);

   return (outFlags << 16) | outVec.uInt[0];
}

void istri_34 ( void )
{
   char* wot = "34";
   UInt(*h)(V128*,V128*) = h_pcmpistri_34;
   UInt(*s)(V128*,V128*) = s_pcmpistri_34;

   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 
   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 
   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 
   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 

   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 
   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 
   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 
   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 
   try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 

   try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 

   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 
   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 
   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 

   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 
   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 
   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");

   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");

   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");

   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_14                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware reference for imm8 = 0x14: runs the real PCMPISTRI
   instruction on private copies of *argL (loaded into %xmm2) and
   *argR (loaded into %xmm11).

   Returns a packed word: bits 15:0 hold the low 16 bits of %rcx as left
   by the instruction; the RFLAGS bits selected by 0x8D5 (bit positions
   11, 7, 6, 4, 2 and 0, i.e. the SHIFT_O/S/Z/A/P/C positions defined at
   the top of this file) are placed 16 bits higher. */
UInt h_pcmpistri_14 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxAfter, rflagsAfter;

   /* Both vectors sit back to back so one base register reaches them. */
   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* %rsp is lowered around the pushfq/popq pair, presumably so the
         push cannot land on compiler-owned data just below %rsp. */
      "subq      $1024,      %%rsp"             "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x14,      %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,      %[idx]"            "\n\t"
      "movq      %%rdx,      %[flg]"            "\n\t"
      "addq      $1024,      %%rsp"             "\n\t"
      : [idx] "=r"(rcxAfter), [flg] "=r"(rflagsAfter)
      : [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsAfter & 0x8D5) << 16) | (rcxAfter & 0xFFFF);
}

/* Counterpart of h_pcmpistri_14 that does not execute the instruction:
   it calls pcmpXstrX_WRK (defined elsewhere in this file) with control
   byte 0x14 and isSTRM == False, passing zero-masks computed from both
   operands by zmask_from_V128.

   The result uses the same packing as h_pcmpistri_14: the flags word
   produced by pcmpXstrX_WRK is shifted left by 16 and OR-ed with the
   first 32-bit lane of the result vector.  Asserts if pcmpXstrX_WRK
   reports failure. */
UInt s_pcmpistri_14 ( V128* argLU, V128* argRU )
{
   V128 outVec;
   UInt outFlags;
   Bool handled;

   handled = pcmpXstrX_WRK( &outVec, &outFlags, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            0x14, False/*!isSTRM*/ );
   assert(handled);

   return (outFlags << 16) | outVec.uInt[0];
}

void istri_14 ( void )
{
   char* wot = "14";
   UInt(*h)(V128*,V128*) = h_pcmpistri_14;
   UInt(*s)(V128*,V128*) = s_pcmpistri_14;

   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 
   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 
   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 
   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 

   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 
   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 
   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 
   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 
   try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 

   try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 

   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 
   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 
   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 

   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 
   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 
   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");

   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");

   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");

   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_70                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware reference for imm8 = 0x70: runs the real PCMPISTRI
   instruction on private copies of *argL (loaded into %xmm2) and
   *argR (loaded into %xmm11).

   Returns a packed word: bits 15:0 hold the low 16 bits of %rcx as left
   by the instruction; the RFLAGS bits selected by 0x8D5 (bit positions
   11, 7, 6, 4, 2 and 0, i.e. the SHIFT_O/S/Z/A/P/C positions defined at
   the top of this file) are placed 16 bits higher. */
UInt h_pcmpistri_70 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxAfter, rflagsAfter;

   /* Both vectors sit back to back so one base register reaches them. */
   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* %rsp is lowered around the pushfq/popq pair, presumably so the
         push cannot land on compiler-owned data just below %rsp. */
      "subq      $1024,      %%rsp"             "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x70,      %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,      %[idx]"            "\n\t"
      "movq      %%rdx,      %[flg]"            "\n\t"
      "addq      $1024,      %%rsp"             "\n\t"
      : [idx] "=r"(rcxAfter), [flg] "=r"(rflagsAfter)
      : [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsAfter & 0x8D5) << 16) | (rcxAfter & 0xFFFF);
}

/* Counterpart of h_pcmpistri_70 that does not execute the instruction:
   it calls pcmpXstrX_WRK (defined elsewhere in this file) with control
   byte 0x70 and isSTRM == False, passing zero-masks computed from both
   operands by zmask_from_V128.

   The result uses the same packing as h_pcmpistri_70: the flags word
   produced by pcmpXstrX_WRK is shifted left by 16 and OR-ed with the
   first 32-bit lane of the result vector.  Asserts if pcmpXstrX_WRK
   reports failure. */
UInt s_pcmpistri_70 ( V128* argLU, V128* argRU )
{
   V128 outVec;
   UInt outFlags;
   Bool handled;

   handled = pcmpXstrX_WRK( &outVec, &outFlags, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            0x70, False/*!isSTRM*/ );
   assert(handled);

   return (outFlags << 16) | outVec.uInt[0];
}

void istri_70 ( void )
{
   char* wot = "70";
   UInt(*h)(V128*,V128*) = h_pcmpistri_70;
   UInt(*s)(V128*,V128*) = s_pcmpistri_70;

   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 
   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 

   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 

   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 

   try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 

   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 

   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 
   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_62                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware reference for imm8 = 0x62: runs the real PCMPISTRI
   instruction on private copies of *argL (loaded into %xmm2) and
   *argR (loaded into %xmm11).

   Returns a packed word: bits 15:0 hold the low 16 bits of %rcx as left
   by the instruction; the RFLAGS bits selected by 0x8D5 (bit positions
   11, 7, 6, 4, 2 and 0, i.e. the SHIFT_O/S/Z/A/P/C positions defined at
   the top of this file) are placed 16 bits higher. */
UInt h_pcmpistri_62 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxAfter, rflagsAfter;

   /* Both vectors sit back to back so one base register reaches them. */
   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* %rsp is lowered around the pushfq/popq pair, presumably so the
         push cannot land on compiler-owned data just below %rsp. */
      "subq      $1024,      %%rsp"             "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x62,      %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,      %[idx]"            "\n\t"
      "movq      %%rdx,      %[flg]"            "\n\t"
      "addq      $1024,      %%rsp"             "\n\t"
      : [idx] "=r"(rcxAfter), [flg] "=r"(rflagsAfter)
      : [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsAfter & 0x8D5) << 16) | (rcxAfter & 0xFFFF);
}

/* Counterpart of h_pcmpistri_62 that does not execute the instruction:
   it calls pcmpXstrX_WRK (defined elsewhere in this file) with control
   byte 0x62 and isSTRM == False, passing zero-masks computed from both
   operands by zmask_from_V128.

   The result uses the same packing as h_pcmpistri_62: the flags word
   produced by pcmpXstrX_WRK is shifted left by 16 and OR-ed with the
   first 32-bit lane of the result vector.  Asserts if pcmpXstrX_WRK
   reports failure. */
UInt s_pcmpistri_62 ( V128* argLU, V128* argRU )
{
   V128 outVec;
   UInt outFlags;
   Bool handled;

   handled = pcmpXstrX_WRK( &outVec, &outFlags, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            0x62, False/*!isSTRM*/ );
   assert(handled);

   return (outFlags << 16) | outVec.uInt[0];
}

void istri_62 ( void )
{
   char* wot = "62";
   UInt(*h)(V128*,V128*) = h_pcmpistri_62;
   UInt(*s)(V128*,V128*) = s_pcmpistri_62;

   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 
   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 

   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 

   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 

   try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 

   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 

   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 
   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_72                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware reference for imm8 = 0x72: runs the real PCMPISTRI
   instruction on private copies of *argL (loaded into %xmm2) and
   *argR (loaded into %xmm11).

   Returns a packed word: bits 15:0 hold the low 16 bits of %rcx as left
   by the instruction; the RFLAGS bits selected by 0x8D5 (bit positions
   11, 7, 6, 4, 2 and 0, i.e. the SHIFT_O/S/Z/A/P/C positions defined at
   the top of this file) are placed 16 bits higher. */
UInt h_pcmpistri_72 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxAfter, rflagsAfter;

   /* Both vectors sit back to back so one base register reaches them. */
   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* %rsp is lowered around the pushfq/popq pair, presumably so the
         push cannot land on compiler-owned data just below %rsp. */
      "subq      $1024,      %%rsp"             "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x72,      %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,      %[idx]"            "\n\t"
      "movq      %%rdx,      %[flg]"            "\n\t"
      "addq      $1024,      %%rsp"             "\n\t"
      : [idx] "=r"(rcxAfter), [flg] "=r"(rflagsAfter)
      : [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsAfter & 0x8D5) << 16) | (rcxAfter & 0xFFFF);
}

/* Counterpart of h_pcmpistri_72 that does not execute the instruction:
   it calls pcmpXstrX_WRK (defined elsewhere in this file) with control
   byte 0x72 and isSTRM == False, passing zero-masks computed from both
   operands by zmask_from_V128.

   The result uses the same packing as h_pcmpistri_72: the flags word
   produced by pcmpXstrX_WRK is shifted left by 16 and OR-ed with the
   first 32-bit lane of the result vector.  Asserts if pcmpXstrX_WRK
   reports failure. */
UInt s_pcmpistri_72 ( V128* argLU, V128* argRU )
{
   V128 outVec;
   UInt outFlags;
   Bool handled;

   handled = pcmpXstrX_WRK( &outVec, &outFlags, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            0x72, False/*!isSTRM*/ );
   assert(handled);

   return (outFlags << 16) | outVec.uInt[0];
}

void istri_72 ( void )
{
   char* wot = "72";
   UInt(*h)(V128*,V128*) = h_pcmpistri_72;
   UInt(*s)(V128*,V128*) = s_pcmpistri_72;

   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 
   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 

   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 

   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 

   try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 

   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 

   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 
   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_10                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware reference for imm8 = 0x10: runs the real PCMPISTRI
   instruction on private copies of *argL (loaded into %xmm2) and
   *argR (loaded into %xmm11).

   Returns a packed word: bits 15:0 hold the low 16 bits of %rcx as left
   by the instruction; the RFLAGS bits selected by 0x8D5 (bit positions
   11, 7, 6, 4, 2 and 0, i.e. the SHIFT_O/S/Z/A/P/C positions defined at
   the top of this file) are placed 16 bits higher. */
UInt h_pcmpistri_10 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxAfter, rflagsAfter;

   /* Both vectors sit back to back so one base register reaches them. */
   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* %rsp is lowered around the pushfq/popq pair, presumably so the
         push cannot land on compiler-owned data just below %rsp. */
      "subq      $1024,      %%rsp"             "\n\t"
      "movdqu    0(%[ops]),  %%xmm2"            "\n\t"
      "movdqu    16(%[ops]), %%xmm11"           "\n\t"
      "pcmpistri $0x10,      %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                                  "\n\t"
      "popq      %%rdx"                         "\n\t"
      "movq      %%rcx,      %[idx]"            "\n\t"
      "movq      %%rdx,      %[flg]"            "\n\t"
      "addq      $1024,      %%rsp"             "\n\t"
      : [idx] "=r"(rcxAfter), [flg] "=r"(rflagsAfter)
      : [ops] "r"(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsAfter & 0x8D5) << 16) | (rcxAfter & 0xFFFF);
}

/* Counterpart of h_pcmpistri_10 that does not execute the instruction:
   it calls pcmpXstrX_WRK (defined elsewhere in this file) with control
   byte 0x10 and isSTRM == False, passing zero-masks computed from both
   operands by zmask_from_V128.

   The result uses the same packing as h_pcmpistri_10: the flags word
   produced by pcmpXstrX_WRK is shifted left by 16 and OR-ed with the
   first 32-bit lane of the result vector.  Asserts if pcmpXstrX_WRK
   reports failure. */
UInt s_pcmpistri_10 ( V128* argLU, V128* argRU )
{
   V128 outVec;
   UInt outFlags;
   Bool handled;

   handled = pcmpXstrX_WRK( &outVec, &outFlags, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            0x10, False/*!isSTRM*/ );
   assert(handled);

   return (outFlags << 16) | outVec.uInt[0];
}

void istri_10 ( void )
{
   char* wot = "10";
   UInt(*h)(V128*,V128*) = h_pcmpistri_10;
   UInt(*s)(V128*,V128*) = s_pcmpistri_10;

   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 
   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 

   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 

   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 
   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 

   try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 

   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 

   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 

   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 
   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 
}


//////////////////////////////////////////////////////////
//                                                      //
//                         main                         //
//                                                      //
//////////////////////////////////////////////////////////

/* Program entry point: runs every per-immediate test driver once, in
   the fixed order below, then returns 0.

   NOTE(review): the call order is not the same as the definition order
   in this file (istri_14 is called before istri_34, although istri_34
   is defined first).  Presumably any recorded expected output depends
   on this order -- confirm before rearranging. */
int main ( void )
{
   /* Drivers defined earlier in the file. */
   istri_4A();
   istri_3A();
   istri_08();
   istri_18();
   istri_1A();
   istri_02();
   istri_0C();
   istri_12();
   istri_44();
   istri_00();
   istri_38();
   istri_46();
   istri_30();
   istri_40();
   istri_42();
   istri_0E();
   istri_14();
   istri_34();
   istri_70();
   istri_62();
   istri_72();
   istri_10();
   return 0;
}