#
# %mm <-> ireg64
#
movd mm.sd[1234,5678] r64.sd[1111,2222] => 1.sd[1234,5678]
movd r64.sd[1234,5678] mm.sd[1111,2222] => 1.sd[1234,5678]
#
# %mm <-> ireg32
#
movd r32.sd[1234] mm.sd[1111,2222] => 1.sd[1234,0]
movd mm.sd[1234,2222] r32.sd[1111] => 1.sd[1234]
#
# %mm <-> mem32
#
movd m32.sd[1234] mm.sd[1111,2222] => 1.sd[1234,0]
movd mm.sd[1234,2222] m32.sd[1111] => 1.sd[1234]
#
# %mm <-> mem64
#
movq m64.uq[0x012345678abcdef] mm.uq[0x1212121234343434] => 1.uq[0x012345678abcdef]
movq mm.uq[0x012345678abcdef] m64.uq[0x1212121234343434] => 1.uq[0x012345678abcdef]
#
# %mm -> %mm
#
movq mm.uq[0x012345678abcdef] mm.uq[0x1212121234343434] => 1.uq[0x012345678abcdef]
#
#
#
packssdw mm.sd[12345,123456] mm.sd[-12345,-123456] => 1.sw[-12345,-32768,12345,32767]
packssdw m64.sd[12345,123456] mm.sd[-12345,-123456] => 1.sw[-12345,-32768,12345,32767]
packsswb mm.sw[123,-123,1234,-1234] mm.sw[21,-21,321,-321] => 1.sb[21,-21,127,-128,123,-123,127,-128]
packsswb m64.sw[123,-123,1234,-1234] mm.sw[21,-21,321,-321] => 1.sb[21,-21,127,-128,123,-123,127,-128]
packuswb mm.sw[123,-123,1234,-1234] mm.sw[21,-21,321,-321] => 1.ub[21,0,255,0,123,0,255,0]
packuswb m64.sw[123,-123,1234,-1234] mm.sw[21,-21,321,-321] => 1.ub[21,0,255,0,123,0,255,0]
paddb mm.sb[12,34,56,78,21,43,65,87] mm.sb[8,7,6,5,4,3,2,1] => 1.sb[20,41,62,83,25,46,67,88]
paddb m64.sb[12,34,56,78,21,43,65,87] mm.sb[8,7,6,5,4,3,2,1] => 1.sb[20,41,62,83,25,46,67,88]
paddd mm.sd[12345678,87654321] mm.sd[8765,4321] => 1.sd[12354443,87658642]
paddd m64.sd[12345678,87654321] mm.sd[8765,4321] => 1.sd[12354443,87658642]
paddsb mm.sb[25,-25,50,-50,100,-100,125,-125] mm.sb[40,-40,30,-30,20,-20,10,-10] => 1.sb[65,-65,80,-80,120,-120,127,-128]
paddsb m64.sb[25,-25,50,-50,100,-100,125,-125] mm.sb[40,-40,30,-30,20,-20,10,-10] => 1.sb[65,-65,80,-80,120,-120,127,-128]
paddsw mm.sw[12345,-12345,32145,-32145] mm.sw[32145,-32145,-12345,12345] => 1.sw[32767,-32768,19800,-19800]
paddsw m64.sw[12345,-12345,32145,-32145] mm.sw[32145,-32145,-12345,12345] => 1.sw[32767,-32768,19800,-19800]
paddusb mm.ub[25,50,75,100,125,150,175,200] mm.ub[10,20,30,40,50,60,70,80] => 1.ub[35,70,105,140,175,210,245,255]
paddusb m64.ub[25,50,75,100,125,150,175,200] mm.ub[10,20,30,40,50,60,70,80] => 1.ub[35,70,105,140,175,210,245,255]
paddusw mm.uw[22222,33333,44444,55555] mm.uw[6666,7777,8888,9999] => 1.uw[28888,41110,53332,65535]
paddusw m64.uw[22222,33333,44444,55555] mm.uw[6666,7777,8888,9999] => 1.uw[28888,41110,53332,65535]
paddw mm.sw[1234,5678,4321,8765] mm.sw[87,65,43,21] => 1.sw[1321,5743,4364,8786]
paddw m64.sw[1234,5678,4321,8765] mm.sw[87,65,43,21] => 1.sw[1321,5743,4364,8786]
pand mm.uq[0xfdb97531eca86420] mm.uq[0x0123456789abcdef] => 1.uq[0x0121452188a84420]
pand m64.uq[0xfdb97531eca86420] mm.uq[0x0123456789abcdef] => 1.uq[0x0121452188a84420]
pandn mm.uq[0xfdb97531eca86420] mm.uq[0x0123456789abcdef] => 1.uq[0xfc98301064002000]
pandn m64.uq[0xfdb97531eca86420] mm.uq[0x0123456789abcdef] => 1.uq[0xfc98301064002000]
pcmpeqb mm.ub[11,22,33,44,55,66,77,88] mm.ub[11,11,33,33,55,55,77,77] => 1.ub[0xff,0x00,0xff,0x00,0xff,0x00,0xff,0x00]
pcmpeqb m64.ub[11,22,33,44,55,66,77,88] mm.ub[11,11,33,33,55,55,77,77] => 1.ub[0xff,0x00,0xff,0x00,0xff,0x00,0xff,0x00]
pcmpeqd mm.ud[11223344,55667788] mm.ud[11223344,11223344] => 1.ud[0xffffffff,0x00000000]
pcmpeqd m64.ud[11223344,55667788] mm.ud[11223344,11223344] => 1.ud[0xffffffff,0x00000000]
pcmpeqw mm.uw[1122,3344,5566,7788] mm.uw[1122,1122,5566,5566] => 1.uw[0xffff,0x0000,0xffff,0x0000]
pcmpeqw m64.uw[1122,3344,5566,7788] mm.uw[1122,1122,5566,5566] => 1.uw[0xffff,0x0000,0xffff,0x0000]
pcmpgtb mm.sb[-77,-55,-33,-11,11,33,55,77] mm.sb[77,55,33,11,-11,-33,-55,-77] => 1.ub[0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
pcmpgtb m64.sb[-77,-55,-33,-11,11,33,55,77] mm.sb[77,55,33,11,-11,-33,-55,-77] => 1.ub[0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
pcmpgtd mm.sd[-11111111,11111111] mm.sd[11111111,-11111111] => 1.ud[0xffffffff,0x00000000]
pcmpgtd m64.sd[-11111111,11111111] mm.sd[11111111,-11111111] => 1.ud[0xffffffff,0x00000000]
pcmpgtw mm.sw[-3333,-1111,1111,3333] mm.sw[3333,1111,-1111,-3333] => 1.uw[0xffff,0xffff,0x0000,0x0000]
pcmpgtw m64.sw[-3333,-1111,1111,3333] mm.sw[3333,1111,-1111,-3333] => 1.uw[0xffff,0xffff,0x0000,0x0000]
pmaddwd mm.sw[1234,5678,-4321,-8765] mm.sw[1111,-2222,3333,-4444] => 1.sd[-11245542,24549767]
pmaddwd m64.sw[1234,5678,-4321,-8765] mm.sw[1111,-2222,3333,-4444] => 1.sd[-11245542,24549767]
pmulhw mm.sw[1111,2222,-1111,-2222] mm.sw[3333,-4444,3333,-4444] => 1.uw[0x0038,0xff69,0xffc7,0x0096]
pmulhw m64.sw[1111,2222,-1111,-2222] mm.sw[3333,-4444,3333,-4444] => 1.uw[0x0038,0xff69,0xffc7,0x0096]
pmullw mm.sw[1111,2222,-1111,-2222] mm.sw[3333,-4444,3333,-4444] => 1.uw[0x80b3,0x5378,0x7f4d,0xac88]
pmullw m64.sw[1111,2222,-1111,-2222] mm.sw[3333,-4444,3333,-4444] => 1.uw[0x80b3,0x5378,0x7f4d,0xac88]
por mm.uq[0xfdb97531eca86420] mm.uq[0x0123456789abcdef] => 1.uq[0xfdbb7577edabedef]
por m64.uq[0xfdb97531eca86420] mm.uq[0x0123456789abcdef] => 1.uq[0xfdbb7577edabedef]
pslld imm8[4] mm.ud[0x01234567,0x89abcdef] => 1.ud[0x12345670,0x9abcdef0]
pslld mm.uq[4] mm.ud[0x01234567,0x89abcdef] => 1.ud[0x12345670,0x9abcdef0]
pslld m64.uq[4] mm.ud[0x01234567,0x89abcdef] => 1.ud[0x12345670,0x9abcdef0]
psllq imm8[4] mm.uq[0x0123456789abcdef] => 1.uq[0x123456789abcdef0]
psllq mm.uq[4] mm.uq[0x0123456789abcdef] => 1.uq[0x123456789abcdef0]
psllq m64.uq[4] mm.uq[0x0123456789abcdef] => 1.uq[0x123456789abcdef0]
psllw imm8[4] mm.uw[0x0123,0x4567,0x89ab,0xcdef] => 1.uw[0x1230,0x5670,0x9ab0,0xdef0]
psllw mm.uq[4] mm.uw[0x0123,0x4567,0x89ab,0xcdef] => 1.uw[0x1230,0x5670,0x9ab0,0xdef0]
psllw m64.uq[4] mm.uw[0x0123,0x4567,0x89ab,0xcdef] => 1.uw[0x1230,0x5670,0x9ab0,0xdef0]
psrad imm8[4] mm.ud[0x01234567,0x89abcdef] => 1.ud[0x00123456,0xf89abcde]
psrad mm.uq[4] mm.ud[0x01234567,0x89abcdef] => 1.ud[0x00123456,0xf89abcde]
psrad m64.uq[4] mm.ud[0x01234567,0x89abcdef] => 1.ud[0x00123456,0xf89abcde]
psraw imm8[4] mm.uw[0x0123,0x4567,0x89ab,0xcdef] => 1.uw[0x0012,0x0456,0xf89a,0xfcde]
psraw mm.uq[4] mm.uw[0x0123,0x4567,0x89ab,0xcdef] => 1.uw[0x0012,0x0456,0xf89a,0xfcde]
psraw m64.uq[4] mm.uw[0x0123,0x4567,0x89ab,0xcdef] => 1.uw[0x0012,0x0456,0xf89a,0xfcde]
psrld imm8[4] mm.ud[0x01234567,0x89abcdef] => 1.ud[0x00123456,0x089abcde]
psrld mm.uq[4] mm.ud[0x01234567,0x89abcdef] => 1.ud[0x00123456,0x089abcde]
psrld m64.uq[4] mm.ud[0x01234567,0x89abcdef] => 1.ud[0x00123456,0x089abcde]
psrlq imm8[4] mm.uq[0x0123456789abcdef] => 1.uq[0x00123456789abcde]
psrlq mm.uq[4] mm.uq[0x0123456789abcdef] => 1.uq[0x00123456789abcde]
psrlq m64.uq[4] mm.uq[0x0123456789abcdef] => 1.uq[0x00123456789abcde]
psrlw imm8[4] mm.uw[0x0123,0x4567,0x89ab,0xcdef] => 1.uw[0x0012,0x0456,0x089a,0x0cde]
psrlw mm.uq[4] mm.uw[0x0123,0x4567,0x89ab,0xcdef] => 1.uw[0x0012,0x0456,0x089a,0x0cde]
psrlw m64.uq[4] mm.uw[0x0123,0x4567,0x89ab,0xcdef] => 1.uw[0x0012,0x0456,0x089a,0x0cde]
psubb mm.sb[8,7,6,5,4,3,2,1] mm.sb[12,34,56,78,21,43,65,87] => 1.sb[4,27,50,73,17,40,63,86]
psubb m64.sb[8,7,6,5,4,3,2,1] mm.sb[12,34,56,78,21,43,65,87] => 1.sb[4,27,50,73,17,40,63,86]
psubd mm.sd[8765,4321] mm.sd[12345678,87654321] => 1.sd[12336913,87650000]
psubd m64.sd[8765,4321] mm.sd[12345678,87654321] => 1.sd[12336913,87650000]
psubsb mm.sb[-50,50,-40,40,-30,30,-20,20] mm.sb[25,-25,50,-50,100,-100,125,-125] => 1.sb[75,-75,90,-90,127,-128,127,-128]
psubsb m64.sb[-50,50,-40,40,-30,30,-20,20] mm.sb[25,-25,50,-50,100,-100,125,-125] => 1.sb[75,-75,90,-90,127,-128,127,-128]
psubsw mm.sw[-32145,32145,12345,-12345] mm.sw[12345,-12345,32145,-32145] => 1.sw[32767,-32768,19800,-19800]
psubsw m64.sw[-32145,32145,12345,-12345] mm.sw[12345,-12345,32145,-32145] => 1.sw[32767,-32768,19800,-19800]
psubusb mm.ub[11,22,33,44,55,66,77,88] mm.ub[88,77,66,55,44,33,22,11] => 1.ub[77,55,33,11,0,0,0,0]
psubusb m64.ub[11,22,33,44,55,66,77,88] mm.ub[88,77,66,55,44,33,22,11] => 1.ub[77,55,33,11,0,0,0,0]
psubusw mm.uw[1122,3344,5566,7788] mm.uw[8877,6655,4433,2211] => 1.uw[7755,3311,0,0]
psubusw m64.uw[1122,3344,5566,7788] mm.uw[8877,6655,4433,2211] => 1.uw[7755,3311,0,0]
psubw mm.sw[87,65,43,21] mm.sw[1234,5678,4321,8765] => 1.sw[1147,5613,4278,8744]
psubw m64.sw[87,65,43,21] mm.sw[1234,5678,4321,8765] => 1.sw[1147,5613,4278,8744]
punpckhbw mm.ub[12,34,56,78,21,43,65,87] mm.ub[11,22,33,44,55,66,77,88] => 1.ub[55,21,66,43,77,65,88,87]
punpckhbw m64.ub[12,34,56,78,21,43,65,87] mm.ub[11,22,33,44,55,66,77,88] => 1.ub[55,21,66,43,77,65,88,87]
punpckhdq mm.ud[12345678,21436587] mm.ud[11223344,55667788] => 1.ud[55667788,21436587]
punpckhdq m64.ud[12345678,21436587] mm.ud[11223344,55667788] => 1.ud[55667788,21436587]
punpckhwd mm.uw[1234,5678,2143,6587] mm.uw[1122,3344,5566,7788] => 1.uw[5566,2143,7788,6587]
punpckhwd m64.uw[1234,5678,2143,6587] mm.uw[1122,3344,5566,7788] => 1.uw[5566,2143,7788,6587]
punpcklbw mm.ub[12,34,56,78,21,43,65,87] mm.ub[11,22,33,44,55,66,77,88] => 1.ub[11,12,22,34,33,56,44,78]
punpcklbw m64.ub[12,34,56,78,21,43,65,87] mm.ub[11,22,33,44,55,66,77,88] => 1.ub[11,12,22,34,33,56,44,78]
punpckldq mm.ud[12345678,21436587] mm.ud[11223344,55667788] => 1.ud[11223344,12345678]
punpckldq m64.ud[12345678,21436587] mm.ud[11223344,55667788] => 1.ud[11223344,12345678]
punpcklwd mm.uw[1234,5678,2143,6587] mm.uw[1122,3344,5566,7788] => 1.uw[1122,1234,3344,5678]
punpcklwd m64.uw[1234,5678,2143,6587] mm.uw[1122,3344,5566,7788] => 1.uw[1122,1234,3344,5678]
pxor mm.uq[0xfdb97531eca86420] mm.uq[0x0123456789abcdef] => 1.uq[0xfc9a30566503a9cf]
pxor m64.uq[0xfdb97531eca86420] mm.uq[0x0123456789abcdef] => 1.uq[0xfc9a30566503a9cf]