; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

; Codegen test for integer add/sub/mul on vector types with AVX2 enabled.
; Every function is anchored with a label directive so that its instruction
; checks can only match inside that function's own assembly output. The
; label patterns are plain substrings of the function name because the
; emitted Darwin symbol is decorated (leading underscore).

;------------------------------------------------------------------------------
; Element-wise add on 256-bit vectors: expects the ymm form of vpadd{q,d,w,b}.
;------------------------------------------------------------------------------

; CHECK-LABEL: test_vpaddq
; CHECK: vpaddq %ymm
define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK-LABEL: test_vpaddd
; CHECK: vpaddd %ymm
define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK-LABEL: test_vpaddw
; CHECK: vpaddw %ymm
define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK-LABEL: test_vpaddb
; CHECK: vpaddb %ymm
define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

;------------------------------------------------------------------------------
; Element-wise sub on 256-bit vectors: expects the ymm form of vpsub{q,d,w,b}.
;------------------------------------------------------------------------------

; CHECK-LABEL: test_vpsubq
; CHECK: vpsubq %ymm
define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK-LABEL: test_vpsubd
; CHECK: vpsubd %ymm
define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK-LABEL: test_vpsubw
; CHECK: vpsubw %ymm
define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK-LABEL: test_vpsubb
; CHECK: vpsubb %ymm
define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

;------------------------------------------------------------------------------
; Element-wise mul with variable operands.
;------------------------------------------------------------------------------

; CHECK-LABEL: test_vpmulld
; CHECK: vpmulld %ymm
define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK-LABEL: test_vpmullw
; CHECK: vpmullw %ymm
define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

; Byte multiply: the expected sequence sign-extends both operands to words,
; multiplies with vpmullw, then shuffles the low bytes back together.
; CHECK-LABEL: mul-v16i8
; CHECK: # BB#0:
; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define <16 x i8> @mul-v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = mul <16 x i8> %i, %j
  ret <16 x i8> %x
}

; 32-byte multiply: the expected sequence handles the upper 128-bit halves
; first, then the lower halves, and recombines them with vinserti128.
; CHECK-LABEL: mul-v32i8
; CHECK: # BB#0:
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
; CHECK-NEXT: vpmovsxbw %xmm2, %ymm2
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3
; CHECK-NEXT: vpmovsxbw %xmm3, %ymm3
; CHECK-NEXT: vpmullw %ymm2, %ymm3, %ymm2
; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpshufb %xmm4, %xmm3, %xmm3
; CHECK-NEXT: vpshufb %xmm4, %xmm2, %xmm2
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpshufb %xmm4, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT: retq
define <32 x i8> @mul-v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = mul <32 x i8> %i, %j
  ret <32 x i8> %x
}

; 64-bit element multiply: expected to be built from vpmuludq partial
; products combined with 32-bit shifts and adds.
; CHECK-LABEL: mul-v4i64
; CHECK: vpmuludq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}

;------------------------------------------------------------------------------
; Multiply by constant vectors.
;------------------------------------------------------------------------------

; Splat of 2: expects an add instead of a multiply.
; CHECK-LABEL: mul_const1
; CHECK: vpaddd
; CHECK: ret
define <8 x i32> @mul_const1(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %y
}

; Splat of 4: expects a left shift by 2.
; CHECK-LABEL: mul_const2
; CHECK: vpsllq $2
; CHECK: ret
define <4 x i64> @mul_const2(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
  ret <4 x i64> %y
}

; Splat of 8 on 16-bit elements: expects a left shift by 3.
; CHECK-LABEL: mul_const3
; CHECK: vpsllw $3
; CHECK: ret
define <16 x i16> @mul_const3(<16 x i16> %x) {
  %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <16 x i16> %y
}

; Splat of -1: expects a zeroing vpxor followed by a subtract.
; CHECK-LABEL: mul_const4
; CHECK: vpxor
; CHECK: vpsubq
; CHECK: ret
define <4 x i64> @mul_const4(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %y
}

; All-zero constant: expects only a vxorps directly followed by the return.
; CHECK-LABEL: mul_const5
; CHECK: vxorps
; CHECK-NEXT: ret
define <8 x i32> @mul_const5(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

; Non-splat constant mixing 0 and 2: expects a real vpmulld.
; CHECK-LABEL: mul_const6
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const6(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i32> %y
}

; Splat of 2 on a 512-bit type: expects two vpaddq instructions.
; CHECK-LABEL: mul_const7
; CHECK: vpaddq
; CHECK: vpaddq
; CHECK: ret
define <8 x i64> @mul_const7(<8 x i64> %x) {
  %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %y
}

; Splat of 8 on a 128-bit type: expects a left shift by 3.
; CHECK-LABEL: mul_const8
; CHECK: vpsllw $3
; CHECK: ret
define <8 x i16> @mul_const8(<8 x i16> %x) {
  %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %y
}

; Non-splat constant with a single non-zero lane: expects a real vpmulld.
; CHECK-LABEL: mul_const9
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const9(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

; Splat of a constant that is not a power of two: expects a real vpmulld.
; CHECK-LABEL: mul_const10
; CHECK: vpmulld
; CHECK: ret
define <4 x i32> @mul_const10(<4 x i32> %x) {
  ; %x * 0x01010101
  %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
  ret <4 x i32> %m
}

; Same as above with the top bit of every byte set: expects a real vpmulld.
; CHECK-LABEL: mul_const11
; CHECK: vpmulld
; CHECK: ret
define <4 x i32> @mul_const11(<4 x i32> %x) {
  ; %x * 0x80808080
  %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>
  ret <4 x i32> %m
}