DestName = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VPBLENDDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPBLENDDrmi:
+ if (MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v4i32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::VPBLENDDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPBLENDDYrmi:
+ if (MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v8i32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
case X86::INSERTPSrr:
case X86::VINSERTPSrr:
DestName = getRegName(MI->getOperand(0).getReg());
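For reference, the DecodeBLENDMask calls added above turn the imm8 into one shuffle-mask entry per element: bit i set selects element i of the second source, bit i clear selects element i of the first. A minimal standalone sketch of that decode (decodeBlendMask and the plain-STL types are illustrative stand-ins, not LLVM's actual signature):

#include <cstdint>
#include <vector>

// Bit i of the immediate picks element i of the second source (encoded as
// NumElts + i in the combined index space), otherwise element i of the first.
std::vector<int> decodeBlendMask(int NumElts, uint8_t Imm) {
  std::vector<int> Mask;
  for (int i = 0; i < NumElts; ++i)
    Mask.push_back(((Imm >> i) & 1) ? NumElts + i : i);
  return Mask;
}

// decodeBlendMask(4, 0xC /*0b1100*/) yields {0, 1, 6, 7}, which the printer
// renders as "dst = src1[0,1],src2[2,3]" in the asm comments, as in the
// updated test expectations below.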
static bool isTargetShuffle(unsigned Opcode) {
switch(Opcode) {
default: return false;
+ case X86ISD::BLENDI:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
IsUnary = false;
bool IsFakeUnary = false;
switch(N->getOpcode()) {
+ case X86ISD::BLENDI:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
case X86ISD::SHUFP:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
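getTargetShuffleMask reports every target shuffle, BLENDI now included, in the same combined index space: entries below NumElts name the first operand, entries at or above it name the second. A hedged sketch of how a consumer splits such a mask back into per-operand references (printDecodedMask is a hypothetical helper, not LLVM code):

#include <cstdio>
#include <vector>

void printDecodedMask(const std::vector<int> &Mask) {
  int NumElts = static_cast<int>(Mask.size());
  for (int i = 0; i < NumElts; ++i)
    std::printf("%s[%d]%s",
                Mask[i] < NumElts ? "src1" : "src2",            // which operand
                Mask[i] < NumElts ? Mask[i] : Mask[i] - NumElts, // local index
                i + 1 < NumElts ? "," : "\n");
}

// printDecodedMask({0, 1, 6, 7}) prints "src1[0],src1[1],src2[2],src2[3]".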
// If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
// that instruction.
if (Subtarget->hasAVX2()) {
- int Scale = 8 / VT.getVectorNumElements();
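+ // VPBLENDD selects among 32-bit lanes, four per 128-bit vector, so the
+ // element mask must be widened to dword granularity before building the
+ // immediate.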
+ int Scale = 4 / VT.getVectorNumElements();
BlendMask = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (Mask[i] >= Size)
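This is the miscompile the hunk fixes: with the old 8 / NumElements, a v4i32 blend got Scale = 2 and the loop set bits 4-7 of the immediate, producing the $-16 (0xf0) seen in the old test expectations below, which VPBLENDD on an xmm register ignores (only imm8 bits 0-3 select dwords). 4 / NumElements matches the instruction's four 32-bit lanes. A standalone sketch of the widening loop with a worked example (scaleBlendMask is an illustrative name, not LLVM's):

#include <cstdint>
#include <vector>

uint32_t scaleBlendMask(const std::vector<int> &Mask, int Scale) {
  uint32_t BlendMask = 0;
  int Size = static_cast<int>(Mask.size());
  for (int i = 0; i < Size; ++i)
    if (Mask[i] >= Size)                // element taken from the second source
      for (int j = 0; j < Scale; ++j)
        BlendMask |= 1u << (i * Scale + j);
  return BlendMask;
}

// For the <0, 1, 6, 7> blends in the tests below: scaleBlendMask({0,1,6,7}, 1)
// gives 0b1100 = 12, the correct immediate; the old Scale of 2 set bits 4-7
// instead, i.e. 0xf0, printed as $-16.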
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpshuflw {{.*}} # xmm3 = xmm3[0,0,0,0,4,5,6,7]
-; AVX2-NEXT: vpblendd $-16, %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm3[0,1],xmm2[2,3]
; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX2-NEXT: vpblendd $-16, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: vpshufhw {{.*}} # xmm2 = xmm2[0,1,2,3,7,6,5,4]
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpshuflw {{.*}} # xmm3 = xmm3[3,2,1,0,4,5,6,7]
-; AVX2-NEXT: vpblendd $-16, %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm3[0,1],xmm2[2,3]
; AVX2-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
-; AVX2-NEXT: vpblendd $-16, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
; AVX2-NEXT: vpshufd {{.*}} # xmm3 = xmm3[0,1,0,1]
; AVX2-NEXT: vpshufhw {{.*}} # xmm3 = xmm3[0,1,2,3,7,6,5,4]
-; AVX2-NEXT: vpblendd $-16, %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm2[0,1],xmm3[2,3]
; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
; AVX2-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
; AVX2-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; AVX2-NEXT: vpblendd $-16, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
; AVX2-NEXT: vpunpcklbw {{.*}} # xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-NEXT: vpshuflw {{.*}} # xmm4 = xmm4[0,0,0,0,4,5,6,7]
-; AVX2-NEXT: vpblendd $-16, %xmm2, %xmm4, %xmm2
+; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm4[0,1],xmm2[2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklbw {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX2-NEXT: vpblendd $-16, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30>
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
; AVX2-NEXT: vmovdqa .LCPI50_1(%rip), %xmm5
; AVX2-NEXT: vpshufb %xmm5, %xmm4, %xmm4
-; AVX2-NEXT: vpblendd $-16, %xmm2, %xmm4, %xmm2
+; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm4[0,1],xmm2[2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm5, %xmm1, %xmm1
-; AVX2-NEXT: vpblendd $-16, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 38, i32 38, i32 36, i32 36, i32 34, i32 34, i32 32, i32 32, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8, i32 54, i32 54, i32 52, i32 52, i32 50, i32 50, i32 48, i32 48, i32 30, i32 30, i32 28, i32 28, i32 26, i32 26, i32 24, i32 24>
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
; AVX2-NEXT: vmovdqa .LCPI51_1(%rip), %xmm5
; AVX2-NEXT: vpshufb %xmm5, %xmm4, %xmm4
-; AVX2-NEXT: vpblendd $-16, %xmm2, %xmm4, %xmm2
+; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm4[0,1],xmm2[2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm5, %xmm1, %xmm1
-; AVX2-NEXT: vpblendd $-16, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 38, i32 38, i32 36, i32 36, i32 34, i32 34, i32 32, i32 32, i32 6, i32 6, i32 4, i32 4, i32 2, i32 2, i32 0, i32 0, i32 54, i32 54, i32 52, i32 52, i32 50, i32 50, i32 48, i32 48, i32 22, i32 22, i32 20, i32 20, i32 18, i32 18, i32 16, i32 16>