return true;
}
-/// \brief Emit SSE instructions to lower the select.
+/// \brief Emit SSE or AVX instructions to lower the select.
///
/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
-/// SSE instructions are available.
+/// SSE instructions are available. If AVX is available, try to use a VBLENDV.
bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
// Optimize conditions coming from a compare if both instructions are in the
// same basic block (values defined in other basic blocks may not have
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
- static unsigned OpcTable[2][2][4] = {
- { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
- { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
- { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr },
- { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
+ // Choose the SSE instruction sequence based on data type (float or double).
+ static unsigned OpcTable[2][4] = {
+ { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
+ { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }
};
- bool HasAVX = Subtarget->hasAVX();
unsigned *Opc = nullptr;
switch (RetVT.SimpleTy) {
default: return false;
- case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
- case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
+ case MVT::f32: Opc = &OpcTable[0][0]; break;
+ case MVT::f64: Opc = &OpcTable[1][0]; break;
}
const Value *LHS = I->getOperand(1);
return false;
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
- unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
- CmpRHSReg, CmpRHSIsKill, CC);
- unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
- LHSReg, LHSIsKill);
- unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
- RHSReg, RHSIsKill);
- unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
- AndReg, /*IsKill=*/true);
+ unsigned ResultReg;
+
+ if (Subtarget->hasAVX()) {
+ // If we have AVX, emit a single VBLENDV instead of the three logic
+ // instructions (AND/ANDN/OR). BLENDV was introduced with SSE 4.1, but its
+ // 2 register form implicitly uses XMM0 as the selection register. The
+ // register moves this requires may cost just as many instructions as the
+ // AND/ANDN/OR sequence, so don't bother using BLENDV without AVX.
+ unsigned CmpOpcode =
+ (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
+ unsigned BlendOpcode =
+ (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
+
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill, CmpReg, true);
+ } else {
+ unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ LHSReg, LHSIsKill);
+ unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ RHSReg, RHSIsKill);
+ ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ }
updateValueMap(I, ResultReg);
return true;
}
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_oeq_f32
; AVX: vcmpeqss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp oeq float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_oeq_f64
; AVX: vcmpeqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp oeq double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
; CHECK-NEXT: orps %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ogt_f32
; AVX: vcmpltss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ogt float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
; CHECK-NEXT: orpd %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ogt_f64
; AVX: vcmpltsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ogt double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
; CHECK-NEXT: orps %xmm2, %xmm1
; AVX-LABEL: select_fcmp_oge_f32
; AVX: vcmpless %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp oge float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
; CHECK-NEXT: orpd %xmm2, %xmm1
; AVX-LABEL: select_fcmp_oge_f64
; AVX: vcmplesd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp oge double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_olt_f32
; AVX: vcmpltss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp olt float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_olt_f64
; AVX: vcmpltsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp olt double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ole_f32
; AVX: vcmpless %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ole float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ole_f64
; AVX: vcmplesd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ole double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ord_f32
; AVX: vcmpordss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ord float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ord_f64
; AVX: vcmpordsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ord double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_uno_f32
; AVX: vcmpunordss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp uno float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_uno_f64
; AVX: vcmpunordsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp uno double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ugt_f32
; AVX: vcmpnless %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ugt float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ugt_f64
; AVX: vcmpnlesd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ugt double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_uge_f32
; AVX: vcmpnltss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp uge float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_uge_f64
; AVX: vcmpnltsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp uge double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
; CHECK-NEXT: orps %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ult_f32
; AVX: vcmpnless %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ult float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
; CHECK-NEXT: orpd %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ult_f64
; AVX: vcmpnlesd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ult double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
; CHECK-NEXT: orps %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ule_f32
; AVX: vcmpnltss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ule float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
; CHECK-NEXT: orpd %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ule_f64
; AVX: vcmpnltsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ule double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_une_f32
; AVX: vcmpneqss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp une float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_une_f64
; AVX: vcmpneqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp une double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2