case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
- case ISD::SETCC: Res = WidenVecOp_SETCC(N, ResNo); break;
+ case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
MVT::Other,&StChain[0],StChain.size());
}
-SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N, unsigned ResNo) {
- assert(ResNo < 2 && "Invalid res num to widen");
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
- EVT VT = InOp0.getValueType();
DebugLoc dl = N->getDebugLoc();
// WARNING: In this code we widen the compare instruction with garbage.
// This garbage may contain denormal floats which may be slow. Is this a real
// concern ? Should we zero the unused lanes if this is a float compare ?
- SDValue Zero = DAG.getIntPtrConstant(0);
- EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
- N->getValueType(0).getVectorElementType(),
- VT.getVectorNumElements());
-
+ // Build a SETCC on the widened operands, typed as the target's SETCC result
+ // for the widened input. Only the lanes of the original operands are valid.
+ EVT SVT = TLI.getSetCCResultType(InOp0.getValueType());
SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(),
- ResVT, InOp0, InOp1, N->getOperand(2));
+ SVT, InOp0, InOp1, N->getOperand(2));
+
+ // Extract the leading lanes (as many as N's result has) from the wide result.
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
+ SVT.getVectorElementType(),
+ N->getValueType(0).getVectorNumElements());
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ ResVT, WideSETCC, DAG.getIntPtrConstant(0));
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, N->getValueType(0),
- WideSETCC, Zero);
+ // Any-extend or truncate the extracted mask to N's original result type.
+ return DAG.getAnyExtOrTrunc(CC, dl, N->getValueType(0));
}
--- /dev/null
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that a <2 x float> compare is emitted as a <4 x float> compare, that a
+; <2 x double> compare is emitted too, and that we do not loop forever.
+
+; CHECK: cmp_2_floats
+; CHECK: cmpordps
+; CHECK: ret
+
+define void @cmp_2_floats() {
+entry:
+ %0 = fcmp oeq <2 x float> undef, undef
+ %1 = select <2 x i1> %0, <2 x float> undef, <2 x float> undef
+ store <2 x float> %1, <2 x float>* undef
+ ret void
+}
+
+; CHECK: cmp_2_doubles
+; CHECK: cmpordpd
+; CHECK: blendvpd
+; CHECK: ret
+define void @cmp_2_doubles() {
+entry:
+ %0 = fcmp oeq <2 x double> undef, undef
+ %1 = select <2 x i1> %0, <2 x double> undef, <2 x double> undef
+ store <2 x double> %1, <2 x double>* undef
+ ret void
+}