bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
+ SDValue SplitVecOp_TruncateHelper(SDNode *N, unsigned TruncateOp);
SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
- SDValue SplitVecOp_TRUNCATE(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
- case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break;
+ case ISD::TRUNCATE:
+ Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE);
+ break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
- case ISD::CTTZ:
- case ISD::CTLZ:
- case ISD::CTPOP:
- case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
+ Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE);
+ else
+ Res = SplitVecOp_UnaryOp(N);
+ break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- case ISD::FTRUNC:
+ if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
+ Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC);
+ else
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
Res = SplitVecOp_UnaryOp(N);
break;
+ case ISD::FTRUNC:
+ Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC);
+ break;
}
}
return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts);
}
-SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
+SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N,
+ unsigned TruncateOp) {
// The result type is legal, but the input type is illegal. If splitting
// ends up with the result type of each half still being legal, just
// do that. If, however, that would result in an illegal result type,
EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
NumElements/2);
- SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec);
- SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec);
+ SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
+ SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
// Concatenate them to get the full intermediate truncation result.
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
// type. This should normally be something that ends up being legal directly,
// but in theory if a target has very wide vectors and an annoyingly
// restricted set of legal types, this split can chain to build things up.
- return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
+ return DAG.getNode(TruncateOp, DL, OutVT, InterVec);
}
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
; CHECK: fptosi_v4f64_to_v4i16
-; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d
-; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d
-; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
-; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
-; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
+; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: xtn v[[MID:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn2 v[[MID]].4s, v[[RHS]].2d
+; CHECK: xtn v0.4h, v[[MID]].4s
%tmp1 = load <4 x double>, <4 x double>* %ptr
%tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
ret <4 x i16> %tmp2
; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
-; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
-; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
-; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
-; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV1]].4h, v[[CONV0]].4h
-; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV3]].4h, v[[CONV2]].4h
-; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
+; CHECK-DAG: xtn2 v[[NA2]].4s, v[[CONV3]].2d
+; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
+; CHECK-DAG: xtn2 v[[NA0]].4s, v[[CONV1]].2d
+; CHECK-DAG: xtn v[[TMP1:[0-9]+]].4h, v[[NA0]].4s
+; CHECK-DAG: xtn2 v[[TMP1]].8h, v[[NA2]].4s
+; CHECK: xtn v0.8b, v[[TMP1]].8h
%tmp1 = load <8 x double>, <8 x double>* %ptr
%tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
ret <8 x i8> %tmp2
--- /dev/null
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define <8 x i8> @float_to_i8(<8 x float>* %in) {
+; CHECK-LABEL: float_to_i8:
+; CHECK-DAG: fadd v[[LSB:[0-9]+]].4s, v0.4s, v0.4s
+; CHECK-DAG: fadd v[[MSB:[0-9]+]].4s, v1.4s, v1.4s
+; CHECK-DAG: fcvtzu v[[LSB2:[0-9]+]].4s, v[[LSB]].4s
+; CHECK-DAG: fcvtzu v[[MSB2:[0-9]+]].4s, v[[MSB]].4s
+; CHECK-DAG: xtn v[[TMP:[0-9]+]].4h, v[[LSB]].4s
+; CHECK-DAG: xtn2 v[[TMP]].8h, v[[MSB]].4s
+; CHECK-DAG: xtn v0.8b, v[[TMP]].8h
+ %l = load <8 x float>, <8 x float>* %in
+ %scale = fmul <8 x float> %l, <float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>
+ %conv = fptoui <8 x float> %scale to <8 x i8>
+ ret <8 x i8> %conv
+}
define <4 x i16> @fix_double_to_i16(<4 x double> %in) {
; CHECK-LABEL: fix_double_to_i16:
-; CHECK: vcvt.s32.f64
-; CHECK: vcvt.s32.f64
+; CHECK: vcvt.u32.f64
+; CHECK: vcvt.u32.f64
%scale = fmul <4 x double> %in, <double 2.0, double 2.0, double 2.0, double 2.0>
%conv = fptoui <4 x double> %scale to <4 x i16>