setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
N->getOperand(0), ShiftCst), ShiftCst);
}
+// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
+// builtins) into loads with swaps.
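+// For example, a little-endian (v2f64 (load xoaddr:$src)) is rewritten
+// roughly as (v2f64 (XXSWAPD (LXVD2X xoaddr:$src))), with the original
+// chain threaded through both new nodes.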
+SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Chain;
+ SDValue Base;
+ MachineMemOperand *MMO;
+
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode for little endian VSX load");
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ Chain = LD->getChain();
+ Base = LD->getBasePtr();
+ MMO = LD->getMemOperand();
+ // If the MMO suggests this isn't a load of a full vector, leave
+ // things alone. For a built-in, we have to make the change for
+ // correctness, so if there is a size problem here, it is a bug elsewhere.
+ if (MMO->getSize() < 16)
+ return SDValue();
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
+ Chain = Intrin->getChain();
+ Base = Intrin->getBasePtr();
+ MMO = Intrin->getMemOperand();
+ break;
+ }
+ }
+
+ MVT VecTy = N->getValueType(0).getSimpleVT();
+ SDValue LoadOps[] = { Chain, Base };
+ SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
+ DAG.getVTList(VecTy, MVT::Other),
+ LoadOps, VecTy, MMO);
+ DCI.AddToWorklist(Load.getNode());
+ Chain = Load.getValue(1);
+ SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
+ DAG.getVTList(VecTy, MVT::Other), Chain, Load);
+ DCI.AddToWorklist(Swap.getNode());
+ return Swap;
+}
+
+// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
+// builtins) into stores with swaps.
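+// For example, a little-endian (store v2f64:$rS, xoaddr:$dst) is rewritten
+// roughly as (STXVD2X (XXSWAPD v2f64:$rS), xoaddr:$dst), again threading
+// the chain through the new swap and store nodes.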
+SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Chain;
+ SDValue Base;
+ unsigned SrcOpnd;
+ MachineMemOperand *MMO;
+
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode for little endian VSX store");
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ Chain = ST->getChain();
+ Base = ST->getBasePtr();
+ MMO = ST->getMemOperand();
+ SrcOpnd = 1;
+ // If the MMO suggests this isn't a store of a full vector, leave
+ // things alone. For a built-in, we have to make the change for
+ // correctness, so if there is a size problem here, it is a bug elsewhere.
+ if (MMO->getSize() < 16)
+ return SDValue();
+ break;
+ }
+ case ISD::INTRINSIC_VOID: {
+ MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
+ Chain = Intrin->getChain();
+ // Intrin->getBasePtr() oddly does not return the pointer we want here;
+ // for the store intrinsic the pointer is operand 3 (operand 2 is the
+ // value being stored).
+ Base = Intrin->getOperand(3);
+ MMO = Intrin->getMemOperand();
+ SrcOpnd = 2;
+ break;
+ }
+ }
+
+ SDValue Src = N->getOperand(SrcOpnd);
+ MVT VecTy = Src.getValueType().getSimpleVT();
+ SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
+ DAG.getVTList(VecTy, MVT::Other), Chain, Src);
+ DCI.AddToWorklist(Swap.getNode());
+ Chain = Swap.getValue(1);
+ SDValue StoreOps[] = { Chain, Swap, Base };
+ SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
+ DAG.getVTList(MVT::Other),
+ StoreOps, VecTy, MMO);
+ DCI.AddToWorklist(Store.getNode());
+ return Store;
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
}
}
break;
- case ISD::STORE:
+ case ISD::STORE: {
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
!cast<StoreSDNode>(N)->isTruncatingStore() &&
Ops, cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
}
+
+ // For little endian, VSX stores require generating xxswapd/stxvd2x.
+ EVT VT = N->getOperand(1).getValueType();
+ if (VT.isSimple()) {
+ MVT StoreVT = VT.getSimpleVT();
+ if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
+ TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
+ (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
+ StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
+ return expandVSXStoreForLE(N, DCI);
+ }
break;
+ }
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT VT = LD->getValueType(0);
+
+ // For little endian, VSX loads require generating lxvd2x/xxswapd.
+ if (VT.isSimple()) {
+ MVT LoadVT = VT.getSimpleVT();
+ if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
+ TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
+ (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
+ LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
+ return expandVSXLoadForLE(N, DCI);
+ }
+
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
}
break;
+ case ISD::INTRINSIC_W_CHAIN: {
+ // For little endian, VSX loads require generating lxvd2x/xxswapd.
+ if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
+ TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default:
+ break;
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x:
+ return expandVSXLoadForLE(N, DCI);
+ }
+ }
+ break;
+ }
+ case ISD::INTRINSIC_VOID: {
+ // For little endian, VSX stores require generating xxswapd/stxvd2x.
+ if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
+ TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default:
+ break;
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x:
+ return expandVSXStoreForLE(N, DCI);
+ }
+ }
+ break;
+ }
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
/// operand identifies the operating system entry point.
SC,
+ /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
+ /// endian. Maps to an xxswapd instruction that corrects an lxvd2x
+ /// or stxvd2x instruction. The chain is necessary because the
+ /// sequence replaces a load and needs to provide the same number
+ /// of outputs.
+ XXSWAPD,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
/// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
/// an ADDI8 instruction that adds G8RReg to sym\@toc\@l.
/// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
- ADDI_TOC_L
+ ADDI_TOC_L,
+
+ /// VSRC, CHAIN = LXVD2X CHAIN, Ptr - Occurs only for little endian.
+ /// Maps directly to an lxvd2x instruction that will be followed by
+ /// an xxswapd.
+ LXVD2X,
+
+ /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
+ /// Maps directly to an stxvd2x instruction that will be preceded by
+ /// an xxswapd.
+ STXVD2X
};
}
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const override;
+ SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;
+
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
let ParserMatchClass = PPCRegVSFRCAsmOperand;
}
+// Little-endian-specific nodes.
+def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
+ SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstxvd2x : SDTypeProfile<0, 2, [
+ SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCxxswapd : SDTypeProfile<1, 1, [
+ SDTCisSameAs<0, 1>
+]>;
+
+def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
+ [SDNPHasChain, SDNPMayStore]>;
+def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
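+// Note that PPCxxswapd carries a chain (SDNPHasChain) even though it is a
+// pure register permute; as noted in PPCISelLowering.h, the chain lets it
+// provide the same number of outputs as the chained load it replaces.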
+
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
}
def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
+def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
+def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
+
let Predicates = [HasVSX] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
// Stores.
def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+
+// Permutes.
+def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
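+// Note: an xxpermdi immediate of 2 selects doubleword 1 of the first source
+// and doubleword 0 of the second; with both sources the same register, this
+// swaps the two doublewords, i.e. the xxswapd operation.
+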
// Selects.
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),