Parent->getOpcode() != ISD::PREFETCH &&
Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, FIXME
Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores.
- Parent->getOpcode() != X86ISD::VZEXT_LOAD &&
Parent->getOpcode() != X86ISD::FLD &&
Parent->getOpcode() != X86ISD::FILD &&
Parent->getOpcode() != X86ISD::FILD_FLAG &&
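
VZEXT_LOAD can drop out of this exclusion list because, with the changes
below, the node is now built with a MachineMemOperand: the address-selection
code can recover the address space from any parent that is a MemSDNode
instead of special-casing opcodes. A minimal sketch of that generic query,
assuming LLVM headers of this vintage; the helper name is hypothetical and
not part of this patch:

    #include "llvm/CodeGen/SelectionDAGNodes.h"
    using namespace llvm;

    // Hypothetical helper: fetch the address space of a memory-accessing
    // parent node, or 0 (the default address space) when none is attached.
    // On x86, address space 256 selects %gs and 257 selects %fs.
    static unsigned getParentAddrSpace(SDNode *Parent) {
      // MemSDNode covers loads, stores, and nodes built with
      // getMemIntrinsicNode -- including X86ISD::VZEXT_LOAD after this patch.
      if (MemSDNode *MemNode = dyn_cast<MemSDNode>(Parent))
        return MemNode->getAddressSpace();
      return 0;
    }
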
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
/// There's even a handy isZeroNode for that purpose.
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
- DebugLoc &dl, SelectionDAG &DAG) {
+ DebugLoc &DL, SelectionDAG &DAG) {
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = Elts.size();
// consecutive loads for the low half, generate a vzext_load node.
if (LastLoadedElt == NumElems - 1) {
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
- return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
- return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(),
LDBase->getAlignment());
} else if (NumElems == 4 && LastLoadedElt == 1) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
- SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+ SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
+ Ops, 2, MVT::i32,
+ LDBase->getMemOperand());
+ return DAG.getNode(ISD::BIT_CONVERT, DL, VT, ResNode);
}
return SDValue();
}
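
The functional heart of this hunk is the switch from getNode to
getMemIntrinsicNode. A plain SDNode carries no memory information, so the
x86 address-mode matcher could never prove which address space a VZEXT_LOAD
touched; a MemIntrinsicSDNode hands the original load's MachineMemOperand
through instruction selection. A hedged before/after sketch, reusing the
names from the function above (DL, Tys, Ops, LDBase):

    // Before: an opaque node; volatility, alignment, and address space
    // are all lost once the BUILD_VECTOR is rewritten.
    SDValue Old = DAG.getNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2);

    // After: the same operation, but the node is a MemIntrinsicSDNode that
    // keeps LDBase's MachineMemOperand, so %gs/%fs loads stay foldable.
    SDValue New = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2,
                                          MVT::i32, LDBase->getMemOperand());

    // Consumers can now query the access generically:
    if (MemSDNode *M = dyn_cast<MemSDNode>(New.getNode()))
      (void)M->getMemOperand(); // never null for memory intrinsic nodes
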
// VZEXT_MOVL - Vector move low and zero extend.
VZEXT_MOVL,
- // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
- VZEXT_LOAD,
-
// VSHL, VSRL - Vector logical left / right shift.
VSHL, VSRL,
// LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
LCMPXCHG_DAG,
- LCMPXCHG8_DAG
+ LCMPXCHG8_DAG,
+ // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+ VZEXT_LOAD
+
// WARNING: Do not add anything at the end unless you want the node to
// have a memop! In fact, starting from ATOMADD64_DAG, all opcodes will be
// treated as target memory ops!
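
Reordering the enum matters because opcode position, not a flag, is what
marks a target node as a memory operation: everything at or past the first
target memory opcode is assumed to have a memoperand. That is the check
generic code performs, roughly (simplified from SelectionDAGNodes.h):

    // Any target opcode numbered at or beyond FIRST_TARGET_MEMORY_OPCODE is
    // treated as touching memory -- hence VZEXT_LOAD must move into this
    // trailing range once it actually carries a MachineMemOperand.
    bool SDNode::isTargetMemoryOpcode() const {
      return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
    }
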
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
- SDNPMayLoad]>;
+ SDNPMayLoad, SDNPMemOperand]>;
def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
- SDNPMayLoad]>;
+ SDNPMayLoad, SDNPMemOperand]>;
def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
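
SDNPMemOperand is the TableGen half of the same contract: it tells the
generated instruction selector that nodes created from these definitions
carry a MachineMemOperand, so the operand is transferred onto the selected
MachineInstr instead of being dropped. A sketch of what that enables after
selection; the function is hypothetical, but the iterator API matches this
era of LLVM:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineMemOperand.h"
    using namespace llvm;

    // Hypothetical query: does MI access a segmented address space?
    // (On x86, address space 256 is %gs and 257 is %fs.)
    static bool accessesSegment(const MachineInstr *MI) {
      for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
                                      E = MI->memoperands_end(); I != E; ++I)
        if ((*I)->getPointerInfo().getAddrSpace() != 0)
          return true;
      return false;
    }
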
-; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s --check-prefix=X64
define i32 @test1() nounwind readonly {
entry:
; X64: test2:
; X64: callq *%gs:(%rdi)
+
+define <2 x i64> @pmovsxwd_1(i64 addrspace(256)* %p) nounwind readonly {
+entry:
+ %0 = load i64 addrspace(256)* %p
+ %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0
+ %1 = bitcast <2 x i64> %tmp2 to <8 x i16>
+ %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone
+ %3 = bitcast <4 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
+
+; X32: pmovsxwd_1:
+; X32: movl 4(%esp), %eax
+; X32: pmovsxwd %gs:(%eax), %xmm0
+; X32: ret
+
+; X64: pmovsxwd_1:
+; X64: pmovsxwd %gs:(%rdi), %xmm0
+; X64: ret
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
-; CHECK: jne
+; CHECK: je
; widening select v6i32 and then a sub