EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
+ // If the result of the load has to be truncated, then it's not
+ // necessarily profitable.
+ if (NVT.bitsLT(LVT))
+ return SDValue();
+
if (InVec.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
// Note that this replacement assumes that the extractvalue is the only
// use of the load; that's okay because we don't want to perform this
// transformation in other cases anyway.
- SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), Align);
+ SDValue Load;
+ if (NVT.bitsGT(LVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
+ ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
+ NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+ LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align);
+ } else
+ Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), Align);
WorkListRemover DeadNodes(*this);
SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
SDValue To[] = { Load.getValue(0), Load.getValue(1) };
--- /dev/null
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+; rdar://11035895
+
+; DAG combine incorrectly optimized (i32 vextract (v4i16 load $addr), c) to
+; (i16 load $addr+c*sizeof(i16)). It should have issued an extload instead, i.e.
+; (i32 extload $addr+c*sizeof(i16)).
+define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind {
+entry:
+; CHECK: ldrh [[REG:r[0-9]+]]
+; CHECK: strh [[REG]]
+ %0 = load <3 x i16> * %srcA, align 8
+ %1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> <i32 2, i32 undef>
+ store <2 x i16> %1, <2 x i16> * %dst, align 4
+ ret void
+}
+