int EltNo = (Offset - StartOffset) >> 2;
int NumElems = VT.getVectorNumElements();
- EVT CanonVT = VT.getSizeInBits() == 128 ? MVT::v4i32 : MVT::v8i32;
EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(StartOffset),
false, false, false, 0);
- // Canonicalize it to a v4i32 or v8i32 shuffle.
SmallVector<int, 8> Mask;
for (int i = 0; i < NumElems; ++i)
Mask.push_back(EltNo);
- V1 = DAG.getNode(ISD::BITCAST, dl, CanonVT, V1);
- return DAG.getNode(ISD::BITCAST, dl, NVT,
- DAG.getVectorShuffle(CanonVT, dl, V1,
- DAG.getUNDEF(CanonVT),&Mask[0]));
+ return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
}
return SDValue();
def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
(i8 imm:$imm))),
(VPSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
- (i8 imm:$imm))),
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
(VPSHUFDmi addr:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
(VPSHUFDri VR128:$src1, imm:$imm)>;
def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
(i8 imm:$imm))),
(PSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
- (i8 imm:$imm))),
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
(PSHUFDmi addr:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
(PSHUFDri VR128:$src1, imm:$imm)>;
%r = extractelement <8 x i32> %b, i32 2
ret i32 %r
}
+
+define <4 x float> @test11(<4 x float> %a) nounwind {
+; CHECK: pshufd $27
+ %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x float> %tmp1
+}
+
+define <4 x float> @test12(<4 x float>* %a) nounwind {
+; CHECK: pshufd $27, (
+ %tmp0 = load <4 x float>* %a
+ %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x float> %tmp1
+}