[X86] Make a code path in EltsFromConsecutiveLoads work only on vectors it expects

author Michael Kuperstein <michael.m.kuperstein@intel.com>

Wed, 10 Dec 2014 08:46:12 +0000 (08:46 +0000)

committer Michael Kuperstein <michael.m.kuperstein@intel.com>

Wed, 10 Dec 2014 08:46:12 +0000 (08:46 +0000)
author Michael Kuperstein <michael.m.kuperstein@intel.com>
Wed, 10 Dec 2014 08:46:12 +0000 (08:46 +0000)
committer Michael Kuperstein <michael.m.kuperstein@intel.com>
Wed, 10 Dec 2014 08:46:12 +0000 (08:46 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 21d1e03379f891d85d324ef0a014184eb6074a9e..955cdce550867e56457b54dda4c4cb607949dfb1 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6023,7 +6023,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
  
      return NewLd;
    }
-  if (NumElems == 4 && LastLoadedElt == 1 &&
+  
+  // TODO: The code below fires only for loading the low v2i32 / v2f32
+  // of a v4i32 / v4f32. It's probably worth generalizing.
+  if (NumElems == 4 && LastLoadedElt == 1 && (EltVT.getSizeInBits() == 32) &&
        DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
      SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
      SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
diff --git a/test/CodeGen/X86/vec_loadsingles.ll b/test/CodeGen/X86/vec_loadsingles.ll

index 551ba616cce1cd26d341932da2a17647175eadce..af4d6fa61fd878c23390cb10b340e65577d2fb65 100644 (file)
--- a/test/CodeGen/X86/vec_loadsingles.ll
+++ b/test/CodeGen/X86/vec_loadsingles.ll
@@ -14,6 +14,25 @@ define <4 x float> @merge_2_floats(float* nocapture %p) nounwind readonly {
  ; ALL-NEXT: retq
  }
  
+; Test-case generated due to a crash when trying to treat loading the first
+; two i64s of a <4 x i64> as a load of two i32s.
+define <4 x i64> @merge_2_floats_into_4() {
+  %1 = load i64** undef, align 8
+  %2 = getelementptr inbounds i64* %1, i64 0
+  %3 = load i64* %2
+  %4 = insertelement <4 x i64> undef, i64 %3, i32 0
+  %5 = load i64** undef, align 8
+  %6 = getelementptr inbounds i64* %5, i64 1
+  %7 = load i64* %6
+  %8 = insertelement <4 x i64> %4, i64 %7, i32 1
+  %9 = shufflevector <4 x i64> %8, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret <4 x i64> %9
+  
+; ALL-LABEL: merge_2_floats_into_4
+; ALL: vmovups
+; ALL-NEXT: retq
+}
+
  define <4 x float> @merge_4_floats(float* %ptr) {
    %a = load float* %ptr, align 8
    %vec = insertelement <4 x float> undef, float %a, i32 0
author	Michael Kuperstein <michael.m.kuperstein@intel.com>
	Wed, 10 Dec 2014 08:46:12 +0000 (08:46 +0000)
committer	Michael Kuperstein <michael.m.kuperstein@intel.com>
	Wed, 10 Dec 2014 08:46:12 +0000 (08:46 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/vec_loadsingles.ll		patch \| blob \| history