From 89db49fb9b078c3442c1fee6c1385760c7287bfd Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Wed, 10 Dec 2014 08:46:12 +0000 Subject: [PATCH] [X86] Make a code path in EltsFromConsecutiveLoads work only on vectors it expects EltsFromConsecutiveLoads was apparently only ever called for 128-bit vectors, and assumed this implicitly. r223518 started calling it for AVX-sized vectors, causing the code path that had this assumption to crash. This adds a check to make this path fire only for 128-bit vectors. Differential Revision: http://reviews.llvm.org/D6579 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223922 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 5 ++++- test/CodeGen/X86/vec_loadsingles.ll | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 21d1e03379f..955cdce5508 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6023,7 +6023,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl &Elts, return NewLd; } - if (NumElems == 4 && LastLoadedElt == 1 && + + //TODO: The code below fires only for loading the low v2i32 / v2f32 + //of a v4i32 / v4f32. It's probably worth generalizing. 
+ if (NumElems == 4 && LastLoadedElt == 1 && (EltVT.getSizeInBits() == 32) && DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; diff --git a/test/CodeGen/X86/vec_loadsingles.ll b/test/CodeGen/X86/vec_loadsingles.ll index 551ba616cce..af4d6fa61fd 100644 --- a/test/CodeGen/X86/vec_loadsingles.ll +++ b/test/CodeGen/X86/vec_loadsingles.ll @@ -14,6 +14,25 @@ define <4 x float> @merge_2_floats(float* nocapture %p) nounwind readonly { ; ALL-NEXT: retq } +; Test-case generated due to a crash when trying to treat loading the first +; two i64s of a <4 x i64> as a load of two i32s. +define <4 x i64> @merge_2_floats_into_4() { + %1 = load i64** undef, align 8 + %2 = getelementptr inbounds i64* %1, i64 0 + %3 = load i64* %2 + %4 = insertelement <4 x i64> undef, i64 %3, i32 0 + %5 = load i64** undef, align 8 + %6 = getelementptr inbounds i64* %5, i64 1 + %7 = load i64* %6 + %8 = insertelement <4 x i64> %4, i64 %7, i32 1 + %9 = shufflevector <4 x i64> %8, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + ret <4 x i64> %9 + +; ALL-LABEL: merge_2_floats_into_4 +; ALL: vmovups +; ALL-NEXT: retq +} + define <4 x float> @merge_4_floats(float* %ptr) { %a = load float* %ptr, align 8 %vec = insertelement <4 x float> undef, float %a, i32 0 -- 2.34.1