From 810605370d53b5ded5243df2ca8bcdbb3ed04047 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 8 Jan 2016 01:39:16 +0000 Subject: [PATCH] [InstCombine] insert a new shuffle in a safe place (PR25999) Limit this transform to a basic block and guard against PHIs. Hopefully, this fixes the remaining failures in PR25999: https://llvm.org/bugs/show_bug.cgi?id=25999 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257133 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineVectorOps.cpp | 17 +++---- .../InstCombine/insert-extract-shuffle.ll | 50 +++++++++++++++++++ 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 54a9fbdbe82..5cde31a9162 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -384,23 +384,20 @@ static void replaceExtractElements(InsertElementInst *InsElt, ConstantVector::get(ExtendMask)); // Insert the new shuffle after the vector operand of the extract is defined - // or at the start of the basic block, so any subsequent extracts can use it. - bool ReplaceAllExtUsers; - if (auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp)) { + // (as long as it's not a PHI) or at the start of the basic block of the + // extract, so any subsequent extracts in the same basic block can use it. + // TODO: Insert before the earliest ExtractElementInst that is replaced. + auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp); + if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) WideVec->insertAfter(ExtVecOpInst); - ReplaceAllExtUsers = true; - } else { - // TODO: Insert at start of function, so it's always safe to replace all? + else IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt()); - ReplaceAllExtUsers = false; - } // Replace extracts from the original narrow vector with extracts from the new // wide vector. 
for (User *U : ExtVecOp->users()) { ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U); - if (!OldExt || - (!ReplaceAllExtUsers && OldExt->getParent() != WideVec->getParent())) + if (!OldExt || OldExt->getParent() != WideVec->getParent()) continue; auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); NewExt->insertAfter(WideVec); diff --git a/test/Transforms/InstCombine/insert-extract-shuffle.ll b/test/Transforms/InstCombine/insert-extract-shuffle.ll index 4223660db3d..47c2a139a47 100644 --- a/test/Transforms/InstCombine/insert-extract-shuffle.ll +++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -125,3 +125,53 @@ end: ret <8 x i16> %t6 } +; The widening shuffle must be inserted at a valid point (after the PHIs). + +define <4 x double> @pr25999_phis1(i1 %c, <2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @pr25999_phis1( +; CHECK: %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ] +; CHECK-NEXT: %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ] +; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <2 x double> %tmp1, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: %tmp4 = shufflevector <4 x double> %tmp2, <4 x double> %[[WIDEVEC]], <4 x i32> <i32 0, i32 1, i32 4, i32 3> +; CHECK-NEXT: ret <4 x double> %tmp4 +bb1: + br i1 %c, label %bb2, label %bb3 + +bb2: + %r = call <2 x double> @dummy(<2 x double> %a) + br label %bb3 + +bb3: + %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ] + %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ] + %tmp3 = extractelement <2 x double> %tmp1, i32 0 + %tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2 + ret <4 x double> %tmp4 +} + +declare <2 x double> @dummy(<2 x double>) + +define <4 x double> @pr25999_phis2(i1 %c, <2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @pr25999_phis2( +; CHECK: %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ] +; CHECK-NEXT: %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ] +; CHECK-NEXT: %d = fadd <2 x double> %tmp1, %tmp1 +; CHECK-NEXT: %[[WIDEVEC:.*]] 
= shufflevector <2 x double> %d, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: %tmp4 = shufflevector <4 x double> %tmp2, <4 x double> %[[WIDEVEC]], <4 x i32> <i32 0, i32 1, i32 4, i32 3> +; CHECK-NEXT: ret <4 x double> %tmp4 +bb1: + br i1 %c, label %bb2, label %bb3 + +bb2: + %r = call <2 x double> @dummy(<2 x double> %a) + br label %bb3 + +bb3: + %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ] + %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ] + %d = fadd <2 x double> %tmp1, %tmp1 + %tmp3 = extractelement <2 x double> %d, i32 0 + %tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2 + ret <4 x double> %tmp4 +} + -- 2.34.1