From 488fbfc69e02c529ba7eeeb633a4e499fd6523c4 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 9 Sep 2008 18:11:14 +0000 Subject: [PATCH] Make SimplifyDemandedVectorElts simplify vectors with multiple users, and teach it about shufflevector instructions. Also, fix a subtle bug in SimplifyDemandedVectorElts' insertelement code. This is a patch that was originally written by Eli Friedman, with some fixes and cleanup by me. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55995 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Scalar/InstructionCombining.cpp | 116 +++++++++++++----- test/Transforms/InstCombine/pr2645.ll | 33 +++++ 2 files changed, 115 insertions(+), 34 deletions(-) create mode 100644 test/Transforms/InstCombine/pr2645.ll diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 6b25f52a818..3ec97dd9d11 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -1355,8 +1355,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts, unsigned VWidth = cast(V->getType())->getNumElements(); assert(VWidth <= 64 && "Vector too wide to analyze!"); uint64_t EltMask = ~0ULL >> (64-VWidth); - assert(DemandedElts != EltMask && (DemandedElts & ~EltMask) == 0 && - "Invalid DemandedElts!"); + assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); if (isa(V)) { // If the entire vector is undefined, just return this info. @@ -1400,14 +1399,23 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts, return ConstantVector::get(Elts); } - if (!V->hasOneUse()) { // Other users may use these bits. - if (Depth != 0) { // Not at the root. + // Limit search depth. + if (Depth == 10) + return false; + + // If multiple users are using the root value, procede with + // simplification conservatively assuming that all elements + // are needed. + if (!V->hasOneUse()) { + // Quit if we find multiple users of a non-root value though. + // They'll be handled when it's their turn to be visited by + // the main instcombine process. + if (Depth != 0) // TODO: Just compute the UndefElts information recursively. return false; - } - return false; - } else if (Depth == 10) { // Limit search depth. - return false; + + // Conservatively assume that all elements are needed. + DemandedElts = EltMask; } Instruction *I = dyn_cast(V); @@ -1446,7 +1454,65 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts, if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } // The inserted element is defined. - UndefElts |= 1ULL << IdxNo; + UndefElts &= ~(1ULL << IdxNo); + break; + } + case Instruction::ShuffleVector: { + ShuffleVectorInst *Shuffle = cast(I); + uint64_t LeftDemanded = 0, RightDemanded = 0; + for (unsigned i = 0; i < VWidth; i++) { + if (DemandedElts & (1ULL << i)) { + unsigned MaskVal = Shuffle->getMaskValue(i); + if (MaskVal != -1u) { + assert(MaskVal < VWidth * 2 && + "shufflevector mask index out of range!"); + if (MaskVal < VWidth) + LeftDemanded |= 1ULL << MaskVal; + else + RightDemanded |= 1ULL << (MaskVal - VWidth); + } + } + } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + + uint64_t UndefElts3; + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded, + UndefElts3, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + bool NewUndefElts = false; + for (unsigned i = 0; i < VWidth; i++) { + unsigned MaskVal = Shuffle->getMaskValue(i); + if (MaskVal == -1) { + uint64_t NewBit = 1ULL << i; + UndefElts |= NewBit; + } else if (MaskVal < VWidth) { + uint64_t NewBit = ((UndefElts2 >> MaskVal) & 1) << i; + NewUndefElts |= NewBit; + UndefElts |= NewBit; + } else { + uint64_t NewBit = ((UndefElts3 >> (MaskVal - VWidth)) & 1) << i; + NewUndefElts |= NewBit; + UndefElts |= NewBit; + } + } + + if (NewUndefElts) { + // Add additional discovered undefs. + std::vector Elts; + for (unsigned i = 0; i < VWidth; ++i) { + if (UndefElts & (1ULL << i)) + Elts.push_back(UndefValue::get(Type::Int32Ty)); + else + Elts.push_back(ConstantInt::get(Type::Int32Ty, + Shuffle->getMaskValue(i))); + } + I->setOperand(2, ConstantVector::get(Elts)); + MadeChange = true; + } break; } case Instruction::BitCast: { @@ -11224,31 +11290,13 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // Undefined shuffle mask -> undefined value. if (isa(SVI.getOperand(2))) return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); - - // If we have shuffle(x, undef, mask) and any elements of mask refer to - // the undef, change them to undefs. - if (isa(SVI.getOperand(1))) { - // Scan to see if there are any references to the RHS. If so, replace them - // with undef element refs and set MadeChange to true. - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] >= e && Mask[i] != 2*e) { - Mask[i] = 2*e; - MadeChange = true; - } - } - - if (MadeChange) { - // Remap any references to RHS to use LHS. - std::vector Elts; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] == 2*e) - Elts.push_back(UndefValue::get(Type::Int32Ty)); - else - Elts.push_back(ConstantInt::get(Type::Int32Ty, Mask[i])); - } - SVI.setOperand(2, ConstantVector::get(Elts)); - } - } + + uint64_t UndefElts; + unsigned VWidth = cast(SVI.getType())->getNumElements(); + uint64_t AllOnesEltMask = ~0ULL >> (64-VWidth); + if (VWidth <= 64 && + SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) + MadeChange = true; // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). diff --git a/test/Transforms/InstCombine/pr2645.ll b/test/Transforms/InstCombine/pr2645.ll new file mode 100644 index 00000000000..04cc1852cb2 --- /dev/null +++ b/test/Transforms/InstCombine/pr2645.ll @@ -0,0 +1,33 @@ +; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {insertelement <4 x float> undef} + +; Instcombine should be able to prove that none of the +; insertelement's first operand's elements are needed. + +define internal void @""(i8*) { +;