From: Chris Lattner Date: Sat, 14 Apr 2007 22:29:23 +0000 (+0000) Subject: Implement InstCombine/vec_demanded_elts.ll:test2. This allows us to turn X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=69878336519f09aadf976cf1b5c73dafbb54ce6f;p=oota-llvm.git Implement InstCombine/vec_demanded_elts.ll:test2. This allows us to turn unsigned test(float f) { return _mm_cvtsi128_si32( (__m128i) _mm_set_ss( f*f )); } into: _test: movss 4(%esp), %xmm0 mulss %xmm0, %xmm0 movd %xmm0, %eax ret instead of: _test: movss 4(%esp), %xmm0 mulss %xmm0, %xmm0 xorps %xmm1, %xmm1 movss %xmm0, %xmm1 movd %xmm1, %eax ret GCC gets: _test: subl $28, %esp movss 32(%esp), %xmm0 mulss %xmm0, %xmm0 xorps %xmm1, %xmm1 movss %xmm0, %xmm1 movaps %xmm1, %xmm0 movd %xmm0, 12(%esp) movl 12(%esp), %eax addl $28, %esp ret git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@36020 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 17d2f93ca65..912ab2db608 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -1489,7 +1489,73 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts, UndefElts |= 1ULL << IdxNo; break; } + case Instruction::BitCast: { + // Packed->packed casts only. + const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); + if (!VTy) break; + unsigned InVWidth = VTy->getNumElements(); + uint64_t InputDemandedElts = 0; + unsigned Ratio; + + if (VWidth == InVWidth) { + // If we are converting from <4 x i32> -> <4 x f32>, we demand the same + // elements as are demanded of us. + Ratio = 1; + InputDemandedElts = DemandedElts; + } else if (VWidth > InVWidth) { + // Untested so far. + break; + + // If there are more elements in the result than there are in the source, + // then an input element is live if any of the corresponding output + // elements are live. 
+ Ratio = VWidth/InVWidth; + for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { + if (DemandedElts & (1ULL << OutIdx)) + InputDemandedElts |= 1ULL << (OutIdx/Ratio); + } + } else { + // Untested so far. + break; + + // If there are more elements in the source than there are in the result, + // then an input element is live if the corresponding output element is + // live. + Ratio = InVWidth/VWidth; + for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) + if (DemandedElts & (1ULL << InIdx/Ratio)) + InputDemandedElts |= 1ULL << InIdx; + } + // Simplify the cast's input, demanding only the input elements computed above. + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts, + UndefElts2, Depth+1); + if (TmpV) { + I->setOperand(0, TmpV); + MadeChange = true; + } + + UndefElts = UndefElts2; + if (VWidth > InVWidth) { + assert(0 && "Unimp"); + // If there are more elements in the result than there are in the source, + // then an output element is undef if the corresponding input element is + // undef. + for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) + if (UndefElts2 & (1ULL << (OutIdx/Ratio))) + UndefElts |= 1ULL << OutIdx; + } else if (VWidth < InVWidth) { + assert(0 && "Unimp"); + // If there are more elements in the source than there are in the result, + // then a result element is undef if all of the corresponding input + // elements are undef. + UndefElts = ~0ULL >> (64-VWidth); // Start out all undef. + for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) + if ((UndefElts2 & (1ULL << InIdx)) == 0) // Not undef? + UndefElts &= ~(1ULL << (InIdx/Ratio)); // Clear undef bit. + } + break; + } case Instruction::And: case Instruction::Or: case Instruction::Xor: