From 2bce43140280644b88443f306a2858a5718a89f5 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 10 Dec 2015 17:09:28 +0000 Subject: [PATCH] [InstCombine] fold bitcasts around an extractelement (3rd try) This is a redo of r255137 (reverted at r255227) which was a redo of r255124 (reverted at r255126) with a fixed check for a scalar source type and an added test for the failure that caused the revert. Original commit message: Example: bitcast (extractelement (bitcast <2 x float> %X to <2 x i32>), 1) to float ---> extractelement <2 x float> %X, i32 1 This is part of fixing PR25543: https://llvm.org/bugs/show_bug.cgi?id=25543 The next step will be to generalize this fold: trunc ( lshr ( bitcast X) ) -> extractelement (X) Ie, I'm hoping to replace the existing transform of: bitcast ( trunc ( lshr ( bitcast X))) added by: http://reviews.llvm.org/rL112232 with 2 less specific transforms to catch the case in the bug report. Differential Revision: http://reviews.llvm.org/D14879 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255261 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineCasts.cpp | 39 +++++++++++++++++ test/Transforms/InstCombine/bitcast.ll | 42 +++++++++++++++---- 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 4afe1bb243f..dcd86db036b 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1715,6 +1715,42 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, return Result; } +/// Given a bitcasted source operand fed into an extract element instruction and +/// then bitcasted again to a scalar type, eliminate at least one bitcast by +/// changing the vector type of the extractelement instruction. 
+/// Example: +/// bitcast (extractelement (bitcast <2 x float> %X to <2 x i32>), 1) to float +/// ---> +/// extractelement <2 x float> %X, i32 1 +static Instruction *foldBitCastExtElt(BitCastInst &BitCast, InstCombiner &IC, + const DataLayout &DL) { + Type *DestType = BitCast.getType(); + if (DestType->isVectorTy()) + return nullptr; + + // TODO: Create and use a pattern matcher for ExtractElementInst. + auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0)); + if (!ExtElt || !ExtElt->hasOneUse()) + return nullptr; + + Value *InnerBitCast = nullptr; + if (!match(ExtElt->getOperand(0), m_BitCast(m_Value(InnerBitCast)))) + return nullptr; + + // If the source is not a vector or its element type doesn't match the result + // type, bitcast it to a vector type that we can extract from. + Type *SourceType = InnerBitCast->getType(); + if (SourceType->getScalarType() != DestType) { + unsigned VecWidth = SourceType->getPrimitiveSizeInBits(); + unsigned DestWidth = DestType->getPrimitiveSizeInBits(); + unsigned NumElts = VecWidth / DestWidth; + SourceType = VectorType::get(DestType, NumElts); + InnerBitCast = IC.Builder->CreateBitCast(InnerBitCast, SourceType, "bc"); + } + + return ExtractElementInst::Create(InnerBitCast, ExtElt->getOperand(1)); +} + static Instruction *foldVecTruncToExtElt(Value *VecInput, Type *DestTy, unsigned ShiftAmt, InstCombiner &IC, const DataLayout &DL) { @@ -1886,6 +1922,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } + if (Instruction *I = foldBitCastExtElt(CI, *this, DL)) + return I; + if (SrcTy->isPointerTy()) return commonPointerCastTransforms(CI); return commonCastTransforms(CI); diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll index 2a8194e5303..bccd19cc32e 100644 --- a/test/Transforms/InstCombine/bitcast.ll +++ b/test/Transforms/InstCombine/bitcast.ll @@ -64,7 +64,7 @@ define float @test3(<2 x float> %A, <2 x i64> %B) { ; CHECK-NEXT: ret float %add } -; TODO: Both bitcasts are 
unnecessary; change the extractelement. +; Both bitcasts are unnecessary; change the extractelement. define float @bitcast_extelt1(<2 x float> %A) { %bc1 = bitcast <2 x float> %A to <2 x i32> @@ -73,13 +73,11 @@ define float @bitcast_extelt1(<2 x float> %A) { ret float %bc2 ; CHECK-LABEL: @bitcast_extelt1( -; CHECK-NEXT: %bc1 = bitcast <2 x float> %A to <2 x i32> -; CHECK-NEXT: %ext = extractelement <2 x i32> %bc1, i32 0 -; CHECK-NEXT: %bc2 = bitcast i32 %ext to float +; CHECK-NEXT: %bc2 = extractelement <2 x float> %A, i32 0 ; CHECK-NEXT: ret float %bc2 } -; TODO: Second bitcast can be folded into the first. +; Second bitcast can be folded into the first. define i64 @bitcast_extelt2(<4 x float> %A) { %bc1 = bitcast <4 x float> %A to <2 x double> @@ -88,12 +86,40 @@ define i64 @bitcast_extelt2(<4 x float> %A) { ret i64 %bc2 ; CHECK-LABEL: @bitcast_extelt2( -; CHECK-NEXT: %bc1 = bitcast <4 x float> %A to <2 x double> -; CHECK-NEXT: %ext = extractelement <2 x double> %bc1, i32 1 -; CHECK-NEXT: %bc2 = bitcast double %ext to i64 +; CHECK-NEXT: %bc = bitcast <4 x float> %A to <2 x i64> +; CHECK-NEXT: %bc2 = extractelement <2 x i64> %bc, i32 1 ; CHECK-NEXT: ret i64 %bc2 } +; TODO: This should return %A. + +define <2 x i32> @bitcast_extelt3(<2 x i32> %A) { + %bc1 = bitcast <2 x i32> %A to <1 x i64> + %ext = extractelement <1 x i64> %bc1, i32 0 + %bc2 = bitcast i64 %ext to <2 x i32> + ret <2 x i32> %bc2 + +; CHECK-LABEL: @bitcast_extelt3( +; CHECK-NEXT: %bc1 = bitcast <2 x i32> %A to <1 x i64> +; CHECK-NEXT: %ext = extractelement <1 x i64> %bc1, i32 0 +; CHECK-NEXT: %bc2 = bitcast i64 %ext to <2 x i32> +; CHECK-NEXT: ret <2 x i32> %bc2 +} + +; Handle the case where the input is not a vector. 
+ +define double @bitcast_extelt4(i128 %A) { + %bc1 = bitcast i128 %A to <2 x i64> + %ext = extractelement <2 x i64> %bc1, i32 0 + %bc2 = bitcast i64 %ext to double + ret double %bc2 + +; CHECK-LABEL: @bitcast_extelt4( +; CHECK-NEXT: %bc = bitcast i128 %A to <2 x double> +; CHECK-NEXT: %bc2 = extractelement <2 x double> %bc, i32 0 +; CHECK-NEXT: ret double %bc2 +} + define <2 x i32> @test4(i32 %A, i32 %B){ %tmp38 = zext i32 %A to i64 %tmp32 = zext i32 %B to i64 -- 2.34.1