From 944d86558eae35e40e9f8a6bbfd626bea939abf5 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Tue, 27 Jan 2015 15:58:14 +0000 Subject: [PATCH] [InstCombine] Teach how to fold a select into a cttz/ctlz with the 'is_zero_undef' flag. This patch teaches the Instruction Combiner how to fold a cttz/ctlz followed by a icmp plus select into a single cttz/ctlz with flag 'is_zero_undef' cleared. Added test InstCombine/select-cmp-cttz-ctlz.ll. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227197 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineSelect.cpp | 63 ++++ test/Transforms/InstCombine/intrinsics.ll | 6 +- .../InstCombine/select-cmp-cttz-ctlz.ll | 300 ++++++++++++++++++ 3 files changed, 367 insertions(+), 2 deletions(-) create mode 100644 test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 45d51210baf..69380fc41d3 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -437,6 +437,66 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, return Builder->CreateOr(V, Y); } +/// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single +/// call to cttz/ctlz with flag 'is_zero_undef' cleared. +/// +/// For example, we can fold the following code sequence: +/// \code +/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) +/// %1 = icmp ne i32 %x, 0 +/// %2 = select i1 %1, i32 %0, i32 32 +/// \code +/// +/// into: +/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, + InstCombiner::BuilderTy *Builder) { + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); + + // Check if the condition value compares a value for equality against zero. + if (!ICI->isEquality() || !match(CmpRHS, m_Zero())) + return nullptr; + + Value *Count = FalseVal; + Value *ValueOnZero = TrueVal; + if (Pred == ICmpInst::ICMP_NE) + std::swap(Count, ValueOnZero); + + // Skip zero extend/truncate. + Value *V = nullptr; + if (match(Count, m_ZExt(m_Value(V))) || + match(Count, m_Trunc(m_Value(V)))) + Count = V; + + // Check if the value propagated on zero is a constant number equal to the + // sizeof in bits of 'Count'. + unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits(); + if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits))) + return nullptr; + + // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the + // input to the cttz/ctlz is used as LHS for the compare instruction. + if (match(Count, m_Intrinsic(m_Specific(CmpLHS))) || + match(Count, m_Intrinsic(m_Specific(CmpLHS)))) { + IntrinsicInst *II = cast(Count); + IRBuilder<> Builder(II); + if (cast(II->getArgOperand(1))->isOne()) { + // Explicitly clear the 'undef_on_zero' flag. + IntrinsicInst *NewI = cast(II->clone()); + Type *Ty = NewI->getArgOperand(1)->getType(); + NewI->setArgOperand(1, Constant::getNullValue(Ty)); + Builder.Insert(NewI); + Count = NewI; + } + + return Builder.CreateZExtOrTrunc(Count, ValueOnZero->getType()); + } + + return nullptr; +} + /// visitSelectInstWithICmp - Visit a SelectInst that has an /// ICmpInst as its first operand. /// @@ -665,6 +725,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder)) return ReplaceInstUsesWith(SI, V); + if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder)) + return ReplaceInstUsesWith(SI, V); + return Changed ? &SI : nullptr; } diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll index 8e7742f8c34..2791adf8feb 100644 --- a/test/Transforms/InstCombine/intrinsics.ll +++ b/test/Transforms/InstCombine/intrinsics.ll @@ -349,7 +349,8 @@ define i32 @ctlz_select(i32 %Value) nounwind { ret i32 %s ; CHECK-LABEL: @ctlz_select( -; CHECK: select i1 %tobool, i32 %ctlz, i32 32 +; CHECK-NEXT: call i32 @llvm.ctlz.i32(i32 %Value, i1 false) +; CHECK-NEXT: ret i32 } define i32 @cttz_select(i32 %Value) nounwind { @@ -359,5 +360,6 @@ define i32 @cttz_select(i32 %Value) nounwind { ret i32 %s ; CHECK-LABEL: @cttz_select( -; CHECK: select i1 %tobool, i32 %cttz, i32 32 +; CHECK-NEXT: call i32 @llvm.cttz.i32(i32 %Value, i1 false) +; CHECK-NEXT: ret i32 } diff --git a/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll b/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll new file mode 100644 index 00000000000..390e6bc51f8 --- /dev/null +++ b/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll @@ -0,0 +1,300 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +; This test is to verify that the instruction combiner is able to fold +; a cttz/ctlz followed by a icmp + select into a single cttz/ctlz with +; the 'is_zero_undef' flag cleared. + +define i16 @test1(i16 %x) { +; CHECK-LABEL: @test1( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) +; CHECK-NEXT: ret i16 [[VAR]] +entry: + %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) + %tobool = icmp ne i16 %x, 0 + %cond = select i1 %tobool, i16 %0, i16 16 + ret i16 %cond +} + +define i32 @test2(i32 %x) { +; CHECK-LABEL: @test2( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) +; CHECK-NEXT: ret i32 [[VAR]] +entry: + %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + %tobool = icmp ne i32 %x, 0 + %cond = select i1 %tobool, i32 %0, i32 32 + ret i32 %cond +} + +define i64 @test3(i64 %x) { +; CHECK-LABEL: @test3( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) +; CHECK-NEXT: ret i64 [[VAR]] +entry: + %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) + %tobool = icmp ne i64 %x, 0 + %cond = select i1 %tobool, i64 %0, i64 64 + ret i64 %cond +} + +define i16 @test4(i16 %x) { +; CHECK-LABEL: @test4( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) +; CHECK-NEXT: ret i16 [[VAR]] +entry: + %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) + %tobool = icmp eq i16 %x, 0 + %cond = select i1 %tobool, i16 16, i16 %0 + ret i16 %cond +} + +define i32 @test5(i32 %x) { +; CHECK-LABEL: @test5( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) +; CHECK-NEXT: ret i32 [[VAR]] +entry: + %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + %tobool = icmp eq i32 %x, 0 + %cond = select i1 %tobool, i32 32, i32 %0 + ret i32 %cond +} + +define i64 @test6(i64 %x) { +; CHECK-LABEL: @test6( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) +; CHECK-NEXT: ret i64 [[VAR]] +entry: + %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) + %tobool = icmp eq i64 %x, 0 + %cond = select i1 %tobool, i64 64, i64 %0 + ret i64 %cond +} + +define i16 @test1b(i16 %x) { +; CHECK-LABEL: @test1b( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) +; CHECK-NEXT: ret i16 [[VAR]] +entry: + %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true) + %tobool = icmp ne i16 %x, 0 + %cond = select i1 %tobool, i16 %0, i16 16 + ret i16 %cond +} + +define i32 @test2b(i32 %x) { +; CHECK-LABEL: @test2b( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; CHECK-NEXT: ret i32 [[VAR]] +entry: + %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + %tobool = icmp ne i32 %x, 0 + %cond = select i1 %tobool, i32 %0, i32 32 + ret i32 %cond +} + +define i64 @test3b(i64 %x) { +; CHECK-LABEL: @test3b( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) +; CHECK-NEXT: ret i64 [[VAR]] +entry: + %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) + %tobool = icmp ne i64 %x, 0 + %cond = select i1 %tobool, i64 %0, i64 64 + ret i64 %cond +} + +define i16 @test4b(i16 %x) { +; CHECK-LABEL: @test4b( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) +; CHECK-NEXT: ret i16 [[VAR]] +entry: + %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true) + %tobool = icmp eq i16 %x, 0 + %cond = select i1 %tobool, i16 16, i16 %0 + ret i16 %cond +} + +define i32 @test5b(i32 %x) { +; CHECK-LABEL: @test5b( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; CHECK-NEXT: ret i32 [[VAR]] +entry: + %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + %tobool = icmp eq i32 %x, 0 + %cond = select i1 %tobool, i32 32, i32 %0 + ret i32 %cond +} + +define i64 @test6b(i64 %x) { +; CHECK-LABEL: @test6b( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) +; CHECK-NEXT: ret i64 [[VAR]] +entry: + %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) + %tobool = icmp eq i64 %x, 0 + %cond = select i1 %tobool, i64 64, i64 %0 + ret i64 %cond +} + +define i32 @test1c(i16 %x) { +; CHECK-LABEL: @test1c( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i32 +; CHECK-NEXT: ret i32 [[VAR2]] +entry: + %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true) + %cast2 = zext i16 %0 to i32 + %tobool = icmp ne i16 %x, 0 + %cond = select i1 %tobool, i32 %cast2, i32 16 + ret i32 %cond +} + +define i64 @test2c(i16 %x) { +; CHECK-LABEL: @test2c( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i64 +; CHECK-NEXT: ret i64 [[VAR2]] +entry: + %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true) + %conv = zext i16 %0 to i64 + %tobool = icmp ne i16 %x, 0 + %cond = select i1 %tobool, i64 %conv, i64 16 + ret i64 %cond +} + +define i64 @test3c(i32 %x) { +; CHECK-LABEL: @test3c( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64 +; CHECK-NEXT: ret i64 [[VAR2]] +entry: + %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + %conv = zext i32 %0 to i64 + %tobool = icmp ne i32 %x, 0 + %cond = select i1 %tobool, i64 %conv, i64 32 + ret i64 %cond +} + +define i32 @test4c(i16 %x) { +; CHECK-LABEL: @test4c( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i32 +; CHECK-NEXT: ret i32 [[VAR2]] +entry: + %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) + %cast = zext i16 %0 to i32 + %tobool = icmp ne i16 %x, 0 + %cond = select i1 %tobool, i32 %cast, i32 16 + ret i32 %cond +} + +define i64 @test5c(i16 %x) { +; CHECK-LABEL: @test5c( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i64 +; CHECK-NEXT: ret i64 [[VAR2]] +entry: + %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) + %cast = zext i16 %0 to i64 + %tobool = icmp ne i16 %x, 0 + %cond = select i1 %tobool, i64 %cast, i64 16 + ret i64 %cond +} + +define i64 @test6c(i32 %x) { +; CHECK-LABEL: @test6c( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64 +; CHECK-NEXT: ret i64 [[VAR2]] +entry: + %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + %cast = zext i32 %0 to i64 + %tobool = icmp ne i32 %x, 0 + %cond = select i1 %tobool, i64 %cast, i64 32 + ret i64 %cond +} + +define i16 @test1d(i64 %x) { +; CHECK-LABEL: @test1d( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i16 +; CHECK-NEXT: ret i16 [[VAR2]] +entry: + %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) + %conv = trunc i64 %0 to i16 + %tobool = icmp ne i64 %x, 0 + %cond = select i1 %tobool, i16 %conv, i16 64 + ret i16 %cond +} + +define i32 @test2d(i64 %x) { +; CHECK-LABEL: @test2d( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i32 +; CHECK-NEXT: ret i32 [[VAR2]] +entry: + %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) + %cast = trunc i64 %0 to i32 + %tobool = icmp ne i64 %x, 0 + %cond = select i1 %tobool, i32 %cast, i32 64 + ret i32 %cond +} + +define i16 @test3d(i32 %x) { +; CHECK-LABEL: @test3d( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16 +; CHECK-NEXT: ret i16 [[VAR2]] +entry: + %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + %cast = trunc i32 %0 to i16 + %tobool = icmp ne i32 %x, 0 + %cond = select i1 %tobool, i16 %cast, i16 32 + ret i16 %cond +} + +define i16 @test4d(i64 %x) { +; CHECK-LABEL: @test4d( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i16 +; CHECK-NEXT: ret i16 [[VAR2]] +entry: + %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) + %cast = trunc i64 %0 to i16 + %tobool = icmp ne i64 %x, 0 + %cond = select i1 %tobool, i16 %cast, i16 64 + ret i16 %cond +} + +define i32 @test5d(i64 %x) { +; CHECK-LABEL: @test5d( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i32 +; CHECK-NEXT: ret i32 [[VAR2]] +entry: + %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) + %cast = trunc i64 %0 to i32 + %tobool = icmp ne i64 %x, 0 + %cond = select i1 %tobool, i32 %cast, i32 64 + ret i32 %cond +} + +define i16 @test6d(i32 %x) { +; CHECK-LABEL: @test6d( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16 +; CHECK-NEXT: ret i16 [[VAR2]] +entry: + %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + %cast = trunc i32 %0 to i16 + %tobool = icmp ne i32 %x, 0 + %cond = select i1 %tobool, i16 %cast, i16 32 + ret i16 %cond +} + +declare i16 @llvm.ctlz.i16(i16, i1) +declare i32 @llvm.ctlz.i32(i32, i1) +declare i64 @llvm.ctlz.i64(i64, i1) +declare i16 @llvm.cttz.i16(i16, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare i64 @llvm.cttz.i64(i64, i1) -- 2.34.1