From fd48a80e144fdca6e5c7e6091942f70c2ea4ea8f Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 21 Mar 2015 21:09:33 +0000 Subject: [PATCH] [SimplifyLibCalls] Turn memchr(const, C, const) into a bitfield check. strchr("123!", C) != nullptr is a common pattern to check if C is one of 1, 2, 3 or !. If the largest element of the string is smaller than the target's register size we can easily create a bitfield and just do a simple test for set membership. int foo(char C) { return strchr("123!", C) != nullptr; } now becomes cmpl $64, %edi ## range check sbbb %al, %al movabsq $0xE000200000001, %rcx btq %rdi, %rcx ## bit test sbbb %cl, %cl andb %al, %cl ## and the two conditions andb $1, %cl movzbl %cl, %eax ## returning an int ret (imho the backend should expand this into a series of branches, but that's a different story) The code is currently limited to bit fields that fit in a register, so usually 64 or 32 bits. Sadly, this misses anything using alpha chars or {}. This could be fixed by just emitting an i128 bit field, but that can generate really ugly code so we have to find a better way. To some degree this is also recreating switch lowering logic, but we can't simply emit a switch instruction and thus change the CFG within instcombine. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232902 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/README.txt | 3 +- lib/Transforms/Utils/SimplifyLibCalls.cpp | 54 ++++++++++++++++-- test/Transforms/InstCombine/memchr.ll | 69 ++++++++++++++++++++++- test/Transforms/InstCombine/strchr-1.ll | 19 ++++++- 4 files changed, 138 insertions(+), 7 deletions(-) diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 0fa56e66747..52a334a6174 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -1268,7 +1268,8 @@ int foo (void) { .. else if (strchr ("<>", *intel_parser.op_string) -Those should be turned into a switch. +Those should be turned into a switch. SimplifyLibCalls only gets the second +case. 
//===---------------------------------------------------------------------===// diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 0d0b77a6a3e..f6cc431656b 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -762,11 +762,9 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { if (LenC && LenC->isNullValue()) return Constant::getNullValue(CI->getType()); - // Check if all arguments are constants. If so, we can constant fold. + // From now on we need at least constant length and string. StringRef Str; - if (!CharC || !LenC || - !getConstantStringInfo(SrcStr, Str, /*Offset=*/0, - /*TrimAtNul=*/false)) + if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false)) return nullptr; // Truncate the string to LenC. If Str is smaller than LenC we will still only @@ -774,6 +772,54 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { // return null if we don't find the char. Str = Str.substr(0, LenC->getZExtValue()); + // If the char is variable but the input str and length are not we can turn + // this memchr call into a simple bit field test. Of course this only works + // when the return value is only checked against null. + // + // It would be really nice to reuse switch lowering here but we can't change + // the CFG at this point. + // + // memchr("\r\n", C, 2) != nullptr + // -> ((1 << C) & ((1 << '\r') | (1 << '\n'))) != 0 after bounds check. + if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) { + unsigned char Max = *std::max_element(Str.begin(), Str.end()); + + // Make sure the bit field we're about to create fits in a register on the + // target. + // FIXME: On a 64 bit architecture this prevents us from using the + // interesting range of alpha ascii chars. We could do better by emitting + // two bitfields or shifting the range by 64 if no lower chars are used. 
+ if (!DL.fitsInLegalInteger(Max + 1)) + return nullptr; + + // For the bit field use a power-of-2 type with at least 8 bits to avoid + // creating unnecessary illegal types. + unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max)); + + // Now build the bit field. + APInt Bitfield(Width, 0); + for (char C : Str) + Bitfield.setBit((unsigned char)C); + Value *BitfieldC = B.getInt(Bitfield); + + // First check that the bit field access is within bounds. + Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType()); + Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width), + "memchr.bounds"); + + // Create code that checks if the given bit is set in the field. + Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C); + Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits"); + + // Finally merge both checks and cast to pointer type. The inttoptr + // implicitly zexts the i1 to intptr type. + return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType()); + } + + // Check if all arguments are constants. If so, we can constant fold. + if (!CharC) + return nullptr; + // Compute the offset. size_t I = Str.find(CharC->getSExtValue() & 0xFF); if (I == StringRef::npos) // Didn't find the char. memchr returns null. diff --git a/test/Transforms/InstCombine/memchr.ll b/test/Transforms/InstCombine/memchr.ll index 4cadbfa8eca..6783249877d 100644 --- a/test/Transforms/InstCombine/memchr.ll +++ b/test/Transforms/InstCombine/memchr.ll @@ -1,11 +1,14 @@ ; Test that the memchr library call simplifier works correctly. 
; RUN: opt < %s -instcombine -S | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" @hello = constant [14 x i8] c"hello world\5Cn\00" @hellonull = constant [14 x i8] c"hello\00world\5Cn\00" @null = constant [1 x i8] zeroinitializer +@newlines = constant [3 x i8] c"\0D\0A\00" +@single = constant [2 x i8] c"\1F\00" +@spaces = constant [4 x i8] c" \0D\0A\00" @chp = global i8* zeroinitializer declare i8* @memchr(i8*, i32, i32) @@ -119,3 +122,67 @@ define void @test9() { store i8* %dst, i8** @chp ret void } + +define void @test10() { +; CHECK-LABEL: @test10 +; CHECK: store i8* null, i8** @chp, align 4 +; CHECK-NOT: call i8* @memchr +; CHECK: ret void + + %str = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0 + %dst = call i8* @memchr(i8* %str, i32 119, i32 6) + store i8* %dst, i8** @chp + ret void +} + +; Check transformation memchr("\r\n", C, 2) != nullptr -> ((1 << C) & 9216) != 0 +define i1 @test11(i32 %C) { +; CHECK-LABEL: @test11 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 %C to i16 +; CHECK-NEXT: %memchr.bounds = icmp ult i16 [[TRUNC]], 16 +; CHECK-NEXT: [[SHL:%.*]] = shl i16 1, [[TRUNC]] +; CHECK-NEXT: [[AND:%.*]] = and i16 [[SHL]], 9216 +; CHECK-NEXT: %memchr.bits = icmp ne i16 [[AND]], 0 +; CHECK-NEXT: %memchr = and i1 %memchr.bounds, %memchr.bits +; CHECK-NEXT: ret i1 %memchr + + %dst = call i8* @memchr(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @newlines, i64 0, i64 0), i32 %C, i32 2) + %cmp = icmp ne i8* %dst, null + ret i1 %cmp +} + +; No 64 bits here +define i1 @test12(i32 %C) { +; CHECK-LABEL: @test12 +; CHECK-NEXT: %dst = call i8* @memchr(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @spaces, i32 0, i32 0), i32 %C, i32 3) +; CHECK-NEXT: %cmp = icmp ne i8* %dst, null +; CHECK-NEXT: ret i1 %cmp + 
+ %dst = call i8* @memchr(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @spaces, i64 0, i64 0), i32 %C, i32 3) + %cmp = icmp ne i8* %dst, null + ret i1 %cmp +} + +define i1 @test13(i32 %C) { +; CHECK-LABEL: @test13 +; CHECK-NEXT: %memchr.bounds = icmp ult i32 %C, 32 +; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, %C +; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], -2147483647 +; CHECK-NEXT: %memchr.bits = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: %memchr = and i1 %memchr.bounds, %memchr.bits +; CHECK-NEXT: ret i1 %memchr + + %dst = call i8* @memchr(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @single, i64 0, i64 0), i32 %C, i32 2) + %cmp = icmp ne i8* %dst, null + ret i1 %cmp +} + +define i1 @test14(i32 %C) { +; CHECK-LABEL: @test14 +; CHECK-NEXT: icmp eq i32 %C, 31 +; CHECK-NEXT: ret + + %dst = call i8* @memchr(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @single, i64 0, i64 0), i32 %C, i32 1) + %cmp = icmp ne i8* %dst, null + ret i1 %cmp +} diff --git a/test/Transforms/InstCombine/strchr-1.ll b/test/Transforms/InstCombine/strchr-1.ll index 27c9a59fa3b..6c10ebdfcc4 100644 --- a/test/Transforms/InstCombine/strchr-1.ll +++ b/test/Transforms/InstCombine/strchr-1.ll @@ -1,10 +1,11 @@ ; Test that the strchr library call simplifier works correctly. 
; RUN: opt < %s -instcombine -S | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" @hello = constant [14 x i8] c"hello world\5Cn\00" @null = constant [1 x i8] zeroinitializer +@newlines = constant [3 x i8] c"\0D\0A\00" @chp = global i8* zeroinitializer declare i8* @strchr(i8*, i32) @@ -76,3 +77,19 @@ define void @test_simplify6(i8* %str) { store i8* %dst, i8** @chp ret void } + +; Check transformation strchr("\r\n", C) != nullptr -> ((1 << C) & 9217) != 0 +define i1 @test_simplify7(i32 %C) { +; CHECK-LABEL: @test_simplify7 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 %C to i16 +; CHECK-NEXT: %memchr.bounds = icmp ult i16 [[TRUNC]], 16 +; CHECK-NEXT: [[SHL:%.*]] = shl i16 1, [[TRUNC]] +; CHECK-NEXT: [[AND:%.*]] = and i16 [[SHL]], 9217 +; CHECK-NEXT: %memchr.bits = icmp ne i16 [[AND]], 0 +; CHECK-NEXT: %memchr1 = and i1 %memchr.bounds, %memchr.bits +; CHECK-NEXT: ret i1 %memchr1 + + %dst = call i8* @strchr(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @newlines, i64 0, i64 0), i32 %C) + %cmp = icmp ne i8* %dst, null + ret i1 %cmp +} -- 2.34.1