From: Chad Rosier
Date: Fri, 28 Aug 2015 18:30:18 +0000 (+0000)
Subject: Optimize memcmp(x,y,n)==0 for small n and suitably aligned x/y.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=55189b7fcaa03fc2e8ff04a58a58cdb653d0ab91;p=oota-llvm.git

Optimize memcmp(x,y,n)==0 for small n and suitably aligned x/y.

http://reviews.llvm.org/D6952
PR20673

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246313 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 6bcbde756bc..a830c9784b9 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -862,6 +863,27 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
     return B.CreateSub(LHSV, RHSV, "chardiff");
   }
 
+  // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
+  if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
+
+    IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
+    unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
+
+    if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment &&
+        getKnownAlignment(RHS, DL, CI) >= PrefAlignment) {
+
+      Type *LHSPtrTy =
+          IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
+      Type *RHSPtrTy =
+          IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
+
+      Value *LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
+      Value *RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
+
+      return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
+    }
+  }
+
   // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
   StringRef LHSStr, RHSStr;
   if (getConstantStringInfo(LHS, LHSStr) &&
diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll
index db15bd66b71..f9ff479e3ad 100644
--- a/test/Transforms/InstCombine/memcmp-1.ll
+++ b/test/Transforms/InstCombine/memcmp-1.ll
@@ -2,7 +2,7 @@
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32:64"
 
 @foo = constant [4 x i8] c"foo\00"
 @hel = constant [4 x i8] c"hel\00"
@@ -70,3 +70,54 @@ define i32 @test_simplify6() {
   ret i32 %ret
 ; CHECK: ret i32 -1
 }
+
+; Check memcmp(mem1, mem2, 8)==0 -> *(int64_t*)mem1 == *(int64_t*)mem2
+
+define i1 @test_simplify7(i64 %x, i64 %y) {
+; CHECK-LABEL: @test_simplify7(
+  %x.addr = alloca i64, align 8
+  %y.addr = alloca i64, align 8
+  store i64 %x, i64* %x.addr, align 8
+  store i64 %y, i64* %y.addr, align 8
+  %xptr = bitcast i64* %x.addr to i8*
+  %yptr = bitcast i64* %y.addr to i8*
+  %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 8)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+; CHECK: %cmp = icmp eq i64 %x, %y
+; CHECK: ret i1 %cmp
+}
+
+; Check memcmp(mem1, mem2, 4)==0 -> *(int32_t*)mem1 == *(int32_t*)mem2
+
+define i1 @test_simplify8(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_simplify8(
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %xptr = bitcast i32* %x.addr to i8*
+  %yptr = bitcast i32* %y.addr to i8*
+  %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 4)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+; CHECK: %cmp = icmp eq i32 %x, %y
+; CHECK: ret i1 %cmp
+}
+
+; Check memcmp(mem1, mem2, 2)==0 -> *(int16_t*)mem1 == *(int16_t*)mem2
+
+define i1 @test_simplify9(i16 %x, i16 %y) {
+; CHECK-LABEL: @test_simplify9(
+  %x.addr = alloca i16, align 2
+  %y.addr = alloca i16, align 2
+  store i16 %x, i16* %x.addr, align 2
+  store i16 %y, i16* %y.addr, align 2
+  %xptr = bitcast i16* %x.addr to i8*
+  %yptr = bitcast i16* %y.addr to i8*
+  %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 2)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+; CHECK: %cmp = icmp eq i16 %x, %y
+; CHECK: ret i1 %cmp
+}