From: Juergen Ributzka
Date: Wed, 17 Sep 2014 21:55:55 +0000 (+0000)
Subject: [FastISel][AArch64] Custom lower sdiv by power-of-2.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=7516444a2697991b3f3018310550c0e502733a02;p=oota-llvm.git

[FastISel][AArch64] Custom lower sdiv by power-of-2.

Emit an optimized instruction sequence for sdiv by a power of 2, depending on
whether the sdiv carries the exact flag.

This fixes rdar://problem/18224511.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217986 91177308-0d34-0410-b5e6-96231b3b80d8
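For illustration (not part of the original commit message): without the exact
flag the lowering must round toward zero, so negative dividends are biased by
divisor-1 before the arithmetic shift. Below is a minimal sketch in plain C++
of the sequence emitted for a divisor of 8; the function names and int32_t
signatures are hypothetical, chosen only to mirror the i32 tests in the diff:

    #include <cstdint>

    // sdiv %a, 8  -->  add; cmp; csel; asr
    int32_t sdiv_by_8(int32_t x) {
      int32_t biased = x + 7;              // add  wA, w0, #7
      int32_t sel = (x < 0) ? biased : x;  // cmp  w0, #0 ; csel wB, wA, w0, lt
      return sel >> 3;                     // asr  w0, wB, #3 (arithmetic shift;
                                           // implementation-defined pre-C++20,
                                           // arithmetic on mainstream compilers)
    }

    // sdiv %a, -8  -->  same bias/select, then a negating shift.
    int32_t sdiv_by_minus_8(int32_t x) {
      int32_t biased = x + 7;
      int32_t sel = (x < 0) ? biased : x;
      return -(sel >> 3);                  // neg  w0, wB, asr #3
    }

For example, sdiv_by_8(-9) yields -1 and sdiv_by_minus_8(-9) yields 1, matching
sdiv's round-toward-zero semantics. With the exact flag the dividend is known
to be a multiple of the divisor, so the bias is unnecessary and a single asr
suffices, as the *_exact tests below check.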
---

diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 33d17ef704f..347e0364d1d 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -133,6 +133,7 @@ private:
   bool selectShift(const Instruction *I);
   bool selectBitCast(const Instruction *I);
   bool selectFRem(const Instruction *I);
+  bool selectSDiv(const Instruction *I);
 
   // Utility helper routines.
   bool isTypeLegal(Type *Ty, MVT &VT);
@@ -3980,6 +3981,75 @@ bool AArch64FastISel::selectFRem(const Instruction *I) {
   return true;
 }
 
+bool AArch64FastISel::selectSDiv(const Instruction *I) {
+  MVT VT;
+  if (!isTypeLegal(I->getType(), VT))
+    return false;
+
+  if (!isa<ConstantInt>(I->getOperand(1)))
+    return selectBinaryOp(I, ISD::SDIV);
+
+  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
+  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
+      !(C.isPowerOf2() || (-C).isPowerOf2()))
+    return selectBinaryOp(I, ISD::SDIV);
+
+  unsigned Lg2 = C.countTrailingZeros();
+  unsigned Src0Reg = getRegForValue(I->getOperand(0));
+  if (!Src0Reg)
+    return false;
+  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
+
+  if (cast<BinaryOperator>(I)->isExact()) {
+    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
+    if (!ResultReg)
+      return false;
+    updateValueMap(I, ResultReg);
+    return true;
+  }
+
+  unsigned Pow2MinusOne = (1 << Lg2) - 1;
+  unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
+                                  /*IsKill=*/false, Pow2MinusOne);
+  if (!AddReg)
+    return false;
+
+  // (Src0 < 0) ? Pow2 - 1 : 0;
+  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
+    return false;
+
+  unsigned SelectOpc;
+  const TargetRegisterClass *RC;
+  if (VT == MVT::i64) {
+    SelectOpc = AArch64::CSELXr;
+    RC = &AArch64::GPR64RegClass;
+  } else {
+    SelectOpc = AArch64::CSELWr;
+    RC = &AArch64::GPR32RegClass;
+  }
+  unsigned SelectReg =
+      fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
+                       Src0IsKill, AArch64CC::LT);
+  if (!SelectReg)
+    return false;
+
+  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must
+  // also negate the result.
+  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+  unsigned ResultReg;
+  if (C.isNegative())
+    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
+                              SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
+  else
+    ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
+
+  if (!ResultReg)
+    return false;
+
+  updateValueMap(I, ResultReg);
+  return true;
+}
+
 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
   switch (I->getOpcode()) {
   default:
@@ -3989,6 +4059,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
     return selectAddSub(I);
   case Instruction::Mul:
     return selectMul(I);
+  case Instruction::SDiv:
+    return selectSDiv(I);
   case Instruction::SRem:
     if (!selectBinaryOp(I, ISD::SREM))
       return selectRem(I, ISD::SREM);
diff --git a/test/CodeGen/AArch64/fast-isel-sdiv.ll b/test/CodeGen/AArch64/fast-isel-sdiv.ll
new file mode 100644
index 00000000000..30807767fa7
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-sdiv.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @sdiv_i32_exact(i32 %a) {
+; CHECK-LABEL: sdiv_i32_exact
+; CHECK:       asr {{w[0-9]+}}, w0, #3
+  %1 = sdiv exact i32 %a, 8
+  ret i32 %1
+}
+
+define i32 @sdiv_i32_pos(i32 %a) {
+; CHECK-LABEL: sdiv_i32_pos
+; CHECK:       add [[REG1:w[0-9]+]], w0, #7
+; CHECK-NEXT:  cmp w0, #0
+; CHECK-NEXT:  csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
+; CHECK-NEXT:  asr {{w[0-9]+}}, [[REG2]], #3
+  %1 = sdiv i32 %a, 8
+  ret i32 %1
+}
+
+define i32 @sdiv_i32_neg(i32 %a) {
+; CHECK-LABEL: sdiv_i32_neg
+; CHECK:       add [[REG1:w[0-9]+]], w0, #7
+; CHECK-NEXT:  cmp w0, #0
+; CHECK-NEXT:  csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
+; CHECK-NEXT:  neg {{w[0-9]+}}, [[REG2]], asr #3
+  %1 = sdiv i32 %a, -8
+  ret i32 %1
+}
+
+define i64 @sdiv_i64_exact(i64 %a) {
+; CHECK-LABEL: sdiv_i64_exact
+; CHECK:       asr {{x[0-9]+}}, x0, #4
+  %1 = sdiv exact i64 %a, 16
+  ret i64 %1
+}
+
+define i64 @sdiv_i64_pos(i64 %a) {
+; CHECK-LABEL: sdiv_i64_pos
+; CHECK:       add [[REG1:x[0-9]+]], x0, #15
+; CHECK-NEXT:  cmp x0, #0
+; CHECK-NEXT:  csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
+; CHECK-NEXT:  asr {{x[0-9]+}}, [[REG2]], #4
+  %1 = sdiv i64 %a, 16
+  ret i64 %1
+}
+
+define i64 @sdiv_i64_neg(i64 %a) {
+; CHECK-LABEL: sdiv_i64_neg
+; CHECK:       add [[REG1:x[0-9]+]], x0, #15
+; CHECK-NEXT:  cmp x0, #0
+; CHECK-NEXT:  csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
+; CHECK-NEXT:  neg {{x[0-9]+}}, [[REG2]], asr #4
+  %1 = sdiv i64 %a, -16
+  ret i64 %1
+}