From 5e34dffb9c90316758a4c31d2fa4ccf9a243ccb5 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Thu, 21 Aug 2014 23:06:07 +0000 Subject: [PATCH] [FastISel][AArch64] Add support for variable shift. This adds the missing variable shift support for value type i8, i16, and i32. This fixes . git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216242 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FastISel.cpp | 173 +++++++++++++++++++----- test/CodeGen/AArch64/fast-isel-shift.ll | 124 +++++++++++++++-- 2 files changed, 253 insertions(+), 44 deletions(-) diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index aab2b4b01f2..39da012580b 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -128,7 +128,7 @@ private: bool SelectTrunc(const Instruction *I); bool SelectIntExt(const Instruction *I); bool SelectMul(const Instruction *I); - bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic); + bool SelectShift(const Instruction *I); bool SelectBitCast(const Instruction *I); // Utility helper routines. @@ -193,9 +193,15 @@ private: unsigned Op1, bool Op1IsKill); unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); - unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm); - unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm); - unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm); + unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, + unsigned Op1Reg, bool Op1IsKill); + unsigned emitLSL_ri(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, uint64_t Imm); + unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, + unsigned Op1Reg, bool Op1IsKill); + unsigned emitLSR_ri(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, uint64_t Imm); + unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, + unsigned Op1Reg, bool Op1IsKill); + unsigned emitASR_ri(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, uint64_t Imm); unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT); unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT); @@ -703,8 +709,8 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) { Addr.getOffsetReg(), /*TODO:IsKill=*/false, Addr.getShift()); else - ResultReg = Emit_LSL_ri(MVT::i64, Addr.getOffsetReg(), - /*Op0IsKill=*/false, Addr.getShift()); + ResultReg = emitLSL_ri(MVT::i64, Addr.getOffsetReg(), /*Op0IsKill=*/false, + Addr.getShift()); if (!ResultReg) return false; @@ -2366,7 +2372,7 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) { if (VT == MVT::i32) { MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); - unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32); + unsigned ShiftReg = emitLSR_ri(MVT::i64, MulReg, /*IsKill=*/false, 32); MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, AArch64::sub_32); ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, @@ -2653,8 +2659,34 @@ unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, AArch64::XZR, /*IsKill=*/true); } -unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, - uint64_t Shift) { +unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, + unsigned Op1Reg, bool Op1IsKill) { + unsigned Opc = 0; + bool NeedTrunc = false; + uint64_t Mask = 0; + switch (RetVT.SimpleTy) { + default: return 0; + case MVT::i8: 
Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; + case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; + case MVT::i32: Opc = AArch64::LSLVWr; break; + case MVT::i64: Opc = AArch64::LSLVXr; break; + } + + const TargetRegisterClass *RC = + (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; + if (NeedTrunc) { + Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); + Op1IsKill = true; + } + unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, + Op1IsKill); + if (NeedTrunc) + ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); + return ResultReg; +} + +unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, + uint64_t Shift) { unsigned Opc, ImmR, ImmS; switch (RetVT.SimpleTy) { default: return 0; @@ -2673,8 +2705,35 @@ unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); } -unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, - uint64_t Shift) { +unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, + unsigned Op1Reg, bool Op1IsKill) { + unsigned Opc = 0; + bool NeedTrunc = false; + uint64_t Mask = 0; + switch (RetVT.SimpleTy) { + default: return 0; + case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; + case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; + case MVT::i32: Opc = AArch64::LSRVWr; break; + case MVT::i64: Opc = AArch64::LSRVXr; break; + } + + const TargetRegisterClass *RC = + (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; + if (NeedTrunc) { + Op0Reg = emitAND_ri(MVT::i32, Op0Reg, Op0IsKill, Mask); + Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); + Op0IsKill = Op1IsKill = true; + } + unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, + Op1IsKill); + if (NeedTrunc) + ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); + return ResultReg; +} + +unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, + uint64_t Shift) { unsigned Opc, ImmS; switch (RetVT.SimpleTy) { default: return 0; @@ -2689,8 +2748,35 @@ unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, Shift, ImmS); } -unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, - uint64_t Shift) { +unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, + unsigned Op1Reg, bool Op1IsKill) { + unsigned Opc = 0; + bool NeedTrunc = false; + uint64_t Mask = 0; + switch (RetVT.SimpleTy) { + default: return 0; + case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; + case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; + case MVT::i32: Opc = AArch64::ASRVWr; break; + case MVT::i64: Opc = AArch64::ASRVXr; break; + } + + const TargetRegisterClass *RC = + (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; + if (NeedTrunc) { + Op0Reg = EmitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false); + Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); + Op0IsKill = Op1IsKill = true; + } + unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, + Op1IsKill); + if (NeedTrunc) + ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); + return ResultReg; +} + +unsigned AArch64FastISel::emitASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, + uint64_t Shift) { unsigned Opc, ImmS; switch (RetVT.SimpleTy) { default: return 0; @@ -2892,31 +2978,56 @@ bool AArch64FastISel::SelectMul(const Instruction *I) { return true; } -bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift, - bool IsArithmetic) { +bool AArch64FastISel::SelectShift(const Instruction *I) { EVT RetEVT = TLI.getValueType(I->getType(), true); if (!RetEVT.isSimple()) return false; MVT RetVT = RetEVT.getSimpleVT(); - if (!isa(I->getOperand(1))) - return false; - unsigned Op0Reg = getRegForValue(I->getOperand(0)); if (!Op0Reg) return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); - uint64_t ShiftVal = cast(I->getOperand(1))->getZExtValue(); + if (const auto *C = dyn_cast(I->getOperand(1))) { + unsigned ResultReg = 0; + uint64_t ShiftVal = C->getZExtValue(); + switch (I->getOpcode()) { + default: llvm_unreachable("Unexpected instruction."); + case Instruction::Shl: + ResultReg = emitLSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal); + break; + case Instruction::AShr: + ResultReg = emitASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal); + break; + case Instruction::LShr: + ResultReg = emitLSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal); + break; + } + if (!ResultReg) + return false; - unsigned ResultReg; - if (IsLeftShift) - ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal); - else { - if (IsArithmetic) - ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal); - else - ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal); + UpdateValueMap(I, ResultReg); + return true; + } + + unsigned Op1Reg = getRegForValue(I->getOperand(1)); + if (!Op1Reg) + return false; + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + unsigned ResultReg = 0; + switch (I->getOpcode()) { + default: llvm_unreachable("Unexpected instruction."); + case Instruction::Shl: + ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); + break; + case Instruction::AShr: + ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); + break; + case Instruction::LShr: + ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); + break; } if (!ResultReg) @@ -3012,12 +3123,10 @@ bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) { // selector -> improve FastISel tblgen. 
case Instruction::Mul: return SelectMul(I); - case Instruction::Shl: - return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false); - case Instruction::LShr: - return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false); + case Instruction::Shl: // fall-through + case Instruction::LShr: // fall-through case Instruction::AShr: - return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true); + return SelectShift(I); case Instruction::BitCast: return SelectBitCast(I); } diff --git a/test/CodeGen/AArch64/fast-isel-shift.ll b/test/CodeGen/AArch64/fast-isel-shift.ll index 9b71930ea06..210b4ed9b92 100644 --- a/test/CodeGen/AArch64/fast-isel-shift.ll +++ b/test/CodeGen/AArch64/fast-isel-shift.ll @@ -1,87 +1,187 @@ ; RUN: llc -fast-isel -fast-isel-abort -mtriple=arm64-apple-darwin -verify-machineinstrs < %s | FileCheck %s +; CHECK-LABEL: lslv_i8 +; CHECK: and [[REG1:w[0-9]+]], w1, #0xff +; CHECK-NEXT: lsl [[REG2:w[0-9]+]], w0, [[REG1]] +; CHECK-NEXT: and {{w[0-9]+}}, [[REG2]], #0xff +define zeroext i8 @lslv_i8(i8 %a, i8 %b) { + %1 = shl i8 %a, %b + ret i8 %1 +} + ; CHECK-LABEL: lsl_i8 -; CHECK: ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #4 +; CHECK: ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #4 define zeroext i8 @lsl_i8(i8 %a) { %1 = shl i8 %a, 4 ret i8 %1 } +; CHECK-LABEL: lslv_i16 +; CHECK: and [[REG1:w[0-9]+]], w1, #0xffff +; CHECK-NEXT: lsl [[REG2:w[0-9]+]], w0, [[REG1]] +; CHECK-NEXT: and {{w[0-9]+}}, [[REG2]], #0xffff +define zeroext i16 @lslv_i16(i16 %a, i16 %b) { + %1 = shl i16 %a, %b + ret i16 %1 +} + ; CHECK-LABEL: lsl_i16 -; CHECK: ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #8, #8 +; CHECK: ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #8, #8 define zeroext i16 @lsl_i16(i16 %a) { %1 = shl i16 %a, 8 ret i16 %1 } +; CHECK-LABEL: lslv_i32 +; CHECK: lsl {{w[0-9]*}}, w0, w1 +define zeroext i32 @lslv_i32(i32 %a, i32 %b) { + %1 = shl i32 %a, %b + ret i32 %1 +} + ; CHECK-LABEL: lsl_i32 -; CHECK: lsl {{w[0-9]*}}, {{w[0-9]*}}, #16 +; CHECK: lsl {{w[0-9]*}}, {{w[0-9]*}}, #16 define zeroext i32 @lsl_i32(i32 %a) { %1 = shl i32 %a, 16 ret i32 %1 } +; CHECK-LABEL: lslv_i64 +; CHECK: lsl {{x[0-9]*}}, x0, x1 +define i64 @lslv_i64(i64 %a, i64 %b) { + %1 = shl i64 %a, %b + ret i64 %1 +} + ; FIXME: This shouldn't use the variable shift version. 
; CHECK-LABEL: lsl_i64 -; CHECK: lsl {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}} +; CHECK: lsl {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}} define i64 @lsl_i64(i64 %a) { %1 = shl i64 %a, 32 ret i64 %1 } +; CHECK-LABEL: lsrv_i8 +; CHECK: and [[REG1:w[0-9]+]], w0, #0xff +; CHECK-NEXT: and [[REG2:w[0-9]+]], w1, #0xff +; CHECK-NEXT: lsr [[REG3:w[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: and {{w[0-9]+}}, [[REG3]], #0xff +define zeroext i8 @lsrv_i8(i8 %a, i8 %b) { + %1 = lshr i8 %a, %b + ret i8 %1 +} + ; CHECK-LABEL: lsr_i8 -; CHECK: ubfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4 +; CHECK: ubfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4 define zeroext i8 @lsr_i8(i8 %a) { %1 = lshr i8 %a, 4 ret i8 %1 } +; CHECK-LABEL: lsrv_i16 +; CHECK: and [[REG1:w[0-9]+]], w0, #0xffff +; CHECK-NEXT: and [[REG2:w[0-9]+]], w1, #0xffff +; CHECK-NEXT: lsr [[REG3:w[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: and {{w[0-9]+}}, [[REG3]], #0xffff +define zeroext i16 @lsrv_i16(i16 %a, i16 %b) { + %1 = lshr i16 %a, %b + ret i16 %1 +} + ; CHECK-LABEL: lsr_i16 -; CHECK: ubfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8 +; CHECK: ubfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8 define zeroext i16 @lsr_i16(i16 %a) { %1 = lshr i16 %a, 8 ret i16 %1 } +; CHECK-LABEL: lsrv_i32 +; CHECK: lsr {{w[0-9]*}}, w0, w1 +define zeroext i32 @lsrv_i32(i32 %a, i32 %b) { + %1 = lshr i32 %a, %b + ret i32 %1 +} + ; CHECK-LABEL: lsr_i32 -; CHECK: lsr {{w[0-9]*}}, {{w[0-9]*}}, #16 +; CHECK: lsr {{w[0-9]*}}, {{w[0-9]*}}, #16 define zeroext i32 @lsr_i32(i32 %a) { %1 = lshr i32 %a, 16 ret i32 %1 } +; CHECK-LABEL: lsrv_i64 +; CHECK: lsr {{x[0-9]*}}, x0, x1 +define i64 @lsrv_i64(i64 %a, i64 %b) { + %1 = lshr i64 %a, %b + ret i64 %1 +} + ; FIXME: This shouldn't use the variable shift version. ; CHECK-LABEL: lsr_i64 -; CHECK: lsr {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}} +; CHECK: lsr {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}} define i64 @lsr_i64(i64 %a) { %1 = lshr i64 %a, 32 ret i64 %1 } +; CHECK-LABEL: asrv_i8 +; CHECK: sxtb [[REG1:w[0-9]+]], w0 +; CHECK-NEXT: and [[REG2:w[0-9]+]], w1, #0xff +; CHECK-NEXT: asr [[REG3:w[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: and {{w[0-9]+}}, [[REG3]], #0xff +define zeroext i8 @asrv_i8(i8 %a, i8 %b) { + %1 = ashr i8 %a, %b + ret i8 %1 +} + ; CHECK-LABEL: asr_i8 -; CHECK: sbfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4 +; CHECK: sbfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4 define zeroext i8 @asr_i8(i8 %a) { %1 = ashr i8 %a, 4 ret i8 %1 } +; CHECK-LABEL: asrv_i16 +; CHECK: sxth [[REG1:w[0-9]+]], w0 +; CHECK-NEXT: and [[REG2:w[0-9]+]], w1, #0xffff +; CHECK-NEXT: asr [[REG3:w[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: and {{w[0-9]+}}, [[REG3]], #0xffff +define zeroext i16 @asrv_i16(i16 %a, i16 %b) { + %1 = ashr i16 %a, %b + ret i16 %1 +} + ; CHECK-LABEL: asr_i16 -; CHECK: sbfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8 +; CHECK: sbfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8 define zeroext i16 @asr_i16(i16 %a) { %1 = ashr i16 %a, 8 ret i16 %1 } +; CHECK-LABEL: asrv_i32 +; CHECK: asr {{w[0-9]*}}, w0, w1 +define zeroext i32 @asrv_i32(i32 %a, i32 %b) { + %1 = ashr i32 %a, %b + ret i32 %1 +} + ; CHECK-LABEL: asr_i32 -; CHECK: asr {{w[0-9]*}}, {{w[0-9]*}}, #16 +; CHECK: asr {{w[0-9]*}}, {{w[0-9]*}}, #16 define zeroext i32 @asr_i32(i32 %a) { %1 = ashr i32 %a, 16 ret i32 %1 } +; CHECK-LABEL: asrv_i64 +; CHECK: asr {{x[0-9]*}}, x0, x1 +define i64 @asrv_i64(i64 %a, i64 %b) { + %1 = ashr i64 %a, %b + ret i64 %1 +} + ; FIXME: This shouldn't use the variable shift version. 
; CHECK-LABEL: asr_i64 -; CHECK: asr {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}} +; CHECK: asr {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}} define i64 @asr_i64(i64 %a) { %1 = ashr i64 %a, 32 ret i64 %1 -- 2.34.1
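For readers following the lowering above, here is a minimal plain-C++ sketch (not part of the patch) of the semantics the new emitLSL_rr/emitLSR_rr/emitASR_rr helpers produce for the narrow i8/i16 cases, mirroring the CHECK lines in lsrv_i8 and asrv_i8: the shift amount is masked to the type's bit mask, ASR additionally sign-extends the shifted value first (SXTB/SXTH) where LSR only masks it, and the result is ANDed back down so the register holds the zero-extended narrow value the `zeroext` return expects. The _sketch function names and the standalone main() driver are illustrative only, and the sketch assumes the shift amount is less than the bit width, as valid IR guarantees.

// Minimal sketch with hypothetical names; assumes shift amount < 8.
#include <cstdint>
#include <cstdio>

// i8 logical shift right (cf. lsrv_i8): zero-extend the value, mask the
// shift amount to 0xff, shift in a 32-bit register, then AND the result
// back down to 8 bits.
static uint8_t lsrv_i8_sketch(uint8_t a, uint8_t b) {
  uint32_t wa = a & 0xffu;                // AND w, #0xff (zero-extend value)
  uint32_t wb = b & 0xffu;                // AND w, #0xff (mask shift amount)
  uint32_t r = wa >> wb;                  // LSRVWr
  return static_cast<uint8_t>(r & 0xffu); // final AND, #0xff
}

// i8 arithmetic shift right (cf. asrv_i8): sign-extend the value (SXTB)
// instead of zero-extending it, then shift and truncate the same way.
static uint8_t asrv_i8_sketch(int8_t a, uint8_t b) {
  int32_t wa = static_cast<int32_t>(a);   // SXTB
  uint32_t wb = b & 0xffu;                // AND w, #0xff (mask shift amount)
  int32_t r = wa >> wb;                   // ASRVWr
  return static_cast<uint8_t>(r & 0xff);  // final AND, #0xff (zeroext i8)
}

int main() {
  printf("0x%02x\n", (unsigned)lsrv_i8_sketch(0x80, 3)); // 0x10
  printf("0x%02x\n", (unsigned)asrv_i8_sketch(-128, 3)); // 0xf0 (-16, zero-extended)
  return 0;
}

The trailing AND in each helper is what keeps the 32-bit register in the zero-extended form the tests check for, and it is also why emitASR_rr is the one variant that calls EmitIntExt on the first operand: an arithmetic shift is only meaningful once the narrow value has been properly sign-extended into the wider register.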