From: Juergen Ributzka
Date: Tue, 30 Sep 2014 00:49:58 +0000 (+0000)
Subject: [FastISel][AArch64] Fold sign-/zero-extends into the load instruction.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=a0af4b027199fa5c77e8b1a28b9df769db261958;p=oota-llvm.git

[FastISel][AArch64] Fold sign-/zero-extends into the load instruction.

The sign-/zero-extension of the loaded value can be performed by the memory
instruction for free. If the result of the load has only one use and that use
is a sign-/zero-extend, we emit the corresponding extending load instead. The
extend then only needs a register copy, which is optimized away later on.

Other instructions that consume the sign-/zero-extended value are also made
aware of this fact, so they don't fold the extend a second time.

This fixes rdar://problem/18495928.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218653 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index fb4f1cb7237..f20dcae7168 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -177,7 +177,7 @@ private:
   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
-  bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
+  bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr, bool WantZExt = true,
                 MachineMemOperand *MMO = nullptr);
   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
@@ -255,6 +255,23 @@ public:
 
 #include "AArch64GenCallingConv.inc"
 
+/// \brief Check if the sign-/zero-extend will be a noop.
+static bool isIntExtFree(const Instruction *I) {
+  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+         "Unexpected integer extend instruction.");
+  bool IsZExt = isa<ZExtInst>(I);
+
+  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
+    if (LI->hasOneUse())
+      return true;
+
+  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
+    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
+      return true;
+
+  return false;
+}
+
 /// \brief Determine the implicit scale factor that is applied by a memory
 /// operation for a given value type.
 static unsigned getImplicitScaleFactor(MVT VT) {
@@ -585,72 +602,74 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
     if (Addr.getOffsetReg())
       break;
 
-    if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
-      unsigned Val = CI->getZExtValue();
-      if (Val < 1 || Val > 3)
-        break;
+    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
+    if (!CI)
+      break;
 
-      uint64_t NumBytes = 0;
-      if (Ty && Ty->isSized()) {
-        uint64_t NumBits = DL.getTypeSizeInBits(Ty);
-        NumBytes = NumBits / 8;
-        if (!isPowerOf2_64(NumBits))
-          NumBytes = 0;
-      }
+    unsigned Val = CI->getZExtValue();
+    if (Val < 1 || Val > 3)
+      break;
 
-      if (NumBytes != (1ULL << Val))
-        break;
+    uint64_t NumBytes = 0;
+    if (Ty && Ty->isSized()) {
+      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+      NumBytes = NumBits / 8;
+      if (!isPowerOf2_64(NumBits))
+        NumBytes = 0;
+    }
+
+    if (NumBytes != (1ULL << Val))
+      break;
 
-      Addr.setShift(Val);
-      Addr.setExtendType(AArch64_AM::LSL);
+    Addr.setShift(Val);
+    Addr.setExtendType(AArch64_AM::LSL);
 
-      const Value *Src = U->getOperand(0);
-      if (const auto *I = dyn_cast<Instruction>(Src))
-        if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
-          Src = I;
+    const Value *Src = U->getOperand(0);
+    if (const auto *I = dyn_cast<Instruction>(Src))
+      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+        Src = I;
 
-    if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
-      if (ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+    // Fold the zext or sext when it won't become a noop.
+    if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
+      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
         Addr.setExtendType(AArch64_AM::UXTW);
         Src = ZE->getOperand(0);
-        }
-      } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
-        if (SE->getOperand(0)->getType()->isIntegerTy(32)) {
-          Addr.setExtendType(AArch64_AM::SXTW);
-          Src = SE->getOperand(0);
-        }
       }
+    } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
+      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
+        Addr.setExtendType(AArch64_AM::SXTW);
+        Src = SE->getOperand(0);
+      }
+    }
 
-      if (const auto *AI = dyn_cast<BinaryOperator>(Src))
-        if (AI->getOpcode() == Instruction::And) {
-          const Value *LHS = AI->getOperand(0);
-          const Value *RHS = AI->getOperand(1);
-
-          if (const auto *C = dyn_cast<ConstantInt>(LHS))
-            if (C->getValue() == 0xffffffff)
-              std::swap(LHS, RHS);
-
-          if (const auto *C = dyn_cast<ConstantInt>(RHS))
-            if (C->getValue() == 0xffffffff) {
-              Addr.setExtendType(AArch64_AM::UXTW);
-              unsigned Reg = getRegForValue(LHS);
-              if (!Reg)
-                return false;
-              bool RegIsKill = hasTrivialKill(LHS);
-              Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
-                                               AArch64::sub_32);
-              Addr.setOffsetReg(Reg);
-              return true;
-            }
-        }
+    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
+      if (AI->getOpcode() == Instruction::And) {
+        const Value *LHS = AI->getOperand(0);
+        const Value *RHS = AI->getOperand(1);
 
-      unsigned Reg = getRegForValue(Src);
-      if (!Reg)
-        return false;
-      Addr.setOffsetReg(Reg);
-      return true;
-    }
-    break;
+        if (const auto *C = dyn_cast<ConstantInt>(LHS))
+          if (C->getValue() == 0xffffffff)
+            std::swap(LHS, RHS);
+
+        if (const auto *C = dyn_cast<ConstantInt>(RHS))
+          if (C->getValue() == 0xffffffff) {
+            Addr.setExtendType(AArch64_AM::UXTW);
+            unsigned Reg = getRegForValue(LHS);
+            if (!Reg)
+              return false;
+            bool RegIsKill = hasTrivialKill(LHS);
+            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
+                                             AArch64::sub_32);
+            Addr.setOffsetReg(Reg);
+            return true;
+          }
+      }
+
+    unsigned Reg = getRegForValue(Src);
+    if (!Reg)
+      return false;
+    Addr.setOffsetReg(Reg);
+    return true;
   }
   case Instruction::Mul: {
     if (Addr.getOffsetReg())
@@ -692,13 +711,15 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
         Src = I;
+
+    // Fold the zext or sext when it won't become a noop.
     if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
-      if (ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
         Addr.setExtendType(AArch64_AM::UXTW);
         Src = ZE->getOperand(0);
       }
     } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
-      if (SE->getOperand(0)->getType()->isIntegerTy(32)) {
+      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
         Addr.setExtendType(AArch64_AM::SXTW);
         Src = SE->getOperand(0);
       }
@@ -1568,7 +1589,7 @@ unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 }
 
 bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
-                               MachineMemOperand *MMO) {
+                               bool WantZExt, MachineMemOperand *MMO) {
   // Simplify this down to something we can handle.
   if (!simplifyAddress(Addr, VT))
     return false;
@@ -1585,20 +1606,38 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
     ScaleFactor = 1;
   }
 
-  static const unsigned OpcTable[4][6] = {
-    { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi,
-      AArch64::LDURSi, AArch64::LDURDi },
-    { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui,
-      AArch64::LDRSui, AArch64::LDRDui },
-    { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
-      AArch64::LDRSroX, AArch64::LDRDroX },
-    { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
-      AArch64::LDRSroW, AArch64::LDRDroW }
+  static const unsigned GPOpcTable[2][4][4] = {
+    // Sign-extend.
+    { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURSWi,
+        AArch64::LDURXi },
+      { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRSWui,
+        AArch64::LDRXui },
+      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRSWroX,
+        AArch64::LDRXroX },
+      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRSWroW,
+        AArch64::LDRXroW },
+    },
+    // Zero-extend.
+    { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
+        AArch64::LDURXi },
+      { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
+        AArch64::LDRXui },
+      { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
+        AArch64::LDRXroX },
+      { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
+        AArch64::LDRXroW }
+    }
+  };
+
+  static const unsigned FPOpcTable[4][2] = {
+    { AArch64::LDURSi, AArch64::LDURDi },
+    { AArch64::LDRSui, AArch64::LDRDui },
+    { AArch64::LDRSroX, AArch64::LDRDroX },
+    { AArch64::LDRSroW, AArch64::LDRDroW }
   };
 
   unsigned Opc;
   const TargetRegisterClass *RC;
-  bool VTIsi1 = false;
   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                       Addr.getOffsetReg();
   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
@@ -1607,14 +1646,33 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
     Idx++;
 
   switch (VT.SimpleTy) {
-  default: llvm_unreachable("Unexpected value type.");
-  case MVT::i1: VTIsi1 = true; // Intentional fall-through.
-  case MVT::i8: Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
-  case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
-  case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
-  case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
-  case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
-  case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
+  default:
+    llvm_unreachable("Unexpected value type.");
+  case MVT::i1: // Intentional fall-through.
+  case MVT::i8:
+    Opc = GPOpcTable[WantZExt][Idx][0];
+    RC = &AArch64::GPR32RegClass;
+    break;
+  case MVT::i16:
+    Opc = GPOpcTable[WantZExt][Idx][1];
+    RC = &AArch64::GPR32RegClass;
+    break;
+  case MVT::i32:
+    Opc = GPOpcTable[WantZExt][Idx][2];
+    RC = WantZExt ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
+    break;
+  case MVT::i64:
+    Opc = GPOpcTable[WantZExt][Idx][3];
+    RC = &AArch64::GPR64RegClass;
+    break;
+  case MVT::f32:
+    Opc = FPOpcTable[Idx][0];
+    RC = &AArch64::FPR32RegClass;
+    break;
+  case MVT::f64:
+    Opc = FPOpcTable[Idx][1];
+    RC = &AArch64::FPR64RegClass;
+    break;
   }
 
   // Create the base instruction, then add the operands.
@@ -1623,8 +1681,14 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
                           TII.get(Opc), ResultReg);
   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
 
+  // For 32bit loads we do sign-extending loads to 64bit and then extract the
+  // subreg. In the end this is just a NOOP.
+  if (VT == MVT::i32 && !WantZExt)
+    ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, /*IsKill=*/true,
+                                           AArch64::sub_32);
+
   // Loading an i1 requires special handling.
-  if (VTIsi1) {
+  if (VT == MVT::i1) {
     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
     assert(ANDReg && "Unexpected AND instruction emission failure.");
     ResultReg = ANDReg;
@@ -1701,8 +1765,12 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
     return false;
 
+  bool WantZExt = true;
+  if (I->hasOneUse() && isa<SExtInst>(I->use_begin()->getUser()))
+    WantZExt = false;
+
   unsigned ResultReg;
-  if (!emitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
+  if (!emitLoad(VT, ResultReg, Addr, WantZExt, createMachineMemOperandFor(I)))
     return false;
 
   updateValueMap(I, ResultReg);
@@ -3776,46 +3844,60 @@ unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
 }
 
 bool AArch64FastISel::selectIntExt(const Instruction *I) {
-  // On ARM, in general, integer casts don't involve legal types; this code
-  // handles promotable integers. The high bits for a type smaller than
-  // the register size are assumed to be undefined.
-  Type *DestTy = I->getType();
-  Value *Src = I->getOperand(0);
-  Type *SrcTy = Src->getType();
-
-  unsigned SrcReg = getRegForValue(Src);
-  if (!SrcReg)
+  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+         "Unexpected integer extend instruction.");
+  MVT RetVT;
+  MVT SrcVT;
+  if (!isTypeSupported(I->getType(), RetVT))
     return false;
 
-  EVT SrcEVT = TLI.getValueType(SrcTy, true);
-  EVT DestEVT = TLI.getValueType(DestTy, true);
-  if (!SrcEVT.isSimple())
-    return false;
-  if (!DestEVT.isSimple())
+  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
     return false;
 
-  MVT SrcVT = SrcEVT.getSimpleVT();
-  MVT DestVT = DestEVT.getSimpleVT();
-  unsigned ResultReg = 0;
+  if (isIntExtFree(I)) {
+    unsigned SrcReg = getRegForValue(I->getOperand(0));
+    if (!SrcReg)
+      return false;
+    bool SrcIsKill = hasTrivialKill(I->getOperand(0));
 
-  bool IsZExt = isa<ZExtInst>(I);
-  // Check if it is an argument and if it is already zero/sign-extended.
-  if (const auto *Arg = dyn_cast<Argument>(Src)) {
-    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
-      if (DestVT == MVT::i64) {
-        ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
+    const TargetRegisterClass *RC = (RetVT == MVT::i64) ?
+        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+    unsigned ResultReg = createResultReg(RC);
+    if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(AArch64::SUBREG_TO_REG), ResultReg)
           .addImm(0)
-          .addReg(SrcReg)
+          .addReg(SrcReg, getKillRegState(SrcIsKill))
           .addImm(AArch64::sub_32);
-      } else
-        ResultReg = SrcReg;
+    } else {
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ResultReg)
+          .addReg(SrcReg, getKillRegState(SrcIsKill));
     }
+    updateValueMap(I, ResultReg);
+    return true;
+  }
+
+  unsigned SrcReg = getRegForValue(I->getOperand(0));
+  if (!SrcReg)
+    return false;
+  bool SrcRegIsKill = hasTrivialKill(I->getOperand(0));
+
+  unsigned ResultReg = 0;
+  if (isIntExtFree(I)) {
+    if (RetVT == MVT::i64) {
+      ResultReg = createResultReg(&AArch64::GPR64RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(AArch64::SUBREG_TO_REG), ResultReg)
+          .addImm(0)
+          .addReg(SrcReg, getKillRegState(SrcRegIsKill))
+          .addImm(AArch64::sub_32);
+    } else
+      ResultReg = SrcReg;
   }
 
   if (!ResultReg)
-    ResultReg = emitIntExt(SrcVT, SrcReg, DestVT, IsZExt);
+    ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, isa<ZExtInst>(I));
 
   if (!ResultReg)
     return false;
@@ -3891,18 +3973,22 @@ bool AArch64FastISel::selectMul(const Instruction *I) {
   MVT SrcVT = VT;
   bool IsZExt = true;
   if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
-    MVT VT;
-    if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
-      SrcVT = VT;
-      IsZExt = true;
-      Src0 = ZExt->getOperand(0);
+    if (!isIntExtFree(ZExt)) {
+      MVT VT;
+      if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
+        SrcVT = VT;
+        IsZExt = true;
+        Src0 = ZExt->getOperand(0);
+      }
     }
   } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
-    MVT VT;
-    if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
-      SrcVT = VT;
-      IsZExt = false;
-      Src0 = SExt->getOperand(0);
+    if (!isIntExtFree(SExt)) {
+      MVT VT;
+      if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
+        SrcVT = VT;
+        IsZExt = false;
+        Src0 = SExt->getOperand(0);
+      }
     }
   }
 
@@ -3954,18 +4040,22 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
   bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
   const Value *Op0 = I->getOperand(0);
   if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
-    MVT TmpVT;
-    if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
-      SrcVT = TmpVT;
-      IsZExt = true;
-      Op0 = ZExt->getOperand(0);
+    if (!isIntExtFree(ZExt)) {
+      MVT TmpVT;
+      if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
+        SrcVT = TmpVT;
+        IsZExt = true;
+        Op0 = ZExt->getOperand(0);
+      }
    }
   } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
-    MVT TmpVT;
-    if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
-      SrcVT = TmpVT;
-      IsZExt = false;
-      Op0 = SExt->getOperand(0);
+    if (!isIntExtFree(SExt)) {
+      MVT TmpVT;
+      if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
+        SrcVT = TmpVT;
+        IsZExt = false;
+        Op0 = SExt->getOperand(0);
+      }
     }
   }
 
@@ -4213,13 +4303,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
   case Instruction::FPToUI:
     return selectFPToInt(I, /*Signed=*/false);
   case Instruction::ZExt:
-    if (!selectCast(I, ISD::ZERO_EXTEND))
-      return selectIntExt(I);
-    return true;
   case Instruction::SExt:
-    if (!selectCast(I, ISD::SIGN_EXTEND))
-      return selectIntExt(I);
-    return true;
+    return selectIntExt(I);
   case Instruction::Trunc:
     if (!selectCast(I, ISD::TRUNCATE))
       return selectTrunc(I);
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
index a6c7bef3725..e5151847a59 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
@@ -17,7 +17,6 @@ entry:
 ; CHECK: ldrh w0, [sp, #12]
 ; CHECK: strb w0, [sp, #15]
 ; CHECK: ldrb w0, [sp, #15]
-; CHECK: uxtb w0, w0
 ; CHECK: add sp, sp, #16
 ; CHECK: ret
   %a.addr = alloca i8, align 1
@@ -51,14 +50,11 @@ entry:
 ; CHECK: str w2, [sp, #8]
 ; CHECK: str x3, [sp]
 ; CHECK: ldrb w0, [sp, #15]
-; CHECK: uxtb w0, w0
 ; CHECK: strh w0, [sp, #12]
 ; CHECK: ldrh w0, [sp, #12]
-; CHECK: uxth w0, w0
 ; CHECK: str w0, [sp, #8]
 ; CHECK: ldr w0, [sp, #8]
 ; CHECK: mov x3, x0
-; CHECK: ubfx x3, x3, #0, #32
 ; CHECK: str x3, [sp]
 ; CHECK: ldr x0, [sp]
 ; CHECK: ret
@@ -109,15 +105,11 @@ entry:
 ; CHECK: strh w1, [sp, #12]
 ; CHECK: str w2, [sp, #8]
 ; CHECK: str x3, [sp]
-; CHECK: ldrb w0, [sp, #15]
-; CHECK: sxtb w0, w0
+; CHECK: ldrsb w0, [sp, #15]
 ; CHECK: strh w0, [sp, #12]
-; CHECK: ldrh w0, [sp, #12]
-; CHECK: sxth w0, w0
+; CHECK: ldrsh w0, [sp, #12]
 ; CHECK: str w0, [sp, #8]
-; CHECK: ldr w0, [sp, #8]
-; CHECK: mov x3, x0
-; CHECK: sxtw x3, w3
+; CHECK: ldrsw x3, [sp, #8]
 ; CHECK: str x3, [sp]
 ; CHECK: ldr x0, [sp]
 ; CHECK: ret
diff --git a/test/CodeGen/AArch64/fast-isel-int-ext.ll b/test/CodeGen/AArch64/fast-isel-int-ext.ll
new file mode 100644
index 00000000000..115b96d7806
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-int-ext.ll
@@ -0,0 +1,190 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+;
+; Test that we only use the sign/zero extend in the address calculation when
+; +; SHIFT +; +define i64 @load_addr_shift_zext1(i32 zeroext %a, i64 %b) { +; CHECK-LABEL: load_addr_shift_zext1 +; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3] + %1 = zext i32 %a to i64 + %2 = shl i64 %1, 3 + %3 = add i64 %b, %2 + %4 = inttoptr i64 %3 to i64* + %5 = load i64* %4 + ret i64 %5 +} + +define i64 @load_addr_shift_zext2(i32 signext %a, i64 %b) { +; CHECK-LABEL: load_addr_shift_zext2 +; CHECK: ldr {{x[0-9]+}}, [x1, w0, uxtw #3{{\]}} + %1 = zext i32 %a to i64 + %2 = shl i64 %1, 3 + %3 = add i64 %b, %2 + %4 = inttoptr i64 %3 to i64* + %5 = load i64* %4 + ret i64 %5 +} + +define i64 @load_addr_shift_sext1(i32 signext %a, i64 %b) { +; CHECK-LABEL: load_addr_shift_sext1 +; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3] + %1 = sext i32 %a to i64 + %2 = shl i64 %1, 3 + %3 = add i64 %b, %2 + %4 = inttoptr i64 %3 to i64* + %5 = load i64* %4 + ret i64 %5 +} + +define i64 @load_addr_shift_sext2(i32 zeroext %a, i64 %b) { +; CHECK-LABEL: load_addr_shift_sext2 +; CHECK: ldr {{x[0-9]+}}, [x1, w0, sxtw #3] + %1 = sext i32 %a to i64 + %2 = shl i64 %1, 3 + %3 = add i64 %b, %2 + %4 = inttoptr i64 %3 to i64* + %5 = load i64* %4 + ret i64 %5 +} + +; +; MUL +; +define i64 @load_addr_mul_zext1(i32 zeroext %a, i64 %b) { +; CHECK-LABEL: load_addr_mul_zext1 +; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3] + %1 = zext i32 %a to i64 + %2 = mul i64 %1, 8 + %3 = add i64 %b, %2 + %4 = inttoptr i64 %3 to i64* + %5 = load i64* %4 + ret i64 %5 +} + +define i64 @load_addr_mul_zext2(i32 signext %a, i64 %b) { +; CHECK-LABEL: load_addr_mul_zext2 +; CHECK: ldr {{x[0-9]+}}, [x1, w0, uxtw #3] + %1 = zext i32 %a to i64 + %2 = mul i64 %1, 8 + %3 = add i64 %b, %2 + %4 = inttoptr i64 %3 to i64* + %5 = load i64* %4 + ret i64 %5 +} + +define i64 @load_addr_mul_sext1(i32 signext %a, i64 %b) { +; CHECK-LABEL: load_addr_mul_sext1 +; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3] + %1 = sext i32 %a to i64 + %2 = mul i64 %1, 8 + %3 = add i64 %b, %2 + %4 = inttoptr i64 %3 to i64* + %5 = load i64* %4 + ret i64 %5 +} + +define i64 @load_addr_mul_sext2(i32 zeroext %a, i64 %b) { +; CHECK-LABEL: load_addr_mul_sext2 +; CHECK: ldr {{x[0-9]+}}, [x1, w0, sxtw #3] + %1 = sext i32 %a to i64 + %2 = mul i64 %1, 8 + %3 = add i64 %b, %2 + %4 = inttoptr i64 %3 to i64* + %5 = load i64* %4 + ret i64 %5 +} + +; Test folding of the sign-/zero-extend into the load instruction. 
+define i32 @load_zext_i8_to_i32(i8* %a) {
+; CHECK-LABEL: load_zext_i8_to_i32
+; CHECK: ldrb w0, [x0]
+; CHECK-NOT: uxtb
+  %1 = load i8* %a
+  %2 = zext i8 %1 to i32
+  ret i32 %2
+}
+
+define i32 @load_zext_i16_to_i32(i16* %a) {
+; CHECK-LABEL: load_zext_i16_to_i32
+; CHECK: ldrh w0, [x0]
+; CHECK-NOT: uxth
+  %1 = load i16* %a
+  %2 = zext i16 %1 to i32
+  ret i32 %2
+}
+
+define i64 @load_zext_i8_to_i64(i8* %a) {
+; CHECK-LABEL: load_zext_i8_to_i64
+; CHECK: ldrb w0, [x0]
+; CHECK-NOT: uxtb
+  %1 = load i8* %a
+  %2 = zext i8 %1 to i64
+  ret i64 %2
+}
+
+define i64 @load_zext_i16_to_i64(i16* %a) {
+; CHECK-LABEL: load_zext_i16_to_i64
+; CHECK: ldrh w0, [x0]
+; CHECK-NOT: uxth
+  %1 = load i16* %a
+  %2 = zext i16 %1 to i64
+  ret i64 %2
+}
+
+define i64 @load_zext_i32_to_i64(i32* %a) {
+; CHECK-LABEL: load_zext_i32_to_i64
+; CHECK: ldr w0, [x0]
+; CHECK-NOT: uxtw
+  %1 = load i32* %a
+  %2 = zext i32 %1 to i64
+  ret i64 %2
+}
+
+define i32 @load_sext_i8_to_i32(i8* %a) {
+; CHECK-LABEL: load_sext_i8_to_i32
+; CHECK: ldrsb w0, [x0]
+; CHECK-NOT: sxtb
+  %1 = load i8* %a
+  %2 = sext i8 %1 to i32
+  ret i32 %2
+}
+
+define i32 @load_sext_i16_to_i32(i16* %a) {
+; CHECK-LABEL: load_sext_i16_to_i32
+; CHECK: ldrsh w0, [x0]
+; CHECK-NOT: sxth
+  %1 = load i16* %a
+  %2 = sext i16 %1 to i32
+  ret i32 %2
+}
+
+define i64 @load_sext_i8_to_i64(i8* %a) {
+; CHECK-LABEL: load_sext_i8_to_i64
+; CHECK: ldrsb w0, [x0]
+; CHECK-NOT: sxtb
+  %1 = load i8* %a
+  %2 = sext i8 %1 to i64
+  ret i64 %2
+}
+
+define i64 @load_sext_i16_to_i64(i16* %a) {
+; CHECK-LABEL: load_sext_i16_to_i64
+; CHECK: ldrsh w0, [x0]
+; CHECK-NOT: sxth
+  %1 = load i16* %a
+  %2 = sext i16 %1 to i64
+  ret i64 %2
+}
+
+define i64 @load_sext_i32_to_i64(i32* %a) {
+; CHECK-LABEL: load_sext_i32_to_i64
+; CHECK: ldrsw x0, [x0]
+; CHECK-NOT: sxtw
+  %1 = load i32* %a
+  %2 = sext i32 %1 to i64
+  ret i64 %2
+}
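
The new tests above each have exactly one user of the loaded value. As a minimal sketch of the one-use guard described in the commit message (this function is not part of the committed test file; its name and the expected code generation are assumptions), a load whose result has a second user should keep the plain load, with the sign-extend emitted as a separate instruction rather than folded in:

define i32 @load_sext_i8_to_i32_multi_use(i8* %a, i8* %b) {
; The loaded value has two users (the store and the sext), so isIntExtFree()
; returns false and the extend is expected to remain a separate instruction.
  %1 = load i8* %a
  store i8 %1, i8* %b
  %2 = sext i8 %1 to i32
  ret i32 %2
}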