From a26b1bdcc8d69f504357beda5168df292d2c84b3 Mon Sep 17 00:00:00 2001
From: Juergen Ributzka
Date: Wed, 27 Aug 2014 23:09:40 +0000
Subject: [PATCH] Revert "[FastISel][AArch64] Don't fold instructions too
 aggressively into the memory operation."

Quentin pointed out that this is not the correct approach and there is a
better and easier solution.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216632 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64FastISel.cpp     | 108 +++------------
 .../AArch64/fast-isel-addressing-modes.ll  | 130 ------------------
 2 files changed, 16 insertions(+), 222 deletions(-)

diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index aa959d19e16..df294bdc149 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -134,11 +134,7 @@ private:
   // Utility helper routines.
   bool isTypeLegal(Type *Ty, MVT &VT);
   bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
-  bool isLegalToFoldAddress(const Value *Obj);
-  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
-  bool computeAddressRecursively(const Value *Obj, Address &Addr, Type *Ty);
-  bool computeAddressBase(const Value *Obj, Address &Addr);
-
+  bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
   bool ComputeCallAddress(const Value *V, Address &Addr);
   bool SimplifyAddress(Address &Addr, MVT VT);
   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
@@ -420,68 +416,9 @@ unsigned AArch64FastISel::TargetMaterializeFloatZero(const ConstantFP* CFP) {
   return FastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 }
 
-bool AArch64FastISel::isLegalToFoldAddress(const Value *Obj) {
-  // Look through BitCast, IntToPtr, and PtrToInt.
-  const User *U = nullptr;
-  unsigned Opcode = Instruction::UserOp1;
-  if (const auto *I = dyn_cast<Instruction>(Obj)) {
-    // Bail out if the result is used in a different basic block.
-    if (FuncInfo.isExportedInst(I))
-      return false;
-
-    Opcode = I->getOpcode();
-    U = I;
-  } else if (const auto *CE = dyn_cast<ConstantExpr>(Obj)) {
-    Opcode = CE->getOpcode();
-    U = CE;
-  }
-
-  switch (Opcode) {
-  default:
-    break;
-  case Instruction::BitCast:
-    return isLegalToFoldAddress(U->getOperand(0));
-  case Instruction::IntToPtr:
-    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
-      return isLegalToFoldAddress(U->getOperand(0));
-    break;
-  case Instruction::PtrToInt:
-    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
-      return isLegalToFoldAddress(U->getOperand(0));
-    break;
-  }
-
-  // Allocas never kill their operands, so it is safe to fold it.
-  if (isa<AllocaInst>(Obj) || !isa<Instruction>(Obj))
-    return true;
-
-  const auto *I = cast<Instruction>(Obj);
-  // Trivial case - the memory instruction is the only user.
-  if (I->hasOneUse())
-    return true;
-
-  // Check all users - if all of them are memory instructions that FastISel
-  // can handle, then it is safe to fold the instruction.
-  for (auto *U : I->users())
-    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
-      return false;
-
-  return true;
-}
-
 // Computes the address to get to an object.
-bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr,
-                                     Type *Ty) {
-  // Don't fold instructions into the memory operation if their result is
-  // exported to another basic block or has more than one use - except if all
-  // uses are memory operations.
-  if (isLegalToFoldAddress(Obj))
-    return computeAddressRecursively(Obj, Addr, Ty);
-  return computeAddressBase(Obj, Addr);
-}
-
-bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
-                                                Type *Ty) {
+bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
+{
   const User *U = nullptr;
   unsigned Opcode = Instruction::UserOp1;
   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -508,18 +445,18 @@ bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
     break;
   case Instruction::BitCast: {
     // Look through bitcasts.
-    return computeAddressRecursively(U->getOperand(0), Addr, Ty);
+    return ComputeAddress(U->getOperand(0), Addr, Ty);
   }
   case Instruction::IntToPtr: {
     // Look past no-op inttoptrs.
     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
-      return computeAddressRecursively(U->getOperand(0), Addr, Ty);
+      return ComputeAddress(U->getOperand(0), Addr, Ty);
     break;
   }
   case Instruction::PtrToInt: {
-    // Look past no-op ptrtoints. Don't increment recursion level.
+    // Look past no-op ptrtoints.
     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
-      return computeAddressRecursively(U->getOperand(0), Addr, Ty);
+      return ComputeAddress(U->getOperand(0), Addr, Ty);
     break;
   }
   case Instruction::GetElementPtr: {
@@ -561,7 +498,7 @@ bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
 
     // Try to grab the base operand now.
     Addr.setOffset(TmpOffset);
-    if (computeAddressRecursively(U->getOperand(0), Addr, Ty))
+    if (ComputeAddress(U->getOperand(0), Addr, Ty))
       return true;
 
     // We failed, restore everything and try the other options.
@@ -582,9 +519,6 @@ bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
     break;
   }
   case Instruction::Add: {
-    if (!U->hasOneUse())
-      break;
-
     // Adds of constants are common and easy enough.
     const Value *LHS = U->getOperand(0);
     const Value *RHS = U->getOperand(1);
@@ -594,21 +528,17 @@ bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
 
     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
-      return computeAddressRecursively(LHS, Addr, Ty);
+      return ComputeAddress(LHS, Addr, Ty);
     }
 
     Address Backup = Addr;
-    if (computeAddressRecursively(LHS, Addr, Ty) &&
-        computeAddressRecursively(RHS, Addr, Ty))
+    if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
       return true;
     Addr = Backup;
 
     break;
   }
   case Instruction::Shl:
-    if (!U->hasOneUse())
-      break;
-
     if (Addr.getOffsetReg())
       break;
 
@@ -631,10 +561,8 @@ bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
     Addr.setShift(Val);
     Addr.setExtendType(AArch64_AM::LSL);
 
-    // Only try to fold the operand if it has one use.
     if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
-      if (I->hasOneUse() &&
-          (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB))
+      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
         U = I;
 
     if (const auto *ZE = dyn_cast<ZExtInst>(U))
@@ -654,10 +582,6 @@ bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
     break;
   }
 
-  return computeAddressBase(Obj, Addr);
-}
-
-bool AArch64FastISel::computeAddressBase(const Value *Obj, Address &Addr) {
   if (Addr.getReg()) {
     if (!Addr.getOffsetReg()) {
       unsigned Reg = getRegForValue(Obj);
@@ -1428,7 +1352,7 @@ bool AArch64FastISel::SelectLoad(const Instruction *I) {
 
   // See if we can handle this address.
   Address Addr;
-  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
+  if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
     return false;
 
   unsigned ResultReg;
@@ -1545,7 +1469,7 @@ bool AArch64FastISel::SelectStore(const Instruction *I) {
 
   // See if we can handle this address.
   Address Addr;
-  if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
+  if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
     return false;
 
   if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
@@ -2453,7 +2377,7 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
     if (MTI->isVolatile())
       return false;
 
-    // Disable inlining for memmove before calls to computeAddress. Otherwise,
+    // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
     // we would emit dead code because we don't currently handle memmoves.
     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
@@ -2463,8 +2387,8 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
       unsigned Alignment = MTI->getAlignment();
       if (IsMemCpySmall(Len, Alignment)) {
         Address Dest, Src;
-        if (!computeAddress(MTI->getRawDest(), Dest) ||
-            !computeAddress(MTI->getRawSource(), Src))
+        if (!ComputeAddress(MTI->getRawDest(), Dest) ||
+            !ComputeAddress(MTI->getRawSource(), Src))
           return false;
         if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
           return true;
diff --git a/test/CodeGen/AArch64/fast-isel-addressing-modes.ll b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
index 5792c69da08..750e081d423 100644
--- a/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
+++ b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
@@ -281,50 +281,6 @@ define i64 @load_breg_immoff_8(i64 %a) {
   ret i64 %3
 }
 
-; Allow folding of the address if it is used by memory instructions only.
-define void @load_breg_immoff_9(i64 %a) {
-; FAST-LABEL: load_breg_immoff_9
-; FAST: ldr {{x[0-9]+}}, [x0, #96]
-; FAST: str {{x[0-9]+}}, [x0, #96]
-  %1 = add i64 %a, 96
-  %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
-  %4 = add i64 %3, 1
-  store i64 %4, i64* %2
-  ret void
-}
-
-; Don't fold if the add result leaves the basic block - even if the user is a
-; memory operation.
-define i64 @load_breg_immoff_10(i64 %a, i1 %c) {
-; FAST-LABEL: load_breg_immoff_10
-; FAST: add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
-; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
-  %1 = add i64 %a, 96
-  %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
-  br i1 %c, label %bb1, label %bb2
-bb1:
-  %4 = shl i64 %1, 3
-  %5 = inttoptr i64 %4 to i64*
-  %res = load i64* %5
-  ret i64 %res
-bb2:
-  ret i64 %3
-}
-
-; Don't allow folding of the address if it is used by non-memory instructions.
-define i64 @load_breg_immoff_11(i64 %a) {
-; FAST-LABEL: load_breg_immoff_11
-; FAST: add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
-; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
-  %1 = add i64 %a, 96
-  %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
-  %4 = add i64 %1, %3
-  ret i64 %4
-}
-
 ; Load Base Register + Register Offset
 define i64 @load_breg_offreg_1(i64 %a, i64 %b) {
 ; CHECK-LABEL: load_breg_offreg_1
@@ -345,33 +301,6 @@ define i64 @load_breg_offreg_2(i64 %a, i64 %b) {
   ret i64 %3
 }
 
-; Don't fold if the add result leaves the basic block.
-define i64 @load_breg_offreg_3(i64 %a, i64 %b, i1 %c) {
-; FAST-LABEL: load_breg_offreg_3
-; FAST: add [[REG:x[0-9]+]], x0, x1
-; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
-  %1 = add i64 %a, %b
-  %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
-  br i1 %c, label %bb1, label %bb2
-bb1:
-  %res = load i64* %2
-  ret i64 %res
-bb2:
-  ret i64 %3
-}
-
-define i64 @load_breg_offreg_4(i64 %a, i64 %b, i1 %c) {
-; FAST-LABEL: load_breg_offreg_4
-; FAST: add [[REG:x[0-9]+]], x0, x1
-; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
-  %1 = add i64 %a, %b
-  %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
-  %4 = add i64 %1, %3
-  ret i64 %4
-}
-
 ; Load Base Register + Register Offset + Immediate Offset
 define i64 @load_breg_offreg_immoff_1(i64 %a, i64 %b) {
 ; CHECK-LABEL: load_breg_offreg_immoff_1
@@ -476,35 +405,6 @@ define i32 @load_breg_shift_offreg_5(i64 %a, i64 %b) {
   ret i32 %5
 }
 
-; Don't fold if the shift result leaves the basic block.
-define i64 @load_breg_shift_offreg_6(i64 %a, i64 %b, i1 %c) {
-; FAST-LABEL: load_breg_shift_offreg_6
-; FAST: lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
-; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
-  %1 = shl i64 %a, 3
-  %2 = add i64 %b, %1
-  %3 = inttoptr i64 %2 to i64*
-  %4 = load i64* %3
-  br i1 %c, label %bb1, label %bb2
-bb1:
-  %5 = inttoptr i64 %1 to i64*
-  %res = load i64* %5
-  ret i64 %res
-bb2:
-  ret i64 %4
-}
-
-define i64 @load_breg_shift_offreg_7(i64 %a, i64 %b) {
-; FAST-LABEL: load_breg_shift_offreg_7
-; FAST: lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
-; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
-  %1 = shl i64 %a, 3
-  %2 = add i64 %b, %1
-  %3 = inttoptr i64 %2 to i64*
-  %4 = load i64* %3
-  %5 = add i64 %1, %4
-  ret i64 %5
-}
 
 ; Load Base Register + Scaled Register Offset + Sign/Zero extension
 define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
@@ -529,36 +429,6 @@ define i32 @load_breg_zext_shift_offreg_2(i32 %a, i64 %b) {
   ret i32 %5
 }
 
-; Don't fold if the zext result leaves the basic block.
-define i64 @load_breg_zext_shift_offreg_3(i32 %a, i64 %b, i1 %c) {
-; FAST-LABEL: load_breg_zext_shift_offreg_3
-; FAST: ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
-  %1 = zext i32 %a to i64
-  %2 = shl i64 %1, 3
-  %3 = add i64 %b, %2
-  %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
-  br i1 %c, label %bb1, label %bb2
-bb1:
-  %6 = inttoptr i64 %1 to i64*
-  %res = load i64* %6
-  ret i64 %res
-bb2:
-  ret i64 %5
-}
-
-define i64 @load_breg_zext_shift_offreg_4(i32 %a, i64 %b) {
-; FAST-LABEL: load_breg_zext_shift_offreg_4
-; FAST: ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
-  %1 = zext i32 %a to i64
-  %2 = shl i64 %1, 3
-  %3 = add i64 %b, %2
-  %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
-  %6 = add i64 %1, %5
-  ret i64 %6
-}
-
 define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
 ; CHECK-LABEL: load_breg_sext_shift_offreg_1
 ; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
-- 
2.34.1