From: Tim Northover Date: Wed, 16 Apr 2014 11:52:51 +0000 (+0000) Subject: ARM64: use 32-bit moves for constants where possible. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=fef8e383eb8c0dc534c7cac98b3670dec2cc86fb;p=oota-llvm.git ARM64: use 32-bit moves for constants where possible. If we know that a particular 64-bit constant has all high bits zero, then we can rely on the fact that 32-bit ARM64 instructions automatically zero out the high bits of an x-register. This gives the expansion logic less constraints to satisfy and so sometimes allows it to pick better sequences. Came up while porting test/CodeGen/AArch64/movw-consts.ll: this will allow a 32-bit MOVN to be used in @test8 soon. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206379 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp b/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp index 5d2afae3fff..7d2a97f8e72 100644 --- a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp +++ b/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp @@ -28,7 +28,7 @@ namespace { class ARM64DeadRegisterDefinitions : public MachineFunctionPass { private: const TargetRegisterInfo *TRI; - bool implicitlyDefinesSubReg(unsigned Reg, const MachineInstr &MI); + bool implicitlyDefinesOverlappingReg(unsigned Reg, const MachineInstr &MI); bool processMachineBasicBlock(MachineBasicBlock &MBB); bool usesFrameIndex(const MachineInstr &MI); public: @@ -47,12 +47,11 @@ public: char ARM64DeadRegisterDefinitions::ID = 0; } // end anonymous namespace -bool -ARM64DeadRegisterDefinitions::implicitlyDefinesSubReg(unsigned Reg, - const MachineInstr &MI) { +bool ARM64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg( + unsigned Reg, const MachineInstr &MI) { for (const MachineOperand &MO : MI.implicit_operands()) if (MO.isReg() && MO.isDef()) - if (TRI->isSubRegister(Reg, MO.getReg())) + if (TRI->regsOverlap(Reg, MO.getReg())) return true; return 
false; } @@ -86,9 +85,10 @@ ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock &MBB) { DEBUG(dbgs() << " Ignoring, def is tied operand.\n"); continue; } - // Don't change the register if there's an implicit def of a subreg. - if (implicitlyDefinesSubReg(MO.getReg(), MI)) { - DEBUG(dbgs() << " Ignoring, implicitly defines subregister.\n"); + // Don't change the register if there's an implicit def of a subreg or + // supperreg. + if (implicitlyDefinesOverlappingReg(MO.getReg(), MI)) { + DEBUG(dbgs() << " Ignoring, implicitly defines overlap reg.\n"); continue; } // Make sure the instruction take a register class that contains diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td index 9f599eb2355..69f1e2c5a6d 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ b/lib/Target/ARM64/ARM64InstrInfo.td @@ -395,6 +395,22 @@ def MOVi64imm Sched<[WriteImm]>; } // isReMaterializable, isCodeGenOnly +// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the +// eventual expansion code fewer bits to worry about getting right. Marshalling +// the types is a little tricky though: +def i64imm_32bit : ImmLeaf<i64, [{ + return (Imm & 0xffffffffULL) == Imm; +}]>; + +def trunc_imm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i32); +}]>; + +def : Pat<(i64 i64imm_32bit:$src), + (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; + +// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK +// sequences. 
def : Pat<(ARM64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, tglobaladdr:$g1, tglobaladdr:$g0), (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48), diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll index cda33a9bf0e..39307bcb94f 100644 --- a/test/CodeGen/AArch64/cond-sel.ll +++ b/test/CodeGen/AArch64/cond-sel.ll @@ -46,11 +46,11 @@ define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r ; CHECK-NOFP-NOT: fcmp %val2 = select i1 %tst2, i64 9, i64 15 store i64 %val2, i64* @var64 -; CHECK-AARCH64: movz [[CONST15:x[0-9]+]], #15 -; CHECK-ARM64: orr [[CONST15:x[0-9]+]], xzr, #0xf -; CHECK: movz [[CONST9:x[0-9]+]], #9 -; CHECK: csel [[MAYBETRUE:x[0-9]+]], [[CONST9]], [[CONST15]], eq -; CHECK: csel {{x[0-9]+}}, [[CONST9]], [[MAYBETRUE]], vs +; CHECK-AARCH64: movz x[[CONST15:[0-9]+]], #15 +; CHECK-ARM64: orr w[[CONST15:[0-9]+]], wzr, #0xf +; CHECK: movz {{[wx]}}[[CONST9:[0-9]+]], #9 +; CHECK: csel [[MAYBETRUE:x[0-9]+]], x[[CONST9]], x[[CONST15]], eq +; CHECK: csel {{x[0-9]+}}, x[[CONST9]], [[MAYBETRUE]], vs ret void ; CHECK: ret diff --git a/test/CodeGen/AArch64/i128-align.ll b/test/CodeGen/AArch64/i128-align.ll index 765e868cfc0..fb363a9591b 100644 --- a/test/CodeGen/AArch64/i128-align.ll +++ b/test/CodeGen/AArch64/i128-align.ll @@ -14,7 +14,7 @@ define i64 @check_size() { %diff = sub i64 %endi, %starti ret i64 %diff -; CHECK: {{movz x0, #48|orr x0, xzr, #0x30}} +; CHECK: {{movz x0, #48|orr w0, wzr, #0x30}} } define i64 @check_field() { @@ -26,5 +26,5 @@ define i64 @check_field() { %diff = sub i64 %endi, %starti ret i64 %diff -; CHECK: {{movz x0, #16|orr x0, xzr, #0x10}} +; CHECK: {{movz x0, #16|orr w0, wzr, #0x10}} } diff --git a/test/CodeGen/ARM64/aapcs.ll b/test/CodeGen/ARM64/aapcs.ll index fc1266ccfed..e4889b77166 100644 --- a/test/CodeGen/ARM64/aapcs.ll +++ b/test/CodeGen/ARM64/aapcs.ll @@ -80,7 +80,7 @@ declare void @variadic(i32 %a, ...) 
define void @test_variadic() { call void(i32, ...)* @variadic(i32 0, i64 1, double 2.0) ; CHECK: fmov d0, #2.0 -; CHECK: orr x1, xzr, #0x1 +; CHECK: orr w1, wzr, #0x1 ; CHECK: bl variadic ret void } diff --git a/test/CodeGen/ARM64/atomic.ll b/test/CodeGen/ARM64/atomic.ll index 4a957b8954e..13502f474da 100644 --- a/test/CodeGen/ARM64/atomic.ll +++ b/test/CodeGen/ARM64/atomic.ll @@ -17,14 +17,14 @@ define i32 @val_compare_and_swap(i32* %p) { define i64 @val_compare_and_swap_64(i64* %p) { ; CHECK-LABEL: val_compare_and_swap_64: -; CHECK: orr [[NEWVAL_REG:x[0-9]+]], xzr, #0x4 -; CHECK: orr [[OLDVAL_REG:x[0-9]+]], xzr, #0x7 +; CHECK: orr w[[NEWVAL_REG:[0-9]+]], wzr, #0x4 +; CHECK: orr w[[OLDVAL_REG:[0-9]+]], wzr, #0x7 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: ; CHECK: ldxr [[RESULT:x[0-9]+]], [x0] -; CHECK: cmp [[RESULT]], [[OLDVAL_REG]] +; CHECK: cmp [[RESULT]], x[[OLDVAL_REG]] ; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] -; CHECK-NOT: stxr [[NEWVAL_REG]], [[NEWVAL_REG]] -; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0] +; CHECK-NOT: stxr x[[NEWVAL_REG]], x[[NEWVAL_REG]] +; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], x[[NEWVAL_REG]], [x0] ; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] ; CHECK: [[LABEL2]]: %val = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic @@ -47,10 +47,10 @@ define i32 @fetch_and_nand(i32* %p) { define i64 @fetch_and_nand_64(i64* %p) { ; CHECK-LABEL: fetch_and_nand_64: -; CHECK: orr [[OLDVAL_REG:x[0-9]+]], xzr, #0x7 +; CHECK: orr w[[OLDVAL_REG:[0-9]+]], wzr, #0x7 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: ; CHECK: ldaxr [[DEST_REG:x[0-9]+]], [x0] -; CHECK: bic [[SCRATCH2_REG:x[0-9]+]], [[OLDVAL_REG]], [[DEST_REG]] +; CHECK: bic [[SCRATCH2_REG:x[0-9]+]], x[[OLDVAL_REG]], [[DEST_REG]] ; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] ; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] ; CHECK: mov x0, [[DEST_REG]] @@ -74,10 +74,10 @@ define i32 @fetch_and_or(i32* %p) { define i64 @fetch_and_or_64(i64* %p) { ; CHECK: fetch_and_or_64: -; CHECK: orr 
[[OLDVAL_REG:x[0-9]+]], xzr, #0x7 +; CHECK: orr w[[OLDVAL_REG:[0-9]+]], wzr, #0x7 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: ; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x0] -; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], [[OLDVAL_REG]] +; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], x[[OLDVAL_REG]] ; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] ; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] ; CHECK: mov x0, [[DEST_REG]] diff --git a/test/CodeGen/ARM64/bitfield-extract.ll b/test/CodeGen/ARM64/bitfield-extract.ll index 96b6967a973..40dee710aa9 100644 --- a/test/CodeGen/ARM64/bitfield-extract.ll +++ b/test/CodeGen/ARM64/bitfield-extract.ll @@ -376,10 +376,10 @@ entry: ; CHECK-LABEL: fct17: ; CHECK: ldr [[REG1:x[0-9]+]], ; Create the constant -; CHECK: movz [[REGCST:x[0-9]+]], #26, lsl #16 -; CHECK: movk [[REGCST]], #33120 +; CHECK: movz w[[REGCST:[0-9]+]], #26, lsl #16 +; CHECK: movk w[[REGCST]], #33120 ; Do the masking -; CHECK: and [[REG2:x[0-9]+]], [[REG1]], [[REGCST]] +; CHECK: and [[REG2:x[0-9]+]], [[REG1]], x[[REGCST]] ; CHECK-NEXT: bfm [[REG2]], x1, #16, #18 ; lsr is an alias of ubfm ; CHECK-NEXT: ubfm [[REG3:x[0-9]+]], [[REG2]], #2, #61 diff --git a/test/CodeGen/ARM64/const-addr.ll b/test/CodeGen/ARM64/const-addr.ll index c77a6db8fe5..977628a0921 100644 --- a/test/CodeGen/ARM64/const-addr.ll +++ b/test/CodeGen/ARM64/const-addr.ll @@ -5,8 +5,8 @@ ; Test if the constant base address gets only materialized once. 
define i32 @test1() nounwind { ; CHECK-LABEL: test1 -; CHECK: movz x8, #1039, lsl #16 -; CHECK-NEXT: movk x8, #49152 +; CHECK: movz w8, #1039, lsl #16 +; CHECK-NEXT: movk w8, #49152 ; CHECK-NEXT: ldp w9, w10, [x8, #4] ; CHECK: ldr w8, [x8, #12] %at = inttoptr i64 68141056 to %T* diff --git a/test/CodeGen/ARM64/csel.ll b/test/CodeGen/ARM64/csel.ll index cbf17698970..d0ee61c1c1e 100644 --- a/test/CodeGen/ARM64/csel.ll +++ b/test/CodeGen/ARM64/csel.ll @@ -126,7 +126,7 @@ define i64 @foo10(i64 %v) nounwind readnone optsize ssp { entry: ; CHECK-LABEL: foo10: ; CHECK: cmp x0, #0 -; CHECK: orr x[[REG:[0-9]+]], xzr, #0x4 +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4 ; CHECK: csinv x0, x[[REG]], x[[REG]], ne %tobool = icmp ne i64 %v, 0 %cond = select i1 %tobool, i64 4, i64 -5 @@ -148,7 +148,7 @@ define i64 @foo12(i64 %v) nounwind readnone optsize ssp { entry: ; CHECK-LABEL: foo12: ; CHECK: cmp x0, #0 -; CHECK: orr x[[REG:[0-9]+]], xzr, #0x4 +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4 ; CHECK: csneg x0, x[[REG]], x[[REG]], ne %tobool = icmp ne i64 %v, 0 %cond = select i1 %tobool, i64 4, i64 -4 @@ -203,7 +203,7 @@ define i64 @foo17(i64 %a, i64 %b) nounwind readnone optsize ssp { entry: ; CHECK-LABEL: foo17: ; CHECK: cmp x0, x1 -; CHECK: orr x[[REG:[0-9]+]], xzr, #0x1 +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1 ; CHECK: csinc x0, x[[REG]], x[[REG]], le %cmp = icmp sgt i64 %a, %b %. = select i1 %cmp, i64 2, i64 1 @@ -214,7 +214,7 @@ define i64 @foo18(i64 %a, i64 %b) nounwind readnone optsize ssp { entry: ; CHECK-LABEL: foo18: ; CHECK: cmp x0, x1 -; CHECK: orr x[[REG:[0-9]+]], xzr, #0x1 +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1 ; CHECK: csinc x0, x[[REG]], x[[REG]], gt %cmp = icmp sgt i64 %a, %b %. 
= select i1 %cmp, i64 1, i64 2 diff --git a/test/CodeGen/ARM64/long-shift.ll b/test/CodeGen/ARM64/long-shift.ll index 6f37044d1ab..caa486a815b 100644 --- a/test/CodeGen/ARM64/long-shift.ll +++ b/test/CodeGen/ARM64/long-shift.ll @@ -3,8 +3,8 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: shl: ; CHECK: lslv [[XREG_0:x[0-9]+]], x1, x2 -; CHECK-NEXT: orr [[XREG_1:x[0-9]+]], xzr, #0x40 -; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], [[XREG_1]], x2 +; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40 +; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2 ; CHECK-NEXT: lsrv [[XREG_3:x[0-9]+]], x0, [[XREG_2]] ; CHECK-NEXT: orr [[XREG_6:x[0-9]+]], [[XREG_3]], [[XREG_0]] ; CHECK-NEXT: sub [[XREG_4:x[0-9]+]], x2, #64 @@ -20,10 +20,10 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone { } define i128 @ashr(i128 %r, i128 %s) nounwind readnone { -; CHECK: ashr: +; CHECK-LABEL: ashr: ; CHECK: lsrv [[XREG_0:x[0-9]+]], x0, x2 -; CHECK-NEXT: orr [[XREG_1:x[0-9]+]], xzr, #0x40 -; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], [[XREG_1]], x2 +; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40 +; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2 ; CHECK-NEXT: lslv [[XREG_3:x[0-9]+]], x1, [[XREG_2]] ; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]] ; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64 @@ -40,10 +40,10 @@ define i128 @ashr(i128 %r, i128 %s) nounwind readnone { } define i128 @lshr(i128 %r, i128 %s) nounwind readnone { -; CHECK: lshr: +; CHECK-LABEL: lshr: ; CHECK: lsrv [[XREG_0:x[0-9]+]], x0, x2 -; CHECK-NEXT: orr [[XREG_1:x[0-9]+]], xzr, #0x40 -; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], [[XREG_1]], x2 +; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40 +; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2 ; CHECK-NEXT: lslv [[XREG_3:x[0-9]+]], x1, [[XREG_2]] ; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]] ; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64 diff --git a/test/CodeGen/ARM64/patchpoint.ll b/test/CodeGen/ARM64/patchpoint.ll index 
9e5ed6f40a4..dd555b0f3e5 100644 --- a/test/CodeGen/ARM64/patchpoint.ll +++ b/test/CodeGen/ARM64/patchpoint.ll @@ -67,11 +67,11 @@ define i64 @jscall_patchpoint_codegen2(i64 %callee) { entry: ; CHECK-LABEL: jscall_patchpoint_codegen2: ; CHECK: Ltmp -; CHECK: orr x{{.+}}, xzr, #0x6 +; CHECK: orr w{{.+}}, wzr, #0x6 ; CHECK-NEXT: str x{{.+}}, [sp, #24] ; CHECK-NEXT: orr w{{.+}}, wzr, #0x4 ; CHECK-NEXT: str w{{.+}}, [sp, #16] -; CHECK-NEXT: orr x{{.+}}, xzr, #0x2 +; CHECK-NEXT: orr w{{.+}}, wzr, #0x2 ; CHECK-NEXT: str x{{.+}}, [sp] ; CHECK: Ltmp ; CHECK-NEXT: movz x16, #65535, lsl #32 @@ -88,15 +88,15 @@ define i64 @jscall_patchpoint_codegen3(i64 %callee) { entry: ; CHECK-LABEL: jscall_patchpoint_codegen3: ; CHECK: Ltmp -; CHECK: movz x{{.+}}, #10 +; CHECK: movz w{{.+}}, #10 ; CHECK-NEXT: str x{{.+}}, [sp, #48] ; CHECK-NEXT: orr w{{.+}}, wzr, #0x8 ; CHECK-NEXT: str w{{.+}}, [sp, #36] -; CHECK-NEXT: orr x{{.+}}, xzr, #0x6 +; CHECK-NEXT: orr w{{.+}}, wzr, #0x6 ; CHECK-NEXT: str x{{.+}}, [sp, #24] ; CHECK-NEXT: orr w{{.+}}, wzr, #0x4 ; CHECK-NEXT: str w{{.+}}, [sp, #16] -; CHECK-NEXT: orr x{{.+}}, xzr, #0x2 +; CHECK-NEXT: orr w{{.+}}, wzr, #0x2 ; CHECK-NEXT: str x{{.+}}, [sp] ; CHECK: Ltmp ; CHECK-NEXT: movz x16, #65535, lsl #32