From 0ef99720c578bb348b0ee9bef862fedcc441b3a1 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 5 Jan 2015 18:09:06 +0000 Subject: [PATCH] [PowerPC] Remove zexts after byte-swapping loads lhbrx and lwbrx not only load their data with byte swapping, but also clear the upper 32 bits (at least). As a result, they can be added to the PPCISelDAGToDAG peephole optimization as frontier instructions for the removal of unnecessary zero extensions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225189 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 9 ++++++++ lib/Target/PowerPC/PPCInstr64Bit.td | 7 ++++++ test/CodeGen/PowerPC/rm-zext.ll | 30 ++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 7d7f557e433..3e787bedd27 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -3729,6 +3729,13 @@ static bool PeepholePPC64ZExtGather(SDValue Op32, return true; } + // LHBRX and LWBRX always clear the higher-order bits. + if (Op32.getMachineOpcode() == PPC::LHBRX || + Op32.getMachineOpcode() == PPC::LWBRX) { + ToPromote.insert(Op32.getNode()); + return true; + } + // Next, check for those instructions we can look through. // Assuming the mask does not wrap around, then the higher-order bits are @@ -3916,6 +3923,8 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() { case PPC::SRW: NewOpcode = PPC::SRW8; break; case PPC::LI: NewOpcode = PPC::LI8; break; case PPC::LIS: NewOpcode = PPC::LIS8; break; + case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break; + case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break; case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break; case PPC::OR: NewOpcode = PPC::OR8; break; case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 75f4656d01b..29b13f8a53a 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -843,6 +843,13 @@ def LDBRX : XForm_1<31, 532, (outs g8rc:$rD), (ins memrr:$src), "ldbrx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; +let mayLoad = 1, hasSideEffects = 0, isCodeGenOnly = 1 in { +def LHBRX8 : XForm_1<31, 790, (outs g8rc:$rD), (ins memrr:$src), + "lhbrx $rD, $src", IIC_LdStLoad, []>; +def LWBRX8 : XForm_1<31, 534, (outs g8rc:$rD), (ins memrr:$src), + "lwbrx $rD, $src", IIC_LdStLoad, []>; +} + let mayLoad = 1, hasSideEffects = 0 in { def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), "ldu $rD, $addr", IIC_LdStLDU, diff --git a/test/CodeGen/PowerPC/rm-zext.ll b/test/CodeGen/PowerPC/rm-zext.ll index 8fab38ad4cf..291b7919280 100644 --- a/test/CodeGen/PowerPC/rm-zext.ll +++ b/test/CodeGen/PowerPC/rm-zext.ll @@ -39,5 +39,35 @@ entry: ; CHECK: blr } +; Function Attrs: nounwind readnone +declare i32 @llvm.bswap.i32(i32) #1 + +; Function Attrs: nounwind readonly +define zeroext i32 @bs32(i32* nocapture readonly %x) #0 { +entry: + %0 = load i32* %x, align 4 + %1 = tail call i32 @llvm.bswap.i32(i32 %0) + ret i32 %1 + +; CHECK-LABEL: @bs32 +; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32 +; CHECK: blr +} + +; Function Attrs: nounwind readonly +define zeroext i16 @bs16(i16* nocapture readonly %x) #0 { +entry: + %0 = load i16* %x, align 2 + %1 = tail call i16 @llvm.bswap.i16(i16 %0) + ret i16 %1 + +; CHECK-LABEL: @bs16 +; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32 +; CHECK: blr +} + +; Function Attrs: nounwind readnone +declare i16 @llvm.bswap.i16(i16) #1 + attributes #0 = { nounwind readnone } -- 2.34.1