From d9989384592a3bd9dd374470a723ca8303071a2d Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 10 Jul 2006 20:56:58 +0000 Subject: [PATCH] Implement Regression/CodeGen/PowerPC/bswap-load-store.ll by folding bswaps into i16/i32 load/stores. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@29089 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCHazardRecognizers.cpp | 4 ++ lib/Target/PowerPC/PPCISelLowering.cpp | 59 +++++++++++++++++++++ lib/Target/PowerPC/PPCISelLowering.h | 14 ++++- lib/Target/PowerPC/PPCInstrInfo.td | 27 ++++++++++ lib/Target/PowerPC/README.txt | 5 -- 5 files changed, 103 insertions(+), 6 deletions(-) diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 6208d7dc91d..d2f81c673f9 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -178,6 +178,7 @@ getHazardType(SDNode *Node) { case PPC::LHZ: case PPC::LHZX: case PPC::LVEHX: + case PPC::LHBRX: LoadSize = 2; break; case PPC::LFS: @@ -188,6 +189,7 @@ getHazardType(SDNode *Node) { case PPC::LWA: case PPC::LWAX: case PPC::LVEWX: + case PPC::LWBRX: LoadSize = 4; break; case PPC::LFD: @@ -233,6 +235,7 @@ void PPCHazardRecognizer970::EmitInstruction(SDNode *Node) { case PPC::STHX: case PPC::STH: case PPC::STVEHX: + case PPC::STHBRX: ThisStoreSize = 2; break; case PPC::STFS: @@ -243,6 +246,7 @@ void PPCHazardRecognizer970::EmitInstruction(SDNode *Node) { case PPC::STW: case PPC::STVEWX: case PPC::STFIWX: + case PPC::STWBRX: ThisStoreSize = 4; break; case PPC::STD_32: diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index aacadd8500a..bc764bcd689 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -266,6 +266,7 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::BR_CC); + 
setTargetDAGCombine(ISD::BSWAP); computeRegisterProperties(); } @@ -296,6 +297,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::MFCR: return "PPCISD::MFCR"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; + case PPCISD::LBRX: return "PPCISD::LBRX"; + case PPCISD::STBRX: return "PPCISD::STBRX"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; } } @@ -2344,6 +2347,56 @@ SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N, DCI.AddToWorklist(Val.Val); return Val; } + + // Turn STORE (BSWAP) -> sthbrx/stwbrx. + if (N->getOperand(1).getOpcode() == ISD::BSWAP && + N->getOperand(1).Val->hasOneUse() && + (N->getOperand(1).getValueType() == MVT::i32 || + N->getOperand(1).getValueType() == MVT::i16)) { + SDOperand BSwapOp = N->getOperand(1).getOperand(0); + // Do an any-extend to 32-bits if this is a half-word input. + if (BSwapOp.getValueType() == MVT::i16) + BSwapOp = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, BSwapOp); + + return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), BSwapOp, + N->getOperand(2), N->getOperand(3), + DAG.getValueType(N->getOperand(1).getValueType())); + } + break; + case ISD::BSWAP: + // Turn BSWAP (LOAD) -> lhbrx/lwbrx. + if (N->getOperand(0).getOpcode() == ISD::LOAD && + N->getOperand(0).hasOneUse() && + (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) { + SDOperand Load = N->getOperand(0); + // Create the byte-swapping load. + std::vector<MVT::ValueType> VTs; + VTs.push_back(MVT::i32); + VTs.push_back(MVT::Other); + std::vector<SDOperand> Ops; + Ops.push_back(Load.getOperand(0)); // Chain + Ops.push_back(Load.getOperand(1)); // Ptr + Ops.push_back(Load.getOperand(2)); // SrcValue + Ops.push_back(DAG.getValueType(N->getValueType(0))); // VT + SDOperand BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops); + + // If this is an i16 load, insert the truncate. 
+ SDOperand ResVal = BSLoad; + if (N->getValueType(0) == MVT::i16) + ResVal = DAG.getNode(ISD::TRUNCATE, MVT::i16, BSLoad); + + // First, combine the bswap away. This makes the value produced by the + // load dead. + DCI.CombineTo(N, ResVal); + + // Next, combine the load away, we give it a bogus result value but a real + // chain result. The result value is dead because the bswap is dead. + DCI.CombineTo(Load.Val, ResVal, BSLoad.getValue(1)); + + // Return N so it doesn't get rechecked! + return SDOperand(N, 0); + } + break; case PPCISD::VCMP: { // If a VCMPo node already exists with exactly the same operands as this @@ -2477,6 +2530,12 @@ void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, KnownOne = 0; switch (Op.getOpcode()) { default: break; + case PPCISD::LBRX: { + // lhbrx is known to have the top bits cleared out. + if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16) + KnownZero = 0xFFFF0000; + break; + } case ISD::INTRINSIC_WO_CHAIN: { switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) { default: break; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index dbd25e5c85e..d9defe23241 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -111,7 +111,19 @@ namespace llvm { /// condition register to branch on, OPC is the branch opcode to use (e.g. /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is /// an optional input flag argument. - COND_BRANCH + COND_BRANCH, + + /// CHAIN = STBRX CHAIN, GPRC, Ptr, SRCVALUE, Type - This is a + /// byte-swapping store instruction. It byte-swaps the low "Type" bits of + /// the GPRC input, then stores it through Ptr. Type can be either i16 or + /// i32. + STBRX, + + /// GPRC, CHAIN = LBRX CHAIN, Ptr, SRCVALUE, Type - This is a + /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, + /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 + /// or i32. 
+ LBRX }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 911d966c3df..e6354dff5a9 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -37,6 +37,13 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [ SDTCisVT<1, i32>, SDTCisVT<2, OtherVT> ]>; +def SDT_PPClbrx : SDTypeProfile<1, 3, [ + SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT> +]>; +def SDT_PPCstbrx : SDTypeProfile<0, 4, [ + SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT> +]>; + //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. // @@ -88,6 +95,9 @@ def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>; def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, [SDNPHasChain, SDNPOptInFlag]>; +def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, [SDNPHasChain]>; +def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain]>; + //===----------------------------------------------------------------------===// // PowerPC specific transformation functions and pattern fragments. // @@ -464,6 +474,15 @@ def LHZX : XForm_1<31, 279, (ops GPRC:$rD, memrr:$src), def LWZX : XForm_1<31, 23, (ops GPRC:$rD, memrr:$src), "lwzx $rD, $src", LdStGeneral, [(set GPRC:$rD, (load xaddr:$src))]>; + + +def LHBRX : XForm_1<31, 790, (ops GPRC:$rD, memrr:$src), + "lhbrx $rD, $src", LdStGeneral, + [(set GPRC:$rD, (PPClbrx xaddr:$src,srcvalue:$dummy, i16))]>; +def LWBRX : XForm_1<31, 534, (ops GPRC:$rD, memrr:$src), + "lwbrx $rD, $src", LdStGeneral, + [(set GPRC:$rD, (PPClbrx xaddr:$src,srcvalue:$dummy, i32))]>; + } let PPC970_Unit = 1 in { // FXU Operations. 
@@ -517,6 +536,14 @@ def STWX : XForm_8<31, 151, (ops GPRC:$rS, memrr:$dst), def STWUX : XForm_8<31, 183, (ops GPRC:$rS, GPRC:$rA, GPRC:$rB), "stwux $rS, $rA, $rB", LdStGeneral, []>; +def STHBRX: XForm_8<31, 918, (ops GPRC:$rS, memrr:$dst), + "sthbrx $rS, $dst", LdStGeneral, + [(PPCstbrx GPRC:$rS, xaddr:$dst, srcvalue:$dummy, i16)]>, + PPC970_DGroup_Cracked; +def STWBRX: XForm_8<31, 662, (ops GPRC:$rS, memrr:$dst), + "stwbrx $rS, $dst", LdStGeneral, + [(PPCstbrx GPRC:$rS, xaddr:$dst, srcvalue:$dummy, i32)]>, + PPC970_DGroup_Cracked; } let PPC970_Unit = 1 in { // FXU Operations. def SRAWI : XForm_10<31, 824, (ops GPRC:$rA, GPRC:$rS, u5imm:$SH), diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index f95f94d4443..ffe979c603d 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -3,7 +3,6 @@ TODO: * gpr0 allocation * implement do-loop -> bdnz transform -* implement powerpc-64 for darwin ===-------------------------------------------------------------------------=== @@ -238,10 +237,6 @@ just fastcc. ===-------------------------------------------------------------------------=== -Generate lwbrx and other byteswapping load/store instructions when reasonable. - -===-------------------------------------------------------------------------=== - Compile this: int foo(int a) { -- 2.34.1