From d7c4a4a6c048d4174b8795598f50fd76c30731ed Mon Sep 17 00:00:00 2001 From: Nate Begeman Date: Wed, 11 May 2005 23:43:56 +0000 Subject: [PATCH] Necessary changes to codegen cttz efficiently on PowerPC 1. Teach LegalizeDAG how to better legalize CTTZ if the target doesn't have CTPOP, but does have CTLZ 2. Teach PPC32 how to do sub x, const -> add x, -const for valid consts 3. Teach PPC32 how to do and (xor a, -1) b -> andc b, a 4. Teach PPC32 that ISD::CTLZ -> PPC::CNTLZW git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@21880 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 16 ++++++++--- lib/Target/PowerPC/PPCISelPattern.cpp | 34 +++++++++++++++++++----- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a511202a534..08e8d776fd4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1156,15 +1156,25 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) { break; } case ISD::CTTZ: { - // for now, we use: { return popcount(~x & (x - 1)); } - // but see also http://www.hackersdelight.org/HDcode/ntz.cc + // for now, we use: { return popcount(~x & (x - 1)); } + // unless the target has ctlz but not ctpop, in which case we use: + // { return 32 - nlz(~x & (x-1)); } + // see also http://www.hackersdelight.org/HDcode/ntz.cc MVT::ValueType VT = Tmp1.getValueType(); Tmp2 = DAG.getConstant(~0ULL, VT); Tmp3 = DAG.getNode(ISD::AND, VT, DAG.getNode(ISD::XOR, VT, Tmp1, Tmp2), DAG.getNode(ISD::SUB, VT, Tmp1, DAG.getConstant(1, VT))); - Result = LegalizeOp(DAG.getNode(ISD::CTPOP, VT, Tmp3)); + // If ISD::CTLZ is legal and CTPOP isn't, then do that instead + if (TLI.getOperationAction(ISD::CTPOP, VT) != TargetLowering::Legal && + TLI.getOperationAction(ISD::CTLZ, VT) == TargetLowering::Legal) { + Result = LegalizeOp(DAG.getNode(ISD::SUB, VT, + DAG.getConstant(getSizeInBits(VT), VT), + 
DAG.getNode(ISD::CTLZ, VT, Tmp3))); + } else { + Result = LegalizeOp(DAG.getNode(ISD::CTPOP, VT, Tmp3)); + } break; } default: diff --git a/lib/Target/PowerPC/PPCISelPattern.cpp b/lib/Target/PowerPC/PPCISelPattern.cpp index 6d687ef9f3e..536a07710de 100644 --- a/lib/Target/PowerPC/PPCISelPattern.cpp +++ b/lib/Target/PowerPC/PPCISelPattern.cpp @@ -69,10 +69,9 @@ namespace { setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); - //PowerPC has these, but they are not implemented + //PowerPC does not have CTPOP or CTTZ setOperationAction(ISD::CTPOP, MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); - setOperationAction(ISD::CTLZ , MVT::i32 , Expand); setSetCCResultContents(ZeroOrOneSetCCResult); addLegalFPImmediate(+0.0); // Necessary for FSEL @@ -654,9 +653,13 @@ static unsigned getImmediateForOpcode(SDOperand N, unsigned Opcode, if ((v & 0x0000FFFF) == 0) { Imm = v >> 16; return 2; } break; case ISD::MUL: - case ISD::SUB: if (v <= 32767 && v >= -32768) { Imm = v & 0xFFFF; return 1; } break; + case ISD::SUB: + // handle subtract-from separately from subtract, since subi is really addi + if (U && v <= 32767 && v >= -32768) { Imm = v & 0xFFFF; return 1; } + if (!U && v <= 32768 && v >= -32767) { Imm = (-v) & 0xFFFF; return 1; } + break; case ISD::SETCC: if (U && (v >= 0 && v <= 65535)) { Imm = v & 0xFFFF; return 1; } if (!U && (v <= 32767 && v >= -32768)) { Imm = v & 0xFFFF; return 1; } @@ -1817,6 +1820,11 @@ unsigned ISel::SelectExpr(SDOperand N, bool Recording) { } return Result; + case ISD::CTLZ: + Tmp1 = SelectExpr(N.getOperand(0)); + BuildMI(BB, PPC::CNTLZW, 1, Result).addReg(Tmp1); + return Result; + case ISD::ADD: assert (DestType == MVT::i32 && "Only do arithmetic on i32s!"); Tmp1 = SelectExpr(N.getOperand(0)); @@ -1851,6 +1859,16 @@ unsigned ISel::SelectExpr(SDOperand N, bool Recording) { switch(getImmediateForOpcode(N.getOperand(1), opcode, Tmp2)) { default: assert(0 && "unhandled result 
code"); case 0: // No immediate + // Check for andc: and, (xor a, -1), b + if (N.getOperand(0).getOpcode() == ISD::XOR && + N.getOperand(0).getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(N.getOperand(0).getOperand(1))->isAllOnesValue()) { + Tmp1 = SelectExpr(N.getOperand(0).getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + BuildMI(BB, PPC::ANDC, 2, Result).addReg(Tmp2).addReg(Tmp1); + return Result; + } + // It wasn't and-with-complement, emit a regular and Tmp1 = SelectExpr(N.getOperand(0)); Tmp2 = SelectExpr(N.getOperand(1)); Opc = Recording ? PPC::ANDo : PPC::AND; @@ -1976,11 +1994,15 @@ unsigned ISel::SelectExpr(SDOperand N, bool Recording) { } case ISD::SUB: - Tmp2 = SelectExpr(N.getOperand(1)); - if (1 == getImmediateForOpcode(N.getOperand(0), opcode, Tmp1)) + if (1 == getImmediateForOpcode(N.getOperand(0), opcode, Tmp1, true)) { + Tmp2 = SelectExpr(N.getOperand(1)); BuildMI(BB, PPC::SUBFIC, 2, Result).addReg(Tmp2).addSImm(Tmp1); - else { + } else if (1 == getImmediateForOpcode(N.getOperand(1), opcode, Tmp2)) { Tmp1 = SelectExpr(N.getOperand(0)); + BuildMI(BB, PPC::ADDI, 2, Result).addReg(Tmp1).addSImm(Tmp2); + } else { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); BuildMI(BB, PPC::SUBF, 2, Result).addReg(Tmp2).addReg(Tmp1); } return Result; -- 2.34.1