From: Hal Finkel Date: Sat, 3 Jan 2015 01:16:37 +0000 (+0000) Subject: [PowerPC] Add support for the CMPB instruction X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=958b670c3404071548deb75df55c0b8a1a6e3e13;p=oota-llvm.git [PowerPC] Add support for the CMPB instruction Newer POWER cores, and the A2, support the cmpb instruction. This instruction compares its operands, treating each of the 8 bytes in the GPRs separately, returning a 'mask' result of 0 (for false) or -1 (for true) in each byte. Code generation support is added, in the form of a PPCISelDAGToDAG DAG-preprocessing routine, that recognizes patterns close to what the instruction computes (either exactly, or related by a constant masking operation), and generates the cmpb instruction (along with any necessary constant masking operation). This can be expanded if use cases arise. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225106 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 4c9581927d7..a7fd62c0730 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -88,6 +88,8 @@ def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", "Enable the popcnt[dw] instructions">; def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true", "Enable the ldbrx instruction">; +def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true", + "Enable the cmpb instruction">; def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; def FeatureMSYNC : SubtargetFeature<"msync", "HasOnlyMSYNC", "true", @@ -116,7 +118,6 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", // Note: Future features to add when support is extended to more // recent ISA levels: // -// CMPB p6, p6x, p7 cmpb // DFP p6, p6x, p7 decimal floating-point instructions // POPCNTB p5 through p7 popcntb and related instructions @@ -258,7 +259,7 @@ def : ProcessorModel<"a2", PPCA2Model, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, DeprecatedMFTB]>; def : ProcessorModel<"a2q", PPCA2Model, [DirectiveA2, FeatureBookE, FeatureMFOCRF, @@ -266,7 +267,7 @@ def : ProcessorModel<"a2q", PPCA2Model, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, FeatureQPX, DeprecatedMFTB]>; def : ProcessorModel<"pwr3", G5Model, [DirectivePwr3, FeatureAltivec, @@ -292,14 +293,14 @@ def : ProcessorModel<"pwr6", G5Model, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, FeatureFPRND, Feature64Bit /*, Feature64BitRegs */, DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr6x", G5Model, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, - FeatureSTFIWX, FeatureLFIWAX, + FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, FeatureFPRND, Feature64Bit, DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr7", P7Model, @@ -308,7 +309,7 @@ def : ProcessorModel<"pwr7", P7Model, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureLDBRX, + FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr8", P8Model, @@ -317,7 +318,7 @@ def : ProcessorModel<"pwr8", P8Model, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureLDBRX, + FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, DeprecatedMFTB, DeprecatedDST]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index efc0b7fa2f0..c6dc50143c0 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -83,6 +83,7 @@ namespace { return true; } + void PreprocessISelDAG() override; void PostprocessISelDAG() override; /// getI32Imm - Return a target constant with the specified value, of type @@ -215,6 +216,8 @@ private: void PeepholePPC64ZExt(); void PeepholeCROps(); + SDValue combineToCMPB(SDNode *N); + bool AllUsersSelectZero(SDNode *N); void SwapAllSelectUsers(SDNode *N); }; @@ -684,7 +687,6 @@ static SDNode *SelectInt64(SelectionDAG *CurDAG, SDNode *N) { return SelectInt64(CurDAG, dl, Imm); } - namespace { class BitPermutationSelector { struct ValueBit { @@ -2872,6 +2874,254 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } +// If the target supports the cmpb instruction, do the idiom recognition here. +// We don't do this as a DAG combine because we don't want to do it as nodes +// are being combined (because we might miss part of the eventual idiom). We +// don't want to do it during instruction selection because we want to reuse +// the logic for lowering the masking operations already part of the +// instruction selector. +SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { + SDLoc dl(N); + + assert(N->getOpcode() == ISD::OR && + "Only OR nodes are supported for CMPB"); + + SDValue Res; + if (!PPCSubTarget->hasCMPB()) + return Res; + + if (N->getValueType(0) != MVT::i32 && + N->getValueType(0) != MVT::i64) + return Res; + + EVT VT = N->getValueType(0); + + SDValue RHS, LHS; + bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + uint64_t Mask = 0, Alt = 0; + + auto IsByteSelectCC = [this](SDValue O, unsigned &b, + uint64_t &Mask, uint64_t &Alt, + SDValue &LHS, SDValue &RHS) { + if (O.getOpcode() != ISD::SELECT_CC) + return false; + ISD::CondCode CC = cast(O.getOperand(4))->get(); + + if (!isa(O.getOperand(2)) || + !isa(O.getOperand(3))) + return false; + + uint64_t PM = O.getConstantOperandVal(2); + uint64_t PAlt = O.getConstantOperandVal(3); + for (b = 0; b < 8; ++b) { + uint64_t Mask = UINT64_C(0xFF) << (8*b); + if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt) + break; + } + + if (b == 8) + return false; + Mask |= PM; + Alt |= PAlt; + + if (!isa(O.getOperand(1)) || + O.getConstantOperandVal(1) != 0) { + SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1); + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + if (Op1.getOpcode() == ISD::TRUNCATE) + Op1 = Op1.getOperand(0); + + if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL && + Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && + isa(Op0.getOperand(1))) { + + unsigned Bits = Op0.getValueType().getSizeInBits(); + if (b != Bits/8-1) + return false; + if (Op0.getConstantOperandVal(1) != Bits-8) + return false; + + LHS = Op0.getOperand(0); + RHS = Op1.getOperand(0); + return true; + } + + // When we have small integers (i16 to be specific), the form present + // post-legalization uses SETULT in the SELECT_CC for the + // higher-order byte, depending on the fact that the + // even-higher-order bytes are known to all be zero, for example: + // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult + // (so when the second byte is the same, because all higher-order + // bits from bytes 3 and 4 are known to be zero, the result of the + // xor can be at most 255) + if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT && + isa(O.getOperand(1))) { + + uint64_t ULim = O.getConstantOperandVal(1); + if (ULim != (UINT64_C(1) << b*8)) + return false; + + // Now we need to make sure that the upper bytes are known to be + // zero. + unsigned Bits = Op0.getValueType().getSizeInBits(); + if (!CurDAG->MaskedValueIsZero(Op0, + APInt::getHighBitsSet(Bits, Bits - (b+1)*8))) + return false; + + LHS = Op0.getOperand(0); + RHS = Op0.getOperand(1); + return true; + } + + return false; + } + + if (CC != ISD::SETEQ) + return false; + + SDValue Op = O.getOperand(0); + if (Op.getOpcode() == ISD::AND) { + if (!isa(Op.getOperand(1))) + return false; + if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b))) + return false; + + SDValue XOR = Op.getOperand(0); + if (XOR.getOpcode() == ISD::TRUNCATE) + XOR = XOR.getOperand(0); + if (XOR.getOpcode() != ISD::XOR) + return false; + + LHS = XOR.getOperand(0); + RHS = XOR.getOperand(1); + return true; + } else if (Op.getOpcode() == ISD::SRL) { + if (!isa(Op.getOperand(1))) + return false; + unsigned Bits = Op.getValueType().getSizeInBits(); + if (b != Bits/8-1) + return false; + if (Op.getConstantOperandVal(1) != Bits-8) + return false; + + SDValue XOR = Op.getOperand(0); + if (XOR.getOpcode() == ISD::TRUNCATE) + XOR = XOR.getOperand(0); + if (XOR.getOpcode() != ISD::XOR) + return false; + + LHS = XOR.getOperand(0); + RHS = XOR.getOperand(1); + return true; + } + + return false; + }; + + SmallVector Queue(1, SDValue(N, 0)); + while (!Queue.empty()) { + SDValue V = Queue.pop_back_val(); + + for (const SDValue &O : V.getNode()->ops()) { + unsigned b; + uint64_t M = 0, A = 0; + SDValue OLHS, ORHS; + if (O.getOpcode() == ISD::OR) { + Queue.push_back(O); + } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) { + if (!LHS) { + LHS = OLHS; + RHS = ORHS; + BytesFound[b] = true; + Mask |= M; + Alt |= A; + } else if ((LHS == ORHS && RHS == OLHS) || + (RHS == ORHS && LHS == OLHS)) { + BytesFound[b] = true; + Mask |= M; + Alt |= A; + } else { + return Res; + } + } else { + return Res; + } + } + } + + unsigned LastB = 0, BCnt = 0; + for (unsigned i = 0; i < 8; ++i) + if (BytesFound[LastB]) { + ++BCnt; + LastB = i; + } + + if (!LastB || BCnt < 2) + return Res; + + // Because we'll be zero-extending the output anyway if don't have a specific + // value for each input byte (via the Mask), we can 'anyext' the inputs. + if (LHS.getValueType() != VT) { + LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT); + RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT); + } + + Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS); + + bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1); + if (NonTrivialMask && !Alt) { + // Res = Mask & CMPB + Res = CurDAG->getNode(ISD::AND, dl, VT, Res, CurDAG->getConstant(Mask, VT)); + } else if (Alt) { + // Res = (CMPB & Mask) | (~CMPB & Alt) + // Which, as suggested here: + // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge + // can be written as: + // Res = Alt ^ ((Alt ^ Mask) & CMPB) + // useful because the (Alt ^ Mask) can be pre-computed. + Res = CurDAG->getNode(ISD::AND, dl, VT, Res, + CurDAG->getConstant(Mask ^ Alt, VT)); + Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, CurDAG->getConstant(Alt, VT)); + } + + return Res; +} + +void PPCDAGToDAGISel::PreprocessISelDAG() { + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; + + bool MadeChange = false; + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = --Position; + if (N->use_empty()) + continue; + + SDValue Res; + switch (N->getOpcode()) { + default: break; + case ISD::OR: + Res = combineToCMPB(N); + break; + } + + if (Res) { + DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\nNew: "); + DEBUG(Res.getNode()->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); + MadeChange = true; + } + } + + if (MadeChange) + CurDAG->RemoveDeadNodes(); +} + /// PostprocessISelDAG - Perform some late peephole optimizations /// on the DAG representation. void PPCDAGToDAGISel::PostprocessISelDAG() { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index e430b07d143..32f958ebad7 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -759,6 +759,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; case PPCISD::VPERM: return "PPCISD::VPERM"; + case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 089cfe73c47..b4b11d846e3 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -61,6 +61,9 @@ namespace llvm { /// VPERM, + /// The CMPB instruction (takes two operands of i32 or i64). + CMPB, + /// Hi/Lo - These represent the high and low 16-bit parts of a global /// address respectively. These nodes have two operands, the first of /// which must be a TargetGlobalAddress, and the second of which must be a diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 7d1249c2ce7..75f4656d01b 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -591,6 +591,11 @@ def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), "popcntd $rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctpop i64:$rS))]>; +let isCodeGenOnly = 1, isCommutable = 1 in +def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "cmpb $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (PPCcmpb i64:$rS, i64:$rB))]>; + // popcntw also does a population count on the high 32 bits (storing the // results in the high 32-bits of the output). We'll ignore that here (which is // safe because we never separately use the high part of the 64-bit registers). diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 4c01b2dee0c..aec1385aaf5 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -118,6 +118,8 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; +def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>; + // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift // amounts. These nodes are generated by the multi-precision shift code. def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>; @@ -1865,6 +1867,11 @@ defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS), defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS), "extsh", "$rA, $rS", IIC_IntSimple, [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; + +let isCommutable = 1 in +def CMPB : XForm_6<31, 508, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "cmpb $rA, $rS, $rB", IIC_IntGeneral, + [(set i32:$rA, (PPCcmpb i32:$rS, i32:$rB))]>; } let isCompare = 1, hasSideEffects = 0 in { def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 8f0e3a49ee9..fb47c910c2f 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -108,6 +108,7 @@ void PPCSubtarget::initializeEnvironment() { HasFPCVT = false; HasISEL = false; HasPOPCNTD = false; + HasCMPB = false; HasLDBRX = false; IsBookE = false; HasOnlyMSYNC = false; diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 1df19c3e1eb..de5f92a971b 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -102,6 +102,7 @@ protected: bool HasFPCVT; bool HasISEL; bool HasPOPCNTD; + bool HasCMPB; bool HasLDBRX; bool IsBookE; bool HasOnlyMSYNC; @@ -220,6 +221,7 @@ public: bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } + bool hasCMPB() const { return HasCMPB; } bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } bool hasOnlyMSYNC() const { return HasOnlyMSYNC; } diff --git a/test/CodeGen/PowerPC/cmpb-ppc32.ll b/test/CodeGen/PowerPC/cmpb-ppc32.ll new file mode 100644 index 00000000000..639ed887b97 --- /dev/null +++ b/test/CodeGen/PowerPC/cmpb-ppc32.ll @@ -0,0 +1,50 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-m:e-p:32:32-i64:64-n32" +target triple = "powerpc-unknown-linux-gnu" + +; Function Attrs: nounwind readnone +define zeroext i16 @test16(i16 zeroext %x, i16 zeroext %y) #0 { +entry: + %0 = xor i16 %y, %x + %1 = and i16 %0, 255 + %cmp = icmp eq i16 %1, 0 + %cmp20 = icmp ult i16 %0, 256 + %conv25 = select i1 %cmp, i32 255, i32 0 + %conv27 = select i1 %cmp20, i32 65280, i32 0 + %or = or i32 %conv25, %conv27 + %conv29 = trunc i32 %or to i16 + ret i16 %conv29 + +; CHECK-LABEL: @test16 +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rlwinm 3, [[REG1]], 0, 16, 31 +; CHECK: blr +} + +define i32 @test32(i32 %x, i32 %y) #0 { +entry: + %0 = xor i32 %y, %x + %1 = and i32 %0, 255 + %cmp = icmp eq i32 %1, 0 + %2 = and i32 %0, 65280 + %cmp28 = icmp eq i32 %2, 0 + %3 = and i32 %0, 16711680 + %cmp34 = icmp eq i32 %3, 0 + %cmp40 = icmp ult i32 %0, 16777216 + %conv44 = select i1 %cmp, i32 255, i32 0 + %conv45 = select i1 %cmp28, i32 65280, i32 0 + %conv47 = select i1 %cmp34, i32 16711680, i32 0 + %conv50 = select i1 %cmp40, i32 -16777216, i32 0 + %or = or i32 %conv45, %conv50 + %or49 = or i32 %or, %conv44 + %or52 = or i32 %or49, %conv47 + ret i32 %or52 + +; CHECK-LABEL: @test32 +; CHECK: cmpb 3, 4, 3 +; CHECK-NOT: rlwinm +; CHECK: blr +} + +attributes #0 = { nounwind readnone } + diff --git a/test/CodeGen/PowerPC/cmpb.ll b/test/CodeGen/PowerPC/cmpb.ll new file mode 100644 index 00000000000..7d0c0ab3316 --- /dev/null +++ b/test/CodeGen/PowerPC/cmpb.ll @@ -0,0 +1,204 @@ +; RUN: llc -mcpu pwr7 < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind readnone +define zeroext i16 @test16(i16 zeroext %x, i16 zeroext %y) #0 { +entry: + %0 = xor i16 %y, %x + %1 = and i16 %0, 255 + %cmp = icmp eq i16 %1, 0 + %cmp20 = icmp ult i16 %0, 256 + %conv25 = select i1 %cmp, i32 255, i32 0 + %conv27 = select i1 %cmp20, i32 65280, i32 0 + %or = or i32 %conv25, %conv27 + %conv29 = trunc i32 %or to i16 + ret i16 %conv29 + +; CHECK-LABEL: @test16 +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rldicl 3, [[REG1]], 0, 48 +; CHECK: blr +} + +define zeroext i16 @test16p1(i16 zeroext %x, i16 zeroext %y) #0 { +entry: + %0 = xor i16 %y, %x + %1 = and i16 %0, 255 + %cmp = icmp eq i16 %1, 0 + %cmp20 = icmp ult i16 %0, 256 + %conv28 = select i1 %cmp, i32 5, i32 0 + %conv30 = select i1 %cmp20, i32 65280, i32 0 + %or = or i32 %conv28, %conv30 + %conv32 = trunc i32 %or to i16 + ret i16 %conv32 + +; CHECK-LABEL: @test16p1 +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: andi. 3, [[REG1]], 65285 +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define zeroext i16 @test16p2(i16 zeroext %x, i16 zeroext %y) #0 { +entry: + %0 = xor i16 %y, %x + %1 = and i16 %0, 255 + %cmp = icmp eq i16 %1, 0 + %cmp20 = icmp ult i16 %0, 256 + %conv28 = select i1 %cmp, i32 255, i32 0 + %conv30 = select i1 %cmp20, i32 1280, i32 0 + %or = or i32 %conv28, %conv30 + %conv32 = trunc i32 %or to i16 + ret i16 %conv32 + +; CHECK-LABEL: @test16p2 +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: andi. 3, [[REG1]], 1535 +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define zeroext i16 @test16p3(i16 zeroext %x, i16 zeroext %y) #0 { +entry: + %0 = xor i16 %y, %x + %1 = and i16 %0, 255 + %cmp = icmp eq i16 %1, 0 + %cmp20 = icmp ult i16 %0, 256 + %conv27 = select i1 %cmp, i32 255, i32 0 + %conv29 = select i1 %cmp20, i32 1024, i32 1280 + %or = or i32 %conv27, %conv29 + %conv31 = trunc i32 %or to i16 + ret i16 %conv31 + +; CHECK-LABEL: @test16p3 +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 0, 55 +; CHECK: xori 3, [[REG2]], 1280 +; CHECK: blr +} + +define zeroext i32 @test32(i32 zeroext %x, i32 zeroext %y) #0 { +entry: + %0 = xor i32 %y, %x + %1 = and i32 %0, 255 + %cmp = icmp eq i32 %1, 0 + %2 = and i32 %0, 65280 + %cmp28 = icmp eq i32 %2, 0 + %3 = and i32 %0, 16711680 + %cmp34 = icmp eq i32 %3, 0 + %cmp40 = icmp ult i32 %0, 16777216 + %conv44 = select i1 %cmp, i32 255, i32 0 + %conv45 = select i1 %cmp28, i32 65280, i32 0 + %conv47 = select i1 %cmp34, i32 16711680, i32 0 + %conv50 = select i1 %cmp40, i32 -16777216, i32 0 + %or = or i32 %conv45, %conv50 + %or49 = or i32 %or, %conv44 + %or52 = or i32 %or49, %conv47 + ret i32 %or52 + +; CHECK-LABEL: @test32 +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rldicl 3, [[REG1]], 0, 32 +; CHECK: blr +} + +define zeroext i32 @test32p1(i32 zeroext %x, i32 zeroext %y) #0 { +entry: + %0 = xor i32 %y, %x + %1 = and i32 %0, 255 + %cmp = icmp eq i32 %1, 0 + %2 = and i32 %0, 65280 + %cmp28 = icmp eq i32 %2, 0 + %3 = and i32 %0, 16711680 + %cmp34 = icmp eq i32 %3, 0 + %cmp40 = icmp ult i32 %0, 16777216 + %conv47 = select i1 %cmp, i32 255, i32 0 + %conv48 = select i1 %cmp28, i32 65280, i32 0 + %conv50 = select i1 %cmp34, i32 458752, i32 0 + %conv53 = select i1 %cmp40, i32 -16777216, i32 0 + %or = or i32 %conv48, %conv53 + %or52 = or i32 %or, %conv47 + %or55 = or i32 %or52, %conv50 + ret i32 %or55 + +; CHECK-LABEL: @test32p1 +; CHECK: li [[REG1:[0-9]+]], 0 +; CHECK: cmpb [[REG4:[0-9]+]], 4, 3 +; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287 +; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 +; CHECK: and 3, [[REG4]], [[REG3]] +; CHECK: blr +} + +define zeroext i32 @test32p2(i32 zeroext %x, i32 zeroext %y) #0 { +entry: + %0 = xor i32 %y, %x + %1 = and i32 %0, 255 + %cmp = icmp eq i32 %1, 0 + %2 = and i32 %0, 65280 + %cmp22 = icmp eq i32 %2, 0 + %cmp28 = icmp ult i32 %0, 16777216 + %conv32 = select i1 %cmp, i32 255, i32 0 + %conv33 = select i1 %cmp22, i32 65280, i32 0 + %conv35 = select i1 %cmp28, i32 -16777216, i32 0 + %or = or i32 %conv33, %conv35 + %or37 = or i32 %or, %conv32 + ret i32 %or37 + +; CHECK-LABEL: @test32p2 +; CHECK: li [[REG1:[0-9]+]], 0 +; CHECK: cmpb [[REG4:[0-9]+]], 4, 3 +; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280 +; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 +; CHECK: and 3, [[REG4]], [[REG3]] +; CHECK: blr +} + +define i64 @test64(i64 %x, i64 %y) #0 { +entry: + %shr19 = lshr i64 %x, 56 + %conv21 = trunc i64 %shr19 to i32 + %shr43 = lshr i64 %y, 56 + %conv45 = trunc i64 %shr43 to i32 + %0 = xor i64 %y, %x + %1 = and i64 %0, 255 + %cmp = icmp eq i64 %1, 0 + %2 = and i64 %0, 65280 + %cmp52 = icmp eq i64 %2, 0 + %3 = and i64 %0, 16711680 + %cmp58 = icmp eq i64 %3, 0 + %4 = and i64 %0, 4278190080 + %cmp64 = icmp eq i64 %4, 0 + %5 = and i64 %0, 1095216660480 + %cmp70 = icmp eq i64 %5, 0 + %6 = and i64 %0, 280375465082880 + %cmp76 = icmp eq i64 %6, 0 + %7 = and i64 %0, 71776119061217280 + %cmp82 = icmp eq i64 %7, 0 + %cmp88 = icmp eq i32 %conv21, %conv45 + %conv92 = select i1 %cmp, i64 255, i64 0 + %conv93 = select i1 %cmp52, i64 65280, i64 0 + %or = or i64 %conv92, %conv93 + %conv95 = select i1 %cmp58, i64 16711680, i64 0 + %or97 = or i64 %or, %conv95 + %conv98 = select i1 %cmp64, i64 4278190080, i64 0 + %or100 = or i64 %or97, %conv98 + %conv101 = select i1 %cmp70, i64 1095216660480, i64 0 + %or103 = or i64 %or100, %conv101 + %conv104 = select i1 %cmp76, i64 280375465082880, i64 0 + %or106 = or i64 %or103, %conv104 + %conv107 = select i1 %cmp82, i64 71776119061217280, i64 0 + %or109 = or i64 %or106, %conv107 + %conv110 = select i1 %cmp88, i64 -72057594037927936, i64 0 + %or112 = or i64 %or109, %conv110 + ret i64 %or112 + +; CHECK-LABEL: @test64 +; CHECK: cmpb 3, 3, 4 +; CHECK-NOT: rldicl +; CHECK: blr +} + +attributes #0 = { nounwind readnone } + diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding.txt index 2e2e7c1e650..e99d49bbf45 100644 --- a/test/MC/Disassembler/PowerPC/ppc64-encoding.txt +++ b/test/MC/Disassembler/PowerPC/ppc64-encoding.txt @@ -499,6 +499,9 @@ # CHECK: popcntd 2, 3 0x7c 0x62 0x03 0xf4 +# CHECK: cmpb 7, 21, 4 +0x7e 0xa7 0x23 0xf8 + # CHECK: rlwinm 2, 3, 4, 5, 6 0x54 0x62 0x21 0x4c diff --git a/test/MC/PowerPC/ppc64-encoding.s b/test/MC/PowerPC/ppc64-encoding.s index d483f9df6fe..6d09dfd8e5f 100644 --- a/test/MC/PowerPC/ppc64-encoding.s +++ b/test/MC/PowerPC/ppc64-encoding.s @@ -612,7 +612,9 @@ # CHECK-BE: cntlzw. 2, 3 # encoding: [0x7c,0x62,0x00,0x35] # CHECK-LE: cntlzw. 2, 3 # encoding: [0x35,0x00,0x62,0x7c] cntlzw. 2, 3 -# FIXME: cmpb 2, 3, 4 + cmpb 7, 21, 4 +# CHECK-BE: cmpb 7, 21, 4 # encoding: [0x7e,0xa7,0x23,0xf8] +# CHECK-LE: cmpb 7, 21, 4 # encoding: [0xf8,0x23,0xa7,0x7e] # FIXME: popcntb 2, 3 # CHECK-BE: popcntw 2, 3 # encoding: [0x7c,0x62,0x02,0xf4] # CHECK-LE: popcntw 2, 3 # encoding: [0xf4,0x02,0x62,0x7c]