From b2c4bf3ff696577e8f00930cabe90d80f275fa80 Mon Sep 17 00:00:00 2001 From: Nate Begeman Date: Wed, 8 Jun 2005 04:14:27 +0000 Subject: [PATCH] Handle some more real world cases of rlwimi. These don't come up that regularly in "normal" code, but for things like software graphics, they make a big difference. For the following code: unsigned short Trans16Bit(unsigned srcA,unsigned srcB,unsigned alpha) { unsigned tmpA,tmpB,mixed; tmpA = ((srcA & 0x03E0) << 15) | (srcA & 0x7C1F); tmpB = ((srcB & 0x03E0) << 15) | (srcB & 0x7C1F); mixed = (tmpA * alpha) + (tmpB * (32 - alpha)); return ((mixed >> 5) & 0x7C1F) | ((mixed >> 20) & 0x03E0); } We now generate: _Trans16Bit: .LBB_Trans16Bit_0: ; entry andi. r2, r4, 31775 rlwimi r2, r4, 15, 7, 11 subfic r4, r5, 32 mullw r2, r2, r4 andi. r4, r3, 31775 rlwimi r4, r3, 15, 7, 11 mullw r3, r4, r5 add r2, r2, r3 srwi r3, r2, 5 andi. r3, r3, 31775 rlwimi r3, r2, 12, 22, 26 blr Instead of: _Trans16Bit: .LBB_Trans16Bit_0: ; entry slwi r2, r4, 15 rlwinm r2, r2, 0, 7, 11 andi. r4, r4, 31775 or r2, r2, r4 subfic r4, r5, 32 mullw r2, r2, r4 slwi r4, r3, 15 rlwinm r4, r4, 0, 7, 11 andi. r3, r3, 31775 or r3, r4, r3 mullw r3, r3, r5 add r2, r2, r3 srwi r3, r2, 5 andi. 
r3, r3, 31775 srwi r2, r2, 20 rlwimi r3, r2, 0, 22, 26 blr git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@22201 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelPattern.cpp | 52 +++++++++++++++++++++------ 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/lib/Target/PowerPC/PPCISelPattern.cpp b/lib/Target/PowerPC/PPCISelPattern.cpp index 410c62e661a..750161caf8a 100644 --- a/lib/Target/PowerPC/PPCISelPattern.cpp +++ b/lib/Target/PowerPC/PPCISelPattern.cpp @@ -982,8 +982,12 @@ void ISel::MoveCRtoGPR(unsigned CCReg, bool Inv, unsigned Idx, unsigned Result){ bool ISel::SelectBitfieldInsert(SDOperand OR, unsigned Result) { bool IsRotate = false; unsigned TgtMask = 0xFFFFFFFF, InsMask = 0xFFFFFFFF, Amount = 0; - unsigned Op0Opc = OR.getOperand(0).getOpcode(); - unsigned Op1Opc = OR.getOperand(1).getOpcode(); + + SDOperand Op0 = OR.getOperand(0); + SDOperand Op1 = OR.getOperand(1); + + unsigned Op0Opc = Op0.getOpcode(); + unsigned Op1Opc = Op1.getOpcode(); // Verify that we have the correct opcodes if (ISD::SHL != Op0Opc && ISD::SRL != Op0Opc && ISD::AND != Op0Opc) @@ -993,7 +997,7 @@ bool ISel::SelectBitfieldInsert(SDOperand OR, unsigned Result) { // Generate Mask value for Target if (ConstantSDNode *CN = - dyn_cast<ConstantSDNode>(OR.getOperand(0).getOperand(1).Val)) { + dyn_cast<ConstantSDNode>(Op0.getOperand(1).Val)) { switch(Op0Opc) { case ISD::SHL: TgtMask <<= (unsigned)CN->getValue(); break; case ISD::SRL: TgtMask >>= (unsigned)CN->getValue(); break; @@ -1005,7 +1009,7 @@ bool ISel::SelectBitfieldInsert(SDOperand OR, unsigned Result) { // Generate Mask value for Insert if (ConstantSDNode *CN = - dyn_cast<ConstantSDNode>(OR.getOperand(1).getOperand(1).Val)) { + dyn_cast<ConstantSDNode>(Op1.getOperand(1).Val)) { switch(Op1Opc) { case ISD::SHL: Amount = CN->getValue(); @@ -1026,27 +1030,55 @@ bool ISel::SelectBitfieldInsert(SDOperand OR, unsigned Result) { return false; } + unsigned Tmp3 = 0; + + // If both of the inputs are ANDs and one of them has a logical shift by + // constant as its 
input, make that the inserted value so that we can combine + // the shift into the rotate part of the rlwimi instruction + if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) { + if (Op1.getOperand(0).getOpcode() == ISD::SHL || + Op1.getOperand(0).getOpcode() == ISD::SRL) { + if (ConstantSDNode *CN = + dyn_cast<ConstantSDNode>(Op1.getOperand(0).getOperand(1).Val)) { + Amount = Op1.getOperand(0).getOpcode() == ISD::SHL ? + CN->getValue() : 32 - CN->getValue(); + Tmp3 = SelectExpr(Op1.getOperand(0).getOperand(0)); + } + } else if (Op0.getOperand(0).getOpcode() == ISD::SHL || + Op0.getOperand(0).getOpcode() == ISD::SRL) { + if (ConstantSDNode *CN = + dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(1).Val)) { + std::swap(Op0, Op1); + std::swap(TgtMask, InsMask); + Amount = Op1.getOperand(0).getOpcode() == ISD::SHL ? + CN->getValue() : 32 - CN->getValue(); + Tmp3 = SelectExpr(Op1.getOperand(0).getOperand(0)); + } + } + } + // Verify that the Target mask and Insert mask together form a full word mask // and that the Insert mask is a run of set bits (which implies both are runs // of set bits). Given that, Select the arguments and generate the rlwimi // instruction. unsigned MB, ME; - if (((TgtMask ^ InsMask) == 0xFFFFFFFF) && IsRunOfOnes(InsMask, MB, ME)) { + if (((TgtMask & InsMask) == 0) && IsRunOfOnes(InsMask, MB, ME)) { unsigned Tmp1, Tmp2; + bool fullMask = (TgtMask ^ InsMask) == 0xFFFFFFFF; // Check for rotlwi / rotrwi here, a special case of bitfield insert // where both bitfield halves are sourced from the same value. 
- if (IsRotate && + if (IsRotate && fullMask && OR.getOperand(0).getOperand(0) == OR.getOperand(1).getOperand(0)) { Tmp1 = SelectExpr(OR.getOperand(0).getOperand(0)); BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp1).addImm(Amount) .addImm(0).addImm(31); return true; } - if (Op0Opc == ISD::AND) - Tmp1 = SelectExpr(OR.getOperand(0).getOperand(0)); + if (Op0Opc == ISD::AND && fullMask) + Tmp1 = SelectExpr(Op0.getOperand(0)); else - Tmp1 = SelectExpr(OR.getOperand(0)); - Tmp2 = SelectExpr(OR.getOperand(1).getOperand(0)); + Tmp1 = SelectExpr(Op0); + Tmp2 = Tmp3 ? Tmp3 : SelectExpr(Op1.getOperand(0)); BuildMI(BB, PPC::RLWIMI, 5, Result).addReg(Tmp1).addReg(Tmp2) .addImm(Amount).addImm(MB).addImm(ME); return true; -- 2.34.1