From 9f11e5da345cac0ed3dece4c1c3dee153382fa01 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sun, 6 Sep 2015 04:17:30 +0000 Subject: [PATCH] [PowerPC] Don't commute trivial rlwimi instructions To commute a trivial rlwimi instruction (meaning one with a full mask and zero shift), we'd need the ability to form an all-zero mask (instead of an all-one mask) using rlwimi. We can't represent this, however, and we'll miscompile code if we try. The code quality problem that this highlights (that SDAG simplification can lead to us generating an ISD::OR node with a constant zero LHS) will be fixed as a follow-up. Fixes PR24719. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246937 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.cpp | 5 + .../PowerPC/no-rlwimi-trivial-commute.mir | 92 +++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 79c399b71ae..70e223d78ad 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -571,6 +571,11 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { unsigned MB = MI->getOperand(4).getImm(); unsigned ME = MI->getOperand(5).getImm(); + // We can't commute a trivial mask (there is no way to represent an all-zero + // mask). + if (MB == 0 && ME == 31) + return nullptr; + if (NewMI) { // Create a new instruction. unsigned Reg0 = ChangeReg0 ? 
Reg2 : MI->getOperand(0).getReg(); diff --git a/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir b/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir new file mode 100644 index 00000000000..5c998d09a3d --- /dev/null +++ b/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir @@ -0,0 +1,92 @@ +# RUN: llc -start-after=dead-mi-elimination -stop-after=twoaddressinstruction -o /dev/null %s | FileCheck %s + +--- | + target datalayout = "E-m:e-i64:64-n32:64" + target triple = "powerpc64-unknown-linux-gnu" + + @d = global i32 15, align 4 + @b = global i32* @d, align 8 + @a = common global i32 0, align 4 + + ; Function Attrs: nounwind + define signext i32 @main() #0 { + entry: + %0 = load i32*, i32** @b, align 8 + %1 = load i32, i32* @a, align 4 + %lnot = icmp eq i32 %1, 0 + %lnot.ext = zext i1 %lnot to i32 + %shr.i = lshr i32 2072, %lnot.ext + %call.lobit = lshr i32 %shr.i, 7 + %2 = and i32 %call.lobit, 1 + %3 = load i32, i32* %0, align 4 + %or = or i32 %2, %3 + store i32 %or, i32* %0, align 4 + %4 = load i32, i32* @a, align 4 + %lnot.1 = icmp eq i32 %4, 0 + %lnot.ext.1 = zext i1 %lnot.1 to i32 + %shr.i.1 = lshr i32 2072, %lnot.ext.1 + %call.lobit.1 = lshr i32 %shr.i.1, 7 + %5 = and i32 %call.lobit.1, 1 + %or.1 = or i32 %5, %or + store i32 %or.1, i32* %0, align 4 + ret i32 %or.1 + } + + attributes #0 = { nounwind "target-cpu"="ppc64" } + +... 
+--- +name: main +alignment: 2 +exposesReturnsTwice: false +hasInlineAsm: false +isSSA: true +tracksRegLiveness: true +tracksSubRegLiveness: false +registers: + - { id: 0, class: g8rc_and_g8rc_nox0 } + - { id: 1, class: g8rc_and_g8rc_nox0 } + - { id: 2, class: gprc } + - { id: 3, class: gprc } + - { id: 4, class: gprc } + - { id: 5, class: g8rc_and_g8rc_nox0 } + - { id: 6, class: g8rc_and_g8rc_nox0 } + - { id: 7, class: gprc } + - { id: 8, class: gprc } + - { id: 9, class: gprc } + - { id: 10, class: g8rc } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %x2 + + %0 = ADDIStocHA %x2, @b + %1 = LD target-flags(ppc-toc-lo) @b, killed %0 :: (load 8 from @b) + %2 = LWZ 0, %1 :: (load 4 from %ir.0) + %3 = LI 0 + %4 = RLWIMI %3, killed %2, 0, 0, 31 + ; CHECK-LABEL: name: main + ; CHECK: %[[REG1:[0-9]+]] = LI 0 + ; CHECK: %[[REG2:[0-9]+]] = COPY %[[REG1]] + ; CHECK: %[[REG2]] = RLWIMI %[[REG2]], killed %2, 0, 0, 31 + %8 = RLWIMI %3, %4, 0, 0, 31 + STW %4, 0, %1 :: (store 4 into %ir.0) + %10 = EXTSW_32_64 %8 + STW %8, 0, %1 :: (store 4 into %ir.0) + %x3 = COPY %10 + BLR8 implicit %x3, implicit %lr8, implicit %rm + +... -- 2.34.1