From 4193a13da473bd0f1087ab58097e23d6ce60a3ee Mon Sep 17 00:00:00 2001 From: Cong Hou Date: Tue, 25 Aug 2015 21:34:38 +0000 Subject: [PATCH] Remove the final bit test during lowering switch statement if all cases in bit test cover a contiguous range. When lowering switch statement, if bit tests are used then LLVM will always generates a jump to the default statement in the last bit test. However, this is not necessary when all cases in bit tests cover a contiguous range. This is because when generating the bit tests header MBB, there is a range check that guarantees cases in bit tests won't go outside of [low, high], where low and high are minimum and maximum case values in the bit tests. This patch checks if this is the case and then doesn't emit jump to default statement and hence saves a bit test and a branch. Differential Revision: http://reviews.llvm.org/D12249 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245976 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/SelectionDAGBuilder.cpp | 22 +++-- .../SelectionDAG/SelectionDAGBuilder.h | 5 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 33 +++++--- test/CodeGen/X86/switch.ll | 81 ++++++++++++++++++- 4 files changed, 116 insertions(+), 25 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4d12df87137..c0a1480dbd3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7761,12 +7761,22 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, .getSizeInBits(); assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); - if (Low.isNonNegative() && High.slt(BitWidth)) { - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. + // Check if the clusters cover a contiguous range such that no value in the + // range will jump to the default statement. + bool ContiguousRange = true; + for (int64_t I = First + 1; I <= Last; ++I) { + if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { + ContiguousRange = false; + break; + } + } + + if (Low.isStrictlyPositive() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a word without having + // to subtract minValue. In this case, we can optimize away the subtraction. LowBound = APInt::getNullValue(Low.getBitWidth()); CmpRange = High; + ContiguousRange = false; } else { LowBound = Low; CmpRange = High - Low; @@ -7809,8 +7819,8 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); } BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), - SI->getCondition(), -1U, MVT::Other, false, nullptr, - nullptr, std::move(BTI)); + SI->getCondition(), -1U, MVT::Other, false, + ContiguousRange, nullptr, nullptr, std::move(BTI)); BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, BitTestCases.size() - 1, TotalWeight); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index f71190d6375..40f476c355e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -284,17 +284,18 @@ private: struct BitTestBlock { BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, MVT RgVT, bool E, + unsigned Rg, MVT RgVT, bool E, bool CR, MachineBasicBlock* P, MachineBasicBlock* D, BitTestInfo C): First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), - Parent(P), Default(D), Cases(std::move(C)) { } + ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)) { } APInt First; APInt Range; const Value *SValue; unsigned Reg; MVT RegVT; bool Emitted; + bool ContiguousRange; MachineBasicBlock *Parent; MachineBasicBlock *Default; BitTestInfo Cases; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 6b5fcb095e2..7419ce547e0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1499,25 +1499,32 @@ SelectionDAGISel::FinishBasicBlock() { FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - if (j+1 != ej) - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Cases[j+1].ThisBB, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); + + // If all cases cover a contiguous range, it is not necessary to jump to + // the default block after the last bit test fails. This is because the + // range check during bit test header creation has guaranteed that every + // case here doesn't go outside the range. + MachineBasicBlock *NextMBB; + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB; + else if (j + 1 != ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB; else - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Default, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); + NextMBB = SDB->BitTestCases[i].Default; + SDB->visitBitTestCase(SDB->BitTestCases[i], + NextMBB, + UnhandledWeight, + SDB->BitTestCases[i].Reg, + SDB->BitTestCases[i].Cases[j], + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); + + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + break; } // Update PHI Nodes diff --git a/test/CodeGen/X86/switch.ll b/test/CodeGen/X86/switch.ll index 748fd6f238b..d1222d04010 100644 --- a/test/CodeGen/X86/switch.ll +++ b/test/CodeGen/X86/switch.ll @@ -107,7 +107,6 @@ entry: i32 2, label %bb2 i32 5, label %bb2 i32 8, label %bb2 - ] bb0: tail call void @g(i32 0) br label %return bb1: tail call void @g(i32 1) br label %return @@ -116,6 +115,10 @@ return: ret void ; This could be lowered as a jump table, but bit tests is more efficient. ; CHECK-LABEL: bt_is_better +; The bit test on 2,5,8 is unnecessary as all cases cover the rage [0, 8]. +; The range check guarantees that cases other than 0,3,6 and 1,4,7 must be +; in 2,5,8. +; ; 73 = 2^0 + 2^3 + 2^6 ; CHECK: movl $73 ; CHECK: btl @@ -123,7 +126,74 @@ return: ret void ; CHECK: movl $146 ; CHECK: btl ; 292 = 2^2 + 2^5 + 2^8 -; CHECK: movl $292 +; CHECK-NOT: movl $292 +; CHECK-NOT: btl +} + +define void @bt_is_better2(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 0, label %bb0 + i32 3, label %bb0 + i32 6, label %bb0 + i32 1, label %bb1 + i32 4, label %bb1 + i32 7, label %bb1 + i32 2, label %bb2 + i32 8, label %bb2 + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +return: ret void + +; This will also be lowered as bit test, but as the range [0,8] is not fully +; covered (5 missing), the default statement can be jumped to and we end up +; with one more branch. +; CHECK-LABEL: bt_is_better2 +; +; 73 = 2^0 + 2^3 + 2^6 +; CHECK: movl $73 +; CHECK: btl +; 146 = 2^1 + 2^4 + 2^7 +; CHECK: movl $146 +; CHECK: btl +; 260 = 2^2 + 2^8 +; CHECK: movl $260 +; CHECK: btl +} + +define void @bt_is_better3(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 10, label %bb0 + i32 13, label %bb0 + i32 16, label %bb0 + i32 11, label %bb1 + i32 14, label %bb1 + i32 17, label %bb1 + i32 12, label %bb2 + i32 18, label %bb2 + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +return: ret void + +; We don't have to subtract 10 from the case value to let the range become +; [0, 8], as each value in the range [10, 18] can be represented by bits in a +; word. Then we still need a branch to jump to the default statement for the +; range [0, 10). +; CHECK-LABEL: bt_is_better3 +; +; 74752 = 2^10 + 2^13 + 2^16 +; CHECK: movl $74752 +; CHECK: btl +; 149504 = 2^11 + 2^14 + 2^17 +; CHECK: movl $149504 +; CHECK: btl +; 266240 = 2^12 + 2^15 + 2^18 +; CHECK: movl $266240 ; CHECK: btl } @@ -410,6 +480,9 @@ return: ret void ; Cases 1,4,7 have a very large branch weight (which shouldn't overflow), so ; their bit test should come first. 0,3,6 and 2,5,8,9 both have a weight of 12, ; but the latter set has more cases, so should be tested for earlier. +; The bit test on 0,3,6 is unnecessary as all cases cover the rage [0, 9]. +; The range check guarantees that cases other than 1,4,7 and 2,5,8,9 must be +; in 0,3,6. ; CHECK-LABEL: bt_order_by_weight ; 146 = 2^1 + 2^4 + 2^7 @@ -419,8 +492,8 @@ return: ret void ; CHECK: movl $804 ; CHECK: btl ; 73 = 2^0 + 2^3 + 2^6 -; CHECK: movl $73 -; CHECK: btl +; CHECK-NOT: movl $73 +; CHECK-NOT: btl } !1 = !{!"branch_weights", -- 2.34.1