From a421ac689f50cee8f7258c9110b69de00bd3ddba Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Sat, 6 Dec 2014 01:28:50 +0000 Subject: [PATCH] SelectionDAG switch lowering: Replace unreachable default with most popular case. This can significantly reduce the size of the switch, allowing for more efficient lowering. I also worked with the idea of exploiting unreachable defaults by omitting the range check for jump tables, but always ended up with a non-neglible binary size increase. It might be worth looking into some more. SimplifyCFG currently does this transformation, but I'm working towards changing that so we can optimize harder based on unreachable defaults. Differential Revision: http://reviews.llvm.org/D6510 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223566 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/SelectionDAGBuilder.cpp | 57 +++++++++++++------ .../X86/2013-10-14-FastISel-incorrect-vreg.ll | 6 +- test/CodeGen/X86/asm-label.ll | 6 +- test/CodeGen/X86/switch-jump-table.ll | 52 +++++++++++++++++ 4 files changed, 98 insertions(+), 23 deletions(-) create mode 100644 test/CodeGen/X86/switch-jump-table.ll diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c3b197e97eb..a5fdfc55487 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2698,32 +2698,55 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { if (SwitchMBB + 1 != FuncInfo.MF->end()) NextBlock = SwitchMBB + 1; + + // Create a vector of Cases, sorted so that we can efficiently create a binary + // search tree from them. + CaseVector Cases; + Clusterify(Cases, SI); + + // Get the default destination MBB. MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; - // If there is only the default destination, branch to it if it is not the - // next basic block. Otherwise, just fall through. - if (!SI.getNumCases()) { + if (isa(SI.getDefaultDest()->getFirstNonPHIOrDbg()) && + !Cases.empty()) { + // Replace an unreachable default destination with the most popular case + // destination. + DenseMap Popularity; + uint64_t MaxPop = 0; + const BasicBlock *MaxBB = nullptr; + for (auto I : SI.cases()) { + const BasicBlock *BB = I.getCaseSuccessor(); + if (++Popularity[BB] > MaxPop) { + MaxPop = Popularity[BB]; + MaxBB = BB; + } + } + + // Set new default. + assert(MaxPop > 0); + assert(MaxBB); + Default = FuncInfo.MBBMap[MaxBB]; + + // Remove cases that were pointing to the destination that is now the default. + Cases.erase(std::remove_if(Cases.begin(), Cases.end(), + [&](const Case &C) { return C.BB == Default; }), + Cases.end()); + } + + // If there is only the default destination, go there directly. + if (Cases.empty()) { // Update machine-CFG edges. SwitchMBB->addSuccessor(Default); // If this is not a fall-through branch, emit the branch. - if (Default != NextBlock) - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Default))); - + if (Default != NextBlock) { + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(Default))); + } return; } - // If there are any non-default case statements, create a vector of Cases - // representing each one, and sort the vector so that we can efficiently - // create a binary search tree from them. - CaseVector Cases; - Clusterify(Cases, SI); - - // Get the Value to be switched on and default basic blocks, which will be - // inserted into CaseBlock records, representing basic blocks in the binary - // search tree. + // Get the Value to be switched on. const Value *SV = SI.getCondition(); // Push the initial CaseRec onto the worklist diff --git a/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll b/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll index 10dc927200b..9cd150a2f56 100644 --- a/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll +++ b/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll @@ -41,7 +41,7 @@ entry: i1 false, label %label_end ] default: - unreachable + br label %label_end label_true: br label %label_end @@ -80,7 +80,7 @@ entry: i1 false, label %label_end ] default: - unreachable + br label %label_end label_true: br label %label_end @@ -119,7 +119,7 @@ entry: i1 false, label %label_end ] default: - unreachable + br label %label_end label_true: br label %label_end diff --git a/test/CodeGen/X86/asm-label.ll b/test/CodeGen/X86/asm-label.ll index 1fc6e2eaf2b..1da66e74d34 100644 --- a/test/CodeGen/X86/asm-label.ll +++ b/test/CodeGen/X86/asm-label.ll @@ -24,7 +24,7 @@ if.end: ; preds = %if.then br label %cleanup cleanup: ; preds = %if.end, %if.then9 - switch i32 undef, label %unreachable [ + switch i32 undef, label %default [ i32 0, label %cleanup.cont i32 1, label %if.end11 ] @@ -35,6 +35,6 @@ cleanup.cont: ; preds = %cleanup if.end11: ; preds = %cleanup.cont, %cleanup, %land.lhs.true, %entry ret void -unreachable: ; preds = %cleanup - unreachable +default: ; preds = %cleanup + br label %if.end11 } diff --git a/test/CodeGen/X86/switch-jump-table.ll b/test/CodeGen/X86/switch-jump-table.ll new file mode 100644 index 00000000000..7ae45b56641 --- /dev/null +++ b/test/CodeGen/X86/switch-jump-table.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=x86 < %s | FileCheck %s + + +; An unreachable default destination is replaced with the most popular case label. + +define void @sum2(i32 %x, i32* %to) { +; CHECK-LABEL: sum2: +; CHECK: movl 4(%esp), [[REG:%e[a-z]{2}]] +; cmpl $3, [[REG]] +; CHECK: jbe .LBB0_1 +; CHECK: movl $4 +; CHECK: retl +; CHECK-LABEL: .LBB0_1: +; CHECK-NEXT: jmpl *.LJTI0_0(,[[REG]],4) + +entry: + switch i32 %x, label %default [ + i32 0, label %bb0 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb4 + ] +bb0: + store i32 0, i32* %to + br label %exit +bb1: + store i32 1, i32* %to + br label %exit +bb2: + store i32 2, i32* %to + br label %exit +bb3: + store i32 3, i32* %to + br label %exit +bb4: + store i32 4, i32* %to + br label %exit +exit: + ret void +default: + unreachable + +; The jump table has four entries. +; CHECK-LABEL: .LJTI0_0: +; CHECK-NEXT: .long .LBB0_2 +; CHECK-NEXT: .long .LBB0_3 +; CHECK-NEXT: .long .LBB0_4 +; CHECK-NEXT: .long .LBB0_5 +; CHECK-NOT: .long +} -- 2.34.1