return MadeChange;
}
+/// Check if V (an operand of a select instruction) is an expensive instruction
+/// that is only used once.
+static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (I && I->hasOneUse() &&
+ TTI->getUserCost(I) == TargetTransformInfo::TCC_Expensive)
+ return true;
+
+ return false;
+}
+
/// Returns true if a SelectInst should be turned into an explicit branch.
-static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
+static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
+ SelectInst *SI) {
// FIXME: This should use the same heuristics as IfConversion to determine
// whether a select is better represented as a branch. This requires that
// branch probability metadata is preserved for the select, which is not the
// on a load from memory. But if the load is used more than once, do not
// change the select to a branch because the load is probably needed
// regardless of whether the branch is taken or not.
- return ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
- (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
+ if ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
+ (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()))
+ return true;
+
+ // If either operand of the select is expensive and only needed on one side
+ // of the select, we should form a branch.
+ if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
+ sinkSelectOperand(TTI, SI->getFalseValue()))
+ return true;
+
+ return false;
}
// We have efficient codegen support for the select instruction.
// Check if it is profitable to keep this 'select'.
if (!TLI->isPredictableSelectExpensive() ||
- !isFormingBranchFromSelectProfitable(SI))
+ !isFormingBranchFromSelectProfitable(TTI, SI))
return false;
}
ModifiedDT = true;
+ // Transform a sequence like this:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // %sel = select i1 %cmp, i32 %c, i32 %d
+ //
+ // Into:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // br i1 %cmp, label %select.true, label %select.false
+ // select.true:
+ // br label %select.end
+ // select.false:
+ // br label %select.end
+ // select.end:
+ // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
+ //
+ // In addition, we may sink instructions that produce %c or %d from
+ // the entry block into the destination(s) of the new branch.
+ // If the true or false blocks do not contain a sunken instruction, that
+ // block and its branch may be optimized away. In that case, one side of the
+ // first branch will point directly to select.end, and the corresponding PHI
+ // predecessor block will be the start block.
+
// First, we split the block containing the select into 2 blocks.
BasicBlock *StartBlock = SI->getParent();
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
- BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
-
- // Create a new block serving as the landing pad for the branch.
- BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
- NextBlock->getParent(), NextBlock);
+ BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
- // Move the unconditional branch from the block with the select in it into our
- // landing pad block.
+ // Delete the unconditional branch that was just created by the split.
StartBlock->getTerminator()->eraseFromParent();
- BranchInst::Create(NextBlock, SmallBlock);
+
+ // These are the new basic blocks for the conditional branch.
+ // At least one will become an actual new basic block.
+ BasicBlock *TrueBlock = nullptr;
+ BasicBlock *FalseBlock = nullptr;
+
+ // Sink expensive instructions into the conditional blocks to avoid executing
+ // them speculatively.
+ if (sinkSelectOperand(TTI, SI->getTrueValue())) {
+ TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
+ EndBlock->getParent(), EndBlock);
+ auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ auto *TrueInst = cast<Instruction>(SI->getTrueValue());
+ TrueInst->moveBefore(TrueBranch);
+ }
+ if (sinkSelectOperand(TTI, SI->getFalseValue())) {
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
+ EndBlock->getParent(), EndBlock);
+ auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ auto *FalseInst = cast<Instruction>(SI->getFalseValue());
+ FalseInst->moveBefore(FalseBranch);
+ }
+
+ // If there was nothing to sink, then arbitrarily choose the 'false' side
+ // for a new input value to the PHI.
+ if (TrueBlock == FalseBlock) {
+ assert(TrueBlock == nullptr &&
+ "Unexpected basic block transform while optimizing select");
+
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
+ EndBlock->getParent(), EndBlock);
+ BranchInst::Create(EndBlock, FalseBlock);
+ }
// Insert the real conditional branch based on the original condition.
- BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
+ // If we did not create a new block for one of the 'true' or 'false' paths
+ // of the condition, it means that side of the branch goes to the end block
+ // directly and the path originates from the start block from the point of
+ // view of the new PHI.
+ if (TrueBlock == nullptr) {
+ BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI);
+ TrueBlock = StartBlock;
+ } else if (FalseBlock == nullptr) {
+ BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI);
+ FalseBlock = StartBlock;
+ } else {
+ BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI);
+ }
// The select itself is replaced with a PHI Node.
- PHINode *PN = PHINode::Create(SI->getType(), 2, "", &NextBlock->front());
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
PN->takeName(SI);
- PN->addIncoming(SI->getTrueValue(), StartBlock);
- PN->addIncoming(SI->getFalseValue(), SmallBlock);
+ PN->addIncoming(SI->getTrueValue(), TrueBlock);
+ PN->addIncoming(SI->getFalseValue(), FalseBlock);
+
SI->replaceAllUsesWith(PN);
SI->eraseFromParent();
--- /dev/null
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target triple = "x86_64-unknown-unknown"
+
+; Nothing to sink here, but this gets converted to a branch to
+; avoid stalling an out-of-order CPU on a predictable branch.
+
+define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y) {
+entry:
+ %load = load double, double* %b, align 8
+ %cmp = fcmp olt double %load, %a
+ %sel = select i1 %cmp, i32 %x, i32 %y
+ ret i32 %sel
+
+; CHECK-LABEL: @no_sink(
+; CHECK: %load = load double, double* %b, align 8
+; CHECK: %cmp = fcmp olt double %load, %a
+; CHECK: br i1 %cmp, label %select.end, label %select.false
+; CHECK: select.false:
+; CHECK: br label %select.end
+; CHECK: select.end:
+; CHECK: %sel = phi i32 [ %x, %entry ], [ %y, %select.false ]
+; CHECK: ret i32 %sel
+}
+
+
+; An 'fdiv' is expensive, so sink it rather than speculatively execute it.
+
+define float @fdiv_true_sink(float %a, float %b) {
+entry:
+ %div = fdiv float %a, %b
+ %cmp = fcmp ogt float %a, 1.0
+ %sel = select i1 %cmp, float %div, float 2.0
+ ret float %sel
+
+; CHECK-LABEL: @fdiv_true_sink(
+; CHECK: %cmp = fcmp ogt float %a, 1.0
+; CHECK: br i1 %cmp, label %select.true.sink, label %select.end
+; CHECK: select.true.sink:
+; CHECK: %div = fdiv float %a, %b
+; CHECK: br label %select.end
+; CHECK: select.end:
+; CHECK: %sel = phi float [ %div, %select.true.sink ], [ 2.000000e+00, %entry ]
+; CHECK: ret float %sel
+}
+
+define float @fdiv_false_sink(float %a, float %b) {
+entry:
+ %div = fdiv float %a, %b
+ %cmp = fcmp ogt float %a, 3.0
+ %sel = select i1 %cmp, float 4.0, float %div
+ ret float %sel
+
+; CHECK-LABEL: @fdiv_false_sink(
+; CHECK: %cmp = fcmp ogt float %a, 3.0
+; CHECK: br i1 %cmp, label %select.end, label %select.false.sink
+; CHECK: select.false.sink:
+; CHECK: %div = fdiv float %a, %b
+; CHECK: br label %select.end
+; CHECK: select.end:
+; CHECK: %sel = phi float [ 4.000000e+00, %entry ], [ %div, %select.false.sink ]
+; CHECK: ret float %sel
+}
+
+define float @fdiv_both_sink(float %a, float %b) {
+entry:
+ %div1 = fdiv float %a, %b
+ %div2 = fdiv float %b, %a
+ %cmp = fcmp ogt float %a, 5.0
+ %sel = select i1 %cmp, float %div1, float %div2
+ ret float %sel
+
+; CHECK-LABEL: @fdiv_both_sink(
+; CHECK: %cmp = fcmp ogt float %a, 5.0
+; CHECK: br i1 %cmp, label %select.true.sink, label %select.false.sink
+; CHECK: select.true.sink:
+; CHECK: %div1 = fdiv float %a, %b
+; CHECK: br label %select.end
+; CHECK: select.false.sink:
+; CHECK: %div2 = fdiv float %b, %a
+; CHECK: br label %select.end
+; CHECK: select.end:
+; CHECK: %sel = phi float [ %div1, %select.true.sink ], [ %div2, %select.false.sink ]
+; CHECK: ret float %sel
+}
+
+; An 'fadd' is not too expensive, so it's ok to speculate.
+
+define float @fadd_no_sink(float %a, float %b) {
+ %add = fadd float %a, %b
+ %cmp = fcmp ogt float 6.0, %a
+ %sel = select i1 %cmp, float %add, float 7.0
+ ret float %sel
+
+; CHECK-LABEL: @fadd_no_sink(
+; CHECK: %sel = select i1 %cmp, float %add, float 7.0
+}
+
+; Possible enhancement: sinkability is only calculated with the direct
+; operand of the select, so we don't try to sink this. The fdiv cost is not
+; taken into account.
+
+define float @fdiv_no_sink(float %a, float %b) {
+entry:
+ %div = fdiv float %a, %b
+ %add = fadd float %div, %b
+ %cmp = fcmp ogt float %a, 1.0
+ %sel = select i1 %cmp, float %add, float 8.0
+ ret float %sel
+
+; CHECK-LABEL: @fdiv_no_sink(
+; CHECK: %sel = select i1 %cmp, float %add, float 8.0
+}
+