STATISTIC(NumSpeculations, "Number of speculative executed instructions");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
+STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
namespace {
uint64_t TableSize,
ConstantInt *Offset,
const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
- Constant *DefaultValue);
+ Constant *DefaultValue,
+ const TargetData *TD);
/// BuildLookup - Build instructions with Builder to retrieve the value at
/// the position given by Index in the lookup table.
Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
+ /// WouldFitInRegister - Return true if a table with TableSize elements of
+ /// type ElementType would fit in a target-legal register.
+ static bool WouldFitInRegister(const TargetData *TD,
+ uint64_t TableSize,
+ const Type *ElementType);
+
private:
// Depending on the contents of the table, it can be represented in
// different ways.
// store that single value and return it for each lookup.
SingleValueKind,
+ // For small tables with integer elements, we can pack them into a bitmap
+ // that fits into a target-legal register. Values are retrieved by
+ // shift and mask operations.
+ BitMapKind,
+
// The table is stored as an array of values. Values are retrieved by load
// instructions from the table.
ArrayKind
// For SingleValueKind, this is the single value.
Constant *SingleValue;
+ // For BitMapKind, this is the bitmap.
+ ConstantInt *BitMap;
+ IntegerType *BitMapElementTy;
+
// For ArrayKind, this is the array.
GlobalVariable *Array;
};
uint64_t TableSize,
ConstantInt *Offset,
const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
- Constant *DefaultValue) {
+ Constant *DefaultValue,
+ const TargetData *TD) {
assert(Values.size() && "Can't build lookup table without values.");
assert(TableSize >= Values.size() && "Can't fit values in table.");
return;
}
+ // If the type is integer and the table fits in a register, build a bitmap.
+ if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) {
+ IntegerType *IT = cast<IntegerType>(DefaultValue->getType());
+ APInt TableInt(TableSize * IT->getBitWidth(), 0);
+ for (uint64_t I = TableSize; I > 0; --I) {
+ TableInt <<= IT->getBitWidth();
+ TableInt |= cast<ConstantInt>(TableContents[I - 1])->getZExtValue();
+ }
+ BitMap = ConstantInt::get(M.getContext(), TableInt);
+ BitMapElementTy = IT;
+ Kind = BitMapKind;
+ ++NumBitMaps;
+ return;
+ }
+
// Store the table in an array.
ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize);
Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
switch (Kind) {
case SingleValueKind:
return SingleValue;
+ case BitMapKind: {
+ // Type of the bitmap (e.g. i59).
+ IntegerType *MapTy = BitMap->getType();
+
+ // Cast Index to the same type as the bitmap.
+ // Note: The Index is <= the number of elements in the table, so
+ // truncating it to the width of the bitmask is safe.
+ Value *ShiftAmt = Index;
+ IntegerType *IndexTy = cast<IntegerType>(Index->getType());
+ if (IndexTy->getBitWidth() < MapTy->getBitWidth())
+ ShiftAmt = Builder.CreateZExt(ShiftAmt, MapTy, "switch.zext");
+ else if (IndexTy->getBitWidth() > MapTy->getBitWidth())
+ ShiftAmt = Builder.CreateTrunc(ShiftAmt, MapTy, "switch.trunc");
+
+ // Multiply the shift amount by the element width.
+ ShiftAmt = Builder.CreateMul(ShiftAmt,
+ ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
+ "switch.shiftamt");
+
+ // Shift down.
+ Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt,
+ "switch.downshift");
+ // Mask off.
+ return Builder.CreateTrunc(DownShifted, BitMapElementTy,
+ "switch.masked");
+ }
case ArrayKind: {
Value *GEPIndices[] = { Builder.getInt32(0), Index };
Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
llvm_unreachable("Unknown lookup table kind!");
}
+bool SwitchLookupTable::WouldFitInRegister(const TargetData *TD,
+ uint64_t TableSize,
+ const Type *ElementType) {
+ if (!TD)
+ return false;
+ const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
+ if (!IT)
+ return false;
+ // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
+ // are <= 15, we could try to narrow the type.
+ return TD->fitsInLegalInteger(TableSize * IT->getBitWidth());
+}
+
/// ShouldBuildLookupTable - Determine whether a lookup table should be built
/// for this switch, based on the number of caes, size of the table and the
/// types of the results.
static bool ShouldBuildLookupTable(SwitchInst *SI,
- uint64_t TableSize) {
+ uint64_t TableSize,
+ const TargetData *TD,
+ const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
// The table density should be at least 40%. This is the same criterion as for
// jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
// FIXME: Find the best cut-off.
if (SI->getNumCases() * 10 >= TableSize * 4)
return true;
- return false;
+ // If each table would fit in a register, we should build it anyway.
+ for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(),
+ E = ResultTypes.end(); I != E; ++I) {
+ if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second))
+ return false;
+ }
+ return true;
}
/// SwitchToLookupTable - If the switch is only used to initialize one or more
/// phi nodes in a common successor block with different constant values,
/// replace the switch with lookup tables.
static bool SwitchToLookupTable(SwitchInst *SI,
- IRBuilder<> &Builder) {
+ IRBuilder<> &Builder,
+ const TargetData* TD) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
// FIXME: Handle unreachable cases.
// FIXME: If the switch is too sparse for a lookup table, perhaps we could
// split off a dense part and build a lookup table for that.
- // FIXME: If the results are all integers and the lookup table would fit in a
- // target-legal register, we should store them as a bitmap and use shift/mask
- // to look up the result.
-
// FIXME: This creates arrays of GEPs to constant strings, which means each
// GEP needs a runtime relocation in PIC code. We should just build one big
// string and lookup indices into that.
if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1))
return false;
uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
- if (!ShouldBuildLookupTable(SI, TableSize))
+ if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes))
return false;
// Create the BB that does the lookups.
PHINode *PHI = PHIs[I];
SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI],
- DefaultResults[PHI]);
+ DefaultResults[PHI], TD);
Value *Result = Table.BuildLookup(TableIndex, Builder);
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB) | true;
- if (SwitchToLookupTable(SI, Builder))
+ if (SwitchToLookupTable(SI, Builder, TD))
return SimplifyCFG(BB) | true;
return false;
; The table for @f
; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
-; The int table for @h
-; CHECK: @switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05"
-
; The float table for @h
-; CHECK: @switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
+; CHECK: @switch.table1 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
; The table for @foostring
-; CHECK: @switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
+; CHECK: @switch.table2 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
; The table for @earlyreturncrash
-; CHECK: @switch.table4 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5]
+; CHECK: @switch.table3 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5]
; A simple int-to-int selection switch.
; It is dense enough to be replaced by table lookup.
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog
; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load = load i8* %switch.gep
-; CHECK-NEXT: %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load2 = load float* %switch.gep1
+; CHECK-NEXT: %switch.shiftamt = mul i32 %switch.tableidx, 8
+; CHECK-NEXT: %switch.downshift = lshr i32 89655594, %switch.shiftamt
+; CHECK-NEXT: %switch.masked = trunc i32 %switch.downshift to i8
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x float]* @switch.table1, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load float* %switch.gep
; CHECK-NEXT: br label %sw.epilog
; CHECK: sw.epilog:
-; CHECK-NEXT: %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ]
-; CHECK-NEXT: %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
+; CHECK-NEXT: %a.0 = phi i8 [ %switch.masked, %switch.lookup ], [ 7, %entry ]
+; CHECK-NEXT: %b.0 = phi float [ %switch.load, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0)
; CHECK-NEXT: ret void
}
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table2, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i8** %switch.gep
; CHECK-NEXT: ret i8* %switch.load
}
; CHECK: @earlyreturncrash
; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table4, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table3, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i32* %switch.gep
; CHECK-NEXT: ret i32 %switch.load
; CHECK: sw.epilog:
; CHECK-NEXT: ret i32 7
}
+
+
+; Example 7 from http://blog.regehr.org/archives/320
+; It is not dense enough for a regular table, but the results
+; can be packed into a bitmap.
+
+define i32 @crud(i8 zeroext %c) {
+entry:
+ %cmp = icmp ult i8 %c, 33
+ br i1 %cmp, label %lor.end, label %switch.early.test
+
+switch.early.test:
+ switch i8 %c, label %lor.rhs [
+ i8 92, label %lor.end
+ i8 62, label %lor.end
+ i8 60, label %lor.end
+ i8 59, label %lor.end
+ i8 58, label %lor.end
+ i8 46, label %lor.end
+ i8 44, label %lor.end
+ i8 34, label %lor.end
+ i8 39, label %switch.edge
+ ]
+
+switch.edge: br label %lor.end
+lor.rhs: br label %lor.end
+
+lor.end:
+ %0 = phi i1 [ true, %switch.early.test ],
+ [ false, %lor.rhs ],
+ [ true, %entry ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.edge ]
+ %lor.ext = zext i1 %0 to i32
+ ret i32 %lor.ext
+
+; CHECK: @crud
+; CHECK: entry:
+; CHECK-NEXT: %cmp = icmp ult i8 %c, 33
+; CHECK-NEXT: br i1 %cmp, label %lor.end, label %switch.early.test
+; CHECK: switch.early.test:
+; CHECK-NEXT: %switch.tableidx = sub i8 %c, 34
+; CHECK-NEXT: %0 = icmp ult i8 %switch.tableidx, 59
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %lor.end
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.zext = zext i8 %switch.tableidx to i59
+; CHECK-NEXT: %switch.shiftamt = mul i59 %switch.zext, 1
+; CHECK-NEXT: %switch.downshift = lshr i59 -288230375765830623, %switch.shiftamt
+; CHECK-NEXT: %switch.masked = trunc i59 %switch.downshift to i1
+; CHECK-NEXT: br label %lor.end
+; CHECK: lor.end:
+; CHECK-NEXT: %1 = phi i1 [ true, %entry ], [ %switch.masked, %switch.lookup ], [ false, %switch.early.test ]
+; CHECK-NEXT: %lor.ext = zext i1 %1 to i32
+; CHECK-NEXT: ret i32 %lor.ext
+}