From: Matt Arsenault Date: Fri, 30 May 2014 18:34:43 +0000 (+0000) Subject: Make bitcast, extractelement, and insertelement considered cheap for speculation. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=c4e0611d86a6501fd987af04170ddecfe56fa794;p=oota-llvm.git Make bitcast, extractelement, and insertelement considered cheap for speculation. This helps turn more branches into selects. On R600, vectors are cheap and anything that helps remove branches is very good. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209914 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 150dbdd4ec4..e155daf6fcc 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -227,6 +227,9 @@ static unsigned ComputeSpeculationCost(const User *I) { case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: + case Instruction::BitCast: + case Instruction::ExtractElement: + case Instruction::InsertElement: return 1; // These are all cheap. 
case Instruction::Call: diff --git a/test/Transforms/SimplifyCFG/speculate-vector-ops.ll b/test/Transforms/SimplifyCFG/speculate-vector-ops.ll new file mode 100644 index 00000000000..91972eb5dd9 --- /dev/null +++ b/test/Transforms/SimplifyCFG/speculate-vector-ops.ll @@ -0,0 +1,60 @@ +; RUN: opt -S -simplifycfg < %s | FileCheck %s + +define i32 @speculate_vector_extract(i32 %d, <4 x i32> %v) #0 { +; CHECK-LABEL: @speculate_vector_extract( +; CHECK-NOT: br +entry: + %conv = insertelement <4 x i32> undef, i32 %d, i32 0 + %conv2 = insertelement <4 x i32> %conv, i32 %d, i32 1 + %conv3 = insertelement <4 x i32> %conv2, i32 %d, i32 2 + %conv4 = insertelement <4 x i32> %conv3, i32 %d, i32 3 + %tmp6 = add nsw <4 x i32> %conv4, + %cmp = icmp eq <4 x i32> %tmp6, zeroinitializer + %cmp.ext = sext <4 x i1> %cmp to <4 x i32> + %tmp8 = extractelement <4 x i32> %cmp.ext, i32 0 + %tobool = icmp eq i32 %tmp8, 0 + br i1 %tobool, label %cond.else, label %cond.then + +return: ; preds = %cond.end28 + ret i32 %cond32 + +cond.then: ; preds = %entry + %tmp10 = extractelement <4 x i32> %v, i32 0 + br label %cond.end + +cond.else: ; preds = %entry + %tmp12 = extractelement <4 x i32> %v, i32 3 + br label %cond.end + +cond.end: ; preds = %cond.else, %cond.then + %cond = phi i32 [ %tmp10, %cond.then ], [ %tmp12, %cond.else ] + %tmp14 = extractelement <4 x i32> %cmp.ext, i32 1 + %tobool15 = icmp eq i32 %tmp14, 0 + br i1 %tobool15, label %cond.else17, label %cond.then16 + +cond.then16: ; preds = %cond.end + %tmp20 = extractelement <4 x i32> %v, i32 1 + br label %cond.end18 + +cond.else17: ; preds = %cond.end + br label %cond.end18 + +cond.end18: ; preds = %cond.else17, %cond.then16 + %cond22 = phi i32 [ %tmp20, %cond.then16 ], [ %cond, %cond.else17 ] + %tmp24 = extractelement <4 x i32> %cmp.ext, i32 2 + %tobool25 = icmp eq i32 %tmp24, 0 + br i1 %tobool25, label %cond.else27, label %cond.then26 + +cond.then26: ; preds = %cond.end18 + %tmp30 = extractelement <4 x i32> %v, i32 2 + br label 
%cond.end28 + +cond.else27: ; preds = %cond.end18 + br label %cond.end28 + +cond.end28: ; preds = %cond.else27, %cond.then26 + %cond32 = phi i32 [ %tmp30, %cond.then26 ], [ %cond22, %cond.else27 ] + br label %return +} + +attributes #0 = { nounwind }