bar();
}
-void neg_eq_cst(unsigned int a) {
-if (-a == 123)
-bar();
-}
-
All of these should simplify to a single comparison.  None of them are
currently optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
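For instance, for a comparison of a negated unsigned value against a constant
(as in neg_eq_cst above), the negation can be folded into the constant, since
unsigned negation wraps.  A sketch of the expected result (the function name
is just illustrative):

void bar(void);

void neg_eq_cst_folded(unsigned int a) {
  if (a == (unsigned int)-123)    /* -a == 123  <=>  a == 0xFFFFFF85 */
    bar();
}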
//===---------------------------------------------------------------------===//
Switch lowering generates less than ideal code for the following switch:
define void @a(i32 %x) nounwind {
entry:
switch i32 %x, label %if.end [
Generated code on x86-64 (other platforms give similar results):
a:
  cmpl $5, %edi
- ja .LBB0_2
- movl %edi, %eax
- movl $47, %ecx
- btq %rax, %rcx
- jb .LBB0_3
+  ja .LBB0_2
+  cmpl $4, %edi
+  jne .LBB0_3
.LBB0_2:
  ret
.LBB0_3:
  jmp foo  # TAILCALL
-The movl+movl+btq+jb could be simplified to a cmpl+jne.
-
-Or, if we wanted to be really clever, we could simplify the whole thing to
+If we wanted to be really clever, we could simplify the whole thing to
something like the following, which eliminates a branch:
  xorl $1, %edi
  cmpl $4, %edi
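The xor works because the case values handled by the switch ({0, 1, 2, 3, 5},
as the generated compares imply) map onto the contiguous range 0..4 once bit 0
is flipped, so a single unsigned comparison covers all of them.  The same
trick at the source level, as a sketch (the function name is illustrative):

void foo(void);

void a_clever(unsigned int x) {
  if ((x ^ 1u) <= 4u)    /* true exactly for x in {0, 1, 2, 3, 5} */
    foo();
}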
//===---------------------------------------------------------------------===//
-We should optimize this:
+This code:
- %tmp = load i16* %arrayidx, align 4, !tbaa !0
- %A = trunc i16 %tmp to i8
- %cmp = icmp eq i8 %A, 127
- %B.mask = and i16 %tmp, -256
- %cmp7 = icmp eq i16 %B.mask, 17664
- %or.cond = and i1 %cmp, %cmp7
- br i1 %or.cond, label %land.lhs.true9, label %if.end
+typedef struct {
+  int f1:1;
+  int f2:1;
+  int f3:1;
+  int f4:29;
+} t1;
-into:
+typedef struct {
+  int f1:1;
+  int f2:1;
+  int f3:30;
+} t2;
- %tmp = load i16* %arrayidx, align 4, !tbaa !0
- %0 = icmp eq i16 %tmp, 17791
- br i1 %0, label %land.lhs.true9, label %if.end
-
-with this patch:
-Index: InstCombine/InstCombineCompares.cpp
-===================================================================
---- InstCombine/InstCombineCompares.cpp (revision 129500)
-+++ InstCombine/InstCombineCompares.cpp (working copy)
-@@ -2506,6 +2506,18 @@
- return &I;
- }
- }
-+
-+ // Transform "icmp eq (trunc X), cst" to "icmp (and X, mask), cst"
-+ if (Op0->hasOneUse() && match(Op0, m_Trunc(m_Value(A))) &&
-+ isa<ConstantInt>(Op1)) {
-+ APInt MaskV = APInt::getLowBitsSet(A->getType()->getPrimitiveSizeInBits(),
-+ Op0->getType()->getPrimitiveSizeInBits());
-+ Value *Mask =
-+ Builder->CreateAnd(A, ConstantInt::get(A->getContext(), MaskV));
-+ return new ICmpInst(I.getPredicate(), Mask,
-+ ConstantExpr::getZExt(cast<ConstantInt>(Op1),
-+ Mask->getType()));
-+ }
- }
-
- {
+t1 s1;
+t2 s2;
+
+void func1(void)
+{
+  s1.f1 = s2.f1;
+  s1.f2 = s2.f2;
+}
+
+Compiles into this IR (on x86-64 at least):
+
+%struct.t1 = type { i8, [3 x i8] }
+@s2 = global %struct.t1 zeroinitializer, align 4
+@s1 = global %struct.t1 zeroinitializer, align 4
+define void @func1() nounwind ssp noredzone {
+entry:
+ %0 = load i32* bitcast (%struct.t1* @s2 to i32*), align 4
+ %bf.val.sext5 = and i32 %0, 1
+ %1 = load i32* bitcast (%struct.t1* @s1 to i32*), align 4
+ %2 = and i32 %1, -4
+ %3 = or i32 %2, %bf.val.sext5
+ %bf.val.sext26 = and i32 %0, 2
+ %4 = or i32 %3, %bf.val.sext26
+ store i32 %4, i32* bitcast (%struct.t1* @s1 to i32*), align 4
+ ret void
+}
+
+The two and's and the two or's should each be merged into one.
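Roughly what the merged copy should amount to, sketched in C against the
declarations above (assuming 32-bit int, as in the IR; memcpy stands in for
the word-sized loads and stores the IR already performs, and the function
name is made up):

#include <string.h>

void func1_merged(void)
{
  unsigned int src, dst;
  memcpy(&src, &s2, sizeof src);    /* one load of s2's word */
  memcpy(&dst, &s1, sizeof dst);    /* one load of s1's word */
  dst = (dst & ~3u) | (src & 3u);   /* f1 and f2 copied with one and/or pair */
  memcpy(&s1, &dst, sizeof dst);
}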
+//===---------------------------------------------------------------------===//
+
+Machine level code hoisting can be useful in some cases. For example, PR9408
+is about:
-Not having this is blocking resolving PR6627.
+typedef union {
+ void (*f1)(int);
+ void (*f2)(long);
+} funcs;
+
+void foo(funcs f, int which) {
+ int a = 5;
+ if (which) {
+ f.f1(a);
+ } else {
+ f.f2(a);
+ }
+}
+
+which we compile to:
+
+foo: # @foo
+# BB#0: # %entry
+ pushq %rbp
+ movq %rsp, %rbp
+ testl %esi, %esi
+ movq %rdi, %rax
+ je .LBB0_2
+# BB#1: # %if.then
+ movl $5, %edi
+ callq *%rax
+ popq %rbp
+ ret
+.LBB0_2: # %if.else
+ movl $5, %edi
+ callq *%rax
+ popq %rbp
+ ret
+
+Note that bb1 and bb2 are the same. This doesn't happen at the IR level
+because one call is passing an i32 and the other is passing an i64.
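Since the two blocks load the same immediate and call through the same
register, hoisting (or tail-merging) them would remove the branch entirely.
The effect, sketched at the source level with the declarations above (purely
illustrative; the real transform has to happen on the machine code, exactly
because the IR sees two differently-typed calls):

void foo_merged(funcs f, int which) {
  (void)which;    /* the test is dead once the identical blocks are merged */
  f.f1(5);        /* which member we call through no longer matters here */
}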
//===---------------------------------------------------------------------===//
+I see this sort of pattern in 176.gcc in a few places (e.g. the start of
+store_bit_field). The rem should be replaced with a multiply and subtract:
+
+ %3 = sdiv i32 %A, %B
+ %4 = srem i32 %A, %B
+
+Similarly for udiv/urem. Note that this shouldn't be done on X86 or ARM,
+which can do this in a single operation (instruction or libcall). It is
+probably best to do this in the code generator.
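A sketch of the intended rewrite in C: once the quotient is available, the
remainder is a multiply and a subtract rather than a second division (the
function names are illustrative):

void use(int q, int r);

void div_and_rem(int A, int B) {
  int q = A / B;
  int r = A - q * B;    /* same value as A % B, by the definition of / and % */
  use(q, r);
}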
+
+//===---------------------------------------------------------------------===//