X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FREADME.txt;h=a9aab86abdacb830e02f8c364723e8ed94f25541;hb=cae1ea691d79ce786e9209cb5917036f42f13d7c;hp=ffe3fa477b9eac6b7aeadbce171b6ed3d35b7ad8;hpb=325eeb1cd7c3247ead1513204bc9cf4b6e883653;p=oota-llvm.git diff --git a/lib/Target/README.txt b/lib/Target/README.txt index ffe3fa477b9..a9aab86abda 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2,22 +2,6 @@ Target Independent Opportunities: //===---------------------------------------------------------------------===// -With the recent changes to make the implicit def/use set explicit in -machineinstrs, we should change the target descriptions for 'call' instructions -so that the .td files don't list all the call-clobbered registers as implicit -defs. Instead, these should be added by the code generator (e.g. on the dag). - -This has a number of uses: - -1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions - for their different impdef sets. -2. Targets with multiple calling convs (e.g. x86) which have different clobber - sets don't need copies of call instructions. -3. 'Interprocedural register allocation' can be done to reduce the clobber sets - of calls. - -//===---------------------------------------------------------------------===// - We should recognized various "overflow detection" idioms and translate them into llvm.uadd.with.overflow and similar intrinsics. Here is a multiply idiom: @@ -168,7 +152,7 @@ stuff too. //===---------------------------------------------------------------------===// -For vector types, TargetData.cpp::getTypeInfo() returns alignment that is equal +For vector types, DataLayout.cpp::getTypeInfo() returns alignment that is equal to the type size. It works but can be overly conservative as the alignment of specific vector types are target dependent. @@ -278,22 +262,7 @@ unsigned countbits_slow(unsigned v) { c += v & 1; return c; } -unsigned countbits_fast(unsigned v){ - unsigned c; - for (c = 0; v; c++) - v &= v - 1; // clear the least significant bit set - return c; -} -BITBOARD = unsigned long long -int PopCnt(register BITBOARD a) { - register int c=0; - while(a) { - c++; - a &= a - 1; - } - return c; -} unsigned int popcount(unsigned int input) { unsigned int count = 0; for (unsigned int i = 0; i < 4 * 8; i++) @@ -870,11 +839,6 @@ rshift_gt (unsigned int a) bar (); } -void neg_eq_cst(unsigned int a) { -if (-a == 123) -bar(); -} - All should simplify to a single comparison. All of these are currently not optimized with "clang -emit-llvm-bc | opt -std-compile-opts". @@ -966,6 +930,31 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts". //===---------------------------------------------------------------------===// +int g(int x) { return (x - 10) < 0; } +Should combine to "x <= 9" (the sub has nsw). Currently not +optimized with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +int g(int x) { return (x + 10) < 0; } +Should combine to "x < -10" (the add has nsw). Currently not +optimized with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +int f(int i, int j) { return i < j + 1; } +int g(int i, int j) { return j > i - 1; } +Should combine to "i <= j" (the add/sub has nsw). Currently not +optimized with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +unsigned f(unsigned x) { return ((x & 7) + 1) & 15; } +The & 15 part should be optimized away, it doesn't change the result. Currently +not optimized with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + This was noticed in the entryblock for grokdeclarator in 403.gcc: %tmp = icmp eq i32 %decl_context, 4 @@ -1767,7 +1756,6 @@ case it choses instead to keep the max operation obvious. //===---------------------------------------------------------------------===// -Switch lowering generates less than ideal code for the following switch: define void @a(i32 %x) nounwind { entry: switch i32 %x, label %if.end [ @@ -1788,19 +1776,15 @@ declare void @foo() Generated code on x86-64 (other platforms give similar results): a: cmpl $5, %edi - ja .LBB0_2 - movl %edi, %eax - movl $47, %ecx - btq %rax, %rcx - jb .LBB0_3 + ja LBB2_2 + cmpl $4, %edi + jne LBB2_3 .LBB0_2: ret .LBB0_3: jmp foo # TAILCALL -The movl+movl+btq+jb could be simplified to a cmpl+jne. - -Or, if we wanted to be really clever, we could simplify the whole thing to +If we wanted to be really clever, we could simplify the whole thing to something like the following, which eliminates a branch: xorl $1, %edi cmpl $4, %edi @@ -2305,4 +2289,66 @@ The two or/and's should be merged into one each. //===---------------------------------------------------------------------===// +Machine level code hoisting can be useful in some cases. For example, PR9408 +is about: + +typedef union { + void (*f1)(int); + void (*f2)(long); +} funcs; + +void foo(funcs f, int which) { + int a = 5; + if (which) { + f.f1(a); + } else { + f.f2(a); + } +} + +which we compile to: + +foo: # @foo +# BB#0: # %entry + pushq %rbp + movq %rsp, %rbp + testl %esi, %esi + movq %rdi, %rax + je .LBB0_2 +# BB#1: # %if.then + movl $5, %edi + callq *%rax + popq %rbp + ret +.LBB0_2: # %if.else + movl $5, %edi + callq *%rax + popq %rbp + ret + +Note that bb1 and bb2 are the same. This doesn't happen at the IR level +because one call is passing an i32 and the other is passing an i64. +//===---------------------------------------------------------------------===// + +I see this sort of pattern in 176.gcc in a few places (e.g. the start of +store_bit_field). The rem should be replaced with a multiply and subtract: + + %3 = sdiv i32 %A, %B + %4 = srem i32 %A, %B + +Similarly for udiv/urem. Note that this shouldn't be done on X86 or ARM, +which can do this in a single operation (instruction or libcall). It is +probably best to do this in the code generator. + +//===---------------------------------------------------------------------===// + +unsigned foo(unsigned x, unsigned y) { return (x & y) == 0 || x == 0; } +should fold to (x & y) == 0. + +//===---------------------------------------------------------------------===// + +unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; } +should fold to x > y. + +//===---------------------------------------------------------------------===//