X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FREADME.txt;h=f68cf0e40df0c89823eabf74bdde6bbe370cae8f;hb=7e9e36a23e07dfb0d7ceda3e76450073c0534f35;hp=2af2d61a6c89d406a64a3fb5deaa7d4d84aab994;hpb=cea03cdb6946841a951f6c622b117459ea5ca157;p=oota-llvm.git

diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 2af2d61a6c8..f68cf0e40df 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -125,8 +125,7 @@ specific vector types are target dependent.
 
 //===---------------------------------------------------------------------===//
 
-We should add 'unaligned load/store' nodes, and produce them from code like
-this:
+We should produce an unaligned load from code like this:
 
 v4sf example(float *P) {
   return (v4sf){P[0], P[1], P[2], P[3] };
@@ -167,52 +166,14 @@ if anyone cared enough about sincos.
 
 //===---------------------------------------------------------------------===//
 
-Scalar Repl cannot currently promote this testcase to 'ret long cst':
-
-  %struct.X = type { i32, i32 }
-  %struct.Y = type { %struct.X }
-
-define i64 @bar() {
-  %retval = alloca %struct.Y, align 8
-  %tmp12 = getelementptr %struct.Y* %retval, i32 0, i32 0, i32 0
-  store i32 0, i32* %tmp12
-  %tmp15 = getelementptr %struct.Y* %retval, i32 0, i32 0, i32 1
-  store i32 1, i32* %tmp15
-  %retval.upgrd.1 = bitcast %struct.Y* %retval to i64*
-  %retval.upgrd.2 = load i64* %retval.upgrd.1
-  ret i64 %retval.upgrd.2
-}
-
-it should be extended to do so.
-
-//===---------------------------------------------------------------------===//
-
--scalarrepl should promote this to be a vector scalar.
-
-  %struct..0anon = type { <4 x float> }
-
-define void @test1(<4 x float> %V, float* %P) {
-  %u = alloca %struct..0anon, align 16
-  %tmp = getelementptr %struct..0anon* %u, i32 0, i32 0
-  store <4 x float> %V, <4 x float>* %tmp
-  %tmp1 = bitcast %struct..0anon* %u to [4 x float]*
-  %tmp.upgrd.1 = getelementptr [4 x float]* %tmp1, i32 0, i32 1
-  %tmp.upgrd.2 = load float* %tmp.upgrd.1
-  %tmp3 = mul float %tmp.upgrd.2, 2.000000e+00
-  store float %tmp3, float* %P
-  ret void
-}
-
-//===---------------------------------------------------------------------===//
-
 Turn this into a single byte store with no load (the other 3 bytes are
 unmodified):
 
-void %test(uint* %P) {
-  %tmp = load uint* %P
-  %tmp14 = or uint %tmp, 3305111552
-  %tmp15 = and uint %tmp14, 3321888767
-  store uint %tmp15, uint* %P
+define void @test(i32* %P) {
+  %tmp = load i32* %P
+  %tmp14 = or i32 %tmp, 3305111552
+  %tmp15 = and i32 %tmp14, 3321888767
+  store i32 %tmp15, i32* %P
   ret void
 }
 
@@ -634,32 +595,6 @@ once.
 
 //===---------------------------------------------------------------------===//
 
-We should extend parameter attributes to capture more information about
-pointer parameters for alias analysis. Some ideas:
-
-1. Add a "nocapture" attribute, which indicates that the callee does not store
-   the address of the parameter into a global or any other memory location
-   visible to the callee. This can be used to make basicaa and other analyses
-   more powerful. It is true for things like memcpy, strcat, and many other
-   things, including structs passed by value, most C++ references, etc.
-2. Generalize readonly to be set on parameters. This is important mod/ref
-   info for the function, which is important for basicaa and others. It can
-   also be used by the inliner to avoid inserting a memcpy for byval
-   arguments when the function is inlined.
-
-These functions can be inferred by various analysis passes such as the
-globalsmodrefaa pass.  Note that getting #2 right is actually really tricky.
-Consider this code:
-
-struct S;  S G;
-void caller(S byvalarg) { G.field = 1; ... }
-void callee() { caller(G); }
-
-The fact that the caller does not modify byval arg is not enough, we need
-to know that it doesn't modify G either. This is very tricky.
-
-//===---------------------------------------------------------------------===//
-
 We should add an FRINT node to the DAG to model targets that have legal
 implementations of ceil/floor/rint.
 
@@ -814,16 +749,6 @@ be done safely if "b" isn't modified between the strlen and memcpy of course.
 
 //===---------------------------------------------------------------------===//
 
-We should be able to evaluate this loop:
-
-int test(int x_offs) {
-  while (x_offs > 4)
-     x_offs -= 4;
-  return x_offs;
-}
-
-//===---------------------------------------------------------------------===//
-
 Reassociate should turn things like:
 
 int factorial(int X) {
@@ -1690,6 +1615,19 @@ foo:
 
 //===---------------------------------------------------------------------===//
 
+The arg promotion pass should make use of nocapture to make its alias analysis
+stuff much more precise.
+
+//===---------------------------------------------------------------------===//
+
+The following functions should be optimized to use a select instead of a
+branch (from gcc PR40072):
+
+char char_int(int m) {if(m>7) return 0; return m;}
+int int_char(char m) {if(m>7) return 0; return m;}
+
+//===---------------------------------------------------------------------===//
+
 Instcombine should replace the load with a constant in:
 
 static const char x[4] = {'a', 'b', 'c', 'd'};
@@ -1704,16 +1642,38 @@ is a null (making it a C string). There's no need for these restrictions.
 
 //===---------------------------------------------------------------------===//
 
-The arg promotion pass should make use of nocapture to make its alias analysis
-stuff much more precise.
+InstCombine's "turn load from constant into constant" optimization should be
+more aggressive in the presence of bitcasts. For example, because of unions,
+this code:
 
-//===---------------------------------------------------------------------===//
+union vec2d {
+    double e[2];
+    double v __attribute__((vector_size(16)));
+};
+typedef union vec2d vec2d;
 
-The following functions should be optimized to use a select instead of a
-branch (from gcc PR40072):
+static vec2d a={{1,2}}, b={{3,4}};
+
+vec2d foo () {
+    return (vec2d){ .v = a.v + b.v * (vec2d){{5,5}}.v };
+}
 
-char char_int(int m) {if(m>7) return 0; return m;}
-int int_char(char m) {if(m>7) return 0; return m;}
+Compiles into:
+
+@a = internal constant %0 { [2 x double]
+     [double 1.000000e+00, double 2.000000e+00] }, align 16
+@b = internal constant %0 { [2 x double]
+     [double 3.000000e+00, double 4.000000e+00] }, align 16
+...
+define void @foo(%struct.vec2d* noalias nocapture sret %agg.result) nounwind {
+entry:
+  %0 = load <2 x double>* getelementptr (%struct.vec2d*
+           bitcast (%0* @a to %struct.vec2d*), i32 0, i32 0), align 16
+  %1 = load <2 x double>* getelementptr (%struct.vec2d*
+           bitcast (%0* @b to %struct.vec2d*), i32 0, i32 0), align 16
 
-//===---------------------------------------------------------------------===//
+Instcombine should be able to optimize away the loads (and thus the globals).
+
+
+//===---------------------------------------------------------------------===//
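For reference, a minimal standalone C sketch of the vec2d example from the last
hunk is below. The main() driver and the printf output are illustrative
additions, not part of README.txt, and it assumes a GCC-compatible compiler
that supports __attribute__((vector_size)). With both globals constant, the
whole call should fold to {1 + 3*5, 2 + 4*5} = {16, 22}, which is what
"optimize away the loads (and thus the globals)" amounts to.

/* Sketch only: mirrors the union-based example from the diff above. */
#include <stdio.h>

union vec2d {
    double e[2];
    double v __attribute__((vector_size(16)));
};
typedef union vec2d vec2d;

static vec2d a = {{1, 2}}, b = {{3, 4}};

static vec2d foo(void) {
    /* Same computation as the README entry: a.v + b.v * {5,5}. */
    return (vec2d){ .v = a.v + b.v * (vec2d){{5, 5}}.v };
}

int main(void) {
    vec2d r = foo();
    /* If the constant loads are folded, this is just {16, 22}:
       1 + 3*5 = 16 and 2 + 4*5 = 22. */
    printf("%g %g\n", r.e[0], r.e[1]);
    return 0;
}

Building this at -O2 and inspecting the emitted IR is an easy way to check
whether the loads from @a and @b (and the globals themselves) were removed.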