X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FREADME.txt;h=f68cf0e40df0c89823eabf74bdde6bbe370cae8f;hb=7e9e36a23e07dfb0d7ceda3e76450073c0534f35;hp=2af2d61a6c89d406a64a3fb5deaa7d4d84aab994;hpb=cea03cdb6946841a951f6c622b117459ea5ca157;p=oota-llvm.git

diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 2af2d61a6c8..f68cf0e40df 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -125,8 +125,7 @@ specific vector types are target dependent.
 
 //===---------------------------------------------------------------------===//
 
-We should add 'unaligned load/store' nodes, and produce them from code like
-this:
+We should produce an unaligned load from code like this:
 
 v4sf example(float *P) {
   return (v4sf){P[0], P[1], P[2], P[3] };
@@ -167,52 +166,14 @@ if anyone cared enough about sincos.
 
 //===---------------------------------------------------------------------===//
 
-Scalar Repl cannot currently promote this testcase to 'ret long cst':
-
-  %struct.X = type { i32, i32 }
-  %struct.Y = type { %struct.X }
-
-define i64 @bar() {
-  %retval = alloca %struct.Y, align 8
-  %tmp12 = getelementptr %struct.Y* %retval, i32 0, i32 0, i32 0
-  store i32 0, i32* %tmp12
-  %tmp15 = getelementptr %struct.Y* %retval, i32 0, i32 0, i32 1
-  store i32 1, i32* %tmp15
-  %retval.upgrd.1 = bitcast %struct.Y* %retval to i64*
-  %retval.upgrd.2 = load i64* %retval.upgrd.1
-  ret i64 %retval.upgrd.2
-}
-
-it should be extended to do so.
-
-//===---------------------------------------------------------------------===//
-
--scalarrepl should promote this to be a vector scalar.
-
-  %struct..0anon = type { <4 x float> }
-
-define void @test1(<4 x float> %V, float* %P) {
-  %u = alloca %struct..0anon, align 16
-  %tmp = getelementptr %struct..0anon* %u, i32 0, i32 0
-  store <4 x float> %V, <4 x float>* %tmp
-  %tmp1 = bitcast %struct..0anon* %u to [4 x float]*
-  %tmp.upgrd.1 = getelementptr [4 x float]* %tmp1, i32 0, i32 1
-  %tmp.upgrd.2 = load float* %tmp.upgrd.1
-  %tmp3 = mul float %tmp.upgrd.2, 2.000000e+00
-  store float %tmp3, float* %P
-  ret void
-}
-
-//===---------------------------------------------------------------------===//
-
 Turn this into a single byte store with no load (the other 3 bytes are
 unmodified):
 
-void %test(uint* %P) {
-  %tmp = load uint* %P
-  %tmp14 = or uint %tmp, 3305111552
-  %tmp15 = and uint %tmp14, 3321888767
-  store uint %tmp15, uint* %P
+define void @test(i32* %P) {
+  %tmp = load i32* %P
+  %tmp14 = or i32 %tmp, 3305111552
+  %tmp15 = and i32 %tmp14, 3321888767
+  store i32 %tmp15, i32* %P
   ret void
 }
 
@@ -634,32 +595,6 @@ once.
 
 //===---------------------------------------------------------------------===//
 
-We should extend parameter attributes to capture more information about
-pointer parameters for alias analysis. Some ideas:
-
-1. Add a "nocapture" attribute, which indicates that the callee does not store
-   the address of the parameter into a global or any other memory location
-   visible to the callee. This can be used to make basicaa and other analyses
-   more powerful. It is true for things like memcpy, strcat, and many other
-   things, including structs passed by value, most C++ references, etc.
-2. Generalize readonly to be set on parameters. This is important mod/ref
-   info for the function, which is important for basicaa and others. It can
-   also be used by the inliner to avoid inserting a memcpy for byval
-   arguments when the function is inlined.
-
-These functions can be inferred by various analysis passes such as the
-globalsmodrefaa pass.  Note that getting #2 right is actually really tricky.
-Consider this code:
-
-struct S;  S G;
-void caller(S byvalarg) { G.field = 1; ... }
-void callee() { caller(G); }
-
-The fact that the caller does not modify byval arg is not enough, we need
-to know that it doesn't modify G either. This is very tricky.
-
-//===---------------------------------------------------------------------===//
-
 We should add an FRINT node to the DAG to model targets that have legal
 implementations of ceil/floor/rint.
 
@@ -814,16 +749,6 @@ be done safely if "b" isn't modified between the strlen and memcpy of course.
 
 //===---------------------------------------------------------------------===//
 
-We should be able to evaluate this loop:
-
-int test(int x_offs) {
-  while (x_offs > 4)
-     x_offs -= 4;
-  return x_offs;
-}
-
-//===---------------------------------------------------------------------===//
-
 Reassociate should turn things like:
 
 int factorial(int X) {
@@ -1690,6 +1615,19 @@ foo:
 
 //===---------------------------------------------------------------------===//
 
+The arg promotion pass should make use of nocapture to make its alias analysis
+stuff much more precise.
+
+//===---------------------------------------------------------------------===//
+
+The following functions should be optimized to use a select instead of a
+branch (from gcc PR40072):
+
+char char_int(int m) {if(m>7) return 0; return m;}
+int int_char(char m) {if(m>7) return 0; return m;}
+
+//===---------------------------------------------------------------------===//
+
 Instcombine should replace the load with a constant in:
 
 static const char x[4] = {'a', 'b', 'c', 'd'};
@@ -1704,16 +1642,38 @@ is a null (making it a C string). There's no need for these restrictions.
 
 //===---------------------------------------------------------------------===//
 
-The arg promotion pass should make use of nocapture to make its alias analysis
-stuff much more precise.
+InstCombine's "turn load from constant into constant" optimization should be
+more aggressive in the presence of bitcasts. For example, because of unions,
+this code:
 
-//===---------------------------------------------------------------------===//
+union vec2d {
+    double e[2];
+    double v __attribute__((vector_size(16)));
+};
+typedef union vec2d vec2d;
 
-The following functions should be optimized to use a select instead of a
-branch (from gcc PR40072):
+static vec2d a={{1,2}}, b={{3,4}};
+
+vec2d foo () {
+    return (vec2d){ .v = a.v + b.v * (vec2d){{5,5}}.v };
+}
 
-char char_int(int m) {if(m>7) return 0; return m;}
-int int_char(char m) {if(m>7) return 0; return m;}
+Compiles into:
+
+@a = internal constant %0 { [2 x double]
+     [double 1.000000e+00, double 2.000000e+00] }, align 16
+@b = internal constant %0 { [2 x double]
+     [double 3.000000e+00, double 4.000000e+00] }, align 16
+...
+define void @foo(%struct.vec2d* noalias nocapture sret %agg.result) nounwind {
+entry:
+  %0 = load <2 x double>* getelementptr (%struct.vec2d*
+           bitcast (%0* @a to %struct.vec2d*), i32 0, i32 0), align 16
+  %1 = load <2 x double>* getelementptr (%struct.vec2d*
+           bitcast (%0* @b to %struct.vec2d*), i32 0, i32 0), align 16
 
-//===---------------------------------------------------------------------===//
+Instcombine should be able to optimize away the loads (and thus the globals).
+
+
+//===---------------------------------------------------------------------===//
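For reference, a minimal standalone C sketch of the vec2d example from the last
hunk is below. The main() driver and the printf output are illustrative
additions, not part of README.txt, and it assumes a GCC-compatible compiler
that supports __attribute__((vector_size)). With both globals constant, the
whole call should fold to {1 + 3*5, 2 + 4*5} = {16, 22}, which is what
"optimize away the loads (and thus the globals)" amounts to.

/* Sketch only: mirrors the union-based example from the diff above. */
#include <stdio.h>

union vec2d {
    double e[2];
    double v __attribute__((vector_size(16)));
};
typedef union vec2d vec2d;

static vec2d a = {{1, 2}}, b = {{3, 4}};

static vec2d foo(void) {
    /* Same computation as the README entry: a.v + b.v * {5,5}. */
    return (vec2d){ .v = a.v + b.v * (vec2d){{5, 5}}.v };
}

int main(void) {
    vec2d r = foo();
    /* If the constant loads are folded, this is just {16, 22}:
       1 + 3*5 = 16 and 2 + 4*5 = 22. */
    printf("%g %g\n", r.e[0], r.e[1]);
    return 0;
}

Building this at -O2 and inspecting the emitted IR is an easy way to check
whether the loads from @a and @b (and the globals themselves) were removed.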