add a note

[oota-llvm.git] / lib / Target / README.txt
diff --git a/lib/Target/README.txt b/lib/Target/README.txt

index d9091d0ca7698cb4e7ef3609d3898d2241d8cc6b..c369c8f2434cca7dc1bb87592d1f35edcb0be7c8 100644 (file)
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -22,14 +22,6 @@ loads from the static array.
  Get the C front-end to expand hypot(x,y) -> llvm.sqrt(x*x+y*y) when errno and
  precision don't matter (-ffast-math).  Misc/mandel will like this. :)
  
-===-------------------------------------------------------------------------===
-
-For all targets, not just X86:
-When llvm.memcpy, llvm.memset, or llvm.memmove are lowered, they should be 
-optimized to a few store instructions if the source is constant and the length
-is smallish (< 8). This will greatly help some tests like Shootout/strcat.c
-and fldry.
-
  //===---------------------------------------------------------------------===//
  
  Solve this DAG isel folding deficiency:
@@ -64,11 +56,53 @@ Number 1 is the preferred solution.
  
  //===---------------------------------------------------------------------===//
  
-For dag combiner and instcombine:
-Fold: "A / (B << N)" where B is a power of 2, to "A >> (N + log2(B))".
-Fold: "A % (B << N)" where B is a power of 2, to "A & ((B << N) - 1)".
+DAG combine this into mul A, 8:
+
+int %test(int %A) {
+  %B = mul int %A, 8  ;; shift
+  %C = add int %B, 7  ;; dead, no demanded bits.
+  %D = and int %C, -8 ;; dead once add is gone.
+  ret int %D
+}
+
+This sort of thing occurs in the alloca lowering code and other places that
+are generating alignment of an already aligned value.
+
+//===---------------------------------------------------------------------===//
+
+Turn this into a signed shift right in instcombine:
+
+int f(unsigned x) {
+  return x >> 31 ? -1 : 0;
+}
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25600
+http://gcc.gnu.org/ml/gcc-patches/2006-02/msg01492.html
+
+//===---------------------------------------------------------------------===//
+
+We should reassociate:
+int f(int a, int b){ return a * a + 2 * a * b + b * b; }
+into:
+int f(int a, int b) { return a * (a + 2 * b) + b * b; }
+to eliminate a multiply.
+
+//===---------------------------------------------------------------------===//
  
-int t(int X, int Y) { return 1 << (X+4); }  --> 16 << X
+On targets with expensive 64-bit shifts, we could LSR this:
+
+for (i = ...; ++i)
+   x = 1ULL << i;
+
+into:
+ long long tmp = 1;
+ for (i = ...; ++i, tmp+=tmp)
+   x = tmp;
+
+This would be a win on ppc32, but not x86 or ppc64.
+
+//===---------------------------------------------------------------------===//
  
+Shrink: (setlt (loadi32 P), 0) -> (setlt (loadi8 Phi), 0)  [Phi = high byte of P]
  
  //===---------------------------------------------------------------------===//