X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FREADME.txt;h=2f6842e8cb6094f792fd25379883ae796d39aee8;hb=6248a546f23e7ffa84c171dc364b922e28467275;hp=a6f26a5dfe1954d7f3fc66158391aa10959d0fd3;hpb=d5d2baec2609da3ade9ca205e87c88d35e9e6976;p=oota-llvm.git

diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index a6f26a5dfe1..2f6842e8cb6 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -10,6 +10,11 @@ Reimplement 'select' in terms of 'SEL'.
 * Implement pre/post increment support.  (e.g. PR935)
 * Implement smarter constant generation for binops with large immediates.
 
+A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX
+
+Interesting optimization for PIC codegen on arm-linux:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43129
+
 //===---------------------------------------------------------------------===//
 
 Crazy idea:  Consider code that uses lots of 8-bit or 16-bit values.  By the
@@ -71,26 +76,6 @@ were disabled due to badness with the ARM carry flag on subtracts.
 
 //===---------------------------------------------------------------------===//
 
-We currently compile abs:
-int foo(int p) { return p < 0 ? -p : p; }
-
-into:
-
-_foo:
-        rsb r1, r0, #0
-        cmn r0, #1
-        movgt r1, r0
-        mov r0, r1
-        bx lr
-
-This is very, uh, literal.  This could be a 3 operation sequence:
-  t = (p sra 31); 
-  res = (p xor t)-t
-
-Which would be better.  This occurs in png decode.
-
-//===---------------------------------------------------------------------===//
-
 More load / store optimizations:
 1) Better representation for block transfer? This is from Olden/power:
 
@@ -605,3 +590,112 @@ than the Z bit, we'll need additional logic to reverse the conditionals
 associated with the comparison. Perhaps a pseudo-instruction for the comparison,
 with a post-codegen pass to clean up and handle the condition codes?
 See PR5694 for testcase.
+
+//===---------------------------------------------------------------------===//
+
+Given the following on armv5:
+int test1(int A, int B) {
+  return (A&-8388481)|(B&8388480);
+}
+
+We currently generate:
+	ldr	r2, .LCPI0_0
+	and	r0, r0, r2
+	ldr	r2, .LCPI0_1
+	and	r1, r1, r2
+	orr	r0, r1, r0
+	bx	lr
+
+We should be able to replace the second ldr+and with a bic (i.e. reuse the
+constant which was already loaded).  Not sure what's necessary to do that.
+
+//===---------------------------------------------------------------------===//
+
+The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal:
+
+int a(int x) { return __builtin_bswap32(x); }
+
+a:
+	mov	r1, #255, 24
+	mov	r2, #255, 16
+	and	r1, r1, r0, lsr #8
+	and	r2, r2, r0, lsl #8
+	orr	r1, r1, r0, lsr #24
+	orr	r0, r2, r0, lsl #24
+	orr	r0, r0, r1
+	bx	lr
+
+Something like the following would be better (fewer instructions/registers):
+	eor     r1, r0, r0, ror #16
+	bic     r1, r1, #0xff0000
+	mov     r1, r1, lsr #8
+	eor     r0, r1, r0, ror #8
+	bx	lr
+
+A custom Thumb version would also be a slight improvement over the generic
+version.
+
+//===---------------------------------------------------------------------===//
+
+Consider the following simple C code:
+
+void foo(unsigned char *a, unsigned char *b, int *c) {
+ if ((*a | *b) == 0) *c = 0;
+}
+
+currently llvm-gcc generates something like this (nice branchless code I'd say):
+
+       ldrb    r0, [r0]
+       ldrb    r1, [r1]
+       orr     r0, r1, r0
+       tst     r0, #255
+       moveq   r0, #0
+       streq   r0, [r2]
+       bx      lr
+
+Note that both "tst" and "moveq" are redundant.
+
+//===---------------------------------------------------------------------===//
+
+When loading immediate constants with movt/movw, if there are multiple
+constants needed with the same low 16 bits, and those values are not live at
+the same time, it would be possible to use a single movw instruction, followed
+by multiple movt instructions to rewrite the high bits to different values.
+For example:
+
+  volatile store i32 -1, i32* inttoptr (i32 1342210076 to i32*), align 4,
+  !tbaa
+!0
+  volatile store i32 -1, i32* inttoptr (i32 1342341148 to i32*), align 4,
+  !tbaa
+!0
+
+is compiled and optimized to:
+
+    movw    r0, #32796
+    mov.w    r1, #-1
+    movt    r0, #20480
+    str    r1, [r0]
+    movw    r0, #32796    @ <= this MOVW is not needed, value is there already
+    movt    r0, #20482
+    str    r1, [r0]
+
+//===---------------------------------------------------------------------===//
+
+Improve codegen for select's:
+if (x != 0) x = 1
+if (x == 1) x = 1
+
+ARM codegen used to look like this:
+       mov     r1, r0
+       cmp     r1, #1
+       mov     r0, #0
+       moveq   r0, #1
+
+The naive lowering select between two different values. It should recognize the
+test is equality test so it's more a conditional move rather than a select:
+       cmp     r0, #1
+       movne   r0, #0
+
+Currently this is a ARM specific dag combine. We probably should make it into a
+target-neutral one.