* Add support for compiling functions in both ARM and Thumb mode, then taking
  the smallest.

* Add support for compiling individual basic blocks in Thumb mode, when in a
  larger ARM function.  This can be used for presumed cold code, like paths
  to abort (failure path of asserts), EH handling code, etc.

//===---------------------------------------------------------------------===//

LPCRELL0:
        mov r1, #PCRELV0
        add r1, pc
        ldr r0, [r0, r1]
        mov pc, r0
        .align 2
LJTI1_0_0:
        .long LBB1_3
        ...

We should be able to generate:

LPCRELL0:
        add r1, LJTI1_0_0
        ldr r0, [r0, r1]
        mov pc, r0
        .align 2
LJTI1_0_0:
        .long LBB1_3
        ...

//===---------------------------------------------------------------------===//

We compile the following:

define i16 @func_entry_2E_ce(i32 %i) {
        switch i32 %i, label %bb12.exitStub [
                 i32 0, label %bb4.exitStub
                 i32 1, label %bb9.exitStub
                 i32 2, label %bb4.exitStub
                 i32 3, label %bb4.exitStub
                 i32 7, label %bb9.exitStub
                 i32 8, label %bb.exitStub
                 i32 9, label %bb9.exitStub
        ]

bb12.exitStub:
        ret i16 0

bb4.exitStub:
        ret i16 1

bb9.exitStub:
        ret i16 2

bb.exitStub:
        ret i16 3
}

into:

_func_entry_2E_ce:
        mov r2, #1
        lsl r2, r0
        cmp r0, #9
        bhi LBB1_4      @bb12.exitStub
LBB1_1: @newFuncRoot
        mov r1, #13
        tst r2, r1
        bne LBB1_5      @bb4.exitStub
LBB1_2: @newFuncRoot
        ldr r1, LCPI1_0
        tst r2, r1
        bne LBB1_6      @bb9.exitStub
LBB1_3: @newFuncRoot
        mov r1, #1
        lsl r1, r1, #8
        tst r2, r1
        bne LBB1_7      @bb.exitStub
LBB1_4: @bb12.exitStub
        mov r0, #0
        bx lr
LBB1_5: @bb4.exitStub
        mov r0, #1
        bx lr
LBB1_6: @bb9.exitStub
        mov r0, #2
        bx lr
LBB1_7: @bb.exitStub
        mov r0, #3
        bx lr
LBB1_8:
        .align 2
LCPI1_0:
        .long 642

gcc compiles to:

        cmp r0, #9
        ...
L12:
        .align 2
L11:
        .long 642

GCC is doing a couple of clever things here:
  1. It is predicating one of the returns.  This isn't a clear win though: in
     cases where that return isn't taken, it is replacing one condbranch with
     two 'ne' predicated instructions.
  2. It is sinking the shift of "1 << i" into the tst, and using ands instead
     of tst.  This will probably require whole-function isel.
  3. GCC emits:
        tst r1, #256
     we emit:
        mov r1, #1
        lsl r1, r1, #8
        tst r2, r1

//===---------------------------------------------------------------------===//

This is especially bad when dynamic alloca is used.  All fixed-size stack
objects are referenced off the frame pointer with negative offsets.  See
oggenc for an example.
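For illustration, a hypothetical C function in the same shape (a sketch, not
taken from oggenc): once the frame contains a dynamic alloca, sp moves by a
runtime-dependent amount, so the fixed-size objects can only be addressed
relative to the frame pointer.

#include <alloca.h>
#include <string.h>

int f(int n) {
  int buf[16];                        /* fixed-size: fp-relative access */
  int *dyn = alloca(n * sizeof(int)); /* dynamic: adjusts sp at runtime */
  memset(buf, 0, sizeof(buf));
  for (int i = 0; i < n; ++i)
    dyn[i] = buf[i & 15];
  return dyn[0];
}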
//===---------------------------------------------------------------------===//

Poor codegen for test/CodeGen/ARM/select.ll f7:

        ldr r5, LCPI1_0
LPC0:
        add r5, pc
        ldr r6, LCPI1_1
        ldr r2, LCPI1_2
        mov r3, r6
        mov lr, pc
        bx r5

//===---------------------------------------------------------------------===//

Make the register allocator / spiller smarter so we can re-materialize
"mov r, imm", etc.  Almost all Thumb instructions clobber the condition
codes, which makes this tricky.

//===---------------------------------------------------------------------===//

Add ldmia, stmia support.

//===---------------------------------------------------------------------===//

Thumb load / store address mode offsets are scaled.  The values kept in the
instruction operands are pre-scale values.  This probably ought to be changed
to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.

//===---------------------------------------------------------------------===//

We need to make (some of the) Thumb1 instructions predicable.  That will allow
shrinking of predicated Thumb2 instructions.  To allow this, we need to be able
to toggle the 's' bit, since these instructions do not set CPSR when they are
inside IT blocks.

//===---------------------------------------------------------------------===//

Make use of hi register variants of cmp: tCMPhir / tCMPZhir.

//===---------------------------------------------------------------------===//

Thumb1 immediate fields sometimes keep pre-scaled values.  See
Thumb1RegisterInfo::eliminateFrameIndex.  This is inconsistent with ARM and
Thumb2.

//===---------------------------------------------------------------------===//

Rather than having tBR_JTr print a ".align 2" and having the constant island
pass pad it, add a target-specific ALIGN instruction instead.  That way,
GetInstSizeInBytes won't have to over-estimate.  It can also be used for a
loop alignment pass.
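To make the size-estimate point concrete, a small sketch in plain C (not the
LLVM API): the padding a ".align" emits depends on the offset it lands at,
which isn't known per-instruction, so GetInstSizeInBytes has to assume the
worst case for tBR_JTr, whereas an explicit ALIGN instruction would carry that
padding as its own, separately accounted-for size.

#include <stdio.h>

/* Bytes of padding needed to align `offset` up to `align` bytes. */
static unsigned pad_at(unsigned offset, unsigned align) {
  return (align - offset % align) % align;
}

int main(void) {
  /* After a run of 2-byte Thumb instructions, an .align 2 (4-byte) pad is
     0 or 2 bytes depending on layout; a fixed estimate must assume 2. */
  printf("%u %u\n", pad_at(100, 4), pad_at(102, 4)); /* prints: 0 2 */
  return 0;
}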