add ppc64 r+i stores with update.

[oota-llvm.git] / lib / Target / README.txt
diff --git a/lib/Target/README.txt b/lib/Target/README.txt

index 48298fae8788903a01721c0b0146d8454fb4a819..23874b7be882a942ff1cd195d02b9150158b23ae 100644 (file)
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1,6 +1,36 @@
  Target Independent Opportunities:
  
-===-------------------------------------------------------------------------===
+//===---------------------------------------------------------------------===//
+
+We should make the following changes to clean up MachineInstr:
+
+1. Add an Opcode field to TargetInstrDescriptor, so you can tell the opcode of
+   an instruction with just a TargetInstrDescriptor*.
+2. Remove the Opcode field from MachineInstr, replacing it with a
+   TargetInstrDescriptor*.
+3. Getting information about a machine instr then becomes:
+     MI->getInfo()->isTwoAddress()
+   instead of:
+     const TargetInstrInfo &TII = ...
+     TII.isTwoAddrInstr(MI->getOpcode())
+
+//===---------------------------------------------------------------------===//
+
+With the recent changes to make the implicit def/use set explicit in
+machineinstrs, we should change the target descriptions for 'call' instructions
+so that the .td files don't list all the call-clobbered registers as implicit
+defs.  Instead, these should be added by the code generator (e.g. on the dag).
+
+This has a number of uses:
+
+1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions
+   for their different impdef sets.
+2. Targets with multiple calling convs (e.g. x86) which have different clobber
+   sets don't need copies of call instructions.
+3. 'Interprocedural register allocation' can be done to reduce the clobber sets
+   of calls.
+
+//===---------------------------------------------------------------------===//
  
  FreeBench/mason contains code like this:
  
@@ -273,6 +303,22 @@ unsigned int swap_32(unsigned int v) {
    return v;
  }
  
+Nor is this:
+
+ushort %bad(ushort %a) {
+entry:
+        %tmp = cast ushort %a to uint           ; <uint> [#uses=1]
+        %tmp2 = shr uint %tmp, ubyte 8          ; <uint> [#uses=1]
+        %tmp2 = cast uint %tmp2 to ushort               ; <ushort> [#uses=1]
+        %tmp5 = shl ushort %a, ubyte 8          ; <ushort> [#uses=1]
+        %tmp6 = or ushort %tmp2, %tmp5          ; <ushort> [#uses=1]
+        ret ushort %tmp6
+}
+
+unsigned short bad(unsigned short a) {
+       return ((a & 0xff00) >> 8 | (a & 0x00ff) << 8);
+}
+
  //===---------------------------------------------------------------------===//
  
  These should turn into single 16-bit (unaligned?) loads on little/big endian
@@ -286,3 +332,87 @@ unsigned short read_16_be(const unsigned char *adr) {
  }
  
  //===---------------------------------------------------------------------===//
+
+-scalarrepl should promote this alloca to a single vector value.
+
+        %struct..0anon = type { <4 x float> }
+implementation   ; Functions:
+void %test1(<4 x float> %V, float* %P) {
+entry:
+        %u = alloca %struct..0anon, align 16            ; <%struct..0anon*> [#uses=2]
+        %tmp = getelementptr %struct..0anon* %u, int 0, uint 0          ; <<4 x float>*> [#uses=1]
+        store <4 x float> %V, <4 x float>* %tmp
+        %tmp1 = cast %struct..0anon* %u to [4 x float]*         ; <[4 x float]*> [#uses=1]
+        %tmp = getelementptr [4 x float]* %tmp1, int 0, int 1           ; <float*> [#uses=1]
+        %tmp = load float* %tmp         ; <float> [#uses=1]
+        %tmp3 = mul float %tmp, 2.000000e+00            ; <float> [#uses=1]
+        store float %tmp3, float* %P
+        ret void
+}
+
+//===---------------------------------------------------------------------===//
+
+-instcombine should handle this transform:
+   setcc (sdiv X, C1), C2
+when X, C1, and C2 are unsigned.  Similarly for udiv and signed operands. 
+
+Currently InstCombine performs this transform only when the signedness of the
+operands matches the signedness of the divide.  See the FIXME in
+InstructionCombining.cpp in the visitSetCondInst method, after the switch case
+for Instruction::UDiv (around line 4447), for more details.
+
+The SingleSource/Benchmarks/Shootout-C++/hash and hash2 tests have examples of
+this construct. 
+
+//===---------------------------------------------------------------------===//
+
+Instcombine misses several of these cases (see the testcase in the patch):
+http://gcc.gnu.org/ml/gcc-patches/2006-10/msg01519.html
+
+//===---------------------------------------------------------------------===//
+
+viterbi speeds up *significantly* if the various "history" related copy loops
+are turned into memcpy calls at the source level.  We need a "loops to memcpy"
+pass.
+
+//===---------------------------------------------------------------------===//
+
+-predsimplify should transform this:
+
+void bad(unsigned x)
+{
+  if (x > 4)
+    bar(12);
+  else if (x > 3)
+    bar(523);
+  else if (x > 2)
+    bar(36);
+  else if (x > 1)
+    bar(65);
+  else if (x > 0)
+    bar(45);
+  else
+    bar(367);
+}
+
+into:
+
+void good(unsigned x)
+{
+  if (x == 4)
+    bar(523);
+  else if (x == 3)
+    bar(36);
+  else if (x == 2)
+    bar(65);
+  else if (x == 1)
+    bar(45);
+  else if (x == 0)
+    bar(367);
+  else
+    bar(12);
+}
+
+to enable further optimizations.
+
+//===---------------------------------------------------------------------===//