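Fix bogus signed division-by-power-of-2 (a plain arithmetic shift right was wrong for negative input); this adds the extr.u instruction. Also merge the split FRCPAS1FLOAT/FRCPAS1PREDICATE operations into a single FRCPAS1 that defines both the FP result and the predicate.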

author Duraid Madina <duraid@octopus.com.au>

Fri, 8 Apr 2005 10:01:48 +0000 (10:01 +0000)

committer Duraid Madina <duraid@octopus.com.au>

Fri, 8 Apr 2005 10:01:48 +0000 (10:01 +0000)
author Duraid Madina <duraid@octopus.com.au>
Fri, 8 Apr 2005 10:01:48 +0000 (10:01 +0000)
committer Duraid Madina <duraid@octopus.com.au>
Fri, 8 Apr 2005 10:01:48 +0000 (10:01 +0000)
diff --git a/lib/Target/IA64/IA64ISelPattern.cpp b/lib/Target/IA64/IA64ISelPattern.cpp

index 03ff36ab04a8e5fc85866edd5c5f51122d32b6cc..13a4689bd31d793ef8ef904c7a709d5baea11dd8 100644 (file)
--- a/lib/Target/IA64/IA64ISelPattern.cpp
+++ b/lib/Target/IA64/IA64ISelPattern.cpp
@@ -1122,9 +1122,21 @@ pC = pA OR pB
        switch (ponderIntegerDivisionBy(N.getOperand(1), isSigned, Tmp3)) {
         case 1: // division by a constant that's a power of 2
           Tmp1 = SelectExpr(N.getOperand(0));
-         if(isSigned)   // becomes a shift right:
-           BuildMI(BB, IA64::SHRS, 2, Result).addReg(Tmp1).addImm(Tmp3);
-         else
+         if(isSigned) {  // argument could be negative, so emit some code:
+           unsigned divAmt=Tmp3;
+           unsigned tempGR1=MakeReg(MVT::i64);
+           unsigned tempGR2=MakeReg(MVT::i64);
+           unsigned tempGR3=MakeReg(MVT::i64);
+           BuildMI(BB, IA64::SHRS, 2, tempGR1)
+             .addReg(Tmp1).addImm(divAmt-1);
+           BuildMI(BB, IA64::EXTRU, 3, tempGR2)
+             .addReg(tempGR1).addImm(64-divAmt).addImm(divAmt);
+           BuildMI(BB, IA64::ADD, 2, tempGR3)
+             .addReg(Tmp1).addReg(tempGR2);
+           BuildMI(BB, IA64::SHRS, 2, Result)
+             .addReg(tempGR3).addImm(divAmt);
+         }
+         else // unsigned div-by-power-of-2 becomes a simple shift right:
             BuildMI(BB, IA64::SHRU, 2, Result).addReg(Tmp1).addImm(Tmp3);
           return Result; // early exit
        }
@@ -1171,10 +1183,11 @@ pC = pA OR pB
      }
  
      // we start by computing an approximate reciprocal (good to 9 bits?)
-    // note, this instruction writes _both_ TmpF5 (answer) and tmpPR (predicate)
-    // FIXME: or at least, it should!!
-    BuildMI(BB, IA64::FRCPAS1FLOAT, 2, TmpF5).addReg(TmpF3).addReg(TmpF4);
-    BuildMI(BB, IA64::FRCPAS1PREDICATE, 2, TmpPR).addReg(TmpF3).addReg(TmpF4);
+    // note, this instruction writes _both_ TmpF5 (answer) and TmpPR (predicate)
+    BuildMI(BB, IA64::FRCPAS1, 4)
+      .addReg(TmpF5, MachineOperand::Def)
+      .addReg(TmpPR, MachineOperand::Def)
+      .addReg(TmpF3).addReg(TmpF4);
  
      if(!isModulus) { // if this is a divide, we worry about div-by-zero
        unsigned bogusPR=MakeReg(MVT::i1); // won't appear, due to twoAddress
diff --git a/lib/Target/IA64/IA64InstrInfo.td b/lib/Target/IA64/IA64InstrInfo.td

index 0424b3be2efe34ebaac72185fcb4b6184e6930ad..6177c9ba99677e89445f264c24267dcaadd261b0 100644 (file)
--- a/lib/Target/IA64/IA64InstrInfo.td
+++ b/lib/Target/IA64/IA64InstrInfo.td
@@ -120,6 +120,9 @@ def SHRS : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
  def SHRSI : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s21imm:$imm),
    "shr $dst = $src1, $imm;;">;
  
+def EXTRU : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm1, u6imm:$imm2),
+  "extr.u $dst = $src1, $imm1, $imm2;;">;
+
  def DEPZ : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm1, u6imm:$imm2),          "dep.z $dst = $src1, $imm1, $imm2;;">;
  
  def SXT1 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt1 $dst = $src;;">;
@@ -258,14 +261,8 @@ def CFNMAS1 : AForm<0x03, 0x0b,
    (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
      "($qp) fnma.s1 $dst = $src1, $src2, $src3;;">;
  
-// FIXME: we 'explode' FRCPA (which should write two registers) into two
-// operations that write one each. this is a waste, and is also destroying
-// f127. not cool.
-def FRCPAS1FLOAT : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
-  "frcpa.s1 $dst , p0 = $src1, $src2;;">;
-// XXX: this _will_ break things: (f127)
-def FRCPAS1PREDICATE : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
-  "frcpa.s1 f127 , $dst = $src1, $src2;; // XXX FIXME!!!!">;
+def FRCPAS1 : AForm<0x03, 0x0b, (ops FP:$dstFR, PR:$dstPR, FP:$src1, FP:$src2),
+  "frcpa.s1 $dstFR, $dstPR = $src1, $src2;;">;
  
  def XMAL : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
    "xma.l $dst = $src1, $src2, $src3;;">;
author	Duraid Madina <duraid@octopus.com.au>
	Fri, 8 Apr 2005 10:01:48 +0000 (10:01 +0000)
committer	Duraid Madina <duraid@octopus.com.au>
	Fri, 8 Apr 2005 10:01:48 +0000 (10:01 +0000)
lib/Target/IA64/IA64ISelPattern.cpp		patch | blob | history
lib/Target/IA64/IA64InstrInfo.td		patch | blob | history