From 1c73c7be9df4ccd6f97e4ab3f67d0b823efcfaa2 Mon Sep 17 00:00:00 2001
From: Nate Begeman <natebegeman@mac.com>
Date: Wed, 3 Aug 2005 23:26:28 +0000
Subject: [PATCH] Scalar SSE: load +0.0 -> xorps/xorpd Scalar SSE: a < b ? c :
 0.0 -> cmpss, andps Scalar SSE: float -> i16 needs to be promoted

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@22637 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelPattern.cpp | 202 ++++++++++++++++++------------
 lib/Target/X86/X86InstrInfo.td    |  11 ++
 2 files changed, 133 insertions(+), 80 deletions(-)
diff --git a/lib/Target/X86/X86ISelPattern.cpp b/lib/Target/X86/X86ISelPattern.cpp
index 41bd222e110..47e81791479 100644
--- a/lib/Target/X86/X86ISelPattern.cpp
+++ b/lib/Target/X86/X86ISelPattern.cpp
@@ -189,6 +189,7 @@ namespace {
 
         // SSE has no i16 to fp conversion, only i32
         setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+        setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
 
         // We don't support sin/cos/sqrt/fmod
         setOperationAction(ISD::FSIN , MVT::f64, Expand);
@@ -201,6 +202,8 @@ namespace {
         setOperationAction(ISD::FABS , MVT::f32, Expand);
         setOperationAction(ISD::FNEG , MVT::f32, Expand);
         setOperationAction(ISD::SREM , MVT::f32, Expand);
+
+        addLegalFPImmediate(+0.0); // xorps / xorpd
       } else {
         // Set up the FP register classes.
         addRegisterClass(MVT::f64, X86::RFPRegisterClass);
@@ -1114,8 +1117,8 @@ namespace {
     bool EmitOrOpOp(SDOperand Op1, SDOperand Op2, unsigned DestReg);
     void EmitCMP(SDOperand LHS, SDOperand RHS, bool isOnlyUse);
     bool EmitBranchCC(MachineBasicBlock *Dest, SDOperand Chain, SDOperand Cond);
-    void EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
-                      unsigned RTrue, unsigned RFalse, unsigned RDest);
+    void EmitSelectCC(SDOperand Cond, SDOperand True, SDOperand False, 
+                      MVT::ValueType SVT, unsigned RDest);
     unsigned SelectExpr(SDOperand N);
 
     X86AddressMode SelectAddrExprs(const X86ISelAddressMode &IAM);
@@ -1747,11 +1750,11 @@ bool ISel::EmitBranchCC(MachineBasicBlock *Dest, SDOperand Chain,
 }
 
 /// EmitSelectCC - Emit code into BB that performs a select operation between
-/// the two registers RTrue and RFalse, generating a result into RDest.  Return
-/// true if the fold cannot be performed.
+/// the two registers RTrue and RFalse, generating a result into RDest.
 ///
-void ISel::EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
-                        unsigned RTrue, unsigned RFalse, unsigned RDest) {
+void ISel::EmitSelectCC(SDOperand Cond, SDOperand True, SDOperand False,
+                        MVT::ValueType SVT, unsigned RDest) {
+  unsigned RTrue, RFalse;
   enum Condition {
     EQ, NE, LT, LE, GT, GE, B, BE, A, AE, P, NP,
     NOT_SET
@@ -1773,12 +1776,13 @@ void ISel::EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
     X86::FCMOVA ,  X86::FCMOVAE, X86::FCMOVP , X86::FCMOVNP
   };
   static const int SSE_CMOVTAB[] = {
-    0 /* CMPEQSS */, 4 /* CMPNEQSS */, 1 /* CMPLTSS */, 2 /* CMPLESS */,
-    1 /* CMPLTSS */, 2 /* CMPLESS */, /*missing*/0, /*missing*/0,
-    /*missing*/0,  /*missing*/0, /*missing*/0, /*missing*/0
+    /*CMPEQ*/   0, /*CMPNEQ*/   4, /*missing*/  0, /*missing*/  0,
+    /*missing*/ 0, /*missing*/  0, /*CMPLT*/    1, /*CMPLE*/    2,
+    /*CMPNLE*/  6, /*CMPNLT*/   5, /*CMPUNORD*/ 3, /*CMPORD*/   7
   };
-
-  if (SetCCSDNode *SetCC = dyn_cast<SetCCSDNode>(Cond)) {
+  
+  SetCCSDNode *SetCC;
+  if ((SetCC = dyn_cast<SetCCSDNode>(Cond))) {
     if (MVT::isInteger(SetCC->getOperand(0).getValueType())) {
       switch (SetCC->getCondition()) {
       default: assert(0 && "Unknown integer comparison!");
@@ -1793,20 +1797,6 @@ void ISel::EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
       case ISD::SETULE: CondCode = BE; break;
       case ISD::SETUGE: CondCode = AE; break;
       }
-    } else if (X86ScalarSSE) {
-      switch (SetCC->getCondition()) {
-      default: assert(0 && "Unknown scalar fp comparison!");
-      case ISD::SETEQ:  CondCode = EQ; break;
-      case ISD::SETNE:  CondCode = NE; break;
-      case ISD::SETULT:
-      case ISD::SETLT:  CondCode = LT; break;
-      case ISD::SETULE:
-      case ISD::SETLE:  CondCode = LE; break;
-      case ISD::SETUGT:
-      case ISD::SETGT:  CondCode = GT; break;
-      case ISD::SETUGE:
-      case ISD::SETGE:  CondCode = GE; break;
-      }
     } else {
       // On a floating point condition, the flags are set as follows:
       // ZF  PF  CF   op
@@ -1843,55 +1833,106 @@ void ISel::EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
     }
   }
 
-  // There's no SSE equivalent of FCMOVE.  In some cases we can fake it up, in
-  // Others we will have to do the PowerPC thing and generate an MBB for the
-  // true and false values and select between them with a PHI.
+  // There's no SSE equivalent of FCMOVE.  For cases where we set a condition
+  // code above and one of the results of the select is +0.0, then we can fake 
+  // it up through a clever AND with mask.  Otherwise, we will fall through to
+  // the code below that will use a PHI node to select the right value.
   if (X86ScalarSSE && (SVT == MVT::f32 || SVT == MVT::f64)) {
-    if (0 && CondCode != NOT_SET) {
-      // FIXME: check for min and max
-    } else {
-      // FIXME: emit a direct compare and branch rather than setting a cond reg
-      //        and testing it.
-      unsigned CondReg = SelectExpr(Cond);
-      BuildMI(BB, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
-
-      // Create an iterator with which to insert the MBB for copying the false
-      // value and the MBB to hold the PHI instruction for this SetCC.
-      MachineBasicBlock *thisMBB = BB;
-      const BasicBlock *LLVM_BB = BB->getBasicBlock();
-      ilist<MachineBasicBlock>::iterator It = BB;
-      ++It;
-
-      //  thisMBB:
-      //  ...
-      //   TrueVal = ...
-      //   cmpTY ccX, r1, r2
-      //   bCC sinkMBB
-      //   fallthrough --> copy0MBB
-      MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
-      MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
-      BuildMI(BB, X86::JNE, 1).addMBB(sinkMBB);
-      MachineFunction *F = BB->getParent();
-      F->getBasicBlockList().insert(It, copy0MBB);
-      F->getBasicBlockList().insert(It, sinkMBB);
-      // Update machine-CFG edges
-      BB->addSuccessor(copy0MBB);
-      BB->addSuccessor(sinkMBB);
-
-      //  copy0MBB:
-      //   %FalseValue = ...
-      //   # fallthrough to sinkMBB
-      BB = copy0MBB;
-      // Update machine-CFG edges
-      BB->addSuccessor(sinkMBB);
-
-      //  sinkMBB:
-      //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
-      //  ...
-      BB = sinkMBB;
-      BuildMI(BB, X86::PHI, 4, RDest).addReg(RFalse)
-        .addMBB(copy0MBB).addReg(RTrue).addMBB(thisMBB);
+    if (SetCC && SetCC->getOperand(0).getValueType() == SVT && 
+        NOT_SET != CondCode) {
+      ConstantFPSDNode *CT = dyn_cast<ConstantFPSDNode>(True);
+      ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(False);
+      bool TrueZero = CT && CT->isExactlyValue(0.0);
+      bool FalseZero = CF && CF->isExactlyValue(0.0);
+      if (TrueZero || FalseZero) {
+        SDOperand LHS = Cond.getOperand(0);
+        SDOperand RHS = Cond.getOperand(1);
+        
+        // Select the two halves of the condition
+        unsigned RLHS, RRHS;
+        if (getRegPressure(LHS) > getRegPressure(RHS)) {
+          RLHS = SelectExpr(LHS);
+          RRHS = SelectExpr(RHS);
+        } else {
+          RRHS = SelectExpr(RHS);
+          RLHS = SelectExpr(LHS);
+        }
+        
+        // Emit the comparison and generate a mask from it
+        unsigned MaskReg = MakeReg(SVT);
+        unsigned Opc = (SVT == MVT::f32) ? X86::CMPSSrr : X86::CMPSDrr;
+        BuildMI(BB, Opc, 3, MaskReg).addReg(RLHS).addReg(RRHS)
+          .addImm(SSE_CMOVTAB[CondCode]);
+        
+        if (TrueZero) {
+          RFalse = SelectExpr(False);
+          Opc = (SVT == MVT::f32) ? X86::ANDNPSrr : X86::ANDNPDrr;
+          BuildMI(BB, Opc, 2, RDest).addReg(MaskReg).addReg(RFalse);
+        } else {
+          RTrue = SelectExpr(True);
+          Opc = (SVT == MVT::f32) ? X86::ANDPSrr : X86::ANDPDrr;
+          BuildMI(BB, Opc, 2, RDest).addReg(MaskReg).addReg(RTrue);
+        }
+        return;
+      }
     }
+  }
+    
+  // Select the true and false values for use in both the SSE PHI case, and the
+  // integer or x87 cmov cases below.
+  if (getRegPressure(True) > getRegPressure(False)) {
+    RTrue = SelectExpr(True);
+    RFalse = SelectExpr(False);
+  } else {
+    RFalse = SelectExpr(False);
+    RTrue = SelectExpr(True);
+  }
+
+  // Since there's no SSE equivalent of FCMOVE, and we couldn't generate an
+  // AND with mask, we'll have to do the normal RISC thing and generate a PHI
+  // node to select between the true and false values.
+  if (X86ScalarSSE && (SVT == MVT::f32 || SVT == MVT::f64)) {
+    // FIXME: emit a direct compare and branch rather than setting a cond reg
+    //        and testing it.
+    unsigned CondReg = SelectExpr(Cond);
+    BuildMI(BB, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
+    
+    // Create an iterator with which to insert the MBB for copying the false
+    // value and the MBB to hold the PHI instruction for this SetCC.
+    MachineBasicBlock *thisMBB = BB;
+    const BasicBlock *LLVM_BB = BB->getBasicBlock();
+    ilist<MachineBasicBlock>::iterator It = BB;
+    ++It;
+    
+    //  thisMBB:
+    //  ...
+    //   TrueVal = ...
+    //   cmpTY ccX, r1, r2
+    //   bCC sinkMBB
+    //   fallthrough --> copy0MBB
+    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
+    BuildMI(BB, X86::JNE, 1).addMBB(sinkMBB);
+    MachineFunction *F = BB->getParent();
+    F->getBasicBlockList().insert(It, copy0MBB);
+    F->getBasicBlockList().insert(It, sinkMBB);
+    // Update machine-CFG edges
+    BB->addSuccessor(copy0MBB);
+    BB->addSuccessor(sinkMBB);
+    
+    //  copy0MBB:
+    //   %FalseValue = ...
+    //   # fallthrough to sinkMBB
+    BB = copy0MBB;
+    // Update machine-CFG edges
+    BB->addSuccessor(sinkMBB);
+    
+    //  sinkMBB:
+    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+    //  ...
+    BB = sinkMBB;
+    BuildMI(BB, X86::PHI, 4, RDest).addReg(RFalse)
+      .addMBB(copy0MBB).addReg(RTrue).addMBB(thisMBB);
     return;
   }
 
@@ -2285,6 +2326,13 @@ unsigned ISel::SelectExpr(SDOperand N) {
     addConstantPoolReference(BuildMI(BB, X86::LEA32r, 4, Result), Tmp1);
     return Result;
   case ISD::ConstantFP:
+    if (X86ScalarSSE) {
+      assert(cast<ConstantFPSDNode>(N)->isExactlyValue(+0.0) &&
+             "SSE only supports +0.0");
+      Opc = (N.getValueType() == MVT::f32) ? X86::FLD0SS : X86::FLD0SD;
+      BuildMI(BB, Opc, 0, Result);
+      return Result;
+    }
     ContainsFPCode = true;
     Tmp1 = Result;   // Intermediate Register
     if (cast<ConstantFPSDNode>(N)->getValue() < 0.0 ||
@@ -2969,14 +3017,8 @@ unsigned ISel::SelectExpr(SDOperand N) {
   }
 
   case ISD::SELECT:
-    if (getRegPressure(N.getOperand(1)) > getRegPressure(N.getOperand(2))) {
-      Tmp2 = SelectExpr(N.getOperand(1));
-      Tmp3 = SelectExpr(N.getOperand(2));
-    } else {
-      Tmp3 = SelectExpr(N.getOperand(2));
-      Tmp2 = SelectExpr(N.getOperand(1));
-    }
-    EmitSelectCC(N.getOperand(0), N.getValueType(), Tmp2, Tmp3, Result);
+    EmitSelectCC(N.getOperand(0), N.getOperand(1), N.getOperand(2),
+                 N.getValueType(), Result);
     return Result;
 
   case ISD::SDIV:
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 53a82ba3802..df16e04b41c 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1471,6 +1471,13 @@ def UCOMISSrr: I<0x2E, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
 def UCOMISSrm: I<0x2E, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
                 "ucomiss {$src, $dst|$dst, $src}">, TB;
 
+// Pseudo-instructions that map to fld0 to xorps/xorpd for sse.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+def FLD0SS : I<0x57, MRMSrcReg, (ops RXMM:$dst),
+                "xorps $dst, $dst">, TB;
+def FLD0SD : I<0x57, MRMSrcReg, (ops RXMM:$dst),
+                "xorpd $dst, $dst">, TB, OpSize;
+
 let isTwoAddress = 1 in {
 let isCommutable = 1 in {
 def ADDSSrr : I<0x58, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
@@ -1489,6 +1496,10 @@ def ORPSrr : I<0x56, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
                 "orps {$src, $dst|$dst, $src}">, TB;
 def ORPDrr : I<0x56, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
                 "orpd {$src, $dst|$dst, $src}">, TB, OpSize;
+def XORPSrr : I<0x57, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+                "xorps {$src, $dst|$dst, $src}">, TB;
+def XORPDrr : I<0x57, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+                "xorpd {$src, $dst|$dst, $src}">, TB, OpSize;
 }
 def ANDNPSrr : I<0x55, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
                 "andnps {$src, $dst|$dst, $src}">, TB;
-- 
2.34.1