if (X86ScalarSSEf64) {
// We have an impenetrably clever algorithm for ui64->double only.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
- // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
+
+ // We have a faster algorithm for ui32->single only.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
} else
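+   // With Promote, the i32 operand is zero-extended to a wider integer
+   // type and converted from there; the widened value is non-negative,
+   // so a plain signed conversion of it is exact.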
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
}
return Result;
}
-SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
- MVT SrcVT = Op.getOperand(0).getValueType();
- assert(SrcVT.getSimpleVT() == MVT::i64 && "Unknown UINT_TO_FP to lower!");
-
- // We only handle SSE2 f64 target here; caller can handle the rest.
- if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
- return SDValue();
-
- // This algorithm is not obvious. Here it is in C code, more or less:
-/*
- double uint64_to_double( uint32_t hi, uint32_t lo )
- {
- static const __m128i exp = { 0x4330000045300000ULL, 0 };
- static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
-
- // copy ints to xmm registers
- __m128i xh = _mm_cvtsi32_si128( hi );
- __m128i xl = _mm_cvtsi32_si128( lo );
-
- // combine into low half of a single xmm register
- __m128i x = _mm_unpacklo_epi32( xh, xl );
- __m128d d;
- double sd;
-
- // merge in appropriate exponents to give the integer bits the
- // right magnitude
- x = _mm_unpacklo_epi32( x, exp );
-
- // subtract away the biases to deal with the IEEE-754 double precision
- // implicit 1
- d = _mm_sub_pd( (__m128d) x, bias );
-
- // All conversions up to here are exact. The correctly rounded result is
- // calculated using the
- // current rounding mode using the following horizontal add.
- d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
- _mm_store_sd( &sd, d ); //since we are returning doubles in XMM, this
- // store doesn't really need to be here (except maybe to zero the other
- // double)
- return sd;
- }
-*/
+// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
+ // This algorithm is not obvious. Here it is in C code, more or less:
+ /*
+ double uint64_to_double( uint32_t hi, uint32_t lo ) {
+ static const __m128i exp = { 0x4330000045300000ULL, 0 };
+ static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
+
+ // Copy ints to xmm registers.
+ __m128i xh = _mm_cvtsi32_si128( hi );
+ __m128i xl = _mm_cvtsi32_si128( lo );
+
+ // Combine into low half of a single xmm register.
+ __m128i x = _mm_unpacklo_epi32( xh, xl );
+ __m128d d;
+ double sd;
+
+ // Merge in appropriate exponents to give the integer bits the right
+ // magnitude.
+ x = _mm_unpacklo_epi32( x, exp );
+
+ // Subtract away the biases to deal with the IEEE-754 double precision
+ // implicit 1.
+ d = _mm_sub_pd( (__m128d) x, bias );
+
+ // All conversions up to here are exact. The correctly rounded result is
+ // calculated using the current rounding mode using the following
+ // horizontal add.
+ d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
+ _mm_store_sd( &sd, d ); // Because we are returning doubles in XMM, this
+ // store doesn't really need to be here (except
+ // maybe to zero the other double).
+ return sd;
+ }
+ */
// Build some magic constants.
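+ // 0x45300000 and 0x43300000 are the high words of the doubles 0x1.0p84
+ // and 0x1.0p52 ("exp" in the C sketch above), and the two f64 constants
+ // below are the matching biases that get subtracted away.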
- std::vector<Constant*>CV0;
+ std::vector<Constant*> CV0;
CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
CV0.push_back(ConstantInt::get(APInt(32, 0)));
CV0.push_back(ConstantInt::get(APInt(32, 0)));
Constant *C0 = ConstantVector::get(CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 4);
- std::vector<Constant*>CV1;
+ std::vector<Constant*> CV1;
CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32,
XR1, XR2, UnpcklMask);
SDValue CLod0 = DAG.getLoad(MVT::v4i32, DAG.getEntryNode(), CPIdx0,
- PseudoSourceValue::getConstantPool(), 0, false, 16);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32,
- Unpck1, CLod0, UnpcklMask);
+ Unpck1, CLod0, UnpcklMask);
SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Unpck2);
SDValue CLod1 = DAG.getLoad(MVT::v2f64, CLod0.getValue(1), CPIdx1,
- PseudoSourceValue::getConstantPool(), 0, false, 16);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
SDValue Sub = DAG.getNode(ISD::FSUB, MVT::v2f64, XR2F, CLod1);
+
// Add the halves; easiest way is to swap them into another reg first.
SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2f64,
Sub, Sub, ShufMask);
SDValue Add = DAG.getNode(ISD::FADD, MVT::v2f64, Shuf, Sub);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, Add,
                   DAG.getIntPtrConstant(0));
}
+// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) {
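+ // As with the i64 expansion above, the idea reads better in C. A rough
+ // sketch (illustrative only; the intrinsic spelling is approximate):
+ /*
+    float uint32_to_float( uint32_t u ) {
+      static const double bias = 0x1.0p52;  // bits: 0x4330000000000000
+      double d;
+
+      // OR the 32-bit value into the low mantissa bits of the bias.
+      // Because u < 2^32, no set bits overlap, so this produces the
+      // exact double 2^52 + u.
+      __m128d x = _mm_or_pd( _mm_load_sd( &bias ),
+                             (__m128d) _mm_cvtsi32_si128( u ) );
+
+      // Subtract away the bias: (2^52 + u) - 2^52 == u, exactly.
+      x = _mm_sub_sd( x, _mm_load_sd( &bias ) );
+
+      // The only rounding happens here, when the exact double result
+      // is narrowed to single precision in the current rounding mode.
+      _mm_store_sd( &d, x );
+      return (float) d;
+    }
+ */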
+ // FP constant to bias correct the final result.
+ SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
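+ // (0x4330000000000000 is the bit pattern of the double 0x1.0p52.)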
+
+ // Load the 32-bit value into an XMM register.
+ SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0)));
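+ // (EXTRACT_ELEMENT with index 0 of an i32 is just the i32 value itself.)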
+
+ Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Load),
+ DAG.getIntPtrConstant(0));
+
+ // Or the load with the bias.
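+ // OR is safe here: the integer occupies bits 0-31 and the bias's set
+ // bits all sit in the exponent field (bits 52-62), so nothing overlaps
+ // and no carries can occur.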
+ SDValue Or = DAG.getNode(ISD::OR, MVT::v2i64,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR,
+ MVT::v2f64, Bias)),
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR,
+ MVT::v2f64, Load)));
+ Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Or),
+ DAG.getIntPtrConstant(0));
+
+ // Subtract the bias.
+ SDValue Sub = DAG.getNode(ISD::FSUB, MVT::f64, Or, Bias);
+
+ // Handle final rounding.
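+ // E.g. for 0xFFFFFFFF the subtract yields exactly 4294967295.0; the
+ // neighboring floats are 4294967040.0f and 4294967296.0f, so under
+ // round-to-nearest this returns 4294967296.0f.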
+ return DAG.getNode(ISD::FP_ROUND, MVT::f32, Sub,
+ DAG.getIntPtrConstant(0));
+}
+
+SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ MVT SrcVT = Op.getOperand(0).getValueType();
+
+ if (SrcVT == MVT::i64) {
+ // We only handle the SSE2 f64 target here; the caller can handle the rest.
+ if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
+ return SDValue();
+
+ return LowerUINT_TO_FP_i64(Op, DAG);
+ } else if (SrcVT == MVT::i32) {
+ // We only handle the SSE2 f32 target here; the caller can handle the rest.
+ if (Op.getValueType() != MVT::f32 || !X86ScalarSSEf32)
+ return SDValue();
+
+ return LowerUINT_TO_FP_i32(Op, DAG);
+ }
+
+ assert(0 && "Unknown UINT_TO_FP to lower!");
+ return SDValue();
+}
+
std::pair<SDValue,SDValue> X86TargetLowering::
FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType().getSimpleVT() <= MVT::i64 &&