From: Anton Korobeynikov Date: Fri, 16 Nov 2007 01:31:51 +0000 (+0000) Subject: Implement codegen for flt_rounds on x86 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=45b22fa9f1f085d7971cce6db4f11b353e1646c6;p=oota-llvm.git Implement codegen for flt_rounds on x86 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44183 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 27decaab308..9a86561f0dc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -205,7 +205,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); setOperationAction(ISD::FREM , MVT::f64 , Expand); - + setOperationAction(ISD::FLT_ROUNDS , MVT::i32 , Custom); + setOperationAction(ISD::CTPOP , MVT::i8 , Expand); setOperationAction(ISD::CTTZ , MVT::i8 , Expand); setOperationAction(ISD::CTLZ , MVT::i8 , Expand); @@ -4940,6 +4941,66 @@ SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, } } +SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) { + /* + The rounding mode is in bits 11:10 of FPSR, and has the following + settings: + 00 Round to nearest + 01 Round to -inf + 10 Round to +inf + 11 Round to 0 + + FLT_ROUNDS, on the other hand, expects the following: + -1 Undefined + 0 Round to 0 + 1 Round to nearest + 2 Round to +inf + 3 Round to -inf + + To perform the conversion, we do: + (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3) + */ + + MachineFunction &MF = DAG.getMachineFunction(); + const TargetMachine &TM = MF.getTarget(); + const TargetFrameInfo &TFI = *TM.getFrameInfo(); + unsigned StackAlignment = TFI.getStackAlignment(); + MVT::ValueType VT = Op.getValueType(); + + // Save FP Control Word to stack slot + int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment); + SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + + SDOperand Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other, + DAG.getEntryNode(), StackSlot); + + // Load FP Control Word from stack slot + SDOperand CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0); + + // Transform as necessary + SDOperand CWD1 = + DAG.getNode(ISD::SRL, MVT::i16, + DAG.getNode(ISD::AND, MVT::i16, + CWD, DAG.getConstant(0x800, MVT::i16)), + DAG.getConstant(11, MVT::i8)); + SDOperand CWD2 = + DAG.getNode(ISD::SRL, MVT::i16, + DAG.getNode(ISD::AND, MVT::i16, + CWD, DAG.getConstant(0x400, MVT::i16)), + DAG.getConstant(9, MVT::i8)); + + SDOperand RetVal = + DAG.getNode(ISD::AND, MVT::i16, + DAG.getNode(ISD::ADD, MVT::i16, + DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2), + DAG.getConstant(1, MVT::i16)), + DAG.getConstant(3, MVT::i16)); + + + return DAG.getNode((MVT::getSizeInBits(VT) < 16 ? + ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal); +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { @@ -4982,6 +5043,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::FLT_ROUNDS: return LowerFLT_ROUNDS(Op, DAG); } return SDOperand(); } @@ -5029,6 +5091,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; + case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2921f7b8c80..ac16ff42478 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -188,7 +188,10 @@ namespace llvm { // operand #1 callee (register or absolute) // operand #2 stack adjustment // operand #3 optional in flag - TC_RETURN + TC_RETURN, + + // Store FP control world into i16 memory + FNSTCW16m }; } @@ -477,6 +480,7 @@ namespace llvm { SDOperand LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, SelectionDAG &DAG); SDOperand LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG); SDOperand LowerTRAMPOLINE(SDOperand Op, SelectionDAG &DAG); + SDOperand LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG); }; } diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 8ce8cdf40cd..d2feb42c205 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -29,6 +29,8 @@ def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>; +def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; + def X86fpget : SDNode<"X86ISD::FP_GET_RESULT", SDTX86FpGet, [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; def X86fpset : SDNode<"X86ISD::FP_SET_RESULT", SDTX86FpSet, @@ -47,6 +49,8 @@ def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem, [SDNPHasChain]>; def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem, [SDNPHasChain]>; +def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore, + [SDNPHasChain]>; //===----------------------------------------------------------------------===// // FPStack pattern fragments @@ -524,7 +528,8 @@ def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags (outs), (ins), "fnstsw", []>, DF; def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world - (outs), (ins i16mem:$dst), "fnstcw\t$dst", []>; + (outs), (ins i16mem:$dst), "fnstcw\t$dst", + [(X86fp_cwd_get16 addr:$dst)]>; def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16] (outs), (ins i16mem:$dst), "fldcw\t$dst", []>;