bool TryFoldLoad(SDOperand P, SDOperand N,
SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp);
- void InstructionSelectPreprocess(SelectionDAG &DAG);
+ void PreprocessForRMW(SelectionDAG &DAG);
+ void PreprocessForFPConvert(SelectionDAG &DAG);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
Store.getOperand(2), Store.getOperand(3));
}
-/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
-/// selector to pick more load-modify-store instructions. This is a common
-/// case:
+/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
+/// This is only run if not in -fast mode (aka -O0).
+/// This allows the instruction selector to pick more read-modify-write
+/// instructions. This is a common case:
///
/// [Load chain]
/// ^
/// \ /
/// \ /
/// [Store]
-void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
+void X86DAGToDAGISel::PreprocessForRMW(SelectionDAG &DAG) {
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I) {
if (!ISD::isNON_TRUNCStore(I))
}
}
+
+/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
+/// nodes that target the FP stack to be store and load to the stack. This is a
+/// gross hack. We would like to simply mark these as being illegal, but when
+/// we do that, legalize produces these when it expands calls, then expands
+/// these in the same legalize pass. We would like dag combine to be able to
+/// hack on these between the call expansion and the node legalization. As such
+/// this pass basically does "really late" legalization of these inline with the
+/// X86 isel pass.
+void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) {
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+ if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
+ continue;
+
+ // If the source and destination are SSE registers, then this is a legal
+ // conversion that should not be lowered.
+ MVT::ValueType SrcVT = N->getOperand(0).getValueType();
+ MVT::ValueType DstVT = N->getValueType(0);
+ bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
+ bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
+ if (SrcIsSSE && DstIsSSE)
+ continue;
+
+ // If this is an FPStack extension (but not a truncation), it is a noop.
+ if (!SrcIsSSE && !DstIsSSE && N->getOpcode() == ISD::FP_EXTEND)
+ continue;
+
+ // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+ // FPStack has extload and truncstore. SSE can fold direct loads into other
+ // operations. Based on this, decide what we want to do.
+ MVT::ValueType MemVT;
+ if (N->getOpcode() == ISD::FP_ROUND)
+ MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+ else
+ MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+ SDOperand MemTmp = DAG.CreateStackTemporary(MemVT);
+
+ // FIXME: optimize the case where the src/dest is a load or store?
+ SDOperand Store = DAG.getTruncStore(DAG.getEntryNode(), N->getOperand(0),
+ MemTmp, NULL, 0, MemVT);
+ SDOperand Result = DAG.getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp,
+ NULL, 0, MemVT);
+
+ // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+ // extload we created. This will cause general havok on the dag because
+ // anything below the conversion could be folded into other existing nodes.
+ // To avoid invalidating 'I', back it up to the convert node.
+ --I;
+ DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result);
+
+ // Now that we did that, the node is dead. Increment the iterator to the
+ // next node to process, then delete N.
+ ++I;
+ DAG.DeleteNode(N);
+ }
+}
+
/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
MachineFunction::iterator FirstMBB = BB;
if (!FastISel)
- InstructionSelectPreprocess(DAG);
+ PreprocessForRMW(DAG);
+
+ // FIXME: This should only happen when not -fast.
+ PreprocessForFPConvert(DAG);
// Codegen the basic block.
#ifndef NDEBUG
X86ScalarSSEf32 = Subtarget->hasSSE1();
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+ bool Fast = false;
RegInfo = TM.getRegisterInfo();
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
- // Conversions to long double (in X87) go through memory.
- setConvertAction(MVT::f32, MVT::f80, Expand);
- setConvertAction(MVT::f64, MVT::f80, Expand);
-
- // Conversions from long double (in X87) go through memory.
- setConvertAction(MVT::f80, MVT::f32, Expand);
- setConvertAction(MVT::f80, MVT::f64, Expand);
+ // Floating truncations from f80 and extensions to f80 go through memory.
+ // If optimizing, we lie about this though and handle it in
+ // InstructionSelectPreprocess so that dagcombine2 can hack on these.
+ if (Fast) {
+ setConvertAction(MVT::f32, MVT::f80, Expand);
+ setConvertAction(MVT::f64, MVT::f80, Expand);
+ setConvertAction(MVT::f80, MVT::f32, Expand);
+ setConvertAction(MVT::f80, MVT::f64, Expand);
+ }
} else if (X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- // SSE->x87 conversions go through memory.
- setConvertAction(MVT::f32, MVT::f64, Expand);
- setConvertAction(MVT::f32, MVT::f80, Expand);
-
- // x87->SSE truncations need to go through memory.
- setConvertAction(MVT::f80, MVT::f32, Expand);
- setConvertAction(MVT::f64, MVT::f32, Expand);
- // And x87->x87 truncations also.
- setConvertAction(MVT::f80, MVT::f64, Expand);
+ // SSE <-> X87 conversions go through memory. If optimizing, we lie about
+ // this though and handle it in InstructionSelectPreprocess so that
+ // dagcombine2 can hack on these.
+ if (Fast) {
+ setConvertAction(MVT::f32, MVT::f64, Expand);
+ setConvertAction(MVT::f32, MVT::f80, Expand);
+ setConvertAction(MVT::f80, MVT::f32, Expand);
+ setConvertAction(MVT::f64, MVT::f32, Expand);
+ // And x87->x87 truncations also.
+ setConvertAction(MVT::f80, MVT::f64, Expand);
+ }
if (!UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- // Floating truncations need to go through memory.
- setConvertAction(MVT::f80, MVT::f32, Expand);
- setConvertAction(MVT::f64, MVT::f32, Expand);
- setConvertAction(MVT::f80, MVT::f64, Expand);
+ // Floating truncations go through memory. If optimizing, we lie about
+ // this though and handle it in InstructionSelectPreprocess so that
+ // dagcombine2 can hack on these.
+ if (Fast) {
+ setConvertAction(MVT::f80, MVT::f32, Expand);
+ setConvertAction(MVT::f64, MVT::f32, Expand);
+ setConvertAction(MVT::f80, MVT::f64, Expand);
+ }
if (!UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
+
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
// a register.
SDOperand Value = Op.getOperand(1);
- // If this is an FP return with ScalarSSE, we need to move the value from
- // an XMM register onto the fp-stack.
- if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
- SDOperand MemLoc;
-
- // If this is a load into a scalarsse value, don't store the loaded value
- // back to the stack, only to reload it: just replace the scalar-sse load.
- if (ISD::isNON_EXTLoad(Value.Val) &&
- Chain.reachesChainWithoutSideEffects(Value.getOperand(0))) {
- Chain = Value.getOperand(0);
- MemLoc = Value.getOperand(1);
- } else {
- // Spill the value to memory and reload it into top of stack.
- unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
- MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
- MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
- Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
- }
- SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
- SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
- Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
- Chain = Value.getValue(1);
- }
+ // an XMM register onto the fp-stack. Do this with an FP_EXTEND to f80.
+ // This will get legalized into a load/store if it can't get optimized away.
+ if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT()))
+ Value = DAG.getNode(ISD::FP_EXTEND, MVT::f80, Value);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
SDOperand Ops[] = { Chain, Value };
// Copies from the FP stack are special, as ST0 isn't a valid register
// before the fp stackifier runs.
- // Copy ST0 into an RFP register with FP_GET_RESULT.
- SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
+ // Copy ST0 into an RFP register with FP_GET_RESULT. If this will end up
+ // in an SSE register, copy it out as F80 and do a truncate, otherwise use
+ // the specified value type.
+ MVT::ValueType GetResultTy = RVLocs[0].getValVT();
+ if (isScalarFPTypeInSSEReg(GetResultTy))
+ GetResultTy = MVT::f80;
+ SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag);
+
SDOperand GROps[] = { Chain, InFlag };
SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
Chain = RetVal.getValue(1);
InFlag = RetVal.getValue(2);
-
- // If we are using ScalarSSE, store ST(0) to the stack and reload it into
- // an XMM register.
- if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
- SDOperand StoreLoc;
- const Value *SrcVal = 0;
- int SrcValOffset = 0;
- MVT::ValueType RetStoreVT = RVLocs[0].getValVT();
-
- // Determine where to store the value. If the call result is directly
- // used by a store, see if we can store directly into the location. In
- // this case, we'll end up producing a fst + movss[load] + movss[store] to
- // the same location, and the two movss's will be nuked as dead. This
- // optimizes common things like "*D = atof(..)" to not need an
- // intermediate stack slot.
- if (SDOperand(TheCall, 0).hasOneUse() &&
- SDOperand(TheCall, 1).hasOneUse()) {
- // In addition to direct uses, we also support a FP_ROUND that uses the
- // value, if it is directly stored somewhere.
- SDNode *User = *TheCall->use_begin();
- if (User->getOpcode() == ISD::FP_ROUND && User->hasOneUse())
- User = *User->use_begin();
-
- // Ok, we have one use of the value and one use of the chain. See if
- // they are the same node: a store.
- if (StoreSDNode *N = dyn_cast<StoreSDNode>(User)) {
- // Verify that the value being stored is either the call or a
- // truncation of the call.
- SDNode *StoreVal = N->getValue().Val;
- if (StoreVal == TheCall)
- ; // ok.
- else if (StoreVal->getOpcode() == ISD::FP_ROUND &&
- StoreVal->hasOneUse() &&
- StoreVal->getOperand(0).Val == TheCall)
- ; // ok.
- else
- N = 0; // not ok.
-
- if (N && N->getChain().Val == TheCall &&
- !N->isVolatile() && !N->isTruncatingStore() &&
- N->getAddressingMode() == ISD::UNINDEXED) {
- StoreLoc = N->getBasePtr();
- SrcVal = N->getSrcValue();
- SrcValOffset = N->getSrcValueOffset();
- RetStoreVT = N->getValue().getValueType();
- }
- }
- }
- // If we weren't able to optimize the result, just create a temporary
- // stack slot.
- if (StoreLoc.Val == 0) {
- MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
- StoreLoc = DAG.getFrameIndex(SSFI, getPointerTy());
- }
-
- // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
- // shouldn't be necessary except that RFP cannot be live across
- // multiple blocks (which could happen if a select gets lowered into
- // multiple blocks and scheduled in between them). When stackifier is
- // fixed, they can be uncoupled.
- SDOperand Ops[] = {
- Chain, RetVal, StoreLoc, DAG.getValueType(RetStoreVT), InFlag
- };
- Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
- RetVal = DAG.getLoad(RetStoreVT, Chain,
- StoreLoc, SrcVal, SrcValOffset);
- Chain = RetVal.getValue(1);
-
- // If we optimized a truncate, then extend the result back to its desired
- // type.
- if (RVLocs[0].getValVT() != RetStoreVT)
- RetVal = DAG.getNode(ISD::FP_EXTEND, RVLocs[0].getValVT(), RetVal);
- }
+ // If we want the result in an SSE register, use an FP_TRUNCATE to get it
+ // there.
+ if (GetResultTy != RVLocs[0].getValVT())
+ RetVal = DAG.getNode(ISD::FP_ROUND, RVLocs[0].getValVT(), RetVal,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+
ResultVals.push_back(RetVal);
}