X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FCodeGen%2FSelectionDAG%2FSelectionDAGBuilder.cpp;h=81efc0b55ecf728df937d02c046a9fb2d707fe33;hb=b9e6fe1e3afe3c4cdbdddfb563adc10c439dc296;hp=0bb5e4b3cbcd0ada86b5120590a8428fd9f68385;hpb=4c3fd9f92f89810d659973d2666ab729758de64a;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 0bb5e4b3cbc..81efc0b55ec 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,6 +15,7 @@ #include "SDNodeDbgValue.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" @@ -70,22 +71,47 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); +// Limit the width of DAG chains. This is important in general to prevent +// prevent DAG-based analysis from blowing up. For example, alias analysis and +// load clustering may not complete in reasonable time. It is difficult to +// recognize and avoid this situation within each individual analysis, and +// future analyses are likely to have the same behavior. Limiting DAG width is +// the safe approach, and will be especially important with global DAGs. +// +// MaxParallelChains default is arbitrarily high to avoid affecting +// optimization, but could be lowered to improve compile time. Any ld-ld-st-st +// sequence over this should have been converted to llvm.memcpy by the +// frontend. It easy to induce this behavior with .ll code such as: +// %buffer = alloca [4096 x i8] +// %data = load [4096 x i8]* %argPtr +// store [4096 x i8] %data, [4096 x i8]* %buffer +static cl::opt +MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"), + cl::init(64), cl::Hidden); + +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, + const SDValue *Parts, unsigned NumParts, + EVT PartVT, EVT ValueVT); + /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type /// larger then ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). -static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, +static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT, ISD::NodeType AssertOp = ISD::DELETED_NODE) { + if (ValueVT.isVector()) + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); + assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; if (NumParts > 1) { // Assemble the value from multiple parts. - if (!ValueVT.isVector() && ValueVT.isInteger()) { + if (ValueVT.isInteger()) { unsigned PartBits = PartVT.getSizeInBits(); unsigned ValueBits = ValueVT.getSizeInBits(); @@ -100,25 +126,25 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); if (RoundParts > 2) { - Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2, + Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, PartVT, HalfVT); - Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2, + Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2, PartVT, HalfVT); } else { - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]); + Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]); } if (TLI.isBigEndian()) std::swap(Lo, Hi); - Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); if (RoundParts < NumParts) { // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); - Hi = getCopyFromParts(DAG, dl, + Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT, OddVT); // Combine the round and odd parts. @@ -126,68 +152,29 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, if (TLI.isBigEndian()) std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); - Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); - Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, + Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); + Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, DAG.getConstant(Lo.getValueType().getSizeInBits(), TLI.getPointerTy())); - Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo); - Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi); - } - } else if (ValueVT.isVector()) { - // Handle a multi-element vector. - EVT IntermediateVT, RegisterVT; - unsigned NumIntermediates; - unsigned NumRegs = - TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, - NumIntermediates, RegisterVT); - assert(NumRegs == NumParts - && "Part count doesn't match vector breakdown!"); - NumParts = NumRegs; // Silence a compiler warning. - assert(RegisterVT == PartVT - && "Part type doesn't match vector breakdown!"); - assert(RegisterVT == Parts[0].getValueType() && - "Part type doesn't match part!"); - - // Assemble the parts into intermediate operands. - SmallVector Ops(NumIntermediates); - if (NumIntermediates == NumParts) { - // If the register was not expanded, truncate or copy the value, - // as appropriate. - for (unsigned i = 0; i != NumParts; ++i) - Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1, - PartVT, IntermediateVT); - } else if (NumParts > 0) { - // If the intermediate type was expanded, build the intermediate - // operands from the parts. - assert(NumParts % NumIntermediates == 0 && - "Must expand into a divisible number of parts!"); - unsigned Factor = NumParts / NumIntermediates; - for (unsigned i = 0; i != NumIntermediates; ++i) - Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor, - PartVT, IntermediateVT); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); + Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); } - - // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the - // intermediate operands. - Val = DAG.getNode(IntermediateVT.isVector() ? - ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl, - ValueVT, &Ops[0], NumIntermediates); } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && "Unexpected split"); SDValue Lo, Hi; - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]); + Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { // FP split into integer parts (soft fp) assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); - Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT); + Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT); } } @@ -197,182 +184,279 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, if (PartVT == ValueVT) return Val; - if (PartVT.isVector()) { - assert(ValueVT.isVector() && "Unknown vector conversion!"); - return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); - } - - if (ValueVT.isVector()) { - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && - "Only trivial scalar-to-vector conversions should get here!"); - return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val); - } - - if (PartVT.isInteger() && - ValueVT.isInteger()) { + if (PartVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartVT)) { // For a truncate, see if we have any information to // indicate whether the truncated bits will always be // zero or sign-extension. if (AssertOp != ISD::DELETED_NODE) - Val = DAG.getNode(AssertOp, dl, PartVT, Val, + Val = DAG.getNode(AssertOp, DL, PartVT, Val, DAG.getValueType(ValueVT)); - return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); - } else { - return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val); + return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } + return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); } if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { - if (ValueVT.bitsLT(Val.getValueType())) { - // FP_ROUND's are always exact here. - return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val, + // FP_ROUND's are always exact here. + if (ValueVT.bitsLT(Val.getValueType())) + return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, DAG.getIntPtrConstant(1)); - } - return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val); + return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) - return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); + return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); return SDValue(); } +/// getCopyFromParts - Create a value that contains the specified legal parts +/// combined into the value they represent. If the parts combine to a type +/// larger then ValueVT then AssertOp can be used to specify whether the extra +/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT +/// (ISD::AssertSext). +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, + const SDValue *Parts, unsigned NumParts, + EVT PartVT, EVT ValueVT) { + assert(ValueVT.isVector() && "Not a vector value"); + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + // Handle a multi-element vector. + if (NumParts > 1) { + EVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + assert(RegisterVT == Parts[0].getValueType() && + "Part type doesn't match part!"); + + // Assemble the parts into intermediate operands. + SmallVector Ops(NumIntermediates); + if (NumIntermediates == NumParts) { + // If the register was not expanded, truncate or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, + PartVT, IntermediateVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, build the intermediate + // operands from the parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, + PartVT, IntermediateVT); + } + + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the + // intermediate operands. + Val = DAG.getNode(IntermediateVT.isVector() ? + ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, + ValueVT, &Ops[0], NumIntermediates); + } + + // There is now one part, held in Val. Correct it to match ValueVT. + PartVT = Val.getValueType(); + + if (PartVT == ValueVT) + return Val; + + if (PartVT.isVector()) { + // If the element type of the source/dest vectors are the same, but the + // parts vector has more elements than the value vector, then we have a + // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the + // elements we want. + if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) { + assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + "Cannot narrow, it would be a lossy transformation"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getIntPtrConstant(0)); + } + + // Vector/Vector bitcast. + return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); + } + + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial scalar-to-vector conversions should get here!"); + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); +} + + + + +static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, + SDValue Val, SDValue *Parts, unsigned NumParts, + EVT PartVT); + /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. -static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, +static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, EVT PartVT, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PtrVT = TLI.getPointerTy(); EVT ValueVT = Val.getValueType(); + + // Handle the vector case separately. + if (ValueVT.isVector()) + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); - if (!NumParts) + if (NumParts == 0) return; - if (!ValueVT.isVector()) { - if (PartVT == ValueVT) { - assert(NumParts == 1 && "No-op copy with multiple parts!"); - Parts[0] = Val; - return; - } - - if (NumParts * PartBits > ValueVT.getSizeInBits()) { - // If the parts cover more bits than the value has, promote the value. - if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { - assert(NumParts == 1 && "Do not know what to promote to!"); - Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val); - } else if (PartVT.isInteger() && ValueVT.isInteger()) { - ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); - Val = DAG.getNode(ExtendKind, dl, ValueVT, Val); - } else { - llvm_unreachable("Unknown mismatch!"); - } - } else if (PartBits == ValueVT.getSizeInBits()) { - // Different types of the same size. - assert(NumParts == 1 && PartVT != ValueVT); - Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); - } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { - // If the parts cover less bits than value has, truncate the value. - if (PartVT.isInteger() && ValueVT.isInteger()) { - ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); - Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); - } else { - llvm_unreachable("Unknown mismatch!"); - } - } - - // The value may have changed - recompute ValueVT. - ValueVT = Val.getValueType(); - assert(NumParts * PartBits == ValueVT.getSizeInBits() && - "Failed to tile the value with PartVT!"); - - if (NumParts == 1) { - assert(PartVT == ValueVT && "Type conversion failed!"); - Parts[0] = Val; - return; - } + assert(!ValueVT.isVector() && "Vector case handled elsewhere"); + if (PartVT == ValueVT) { + assert(NumParts == 1 && "No-op copy with multiple parts!"); + Parts[0] = Val; + return; + } - // Expand the value into multiple parts. - if (NumParts & (NumParts - 1)) { - // The number of parts is not a power of 2. Split off and copy the tail. + if (NumParts * PartBits > ValueVT.getSizeInBits()) { + // If the parts cover more bits than the value has, promote the value. + if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + assert(NumParts == 1 && "Do not know what to promote to!"); + Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); + } else { assert(PartVT.isInteger() && ValueVT.isInteger() && - "Do not know what to expand to!"); - unsigned RoundParts = 1 << Log2_32(NumParts); - unsigned RoundBits = RoundParts * PartBits; - unsigned OddParts = NumParts - RoundParts; - SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val, - DAG.getConstant(RoundBits, - TLI.getPointerTy())); - getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, - OddParts, PartVT); - - if (TLI.isBigEndian()) - // The odd parts were reversed by getCopyToParts - unreverse them. - std::reverse(Parts + RoundParts, Parts + NumParts); - - NumParts = RoundParts; + "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); - Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); } + } else if (PartBits == ValueVT.getSizeInBits()) { + // Different types of the same size. + assert(NumParts == 1 && PartVT != ValueVT); + Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { + // If the parts cover less bits than value has, truncate the value. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Unknown mismatch!"); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + + // The value may have changed - recompute ValueVT. + ValueVT = Val.getValueType(); + assert(NumParts * PartBits == ValueVT.getSizeInBits() && + "Failed to tile the value with PartVT!"); - // The number of parts is a power of 2. Repeatedly bisect the value using - // EXTRACT_ELEMENT. - Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl, - EVT::getIntegerVT(*DAG.getContext(), - ValueVT.getSizeInBits()), - Val); - - for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { - for (unsigned i = 0; i < NumParts; i += StepSize) { - unsigned ThisBits = StepSize * PartBits / 2; - EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); - SDValue &Part0 = Parts[i]; - SDValue &Part1 = Parts[i+StepSize/2]; - - Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, - ThisVT, Part0, - DAG.getConstant(1, PtrVT)); - Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, - ThisVT, Part0, - DAG.getConstant(0, PtrVT)); - - if (ThisBits == PartBits && ThisVT != PartVT) { - Part0 = DAG.getNode(ISD::BIT_CONVERT, dl, - PartVT, Part0); - Part1 = DAG.getNode(ISD::BIT_CONVERT, dl, - PartVT, Part1); - } + if (NumParts == 1) { + assert(PartVT == ValueVT && "Type conversion failed!"); + Parts[0] = Val; + return; + } + + // Expand the value into multiple parts. + if (NumParts & (NumParts - 1)) { + // The number of parts is not a power of 2. Split off and copy the tail. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Do not know what to expand to!"); + unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundBits = RoundParts * PartBits; + unsigned OddParts = NumParts - RoundParts; + SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, + DAG.getIntPtrConstant(RoundBits)); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT); + + if (TLI.isBigEndian()) + // The odd parts were reversed by getCopyToParts - unreverse them. + std::reverse(Parts + RoundParts, Parts + NumParts); + + NumParts = RoundParts; + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + + // The number of parts is a power of 2. Repeatedly bisect the value using + // EXTRACT_ELEMENT. + Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL, + EVT::getIntegerVT(*DAG.getContext(), + ValueVT.getSizeInBits()), + Val); + + for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { + for (unsigned i = 0; i < NumParts; i += StepSize) { + unsigned ThisBits = StepSize * PartBits / 2; + EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); + SDValue &Part0 = Parts[i]; + SDValue &Part1 = Parts[i+StepSize/2]; + + Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(1)); + Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(0)); + + if (ThisBits == PartBits && ThisVT != PartVT) { + Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0); + Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1); } } + } - if (TLI.isBigEndian()) - std::reverse(Parts, Parts + OrigNumParts); + if (TLI.isBigEndian()) + std::reverse(Parts, Parts + OrigNumParts); +} - return; - } - // Vector ValueVT. +/// getCopyToPartsVector - Create a series of nodes that contain the specified +/// value split into legal parts. +static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, + SDValue Val, SDValue *Parts, unsigned NumParts, + EVT PartVT) { + EVT ValueVT = Val.getValueType(); + assert(ValueVT.isVector() && "Not a vector"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (NumParts == 1) { - if (PartVT != ValueVT) { - if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { - Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); - } else { - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && - "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - PartVT, Val, - DAG.getConstant(0, PtrVT)); - } + if (PartVT == ValueVT) { + // Nothing to do. + } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { + // Bitconvert vector->vector case. + Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + } else if (PartVT.isVector() && + PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { + EVT ElementVT = PartVT.getVectorElementType(); + // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in + // undef elements. + SmallVector Ops; + for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + ElementVT, Val, DAG.getIntPtrConstant(i))); + + for (unsigned i = ValueVT.getVectorNumElements(), + e = PartVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getUNDEF(ElementVT)); + + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); + + // FIXME: Use CONCAT for 2x -> 4x. + + //SDValue UndefElts = DAG.getUNDEF(VectorTy); + //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); + } else { + // Vector -> scalar conversion. + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial vector-to-scalar conversions should get here!"); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + PartVT, Val, DAG.getIntPtrConstant(0)); } Parts[0] = Val; @@ -383,7 +467,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, - IntermediateVT, NumIntermediates, RegisterVT); + IntermediateVT, + NumIntermediates, RegisterVT); unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); @@ -394,14 +479,12 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SmallVector Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { if (IntermediateVT.isVector()) - Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getConstant(i * (NumElements / NumIntermediates), - PtrVT)); + DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); else - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - IntermediateVT, Val, - DAG.getConstant(i, PtrVT)); + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + IntermediateVT, Val, DAG.getIntPtrConstant(i)); } // Split the intermediate operands into legal parts. @@ -409,7 +492,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. @@ -417,10 +500,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT); } } + + + namespace { /// RegsForValue - This struct represents the registers (physical or virtual) /// that a particular set of values is assigned, and the type information @@ -460,11 +546,6 @@ namespace { EVT regvt, EVT valuevt) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - RegsForValue(const SmallVector ®s, - const SmallVector ®vts, - const SmallVector &valuevts) - : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} - RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg, const Type *Ty) { ComputeValueVTs(tli, Ty, ValueVTs); @@ -530,6 +611,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, SDValue &Chain, SDValue *Flag) const { + // A Value with type {} or [0 x %t] needs no registers. + if (ValueVTs.empty()) + return SDValue(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Assemble the legal parts into the final values. @@ -623,8 +708,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); EVT RegisterVT = RegVTs[Value]; - getCopyToParts(DAG, dl, - Val.getValue(Val.getResNo() + Value), + getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT); Part += NumParts; } @@ -701,6 +785,7 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); + DanglingDebugInfoMap.clear(); CurDebugLoc = DebugLoc(); HasTailCall = false; } @@ -805,6 +890,33 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { } } +// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, +// generate the debug data structures now that we've seen its definition. +void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, + SDValue Val) { + DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; + if (DDI.getDI()) { + const DbgValueInst *DI = DDI.getDI(); + DebugLoc dl = DDI.getdl(); + unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); + MDNode *Variable = DI->getVariable(); + uint64_t Offset = DI->getOffset(); + SDDbgValue *SDV; + if (Val.getNode()) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) { + SDV = DAG.getDbgValue(Variable, Val.getNode(), + Val.getResNo(), Offset, dl, DbgSDNodeOrder); + DAG.AddDbgValue(SDV, Val.getNode(), false); + } + } else { + SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), + Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + } + DanglingDebugInfoMap[V] = DanglingDebugInfo(); + } +} + // getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { // If we already have an SDValue for this value, use it. It's important @@ -820,12 +932,13 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); } // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); return Val; } @@ -839,10 +952,11 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); return Val; } -/// getValueImpl - Helper function for getValue and getMaterializedValue. +/// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const Constant *C = dyn_cast(V)) { @@ -951,79 +1065,16 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast(V)) { - assert(Inst->isSafeToSpeculativelyExecute() && - "Instruction with side effects deferred!"); - visit(*Inst); - DenseMap::iterator NIt = NodeMap.find(Inst); - if (NIt != NodeMap.end() && NIt->second.getNode()) - return NIt->second; + unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); } llvm_unreachable("Can't get register for value!"); return SDValue(); } -/// Get the EVTs and ArgFlags collections that represent the legalized return -/// type of the given function. This does not require a DAG or a return value, -/// and is suitable for use before any DAGs for the function are constructed. -static void getReturnInfo(const Type* ReturnType, - Attributes attr, SmallVectorImpl &OutVTs, - SmallVectorImpl &OutFlags, - const TargetLowering &TLI, - SmallVectorImpl *Offsets = 0) { - SmallVector ValueVTs; - ComputeValueVTs(TLI, ReturnType, ValueVTs); - unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) return; - unsigned Offset = 0; - - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - - if (attr & Attribute::SExt) - ExtendKind = ISD::SIGN_EXTEND; - else if (attr & Attribute::ZExt) - ExtendKind = ISD::ZERO_EXTEND; - - // FIXME: C calling convention requires the return type to be promoted to - // at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. - if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } - - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); - unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( - PartVT.getTypeForEVT(ReturnType->getContext())); - - // 'inreg' on function refers to return value - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr & Attribute::InReg) - Flags.setInReg(); - - // Propagate extension type if any - if (attr & Attribute::SExt) - Flags.setSExt(); - else if (attr & Attribute::ZExt) - Flags.setZExt(); - - for (unsigned i = 0; i < NumParts; ++i) { - OutVTs.push_back(PartVT); - OutFlags.push_back(Flags); - if (Offsets) - { - Offsets->push_back(Offset); - Offset += PartSize; - } - } - } -} - void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SDValue Chain = getControlRoot(); SmallVector Outs; @@ -1049,14 +1100,15 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { unsigned NumValues = ValueVTs.size(); SmallVector Chains(NumValues); - EVT PtrVT = PtrValueVTs[0]; for (unsigned i = 0; i != NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr, - DAG.getConstant(Offsets[i], PtrVT)); + SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), + RetPtr.getValueType(), RetPtr, + DAG.getIntPtrConstant(Offsets[i])); Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), - Add, NULL, Offsets[i], false, false, 0); + // FIXME: better loc info would be nice. + Add, MachinePointerInfo(), false, false, 0); } Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), @@ -1315,12 +1367,12 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases){ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) return false; } - + return true; } void SelectionDAGBuilder::visitBr(const BranchInst &I) { - MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *BrMBB = FuncInfo.MBB; // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; @@ -1470,10 +1522,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); - // Insert the false branch. - if (CB.FalseBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, - DAG.getBasicBlock(CB.FalseBB)); + // Insert the false branch. Do this even if it's a fall through branch, + // this makes it easier to do DAG optimizations which require inverting + // the branch condition. + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB)); DAG.setRoot(BrCond); } @@ -1646,7 +1699,7 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, } void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { - MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *InvokeMBB = FuncInfo.MBB; // Retrieve successors. MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; @@ -2173,8 +2226,21 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, return numCmps; } +void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, + MachineBasicBlock *Last) { + // Update JTCases. + for (unsigned i = 0, e = JTCases.size(); i != e; ++i) + if (JTCases[i].first.HeaderBB == First) + JTCases[i].first.HeaderBB = Last; + + // Update BitTestCases. + for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) + if (BitTestCases[i].Parent == First) + BitTestCases[i].Parent = Last; +} + void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { - MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()]; + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; @@ -2240,7 +2306,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { } void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { - MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; // Update machine-CFG edges with unique successors. SmallVector succs; @@ -2694,8 +2760,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa(Op0); bool FromUndef = isa(Op1); - unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, - I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); SmallVector AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -2733,8 +2798,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Type *ValTy = I.getType(); bool OutOfUndef = isa(Op0); - unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, - I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); SmallVector ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); @@ -2772,11 +2836,6 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } Ty = StTy->getElementType(Field); - } else if (const UnionType *UnTy = dyn_cast(Ty)) { - unsigned Field = cast(Idx)->getZExtValue(); - - // Offset canonically 0 for unions, but type changes - Ty = UnTy->getElementType(Field); } else { Ty = cast(Ty)->getElementType(); @@ -2881,7 +2940,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Inform the Frame Information that we have just allocated a variable-sized // object. - FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); + FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { @@ -2893,6 +2952,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); + const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); SmallVector ValueVTs; SmallVector Offsets; @@ -2903,10 +2963,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (I.isVolatile()) + if (I.isVolatile() || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory(SV)) { + else if (AA->pointsToConstantMemory( + AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -2914,25 +2975,40 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // Do not serialize non-volatile loads against each other. Root = DAG.getRoot(); } - + SmallVector Values(NumValues); - SmallVector Chains(NumValues); + SmallVector Chains(std::min(unsigned(MaxParallelChains), + NumValues)); EVT PtrVT = Ptr.getValueType(); - for (unsigned i = 0; i != NumValues; ++i) { + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // Serializing loads here may result in excessive register pressure, and + // TokenFactor places arbitrary choke points on the scheduler. SD scheduling + // could recover a bit by hoisting nodes upward in the chain by recognizing + // they are side-effect free or do not alias. The optimizer should really + // avoid this case by converting large object/array copies to llvm.memcpy + // (MaxParallelChains should always remain as failsafe). + if (ChainI == MaxParallelChains) { + assert(PendingLoads.empty() && "PendingLoads must be serialized first"); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + Root = Chain; + ChainI = 0; + } SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, - A, SV, Offsets[i], isVolatile, - isNonTemporal, Alignment); + A, MachinePointerInfo(SV, Offsets[i]), isVolatile, + isNonTemporal, Alignment, TBAAInfo); Values[i] = L; - Chains[i] = L.getValue(1); + Chains[ChainI] = L.getValue(1); } if (!ConstantMemory) { SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues); + MVT::Other, &Chains[0], ChainI); if (isVolatile) DAG.setRoot(Chain); else @@ -2962,23 +3038,37 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Ptr = getValue(PtrV); SDValue Root = getRoot(); - SmallVector Chains(NumValues); + SmallVector Chains(std::min(unsigned(MaxParallelChains), + NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); - - for (unsigned i = 0; i != NumValues; ++i) { + const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // See visitLoad comments. + if (ChainI == MaxParallelChains) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + Root = Chain; + ChainI = 0; + } SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); - Chains[i] = DAG.getStore(Root, getCurDebugLoc(), - SDValue(Src.getNode(), Src.getResNo() + i), - Add, PtrV, Offsets[i], isVolatile, - isNonTemporal, Alignment); - } - - DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues)); + SDValue St = DAG.getStore(Root, getCurDebugLoc(), + SDValue(Src.getNode(), Src.getResNo() + i), + Add, MachinePointerInfo(PtrV, Offsets[i]), + isVolatile, isNonTemporal, Alignment, TBAAInfo); + Chains[ChainI] = St; + } + + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + ++SDNodeOrder; + AssignOrderingToNode(StoreNode.getNode()); + DAG.setRoot(StoreNode); } /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC @@ -3004,7 +3094,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. - if (!IsTgtIntrinsic) + if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || + Info.opc == ISD::INTRINSIC_W_CHAIN) Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. @@ -3035,7 +3126,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // This is target intrinsic that touches memory Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), VTs, &Ops[0], Ops.size(), - Info.memVT, Info.ptrVal, Info.offset, + Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, Info.readMem, Info.writeMem); } else if (!HasChain) { @@ -3887,25 +3979,38 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. bool -SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, - const Value *V, MDNode *Variable, - uint64_t Offset, +SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, + int64_t Offset, const SDValue &N) { - if (!isa(V)) + const Argument *Arg = dyn_cast(V); + if (!Arg) return false; MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + // Ignore inlined function arguments here. DIVariable DV(Variable); if (DV.isInlinedFnArgument(MF.getFunction())) return false; - MachineBasicBlock *MBB = FuncInfo.MBBMap[DI.getParent()]; + MachineBasicBlock *MBB = FuncInfo.MBB; if (MBB != &MF.front()) return false; unsigned Reg = 0; - if (N.getOpcode() == ISD::CopyFromReg) { + if (Arg->hasByValAttr()) { + // Byval arguments' frame index is recorded during argument lowering. + // Use this info directly. + Reg = TRI->getFrameRegister(MF); + Offset = FuncInfo.getByValArgumentFrameIndex(Arg); + // If byval argument ofset is not recorded then ignore this. + if (!Offset) + Reg = 0; + } + + if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) { Reg = cast(N.getOperand(1))->getReg(); if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); @@ -3916,13 +4021,25 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, } if (!Reg) { + // Check if ValueMap has reg number. DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); - if (VMI == FuncInfo.ValueMap.end()) - return false; - Reg = VMI->second; + if (VMI != FuncInfo.ValueMap.end()) + Reg = VMI->second; + } + + if (!Reg && N.getNode()) { + // Check if frame index is available. + if (LoadSDNode *LNode = dyn_cast(N.getNode())) + if (FrameIndexSDNode *FINode = + dyn_cast(LNode->getBasePtr().getNode())) { + Reg = TRI->getFrameRegister(MF); + Offset = FINode->getIndex(); + } } - const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + if (!Reg) + return false; + MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); @@ -3931,9 +4048,11 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, } // VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) -#define setjmp_undefined_for_visual_studio -#undef setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc #endif /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If @@ -3978,7 +4097,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); bool isVol = cast(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, - I.getArgOperand(0), 0, I.getArgOperand(1), 0)); + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)))); return 0; } case Intrinsic::memset: { @@ -3993,7 +4113,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); bool isVol = cast(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getArgOperand(0), 0)); + MachinePointerInfo(I.getArgOperand(0)))); return 0; } case Intrinsic::memmove: { @@ -4009,61 +4129,47 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); bool isVol = cast(I.getArgOperand(4))->getZExtValue(); - - // If the source and destination are known to not be aliases, we can - // lower memmove as memcpy. - uint64_t Size = -1ULL; - if (ConstantSDNode *C = dyn_cast(Op3)) - Size = C->getZExtValue(); - if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) == - AliasAnalysis::NoAlias) { - DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - false, I.getArgOperand(0), 0, I.getArgOperand(1), 0)); - return 0; - } - DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getArgOperand(0), 0, I.getArgOperand(1), 0)); + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)))); return 0; } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast(I); - if (!DIVariable(DI.getVariable()).Verify()) - return 0; - MDNode *Variable = DI.getVariable(); - // Parameters are handled specially. - bool isParameter = - DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; const Value *Address = DI.getAddress(); - if (!Address) + if (!Address || !DIVariable(DI.getVariable()).Verify()) return 0; - if (const BitCastInst *BCI = dyn_cast(Address)) - Address = BCI->getOperand(0); - const AllocaInst *AI = dyn_cast(Address); - if (AI) { - // Don't handle byval arguments or VLAs, for example. - // Non-byval arguments are handled here (they refer to the stack temporary - // alloca at this point). - DenseMap::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI == FuncInfo.StaticAllocaMap.end()) - return 0; // VLAs. - int FI = SI->second; - - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) - MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); - } // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder // but do not always have a corresponding SDNode built. The SDNodeOrder // absolute, but not relative, values are different depending on whether // debug info exists. ++SDNodeOrder; + + // Check if address has undef value. + if (isa(Address) || + (Address->use_empty() && !isa(Address))) { + SDDbgValue*SDV = + DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + return 0; + } + SDValue &N = NodeMap[Address]; + if (!N.getNode() && isa(Address)) + // Check unused arguments map. + N = UnusedArgNodeMap[Address]; SDDbgValue *SDV; if (N.getNode()) { + // Parameters are handled specially. + bool isParameter = + DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; + if (const BitCastInst *BCI = dyn_cast(Address)) + Address = BCI->getOperand(0); + const AllocaInst *AI = dyn_cast(Address); + if (isParameter && !AI) { FrameIndexSDNode *FINode = dyn_cast(N.getNode()); if (FINode) @@ -4082,10 +4188,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { - // This isn't useful, but it shows what we're missing. - SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, isParameter); + // If Address is an argument then try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) { + // If variable is pinned by a alloca in dominating bb then + // use StaticAllocaMap. + if (const AllocaInst *AI = dyn_cast(Address)) { + if (AI->getParent() != DI.getParent()) { + DenseMap::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + SDV = DAG.getDbgValue(Variable, SI->second, + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + return 0; + } + } + } + // Otherwise add undef to help track missing debug info. + SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + } } return 0; } @@ -4110,31 +4234,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, 0, false); } else { - bool createUndef = false; - // FIXME : Why not use getValue() directly ? + // Do not use getValue() in here; we don't want to generate code at + // this point if it hasn't been done yet. SDValue N = NodeMap[V]; if (!N.getNode() && isa(V)) // Check unused arguments map. N = UnusedArgNodeMap[V]; if (N.getNode()) { - if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) { SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } - } else if (isa(V) && !V->use_empty()) { - SDValue N = getValue(V); - if (N.getNode()) { - if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { - SDV = DAG.getDbgValue(Variable, N.getNode(), - N.getResNo(), Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, N.getNode(), false); - } - } else - createUndef = true; - } else - createUndef = true; - if (createUndef) { + } else if (isa(V) && !V->use_empty() ) { + // Do not call getValue(V) yet, as we don't want to generate code. + // Remember it for later. + DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); + DanglingDebugInfoMap[V] = DDI; + } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter; we need this fallback. SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), @@ -4155,7 +4272,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (SI == FuncInfo.StaticAllocaMap.end()) return 0; // VLAs. int FI = SI->second; - + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); @@ -4163,7 +4280,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_exception: { // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() && + assert(FuncInfo.MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[1]; @@ -4175,7 +4292,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_selector: { - MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *CallMBB = FuncInfo.MBB; MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); if (CallMBB->isLandingPad()) AddCatchInfo(I, &MMI, CallMBB); @@ -4185,7 +4302,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg); + if (Reg) FuncInfo.MBB->addLiveIn(Reg); } // Insert the EHSELECTION instruction. @@ -4251,11 +4368,75 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, - getRoot(), - getValue(I.getArgOperand(0)))); + getRoot(), getValue(I.getArgOperand(0)))); + return 0; + } + case Intrinsic::eh_sjlj_dispatch_setup: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, + getRoot(), getValue(I.getArgOperand(0)))); return 0; } + case Intrinsic::x86_mmx_pslli_w: + case Intrinsic::x86_mmx_pslli_d: + case Intrinsic::x86_mmx_pslli_q: + case Intrinsic::x86_mmx_psrli_w: + case Intrinsic::x86_mmx_psrli_d: + case Intrinsic::x86_mmx_psrli_q: + case Intrinsic::x86_mmx_psrai_w: + case Intrinsic::x86_mmx_psrai_d: { + SDValue ShAmt = getValue(I.getArgOperand(1)); + if (isa(ShAmt)) { + visitTargetIntrinsic(I, Intrinsic); + return 0; + } + unsigned NewIntrinsic = 0; + EVT ShAmtVT = MVT::v2i32; + switch (Intrinsic) { + case Intrinsic::x86_mmx_pslli_w: + NewIntrinsic = Intrinsic::x86_mmx_psll_w; + break; + case Intrinsic::x86_mmx_pslli_d: + NewIntrinsic = Intrinsic::x86_mmx_psll_d; + break; + case Intrinsic::x86_mmx_pslli_q: + NewIntrinsic = Intrinsic::x86_mmx_psll_q; + break; + case Intrinsic::x86_mmx_psrli_w: + NewIntrinsic = Intrinsic::x86_mmx_psrl_w; + break; + case Intrinsic::x86_mmx_psrli_d: + NewIntrinsic = Intrinsic::x86_mmx_psrl_d; + break; + case Intrinsic::x86_mmx_psrli_q: + NewIntrinsic = Intrinsic::x86_mmx_psrl_q; + break; + case Intrinsic::x86_mmx_psrai_w: + NewIntrinsic = Intrinsic::x86_mmx_psra_w; + break; + case Intrinsic::x86_mmx_psrai_d: + NewIntrinsic = Intrinsic::x86_mmx_psra_d; + break; + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + } + + // The vector shift intrinsics with scalars uses 32b shift amounts but + // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits + // to be zero. + // We must do this early because v2i32 is not a legal type. + DebugLoc dl = getCurDebugLoc(); + SDValue ShOps[2]; + ShOps[0] = ShAmt; + ShOps[1] = DAG.getConstant(0, MVT::i32); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); + EVT DestVT = TLI.getValueType(I.getType()); + ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, DestVT, ShAmt); + Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, + DAG.getConstant(NewIntrinsic, MVT::i32), + getValue(I.getArgOperand(0)), ShAmt); + setValue(&I, Res); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4399,8 +4580,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Store the stack protector onto the stack. Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, - PseudoSourceValue::getFixedStack(FI), - 0, true, false, 0); + MachinePointerInfo::getFixedStack(FI), + true, false, 0); setValue(&I, Res); DAG.setRoot(Res); return 0; @@ -4479,14 +4660,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::prefetch: { SDValue Ops[4]; + unsigned rw = cast(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); - DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); + DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, + DAG.getVTList(MVT::Other), + &Ops[0], 4, + EVT::getIntegerVT(*Context, 8), + MachinePointerInfo(I.getArgOperand(0)), + 0, /* align */ + false, /* volatile */ + rw==0, /* read */ + rw==1)); /* write */ return 0; } - case Intrinsic::memory_barrier: { SDValue Ops[6]; Ops[0] = getRoot(); @@ -4505,7 +4694,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), - I.getArgOperand(0)); + MachinePointerInfo(I.getArgOperand(0))); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -4559,16 +4748,16 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Args.reserve(CS.arg_size()); // Check whether the function can return without sret-demotion. - SmallVector OutVTs; - SmallVector OutsFlags; + SmallVector Outs; SmallVector Offsets; - getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), - OutVTs, OutsFlags, TLI, &Offsets); + GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), + Outs, TLI, &Offsets); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - FTy->isVarArg(), OutVTs, OutsFlags, FTy->getContext()); + FTy->isVarArg(), Outs, FTy->getContext()); SDValue DemoteStackSlot; + int DemoteStackIdx = -100; if (!CanLowerReturn) { uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( @@ -4576,10 +4765,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy()); Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.isSExt = false; @@ -4635,6 +4824,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI)) isTailCall = false; + // If there's a possibility that fast-isel has already selected some amount + // of the current basic block, don't emit a tail call. + if (isTailCall && EnableFastISel) + isTailCall = false; + std::pair Result = TLI.LowerCallTo(getRoot(), RetTy, CS.paramHasAttr(0, Attribute::SExt), @@ -4659,7 +4853,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, ComputeValueVTs(TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; - unsigned NumValues = OutVTs.size(); + unsigned NumValues = Outs.size(); SmallVector Values(NumValues); SmallVector Chains(NumValues); @@ -4667,8 +4861,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, - Add, NULL, Offsets[i], false, false, 1); + SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, + Add, + MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), + false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -4676,7 +4872,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues); PendingLoads.push_back(Chain); - + // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. SmallVector RetTys; @@ -4689,7 +4885,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, EVT VT = RetTys[I]; EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT); - + SDValue ReturnValue = getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs, RegisterVT, VT, AssertOp); @@ -4771,7 +4967,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, - Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, + Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, false /*nontemporal*/, 1 /* align=1 */); @@ -4867,7 +5063,25 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { visitInlineAsm(&I); return; } - + + // See if any floating point values are being passed to this function. This is + // used to emit an undefined reference to fltused on Windows. + const FunctionType *FT = + cast(I.getCalledValue()->getType()->getContainedType(0)); + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + if (FT->isVarArg() && + !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + const Type* T = I.getArgOperand(i)->getType(); + for (po_iterator i = po_begin(T), e = po_end(T); + i != e; ++i) { + if (!i->isFloatingPointTy()) continue; + MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); + break; + } + } + } + const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { @@ -4945,7 +5159,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } } } - + SDValue Callee; if (!RenameFn) Callee = getValue(I.getCalledValue()); @@ -4973,7 +5187,7 @@ public: /// contains the set of register corresponding to the operand. RegsForValue AssignedRegs; - explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info) + explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { } @@ -5048,6 +5262,8 @@ private: } }; +typedef SmallVector SDISelAsmOperandInfoVector; + } // end llvm namespace. /// isAllocatableRegister - If the specified register is safe to allocate, @@ -5285,30 +5501,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const InlineAsm *IA = cast(CS.getCalledValue()); /// ConstraintOperands - Information about all of the constraints. - std::vector ConstraintOperands; + SDISelAsmOperandInfoVector ConstraintOperands; std::set OutputRegs, InputRegs; - // Do a prepass over the constraints, canonicalizing them, and building up the - // ConstraintOperands list. - std::vector - ConstraintInfos = IA->ParseConstraints(); - - bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI); - - SDValue Chain, Flag; - - // We won't need to flush pending loads if this asm doesn't touch - // memory and is nonvolatile. - if (hasMemory || IA->hasSideEffects()) - Chain = getRoot(); - else - Chain = DAG.getRoot(); + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); + bool hasMemory = false; unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. - for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { - ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i])); + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); EVT OpVT = MVT::Other; @@ -5345,9 +5548,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this is an input or an indirect output, process the call argument. // BasicBlocks are labels, currently appearing only in asm's. if (OpInfo.CallOperandVal) { - // Strip bitcasts, if any. This mostly comes up for functions. - OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); - if (const BasicBlock *BB = dyn_cast(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { @@ -5358,11 +5558,33 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } OpInfo.ConstraintVT = OpVT; + + // Indirect operand accesses access memory. + if (OpInfo.isIndirect) + hasMemory = true; + else { + for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]); + if (CType == TargetLowering::C_Memory) { + hasMemory = true; + break; + } + } + } } + SDValue Chain, Flag; + + // We won't need to flush pending loads if this asm doesn't touch + // memory and is nonvolatile. + if (hasMemory || IA->hasSideEffects()) + Chain = getRoot(); + else + Chain = DAG.getRoot(); + // Second pass over the constraints: compute which constraint option to use // and assign registers to constraints that want a specific physreg. - for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; // If this is an output operand with a matching input operand, look up the @@ -5371,7 +5593,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // error. if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; - + if (OpInfo.ConstraintVT != Input.ConstraintVT) { if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || @@ -5392,7 +5614,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // need to to provide an address for the memory input. if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { - assert(OpInfo.Type == InlineAsm::isInput && + assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); // Memory operands really want the address of the value. If we don't have @@ -5416,7 +5638,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurDebugLoc(), - OpInfo.CallOperand, StackSlot, NULL, 0, + OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(SSFI), false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -5434,8 +5657,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { GetRegistersForValue(OpInfo, OutputRegs, InputRegs); } - ConstraintInfos.clear(); - // Second pass - Loop over all of the operands, assigning virtual or physregs // to register class operands. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { @@ -5553,7 +5774,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { " don't know how to handle tied " "indirect register inputs"); } - + RegsForValue MatchedRegs; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); @@ -5572,7 +5793,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { DAG, AsmNodeOperands); break; } - + assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && "Unexpected number of operands"); @@ -5586,10 +5807,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { break; } - if (OpInfo.ConstraintType == TargetLowering::C_Other) { - assert(!OpInfo.isIndirect && - "Don't know how to handle indirect other inputs yet!"); + // Treat indirect 'X' constraint as memory. + if (OpInfo.ConstraintType == TargetLowering::C_Other && + OpInfo.isIndirect) + OpInfo.ConstraintType = TargetLowering::C_Memory; + if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], Ops, DAG); @@ -5605,7 +5828,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } - + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy() && @@ -5714,7 +5937,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), - StoresToEmit[i].second, 0, + MachinePointerInfo(StoresToEmit[i].second), false, false, 0); OutChains.push_back(Val); } @@ -5734,9 +5957,11 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { + const TargetData &TD = *TLI.getTargetData(); SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), - DAG.getSrcValue(I.getOperand(0))); + DAG.getSrcValue(I.getOperand(0)), + TD.getABITypeAlignment(I.getType())); setValue(&I, V); DAG.setRoot(V.getValue(1)); } @@ -5849,7 +6074,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; + MyFlags.VT = RegisterVT.getSimpleVT(); MyFlags.Used = isReturnValueUsed; if (RetSExt) MyFlags.Flags.setSExt(); @@ -5885,7 +6110,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerCall emitted a null value!"); - assert(Ins[i].VT == InVals[i].getValueType() && + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && "LowerCall emitted a value with the wrong type!"); }); @@ -5959,15 +6184,10 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SmallVector Ins; // Check whether the function can return without sret-demotion. - SmallVector OutVTs; - SmallVector OutsFlags; - getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), - OutVTs, OutsFlags, TLI); - - FuncInfo->CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), - F.isVarArg(), - OutVTs, OutsFlags, - F.getContext()); + SmallVector Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), + Outs, TLI); + if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector ValueVTs; @@ -6051,7 +6271,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerFormalArguments emitted a null value!"); - assert(Ins[i].VT == InVals[i].getValueType() && + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && "LowerFormalArguments emitted a value with the wrong type!"); } }); @@ -6118,6 +6338,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { i += NumParts; } + // Note down frame index for byval arguments. + if (I->hasByValAttr() && !ArgValues.empty()) + if (FrameIndexSDNode *FI = + dyn_cast(ArgValues[0].getNode())) + FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex()); + if (!I->use_empty()) { SDValue Res; if (!ArgValues.empty())