setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
if (Subtarget->hasDQI()) {
- setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
+ if (Subtarget->hasVLX()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ }
+ }
+ if (Subtarget->hasVLX()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
}
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
-unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
+unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const {
if (Subtarget->is64Bit()) {
// Max of 8 and alignment of type.
- unsigned TyAlign = TD->getABITypeAlignment(Ty);
+ unsigned TyAlign = DL.getABITypeAlignment(Ty);
if (TyAlign > 8)
return TyAlign;
return 8;
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+ const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
+
+ // If this is an invoke in a 32-bit function using an MSVC personality, assume
+ // the function clobbers all registers. If an exception is thrown, the runtime
+ // will not restore CSRs.
+ // FIXME: Model this more precisely so that we can register allocate across
+ // the normal edge and spill and fill across the exceptional edge.
+ if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) {
+ const Function *CallerFn = MF.getFunction();
+ EHPersonality Pers =
+ CallerFn->hasPersonalityFn()
+ ? classifyEHPersonality(CallerFn->getPersonalityFn())
+ : EHPersonality::Unknown;
+ if (isMSVCEHPersonality(Pers))
+ Mask = RegInfo->getNoPreservedMask();
+ }
+
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
if (Mask.empty()) return false;
// Mask only contains negative index if an element is zero.
- if (std::any_of(Mask.begin(), Mask.end(),
+ if (std::any_of(Mask.begin(), Mask.end(),
[](int M){ return M == SM_SentinelZero; }))
return false;
break;
MVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getBitcast(ShVT, SrcOp);
- MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout());
+ MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT);
assert(NumBits % 8 == 0 && "Only support byte sized shifts");
SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
return NV;
}
-static SDValue ConvertI1VectorToInterger(SDValue Op, SelectionDAG &DAG) {
+static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 &&
"Can not convert non-constant vector");
}
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
- SDValue Imm = ConvertI1VectorToInterger(Op, DAG);
+ SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
return DAG.getBitcast(VT, Imm);
SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
// all the smarts here sunk into that routine. However, the current
// lowering of BUILD_VECTOR makes that nearly impossible until the old
// vector shuffle lowering is dead.
- if (SDValue V2S = getScalarValueForVectorElement(
- V2, Mask[V2Index] - Mask.size(), DAG)) {
+ SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
+ DAG);
+ if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
// We need to zext the scalar if it is smaller than an i32.
V2S = DAG.getBitcast(EltVT, V2S);
if (EltVT == MVT::i8 || EltVT == MVT::i16) {
X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL,
DAG.getTargetLoweringInfo().getScalarShiftAmountTy(
- DAG.getDataLayout())));
+ DAG.getDataLayout(), VT)));
V2 = DAG.getBitcast(VT, V2);
}
}
}
}
- if (VT.isVector() && VT.getScalarType() == MVT::i1) {
- SDValue Op1Scalar;
- if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
- Op1Scalar = ConvertI1VectorToInterger(Op1, DAG);
- else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
- Op1Scalar = Op1.getOperand(0);
- SDValue Op2Scalar;
- if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
- Op2Scalar = ConvertI1VectorToInterger(Op2, DAG);
- else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
- Op2Scalar = Op2.getOperand(0);
- if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
- SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
- Op1Scalar.getValueType(),
- Cond, Op1Scalar, Op2Scalar);
- if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
- return DAG.getBitcast(VT, newSelect);
- SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
- DAG.getIntPtrConstant(0, DL));
+ if (VT.isVector() && VT.getScalarType() == MVT::i1) {
+ SDValue Op1Scalar;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
+ Op1Scalar = ConvertI1VectorToInteger(Op1, DAG);
+ else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
+ Op1Scalar = Op1.getOperand(0);
+ SDValue Op2Scalar;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
+ Op2Scalar = ConvertI1VectorToInteger(Op2, DAG);
+ else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
+ Op2Scalar = Op2.getOperand(0);
+ if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
+ SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
+ Op1Scalar.getValueType(),
+ Cond, Op1Scalar, Op2Scalar);
+ if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
+ return DAG.getBitcast(VT, newSelect);
+ SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
+ DAG.getIntPtrConstant(0, DL));
}
}
EVT ArgVT = Op.getNode()->getValueType(0);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
+ uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
uint8_t ArgMode;
// Decide which area this value should be read from.
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue RoundingMode;
+ // We allways add rounding mode to the Node.
+ // If the rounding mode is not specified, we add the
+ // "current direction" mode.
if (Op.getNumOperands() == 4)
- RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ RoundingMode =
+ DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
else
RoundingMode = Op.getOperand(4);
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0) {
- unsigned Round = cast<ConstantSDNode>(RoundingMode)->getZExtValue();
- if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION)
+ if (IntrWithRoundingModeOpcode != 0)
+ if (cast<ConstantSDNode>(RoundingMode)->getZExtValue() !=
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(), Src, RoundingMode),
Mask, PassThru, Subtarget, DAG);
- }
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
RoundingMode),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_1OP_MASK: {
SDValue Src = Op.getOperand(1);
- SDValue Passthru = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
+ // We add rounding mode to the Node when
+ // - RM Opcode is specified and
+ // - RM is not "current direction".
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ SDValue Rnd = Op.getOperand(4);
+ unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
+ if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
+ return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, Op.getValueType(),
+ Src, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ }
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
- Mask, Passthru, Subtarget, DAG);
+ Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
SDValue Rnd;
if (Op.getNumOperands() == 6)
Rnd = Op.getOperand(5);
- else
+ else
Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Rnd),
Mask, PassThru, Subtarget, DAG);
}
+ case INTR_TYPE_3OP_MASK_RM: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Imm = Op.getOperand(3);
+ SDValue PassThru = Op.getOperand(4);
+ SDValue Mask = Op.getOperand(5);
+ // We specify 2 possible modes for intrinsics, with/without rounding modes.
+ // First, we check if the intrinsic have rounding mode (7 operands),
+ // if not, we set rounding mode to "current".
+ SDValue Rnd;
+ if (Op.getNumOperands() == 7)
+ Rnd = Op.getOperand(6);
+ else
+ Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1, Src2, Imm, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
- case VPERM_3OP_MASKZ:
+ case VPERM_3OP_MASKZ:
case VPERM_3OP_MASK:
case FMA_OP_MASK3:
case FMA_OP_MASKZ:
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned X86TargetLowering::getRegisterByName(const char* RegName,
- EVT VT) const {
+unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
+ const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+ const MachineFunction &MF = DAG.getMachineFunction();
+
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("esp", X86::ESP)
.Case("rsp", X86::RSP)
+ .Case("ebp", X86::EBP)
+ .Case("rbp", X86::RBP)
.Default(0);
+
+ if (Reg == X86::EBP || Reg == X86::RBP) {
+ if (!TFI.hasFP(MF))
+ report_fatal_error("register " + StringRef(RegName) +
+ " is allocatable: function has no frame pointer");
+#ifndef NDEBUG
+ else {
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ unsigned FrameReg =
+ RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
+ assert((FrameReg == X86::EBP || FrameReg == X86::RBP) &&
+ "Invalid Frame Register!");
+ }
+#endif
+ }
+
if (Reg)
return Reg;
+
report_fatal_error("Invalid register name global variable");
}
return DAG.getMergeValues(Ops, dl);
}
-// Return true if the requred (according to Opcode) shift-imm form is natively
+// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
}
// The shift amount is a variable, but it is the same for all vector lanes.
-// These instrcutions are defined together with shift-immediate.
+// These instructions are defined together with shift-immediate.
static
bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
-// Return true if the requred (according to Opcode) variable-shift form is
+// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget
static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
}
}
+ // v4i32 Non Uniform Shifts.
+ // If the shift amount is constant we can shift each lane using the SSE2
+ // immediate shifts, else we need to zero-extend each lane to the lower i64
+ // and shift using the SSE2 variable shifts.
+ // The separate results can then be blended together.
+ if (VT == MVT::v4i32) {
+ unsigned Opc = Op.getOpcode();
+ SDValue Amt0, Amt1, Amt2, Amt3;
+ if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
+ Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});
+ Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1});
+ Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2});
+ Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3});
+ } else {
+ // ISD::SHL is handled above but we include it here for completeness.
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown target vector shift node");
+ case ISD::SHL:
+ Opc = X86ISD::VSHL;
+ break;
+ case ISD::SRL:
+ Opc = X86ISD::VSRL;
+ break;
+ case ISD::SRA:
+ Opc = X86ISD::VSRA;
+ break;
+ }
+ // The SSE2 shifts use the lower i64 as the same shift amount for
+ // all lanes and the upper i64 is ignored. These shuffle masks
+ // optimally zero-extend each lanes on SSE2/SSE41/AVX targets.
+ SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+ Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
+ Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
+ Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
+ Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1});
+ }
+
+ SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
+ SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
+ SDValue R2 = DAG.getNode(Opc, dl, VT, R, Amt2);
+ SDValue R3 = DAG.getNode(Opc, dl, VT, R, Amt3);
+ SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1});
+ SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7});
+ return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
+ }
+
if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
unsigned ShiftOpcode = Op->getOpcode();
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
+ case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
case X86ISD::RDSEED: return "X86ISD::RDSEED";
+ case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW";
+ case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD";
case X86ISD::FMADD: return "X86ISD::FMADD";
case X86ISD::FMSUB: return "X86ISD::FMSUB";
case X86ISD::FNMADD: return "X86ISD::FNMADD";
case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND";
case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND";
case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND";
- case X86ISD::RNDSCALE: return "X86ISD::RNDSCALE";
+ case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
+ case X86ISD::VREDUCE: return "X86ISD::VREDUCE";
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
case X86ISD::XTEST: return "X86ISD::XTEST";
case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
+ case X86ISD::FP_TO_SINT_RND: return "X86ISD::FP_TO_SINT_RND";
+ case X86ISD::FP_TO_UINT_RND: return "X86ISD::FP_TO_UINT_RND";
}
return nullptr;
}
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
-bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
// alignment is valid.
unsigned Align = LN0->getAlignment();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+ unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
EltVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT))
// We shift all of the values by one. In many cases we do not have
// hardware support for this operation. This is better expressed as an ADD
// of two values.
- if (N1SplatC->getZExtValue() == 1)
+ if (N1SplatC->getAPIntValue() == 1)
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
return Res;
}
-int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
- Type *Ty,
+int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// Scaling factors are not free at all.
// An indexed folded instruction, i.e., inst (reg1, reg2, scale),
// E.g., on Haswell:
// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
// vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
- if (isLegalAddressingMode(AM, Ty, AS))
+ if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1
// as soon as we use a second register.
return AM.Scale != 0;