setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+ // These should use UDIVREM, so set them to Expand.
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
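+ // (A note on the mechanics, as I understand the legalizer: with UDIV/UREM
+ //  marked Expand, DAG legalization checks whether ISD::UDIVREM is legal or
+ //  custom for the type and emits that instead, so one lowering covers both
+ //  the quotient and the remainder.)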
+
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
MVT VecTypes[] = {
bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
unsigned AddrSpace,
bool *IsFast) const {
+ if (IsFast)
+ *IsFast = false;
+
// XXX: This depends on the address space, and we may also want to revisit
// the alignment values we specify in the DataLayout.
+
+ // TODO: I think v3i32 should allow unaligned accesses on CI with
+ // DS_READ_B96, but v3i32 isn't a simple VT, so it is rejected by the
+ // check below.
if (!VT.isSimple() || VT == MVT::Other)
return false;
+
+ // XXX - The CI changes say "Support for unaligned memory accesses", but I
+ // don't see specifically what this applies to. The wording everywhere else
+ // seems to be the same.
+
+ // 3.6.4 - Operations using pairs of VGPRs (for example: double-floats) have
+ // no alignment restrictions.
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
+ // Using any pair of VGPRs should be the same as using any other pair.
+ if (IsFast)
+ *IsFast = true;
+ return VT.bitsGE(MVT::i64);
+ }
+
+ // XXX - The only mention I see of this in the ISA manual is for LDS direct
+ // reads, where the byte address "must be dword aligned". Is this also true
+ // for normal loads and stores?
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS)
+ return false;
+
+ // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
+ // byte-address are ignored, thus forcing Dword alignment.
+ if (IsFast)
+ *IsFast = true;
return VT.bitsGT(MVT::i32);
}
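+ // Illustrative only, not something this patch relies on: a caller deciding
+ // whether to split an access would query this hook roughly as follows. The
+ // type and address space are made up for the example.
+ //
+ //   bool Fast = false;
+ //   if (TLI.allowsUnalignedMemoryAccesses(MVT::v2i32,
+ //                                         AMDGPUAS::GLOBAL_ADDRESS, &Fast))
+ //     ; // emit the wide access as-is; Fast hints there is no penalty
+ //   else
+ //     ; // split into naturally aligned pieces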
for (unsigned j = 0; j != NumElements; ++j)
Regs.push_back(DAG.getUNDEF(VT));
- InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
- Regs.data(), Regs.size()));
+ InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, Regs));
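+ // (The SelectionDAG builders now take ArrayRef operand lists, so the
+ //  explicit data()/size() pairs disappear; the same mechanical change
+ //  repeats below in getMergeValues, getMemIntrinsicNode, MorphNodeTo and
+ //  UpdateNodeOperands, including dropping a hand-written
+ //  sizeof(Ops)/sizeof(Ops[0]) count.)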
continue;
}
SplitVectorLoad(Op, DAG),
Load->getChain()
};
- return DAG.getMergeValues(MergedValues, 2, SDLoc(Op));
+ return DAG.getMergeValues(MergedValues, SDLoc(Op));
} else {
return LowerLOAD(Op, DAG);
}
MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
VT.getSizeInBits() / 8, 4);
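+ // (As I read it, MOLoad | MOInvariant declares this a read of memory that
+ //  is never written, so later passes may freely CSE or reorder the constant
+ //  fetch; the operands above give the size in bytes and a 4-byte alignment.)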
return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
- Op->getVTList(), Ops, 2, VT, MMO);
+ Op->getVTList(), Ops, VT, MMO);
}
case AMDGPUIntrinsic::SI_sample:
return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
MachineMemOperand::MOStore,
VT.getSizeInBits() / 8, 4);
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
- Op->getVTList(), Ops,
- sizeof(Ops)/sizeof(Ops[0]), VT, MMO);
+ Op->getVTList(), Ops, VT, MMO);
}
default:
break;
if (I->getOpcode() == Opcode)
return *I;
}
- return 0;
+ return nullptr;
}
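+ // (Sketch of the assumed call pattern, with findUser as a hypothetical name
+ //  for this helper; the `if (BR)` test further down relies on the nullptr
+ //  sentinel:)
+ //
+ //   SDNode *BR = findUser(BRCOND, ISD::BR);
+ //   if (BR) { /* rewrite the branch target */ }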
/// This transforms the control flow intrinsics to get the branch destination
/// as the last parameter, and switches the branch target with BR if needed.
SDNode *Intr = BRCOND.getOperand(1).getNode();
SDValue Target = BRCOND.getOperand(2);
- SDNode *BR = 0;
+ SDNode *BR = nullptr;
if (Intr->getOpcode() == ISD::SETCC) {
// As long as we negate the condition everything is fine
// build the new intrinsic call
SDNode *Result = DAG.getNode(
Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
- DAG.getVTList(Res), Ops.data(), Ops.size()).getNode();
+ DAG.getVTList(Res), Ops).getNode();
if (BR) {
// Give the branch instruction our target
BR->getOperand(0),
BRCOND.getOperand(2)
};
- DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
+ DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops);
}
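+ // (My understanding of MorphNodeTo: it rewrites the existing BR node in
+ //  place rather than allocating a replacement, so existing uses of the
+ //  branch remain valid; only the opcode and operands change.)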
SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
MergedValues[1] = Load->getChain();
if (Ret.getNode()) {
MergedValues[0] = Ret;
- return DAG.getMergeValues(MergedValues, 2, DL);
+ return DAG.getMergeValues(MergedValues, DL);
}
if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
}
MergedValues[0] = Ret;
- return DAG.getMergeValues(MergedValues, 2, DL);
+ return DAG.getMergeValues(MergedValues, DL);
}
SDValue Arg0 = N->getOperand(0);
SDValue Arg1 = N->getOperand(1);
SDValue CC = N->getOperand(2);
- ConstantSDNode * C = NULL;
+ ConstantSDNode *C = nullptr;
ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
// i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
const SIInstrInfo *TII =
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- if (Mov == 0 || !TII->isMov(Mov->getMachineOpcode()))
+ if (!Mov || !TII->isMov(Mov->getMachineOpcode()))
return false;
const SDValue &Op = Mov->getOperand(0);
}
return TRI.getPhysRegClass(Reg);
}
- default: return NULL;
+ default: return nullptr;
}
}
const MCInstrDesc *Desc = &TII->get(Op->getMachineOpcode());
// Commuted opcode if available
int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
- const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev);
+ const MCInstrDesc *DescRev = OpcodeRev == -1 ? nullptr : &TII->get(OpcodeRev);
assert(!DescRev || DescRev->getNumDefs() == NumDefs);
assert(!DescRev || DescRev->getNumOperands() == NumOps);
// e64 version if available, -1 otherwise
int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
- const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);
+ const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? nullptr : &TII->get(OpcodeE64);
assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4));
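+ // (The four extra operands on the e64 form are, as I read the VOP3
+ //  encoding, the abs/clamp/omod/neg modifier immediates.)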
std::swap(Ops[0], Ops[1]);
Desc = DescRev;
- DescRev = 0;
+ DescRev = nullptr;
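+ // (Clearing DescRev after commuting prevents a later operand from
+ //  triggering a second swap that would undo this one; DescE64 below is
+ //  nulled after the e64 promotion for the same reason.)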
continue;
}
}
Immediate = -1;
Promote2e64 = true;
Desc = DescE64;
- DescE64 = 0;
+ DescE64 = nullptr;
}
}
}
Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32));
for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
Ops.push_back(Node->getOperand(i));
- Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+ Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops);
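+ // (UpdateNodeOperands may return an existing CSE'd node instead of
+ //  modifying this one in place, hence taking the result and casting it
+ //  back to MachineSDNode.)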
// If we only got one lane, replace it with a copy
// (if NewDmask has only one bit set...)