setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::SELECT_CC);
}
computeRegisterProperties();
// ARM does not have ROTL.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ, MVT::i32, Expand);
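+ // CTTZ is custom-lowered so v6T2+ targets can use RBIT+CLZ; older cores
+ // still get the generic expansion (see LowerCTTZ below).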
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
- // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR iff target supports vfp2.
+ // Turn f64->i64 into VMOVRRD and i64->f64 into VMOVDRR
+ // iff the target supports VFP2.
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
// We want to custom lower some of our intrinsics.
case ARMISD::CMOV: return "ARMISD::CMOV";
case ARMISD::CNEG: return "ARMISD::CNEG";
+ case ARMISD::RBIT: return "ARMISD::RBIT";
+
case ARMISD::FTOSI: return "ARMISD::FTOSI";
case ARMISD::FTOUI: return "ARMISD::FTOUI";
case ARMISD::SITOF: return "ARMISD::SITOF";
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::FMAX: return "ARMISD::FMAX";
+ case ARMISD::FMIN: return "ARMISD::FMIN";
}
}
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
}
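+ // The extra (false, false, 0) arguments to getStore/getLoad are the new
+ // isVolatile, isNonTemporal, and Alignment parameters; these values keep
+ // the previous default behavior.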
return DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), LocMemOffset);
+ PseudoSourceValue::getStack(), LocMemOffset,
+ false, false, 0);
}
void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
+ bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
+ // ARM target does not yet support tail call optimization.
+ isTailCall = false;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
if (RelocM == Reloc::Static)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false,
0, CallingConv::C, false, /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl,
+ DAG.GetOrdering(Chain.getNode()));
return CallResult.first;
}
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
} else {
// local exec model
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
}
// The address of the thread local variable is the add of the thread
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue Chain = Result.getValue(1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
if (!UseGOTOFF)
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- PseudoSourceValue::getGOT(), 0);
+ PseudoSourceValue::getGOT(), 0,
+ false, false, 0);
return Result;
} else {
// If we have T2 ops, we can materialize the address directly via movt/movw
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
}
}
}
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- PseudoSourceValue::getGOT(), 0);
+ PseudoSourceValue::getGOT(), 0,
+ false, false, 0);
return Result;
}
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
SDValue
-ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
+ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
switch (IntNo) {
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
return Result;
}
case Intrinsic::eh_sjlj_setjmp:
- return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1));
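+ // Thumb needs the current SP as an extra operand (presumably so the
+ // expansion of the setjmp pseudo can save it); ARM passes a dummy zero.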
+ SDValue Val = Subtarget->isThumb() ?
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
+ DAG.getConstant(0, MVT::i32);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1),
+ Val);
}
}
-static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
DebugLoc dl = Op.getDebugLoc();
SDValue Op5 = Op.getOperand(5);
SDValue Res;
unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
if (isDeviceBarrier) {
- Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other,
- Op.getOperand(0));
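+ // ARMv7 has a dedicated barrier instruction; pre-v7 the barrier is a
+ // CP15 operation that takes an extra zero operand.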
+ if (Subtarget->hasV7Ops())
+ Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0));
+ else
+ Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
} else {
- Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other,
- Op.getOperand(0));
+ if (Subtarget->hasV7Ops())
+ Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+ else
+ Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
}
return Res;
}
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
+ false, false, 0);
}
SDValue
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
- PseudoSourceValue::getFixedStack(FI), 0);
+ PseudoSourceValue::getFixedStack(FI), 0,
+ false, false, 0);
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0));
+ PseudoSourceValue::getFixedStack(FI), 0,
+ false, false, 0));
}
}
unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0);
+ PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0,
+ false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
}
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0);
+ PseudoSourceValue::getJumpTable(), 0,
+ false, false, 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
} else {
Addr = DAG.getLoad(PTy, dl, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0);
+ PseudoSourceValue::getJumpTable(), 0, false, false, 0);
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
}
? ARM::R7 : ARM::R11;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
- FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+ false, false, 0);
return FrameAddr;
}
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- SrcSV, SrcSVOff + SrcOff);
+ SrcSV, SrcSVOff + SrcOff, false, false, 0);
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
}
for (i = 0;
i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
- DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
- DAG.getConstant(DstOff, MVT::i32)),
- DstSV, DstSVOff + DstOff);
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstSV, DstSVOff + DstOff, false, false, 0);
DstOff += VTSize;
}
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- SrcSV, SrcSVOff + SrcOff);
+ SrcSV, SrcSVOff + SrcOff, false, false, 0);
TFOps[i] = Loads[i].getValue(1);
++i;
SrcOff += VTSize;
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
DAG.getConstant(DstOff, MVT::i32)),
- DstSV, DstSVOff + DstOff);
+ DstSV, DstSVOff + DstOff, false, false, 0);
++i;
DstOff += VTSize;
BytesLeft -= VTSize;
return DAG.getMergeValues(Ops, 2, dl);
}
+static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
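+ // RBIT is only available from v6T2 on; without it, return SDValue() so
+ // the legalizer falls back to the generic CTTZ expansion.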
+ if (!ST->hasV6T2Ops())
+ return SDValue();
+
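+ // cttz(x) == ctlz(rbit(x)): reverse the bits, then count leading zeros.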
+ SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
+ return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
+}
+
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
- case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::RETURNADDR: break;
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
+ Subtarget);
case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG);
case ISD::SHL:
case ISD::SRL:
case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
.createVirtualRegister(ARM::GPRRegisterClass);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
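+ // Select Thumb2 or ARM encodings for the exclusive load/store and for
+ // the compare/branch instructions built below.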
unsigned ldrOpc, strOpc;
switch (Size) {
default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1: ldrOpc = ARM::LDREXB; strOpc = ARM::STREXB; break;
- case 2: ldrOpc = ARM::LDREXH; strOpc = ARM::STREXH; break;
- case 4: ldrOpc = ARM::LDREX; strOpc = ARM::STREX; break;
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
}
MachineFunction *MF = BB->getParent();
// bne exitMBB
BB = loop1MBB;
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::CMPrr))
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(dest).addReg(oldval));
- BuildMI(BB, dl, TII->get(ARM::Bcc)).addMBB(exitMBB).addImm(ARMCC::NE)
- .addReg(ARM::CPSR);
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(exitMBB);
BB = loop2MBB;
AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
.addReg(ptr));
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::CMPri))
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(scratch).addImm(0));
- BuildMI(BB, dl, TII->get(ARM::Bcc)).addMBB(loop1MBB).addImm(ARMCC::NE)
- .addReg(ARM::CPSR);
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
+
+ MF->DeleteMachineInstr(MI); // The instruction is gone now.
+
return BB;
}
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *F = BB->getParent();
+ MachineFunction *MF = BB->getParent();
MachineFunction::iterator It = BB;
++It;
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
DebugLoc dl = MI->getDebugLoc();
+
+ bool isThumb2 = Subtarget->isThumb2();
unsigned ldrOpc, strOpc;
switch (Size) {
default: llvm_unreachable("unsupported size for AtomicBinary!");
- case 1: ldrOpc = ARM::LDREXB; strOpc = ARM::STREXB; break;
- case 2: ldrOpc = ARM::LDREXH; strOpc = ARM::STREXH; break;
- case 4: ldrOpc = ARM::LDREX; strOpc = ARM::STREX; break;
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
}
- MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, loopMBB);
- F->insert(It, exitMBB);
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
exitMBB->transferSuccessors(BB);
- MachineRegisterInfo &RegInfo = F->getRegInfo();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
unsigned scratch2 = (!BinOpcode) ? incr :
RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
// loopMBB:
// ldrex dest, ptr
- // add tmp, dest, incr
- // strex scratch, tmp, ptr
+ // <binop> scratch2, dest, incr
+ // strex scratch, scratch2, ptr
// cmp scratch, #0
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
- if (BinOpcode)
- AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
- addReg(dest).addReg(incr)).addReg(0);
+ if (BinOpcode) {
+ // operand order needs to go the other way for NAND
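+ // (BIC computes "a & ~b", and the NAND pseudo here computes
+ // "~dest & incr", so incr must be the first operand.)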
+ if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(incr).addReg(dest)).addReg(0);
+ else
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(dest).addReg(incr)).addReg(0);
+ }
AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
.addReg(ptr));
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::CMPri))
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(scratch).addImm(0));
- BuildMI(BB, dl, TII->get(ARM::Bcc)).addMBB(loopMBB).addImm(ARMCC::NE)
- .addReg(ARM::CPSR);
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
+
+ MF->DeleteMachineInstr(MI); // The instruction is gone now.
+
return BB;
}
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
default:
MI->dump();
llvm_unreachable("Unexpected instr type to insert");
- case ARM::ATOMIC_LOAD_ADD_I8: return EmitAtomicBinary(MI, BB, 1, ARM::ADDrr);
- case ARM::ATOMIC_LOAD_ADD_I16: return EmitAtomicBinary(MI, BB, 2, ARM::ADDrr);
- case ARM::ATOMIC_LOAD_ADD_I32: return EmitAtomicBinary(MI, BB, 4, ARM::ADDrr);
-
- case ARM::ATOMIC_LOAD_AND_I8: return EmitAtomicBinary(MI, BB, 1, ARM::ANDrr);
- case ARM::ATOMIC_LOAD_AND_I16: return EmitAtomicBinary(MI, BB, 2, ARM::ANDrr);
- case ARM::ATOMIC_LOAD_AND_I32: return EmitAtomicBinary(MI, BB, 4, ARM::ANDrr);
-
- case ARM::ATOMIC_LOAD_OR_I8: return EmitAtomicBinary(MI, BB, 1, ARM::ORRrr);
- case ARM::ATOMIC_LOAD_OR_I16: return EmitAtomicBinary(MI, BB, 2, ARM::ORRrr);
- case ARM::ATOMIC_LOAD_OR_I32: return EmitAtomicBinary(MI, BB, 4, ARM::ORRrr);
-
- case ARM::ATOMIC_LOAD_XOR_I8: return EmitAtomicBinary(MI, BB, 1, ARM::EORrr);
- case ARM::ATOMIC_LOAD_XOR_I16: return EmitAtomicBinary(MI, BB, 2, ARM::EORrr);
- case ARM::ATOMIC_LOAD_XOR_I32: return EmitAtomicBinary(MI, BB, 4, ARM::EORrr);
-
- case ARM::ATOMIC_LOAD_NAND_I8: return EmitAtomicBinary(MI, BB, 1, ARM::BICrr);
- case ARM::ATOMIC_LOAD_NAND_I16:return EmitAtomicBinary(MI, BB, 2, ARM::BICrr);
- case ARM::ATOMIC_LOAD_NAND_I32:return EmitAtomicBinary(MI, BB, 4, ARM::BICrr);
-
- case ARM::ATOMIC_LOAD_SUB_I8: return EmitAtomicBinary(MI, BB, 1, ARM::SUBrr);
- case ARM::ATOMIC_LOAD_SUB_I16: return EmitAtomicBinary(MI, BB, 2, ARM::SUBrr);
- case ARM::ATOMIC_LOAD_SUB_I32: return EmitAtomicBinary(MI, BB, 4, ARM::SUBrr);
-
- case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
- case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
- case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
+ case ARM::ATOMIC_LOAD_ADD_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+
+ case ARM::ATOMIC_LOAD_AND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+
+ case ARM::ATOMIC_LOAD_OR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+
+ case ARM::ATOMIC_LOAD_XOR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+
+ case ARM::ATOMIC_LOAD_NAND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+
+ case ARM::ATOMIC_LOAD_SUB_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+
+ case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
+ case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
+ case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
return SDValue();
}
-/// PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
+/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// fmrrd(fmdrr x, y) -> x,y
return SDValue();
}
+/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
+static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ // If the target supports NEON, try to use vmax/vmin instructions for f32
+ // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set,
+ // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
+ // a NaN; only do the transformation when it matches that behavior.
+
+ // For now only do this when using NEON for FP operations; if using VFP, it
+ // is not obvious that the benefit outweighs the cost of switching to the
+ // NEON pipeline.
+ if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
+ N->getValueType(0) != MVT::f32)
+ return SDValue();
+
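+ // SELECT_CC operands are (lhs, rhs, true-val, false-val, cond); the fold
+ // applies only when the selected values mirror the compared values.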
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode = 0;
+ bool IsReversed;
+ if (LHS == CondLHS && RHS == CondRHS) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (LHS == CondRHS && RHS == CondLHS) {
+ IsReversed = true; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // This can be vmin if we can prove that the LHS is not a NaN.
+ // (If either operand is NaN, the comparison will be false and the result
+ // will be the RHS, which matches vmin if RHS is the NaN.)
+ if (DAG.isKnownNeverNaN(LHS))
+ Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+ break;
+
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // Likewise, for ULT/ULE we need to know that RHS is not a NaN.
+ if (DAG.isKnownNeverNaN(RHS))
+ Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+ break;
+
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // This can be vmax if we can prove that the LHS is not a NaN.
+ // (If either operand is NaN, the comparison will be false and the result
+ // will be the RHS, which matches vmax if RHS is the NaN.)
+ if (DAG.isKnownNeverNaN(LHS))
+ Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+ break;
+
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ // Likewise, for UGT/UGE we need to know that RHS is not a NaN.
+ if (DAG.isKnownNeverNaN(RHS))
+ Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+ break;
+ }
+
+ if (!Opcode)
+ return SDValue();
+ return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+}
+
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
- case ISD::ADD: return PerformADDCombine(N, DCI);
- case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::ADD: return PerformADDCombine(N, DCI);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
- case ISD::INTRINSIC_WO_CHAIN:
- return PerformIntrinsicCombine(N, DCI.DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL:
- return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
}
return SDValue();
}
if (!Subtarget->hasV6Ops())
// Pre-v6 does not support unaligned mem access.
return false;
- else if (!Subtarget->hasV6Ops()) {
- // v6 may or may not support unaligned mem access.
+ else {
+ // v6+ may or may not support unaligned mem access depending on the system
+ // configuration.
+ // FIXME: This is pretty conservative. Should we provide a cmdline option
+ // to control the behaviour?
if (!Subtarget->isTargetDarwin())
return false;
}
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
if (Constraint.size() == 1) {
- // GCC RS6000 Constraint Letters
+ // GCC ARM Constraint Letters
switch (Constraint[0]) {
case 'l':
- if (Subtarget->isThumb1Only())
+ if (Subtarget->isThumb())
return std::make_pair(0U, ARM::tGPRRegisterClass);
else
return std::make_pair(0U, ARM::GPRRegisterClass);
case 'w':
if (VT == MVT::f32)
return std::make_pair(0U, ARM::SPRRegisterClass);
- if (VT == MVT::f64)
+ if (VT.getSizeInBits() == 64)
return std::make_pair(0U, ARM::DPRRegisterClass);
if (VT.getSizeInBits() == 128)
return std::make_pair(0U, ARM::QPRRegisterClass);
ARM::S20,ARM::S21,ARM::S22,ARM::S23,
ARM::S24,ARM::S25,ARM::S26,ARM::S27,
ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
- if (VT == MVT::f64)
+ if (VT.getSizeInBits() == 64)
return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
ARM::D4, ARM::D5, ARM::D6, ARM::D7,
ARM::D8, ARM::D9, ARM::D10,ARM::D11,