From f7da8821b4c491b1c2ce7ac2374e46d8abdba518 Mon Sep 17 00:00:00 2001
From: Jim Grosbach
Date: Tue, 26 Apr 2011 19:44:18 +0000
Subject: [PATCH] ARM and Thumb2 support for atomic MIN/MAX/UMIN/UMAX loads.

rdar://9326019

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130234 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp | 153 +++++++++++++++++++++++++++++
 lib/Target/ARM/ARMISelLowering.h   |   5 +
 lib/Target/ARM/ARMInstrInfo.td     |  36 ++++++++
 3 files changed, 194 insertions(+)

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 94dc6c2b859..6974c18c2cd 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -609,6 +609,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
     // Since the libcalls include locking, fold in the fences
     setShouldFoldAtomicFences(true);
   }
@@ -5031,6 +5043,119 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   return BB;
 }
 
+/// EmitAtomicBinaryMinMax - Expand an atomic MIN/MAX/UMIN/UMAX pseudo
+/// instruction into an ldrex / cmp / conditional-move / strex retry loop.
+/// \p Size is the access width in bytes (1, 2 or 4) and selects the exclusive
+/// load/store opcodes. If \p signExtend is set and the access is narrower
+/// than 32 bits, the loaded value is sign-extended before the compare.
+/// \p Cond is the condition, tested on (loaded value, operand), under which
+/// the loaded value is written back unchanged; otherwise the operand is
+/// stored. Returns the exit block holding the remainder of the original block.
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
+                                          MachineBasicBlock *BB,
+                                          unsigned Size,
+                                          bool signExtend,
+                                          ARMCC::CondCodes Cond) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction *MF = BB->getParent();
+  MachineFunction::iterator It = BB;
+  ++It;
+
+  unsigned dest = MI->getOperand(0).getReg();
+  unsigned ptr = MI->getOperand(1).getReg();
+  unsigned incr = MI->getOperand(2).getReg();
+  unsigned oldval = dest;
+  DebugLoc dl = MI->getDebugLoc();
+
+  bool isThumb2 = Subtarget->isThumb2();
+  unsigned ldrOpc, strOpc, extendOpc;
+  switch (Size) {
+  default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
+  case 1:
+    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+    extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
+    break;
+  case 2:
+    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+    extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
+    break;
+  case 4:
+    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+    extendOpc = 0;
+    break;
+  }
+
+  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MF->insert(It, loopMBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+  unsigned scratch2 = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+
+  // thisMBB:
+  //   ...
+  //   fallthrough --> loopMBB
+  BB->addSuccessor(loopMBB);
+
+  // loopMBB:
+  //   ldrex dest, ptr
+  //   (sign extend dest, if required)
+  //   cmp dest, incr
+  //   cmov.cond scratch2, incr, dest
+  //   strex scratch, scratch2, ptr
+  //   cmp scratch, #0
+  //   bne- loopMBB
+  //   fallthrough --> exitMBB
+  BB = loopMBB;
+  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+
+  // Sign extend the value, if necessary.
+  if (signExtend && extendOpc) {
+    oldval = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+    AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
+  }
+
+  // Build compare and cmov instructions. The first MOVCC source is tied to
+  // the destination, so scratch2 starts out as incr and is replaced by the
+  // loaded value only when Cond holds for the (oldval, incr) compare.
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+                 .addReg(oldval).addReg(incr));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
+         .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);
+
+  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+                 .addReg(ptr));
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+                 .addReg(scratch).addImm(0));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+  BB->addSuccessor(loopMBB);
+  BB->addSuccessor(exitMBB);
+
+  // exitMBB:
+  //   ...
+  BB = exitMBB;
+
+  MI->eraseFromParent();   // The instruction is gone now.
+
+  return BB;
+}
+
 static
 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
   for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
@@ -5162,6 +5287,34 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case ARM::ATOMIC_LOAD_SUB_I32:
      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
 
+  case ARM::ATOMIC_LOAD_MIN_I8:
+     return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
+  case ARM::ATOMIC_LOAD_MIN_I16:
+     return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
+  case ARM::ATOMIC_LOAD_MIN_I32:
+     return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
+
+  case ARM::ATOMIC_LOAD_MAX_I8:
+     return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
+  case ARM::ATOMIC_LOAD_MAX_I16:
+     return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
+  case ARM::ATOMIC_LOAD_MAX_I32:
+     return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
+
+  case ARM::ATOMIC_LOAD_UMIN_I8:
+     return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
+  case ARM::ATOMIC_LOAD_UMIN_I16:
+     return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
+  case ARM::ATOMIC_LOAD_UMIN_I32:
+     return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
+
+  case ARM::ATOMIC_LOAD_UMAX_I8:
+     return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
+  case ARM::ATOMIC_LOAD_UMAX_I16:
+     return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
+  case ARM::ATOMIC_LOAD_UMAX_I32:
+     return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
+
   case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
   case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
   case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 5befcee6f29..a2e626062ac 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -484,6 +484,11 @@ namespace llvm {
                                         MachineBasicBlock *BB,
                                         unsigned Size,
                                         unsigned BinOpcode) const;
+    MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
+                                               MachineBasicBlock *BB,
+                                               unsigned Size,
+                                               bool signExtend,
+                                               ARMCC::CondCodes Cond) const;
 
     bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
   };
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 
48110f805fe..9c912c10998 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -3256,6 +3256,18 @@ let usesCustomInserter = 1 in {
     def ATOMIC_LOAD_NAND_I8 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
       [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_MIN_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_MAX_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
     def ATOMIC_LOAD_ADD_I16 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
       [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
@@ -3274,6 +3286,18 @@ let usesCustomInserter = 1 in {
     def ATOMIC_LOAD_NAND_I16 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
       [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_MIN_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_MAX_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
     def ATOMIC_LOAD_ADD_I32 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
       [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
@@ -3292,6 +3316,18 @@ let usesCustomInserter = 1 in {
     def ATOMIC_LOAD_NAND_I32 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
       [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_MIN_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_MAX_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
 
     def ATOMIC_SWAP_I8 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
-- 
2.34.1