From: Bruno Cardoso Lopes Date: Sat, 28 May 2011 04:07:29 +0000 (+0000) Subject: Add support for ARM ldrexd/strexd intrinsics. They both use i32 register pairs X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=a0112d0c39aa31fe555ecf7296923ca30f68f811;p=oota-llvm.git Add support for ARM ldrexd/strexd intrinsics. They both use i32 register pairs to load/store i64 values. Since there's no current support to explicitly declare such restrictions, implement it by using specific hardcoded register pairs during isel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@132248 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td index 3954412a76e..fa8034e0c2c 100644 --- a/include/llvm/IntrinsicsARM.td +++ b/include/llvm/IntrinsicsARM.td @@ -35,6 +35,16 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; } +//===----------------------------------------------------------------------===// +// Load and Store exclusive doubleword + +let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". + def int_arm_strexd : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, + llvm_ptr_ty], [IntrReadWriteArgMem]>; + def int_arm_ldrexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty], + [IntrReadArgMem]>; +} + //===----------------------------------------------------------------------===// // VFP diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index abe5a316a45..9ad516dafb9 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2691,6 +2691,111 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { default: break; + case Intrinsic::arm_ldrexd: { + SDValue MemAddr = N->getOperand(2); + DebugLoc dl = N->getDebugLoc(); + SDValue Chain = N->getOperand(0); + + unsigned NewOpc = ARM::LDREXD; + if (Subtarget->isThumb() && Subtarget->hasThumb2()) + NewOpc = ARM::t2LDREXD; + + // arm_ldrexd returns a i64 value in {i32, i32} + std::vector ResTys; + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::Other); + + // place arguments in the right order + SmallVector Ops; + Ops.push_back(MemAddr); + Ops.push_back(getAL(CurDAG)); + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); + Ops.push_back(Chain); + SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), + Ops.size()); + // Transfer memoperands. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + cast(Ld)->setMemRefs(MemOp, MemOp + 1); + + // Until there's support for specifing explicit register constraints + // like the use of even/odd register pair, hardcode ldrexd to always + // use the pair [R0, R1] to hold the load result. + Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R0, + SDValue(Ld, 0), SDValue(0,0)); + Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R1, + SDValue(Ld, 1), Chain.getValue(1)); + + // Remap uses. + SDValue Glue = Chain.getValue(1); + if (!SDValue(N, 0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + ARM::R0, MVT::i32, Glue); + Glue = Result.getValue(2); + ReplaceUses(SDValue(N, 0), Result); + } + if (!SDValue(N, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + ARM::R1, MVT::i32, Glue); + Glue = Result.getValue(2); + ReplaceUses(SDValue(N, 1), Result); + } + + ReplaceUses(SDValue(N, 2), SDValue(Ld, 2)); + return NULL; + } + + case Intrinsic::arm_strexd: { + DebugLoc dl = N->getDebugLoc(); + SDValue Chain = N->getOperand(0); + SDValue Val0 = N->getOperand(2); + SDValue Val1 = N->getOperand(3); + SDValue MemAddr = N->getOperand(4); + + // Until there's support for specifing explicit register constraints + // like the use of even/odd register pair, hardcode strexd to always + // use the pair [R2, R3] to hold the i64 (i32, i32) value to be stored. + Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R2, Val0, + SDValue(0, 0)); + Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R3, Val1, Chain.getValue(1)); + + SDValue Glue = Chain.getValue(1); + Val0 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + ARM::R2, MVT::i32, Glue); + Glue = Val0.getValue(1); + Val1 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + ARM::R3, MVT::i32, Glue); + + // Store exclusive double return a i32 value which is the return status + // of the issued store. + std::vector ResTys; + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::Other); + + // place arguments in the right order + SmallVector Ops; + Ops.push_back(Val0); + Ops.push_back(Val1); + Ops.push_back(MemAddr); + Ops.push_back(getAL(CurDAG)); + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); + Ops.push_back(Chain); + + unsigned NewOpc = ARM::STREXD; + if (Subtarget->isThumb() && Subtarget->hasThumb2()) + NewOpc = ARM::t2STREXD; + + SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), + Ops.size()); + // Transfer memoperands. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + cast(St)->setMemRefs(MemOp, MemOp + 1); + + return St; + } + case Intrinsic::arm_neon_vld1: { unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, ARM::VLD1d32, ARM::VLD1d64 }; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index f23d890d514..a78c3ed2a4a 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -7658,6 +7658,28 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_strexd: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::i64; + Info.ptrVal = I.getArgOperand(2); + Info.offset = 0; + Info.align = 8; + Info.vol = false; + Info.readMem = false; + Info.writeMem = true; + return true; + } + case Intrinsic::arm_ldrexd: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::i64; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = 8; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + return true; + } default: break; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index e9e11b04652..fd550219fe2 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -3366,8 +3366,9 @@ def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, "ldrexh", "\t$Rt, $addr", []>; def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, "ldrex", "\t$Rt, $addr", []>; -def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr), - NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>; +let hasExtraDefRegAllocReq = 1 in + def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr), + NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>; } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { @@ -3377,10 +3378,12 @@ def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>; def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; +} + +let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in def STREXD : AIstrex<0b01, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rt2, addrmode7:$addr), NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []>; -} // Clear-Exclusive is for disassembly only. def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index b8159f13a3a..e86f2767cad 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2881,7 +2881,9 @@ def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone let Inst{19-16} = addr; let Inst{15-12} = Rt; } -def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_reg:$addr), +let hasExtraDefRegAllocReq = 1 in +def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), + (ins t2addrmode_reg:$addr), AddrModeNone, Size4Bytes, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { @@ -2912,6 +2914,9 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), let Inst{19-16} = addr; let Inst{15-12} = Rt; } +} + +let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr), AddrModeNone, Size4Bytes, NoItinerary, @@ -2920,7 +2925,6 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), bits<4> Rt2; let Inst{11-8} = Rt2; } -} // Clear-Exclusive is for disassembly only. def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex", diff --git a/test/CodeGen/ARM/ldstrexd.ll b/test/CodeGen/ARM/ldstrexd.ll new file mode 100644 index 00000000000..0c0911a86e7 --- /dev/null +++ b/test/CodeGen/ARM/ldstrexd.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s + +%0 = type { i32, i32 } + +; CHECK: f0: +; CHECK: ldrexd +define i64 @f0(i8* %p) nounwind readonly { +entry: + %ldrexd = tail call %0 @llvm.arm.ldrexd(i8* %p) + %0 = extractvalue %0 %ldrexd, 1 + %1 = extractvalue %0 %ldrexd, 0 + %2 = zext i32 %0 to i64 + %3 = zext i32 %1 to i64 + %shl = shl nuw i64 %2, 32 + %4 = or i64 %shl, %3 + ret i64 %4 +} + +; CHECK: f1: +; CHECK: strexd +define i32 @f1(i8* %ptr, i64 %val) nounwind { +entry: + %tmp4 = trunc i64 %val to i32 + %tmp6 = lshr i64 %val, 32 + %tmp7 = trunc i64 %tmp6 to i32 + %strexd = tail call i32 @llvm.arm.strexd(i32 %tmp4, i32 %tmp7, i8* %ptr) + ret i32 %strexd +} + +declare %0 @llvm.arm.ldrexd(i8*) nounwind readonly +declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind +