return CurDAG->getTargetNode(Opc, dl, ResTys, Ops, 4);
}
+ case Intrinsic::arm_neon_vld2lane: {
+ // Select VLD2LNd* for a "load one lane of a 2-register structure".
+ SDValue MemAddr, MemUpdate, MemOpc;
+ // Operand 2 is the pointer; it must match addrmode6 or selection fails.
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ return NULL;
+ // Choose the opcode by lane size (8/16/32-bit elements of a 64-bit vector).
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld2lane type");
+ case MVT::v8i8: Opc = ARM::VLD2LNd8; break;
+ case MVT::v4i16: Opc = ARM::VLD2LNd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VLD2LNd32; break;
+ }
+ SDValue Chain = N->getOperand(0);
+ // Operands: the addrmode6 triple, the two source vectors (tied to the
+ // results), the lane number (operand 5, already a TargetConstant from
+ // lowering), and the chain.
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
+ N->getOperand(3), N->getOperand(4),
+ N->getOperand(5), Chain };
+ // Results: two vectors of type VT plus the output chain.
+ return CurDAG->getTargetNode(Opc, dl, VT, VT, MVT::Other, Ops, 7);
+ }
+
+ case Intrinsic::arm_neon_vld3lane: {
+ // Select VLD3LNd* for a "load one lane of a 3-register structure".
+ SDValue MemAddr, MemUpdate, MemOpc;
+ // Operand 2 is the pointer; it must match addrmode6 or selection fails.
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ return NULL;
+ // Choose the opcode by lane size (8/16/32-bit elements of a 64-bit vector).
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld3lane type");
+ case MVT::v8i8: Opc = ARM::VLD3LNd8; break;
+ case MVT::v4i16: Opc = ARM::VLD3LNd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VLD3LNd32; break;
+ }
+ SDValue Chain = N->getOperand(0);
+ // Operands: the addrmode6 triple, the three source vectors (tied to the
+ // results), the lane number (operand 6), and the chain.
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
+ N->getOperand(3), N->getOperand(4),
+ N->getOperand(5), N->getOperand(6), Chain };
+ // Results: three vectors of type VT plus the output chain.
+ return CurDAG->getTargetNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 8);
+ }
+
+ case Intrinsic::arm_neon_vld4lane: {
+ // Select VLD4LNd* for a "load one lane of a 4-register structure".
+ SDValue MemAddr, MemUpdate, MemOpc;
+ // Operand 2 is the pointer; it must match addrmode6 or selection fails.
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ return NULL;
+ // Choose the opcode by lane size (8/16/32-bit elements of a 64-bit vector).
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld4lane type");
+ case MVT::v8i8: Opc = ARM::VLD4LNd8; break;
+ case MVT::v4i16: Opc = ARM::VLD4LNd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VLD4LNd32; break;
+ }
+ SDValue Chain = N->getOperand(0);
+ // Operands: the addrmode6 triple, the four source vectors (tied to the
+ // results), the lane number (operand 7), and the chain.
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
+ N->getOperand(3), N->getOperand(4),
+ N->getOperand(5), N->getOperand(6),
+ N->getOperand(7), Chain };
+ // Result types: four vectors of type VT plus the output chain. A vector
+ // is used here because getTargetNode has no fixed-arity overload for
+ // five result types.
+ std::vector<EVT> ResTys(4, VT);
+ ResTys.push_back(MVT::Other);
+ return CurDAG->getTargetNode(Opc, dl, ResTys, Ops, 9);
+ }
+
case Intrinsic::arm_neon_vst2: {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return SDValue();
}
+/// LowerNeonVLDLaneIntrinsic - Lower a NEON vldN-lane intrinsic (NumVecs is
+/// 2, 3, or 4) by rewriting its lane-number operand into a TargetConstant so
+/// the legalizer does not move it into a register.  Returns SDValue() (leave
+/// the node alone) for unsupported types.
+static SDValue LowerNeonVLDLaneIntrinsic(SDValue Op, SelectionDAG &DAG,
+ unsigned NumVecs) {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+
+ // Only 64-bit (D-register) vectors are handled so far.
+ if (!VT.is64BitVector())
+ return SDValue(); // unimplemented
+
+ // Change the lane number operand to be a TargetConstant; otherwise it
+ // will be legalized into a register.
+ // Operand layout: chain, intrinsic id, address, NumVecs vectors, lane.
+ ConstantSDNode *Lane = dyn_cast<ConstantSDNode>(Node->getOperand(NumVecs+3));
+ if (!Lane) {
+ // NOTE(review): assert(false) compiles away in release builds, so a
+ // non-constant lane is silently left unlowered there — confirm that is
+ // the intended fallback behavior.
+ assert(false && "vld lane number must be a constant");
+ return SDValue();
+ }
+ // Copy all operands, replacing only the lane number in place.
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[NumVecs+3] = DAG.getTargetConstant(Lane->getZExtValue(), MVT::i32);
+ return DAG.UpdateNodeOperands(Op, &Ops[0], Ops.size());
+}
+
SDValue
ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
return LowerNeonVLDIntrinsic(Op, DAG, 3);
case Intrinsic::arm_neon_vld4:
return LowerNeonVLDIntrinsic(Op, DAG, 4);
+ // Lane-load intrinsics: the trailing argument selects how many vectors
+ // (2, 3, or 4) participate in the structure load.
+ case Intrinsic::arm_neon_vld2lane:
+ return LowerNeonVLDLaneIntrinsic(Op, DAG, 2);
+ case Intrinsic::arm_neon_vld3lane:
+ return LowerNeonVLDLaneIntrinsic(Op, DAG, 3);
+ case Intrinsic::arm_neon_vld4lane:
+ return LowerNeonVLDLaneIntrinsic(Op, DAG, 4);
case Intrinsic::arm_neon_vst3:
return LowerNeonVSTIntrinsic(Op, DAG, 3);
case Intrinsic::arm_neon_vst4:
def VLD4d8 : VLD4D<"vld4.8">;
def VLD4d16 : VLD4D<"vld4.16">;
def VLD4d32 : VLD4D<"vld4.32">;
+
+// VLD2LN : Vector Load (single 2-element structure to one lane)
+// The $srcN operands are tied to the $dstN results: only the selected lane
+// of each destination register is written, so the rest must flow through.
+// nohash_imm prints the lane number without a '#' prefix.
+class VLD2LND<string OpcodeStr>
+ : NLdSt<(outs DPR:$dst1, DPR:$dst2),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ NoItinerary,
+ !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"),
+ "$src1 = $dst1, $src2 = $dst2", []>;
+
+def VLD2LNd8 : VLD2LND<"vld2.8">;
+def VLD2LNd16 : VLD2LND<"vld2.16">;
+def VLD2LNd32 : VLD2LND<"vld2.32">;
+
+// VLD3LN : Vector Load (single 3-element structure to one lane)
+// Same tied-operand pattern as VLD2LN, extended to three D registers.
+class VLD3LND<string OpcodeStr>
+ : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), NoItinerary,
+ !strconcat(OpcodeStr,
+ "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"),
+ "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
+
+def VLD3LNd8 : VLD3LND<"vld3.8">;
+def VLD3LNd16 : VLD3LND<"vld3.16">;
+def VLD3LNd32 : VLD3LND<"vld3.32">;
+
+// VLD4LN : Vector Load (single 4-element structure to one lane)
+// Same tied-operand pattern as VLD2LN, extended to four D registers.
+class VLD4LND<string OpcodeStr>
+ : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), NoItinerary,
+ !strconcat(OpcodeStr,
+ "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr"),
+ "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
+
+def VLD4LNd8 : VLD4LND<"vld4.8">;
+def VLD4LNd16 : VLD4LND<"vld4.16">;
+def VLD4LNd32 : VLD4LND<"vld4.32">;
}
// VST1 : Vector Store (multiple single elements)
 case ARM::VLD2d8:
 case ARM::VLD2d16:
 case ARM::VLD2d32:
+ // The lane variants define the same 2-register list starting at operand 0.
+ case ARM::VLD2LNd8:
+ case ARM::VLD2LNd16:
+ case ARM::VLD2LNd32:
 FirstOpnd = 0;
 NumRegs = 2;
 return true;
 case ARM::VLD3d8:
 case ARM::VLD3d16:
 case ARM::VLD3d32:
+ // The lane variants define the same 3-register list starting at operand 0.
+ case ARM::VLD3LNd8:
+ case ARM::VLD3LNd16:
+ case ARM::VLD3LNd32:
 FirstOpnd = 0;
 NumRegs = 3;
 return true;
 case ARM::VLD4d8:
 case ARM::VLD4d16:
 case ARM::VLD4d32:
+ // The lane variants define the same 4-register list starting at operand 0.
+ case ARM::VLD4LNd8:
+ case ARM::VLD4LNd16:
+ case ARM::VLD4LNd32:
 FirstOpnd = 0;
 NumRegs = 4;
 return true;