From: Jim Grosbach Date: Sat, 7 Nov 2009 21:25:39 +0000 (+0000) Subject: Support alignment specifier for NEON vld/vst instructions X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8a5ec86a3d4d599984e52c0c5a3a6a436607cf3e;p=oota-llvm.git Support alignment specifier for NEON vld/vst instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@86404 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index c603708652f..6ee1b68efaa 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -541,13 +541,15 @@ namespace ARM_AM { // // This is used for NEON load / store instructions. // - // addrmode6 := reg with optional writeback + // addrmode6 := reg with optional writeback and alignment // - // This is stored in three operands [regaddr, regupdate, opc]. The first is - // the address register. The second register holds the value of a post-access - // increment for writeback or reg0 if no writeback or if the writeback - // increment is the size of the memory access. The third operand encodes - // whether there is writeback to the address register. + // This is stored in four operands [regaddr, regupdate, opc, align]. The + // first is the address register. The second register holds the value of + // a post-access increment for writeback or reg0 if no writeback or if the + // writeback increment is the size of the memory access. The third + // operand encodes whether there is writeback to the address register. The + // fourth operand is the value of the alignment specifier to use or zero if + // no explicit alignment. static inline unsigned getAM6Opc(bool WB = false) { return (int)WB; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 806a85ee68e..0eb1eb6dc1b 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -81,7 +81,7 @@ public: bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset); bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, - SDValue &Opc); + SDValue &Opc, SDValue &Align); bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset, SDValue &Label); @@ -489,11 +489,13 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, - SDValue &Opc) { + SDValue &Opc, SDValue &Align) { Addr = N; // Default to no writeback. Update = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); + // Default to no alignment. + Align = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1008,8 +1010,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + SDValue MemAddr, MemUpdate, MemOpc, Align; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1034,10 +1036,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; std::vector ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1045,10 +1047,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, // Quad registers are directly supported for VLD2, // loading 2 pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; std::vector ResTys(4, VT); ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); Chain = SDValue(VLd, 4); // Combine the even and odd subregs to produce the result. @@ -1069,14 +1071,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4); + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5); Chain = SDValue(VLdA, NumVecs+1); // Load the odd subregs. Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4); + const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, + Align, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5); Chain = SDValue(VLdB, NumVecs+1); // Combine the even and odd subregs to produce the result. @@ -1096,8 +1099,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + SDValue MemAddr, MemUpdate, MemOpc, Align; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1124,13 +1127,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, Ops.push_back(MemAddr); Ops.push_back(MemUpdate); Ops.push_back(MemOpc); + Ops.push_back(Align); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(N->getOperand(Vec+3)); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+4); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1145,7 +1149,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, N->getOperand(Vec+3))); } Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 8); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); } // Otherwise, quad registers are stored with two separate instructions, @@ -1161,18 +1165,18 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+4); + MVT::Other, Ops.data(), NumVecs+5); Chain = SDValue(VStA, 1); // Store the odd subregs. Ops[0] = SDValue(VStA, 0); // MemAddr for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3)); - Ops[NumVecs+3] = Chain; + Ops[NumVecs+4] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+4); + MVT::Other, Ops.data(), NumVecs+5); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1186,8 +1190,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + SDValue MemAddr, MemUpdate, MemOpc, Align; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1224,6 +1228,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, Ops.push_back(MemAddr); Ops.push_back(MemUpdate); Ops.push_back(MemOpc); + Ops.push_back(Align); unsigned Opc = 0; if (is64BitVector) { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 71e527b844a..dfa2bdd071a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -340,9 +340,9 @@ def addrmode5 : Operand, // addrmode6 := reg with optional writeback // def addrmode6 : Operand, - ComplexPattern { + ComplexPattern { let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm); + let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm); } // addrmodepc := pc + reg diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index c130ced6247..020710bb45d 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -638,9 +638,17 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); const MachineOperand &MO3 = MI->getOperand(Op+2); + const MachineOperand &MO4 = MI->getOperand(Op+3); - // FIXME: No support yet for specifying alignment. - O << "[" << getRegisterName(MO1.getReg()) << "]"; + O << "[" << getRegisterName(MO1.getReg()); + if (MO4.getImm()) { + if (Subtarget->isTargetDarwin()) + O << ", :"; + else + O << " @"; + O << MO4.getImm(); + } + O << "]"; if (ARM_AM::getAM6WBFlag(MO3.getImm())) { if (MO2.getReg() == 0) diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index 8b2bcd0e730..206677b0a85 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -177,20 +177,20 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 2; return true; case ARM::VST2q8: case ARM::VST2q16: case ARM::VST2q32: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 4; return true; case ARM::VST2LNq16a: case ARM::VST2LNq32a: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 2; Offset = 0; Stride = 2; @@ -198,7 +198,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNq16b: case ARM::VST2LNq32b: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 2; Offset = 1; Stride = 2; @@ -211,14 +211,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 3; return true; case ARM::VST3q8a: case ARM::VST3q16a: case ARM::VST3q32a: - FirstOpnd = 4; + FirstOpnd = 5; NumRegs = 3; Offset = 0; Stride = 2; @@ -227,7 +227,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3q8b: case ARM::VST3q16b: case ARM::VST3q32b: - FirstOpnd = 4; + FirstOpnd = 5; NumRegs = 3; Offset = 1; Stride = 2; @@ -235,7 +235,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNq16a: case ARM::VST3LNq32a: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 3; Offset = 0; Stride = 2; @@ -243,7 +243,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNq16b: case ARM::VST3LNq32b: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 3; Offset = 1; Stride = 2; @@ -256,14 +256,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 4; return true; case ARM::VST4q8a: case ARM::VST4q16a: case ARM::VST4q32a: - FirstOpnd = 4; + FirstOpnd = 5; NumRegs = 4; Offset = 0; Stride = 2; @@ -272,7 +272,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4q8b: case ARM::VST4q16b: case ARM::VST4q32b: - FirstOpnd = 4; + FirstOpnd = 5; NumRegs = 4; Offset = 1; Stride = 2; @@ -280,7 +280,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNq16a: case ARM::VST4LNq32a: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 4; Offset = 0; Stride = 2; @@ -288,7 +288,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNq16b: case ARM::VST4LNq32b: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 4; Offset = 1; Stride = 2;