From: Jim Grosbach <grosbach@apple.com>
Date: Sat, 7 Nov 2009 21:25:39 +0000 (+0000)
Subject: Support alignment specifier for NEON vld/vst instructions
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8a5ec86a3d4d599984e52c0c5a3a6a436607cf3e;p=oota-llvm.git

Support alignment specifier for NEON vld/vst instructions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@86404 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index c603708652f..6ee1b68efaa 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -541,13 +541,15 @@ namespace ARM_AM {
   //
   // This is used for NEON load / store instructions.
   //
-  // addrmode6 := reg with optional writeback
+  // addrmode6 := reg with optional writeback and alignment
   //
-  // This is stored in three operands [regaddr, regupdate, opc].  The first is
-  // the address register.  The second register holds the value of a post-access
-  // increment for writeback or reg0 if no writeback or if the writeback
-  // increment is the size of the memory access.  The third operand encodes
-  // whether there is writeback to the address register.
+  // This is stored in four operands [regaddr, regupdate, opc, align].  The
+  // first is the address register.  The second register holds the value of
+  // a post-access increment for writeback or reg0 if no writeback or if the
+  // writeback increment is the size of the memory access.  The third
+  // operand encodes whether there is writeback to the address register. The
+  // fourth operand is the value of the alignment specifier to use or zero if
+  // no explicit alignment.
 
   static inline unsigned getAM6Opc(bool WB = false) {
     return (int)WB;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 806a85ee68e..0eb1eb6dc1b 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -81,7 +81,7 @@ public:
   bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base,
                        SDValue &Offset);
   bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update,
-                       SDValue &Opc);
+                       SDValue &Opc, SDValue &Align);
 
   bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset,
                         SDValue &Label);
@@ -489,11 +489,13 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
 
 bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N,
                                       SDValue &Addr, SDValue &Update,
-                                      SDValue &Opc) {
+                                      SDValue &Opc, SDValue &Align) {
   Addr = N;
   // Default to no writeback.
   Update = CurDAG->getRegister(0, MVT::i32);
   Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
+  // Default to no alignment.
+  Align = CurDAG->getTargetConstant(0, MVT::i32);
   return true;
 }
 
@@ -1008,8 +1010,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
   SDNode *N = Op.getNode();
   DebugLoc dl = N->getDebugLoc();
 
-  SDValue MemAddr, MemUpdate, MemOpc;
-  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+  SDValue MemAddr, MemUpdate, MemOpc, Align;
+  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
     return NULL;
 
   SDValue Chain = N->getOperand(0);
@@ -1034,10 +1036,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
 
   if (is64BitVector) {
     unsigned Opc = DOpcodes[OpcodeIndex];
-    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
+    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
     std::vector<EVT> ResTys(NumVecs, VT);
     ResTys.push_back(MVT::Other);
-    return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
+    return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
   }
 
   EVT RegVT = GetNEONSubregVT(VT);
@@ -1045,10 +1047,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
     // Quad registers are directly supported for VLD2,
     // loading 2 pairs of D regs.
     unsigned Opc = QOpcodes0[OpcodeIndex];
-    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
+    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
     std::vector<EVT> ResTys(4, VT);
     ResTys.push_back(MVT::Other);
-    SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
+    SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
     Chain = SDValue(VLd, 4);
 
     // Combine the even and odd subregs to produce the result.
@@ -1069,14 +1071,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
 
     // Load the even subregs.
     unsigned Opc = QOpcodes0[OpcodeIndex];
-    const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain };
-    SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4);
+    const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
+    SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5);
     Chain = SDValue(VLdA, NumVecs+1);
 
     // Load the odd subregs.
     Opc = QOpcodes1[OpcodeIndex];
-    const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain };
-    SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4);
+    const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc,
+                             Align, Chain };
+    SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5);
     Chain = SDValue(VLdB, NumVecs+1);
 
     // Combine the even and odd subregs to produce the result.
@@ -1096,8 +1099,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
   SDNode *N = Op.getNode();
   DebugLoc dl = N->getDebugLoc();
 
-  SDValue MemAddr, MemUpdate, MemOpc;
-  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+  SDValue MemAddr, MemUpdate, MemOpc, Align;
+  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
     return NULL;
 
   SDValue Chain = N->getOperand(0);
@@ -1124,13 +1127,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
   Ops.push_back(MemAddr);
   Ops.push_back(MemUpdate);
   Ops.push_back(MemOpc);
+  Ops.push_back(Align);
 
   if (is64BitVector) {
     unsigned Opc = DOpcodes[OpcodeIndex];
     for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
       Ops.push_back(N->getOperand(Vec+3));
     Ops.push_back(Chain);
-    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+4);
+    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
   }
 
   EVT RegVT = GetNEONSubregVT(VT);
@@ -1145,7 +1149,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
                                                    N->getOperand(Vec+3)));
     }
     Ops.push_back(Chain);
-    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 8);
+    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
   }
 
   // Otherwise, quad registers are stored with two separate instructions,
@@ -1161,18 +1165,18 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
   Ops.push_back(Chain);
   unsigned Opc = QOpcodes0[OpcodeIndex];
   SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                        MVT::Other, Ops.data(), NumVecs+4);
+                                        MVT::Other, Ops.data(), NumVecs+5);
   Chain = SDValue(VStA, 1);
 
   // Store the odd subregs.
   Ops[0] = SDValue(VStA, 0); // MemAddr
   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
-    Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+    Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
                                                 N->getOperand(Vec+3));
-  Ops[NumVecs+3] = Chain;
+  Ops[NumVecs+4] = Chain;
   Opc = QOpcodes1[OpcodeIndex];
   SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                        MVT::Other, Ops.data(), NumVecs+4);
+                                        MVT::Other, Ops.data(), NumVecs+5);
   Chain = SDValue(VStB, 1);
   ReplaceUses(SDValue(N, 0), Chain);
   return NULL;
@@ -1186,8 +1190,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
   SDNode *N = Op.getNode();
   DebugLoc dl = N->getDebugLoc();
 
-  SDValue MemAddr, MemUpdate, MemOpc;
-  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+  SDValue MemAddr, MemUpdate, MemOpc, Align;
+  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
     return NULL;
 
   SDValue Chain = N->getOperand(0);
@@ -1224,6 +1228,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
   Ops.push_back(MemAddr);
   Ops.push_back(MemUpdate);
   Ops.push_back(MemOpc);
+  Ops.push_back(Align);
 
   unsigned Opc = 0;
   if (is64BitVector) {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 71e527b844a..dfa2bdd071a 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -340,9 +340,9 @@ def addrmode5 : Operand<i32>,
 // addrmode6 := reg with optional writeback
 //
 def addrmode6 : Operand<i32>,
-                ComplexPattern<i32, 3, "SelectAddrMode6", []> {
+                ComplexPattern<i32, 4, "SelectAddrMode6", []> {
   let PrintMethod = "printAddrMode6Operand";
-  let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm);
+  let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm);
 }
 
 // addrmodepc := pc + reg
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index c130ced6247..020710bb45d 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -638,9 +638,17 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) {
   const MachineOperand &MO1 = MI->getOperand(Op);
   const MachineOperand &MO2 = MI->getOperand(Op+1);
   const MachineOperand &MO3 = MI->getOperand(Op+2);
+  const MachineOperand &MO4 = MI->getOperand(Op+3);
 
-  // FIXME: No support yet for specifying alignment.
-  O << "[" << getRegisterName(MO1.getReg()) << "]";
+  O << "[" << getRegisterName(MO1.getReg());
+  if (MO4.getImm()) {
+    if (Subtarget->isTargetDarwin())
+      O << ", :";
+    else
+      O << " @";
+    O << MO4.getImm();
+  }
+  O << "]";
 
   if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
     if (MO2.getReg() == 0)
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index 8b2bcd0e730..206677b0a85 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -177,20 +177,20 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
   case ARM::VST2LNd8:
   case ARM::VST2LNd16:
   case ARM::VST2LNd32:
-    FirstOpnd = 3;
+    FirstOpnd = 4;
     NumRegs = 2;
     return true;
 
   case ARM::VST2q8:
   case ARM::VST2q16:
   case ARM::VST2q32:
-    FirstOpnd = 3;
+    FirstOpnd = 4;
     NumRegs = 4;
     return true;
 
   case ARM::VST2LNq16a:
   case ARM::VST2LNq32a:
-    FirstOpnd = 3;
+    FirstOpnd = 4;
     NumRegs = 2;
     Offset = 0;
     Stride = 2;
@@ -198,7 +198,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
 
   case ARM::VST2LNq16b:
   case ARM::VST2LNq32b:
-    FirstOpnd = 3;
+    FirstOpnd = 4;
     NumRegs = 2;
     Offset = 1;
     Stride = 2;
@@ -211,14 +211,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
   case ARM::VST3LNd8:
   case ARM::VST3LNd16:
   case ARM::VST3LNd32:
-    FirstOpnd = 3;
+    FirstOpnd = 4;
     NumRegs = 3;
     return true;
 
   case ARM::VST3q8a:
   case ARM::VST3q16a:
   case ARM::VST3q32a:
-    FirstOpnd = 4;
+    FirstOpnd = 5;
     NumRegs = 3;
     Offset = 0;
     Stride = 2;
@@ -227,7 +227,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
   case ARM::VST3q8b:
   case ARM::VST3q16b:
   case ARM::VST3q32b:
-    FirstOpnd = 4;
+    FirstOpnd = 5;
     NumRegs = 3;
     Offset = 1;
     Stride = 2;
@@ -235,7 +235,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
 
   case ARM::VST3LNq16a:
   case ARM::VST3LNq32a:
-    FirstOpnd = 3;
+    FirstOpnd = 4;
     NumRegs = 3;
     Offset = 0;
     Stride = 2;
@@ -243,7 +243,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
 
   case ARM::VST3LNq16b:
   case ARM::VST3LNq32b:
-    FirstOpnd = 3;
+    FirstOpnd = 4;
     NumRegs = 3;
     Offset = 1;
     Stride = 2;
@@ -256,14 +256,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
   case ARM::VST4LNd8:
   case ARM::VST4LNd16:
   case ARM::VST4LNd32:
-    FirstOpnd = 3;
+    FirstOpnd = 4;
     NumRegs = 4;
     return true;
 
   case ARM::VST4q8a:
   case ARM::VST4q16a:
   case ARM::VST4q32a:
-    FirstOpnd = 4;
+    FirstOpnd = 5;
     NumRegs = 4;
     Offset = 0;
     Stride = 2;
@@ -272,7 +272,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
   case ARM::VST4q8b:
   case ARM::VST4q16b:
   case ARM::VST4q32b:
-    FirstOpnd = 4;
+    FirstOpnd = 5;
     NumRegs = 4;
     Offset = 1;
     Stride = 2;
@@ -280,7 +280,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
 
   case ARM::VST4LNq16a:
   case ARM::VST4LNq32a:
-    FirstOpnd = 3;
+    FirstOpnd = 4;
     NumRegs = 4;
     Offset = 0;
     Stride = 2;
@@ -288,7 +288,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
 
   case ARM::VST4LNq16b:
   case ARM::VST4LNq32b:
-    FirstOpnd = 3;
+    FirstOpnd = 4;
     NumRegs = 4;
     Offset = 1;
     Stride = 2;