[Layering] Move DebugInfo.h into the IR library where its implementation

[oota-llvm.git] / lib / Target / AArch64 / AArch64ISelDAGToDAG.cpp
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

index 4d79c78700ae9fc87ec36b6bdebd5a546fd6381c..dac4b32cfecdd90bbdac4223affa8f76c42a5e94 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -109,12 +109,19 @@ public:
  
    SDNode* Select(SDNode*);
  private:
  
    SDNode* Select(SDNode*);
  private:
+  /// Get the opcode for table lookup instruction
+  unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);
+
+  /// Select NEON table lookup intrinsics.  NumVecs should be 1, 2, 3 or 4.
+  /// IsExt is to indicate if the result will be extended with an argument.
+  SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);
+
    /// Select NEON load intrinsics.  NumVecs should be 1, 2, 3 or 4.
    /// Select NEON load intrinsics.  NumVecs should be 1, 2, 3 or 4.
-  SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating,
+  SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                      const uint16_t *Opcode);
  
    /// Select NEON store intrinsics.  NumVecs should be 1, 2, 3 or 4.
                      const uint16_t *Opcode);
  
    /// Select NEON store intrinsics.  NumVecs should be 1, 2, 3 or 4.
-  SDNode *SelectVST(SDNode *N, unsigned NumVecs, bool isUpdating,
+  SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                      const uint16_t *Opcodes);
  
    /// Form sequences of consecutive 64/128-bit registers for use in NEON
                      const uint16_t *Opcodes);
  
    /// Form sequences of consecutive 64/128-bit registers for use in NEON
@@ -128,6 +135,19 @@ private:
    /// functions. Those should almost always be called instead.
    SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
                        unsigned SubRegs[]);
    /// functions. Those should almost always be called instead.
    SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
                        unsigned SubRegs[]);
+
+  /// Select NEON load-duplicate intrinsics.  NumVecs should be 2, 3 or 4.
+  /// The opcode array specifies the instructions used for load.
+  SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+                       const uint16_t *Opcodes);
+
+  /// Select NEON load/store lane intrinsics.  NumVecs should be 2, 3 or 4.
+  /// The opcode arrays specify the instructions used for load/store.
+  SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
+                          unsigned NumVecs, const uint16_t *Opcodes);
+
+  SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
+                               SDValue Operand);
  };
  }
  
  };
  }
  
@@ -476,7 +496,6 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
    case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
    case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
    case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
    case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
    case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
    case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
-  case AArch64::LD1WB2V_1D_fixed: return AArch64::LD1WB2V_1D_register;
    case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
    case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
    case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
    case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
    case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
    case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
@@ -485,7 +504,6 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
    case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
    case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
    case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
    case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
    case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
    case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
-  case AArch64::LD1WB3V_1D_fixed: return AArch64::LD1WB3V_1D_register;
    case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
    case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
    case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
    case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
    case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
    case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
@@ -494,12 +512,38 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
    case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
    case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
    case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
    case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
    case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
    case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
-  case AArch64::LD1WB4V_1D_fixed: return AArch64::LD1WB4V_1D_register;
    case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
    case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
    case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
    case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;
  
    case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
    case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
    case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
    case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;
  
+  case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register;
+  case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register;
+  case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register;
+  case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register;
+  case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register;
+  case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register;
+  case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register;
+  case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register;
+
+  case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register;
+  case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register;
+  case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register;
+  case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register;
+  case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register;
+  case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register;
+  case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register;
+  case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register;
+
+  case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register;
+  case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register;
+  case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register;
+  case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register;
+  case AArch64::LD1x4WB_16B_fixed: return AArch64::LD1x4WB_16B_register;
+  case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register;
+  case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register;
+  case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register;
+
    case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
    case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
    case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
    case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
    case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
    case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
@@ -512,7 +556,6 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
    case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
    case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
    case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
    case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
    case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
    case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
-  case AArch64::ST1WB2V_1D_fixed: return AArch64::ST1WB2V_1D_register;
    case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
    case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
    case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
    case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
    case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
    case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
@@ -521,7 +564,6 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
    case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
    case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
    case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
    case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
    case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
    case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
-  case AArch64::ST1WB3V_1D_fixed: return AArch64::ST1WB3V_1D_register;
    case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
    case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
    case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
    case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
    case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
    case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
@@ -530,37 +572,115 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
    case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
    case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
    case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
    case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
    case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
    case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
-  case AArch64::ST1WB4V_1D_fixed: return AArch64::ST1WB4V_1D_register;
    case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
    case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
    case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
    case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
    case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
    case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
    case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
    case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
+
+  case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register;
+  case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register;
+  case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register;
+  case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register;
+  case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register;
+  case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register;
+  case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register;
+  case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register;
+
+  case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register;
+  case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register;
+  case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register;
+  case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register;
+  case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register;
+  case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register;
+  case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register;
+  case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register;
+
+  case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register;
+  case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register;
+  case AArch64::ST1x4WB_2S_fixed: return AArch64::ST1x4WB_2S_register;
+  case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register;
+  case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register;
+  case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register;
+  case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register;
+  case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register;
+
+  // Post-index of duplicate loads
+  case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register;
+  case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register;
+  case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register;
+  case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register;
+  case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register;
+  case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register;
+  case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register;
+  case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register;
+
+  case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register;
+  case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register;
+  case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register;
+  case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register;
+  case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register;
+  case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register;
+  case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register;
+  case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register;
+
+  case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register;
+  case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register;
+  case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register;
+  case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register;
+  case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register;
+  case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register;
+  case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register;
+  case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register;
+
+  // Post-index of lane loads
+  case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register;
+  case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register;
+  case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register;
+  case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register;
+
+  case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register;
+  case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register;
+  case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register;
+  case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register;
+
+  case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register;
+  case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register;
+  case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register;
+  case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register;
+
+  // Post-index of lane stores
+  case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register;
+  case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register;
+  case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register;
+  case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register;
+
+  case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register;
+  case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register;
+  case AArch64::ST3LN_WB_S_fixed: return AArch64::ST3LN_WB_S_register;
+  case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register;
+
+  case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register;
+  case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register;
+  case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register;
+  case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register;
    }
    return Opc; // If not one we handle, return it unchanged.
  }
  
    }
    return Opc; // If not one we handle, return it unchanged.
  }
  
-SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
-                                       bool isUpdating,
+SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating,
+                                       unsigned NumVecs,
                                         const uint16_t *Opcodes) {
    assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  
    EVT VT = N->getValueType(0);
    unsigned OpcodeIndex;
                                         const uint16_t *Opcodes) {
    assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  
    EVT VT = N->getValueType(0);
    unsigned OpcodeIndex;
-  switch (VT.getSimpleVT().SimpleTy) {
+  bool is64BitVector = VT.is64BitVector();
+  switch (VT.getScalarType().getSizeInBits()) {
+  case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
+  case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
+  case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
+  case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
    default: llvm_unreachable("unhandled vector load type");
    default: llvm_unreachable("unhandled vector load type");
-  case MVT::v8i8:  OpcodeIndex = 0; break;
-  case MVT::v4i16: OpcodeIndex = 1; break;
-  case MVT::v2f32:
-  case MVT::v2i32: OpcodeIndex = 2; break;
-  case MVT::v1f64:
-  case MVT::v1i64: OpcodeIndex = 3; break;
-  case MVT::v16i8: OpcodeIndex = 4; break;
-  case MVT::v8f16:
-  case MVT::v8i16: OpcodeIndex = 5; break;
-  case MVT::v4f32:
-  case MVT::v4i32: OpcodeIndex = 6; break;
-  case MVT::v2f64:
-  case MVT::v2i64: OpcodeIndex = 7; break;
    }
    unsigned Opc = Opcodes[OpcodeIndex];
  
    }
    unsigned Opc = Opcodes[OpcodeIndex];
  
@@ -577,9 +697,8 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
  
    Ops.push_back(N->getOperand(0)); // Push back the Chain
  
  
    Ops.push_back(N->getOperand(0)); // Push back the Chain
  
-  std::vector<EVT> ResTys;
-  bool is64BitVector = VT.is64BitVector();
-
+  SmallVector<EVT, 3> ResTys;
+  // Push back the type of return super register
    if (NumVecs == 1)
      ResTys.push_back(VT);
    else if (NumVecs == 3)
    if (NumVecs == 1)
      ResTys.push_back(VT);
    else if (NumVecs == 3)
@@ -620,8 +739,8 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
    return NULL;
  }
  
    return NULL;
  }
  
-SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
-                                       bool isUpdating,
+SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating,
+                                       unsigned NumVecs,
                                         const uint16_t *Opcodes) {
    assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
    SDLoc dl(N);
                                         const uint16_t *Opcodes) {
    assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
    SDLoc dl(N);
@@ -630,28 +749,20 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
    MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  
    unsigned AddrOpIdx = isUpdating ? 1 : 2;
    MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  
    unsigned AddrOpIdx = isUpdating ? 1 : 2;
-  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+  unsigned Vec0Idx = 3;
    EVT VT = N->getOperand(Vec0Idx).getValueType();
    unsigned OpcodeIndex;
    EVT VT = N->getOperand(Vec0Idx).getValueType();
    unsigned OpcodeIndex;
-  switch (VT.getSimpleVT().SimpleTy) {
+  bool is64BitVector = VT.is64BitVector();
+  switch (VT.getScalarType().getSizeInBits()) {
+  case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
+  case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
+  case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
+  case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
    default: llvm_unreachable("unhandled vector store type");
    default: llvm_unreachable("unhandled vector store type");
-  case MVT::v8i8:  OpcodeIndex = 0; break;
-  case MVT::v4i16: OpcodeIndex = 1; break;
-  case MVT::v2f32:
-  case MVT::v2i32: OpcodeIndex = 2; break;
-  case MVT::v1f64:
-  case MVT::v1i64: OpcodeIndex = 3; break;
-  case MVT::v16i8: OpcodeIndex = 4; break;
-  case MVT::v8f16:
-  case MVT::v8i16: OpcodeIndex = 5; break;
-  case MVT::v4f32:
-  case MVT::v4i32: OpcodeIndex = 6; break;
-  case MVT::v2f64:
-  case MVT::v2i64: OpcodeIndex = 7; break;
    }
    unsigned Opc = Opcodes[OpcodeIndex];
  
    }
    unsigned Opc = Opcodes[OpcodeIndex];
  
-  std::vector<EVT> ResTys;
+  SmallVector<EVT, 2> ResTys;
    if (isUpdating)
      ResTys.push_back(MVT::i64);
    ResTys.push_back(MVT::Other); // Type for the Chain
    if (isUpdating)
      ResTys.push_back(MVT::i64);
    ResTys.push_back(MVT::Other); // Type for the Chain
@@ -665,7 +776,6 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
-  bool is64BitVector = VT.is64BitVector();
  
    SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
                                 N->op_begin() + Vec0Idx + NumVecs);
  
    SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
                                 N->op_begin() + Vec0Idx + NumVecs);
@@ -682,6 +792,237 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
    return VSt;
  }
  
    return VSt;
  }
  
+SDValue
+AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
+                                          SDValue Operand) {
+  SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL,
+                        VT, VTD, MVT::Other,
+                        CurDAG->getTargetConstant(0, MVT::i64),
+                        Operand,
+                        CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32));
+  return SDValue(Reg, 0);
+}
+
+SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+                                          unsigned NumVecs,
+                                          const uint16_t *Opcodes) {
+  assert(NumVecs >=2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range");
+  SDLoc dl(N);
+
+  EVT VT = N->getValueType(0);
+  unsigned OpcodeIndex;
+  bool is64BitVector = VT.is64BitVector();
+  switch (VT.getScalarType().getSizeInBits()) {
+  case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
+  case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
+  case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
+  case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
+  default: llvm_unreachable("unhandled vector duplicate lane load type");
+  }
+  unsigned Opc = Opcodes[OpcodeIndex];
+
+  SDValue SuperReg;
+  SmallVector<SDValue, 6> Ops;
+  Ops.push_back(N->getOperand(1)); // Push back the Memory Address
+  if (isUpdating) {
+    SDValue Inc = N->getOperand(2);
+    if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+      Opc = getVLDSTRegisterUpdateOpcode(Opc);
+    Ops.push_back(Inc);
+  }
+  Ops.push_back(N->getOperand(0)); // Push back the Chain
+
+  SmallVector<EVT, 3> ResTys;
+  // Push back the type of return super register
+  if (NumVecs == 3)
+    ResTys.push_back(MVT::Untyped);
+  else {
+    EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
+                                 is64BitVector ? NumVecs : NumVecs * 2);
+    ResTys.push_back(ResTy);
+  }
+  if (isUpdating)
+    ResTys.push_back(MVT::i64); // Type of the updated register
+  ResTys.push_back(MVT::Other); // Type of the Chain
+  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+  // Transfer memoperands.
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
+
+  SuperReg = SDValue(VLdDup, 0);
+  unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
+  // Update uses of each registers in super register
+  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+    ReplaceUses(SDValue(N, Vec),
+                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+  // Update uses of the Chain
+  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+  if (isUpdating)
+    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
+  return NULL;
+}
+
+// We only have 128-bit vector type of load/store lane instructions.
+// If it is 64-bit vector, we also select it to the 128-bit instructions.
+// Just use SUBREG_TO_REG to adapt the input to 128-bit vector and
+// EXTRACT_SUBREG to get the 64-bit vector from the 128-bit vector output.
+SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
+                                             bool isUpdating, unsigned NumVecs,
+                                             const uint16_t *Opcodes) {
+  assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
+  SDLoc dl(N);
+  unsigned AddrOpIdx = isUpdating ? 1 : 2;
+  unsigned Vec0Idx = 3;
+
+  SDValue Chain = N->getOperand(0);
+  unsigned Lane =
+      cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+  EVT VT = N->getOperand(Vec0Idx).getValueType();
+  bool is64BitVector = VT.is64BitVector();
+  EVT VT64; // 64-bit Vector Type
+
+  if (is64BitVector) {
+    VT64 = VT;
+    VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(),
+                          VT.getVectorNumElements() * 2);
+  }
+
+  unsigned OpcodeIndex;
+  switch (VT.getScalarType().getSizeInBits()) {
+  case 8: OpcodeIndex = 0; break;
+  case 16: OpcodeIndex = 1; break;
+  case 32: OpcodeIndex = 2; break;
+  case 64: OpcodeIndex = 3; break;
+  default: llvm_unreachable("unhandled vector lane load/store type");
+  }
+  unsigned Opc = Opcodes[OpcodeIndex];
+
+  SmallVector<EVT, 3> ResTys;
+  if (IsLoad) {
+    // Push back the type of return super register
+    if (NumVecs == 3)
+      ResTys.push_back(MVT::Untyped);
+    else {
+      EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
+                                   is64BitVector ? NumVecs : NumVecs * 2);
+      ResTys.push_back(ResTy);
+    }
+  }
+  if (isUpdating)
+    ResTys.push_back(MVT::i64); // Type of the updated register
+  ResTys.push_back(MVT::Other); // Type of Chain
+  SmallVector<SDValue, 5> Ops;
+  Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
+  if (isUpdating) {
+    SDValue Inc = N->getOperand(AddrOpIdx + 1);
+    if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+      Opc = getVLDSTRegisterUpdateOpcode(Opc);
+    Ops.push_back(Inc);
+  }
+
+  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
+                               N->op_begin() + Vec0Idx + NumVecs);
+  if (is64BitVector)
+    for (unsigned i = 0; i < Regs.size(); i++)
+      Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]);
+  SDValue SuperReg = createQTuple(Regs);
+
+  Ops.push_back(SuperReg); // Source Reg
+  SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32);
+  Ops.push_back(LaneValue);
+  Ops.push_back(Chain); // Push back the Chain
+
+  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
+  if (!IsLoad)
+    return VLdLn;
+
+  // Extract the subregisters.
+  SuperReg = SDValue(VLdLn, 0);
+  unsigned Sub0 = AArch64::qsub_0;
+  // Update uses of each registers in super register
+  for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+    SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg);
+    if (is64BitVector) {
+      SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0);
+    }
+    ReplaceUses(SDValue(N, Vec), SUB0);
+  }
+  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+  if (isUpdating)
+    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
+  return NULL;
+}
+
+unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
+                                        unsigned NumOfVec) {
+  assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range");
+
+  unsigned Opc = 0;
+  switch (NumOfVec) {
+  default:
+    break;
+  case 1:
+    if (IsExt)
+      Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b;
+    else
+      Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
+    break;
+  case 2:
+    if (IsExt)
+      Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
+    else
+      Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
+    break;
+  case 3:
+    if (IsExt)
+      Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
+    else
+      Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
+    break;
+  case 4:
+    if (IsExt)
+      Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
+    else
+      Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
+    break;
+  }
+
+  return Opc;
+}
+
+SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
+                                        bool IsExt) {
+  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+  SDLoc dl(N);
+
+  // Check the element of look up table is 64-bit or not
+  unsigned Vec0Idx = IsExt ? 2 : 1;
+  assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() &&
+         "The element of lookup table for vtbl and vtbx must be 128-bit");
+
+  // Check the return value type is 64-bit or not
+  EVT ResVT = N->getValueType(0);
+  bool is64BitRes = ResVT.is64BitVector();
+
+  // Create new SDValue for vector list
+  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
+                               N->op_begin() + Vec0Idx + NumVecs);
+  SDValue TblReg = createQTuple(Regs);
+  unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);
+
+  SmallVector<SDValue, 3> Ops;
+  if (IsExt)
+    Ops.push_back(N->getOperand(1));
+  Ops.push_back(TblReg);
+  Ops.push_back(N->getOperand(Vec0Idx + NumVecs));
+  return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
+}
+
  SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
    // Dump information about the Node being selected
    DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
  SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
    // Dump information about the Node being selected
    DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
@@ -772,15 +1113,6 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
      return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
                                  TFI, CurDAG->getTargetConstant(0, PtrTy));
    }
      return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
                                  TFI, CurDAG->getTargetConstant(0, PtrTy));
    }
-  case ISD::ConstantPool: {
-    // Constant pools are fine, just create a Target entry.
-    ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
-    const Constant *C = CN->getConstVal();
-    SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));
-
-    ReplaceUses(SDValue(Node, 0), CP);
-    return NULL;
-  }
    case ISD::Constant: {
      SDNode *ResNode = 0;
      if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
    case ISD::Constant: {
      SDNode *ResNode = 0;
      if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
@@ -835,34 +1167,61 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
        AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
        AArch64::LD1WB_4S_fixed,  AArch64::LD1WB_2D_fixed
      };
        AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
        AArch64::LD1WB_4S_fixed,  AArch64::LD1WB_2D_fixed
      };
-    return SelectVLD(Node, 1, true, Opcodes);
+    return SelectVLD(Node, true, 1, Opcodes);
    }
    case AArch64ISD::NEON_LD2_UPD: {
      static const uint16_t Opcodes[] = {
        AArch64::LD2WB_8B_fixed,  AArch64::LD2WB_4H_fixed,
    }
    case AArch64ISD::NEON_LD2_UPD: {
      static const uint16_t Opcodes[] = {
        AArch64::LD2WB_8B_fixed,  AArch64::LD2WB_4H_fixed,
-      AArch64::LD2WB_2S_fixed,  AArch64::LD1WB2V_1D_fixed,
+      AArch64::LD2WB_2S_fixed,  AArch64::LD1x2WB_1D_fixed,
        AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
        AArch64::LD2WB_4S_fixed,  AArch64::LD2WB_2D_fixed
      };
        AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
        AArch64::LD2WB_4S_fixed,  AArch64::LD2WB_2D_fixed
      };
-    return SelectVLD(Node, 2, true, Opcodes);
+    return SelectVLD(Node, true, 2, Opcodes);
    }
    case AArch64ISD::NEON_LD3_UPD: {
      static const uint16_t Opcodes[] = {
        AArch64::LD3WB_8B_fixed,  AArch64::LD3WB_4H_fixed,
    }
    case AArch64ISD::NEON_LD3_UPD: {
      static const uint16_t Opcodes[] = {
        AArch64::LD3WB_8B_fixed,  AArch64::LD3WB_4H_fixed,
-      AArch64::LD3WB_2S_fixed,  AArch64::LD1WB3V_1D_fixed,
+      AArch64::LD3WB_2S_fixed,  AArch64::LD1x3WB_1D_fixed,
        AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
        AArch64::LD3WB_4S_fixed,  AArch64::LD3WB_2D_fixed
      };
        AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
        AArch64::LD3WB_4S_fixed,  AArch64::LD3WB_2D_fixed
      };
-    return SelectVLD(Node, 3, true, Opcodes);
+    return SelectVLD(Node, true, 3, Opcodes);
    }
    case AArch64ISD::NEON_LD4_UPD: {
      static const uint16_t Opcodes[] = {
        AArch64::LD4WB_8B_fixed,  AArch64::LD4WB_4H_fixed,
    }
    case AArch64ISD::NEON_LD4_UPD: {
      static const uint16_t Opcodes[] = {
        AArch64::LD4WB_8B_fixed,  AArch64::LD4WB_4H_fixed,
-      AArch64::LD4WB_2S_fixed,  AArch64::LD1WB4V_1D_fixed,
+      AArch64::LD4WB_2S_fixed,  AArch64::LD1x4WB_1D_fixed,
        AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
        AArch64::LD4WB_4S_fixed,  AArch64::LD4WB_2D_fixed
      };
        AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
        AArch64::LD4WB_4S_fixed,  AArch64::LD4WB_2D_fixed
      };
-    return SelectVLD(Node, 4, true, Opcodes);
+    return SelectVLD(Node, true, 4, Opcodes);
+  }
+  case AArch64ISD::NEON_LD1x2_UPD: {
+    static const uint16_t Opcodes[] = {
+      AArch64::LD1x2WB_8B_fixed,  AArch64::LD1x2WB_4H_fixed,
+      AArch64::LD1x2WB_2S_fixed,  AArch64::LD1x2WB_1D_fixed,
+      AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed,
+      AArch64::LD1x2WB_4S_fixed,  AArch64::LD1x2WB_2D_fixed
+    };
+    return SelectVLD(Node, true, 2, Opcodes);
+  }
+  case AArch64ISD::NEON_LD1x3_UPD: {
+    static const uint16_t Opcodes[] = {
+      AArch64::LD1x3WB_8B_fixed,  AArch64::LD1x3WB_4H_fixed,
+      AArch64::LD1x3WB_2S_fixed,  AArch64::LD1x3WB_1D_fixed,
+      AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed,
+      AArch64::LD1x3WB_4S_fixed,  AArch64::LD1x3WB_2D_fixed
+    };
+    return SelectVLD(Node, true, 3, Opcodes);
+  }
+  case AArch64ISD::NEON_LD1x4_UPD: {
+    static const uint16_t Opcodes[] = {
+      AArch64::LD1x4WB_8B_fixed,  AArch64::LD1x4WB_4H_fixed,
+      AArch64::LD1x4WB_2S_fixed,  AArch64::LD1x4WB_1D_fixed,
+      AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed,
+      AArch64::LD1x4WB_4S_fixed,  AArch64::LD1x4WB_2D_fixed
+    };
+    return SelectVLD(Node, true, 4, Opcodes);
    }
    case AArch64ISD::NEON_ST1_UPD: {
      static const uint16_t Opcodes[] = {
    }
    case AArch64ISD::NEON_ST1_UPD: {
      static const uint16_t Opcodes[] = {
@@ -871,34 +1230,179 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
        AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
        AArch64::ST1WB_4S_fixed,  AArch64::ST1WB_2D_fixed
      };
        AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
        AArch64::ST1WB_4S_fixed,  AArch64::ST1WB_2D_fixed
      };
-    return SelectVST(Node, 1, true, Opcodes);
+    return SelectVST(Node, true, 1, Opcodes);
    }
    case AArch64ISD::NEON_ST2_UPD: {
      static const uint16_t Opcodes[] = {
        AArch64::ST2WB_8B_fixed,  AArch64::ST2WB_4H_fixed,
    }
    case AArch64ISD::NEON_ST2_UPD: {
      static const uint16_t Opcodes[] = {
        AArch64::ST2WB_8B_fixed,  AArch64::ST2WB_4H_fixed,
-      AArch64::ST2WB_2S_fixed,  AArch64::ST1WB2V_1D_fixed,
+      AArch64::ST2WB_2S_fixed,  AArch64::ST1x2WB_1D_fixed,
        AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
        AArch64::ST2WB_4S_fixed,  AArch64::ST2WB_2D_fixed
      };
        AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
        AArch64::ST2WB_4S_fixed,  AArch64::ST2WB_2D_fixed
      };
-    return SelectVST(Node, 2, true, Opcodes);
+    return SelectVST(Node, true, 2, Opcodes);
    }
    case AArch64ISD::NEON_ST3_UPD: {
      static const uint16_t Opcodes[] = {
        AArch64::ST3WB_8B_fixed,  AArch64::ST3WB_4H_fixed,
    }
    case AArch64ISD::NEON_ST3_UPD: {
      static const uint16_t Opcodes[] = {
        AArch64::ST3WB_8B_fixed,  AArch64::ST3WB_4H_fixed,
-      AArch64::ST3WB_2S_fixed,  AArch64::ST1WB3V_1D_fixed,
+      AArch64::ST3WB_2S_fixed,  AArch64::ST1x3WB_1D_fixed,
        AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
        AArch64::ST3WB_4S_fixed,  AArch64::ST3WB_2D_fixed
      };
        AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
        AArch64::ST3WB_4S_fixed,  AArch64::ST3WB_2D_fixed
      };
-    return SelectVST(Node, 3, true, Opcodes);
+    return SelectVST(Node, true, 3, Opcodes);
    }
    case AArch64ISD::NEON_ST4_UPD: {
      static const uint16_t Opcodes[] = {
        AArch64::ST4WB_8B_fixed,  AArch64::ST4WB_4H_fixed,
    }
    case AArch64ISD::NEON_ST4_UPD: {
      static const uint16_t Opcodes[] = {
        AArch64::ST4WB_8B_fixed,  AArch64::ST4WB_4H_fixed,
-      AArch64::ST4WB_2S_fixed,  AArch64::ST1WB4V_1D_fixed,
+      AArch64::ST4WB_2S_fixed,  AArch64::ST1x4WB_1D_fixed,
        AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
        AArch64::ST4WB_4S_fixed,  AArch64::ST4WB_2D_fixed
      };
        AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
        AArch64::ST4WB_4S_fixed,  AArch64::ST4WB_2D_fixed
      };
-    return SelectVST(Node, 4, true, Opcodes);
+    return SelectVST(Node, true, 4, Opcodes);
+  }
+  case AArch64ISD::NEON_LD2DUP: {
+    static const uint16_t Opcodes[] = {
+        AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S,
+        AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H,
+        AArch64::LD2R_4S, AArch64::LD2R_2D
+    };
+    return SelectVLDDup(Node, false, 2, Opcodes);
+  }
+  case AArch64ISD::NEON_LD3DUP: {
+    static const uint16_t Opcodes[] = {
+        AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S,
+        AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H,
+        AArch64::LD3R_4S, AArch64::LD3R_2D
+    };
+    return SelectVLDDup(Node, false, 3, Opcodes);
+  }
+  case AArch64ISD::NEON_LD4DUP: {
+    static const uint16_t Opcodes[] = {
+        AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S,
+        AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H,
+        AArch64::LD4R_4S, AArch64::LD4R_2D
+    };
+    return SelectVLDDup(Node, false, 4, Opcodes);
+  }
+  case AArch64ISD::NEON_LD2DUP_UPD: {
+    static const uint16_t Opcodes[] = {
+      AArch64::LD2R_WB_8B_fixed,  AArch64::LD2R_WB_4H_fixed,
+      AArch64::LD2R_WB_2S_fixed,  AArch64::LD2R_WB_1D_fixed,
+      AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed,
+      AArch64::LD2R_WB_4S_fixed,  AArch64::LD2R_WB_2D_fixed
+    };
+    return SelectVLDDup(Node, true, 2, Opcodes);
+  }
+  case AArch64ISD::NEON_LD3DUP_UPD: {
+    static const uint16_t Opcodes[] = {
+      AArch64::LD3R_WB_8B_fixed,  AArch64::LD3R_WB_4H_fixed,
+      AArch64::LD3R_WB_2S_fixed,  AArch64::LD3R_WB_1D_fixed,
+      AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed,
+      AArch64::LD3R_WB_4S_fixed,  AArch64::LD3R_WB_2D_fixed
+    };
+    return SelectVLDDup(Node, true, 3, Opcodes);
+  }
+  case AArch64ISD::NEON_LD4DUP_UPD: {
+    static const uint16_t Opcodes[] = {
+      AArch64::LD4R_WB_8B_fixed,  AArch64::LD4R_WB_4H_fixed,
+      AArch64::LD4R_WB_2S_fixed,  AArch64::LD4R_WB_1D_fixed,
+      AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed,
+      AArch64::LD4R_WB_4S_fixed,  AArch64::LD4R_WB_2D_fixed
+    };
+    return SelectVLDDup(Node, true, 4, Opcodes);
+  }
+  case AArch64ISD::NEON_LD2LN_UPD: {
+    static const uint16_t Opcodes[] = {
+        AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed,
+        AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed
+    };
+    return SelectVLDSTLane(Node, true, true, 2, Opcodes);
+  }
+  case AArch64ISD::NEON_LD3LN_UPD: {
+    static const uint16_t Opcodes[] = {
+        AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed,
+        AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed
+    };
+    return SelectVLDSTLane(Node, true, true, 3, Opcodes);
+  }
+  case AArch64ISD::NEON_LD4LN_UPD: {
+    static const uint16_t Opcodes[] = {
+        AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed,
+        AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed
+    };
+    return SelectVLDSTLane(Node, true, true, 4, Opcodes);
+  }
+  case AArch64ISD::NEON_ST2LN_UPD: {
+    static const uint16_t Opcodes[] = {
+        AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed,
+        AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed
+    };
+    return SelectVLDSTLane(Node, false, true, 2, Opcodes);
+  }
+  case AArch64ISD::NEON_ST3LN_UPD: {
+    static const uint16_t Opcodes[] = {
+        AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed,
+        AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed
+    };
+    return SelectVLDSTLane(Node, false, true, 3, Opcodes);
+  }
+  case AArch64ISD::NEON_ST4LN_UPD: {
+    static const uint16_t Opcodes[] = {
+        AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed,
+        AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed
+    };
+    return SelectVLDSTLane(Node, false, true, 4, Opcodes);
+  }
+  case AArch64ISD::NEON_ST1x2_UPD: {
+    static const uint16_t Opcodes[] = {
+      AArch64::ST1x2WB_8B_fixed,  AArch64::ST1x2WB_4H_fixed,
+      AArch64::ST1x2WB_2S_fixed,  AArch64::ST1x2WB_1D_fixed,
+      AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed,
+      AArch64::ST1x2WB_4S_fixed,  AArch64::ST1x2WB_2D_fixed
+    };
+    return SelectVST(Node, true, 2, Opcodes);
+  }
+  case AArch64ISD::NEON_ST1x3_UPD: {
+    static const uint16_t Opcodes[] = {
+      AArch64::ST1x3WB_8B_fixed,  AArch64::ST1x3WB_4H_fixed,
+      AArch64::ST1x3WB_2S_fixed,  AArch64::ST1x3WB_1D_fixed,
+      AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed,
+      AArch64::ST1x3WB_4S_fixed,  AArch64::ST1x3WB_2D_fixed
+    };
+    return SelectVST(Node, true, 3, Opcodes);
+  }
+  case AArch64ISD::NEON_ST1x4_UPD: {
+    static const uint16_t Opcodes[] = {
+      AArch64::ST1x4WB_8B_fixed,  AArch64::ST1x4WB_4H_fixed,
+      AArch64::ST1x4WB_2S_fixed,  AArch64::ST1x4WB_1D_fixed,
+      AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed,
+      AArch64::ST1x4WB_4S_fixed,  AArch64::ST1x4WB_2D_fixed
+    };
+    return SelectVST(Node, true, 4, Opcodes);
+  }
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+    bool IsExt = false;
+    switch (IntNo) {
+      default:
+        break;
+      case Intrinsic::aarch64_neon_vtbx1:
+        IsExt = true;
+      case Intrinsic::aarch64_neon_vtbl1:
+        return SelectVTBL(Node, 1, IsExt);
+      case Intrinsic::aarch64_neon_vtbx2:
+        IsExt = true;
+      case Intrinsic::aarch64_neon_vtbl2:
+        return SelectVTBL(Node, 2, IsExt);
+      case Intrinsic::aarch64_neon_vtbx3:
+        IsExt = true;
+      case Intrinsic::aarch64_neon_vtbl3:
+        return SelectVTBL(Node, 3, IsExt);
+      case Intrinsic::aarch64_neon_vtbx4:
+        IsExt = true;
+      case Intrinsic::aarch64_neon_vtbl4:
+        return SelectVTBL(Node, 4, IsExt);
+    }
+    break;
    }
    case ISD::INTRINSIC_VOID:
    case ISD::INTRINSIC_W_CHAIN: {
    }
    case ISD::INTRINSIC_VOID:
    case ISD::INTRINSIC_W_CHAIN: {
@@ -906,66 +1410,149 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
      switch (IntNo) {
      default:
        break;
      switch (IntNo) {
      default:
        break;
-
      case Intrinsic::arm_neon_vld1: {
      case Intrinsic::arm_neon_vld1: {
-      static const uint16_t Opcodes[] = { AArch64::LD1_8B,  AArch64::LD1_4H,
-                                          AArch64::LD1_2S,  AArch64::LD1_1D,
-                                          AArch64::LD1_16B, AArch64::LD1_8H,
-                                          AArch64::LD1_4S,  AArch64::LD1_2D };
-      return SelectVLD(Node, 1, false, Opcodes);
+      static const uint16_t Opcodes[] = {
+          AArch64::LD1_8B,  AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D,
+          AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D
+      };
+      return SelectVLD(Node, false, 1, Opcodes);
      }
      case Intrinsic::arm_neon_vld2: {
      }
      case Intrinsic::arm_neon_vld2: {
-      static const uint16_t Opcodes[] = { AArch64::LD2_8B,  AArch64::LD2_4H,
-                                          AArch64::LD2_2S,  AArch64::LD1_2V_1D,
-                                          AArch64::LD2_16B, AArch64::LD2_8H,
-                                          AArch64::LD2_4S,  AArch64::LD2_2D };
-      return SelectVLD(Node, 2, false, Opcodes);
+      static const uint16_t Opcodes[] = {
+          AArch64::LD2_8B,  AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1x2_1D,
+          AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D
+      };
+      return SelectVLD(Node, false, 2, Opcodes);
      }
      case Intrinsic::arm_neon_vld3: {
      }
      case Intrinsic::arm_neon_vld3: {
-      static const uint16_t Opcodes[] = { AArch64::LD3_8B,  AArch64::LD3_4H,
-                                          AArch64::LD3_2S,  AArch64::LD1_3V_1D,
-                                          AArch64::LD3_16B, AArch64::LD3_8H,
-                                          AArch64::LD3_4S,  AArch64::LD3_2D };
-      return SelectVLD(Node, 3, false, Opcodes);
+      static const uint16_t Opcodes[] = {
+          AArch64::LD3_8B,  AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D,
+          AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D
+      };
+      return SelectVLD(Node, false, 3, Opcodes);
      }
      case Intrinsic::arm_neon_vld4: {
      }
      case Intrinsic::arm_neon_vld4: {
-      static const uint16_t Opcodes[] = { AArch64::LD4_8B,  AArch64::LD4_4H,
-                                          AArch64::LD4_2S,  AArch64::LD1_4V_1D,
-                                          AArch64::LD4_16B, AArch64::LD4_8H,
-                                          AArch64::LD4_4S,  AArch64::LD4_2D };
-      return SelectVLD(Node, 4, false, Opcodes);
+      static const uint16_t Opcodes[] = {
+          AArch64::LD4_8B,  AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D,
+          AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D
+      };
+      return SelectVLD(Node, false, 4, Opcodes);
+    }
+    case Intrinsic::aarch64_neon_vld1x2: {
+      static const uint16_t Opcodes[] = {
+          AArch64::LD1x2_8B, AArch64::LD1x2_4H,  AArch64::LD1x2_2S,
+          AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H,
+          AArch64::LD1x2_4S, AArch64::LD1x2_2D
+      };
+      return SelectVLD(Node, false, 2, Opcodes);
+    }
+    case Intrinsic::aarch64_neon_vld1x3: {
+      static const uint16_t Opcodes[] = {
+          AArch64::LD1x3_8B, AArch64::LD1x3_4H,  AArch64::LD1x3_2S,
+          AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H,
+          AArch64::LD1x3_4S, AArch64::LD1x3_2D
+      };
+      return SelectVLD(Node, false, 3, Opcodes);
+    }
+    case Intrinsic::aarch64_neon_vld1x4: {
+      static const uint16_t Opcodes[] = {
+          AArch64::LD1x4_8B, AArch64::LD1x4_4H,  AArch64::LD1x4_2S,
+          AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H,
+          AArch64::LD1x4_4S, AArch64::LD1x4_2D
+      };
+      return SelectVLD(Node, false, 4, Opcodes);
      }
      case Intrinsic::arm_neon_vst1: {
      }
      case Intrinsic::arm_neon_vst1: {
-      static const uint16_t Opcodes[] = { AArch64::ST1_8B,  AArch64::ST1_4H,
-                                          AArch64::ST1_2S,  AArch64::ST1_1D,
-                                          AArch64::ST1_16B, AArch64::ST1_8H,
-                                          AArch64::ST1_4S,  AArch64::ST1_2D };
-      return SelectVST(Node, 1, false, Opcodes);
+      static const uint16_t Opcodes[] = {
+          AArch64::ST1_8B,  AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D,
+          AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D
+      };
+      return SelectVST(Node, false, 1, Opcodes);
      }
      case Intrinsic::arm_neon_vst2: {
      }
      case Intrinsic::arm_neon_vst2: {
-      static const uint16_t Opcodes[] = { AArch64::ST2_8B,  AArch64::ST2_4H,
-                                          AArch64::ST2_2S,  AArch64::ST1_2V_1D,
-                                          AArch64::ST2_16B, AArch64::ST2_8H,
-                                          AArch64::ST2_4S,  AArch64::ST2_2D };
-      return SelectVST(Node, 2, false, Opcodes);
+      static const uint16_t Opcodes[] = {
+          AArch64::ST2_8B,  AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D,
+          AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D
+      };
+      return SelectVST(Node, false, 2, Opcodes);
      }
      case Intrinsic::arm_neon_vst3: {
      }
      case Intrinsic::arm_neon_vst3: {
-      static const uint16_t Opcodes[] = { AArch64::ST3_8B,  AArch64::ST3_4H,
-                                          AArch64::ST3_2S,  AArch64::ST1_3V_1D,
-                                          AArch64::ST3_16B, AArch64::ST3_8H,
-                                          AArch64::ST3_4S,  AArch64::ST3_2D };
-      return SelectVST(Node, 3, false, Opcodes);
+      static const uint16_t Opcodes[] = {
+          AArch64::ST3_8B,  AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D,
+          AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D
+      };
+      return SelectVST(Node, false, 3, Opcodes);
      }
      case Intrinsic::arm_neon_vst4: {
      }
      case Intrinsic::arm_neon_vst4: {
-      static const uint16_t Opcodes[] = { AArch64::ST4_8B,  AArch64::ST4_4H,
-                                          AArch64::ST4_2S,  AArch64::ST1_4V_1D,
-                                          AArch64::ST4_16B, AArch64::ST4_8H,
-                                          AArch64::ST4_4S,  AArch64::ST4_2D };
-      return SelectVST(Node, 4, false, Opcodes);
+      static const uint16_t Opcodes[] = {
+          AArch64::ST4_8B,  AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D,
+          AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D
+      };
+      return SelectVST(Node, false, 4, Opcodes);
+    }
+    case Intrinsic::aarch64_neon_vst1x2: {
+      static const uint16_t Opcodes[] = {
+          AArch64::ST1x2_8B, AArch64::ST1x2_4H,  AArch64::ST1x2_2S,
+          AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H,
+          AArch64::ST1x2_4S, AArch64::ST1x2_2D
+      };
+      return SelectVST(Node, false, 2, Opcodes);
+    }
+    case Intrinsic::aarch64_neon_vst1x3: {
+      static const uint16_t Opcodes[] = {
+          AArch64::ST1x3_8B, AArch64::ST1x3_4H,  AArch64::ST1x3_2S,
+          AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H,
+          AArch64::ST1x3_4S, AArch64::ST1x3_2D
+      };
+      return SelectVST(Node, false, 3, Opcodes);
+    }
+    case Intrinsic::aarch64_neon_vst1x4: {
+      static const uint16_t Opcodes[] = {
+          AArch64::ST1x4_8B, AArch64::ST1x4_4H,  AArch64::ST1x4_2S,
+          AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H,
+          AArch64::ST1x4_4S, AArch64::ST1x4_2D
+      };
+      return SelectVST(Node, false, 4, Opcodes);
+    }
+    case Intrinsic::arm_neon_vld2lane: {
+      static const uint16_t Opcodes[] = {
+          AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D
+      };
+      return SelectVLDSTLane(Node, true, false, 2, Opcodes);
      }
      }
+    case Intrinsic::arm_neon_vld3lane: {
+      static const uint16_t Opcodes[] = {
+          AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D
+      };
+      return SelectVLDSTLane(Node, true, false, 3, Opcodes);
      }
      }
+    case Intrinsic::arm_neon_vld4lane: {
+      static const uint16_t Opcodes[] = {
+          AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D
+      };
+      return SelectVLDSTLane(Node, true, false, 4, Opcodes);
+    }
+    case Intrinsic::arm_neon_vst2lane: {
+      static const uint16_t Opcodes[] = {
+          AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D
+      };
+      return SelectVLDSTLane(Node, false, false, 2, Opcodes);
+    }
+    case Intrinsic::arm_neon_vst3lane: {
+      static const uint16_t Opcodes[] = {
+          AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, AArch64::ST3LN_D
+      };
+      return SelectVLDSTLane(Node, false, false, 3, Opcodes);
+    }
+    case Intrinsic::arm_neon_vst4lane: {
+      static const uint16_t Opcodes[] = {
+          AArch64::ST4LN_B, AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D
+      };
+      return SelectVLDSTLane(Node, false, false, 4, Opcodes);
+    }
+    } // End of switch IntNo
      break;
      break;
-  }
+  } // End of case ISD::INTRINSIC_VOID and :ISD::INTRINSIC_W_CHAIN
    default:
      break; // Let generic code handle it
    }
    default:
      break; // Let generic code handle it
    }