From: Hao Liu Date: Wed, 4 Sep 2013 09:28:24 +0000 (+0000) Subject: Implement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions: X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=19fdc268c316b3b0bdcb2b558449819f4f402d6a;p=oota-llvm.git Implement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions: sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshrun,sqshrn,uqshrn,sqrshrn,uqrshrn,sshll,ushll and 4 convert instructions: scvtf,ucvtf,fcvtzs,fcvtzu git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189925 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index d7b1947db12..0a71ea46545 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -38,4 +38,32 @@ def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic; // Vector Multiply Extended (Floating Point) def int_aarch64_neon_vmulx : Neon_2Arg_Intrinsic; + +class Neon_N2V_Intrinsic + : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty], + [IntrNoMem]>; +class Neon_N3V_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], + [IntrNoMem]>; +class Neon_N2V_Narrow_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMExtendedElementVectorType<0>, llvm_i32_ty], + [IntrNoMem]>; + +// Vector rounding shift right by immediate (Signed) +def int_aarch64_neon_vsrshr : Neon_N2V_Intrinsic; +def int_aarch64_neon_vurshr : Neon_N2V_Intrinsic; +def int_aarch64_neon_vsqshlu : Neon_N2V_Intrinsic; + +def int_aarch64_neon_vsri : Neon_N3V_Intrinsic; +def int_aarch64_neon_vsli : Neon_N3V_Intrinsic; + +def int_aarch64_neon_vsqshrun : Neon_N2V_Narrow_Intrinsic; +def int_aarch64_neon_vrshrn : Neon_N2V_Narrow_Intrinsic; +def int_aarch64_neon_vsqrshrun : Neon_N2V_Narrow_Intrinsic; +def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic; +def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic; +def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic; +def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic; } diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index 72963da8cba..d94bdc67bf0 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -465,6 +465,8 @@ def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>; def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>; +def concat_vectors : SDNode<"ISD::CONCAT_VECTORS", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1, 2>]>,[]>; // This operator does not do subvector type checking. The ARM // backend, at least, needs it. diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index d12302e685b..b68c43a63cb 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -77,8 +77,11 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + // AArch64 does not have i1 loads, or much of anything for i1 really.
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); @@ -283,6 +286,8 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); + setOperationAction(ISD::SETCC, MVT::v8i8, Custom); setOperationAction(ISD::SETCC, MVT::v16i8, Custom); setOperationAction(ISD::SETCC, MVT::v4i16, Custom); @@ -834,6 +839,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { return "AArch64ISD::NEON_CMPZ"; case AArch64ISD::NEON_TST: return "AArch64ISD::NEON_TST"; + case AArch64ISD::NEON_DUPIMM: + return "AArch64ISD::NEON_DUPIMM"; + case AArch64ISD::NEON_QSHLs: + return "AArch64ISD::NEON_QSHLs"; + case AArch64ISD::NEON_QSHLu: + return "AArch64ISD::NEON_QSHLu"; default: return NULL; } @@ -3257,7 +3268,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { /// Check if this is a valid build_vector for the immediate operand of /// a vector shift left operation. That value must be in the range: -/// 0 <= Value < ElementBits for a left shift +/// 0 <= Value < ElementBits static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); @@ -3266,10 +3277,25 @@ static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) { return (Cnt >= 0 && Cnt < ElementBits); } -static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, +/// Check if this is a valid build_vector for the immediate operand of a +/// vector shift right operation. The value must be in the range: +/// 1 <= Value <= ElementBits +static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) { + assert(VT.isVector() && "vector shift count is not a vector type"); + unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + if (!getVShiftImm(Op, ElementBits, Cnt)) + return false; + return (Cnt >= 1 && Cnt <= ElementBits); +} + +/// Checks for immediate versions of vector shifts and lowers them. +static SDValue PerformShiftCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *ST) { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); + if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64)) + return PerformSRACombine(N, DCI); // Nothing to be done for scalar shifts. 
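[Editor's note: illustrative sketch only, not part of the original patch. Conceptually, PerformShiftCombine rewrites a vector shift whose amount is a constant-splat BUILD_VECTOR into a shift by an AArch64ISD::NEON_DUPIMM node, which the AArch64InstrNEON.td patterns added below then select as the immediate form of the instruction. For a hypothetical v4i32 arithmetic shift by 3:
  before: (sra (v4i32 X), (build_vector (i32 3), (i32 3), (i32 3), (i32 3)))
  after:  (sra (v4i32 X), (NEON_DUPIMM (i32 3)))   ; selected as sshr vd.4s, vn.4s, #3]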
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -3278,10 +3304,54 @@ static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI assert(ST->hasNEON() && "unexpected vector shift"); int64_t Cnt; - if (isVShiftLImm(N->getOperand(1), VT, Cnt)) { - SDValue RHS = DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(0)), - VT, DAG.getConstant(Cnt, MVT::i32)); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS); + + switch (N->getOpcode()) { + default: + llvm_unreachable("unexpected shift opcode"); + + case ISD::SHL: + if (isVShiftLImm(N->getOperand(1), VT, Cnt)) { + SDValue RHS = + DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT, + DAG.getConstant(Cnt, MVT::i32)); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS); + } + break; + + case ISD::SRA: + case ISD::SRL: + if (isVShiftRImm(N->getOperand(1), VT, Cnt)) { + SDValue RHS = + DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT, + DAG.getConstant(Cnt, MVT::i32)); + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS); + } + break; + } + + return SDValue(); +} + +/// ARM-specific DAG combining for intrinsics. +static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { + unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); + + switch (IntNo) { + default: + // Don't do anything for most intrinsics. + break; + + case Intrinsic::arm_neon_vqshifts: + case Intrinsic::arm_neon_vqshiftu: + EVT VT = N->getOperand(1).getValueType(); + int64_t Cnt; + if (!isVShiftLImm(N->getOperand(2), VT, Cnt)) + break; + unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts) + ? AArch64ISD::NEON_QSHLs + : AArch64ISD::NEON_QSHLu; + return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), + N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); } return SDValue(); @@ -3294,8 +3364,12 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::AND: return PerformANDCombine(N, DCI); case ISD::OR: return PerformORCombine(N, DCI, getSubtarget()); - case ISD::SRA: return PerformSRACombine(N, DCI); - case ISD::SHL: return PerformSHLCombine(N, DCI, getSubtarget()); + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + return PerformShiftCombine(N, DCI, getSubtarget()); + case ISD::INTRINSIC_WO_CHAIN: + return PerformIntrinsicCombine(N, DCI.DAG); } return SDValue(); } diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index c9795b249e3..7c7d0386924 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -19,7 +19,7 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" - +#include "llvm/IR/Intrinsics.h" namespace llvm { namespace AArch64ISD { @@ -135,7 +135,11 @@ namespace AArch64ISD { NEON_TST, // Operation for the immediate in vector shift - NEON_DUPIMM + NEON_DUPIMM, + + // Vector saturating shift + NEON_QSHLs, + NEON_QSHLu }; } diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index fb6d65450de..9712a5a3f4f 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -44,6 +44,12 @@ def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2, def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>>; +def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def Neon_sqrshlImm : 
SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>; +def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>; + + //===----------------------------------------------------------------------===// // Multiclasses //===----------------------------------------------------------------------===// @@ -1413,58 +1419,133 @@ def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>; } // Vector Shift (Immediate) - +// Immediate in [0, 63] def imm0_63 : Operand { let ParserMatchClass = uimm6_asmoperand; } -class N2VShiftLeft opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy> +// Shift Right Immediate - A shift right immediate is encoded differently from +// other shift immediates. The immh:immb field is encoded like so: +// +// Offset Encoding +// 8 immh:immb<6:3> = '0001xxx', is encoded in immh:immb<2:0> +// 16 immh:immb<6:4> = '001xxxx', is encoded in immh:immb<3:0> +// 32 immh:immb<6:5> = '01xxxxx', is encoded in immh:immb<4:0> +// 64 immh:immb<6> = '1xxxxxx', is encoded in immh:immb<5:0> +class shr_imm_asmoperands : AsmOperandClass { + let Name = "ShrImm" # OFFSET; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "ShrImm" # OFFSET; +} + +class shr_imm : Operand { + let EncoderMethod = "getShiftRightImm" # OFFSET; + let DecoderMethod = "DecodeShiftRightImm" # OFFSET; + let ParserMatchClass = + !cast("shr_imm" # OFFSET # "_asmoperand"); +} + +def shr_imm8_asmoperand : shr_imm_asmoperands<"8">; +def shr_imm16_asmoperand : shr_imm_asmoperands<"16">; +def shr_imm32_asmoperand : shr_imm_asmoperands<"32">; +def shr_imm64_asmoperand : shr_imm_asmoperands<"64">; + +def shr_imm8 : shr_imm<"8">; +def shr_imm16 : shr_imm<"16">; +def shr_imm32 : shr_imm<"32">; +def shr_imm64 : shr_imm<"64">; + +class N2VShift opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> : NeonI_2VShiftImm; multiclass NeonI_N2VShL opcode, string asmop> { // 64-bit vector types. - def _8B : N2VShiftLeft<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3> { + def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } - def _4H : N2VShiftLeft<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4> { + def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } - def _2S : N2VShiftLeft<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5> { + def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } // 128-bit vector types. 
- def _16B : N2VShiftLeft<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3> { + def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } - def _8H : N2VShiftLeft<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4> { + def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } - def _4S : N2VShiftLeft<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5> { + def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } - def _2D : N2VShiftLeft<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63> { + def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> { let Inst{22} = 0b1; // immh:immb = 1xxxxxx } } -def Neon_top16B : PatFrag<(ops node:$in), +multiclass NeonI_N2VShR opcode, string asmop, SDNode OpNode> { + def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Shift left +defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; + +// Shift right +defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>; +defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>; + +def Neon_top16B : PatFrag<(ops node:$in), (extract_subvector (v16i8 node:$in), (iPTR 8))>; -def Neon_top8H : PatFrag<(ops node:$in), +def Neon_top8H : PatFrag<(ops node:$in), (extract_subvector (v8i16 node:$in), (iPTR 4))>; -def Neon_top4S : PatFrag<(ops node:$in), +def Neon_top4S : PatFrag<(ops node:$in), (extract_subvector (v4i32 node:$in), (iPTR 2))>; class N2VShiftLong opcode, string asmop, string DestT, @@ -1474,21 +1555,21 @@ class N2VShiftLong opcode, string asmop, string DestT, (ins VPR64:$Rn, ImmTy:$Imm), asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", [(set (DestTy VPR128:$Rd), - (DestTy (shl + (DestTy (shl (DestTy (ExtOp (SrcTy VPR64:$Rn))), (DestTy (Neon_dupImm (i32 imm:$Imm))))))], NoItinerary>; class N2VShiftLongHigh opcode, string asmop, string DestT, - string SrcT, ValueType DestTy, ValueType SrcTy, + string SrcT, ValueType DestTy, ValueType SrcTy, int StartIndex, Operand ImmTy, SDPatternOperator ExtOp, PatFrag getTop> : NeonI_2VShiftImm; @@ -1497,33 +1578,33 @@ multiclass NeonI_N2VShLL opcode, string asmop, SDNode ExtOp> { // 64-bit vector types. 
def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8, - uimm3, ExtOp>{ + uimm3, ExtOp> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16, - uimm4, ExtOp>{ + uimm4, ExtOp> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32, - uimm5, ExtOp>{ + uimm5, ExtOp> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } // 128-bit vector types def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", - v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B>{ + v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", - v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H>{ + v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", - v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S>{ + v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } @@ -1547,13 +1628,521 @@ multiclass NeonI_N2VShLL opcode, string asmop, (!cast(prefix # "_4S") VPR128:$Rn, 0)>; } -// Shift left immediate -defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; - -// Shift left long immediate +// Shift left long defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>; defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>; +// Rounding/Saturating shift +class N2VShift_RQ opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, + SDPatternOperator OpNode> + : NeonI_2VShiftImm; + +// shift right (vector by immediate) +multiclass NeonI_N2VShR_RQ opcode, string asmop, + SDPatternOperator OpNode> { + def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +multiclass NeonI_N2VShL_Q opcode, string asmop, + SDPatternOperator OpNode> { + // 64-bit vector types. + def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, + OpNode> { + let Inst{22-21} = 0b01; + } + + // 128-bit vector types. 
+ def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Rounding shift right +defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr", + int_aarch64_neon_vsrshr>; +defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr", + int_aarch64_neon_vurshr>; + +// Saturating shift left unsigned +defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>; + +// Saturating shift left +defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>; +defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>; + +class N2VShiftAdd opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, + SDNode OpNode> + : NeonI_2VShiftImm { + let Constraints = "$src = $Rd"; +} + +// Shift Right accumulate +multiclass NeonI_N2VShRAdd opcode, string asmop, SDNode OpNode> { + def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Shift right and accumulate +defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>; +defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>; + +// Rounding shift accumulate +class N2VShiftAdd_R opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, + SDPatternOperator OpNode> + : NeonI_2VShiftImm { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_N2VShRAdd_R opcode, string asmop, + SDPatternOperator OpNode> { + def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Rounding shift right and accumulate +defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", 
int_aarch64_neon_vsrshr>; +defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>; + +// Shift insert by immediate +class N2VShiftIns opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, + SDPatternOperator OpNode> + : NeonI_2VShiftImm { + let Constraints = "$src = $Rd"; +} + +// shift left insert (vector by immediate) +multiclass NeonI_N2VShLIns opcode, string asmop> { + def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, + int_aarch64_neon_vsli> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, + int_aarch64_neon_vsli> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, + int_aarch64_neon_vsli> { + let Inst{22-21} = 0b01; + } + + // 128-bit vector types + def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, + int_aarch64_neon_vsli> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, + int_aarch64_neon_vsli> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, + int_aarch64_neon_vsli> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, + int_aarch64_neon_vsli> { + let Inst{22} = 0b1; + } +} + +// shift right insert (vector by immediate) +multiclass NeonI_N2VShRIns opcode, string asmop> { + // 64-bit vector types. + def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + int_aarch64_neon_vsri> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + int_aarch64_neon_vsri> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + int_aarch64_neon_vsri> { + let Inst{22-21} = 0b01; + } + + // 128-bit vector types + def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + int_aarch64_neon_vsri> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + int_aarch64_neon_vsri> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + int_aarch64_neon_vsri> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + int_aarch64_neon_vsri> { + let Inst{22} = 0b1; + } +} + +// Shift left and insert +defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">; + +// Shift right and insert +defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">; + +class N2VShR_Narrow opcode, string asmop, string DestT, + string SrcT, Operand ImmTy> + : NeonI_2VShiftImm; + +class N2VShR_Narrow_Hi opcode, string asmop, string DestT, + string SrcT, Operand ImmTy> + : NeonI_2VShiftImm { + let Constraints = "$src = $Rd"; +} + +// left long shift by immediate +multiclass NeonI_N2VShR_Narrow opcode, string asmop> { + def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> { + let Inst{22-21} = 0b01; + } + + // Shift Narrow High + def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h", + shr_imm8> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", 
"4s", + shr_imm16> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d", + shr_imm32> { + let Inst{22-21} = 0b01; + } +} + +// Shift right narrow +defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">; + +// Shift right narrow (prefix Q is saturating, prefix R is rounding) +defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">; +defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">; +defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">; +defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">; +defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">; +defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">; +defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">; + +def Neon_combine : PatFrag<(ops node:$Rm, node:$Rn), + (v2i64 (concat_vectors (v1i64 node:$Rm), + (v1i64 node:$Rn)))>; + +def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs), + (v8i16 (srl (v8i16 node:$lhs), + (v8i16 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs), + (v4i32 (srl (v4i32 node:$lhs), + (v4i32 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs), + (v2i64 (srl (v2i64 node:$lhs), + (v2i64 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs), + (v8i16 (sra (v8i16 node:$lhs), + (v8i16 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs), + (v4i32 (sra (v4i32 node:$lhs), + (v4i32 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs), + (v2i64 (sra (v2i64 node:$lhs), + (v2i64 (Neon_dupImm (i32 node:$rhs)))))>; + +// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors) +multiclass Neon_shiftNarrow_patterns { + def : Pat<(v8i8 (trunc (!cast("Neon_" # shr # "Imm8H") VPR128:$Rn, + imm:$Imm))), + (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>; + def : Pat<(v4i16 (trunc (!cast("Neon_" # shr # "Imm4S") VPR128:$Rn, + imm:$Imm))), + (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>; + def : Pat<(v2i32 (trunc (!cast("Neon_" # shr # "Imm2D") VPR128:$Rn, + imm:$Imm))), + (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>; + + def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert + (v8i8 (trunc (!cast("Neon_" # shr # "Imm8H") + VPR128:$Rn, imm:$Imm)))))), + (SHRNvvi_16B VPR64:$src, VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert + (v4i16 (trunc (!cast("Neon_" # shr # "Imm4S") + VPR128:$Rn, imm:$Imm)))))), + (SHRNvvi_8H VPR64:$src, VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert + (v2i32 (trunc (!cast("Neon_" # shr # "Imm2D") + VPR128:$Rn, imm:$Imm)))))), + (SHRNvvi_4S VPR64:$src, VPR128:$Rn, imm:$Imm)>; +} + +multiclass Neon_shiftNarrow_QR_patterns { + def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)), + (!cast(prefix # "_8B") VPR128:$Rn, imm:$Imm)>; + def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)), + (!cast(prefix # "_4H") VPR128:$Rn, imm:$Imm)>; + def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)), + (!cast(prefix # "_2S") VPR128:$Rn, imm:$Imm)>; + + def : Pat<(Neon_combine (v1i64 VPR64:$src), + (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))), + (!cast(prefix # "_16B") + VPR64:$src, VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine (v1i64 VPR64:$src), + (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))), + (!cast(prefix # "_8H") + VPR64:$src, VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine (v1i64 VPR64:$src), + (v1i64 
(bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))), + (!cast(prefix # "_4S") + VPR64:$src, VPR128:$Rn, imm:$Imm)>; +} + +defm : Neon_shiftNarrow_patterns<"lshr">; +defm : Neon_shiftNarrow_patterns<"ashr">; + +defm : Neon_shiftNarrow_QR_patterns; +defm : Neon_shiftNarrow_QR_patterns; +defm : Neon_shiftNarrow_QR_patterns; +defm : Neon_shiftNarrow_QR_patterns; +defm : Neon_shiftNarrow_QR_patterns; +defm : Neon_shiftNarrow_QR_patterns; +defm : Neon_shiftNarrow_QR_patterns; + +// Convert fix-point and float-pointing +class N2VCvt_Fx opcode, string asmop, string T, + RegisterClass VPRC, ValueType DestTy, ValueType SrcTy, + Operand ImmTy, SDPatternOperator IntOp> + : NeonI_2VShiftImm; + +multiclass NeonI_N2VCvt_Fx2fp opcode, string asmop, + SDPatternOperator IntOp> { + def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32, + shr_imm32, IntOp> { + let Inst{22-21} = 0b01; + } + + def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32, + shr_imm32, IntOp> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64, + shr_imm64, IntOp> { + let Inst{22} = 0b1; + } +} + +multiclass NeonI_N2VCvt_Fp2fx opcode, string asmop, + SDPatternOperator IntOp> { + def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32, + shr_imm32, IntOp> { + let Inst{22-21} = 0b01; + } + + def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32, + shr_imm32, IntOp> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64, + shr_imm64, IntOp> { + let Inst{22} = 0b1; + } +} + +// Convert fixed-point to floating-point +defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf", + int_arm_neon_vcvtfxs2fp>; +defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf", + int_arm_neon_vcvtfxu2fp>; + +// Convert floating-point to fixed-point +defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs", + int_arm_neon_vcvtfp2fxs>; +defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu", + int_arm_neon_vcvtfp2fxu>; + // Scalar Arithmetic class NeonI_Scalar3Same_D_size opcode, string asmop> @@ -1726,6 +2315,10 @@ def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>; // ...and scalar bitcasts... 
+def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>; + def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 (EXTRACT_SUBREG (v8i8 VPR64:$src), sub_64))>; def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 43e91ac4e01..68d4be472cf 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -664,6 +664,25 @@ public: return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4; } + // if 0 < value <= w, return true + bool isShrFixedWidth(int w) const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) + return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= w; + } + + bool isShrImm8() const { return isShrFixedWidth(8); } + + bool isShrImm16() const { return isShrFixedWidth(16); } + + bool isShrImm32() const { return isShrFixedWidth(32); } + + bool isShrImm64() const { return isShrFixedWidth(64); } + bool isNeonMovImmShiftLSL() const { if (!isShiftOrExtend()) return false; @@ -2240,6 +2259,18 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_Width64: return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), "expected integer in range [, 63]"); + case Match_ShrImm8: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 8]"); + case Match_ShrImm16: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 16]"); + case Match_ShrImm32: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 32]"); + case Match_ShrImm64: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 64]"); } llvm_unreachable("Implement any new match types added!"); diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index a88a8e8e9e6..5b57b50a98d 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -113,6 +113,18 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); + template static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, unsigned FullImm, @@ -413,7 +425,33 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(8 - Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(16 - 
Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(32 - Val)); + return MCDisassembler::Success; +} +static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(64 - Val)); + return MCDisassembler::Success; +} template static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index b9770b385b8..7bfaeccebce 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -59,6 +59,14 @@ public: unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups) const; + unsigned getShiftRightImm8(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftRightImm16(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftRightImm32(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftRightImm64(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; // Labels are handled mostly the same way: a symbol is needed, and // just gets some fixup attached. @@ -310,6 +318,25 @@ AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6; } +unsigned AArch64MCCodeEmitter::getShiftRightImm8( + const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { + return 8 - MI.getOperand(Op).getImm(); +} + +unsigned AArch64MCCodeEmitter::getShiftRightImm16( + const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { + return 16 - MI.getOperand(Op).getImm(); +} + +unsigned AArch64MCCodeEmitter::getShiftRightImm32( + const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { + return 32 - MI.getOperand(Op).getImm(); +} + +unsigned AArch64MCCodeEmitter::getShiftRightImm64( + const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { + return 64 - MI.getOperand(Op).getImm(); +} template unsigned AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI, diff --git a/test/CodeGen/AArch64/neon-simd-shift.ll b/test/CodeGen/AArch64/neon-simd-shift.ll new file mode 100644 index 00000000000..19d1b219646 --- /dev/null +++ b/test/CodeGen/AArch64/neon-simd-shift.ll @@ -0,0 +1,1524 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) { +; CHECK: test_vshr_n_s8 +; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vshr_n = ashr <8 x i8> %a, + ret <8 x i8> %vshr_n +} + +define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) { +; CHECK: test_vshr_n_s16 +; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vshr_n = ashr <4 x i16> %a, + ret <4 x i16> %vshr_n +} + +define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) { +; CHECK: test_vshr_n_s32 +; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vshr_n = ashr <2 x i32> %a, + ret <2 x i32> %vshr_n +} + +define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) { +; CHECK: test_vshrq_n_s8 +; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vshr_n = ashr <16 x i8> %a, + ret <16 x i8> %vshr_n +} + +define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) { +; CHECK: test_vshrq_n_s16 +; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vshr_n = ashr <8 x i16> %a, + 
ret <8 x i16> %vshr_n +} + +define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) { +; CHECK: test_vshrq_n_s32 +; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vshr_n = ashr <4 x i32> %a, + ret <4 x i32> %vshr_n +} + +define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) { +; CHECK: test_vshrq_n_s64 +; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vshr_n = ashr <2 x i64> %a, + ret <2 x i64> %vshr_n +} + +define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) { +; CHECK: test_vshr_n_u8 +; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vshr_n = lshr <8 x i8> %a, + ret <8 x i8> %vshr_n +} + +define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) { +; CHECK: test_vshr_n_u16 +; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vshr_n = lshr <4 x i16> %a, + ret <4 x i16> %vshr_n +} + +define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) { +; CHECK: test_vshr_n_u32 +; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vshr_n = lshr <2 x i32> %a, + ret <2 x i32> %vshr_n +} + +define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) { +; CHECK: test_vshrq_n_u8 +; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vshr_n = lshr <16 x i8> %a, + ret <16 x i8> %vshr_n +} + +define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) { +; CHECK: test_vshrq_n_u16 +; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vshr_n = lshr <8 x i16> %a, + ret <8 x i16> %vshr_n +} + +define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) { +; CHECK: test_vshrq_n_u32 +; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vshr_n = lshr <4 x i32> %a, + ret <4 x i32> %vshr_n +} + +define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) { +; CHECK: test_vshrq_n_u64 +; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vshr_n = lshr <2 x i64> %a, + ret <2 x i64> %vshr_n +} + +define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsra_n_s8 +; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsra_n = ashr <8 x i8> %b, + %1 = add <8 x i8> %vsra_n, %a + ret <8 x i8> %1 +} + +define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsra_n_s16 +; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vsra_n = ashr <4 x i16> %b, + %1 = add <4 x i16> %vsra_n, %a + ret <4 x i16> %1 +} + +define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsra_n_s32 +; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vsra_n = ashr <2 x i32> %b, + %1 = add <2 x i32> %vsra_n, %a + ret <2 x i32> %1 +} + +define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsraq_n_s8 +; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsra_n = ashr <16 x i8> %b, + %1 = add <16 x i8> %vsra_n, %a + ret <16 x i8> %1 +} + +define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsraq_n_s16 +; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vsra_n = ashr <8 x i16> %b, + %1 = add <8 x i16> %vsra_n, %a + ret <8 x i16> %1 +} + +define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsraq_n_s32 +; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vsra_n = ashr <4 x i32> %b, + %1 = add <4 x i32> %vsra_n, %a + ret <4 x i32> %1 +} + +define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsraq_n_s64 +; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vsra_n = ashr <2 x i64> %b, + %1 = add <2 x i64> %vsra_n, %a + ret <2 x i64> %1 +} + +define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsra_n_u8 +; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsra_n = lshr <8 x i8> %b, + %1 = add <8 x i8> %vsra_n, %a + ret <8 x i8> %1 +} + +define <4 x 
i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsra_n_u16 +; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vsra_n = lshr <4 x i16> %b, + %1 = add <4 x i16> %vsra_n, %a + ret <4 x i16> %1 +} + +define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsra_n_u32 +; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vsra_n = lshr <2 x i32> %b, + %1 = add <2 x i32> %vsra_n, %a + ret <2 x i32> %1 +} + +define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsraq_n_u8 +; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsra_n = lshr <16 x i8> %b, + %1 = add <16 x i8> %vsra_n, %a + ret <16 x i8> %1 +} + +define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsraq_n_u16 +; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vsra_n = lshr <8 x i16> %b, + %1 = add <8 x i16> %vsra_n, %a + ret <8 x i16> %1 +} + +define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsraq_n_u32 +; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vsra_n = lshr <4 x i32> %b, + %1 = add <4 x i32> %vsra_n, %a + ret <4 x i32> %1 +} + +define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsraq_n_u64 +; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vsra_n = lshr <2 x i64> %b, + %1 = add <2 x i64> %vsra_n, %a + ret <2 x i64> %1 +} + +define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) { +; CHECK: test_vrshr_n_s8 +; CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %a, i32 3) + ret <8 x i8> %vrshr_n +} + + +define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) { +; CHECK: test_vrshr_n_s16 +; CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %a, i32 3) + ret <4 x i16> %vrshr_n +} + + +define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) { +; CHECK: test_vrshr_n_s32 +; CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %a, i32 3) + ret <2 x i32> %vrshr_n +} + + +define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) { +; CHECK: test_vrshrq_n_s8 +; CHECK: srshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %a, i32 3) + ret <16 x i8> %vrshr_n +} + + +define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) { +; CHECK: test_vrshrq_n_s16 +; CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %a, i32 3) + ret <8 x i16> %vrshr_n +} + + +define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) { +; CHECK: test_vrshrq_n_s32 +; CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %a, i32 3) + ret <4 x i32> %vrshr_n +} + + +define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) { +; CHECK: test_vrshrq_n_s64 +; CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %a, i32 3) + ret <2 x i64> %vrshr_n +} + + +define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) { +; CHECK: test_vrshr_n_u8 +; CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %a, i32 3) + ret <8 x i8> %vrshr_n +} + + +define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) { +; CHECK: test_vrshr_n_u16 +; CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %a, 
i32 3) + ret <4 x i16> %vrshr_n +} + + +define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) { +; CHECK: test_vrshr_n_u32 +; CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %a, i32 3) + ret <2 x i32> %vrshr_n +} + + +define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) { +; CHECK: test_vrshrq_n_u8 +; CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %a, i32 3) + ret <16 x i8> %vrshr_n +} + + +define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) { +; CHECK: test_vrshrq_n_u16 +; CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %a, i32 3) + ret <8 x i16> %vrshr_n +} + + +define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) { +; CHECK: test_vrshrq_n_u32 +; CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %a, i32 3) + ret <4 x i32> %vrshr_n +} + + +define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) { +; CHECK: test_vrshrq_n_u64 +; CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %a, i32 3) + ret <2 x i64> %vrshr_n +} + + +define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vrsra_n_s8 +; CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %1 = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %b, i32 3) + %vrsra_n = add <8 x i8> %1, %a + ret <8 x i8> %vrsra_n +} + +define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vrsra_n_s16 +; CHECK: srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %1 = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %b, i32 3) + %vrsra_n = add <4 x i16> %1, %a + ret <4 x i16> %vrsra_n +} + +define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vrsra_n_s32 +; CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %1 = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %b, i32 3) + %vrsra_n = add <2 x i32> %1, %a + ret <2 x i32> %vrsra_n +} + +define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vrsraq_n_s8 +; CHECK: srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %1 = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %b, i32 3) + %vrsra_n = add <16 x i8> %1, %a + ret <16 x i8> %vrsra_n +} + +define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vrsraq_n_s16 +; CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %1 = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %b, i32 3) + %vrsra_n = add <8 x i16> %1, %a + ret <8 x i16> %vrsra_n +} + +define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vrsraq_n_s32 +; CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %1 = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %b, i32 3) + %vrsra_n = add <4 x i32> %1, %a + ret <4 x i32> %vrsra_n +} + +define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vrsraq_n_s64 +; CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %1 = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %b, i32 3) + %vrsra_n = add <2 x i64> %1, %a + ret <2 x i64> %vrsra_n +} + +define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vrsra_n_u8 +; CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %1 = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %b, i32 3) + %vrsra_n = add <8 x i8> %1, %a + 
ret <8 x i8> %vrsra_n +} + +define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vrsra_n_u16 +; CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %1 = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %b, i32 3) + %vrsra_n = add <4 x i16> %1, %a + ret <4 x i16> %vrsra_n +} + +define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vrsra_n_u32 +; CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %1 = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %b, i32 3) + %vrsra_n = add <2 x i32> %1, %a + ret <2 x i32> %vrsra_n +} + +define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vrsraq_n_u8 +; CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %1 = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %b, i32 3) + %vrsra_n = add <16 x i8> %1, %a + ret <16 x i8> %vrsra_n +} + +define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vrsraq_n_u16 +; CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %1 = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %b, i32 3) + %vrsra_n = add <8 x i16> %1, %a + ret <8 x i16> %vrsra_n +} + +define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vrsraq_n_u32 +; CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %1 = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %b, i32 3) + %vrsra_n = add <4 x i32> %1, %a + ret <4 x i32> %vrsra_n +} + +define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vrsraq_n_u64 +; CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %1 = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %b, i32 3) + %vrsra_n = add <2 x i64> %1, %a + ret <2 x i64> %vrsra_n +} + +define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsri_n_s8 +; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) + ret <8 x i8> %vsri_n +} + + +define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsri_n_s16 +; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3) + ret <4 x i16> %vsri +} + + +define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsri_n_s32 +; CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vsri = tail call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3) + ret <2 x i32> %vsri +} + + +define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsriq_n_s8 +; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) + ret <16 x i8> %vsri_n +} + + +define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsriq_n_s16 +; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3) + ret <8 x i16> %vsri +} + + +define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsriq_n_s32 +; CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vsri = tail call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3) + ret <4 x i32> %vsri +} + + +define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsriq_n_s64 +; CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vsri = tail call <2 x i64> 
@llvm.aarch64.neon.vsri.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3) + ret <2 x i64> %vsri +} + +define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsri_n_p8 +; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) + ret <8 x i8> %vsri_n +} + +define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsri_n_p16 +; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 + %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15) + ret <4 x i16> %vsri +} + +define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsriq_n_p8 +; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) + ret <16 x i8> %vsri_n +} + +define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsriq_n_p16 +; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 + %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15) + ret <8 x i16> %vsri +} + +define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsli_n_s8 +; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) + ret <8 x i8> %vsli_n +} + +define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsli_n_s16 +; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3) + ret <4 x i16> %vsli +} + +define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsli_n_s32 +; CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vsli = tail call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3) + ret <2 x i32> %vsli +} + +define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsliq_n_s8 +; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) + ret <16 x i8> %vsli_n +} + +define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsliq_n_s16 +; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3) + ret <8 x i16> %vsli +} + +define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsliq_n_s32 +; CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vsli = tail call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3) + ret <4 x i32> %vsli +} + +define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsliq_n_s64 +; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vsli = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3) + ret <2 x i64> %vsli +} + +define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsli_n_p8 +; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) + ret <8 x i8> %vsli_n +} + +define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsli_n_p16 +; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 + %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15) + ret <4 x i16> %vsli +} + +define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, 
<16 x i8> %b) { +; CHECK: test_vsliq_n_p8 +; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) + ret <16 x i8> %vsli_n +} + +define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsliq_n_p16 +; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 + %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15) + ret <8 x i16> %vsli +} + +define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) { +; CHECK: test_vqshl_n_s8 +; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vqshl = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> ) + ret <8 x i8> %vqshl +} + + +define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) { +; CHECK: test_vqshl_n_s16 +; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> ) + ret <4 x i16> %vqshl +} + + +define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) { +; CHECK: test_vqshl_n_s32 +; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> ) + ret <2 x i32> %vqshl +} + + +define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) { +; CHECK: test_vqshlq_n_s8 +; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> ) + ret <16 x i8> %vqshl_n +} + + +define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) { +; CHECK: test_vqshlq_n_s16 +; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> ) + ret <8 x i16> %vqshl +} + + +define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) { +; CHECK: test_vqshlq_n_s32 +; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> ) + ret <4 x i32> %vqshl +} + + +define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) { +; CHECK: test_vqshlq_n_s64 +; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> ) + ret <2 x i64> %vqshl +} + + +define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) { +; CHECK: test_vqshl_n_u8 +; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> ) + ret <8 x i8> %vqshl_n +} + + +define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) { +; CHECK: test_vqshl_n_u16 +; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> ) + ret <4 x i16> %vqshl +} + + +define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) { +; CHECK: test_vqshl_n_u32 +; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> ) + ret <2 x i32> %vqshl +} + + +define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) { +; CHECK: test_vqshlq_n_u8 +; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> ) + ret <16 x i8> %vqshl_n +} + + +define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) { +; CHECK: test_vqshlq_n_u16 +; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> ) + ret <8 x i16> %vqshl +} + + +define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) { +; CHECK: test_vqshlq_n_u32 +; CHECK: uqshl 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> ) + ret <4 x i32> %vqshl +} + + +define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) { +; CHECK: test_vqshlq_n_u64 +; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> ) + ret <2 x i64> %vqshl +} + +define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) { +; CHECK: test_vqshlu_n_s8 +; CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vqshlu = tail call <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8> %a, i32 3) + ret <8 x i8> %vqshlu +} + + +define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) { +; CHECK: test_vqshlu_n_s16 +; CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vqshlu = tail call <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16> %a, i32 3) + ret <4 x i16> %vqshlu +} + + +define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) { +; CHECK: test_vqshlu_n_s32 +; CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vqshlu = tail call <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32> %a, i32 3) + ret <2 x i32> %vqshlu +} + + +define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) { +; CHECK: test_vqshluq_n_s8 +; CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vqshlu = tail call <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8> %a, i32 3) + ret <16 x i8> %vqshlu +} + + +define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) { +; CHECK: test_vqshluq_n_s16 +; CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vqshlu = tail call <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16> %a, i32 3) + ret <8 x i16> %vqshlu +} + + +define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) { +; CHECK: test_vqshluq_n_s32 +; CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vqshlu = tail call <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32> %a, i32 3) + ret <4 x i32> %vqshlu +} + + +define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) { +; CHECK: test_vqshluq_n_s64 +; CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vqshlu = tail call <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64> %a, i32 3) + ret <2 x i64> %vqshlu +} + + +define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) { +; CHECK: test_vshrn_n_s16 +; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %1 = ashr <8 x i16> %a, + %vshrn_n = trunc <8 x i16> %1 to <8 x i8> + ret <8 x i8> %vshrn_n +} + +define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) { +; CHECK: test_vshrn_n_s32 +; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %1 = ashr <4 x i32> %a, + %vshrn_n = trunc <4 x i32> %1 to <4 x i16> + ret <4 x i16> %vshrn_n +} + +define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) { +; CHECK: test_vshrn_n_s64 +; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %1 = ashr <2 x i64> %a, + %vshrn_n = trunc <2 x i64> %1 to <2 x i32> + ret <2 x i32> %vshrn_n +} + +define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) { +; CHECK: test_vshrn_n_u16 +; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %1 = lshr <8 x i16> %a, + %vshrn_n = trunc <8 x i16> %1 to <8 x i8> + ret <8 x i8> %vshrn_n +} + +define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) { +; CHECK: test_vshrn_n_u32 +; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %1 = lshr <4 x i32> %a, + %vshrn_n = trunc <4 x i32> %1 to <4 x i16> + ret <4 x i16> %vshrn_n +} + +define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) { +; CHECK: test_vshrn_n_u64 +; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %1 = lshr <2 x i64> %a, + %vshrn_n = trunc <2 x i64> %1 to <2 x i32> + ret <2 x i32> %vshrn_n +} + +define <16 x i8> 
@test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vshrn_high_n_s16 +; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %1 = ashr <8 x i16> %b, + %vshrn_n = trunc <8 x i16> %1 to <8 x i8> + %2 = bitcast <8 x i8> %a to <1 x i64> + %3 = bitcast <8 x i8> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> + %4 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %4 +} + +define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vshrn_high_n_s32 +; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %1 = ashr <4 x i32> %b, + %vshrn_n = trunc <4 x i32> %1 to <4 x i16> + %2 = bitcast <4 x i16> %a to <1 x i64> + %3 = bitcast <4 x i16> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> + %4 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %4 +} + +define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vshrn_high_n_s64 +; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %2 = ashr <2 x i64> %b, + %vshrn_n = trunc <2 x i64> %2 to <2 x i32> + %3 = bitcast <2 x i32> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> + %4 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %4 +} + +define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vshrn_high_n_u16 +; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %1 = lshr <8 x i16> %b, + %vshrn_n = trunc <8 x i16> %1 to <8 x i8> + %2 = bitcast <8 x i8> %a to <1 x i64> + %3 = bitcast <8 x i8> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> + %4 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %4 +} + +define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vshrn_high_n_u32 +; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %1 = lshr <4 x i32> %b, + %vshrn_n = trunc <4 x i32> %1 to <4 x i16> + %2 = bitcast <4 x i16> %a to <1 x i64> + %3 = bitcast <4 x i16> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> + %4 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %4 +} + +define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vshrn_high_n_u64 +; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %2 = lshr <2 x i64> %b, + %vshrn_n = trunc <2 x i64> %2 to <2 x i32> + %3 = bitcast <2 x i32> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> + %4 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %4 +} + +define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) { +; CHECK: test_vqshrun_n_s16 +; CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqshrun +} + + +define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) { +; CHECK: test_vqshrun_n_s32 +; CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqshrun +} + +define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) { +; CHECK: test_vqshrun_n_s64 +; CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqshrun +} + +define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: 
test_vqshrun_high_n_s16 +; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqshrun_high_n_s32 +; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqshrun_high_n_s64 +; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) { +; CHECK: test_vrshrn_n_s16 +; CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vrshrn +} + + +define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) { +; CHECK: test_vrshrn_n_s32 +; CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vrshrn +} + + +define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) { +; CHECK: test_vrshrn_n_s64 +; CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vrshrn +} + +define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vrshrn_high_n_s16 +; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vrshrn_high_n_s32 +; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vrshrn_high_n_s64 +; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) { +; 
CHECK: test_vqrshrun_n_s16 +; CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqrshrun +} + +define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) { +; CHECK: test_vqrshrun_n_s32 +; CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqrshrun +} + +define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) { +; CHECK: test_vqrshrun_n_s64 +; CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqrshrun +} + +define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqrshrun_high_n_s16 +; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqrshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqrshrun_high_n_s32 +; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqrshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqrshrun_high_n_s64 +; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqrshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) { +; CHECK: test_vqshrn_n_s16 +; CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqshrn +} + + +define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) { +; CHECK: test_vqshrn_n_s32 +; CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqshrn +} + + +define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) { +; CHECK: test_vqshrn_n_s64 +; CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqshrn +} + + +define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) { +; CHECK: test_vqshrn_n_u16 +; CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqshrn +} + + +define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) { +; CHECK: test_vqshrn_n_u32 +; CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqshrn +} + + +define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) { +; CHECK: test_vqshrn_n_u64 +; CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + 
%vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqshrn +} + + +define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqshrn_high_n_s16 +; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqshrn_high_n_s32 +; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqshrn_high_n_s64 +; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqshrn_high_n_u16 +; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqshrn_high_n_u32 +; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqshrn_high_n_u64 +; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) { +; CHECK: test_vqrshrn_n_s16 +; CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqrshrn +} + + +define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) { +; CHECK: test_vqrshrn_n_s32 +; CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqrshrn +} + + +define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) { +; CHECK: test_vqrshrn_n_s64 +; CHECK: sqrshrn 
{{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqrshrn +} + + +define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) { +; CHECK: test_vqrshrn_n_u16 +; CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqrshrn +} + + +define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) { +; CHECK: test_vqrshrn_n_u32 +; CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqrshrn +} + + +define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) { +; CHECK: test_vqrshrn_n_u64 +; CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqrshrn +} + + +define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqrshrn_high_n_s16 +; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqrshrn_high_n_s32 +; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqrshrn_high_n_s64 +; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqrshrn_high_n_u16 +; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqrshrn_high_n_u32 +; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqrshrn_high_n_u64 +; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqrshrn = tail call <2 x i32> 
@llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) { +; CHECK: test_vcvt_n_f32_s32 +; CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 + %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 31) + ret <2 x float> %vcvt +} + +define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) { +; CHECK: test_vcvtq_n_f32_s32 +; CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 + %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 31) + ret <4 x float> %vcvt +} + +define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) { +; CHECK: test_vcvtq_n_f64_s64 +; CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 + %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 50) + ret <2 x double> %vcvt +} + +define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) { +; CHECK: test_vcvt_n_f32_u32 +; CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 + %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 31) + ret <2 x float> %vcvt +} + +define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) { +; CHECK: test_vcvtq_n_f32_u32 +; CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 + %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 31) + ret <4 x float> %vcvt +} + +define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) { +; CHECK: test_vcvtq_n_f64_u64 +; CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 + %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 50) + ret <2 x double> %vcvt +} + +define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) { +; CHECK: test_vcvt_n_s32_f32 +; CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 + %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a, i32 31) + ret <2 x i32> %vcvt +} + +define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) { +; CHECK: test_vcvtq_n_s32_f32 +; CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 + %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %a, i32 31) + ret <4 x i32> %vcvt +} + +define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) { +; CHECK: test_vcvtq_n_s64_f64 +; CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 + %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %a, i32 50) + ret <2 x i64> %vcvt +} + +define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) { +; CHECK: test_vcvt_n_u32_f32 +; CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 + %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %a, i32 31) + ret <2 x i32> %vcvt +} + +define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) { +; CHECK: test_vcvt_n_u32_f32 +; CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 + %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %a, i32 31) + ret <4 x i32> %vcvt +} + +define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) { +; CHECK: test_vcvtq_n_u64_f64 +; CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 + %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %a, i32 50) + ret <2 x i64> %vcvt +} + +declare <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16>, i32) + +declare <2 x i32> 
@llvm.aarch64.neon.vsrshr.v2i32(<2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16>, i32) + +declare <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16>, i32) + +declare <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8>, <8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16>, <4 x i16>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32>, <2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8>, <16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16>, <8 x i16>, i32) + +declare <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32>, <4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64>, <2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) + +declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16>, i32) + +declare <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64>, i32) + +declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) + +declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) + +declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) + +declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) + +declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) + +declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) + +declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) + +declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) + +declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) + +declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) + +declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) + +declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) + +declare <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32>, i32) + +declare <2 x i32> 
@llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64>, i32) + +declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) + +declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) + +declare <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) + +declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) + +declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) + +declare <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) + +declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) + +declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) + +declare <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) + +declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) + +declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) + +declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) + diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index bc54b50eb2e..52305f17434 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -845,12 +845,12 @@ // Vector Saturating Shift Left (Signed and Unsigned Integer) //---------------------------------------------------------------------- // Mismatched vector types - sqshl v0.2s, v15.2s, v16.2d + sqshl v0.2s, v15.4s, v16.2d uqshl v1.8b, v25.4h, v6.8h // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: sqshl v0.2s, v15.2s, v16.2d -// CHECK-ERROR: ^ +// CHECK-ERROR: sqshl v0.2s, v15.4s, v16.2d +// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: uqshl v1.8b, v25.4h, v6.8h // CHECK-ERROR: ^ @@ -1288,3 +1288,723 @@ // CHECK-ERROR: ushll2 v0.2d, v1.4s, #33 // CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right by immediate +//------------------------------------------------------------------------------ + sshr v0.8b, v1.8h, #3 + sshr v0.4h, v1.4s, #3 + sshr v0.2s, v1.2d, #3 + sshr v0.16b, v1.16b, #9 + sshr v0.8h, v1.8h, #17 + sshr v0.4s, v1.4s, #33 + sshr v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshr 
v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshr v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshr v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sshr v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sshr v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sshr v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: sshr v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right by immediate +//------------------------------------------------------------------------------ + ushr v0.8b, v1.8h, #3 + ushr v0.4h, v1.4s, #3 + ushr v0.2s, v1.2d, #3 + ushr v0.16b, v1.16b, #9 + ushr v0.8h, v1.8h, #17 + ushr v0.4s, v1.4s, #33 + ushr v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushr v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushr v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushr v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: ushr v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: ushr v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ushr v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ushr v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right and accumulate by immediate +//------------------------------------------------------------------------------ + ssra v0.8b, v1.8h, #3 + ssra v0.4h, v1.4s, #3 + ssra v0.2s, v1.2d, #3 + ssra v0.16b, v1.16b, #9 + ssra v0.8h, v1.8h, #17 + ssra v0.4s, v1.4s, #33 + ssra v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssra v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssra v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssra v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: ssra v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: ssra v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ssra v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ssra v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right and accumulate by immediate +//------------------------------------------------------------------------------ + usra v0.8b, v1.8h, #3 + usra v0.4h, v1.4s, #3 + usra v0.2s, v1.2d, #3 + usra v0.16b, v1.16b, #9 + usra v0.8h, v1.8h, #17 + usra v0.4s, v1.4s, #33 + usra v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand 
for instruction +// CHECK-ERROR: usra v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usra v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usra v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: usra v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: usra v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: usra v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: usra v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector rounding shift right by immediate +//------------------------------------------------------------------------------ + srshr v0.8b, v1.8h, #3 + srshr v0.4h, v1.4s, #3 + srshr v0.2s, v1.2d, #3 + srshr v0.16b, v1.16b, #9 + srshr v0.8h, v1.8h, #17 + srshr v0.4s, v1.4s, #33 + srshr v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: srshr v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: srshr v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: srshr v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: srshr v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: srshr v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: srshr v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: srshr v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector rounding shift right by immediate +//------------------------------------------------------------------------------ + urshr v0.8b, v1.8h, #3 + urshr v0.4h, v1.4s, #3 + urshr v0.2s, v1.2d, #3 + urshr v0.16b, v1.16b, #9 + urshr v0.8h, v1.8h, #17 + urshr v0.4s, v1.4s, #33 + urshr v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urshr v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urshr v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urshr v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: urshr v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: urshr v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: urshr v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: urshr v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector rounding shift right and accumulate by immediate +//------------------------------------------------------------------------------ + srsra v0.8b, v1.8h, #3 + srsra v0.4h, v1.4s, #3 + srsra v0.2s, v1.2d, #3 + srsra v0.16b, v1.16b, #9 + srsra v0.8h, v1.8h, #17 + srsra 
v0.4s, v1.4s, #33 + srsra v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: srsra v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: srsra v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: srsra v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: srsra v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: srsra v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: srsra v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: srsra v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector rounding shift right and accumulate by immediate +//------------------------------------------------------------------------------ + ursra v0.8b, v1.8h, #3 + ursra v0.4h, v1.4s, #3 + ursra v0.2s, v1.2d, #3 + ursra v0.16b, v1.16b, #9 + ursra v0.8h, v1.8h, #17 + ursra v0.4s, v1.4s, #33 + ursra v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursra v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursra v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursra v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: ursra v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: ursra v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ursra v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ursra v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right and insert by immediate +//------------------------------------------------------------------------------ + sri v0.8b, v1.8h, #3 + sri v0.4h, v1.4s, #3 + sri v0.2s, v1.2d, #3 + sri v0.16b, v1.16b, #9 + sri v0.8h, v1.8h, #17 + sri v0.4s, v1.4s, #33 + sri v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sri v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sri v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sri v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sri v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sri v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sri v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: sri v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift left and insert by immediate +//------------------------------------------------------------------------------ + sli v0.8b, v1.8h, #3 + sli v0.4h, v1.4s, #3 + sli v0.2s, v1.2d, #3 + 
sli v0.16b, v1.16b, #8 + sli v0.8h, v1.8h, #16 + sli v0.4s, v1.4s, #32 + sli v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sli v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sli v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sli v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sli v0.16b, v1.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: sli v0.8h, v1.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: sli v0.4s, v1.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: sli v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift left unsigned by immediate +//------------------------------------------------------------------------------ + sqshlu v0.8b, v1.8h, #3 + sqshlu v0.4h, v1.4s, #3 + sqshlu v0.2s, v1.2d, #3 + sqshlu v0.16b, v1.16b, #8 + sqshlu v0.8h, v1.8h, #16 + sqshlu v0.4s, v1.4s, #32 + sqshlu v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshlu v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshlu v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshlu v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sqshlu v0.16b, v1.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: sqshlu v0.8h, v1.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: sqshlu v0.4s, v1.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: sqshlu v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift left by immediate +//------------------------------------------------------------------------------ + sqshl v0.8b, v1.8h, #3 + sqshl v0.4h, v1.4s, #3 + sqshl v0.2s, v1.2d, #3 + sqshl v0.16b, v1.16b, #8 + sqshl v0.8h, v1.8h, #16 + sqshl v0.4s, v1.4s, #32 + sqshl v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshl v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshl v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshl v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sqshl v0.16b, v1.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: sqshl v0.8h, v1.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: sqshl v0.4s, v1.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: sqshl v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift left by immediate 
+//------------------------------------------------------------------------------ + uqshl v0.8b, v1.8h, #3 + uqshl v0.4h, v1.4s, #3 + uqshl v0.2s, v1.2d, #3 + uqshl v0.16b, v1.16b, #8 + uqshl v0.8h, v1.8h, #16 + uqshl v0.4s, v1.4s, #32 + uqshl v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshl v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshl v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshl v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: uqshl v0.16b, v1.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: uqshl v0.8h, v1.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: uqshl v0.4s, v1.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: uqshl v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right narrow by immediate +//------------------------------------------------------------------------------ + shrn v0.8b, v1.8b, #3 + shrn v0.4h, v1.4h, #3 + shrn v0.2s, v1.2s, #3 + shrn2 v0.16b, v1.8h, #17 + shrn2 v0.8h, v1.4s, #33 + shrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: shrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: shrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: shrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right unsigned narrow by immediate +//------------------------------------------------------------------------------ + sqshrun v0.8b, v1.8b, #3 + sqshrun v0.4h, v1.4h, #3 + sqshrun v0.2s, v1.2s, #3 + sqshrun2 v0.16b, v1.8h, #17 + sqshrun2 v0.8h, v1.4s, #33 + sqshrun2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrun v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrun v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrun v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqshrun2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqshrun2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqshrun2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector rounding shift right narrow by immediate +//------------------------------------------------------------------------------ + rshrn v0.8b, v1.8b, #3 + rshrn v0.4h, v1.4h, #3 + rshrn v0.2s, v1.2s, #3 + rshrn2 
v0.16b, v1.8h, #17 + rshrn2 v0.8h, v1.4s, #33 + rshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: rshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: rshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: rshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded unsigned narrow by immediate +//------------------------------------------------------------------------------ + sqrshrun v0.8b, v1.8b, #3 + sqrshrun v0.4h, v1.4h, #3 + sqrshrun v0.2s, v1.2s, #3 + sqrshrun2 v0.16b, v1.8h, #17 + sqrshrun2 v0.8h, v1.4s, #33 + sqrshrun2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrun v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrun v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrun v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqrshrun2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqrshrun2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqrshrun2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right narrow by immediate +//------------------------------------------------------------------------------ + sqshrn v0.8b, v1.8b, #3 + sqshrn v0.4h, v1.4h, #3 + sqshrn v0.2s, v1.2s, #3 + sqshrn2 v0.16b, v1.8h, #17 + sqshrn2 v0.8h, v1.4s, #33 + sqshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right narrow by immediate +//------------------------------------------------------------------------------ + uqshrn v0.8b, v1.8b, #3 + uqshrn v0.4h, v1.4h, #3 + uqshrn v0.2s, v1.2s, #3 + uqshrn2 v0.16b, v1.8h, #17 + uqshrn2 v0.8h, v1.4s, #33 + uqshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for 
instruction +// CHECK-ERROR: uqshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: uqshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: uqshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: uqshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded narrow by immediate +//------------------------------------------------------------------------------ + sqrshrn v0.8b, v1.8b, #3 + sqrshrn v0.4h, v1.4h, #3 + sqrshrn v0.2s, v1.2s, #3 + sqrshrn2 v0.16b, v1.8h, #17 + sqrshrn2 v0.8h, v1.4s, #33 + sqrshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqrshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqrshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqrshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded narrow by immediate +//------------------------------------------------------------------------------ + uqrshrn v0.8b, v1.8b, #3 + uqrshrn v0.4h, v1.4h, #3 + uqrshrn v0.2s, v1.2s, #3 + uqrshrn2 v0.16b, v1.8h, #17 + uqrshrn2 v0.8h, v1.4s, #33 + uqrshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqrshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqrshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqrshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: uqrshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: uqrshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: uqrshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Fixed-point convert to floating-point +//------------------------------------------------------------------------------ + scvtf v0.2s, v1.2d, #3 + scvtf v0.4s, v1.4h, #3 + scvtf v0.2d, v1.2s, #3 + ucvtf v0.2s, v1.2s, #33 + ucvtf v0.4s, v1.4s, #33 + ucvtf v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v0.4s, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v0.2d, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// 
CHECK-ERROR: ucvtf v0.2s, v1.2s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ucvtf v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ucvtf v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point convert to fixed-point +//------------------------------------------------------------------------------ + fcvtzs v0.2s, v1.2d, #3 + fcvtzs v0.4s, v1.4h, #3 + fcvtzs v0.2d, v1.2s, #3 + fcvtzu v0.2s, v1.2s, #33 + fcvtzu v0.4s, v1.4s, #33 + fcvtzu v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v0.4s, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v0.2d, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: fcvtzu v0.2s, v1.2s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: fcvtzu v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: fcvtzu v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s new file mode 100644 index 00000000000..9e6e1aaf86c --- /dev/null +++ b/test/MC/AArch64/neon-simd-shift.s @@ -0,0 +1,434 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Vector shift right by immediate +//------------------------------------------------------------------------------ + sshr v0.8b, v1.8b, #3 + sshr v0.4h, v1.4h, #3 + sshr v0.2s, v1.2s, #3 + sshr v0.16b, v1.16b, #3 + sshr v0.8h, v1.8h, #3 + sshr v0.4s, v1.4s, #3 + sshr v0.2d, v1.2d, #3 +// CHECK: sshr v0.8b, v1.8b, #3 // encoding: [0x20,0x04,0x0d,0x0f] +// CHECK: sshr v0.4h, v1.4h, #3 // encoding: [0x20,0x04,0x1d,0x0f] +// CHECK: sshr v0.2s, v1.2s, #3 // encoding: [0x20,0x04,0x3d,0x0f] +// CHECK: sshr v0.16b, v1.16b, #3 // encoding: [0x20,0x04,0x0d,0x4f] +// CHECK: sshr v0.8h, v1.8h, #3 // encoding: [0x20,0x04,0x1d,0x4f] +// CHECK: sshr v0.4s, v1.4s, #3 // encoding: [0x20,0x04,0x3d,0x4f] +// CHECK: sshr v0.2d, v1.2d, #3 // encoding: [0x20,0x04,0x7d,0x4f] + +//------------------------------------------------------------------------------ +// Vector shift right by immediate +//------------------------------------------------------------------------------ + ushr v0.8b, v1.8b, #3 + ushr v0.4h, v1.4h, #3 + ushr v0.2s, v1.2s, #3 + ushr v0.16b, v1.16b, #3 + ushr v0.8h, v1.8h, #3 + ushr v0.4s, v1.4s, #3 + ushr v0.2d, v1.2d, #3 + +// CHECK: ushr v0.8b, v1.8b, #3 // encoding: [0x20,0x04,0x0d,0x2f] +// CHECK: ushr v0.4h, v1.4h, #3 // encoding: [0x20,0x04,0x1d,0x2f] +// CHECK: ushr v0.2s, v1.2s, #3 // encoding: [0x20,0x04,0x3d,0x2f] +// CHECK: ushr v0.16b, v1.16b, #3 // encoding: [0x20,0x04,0x0d,0x6f] +// CHECK: ushr v0.8h, v1.8h, #3 // encoding: [0x20,0x04,0x1d,0x6f] +// CHECK: ushr v0.4s, v1.4s, #3 // encoding: [0x20,0x04,0x3d,0x6f] +// CHECK: ushr v0.2d, v1.2d, #3 // encoding: [0x20,0x04,0x7d,0x6f] + +//------------------------------------------------------------------------------ +// Vector shift right and 
accumulate by immediate +//------------------------------------------------------------------------------ + ssra v0.8b, v1.8b, #3 + ssra v0.4h, v1.4h, #3 + ssra v0.2s, v1.2s, #3 + ssra v0.16b, v1.16b, #3 + ssra v0.8h, v1.8h, #3 + ssra v0.4s, v1.4s, #3 + ssra v0.2d, v1.2d, #3 + +// CHECK: ssra v0.8b, v1.8b, #3 // encoding: [0x20,0x14,0x0d,0x0f] +// CHECK: ssra v0.4h, v1.4h, #3 // encoding: [0x20,0x14,0x1d,0x0f] +// CHECK: ssra v0.2s, v1.2s, #3 // encoding: [0x20,0x14,0x3d,0x0f] +// CHECK: ssra v0.16b, v1.16b, #3 // encoding: [0x20,0x14,0x0d,0x4f] +// CHECK: ssra v0.8h, v1.8h, #3 // encoding: [0x20,0x14,0x1d,0x4f] +// CHECK: ssra v0.4s, v1.4s, #3 // encoding: [0x20,0x14,0x3d,0x4f] +// CHECK: ssra v0.2d, v1.2d, #3 // encoding: [0x20,0x14,0x7d,0x4f] + +//------------------------------------------------------------------------------ +// Vector shift right and accumulate by immediate +//------------------------------------------------------------------------------ + usra v0.8b, v1.8b, #3 + usra v0.4h, v1.4h, #3 + usra v0.2s, v1.2s, #3 + usra v0.16b, v1.16b, #3 + usra v0.8h, v1.8h, #3 + usra v0.4s, v1.4s, #3 + usra v0.2d, v1.2d, #3 + +// CHECK: usra v0.8b, v1.8b, #3 // encoding: [0x20,0x14,0x0d,0x2f] +// CHECK: usra v0.4h, v1.4h, #3 // encoding: [0x20,0x14,0x1d,0x2f] +// CHECK: usra v0.2s, v1.2s, #3 // encoding: [0x20,0x14,0x3d,0x2f] +// CHECK: usra v0.16b, v1.16b, #3 // encoding: [0x20,0x14,0x0d,0x6f] +// CHECK: usra v0.8h, v1.8h, #3 // encoding: [0x20,0x14,0x1d,0x6f] +// CHECK: usra v0.4s, v1.4s, #3 // encoding: [0x20,0x14,0x3d,0x6f] +// CHECK: usra v0.2d, v1.2d, #3 // encoding: [0x20,0x14,0x7d,0x6f] + +//------------------------------------------------------------------------------ +// Vector rounding shift right by immediate +//------------------------------------------------------------------------------ + srshr v0.8b, v1.8b, #3 + srshr v0.4h, v1.4h, #3 + srshr v0.2s, v1.2s, #3 + srshr v0.16b, v1.16b, #3 + srshr v0.8h, v1.8h, #3 + srshr v0.4s, v1.4s, #3 + srshr v0.2d, v1.2d, #3 + +// CHECK: srshr v0.8b, v1.8b, #3 // encoding: [0x20,0x24,0x0d,0x0f] +// CHECK: srshr v0.4h, v1.4h, #3 // encoding: [0x20,0x24,0x1d,0x0f] +// CHECK: srshr v0.2s, v1.2s, #3 // encoding: [0x20,0x24,0x3d,0x0f] +// CHECK: srshr v0.16b, v1.16b, #3 // encoding: [0x20,0x24,0x0d,0x4f] +// CHECK: srshr v0.8h, v1.8h, #3 // encoding: [0x20,0x24,0x1d,0x4f] +// CHECK: srshr v0.4s, v1.4s, #3 // encoding: [0x20,0x24,0x3d,0x4f] +// CHECK: srshr v0.2d, v1.2d, #3 // encoding: [0x20,0x24,0x7d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector rounding shift right by immediate +//------------------------------------------------------------------------------ + urshr v0.8b, v1.8b, #3 + urshr v0.4h, v1.4h, #3 + urshr v0.2s, v1.2s, #3 + urshr v0.16b, v1.16b, #3 + urshr v0.8h, v1.8h, #3 + urshr v0.4s, v1.4s, #3 + urshr v0.2d, v1.2d, #3 + +// CHECK: urshr v0.8b, v1.8b, #3 // encoding: [0x20,0x24,0x0d,0x2f] +// CHECK: urshr v0.4h, v1.4h, #3 // encoding: [0x20,0x24,0x1d,0x2f] +// CHECK: urshr v0.2s, v1.2s, #3 // encoding: [0x20,0x24,0x3d,0x2f] +// CHECK: urshr v0.16b, v1.16b, #3 // encoding: [0x20,0x24,0x0d,0x6f] +// CHECK: urshr v0.8h, v1.8h, #3 // encoding: [0x20,0x24,0x1d,0x6f] +// CHECK: urshr v0.4s, v1.4s, #3 // encoding: [0x20,0x24,0x3d,0x6f] +// CHECK: urshr v0.2d, v1.2d, #3 // encoding: [0x20,0x24,0x7d,0x6f] + + +//------------------------------------------------------------------------------ +// Vector rounding shift right and accumulate by immediate
+//------------------------------------------------------------------------------ + srsra v0.8b, v1.8b, #3 + srsra v0.4h, v1.4h, #3 + srsra v0.2s, v1.2s, #3 + srsra v0.16b, v1.16b, #3 + srsra v0.8h, v1.8h, #3 + srsra v0.4s, v1.4s, #3 + srsra v0.2d, v1.2d, #3 + +// CHECK: srsra v0.8b, v1.8b, #3 // encoding: [0x20,0x34,0x0d,0x0f] +// CHECK: srsra v0.4h, v1.4h, #3 // encoding: [0x20,0x34,0x1d,0x0f] +// CHECK: srsra v0.2s, v1.2s, #3 // encoding: [0x20,0x34,0x3d,0x0f] +// CHECK: srsra v0.16b, v1.16b, #3 // encoding: [0x20,0x34,0x0d,0x4f] +// CHECK: srsra v0.8h, v1.8h, #3 // encoding: [0x20,0x34,0x1d,0x4f] +// CHECK: srsra v0.4s, v1.4s, #3 // encoding: [0x20,0x34,0x3d,0x4f] +// CHECK: srsra v0.2d, v1.2d, #3 // encoding: [0x20,0x34,0x7d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector rounding shift right and accumulate by immediate +//------------------------------------------------------------------------------ + ursra v0.8b, v1.8b, #3 + ursra v0.4h, v1.4h, #3 + ursra v0.2s, v1.2s, #3 + ursra v0.16b, v1.16b, #3 + ursra v0.8h, v1.8h, #3 + ursra v0.4s, v1.4s, #3 + ursra v0.2d, v1.2d, #3 + +// CHECK: ursra v0.8b, v1.8b, #3 // encoding: [0x20,0x34,0x0d,0x2f] +// CHECK: ursra v0.4h, v1.4h, #3 // encoding: [0x20,0x34,0x1d,0x2f] +// CHECK: ursra v0.2s, v1.2s, #3 // encoding: [0x20,0x34,0x3d,0x2f] +// CHECK: ursra v0.16b, v1.16b, #3 // encoding: [0x20,0x34,0x0d,0x6f] +// CHECK: ursra v0.8h, v1.8h, #3 // encoding: [0x20,0x34,0x1d,0x6f] +// CHECK: ursra v0.4s, v1.4s, #3 // encoding: [0x20,0x34,0x3d,0x6f] +// CHECK: ursra v0.2d, v1.2d, #3 // encoding: [0x20,0x34,0x7d,0x6f] + + +//------------------------------------------------------------------------------ +// Vector shift right and insert by immediate +//------------------------------------------------------------------------------ + sri v0.8b, v1.8b, #3 + sri v0.4h, v1.4h, #3 + sri v0.2s, v1.2s, #3 + sri v0.16b, v1.16b, #3 + sri v0.8h, v1.8h, #3 + sri v0.4s, v1.4s, #3 + sri v0.2d, v1.2d, #3 + +// CHECK: sri v0.8b, v1.8b, #3 // encoding: [0x20,0x44,0x0d,0x2f] +// CHECK: sri v0.4h, v1.4h, #3 // encoding: [0x20,0x44,0x1d,0x2f] +// CHECK: sri v0.2s, v1.2s, #3 // encoding: [0x20,0x44,0x3d,0x2f] +// CHECK: sri v0.16b, v1.16b, #3 // encoding: [0x20,0x44,0x0d,0x6f] +// CHECK: sri v0.8h, v1.8h, #3 // encoding: [0x20,0x44,0x1d,0x6f] +// CHECK: sri v0.4s, v1.4s, #3 // encoding: [0x20,0x44,0x3d,0x6f] +// CHECK: sri v0.2d, v1.2d, #3 // encoding: [0x20,0x44,0x7d,0x6f] + + +//------------------------------------------------------------------------------ +// Vector shift left and insert by immediate +//------------------------------------------------------------------------------ + sli v0.8b, v1.8b, #3 + sli v0.4h, v1.4h, #3 + sli v0.2s, v1.2s, #3 + sli v0.16b, v1.16b, #3 + sli v0.8h, v1.8h, #3 + sli v0.4s, v1.4s, #3 + sli v0.2d, v1.2d, #3 + +// CHECK: sli v0.8b, v1.8b, #3 // encoding: [0x20,0x54,0x0b,0x2f] +// CHECK: sli v0.4h, v1.4h, #3 // encoding: [0x20,0x54,0x13,0x2f] +// CHECK: sli v0.2s, v1.2s, #3 // encoding: [0x20,0x54,0x23,0x2f] +// CHECK: sli v0.16b, v1.16b, #3 // encoding: [0x20,0x54,0x0b,0x6f] +// CHECK: sli v0.8h, v1.8h, #3 // encoding: [0x20,0x54,0x13,0x6f] +// CHECK: sli v0.4s, v1.4s, #3 // encoding: [0x20,0x54,0x23,0x6f] +// CHECK: sli v0.2d, v1.2d, #3 // encoding: [0x20,0x54,0x43,0x6f] + +//------------------------------------------------------------------------------ +// Vector saturating shift left unsigned by immediate +//------------------------------------------------------------------------------ + sqshlu v0.8b, v1.8b, #3 + sqshlu v0.4h, v1.4h, #3 + sqshlu
v0.2s, v1.2s, #3 + sqshlu v0.16b, v1.16b, #3 + sqshlu v0.8h, v1.8h, #3 + sqshlu v0.4s, v1.4s, #3 + sqshlu v0.2d, v1.2d, #3 + +// CHECK: sqshlu v0.8b, v1.8b, #3 // encoding: [0x20,0x64,0x0b,0x2f] +// CHECK: sqshlu v0.4h, v1.4h, #3 // encoding: [0x20,0x64,0x13,0x2f] +// CHECK: sqshlu v0.2s, v1.2s, #3 // encoding: [0x20,0x64,0x23,0x2f] +// CHECK: sqshlu v0.16b, v1.16b, #3 // encoding: [0x20,0x64,0x0b,0x6f] +// CHECK: sqshlu v0.8h, v1.8h, #3 // encoding: [0x20,0x64,0x13,0x6f] +// CHECK: sqshlu v0.4s, v1.4s, #3 // encoding: [0x20,0x64,0x23,0x6f] +// CHECK: sqshlu v0.2d, v1.2d, #3 // encoding: [0x20,0x64,0x43,0x6f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift left by immediate +//------------------------------------------------------------------------------ + sqshl v0.8b, v1.8b, #3 + sqshl v0.4h, v1.4h, #3 + sqshl v0.2s, v1.2s, #3 + sqshl v0.16b, v1.16b, #3 + sqshl v0.8h, v1.8h, #3 + sqshl v0.4s, v1.4s, #3 + sqshl v0.2d, v1.2d, #3 + +// CHECK: sqshl v0.8b, v1.8b, #3 // encoding: [0x20,0x74,0x0b,0x0f] +// CHECK: sqshl v0.4h, v1.4h, #3 // encoding: [0x20,0x74,0x13,0x0f] +// CHECK: sqshl v0.2s, v1.2s, #3 // encoding: [0x20,0x74,0x23,0x0f] +// CHECK: sqshl v0.16b, v1.16b, #3 // encoding: [0x20,0x74,0x0b,0x4f] +// CHECK: sqshl v0.8h, v1.8h, #3 // encoding: [0x20,0x74,0x13,0x4f] +// CHECK: sqshl v0.4s, v1.4s, #3 // encoding: [0x20,0x74,0x23,0x4f] +// CHECK: sqshl v0.2d, v1.2d, #3 // encoding: [0x20,0x74,0x43,0x4f] + + + +//------------------------------------------------------------------------------ +// Vector saturating shift left by immediate +//------------------------------------------------------------------------------ + uqshl v0.8b, v1.8b, #3 + uqshl v0.4h, v1.4h, #3 + uqshl v0.2s, v1.2s, #3 + uqshl v0.16b, v1.16b, #3 + uqshl v0.8h, v1.8h, #3 + uqshl v0.4s, v1.4s, #3 + uqshl v0.2d, v1.2d, #3 + +// CHECK: uqshl v0.8b, v1.8b, #3 // encoding: [0x20,0x74,0x0b,0x2f] +// CHECK: uqshl v0.4h, v1.4h, #3 // encoding: [0x20,0x74,0x13,0x2f] +// CHECK: uqshl v0.2s, v1.2s, #3 // encoding: [0x20,0x74,0x23,0x2f] +// CHECK: uqshl v0.16b, v1.16b, #3 // encoding: [0x20,0x74,0x0b,0x6f] +// CHECK: uqshl v0.8h, v1.8h, #3 // encoding: [0x20,0x74,0x13,0x6f] +// CHECK: uqshl v0.4s, v1.4s, #3 // encoding: [0x20,0x74,0x23,0x6f] +// CHECK: uqshl v0.2d, v1.2d, #3 // encoding: [0x20,0x74,0x43,0x6f] + + +//------------------------------------------------------------------------------ +// Vector shift right narrow by immediate +//------------------------------------------------------------------------------ + shrn v0.8b, v1.8h, #3 + shrn v0.4h, v1.4s, #3 + shrn v0.2s, v1.2d, #3 + shrn2 v0.16b, v1.8h, #3 + shrn2 v0.8h, v1.4s, #3 + shrn2 v0.4s, v1.2d, #3 + +// CHECK: shrn v0.8b, v1.8h, #3 // encoding: [0x20,0x84,0x0d,0x0f] +// CHECK: shrn v0.4h, v1.4s, #3 // encoding: [0x20,0x84,0x1d,0x0f] +// CHECK: shrn v0.2s, v1.2d, #3 // encoding: [0x20,0x84,0x3d,0x0f] +// CHECK: shrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x84,0x0d,0x4f] +// CHECK: shrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x84,0x1d,0x4f] +// CHECK: shrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x84,0x3d,0x4f] + +//------------------------------------------------------------------------------ +// Vector saturating shift right unsigned narrow by immediate +//------------------------------------------------------------------------------ + sqshrun v0.8b, v1.8h, #3 + sqshrun v0.4h, v1.4s, #3 + sqshrun v0.2s, v1.2d, #3 + sqshrun2 v0.16b, v1.8h, #3 + sqshrun2 v0.8h, v1.4s, #3 + sqshrun2 v0.4s, v1.2d, #3 + +// 
CHECK: sqshrun v0.8b, v1.8h, #3 // encoding: [0x20,0x84,0x0d,0x2f] +// CHECK: sqshrun v0.4h, v1.4s, #3 // encoding: [0x20,0x84,0x1d,0x2f] +// CHECK: sqshrun v0.2s, v1.2d, #3 // encoding: [0x20,0x84,0x3d,0x2f] +// CHECK: sqshrun2 v0.16b, v1.8h, #3 // encoding: [0x20,0x84,0x0d,0x6f] +// CHECK: sqshrun2 v0.8h, v1.4s, #3 // encoding: [0x20,0x84,0x1d,0x6f] +// CHECK: sqshrun2 v0.4s, v1.2d, #3 // encoding: [0x20,0x84,0x3d,0x6f] + +//------------------------------------------------------------------------------ +// Vector rounding shift right narrow by immediate +//------------------------------------------------------------------------------ + rshrn v0.8b, v1.8h, #3 + rshrn v0.4h, v1.4s, #3 + rshrn v0.2s, v1.2d, #3 + rshrn2 v0.16b, v1.8h, #3 + rshrn2 v0.8h, v1.4s, #3 + rshrn2 v0.4s, v1.2d, #3 + +// CHECK: rshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x8c,0x0d,0x0f] +// CHECK: rshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x8c,0x1d,0x0f] +// CHECK: rshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x8c,0x3d,0x0f] +// CHECK: rshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x8c,0x0d,0x4f] +// CHECK: rshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x8c,0x1d,0x4f] +// CHECK: rshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x8c,0x3d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded unsigned narrow by immediate +//------------------------------------------------------------------------------ + sqrshrun v0.8b, v1.8h, #3 + sqrshrun v0.4h, v1.4s, #3 + sqrshrun v0.2s, v1.2d, #3 + sqrshrun2 v0.16b, v1.8h, #3 + sqrshrun2 v0.8h, v1.4s, #3 + sqrshrun2 v0.4s, v1.2d, #3 + +// CHECK: sqrshrun v0.8b, v1.8h, #3 // encoding: [0x20,0x8c,0x0d,0x2f] +// CHECK: sqrshrun v0.4h, v1.4s, #3 // encoding: [0x20,0x8c,0x1d,0x2f] +// CHECK: sqrshrun v0.2s, v1.2d, #3 // encoding: [0x20,0x8c,0x3d,0x2f] +// CHECK: sqrshrun2 v0.16b, v1.8h, #3 // encoding: [0x20,0x8c,0x0d,0x6f] +// CHECK: sqrshrun2 v0.8h, v1.4s, #3 // encoding: [0x20,0x8c,0x1d,0x6f] +// CHECK: sqrshrun2 v0.4s, v1.2d, #3 // encoding: [0x20,0x8c,0x3d,0x6f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift right narrow by immediate +//------------------------------------------------------------------------------ + sqshrn v0.8b, v1.8h, #3 + sqshrn v0.4h, v1.4s, #3 + sqshrn v0.2s, v1.2d, #3 + sqshrn2 v0.16b, v1.8h, #3 + sqshrn2 v0.8h, v1.4s, #3 + sqshrn2 v0.4s, v1.2d, #3 + +// CHECK: sqshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x94,0x0d,0x0f] +// CHECK: sqshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x94,0x1d,0x0f] +// CHECK: sqshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x94,0x3d,0x0f] +// CHECK: sqshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x94,0x0d,0x4f] +// CHECK: sqshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x94,0x1d,0x4f] +// CHECK: sqshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x94,0x3d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift right narrow by immediate +//------------------------------------------------------------------------------ + uqshrn v0.8b, v1.8h, #3 + uqshrn v0.4h, v1.4s, #3 + uqshrn v0.2s, v1.2d, #3 + uqshrn2 v0.16b, v1.8h, #3 + uqshrn2 v0.8h, v1.4s, #3 + uqshrn2 v0.4s, v1.2d, #3 + +// CHECK: uqshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x94,0x0d,0x2f] +// CHECK: uqshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x94,0x1d,0x2f] +// CHECK: uqshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x94,0x3d,0x2f] +// CHECK: uqshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x94,0x0d,0x6f] +// CHECK: 
uqshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x94,0x1d,0x6f] +// CHECK: uqshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x94,0x3d,0x6f] + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded narrow by immediate +//------------------------------------------------------------------------------ + sqrshrn v0.8b, v1.8h, #3 + sqrshrn v0.4h, v1.4s, #3 + sqrshrn v0.2s, v1.2d, #3 + sqrshrn2 v0.16b, v1.8h, #3 + sqrshrn2 v0.8h, v1.4s, #3 + sqrshrn2 v0.4s, v1.2d, #3 + +// CHECK: sqrshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x9c,0x0d,0x0f] +// CHECK: sqrshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x9c,0x1d,0x0f] +// CHECK: sqrshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x9c,0x3d,0x0f] +// CHECK: sqrshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x9c,0x0d,0x4f] +// CHECK: sqrshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x9c,0x1d,0x4f] +// CHECK: sqrshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x9c,0x3d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded narrow by immediate +//------------------------------------------------------------------------------ + uqrshrn v0.8b, v1.8h, #3 + uqrshrn v0.4h, v1.4s, #3 + uqrshrn v0.2s, v1.2d, #3 + uqrshrn2 v0.16b, v1.8h, #3 + uqrshrn2 v0.8h, v1.4s, #3 + uqrshrn2 v0.4s, v1.2d, #3 + +// CHECK: uqrshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x9c,0x0d,0x2f] +// CHECK: uqrshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x9c,0x1d,0x2f] +// CHECK: uqrshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x9c,0x3d,0x2f] +// CHECK: uqrshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x9c,0x0d,0x6f] +// CHECK: uqrshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x9c,0x1d,0x6f] +// CHECK: uqrshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x9c,0x3d,0x6f] + + +//------------------------------------------------------------------------------ +// Fixed-point convert to floating-point +//------------------------------------------------------------------------------ + scvtf v0.2s, v1.2s, #3 + scvtf v0.4s, v1.4s, #3 + scvtf v0.2d, v1.2d, #3 + ucvtf v0.2s, v1.2s, #3 + ucvtf v0.4s, v1.4s, #3 + ucvtf v0.2d, v1.2d, #3 + +// CHECK: scvtf v0.2s, v1.2s, #3 // encoding: [0x20,0xe4,0x3d,0x0f] +// CHECK: scvtf v0.4s, v1.4s, #3 // encoding: [0x20,0xe4,0x3d,0x4f] +// CHECK: scvtf v0.2d, v1.2d, #3 // encoding: [0x20,0xe4,0x7d,0x4f] +// CHECK: ucvtf v0.2s, v1.2s, #3 // encoding: [0x20,0xe4,0x3d,0x2f] +// CHECK: ucvtf v0.4s, v1.4s, #3 // encoding: [0x20,0xe4,0x3d,0x6f] +// CHECK: ucvtf v0.2d, v1.2d, #3 // encoding: [0x20,0xe4,0x7d,0x6f] + +//------------------------------------------------------------------------------ +// Floating-point convert to fixed-point +//------------------------------------------------------------------------------ + fcvtzs v0.2s, v1.2s, #3 + fcvtzs v0.4s, v1.4s, #3 + fcvtzs v0.2d, v1.2d, #3 + fcvtzu v0.2s, v1.2s, #3 + fcvtzu v0.4s, v1.4s, #3 + fcvtzu v0.2d, v1.2d, #3 + + +// CHECK: fcvtzs v0.2s, v1.2s, #3 // encoding: [0x20,0xfc,0x3d,0x0f] +// CHECK: fcvtzs v0.4s, v1.4s, #3 // encoding: [0x20,0xfc,0x3d,0x4f] +// CHECK: fcvtzs v0.2d, v1.2d, #3 // encoding: [0x20,0xfc,0x7d,0x4f] +// CHECK: fcvtzu v0.2s, v1.2s, #3 // encoding: [0x20,0xfc,0x3d,0x2f] +// CHECK: fcvtzu v0.4s, v1.4s, #3 // encoding: [0x20,0xfc,0x3d,0x6f] +// CHECK: fcvtzu v0.2d, v1.2d, #3 // encoding: [0x20,0xfc,0x7d,0x6f] + diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index e599abaacd0..a7029b21408 100644 --- 
a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -694,3 +694,398 @@ 0x20 0xa4 0x13 0x4f 0x20 0xa4 0x13 0x2f 0x20 0xa4 0x0b 0x6f + +#----------------------------------------------------------------------------- +#Integer shift right (Signed) +#----------------------------------------------------------------------------- +# CHECK: sshr v0.8b, v1.8b, #3 +# CHECK: sshr v0.4h, v1.4h, #3 +# CHECK: sshr v0.2s, v1.2s, #3 +# CHECK: sshr v0.16b, v1.16b, #3 +# CHECK: sshr v0.8h, v1.8h, #3 +# CHECK: sshr v0.4s, v1.4s, #3 +# CHECK: sshr v0.2d, v1.2d, #3 +0x20,0x04,0x0d,0x0f +0x20,0x04,0x1d,0x0f +0x20,0x04,0x3d,0x0f +0x20,0x04,0x0d,0x4f +0x20,0x04,0x1d,0x4f +0x20,0x04,0x3d,0x4f +0x20,0x04,0x7d,0x4f + +#----------------------------------------------------------------------------- +#Integer shift right (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: ushr v0.8b, v1.8b, #3 +# CHECK: ushr v0.4h, v1.4h, #3 +# CHECK: ushr v0.2s, v1.2s, #3 +# CHECK: ushr v0.16b, v1.16b, #3 +# CHECK: ushr v0.8h, v1.8h, #3 +# CHECK: ushr v0.4s, v1.4s, #3 +# CHECK: ushr v0.2d, v1.2d, #3 +0x20,0x04,0x0d,0x2f +0x20,0x04,0x1d,0x2f +0x20,0x04,0x3d,0x2f +0x20,0x04,0x0d,0x6f +0x20,0x04,0x1d,0x6f +0x20,0x04,0x3d,0x6f +0x20,0x04,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer shift right and accumulate (Signed) +#----------------------------------------------------------------------------- +# CHECK: ssra v0.8b, v1.8b, #3 +# CHECK: ssra v0.4h, v1.4h, #3 +# CHECK: ssra v0.2s, v1.2s, #3 +# CHECK: ssra v0.16b, v1.16b, #3 +# CHECK: ssra v0.8h, v1.8h, #3 +# CHECK: ssra v0.4s, v1.4s, #3 +# CHECK: ssra v0.2d, v1.2d, #3 +0x20,0x14,0x0d,0x0f +0x20,0x14,0x1d,0x0f +0x20,0x14,0x3d,0x0f +0x20,0x14,0x0d,0x4f +0x20,0x14,0x1d,0x4f +0x20,0x14,0x3d,0x4f +0x20,0x14,0x7d,0x4f + +#----------------------------------------------------------------------------- +#Integer shift right and accumulate (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: usra v0.8b, v1.8b, #3 +# CHECK: usra v0.4h, v1.4h, #3 +# CHECK: usra v0.2s, v1.2s, #3 +# CHECK: usra v0.16b, v1.16b, #3 +# CHECK: usra v0.8h, v1.8h, #3 +# CHECK: usra v0.4s, v1.4s, #3 +# CHECK: usra v0.2d, v1.2d, #3 +0x20,0x14,0x0d,0x2f +0x20,0x14,0x1d,0x2f +0x20,0x14,0x3d,0x2f +0x20,0x14,0x0d,0x6f +0x20,0x14,0x1d,0x6f +0x20,0x14,0x3d,0x6f +0x20,0x14,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer rounding shift right (Signed) +#----------------------------------------------------------------------------- +# CHECK: srshr v0.8b, v1.8b, #3 +# CHECK: srshr v0.4h, v1.4h, #3 +# CHECK: srshr v0.2s, v1.2s, #3 +# CHECK: srshr v0.16b, v1.16b, #3 +# CHECK: srshr v0.8h, v1.8h, #3 +# CHECK: srshr v0.4s, v1.4s, #3 +# CHECK: srshr v0.2d, v1.2d, #3 +0x20,0x24,0x0d,0x0f +0x20,0x24,0x1d,0x0f +0x20,0x24,0x3d,0x0f +0x20,0x24,0x0d,0x4f +0x20,0x24,0x1d,0x4f +0x20,0x24,0x3d,0x4f +0x20,0x24,0x7d,0x4f + +#----------------------------------------------------------------------------- +#Integer rounding shift right (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: urshr v0.8b, v1.8b, #3 +# CHECK: urshr v0.4h, v1.4h, #3 +# CHECK: urshr v0.2s, v1.2s, #3 +# CHECK: urshr v0.16b, v1.16b, #3 +# CHECK: urshr v0.8h, v1.8h, #3 +# CHECK: urshr v0.4s, v1.4s, #3 +# CHECK: urshr v0.2d, v1.2d, #3 +0x20,0x24,0x0d,0x2f +0x20,0x24,0x1d,0x2f 
+0x20,0x24,0x3d,0x2f +0x20,0x24,0x0d,0x6f +0x20,0x24,0x1d,0x6f +0x20,0x24,0x3d,0x6f +0x20,0x24,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer rounding shift right and accumulate (Signed) +#----------------------------------------------------------------------------- +# CHECK: srsra v0.8b, v1.8b, #3 +# CHECK: srsra v0.4h, v1.4h, #3 +# CHECK: srsra v0.2s, v1.2s, #3 +# CHECK: srsra v0.16b, v1.16b, #3 +# CHECK: srsra v0.8h, v1.8h, #3 +# CHECK: srsra v0.4s, v1.4s, #3 +# CHECK: srsra v0.2d, v1.2d, #3 +0x20,0x34,0x0d,0x0f +0x20,0x34,0x1d,0x0f +0x20,0x34,0x3d,0x0f +0x20,0x34,0x0d,0x4f +0x20,0x34,0x1d,0x4f +0x20,0x34,0x3d,0x4f +0x20,0x34,0x7d,0x4f + +#----------------------------------------------------------------------------- +#Integer rounding shift right and accumulate (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: ursra v0.8b, v1.8b, #3 +# CHECK: ursra v0.4h, v1.4h, #3 +# CHECK: ursra v0.2s, v1.2s, #3 +# CHECK: ursra v0.16b, v1.16b, #3 +# CHECK: ursra v0.8h, v1.8h, #3 +# CHECK: ursra v0.4s, v1.4s, #3 +# CHECK: ursra v0.2d, v1.2d, #3 +0x20,0x34,0x0d,0x2f +0x20,0x34,0x1d,0x2f +0x20,0x34,0x3d,0x2f +0x20,0x34,0x0d,0x6f +0x20,0x34,0x1d,0x6f +0x20,0x34,0x3d,0x6f +0x20,0x34,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer shift right and insert +#----------------------------------------------------------------------------- +# CHECK: sri v0.8b, v1.8b, #3 +# CHECK: sri v0.4h, v1.4h, #3 +# CHECK: sri v0.2s, v1.2s, #3 +# CHECK: sri v0.16b, v1.16b, #3 +# CHECK: sri v0.8h, v1.8h, #3 +# CHECK: sri v0.4s, v1.4s, #3 +# CHECK: sri v0.2d, v1.2d, #3 +0x20,0x44,0x0d,0x2f +0x20,0x44,0x1d,0x2f +0x20,0x44,0x3d,0x2f +0x20,0x44,0x0d,0x6f +0x20,0x44,0x1d,0x6f +0x20,0x44,0x3d,0x6f +0x20,0x44,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer shift left and insert +#----------------------------------------------------------------------------- +# CHECK: sli v0.8b, v1.8b, #3 +# CHECK: sli v0.4h, v1.4h, #3 +# CHECK: sli v0.2s, v1.2s, #3 +# CHECK: sli v0.16b, v1.16b, #3 +# CHECK: sli v0.8h, v1.8h, #3 +# CHECK: sli v0.4s, v1.4s, #3 +# CHECK: sli v0.2d, v1.2d, #3 +0x20,0x54,0x0b,0x2f +0x20,0x54,0x13,0x2f +0x20,0x54,0x23,0x2f +0x20,0x54,0x0b,0x6f +0x20,0x54,0x13,0x6f +0x20,0x54,0x23,0x6f +0x20,0x54,0x43,0x6f + +#----------------------------------------------------------------------------- +#Integer saturating shift left unsigned +#----------------------------------------------------------------------------- +# CHECK: sqshlu v0.8b, v1.8b, #3 +# CHECK: sqshlu v0.4h, v1.4h, #3 +# CHECK: sqshlu v0.2s, v1.2s, #3 +# CHECK: sqshlu v0.16b, v1.16b, #3 +# CHECK: sqshlu v0.8h, v1.8h, #3 +# CHECK: sqshlu v0.4s, v1.4s, #3 +# CHECK: sqshlu v0.2d, v1.2d, #3 +0x20,0x64,0x0b,0x2f +0x20,0x64,0x13,0x2f +0x20,0x64,0x23,0x2f +0x20,0x64,0x0b,0x6f +0x20,0x64,0x13,0x6f +0x20,0x64,0x23,0x6f +0x20,0x64,0x43,0x6f + +#----------------------------------------------------------------------------- +#Integer saturating shift left (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqshl v0.8b, v1.8b, #3 +# CHECK: sqshl v0.4h, v1.4h, #3 +# CHECK: sqshl v0.2s, v1.2s, #3 +# CHECK: sqshl v0.16b, v1.16b, #3 +# CHECK: sqshl v0.8h, v1.8h, #3 +# CHECK: sqshl v0.4s, v1.4s, #3 +# CHECK: sqshl v0.2d, v1.2d, #3 +0x20,0x74,0x0b,0x0f +0x20,0x74,0x13,0x0f +0x20,0x74,0x23,0x0f +0x20,0x74,0x0b,0x4f +0x20,0x74,0x13,0x4f 
+0x20,0x74,0x23,0x4f +0x20,0x74,0x43,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift left (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: uqshl v0.8b, v1.8b, #3 +# CHECK: uqshl v0.4h, v1.4h, #3 +# CHECK: uqshl v0.2s, v1.2s, #3 +# CHECK: uqshl v0.16b, v1.16b, #3 +# CHECK: uqshl v0.8h, v1.8h, #3 +# CHECK: uqshl v0.4s, v1.4s, #3 +# CHECK: uqshl v0.2d, v1.2d, #3 +0x20,0x74,0x0b,0x2f +0x20,0x74,0x13,0x2f +0x20,0x74,0x23,0x2f +0x20,0x74,0x0b,0x6f +0x20,0x74,0x13,0x6f +0x20,0x74,0x23,0x6f +0x20,0x74,0x43,0x6f + +#----------------------------------------------------------------------------- +#Integer shift right narrow +#----------------------------------------------------------------------------- +# CHECK: shrn v0.8b, v1.8h, #3 +# CHECK: shrn v0.4h, v1.4s, #3 +# CHECK: shrn v0.2s, v1.2d, #3 +# CHECK: shrn2 v0.16b, v1.8h, #3 +# CHECK: shrn2 v0.8h, v1.4s, #3 +# CHECK: shrn2 v0.4s, v1.2d, #3 +0x20,0x84,0x0d,0x0f +0x20,0x84,0x1d,0x0f +0x20,0x84,0x3d,0x0f +0x20,0x84,0x0d,0x4f +0x20,0x84,0x1d,0x4f +0x20,0x84,0x3d,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift right unsigned narrow (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqshrun v0.8b, v1.8h, #3 +# CHECK: sqshrun v0.4h, v1.4s, #3 +# CHECK: sqshrun v0.2s, v1.2d, #3 +# CHECK: sqshrun2 v0.16b, v1.8h, #3 +# CHECK: sqshrun2 v0.8h, v1.4s, #3 +# CHECK: sqshrun2 v0.4s, v1.2d, #3 +0x20,0x84,0x0d,0x2f +0x20,0x84,0x1d,0x2f +0x20,0x84,0x3d,0x2f +0x20,0x84,0x0d,0x6f +0x20,0x84,0x1d,0x6f +0x20,0x84,0x3d,0x6f + +#----------------------------------------------------------------------------- +#Integer rounding shift right narrow +#----------------------------------------------------------------------------- +# CHECK: rshrn v0.8b, v1.8h, #3 +# CHECK: rshrn v0.4h, v1.4s, #3 +# CHECK: rshrn v0.2s, v1.2d, #3 +# CHECK: rshrn2 v0.16b, v1.8h, #3 +# CHECK: rshrn2 v0.8h, v1.4s, #3 +# CHECK: rshrn2 v0.4s, v1.2d, #3 +0x20,0x8c,0x0d,0x0f +0x20,0x8c,0x1d,0x0f +0x20,0x8c,0x3d,0x0f +0x20,0x8c,0x0d,0x4f +0x20,0x8c,0x1d,0x4f +0x20,0x8c,0x3d,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift right rounded unsigned narrow (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqrshrun v0.8b, v1.8h, #3 +# CHECK: sqrshrun v0.4h, v1.4s, #3 +# CHECK: sqrshrun v0.2s, v1.2d, #3 +# CHECK: sqrshrun2 v0.16b, v1.8h, #3 +# CHECK: sqrshrun2 v0.8h, v1.4s, #3 +# CHECK: sqrshrun2 v0.4s, v1.2d, #3 +0x20,0x8c,0x0d,0x2f +0x20,0x8c,0x1d,0x2f +0x20,0x8c,0x3d,0x2f +0x20,0x8c,0x0d,0x6f +0x20,0x8c,0x1d,0x6f +0x20,0x8c,0x3d,0x6f + +#----------------------------------------------------------------------------- +#Integer saturating shift right narrow (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqshrn v0.8b, v1.8h, #3 +# CHECK: sqshrn v0.4h, v1.4s, #3 +# CHECK: sqshrn v0.2s, v1.2d, #3 +# CHECK: sqshrn2 v0.16b, v1.8h, #3 +# CHECK: sqshrn2 v0.8h, v1.4s, #3 +# CHECK: sqshrn2 v0.4s, v1.2d, #3 +0x20,0x94,0x0d,0x0f +0x20,0x94,0x1d,0x0f +0x20,0x94,0x3d,0x0f +0x20,0x94,0x0d,0x4f +0x20,0x94,0x1d,0x4f +0x20,0x94,0x3d,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift right narrow (Unsigned) 
+#----------------------------------------------------------------------------- +# CHECK: uqshrn v0.8b, v1.8h, #3 +# CHECK: uqshrn v0.4h, v1.4s, #3 +# CHECK: uqshrn v0.2s, v1.2d, #3 +# CHECK: uqshrn2 v0.16b, v1.8h, #3 +# CHECK: uqshrn2 v0.8h, v1.4s, #3 +# CHECK: uqshrn2 v0.4s, v1.2d, #3 +0x20,0x94,0x0d,0x2f +0x20,0x94,0x1d,0x2f +0x20,0x94,0x3d,0x2f +0x20,0x94,0x0d,0x6f +0x20,0x94,0x1d,0x6f +0x20,0x94,0x3d,0x6f + +#----------------------------------------------------------------------------- +#Integer saturating shift right rounded narrow (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqrshrn v0.8b, v1.8h, #3 +# CHECK: sqrshrn v0.4h, v1.4s, #3 +# CHECK: sqrshrn v0.2s, v1.2d, #3 +# CHECK: sqrshrn2 v0.16b, v1.8h, #3 +# CHECK: sqrshrn2 v0.8h, v1.4s, #3 +# CHECK: sqrshrn2 v0.4s, v1.2d, #3 +0x20,0x9c,0x0d,0x0f +0x20,0x9c,0x1d,0x0f +0x20,0x9c,0x3d,0x0f +0x20,0x9c,0x0d,0x4f +0x20,0x9c,0x1d,0x4f +0x20,0x9c,0x3d,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift right rounded narrow (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: uqrshrn v0.8b, v1.8h, #3 +# CHECK: uqrshrn v0.4h, v1.4s, #3 +# CHECK: uqrshrn v0.2s, v1.2d, #3 +# CHECK: uqrshrn2 v0.16b, v1.8h, #3 +# CHECK: uqrshrn2 v0.8h, v1.4s, #3 +# CHECK: uqrshrn2 v0.4s, v1.2d, #3 +0x20,0x9c,0x0d,0x2f +0x20,0x9c,0x1d,0x2f +0x20,0x9c,0x3d,0x2f +0x20,0x9c,0x0d,0x6f +0x20,0x9c,0x1d,0x6f +0x20,0x9c,0x3d,0x6f + +#----------------------------------------------------------------------------- +#Fixed-point convert to floating-point +#----------------------------------------------------------------------------- +# CHECK: scvtf v0.2s, v1.2s, #3 +# CHECK: scvtf v0.4s, v1.4s, #3 +# CHECK: scvtf v0.2d, v1.2d, #3 +# CHECK: ucvtf v0.2s, v1.2s, #3 +# CHECK: ucvtf v0.4s, v1.4s, #3 +# CHECK: ucvtf v0.2d, v1.2d, #3 + +0x20,0xe4,0x3d,0x0f +0x20,0xe4,0x3d,0x4f +0x20,0xe4,0x7d,0x4f +0x20,0xe4,0x3d,0x2f +0x20,0xe4,0x3d,0x6f +0x20,0xe4,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Floating-point convert to fixed-point +#----------------------------------------------------------------------------- +# CHECK: fcvtzs v0.2s, v1.2s, #3 +# CHECK: fcvtzs v0.4s, v1.4s, #3 +# CHECK: fcvtzs v0.2d, v1.2d, #3 +# CHECK: fcvtzu v0.2s, v1.2s, #3 +# CHECK: fcvtzu v0.4s, v1.4s, #3 +# CHECK: fcvtzu v0.2d, v1.2d, #3 +0x20,0xfc,0x3d,0x0f +0x20,0xfc,0x3d,0x4f +0x20,0xfc,0x7d,0x4f +0x20,0xfc,0x3d,0x2f +0x20,0xfc,0x3d,0x6f +0x20,0xfc,0x7d,0x6f
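
A note for anyone cross-checking the encodings in these tests by hand: in the AdvSIMD shift-by-immediate format the third byte of each listed encoding is the immh:immb field. For the right shifts, narrowing shifts and fixed-point converts above it holds (2 * element-bits) - shift (so #3 on a .8b source, or on a narrow to .8b, gives 0x0d, and #3 on .2d gives 0x7d), while the left-shift forms here (sli, sqshlu, sqshl, uqshl) hold element-bits + shift (0x0b for .8b, 0x43 for .2d). The last byte carries the Q and U bits, which is why each signed/unsigned and 64-bit/128-bit variant of the same instruction differs only there (0x0f, 0x4f, 0x2f, 0x6f). Below is only a minimal sketch of that scheme for checking the CHECK lines; the helper names are illustrative and are not part of the patch.

// Sketch of the immh:immb computation, assuming the standard AArch64
// AdvSIMD shift-by-immediate scheme; the values below match the third
// byte of the corresponding CHECK encodings.
#include <cassert>

// Right shifts, narrowing shifts and fixed-point converts: 2*EltBits - Shift.
// For the narrowing forms, EltBits is the width of the narrow destination.
static unsigned rightShiftImm(unsigned EltBits, unsigned Shift) {
  return 2 * EltBits - Shift;
}

// Left shifts (sli, sqshlu, sqshl, uqshl): EltBits + Shift.
static unsigned leftShiftImm(unsigned EltBits, unsigned Shift) {
  return EltBits + Shift;
}

int main() {
  assert(rightShiftImm(8, 3) == 0x0d);  // sshr v0.8b, v1.8b, #3 / shrn v0.8b, v1.8h, #3
  assert(rightShiftImm(64, 3) == 0x7d); // sshr v0.2d, v1.2d, #3
  assert(rightShiftImm(32, 3) == 0x3d); // scvtf v0.2s, v1.2s, #3
  assert(leftShiftImm(8, 3) == 0x0b);   // sli v0.8b, v1.8b, #3
  assert(leftShiftImm(64, 3) == 0x43);  // sli v0.2d, v1.2d, #3
  return 0;
}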