test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll

   1 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
   2
   3 ; arm64 already has these tests. This file is essentially a copy/paste of the
   4 ; Clang output for arm_neon.h.
   5
   6 define void @test_ldst1_v16i8(<16 x i8>* %ptr, <16 x i8>* %ptr2) {
     ; Expects a plain <16 x i8> load/store pair to be emitted as ld1/st1 on .16b.
     ; The base-register pattern accepts either xN or sp.
   7 ; CHECK-LABEL: test_ldst1_v16i8:
   8 ; CHECK: ld1 { v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
   9 ; CHECK: st1 { v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
  10   %tmp = load <16 x i8>* %ptr
  11   store <16 x i8> %tmp, <16 x i8>* %ptr2
  12   ret void
  13 }
  14
  15 define void @test_ldst1_v8i16(<8 x i16>* %ptr, <8 x i16>* %ptr2) {
     ; Expects a plain <8 x i16> load/store pair to be emitted as ld1/st1 on .8h.
     ; The base-register pattern accepts either xN or sp.
  16 ; CHECK-LABEL: test_ldst1_v8i16:
  17 ; CHECK: ld1 { v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
  18 ; CHECK: st1 { v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
  19   %tmp = load <8 x i16>* %ptr
  20   store <8 x i16> %tmp, <8 x i16>* %ptr2
  21   ret void
  22 }
  23
  24 define void @test_ldst1_v4i32(<4 x i32>* %ptr, <4 x i32>* %ptr2) {
     ; Expects a plain <4 x i32> load/store pair to be emitted as ld1/st1 on .4s.
     ; The base-register pattern accepts either xN or sp.
  25 ; CHECK-LABEL: test_ldst1_v4i32:
  26 ; CHECK: ld1 { v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
  27 ; CHECK: st1 { v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
  28   %tmp = load <4 x i32>* %ptr
  29   store <4 x i32> %tmp, <4 x i32>* %ptr2
  30   ret void
  31 }
  32
  33 define void @test_ldst1_v2i64(<2 x i64>* %ptr, <2 x i64>* %ptr2) {
     ; Expects a plain <2 x i64> load/store pair to be emitted as ld1/st1 on .2d.
     ; The base-register pattern accepts either xN or sp.
  34 ; CHECK-LABEL: test_ldst1_v2i64:
  35 ; CHECK: ld1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
  36 ; CHECK: st1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
  37   %tmp = load <2 x i64>* %ptr
  38   store <2 x i64> %tmp, <2 x i64>* %ptr2
  39   ret void
  40 }
  41
  42 define void @test_ldst1_v8i8(<8 x i8>* %ptr, <8 x i8>* %ptr2) {
     ; Expects a plain <8 x i8> load/store pair to be emitted as ld1/st1 on .8b.
     ; The base-register pattern accepts either xN or sp.
  43 ; CHECK-LABEL: test_ldst1_v8i8:
  44 ; CHECK: ld1 { v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
  45 ; CHECK: st1 { v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
  46   %tmp = load <8 x i8>* %ptr
  47   store <8 x i8> %tmp, <8 x i8>* %ptr2
  48   ret void
  49 }
  50
  51 define void @test_ldst1_v4i16(<4 x i16>* %ptr, <4 x i16>* %ptr2) {
     ; Expects a plain <4 x i16> load/store pair to be emitted as ld1/st1 on .4h.
     ; The base-register pattern accepts either xN or sp.
  52 ; CHECK-LABEL: test_ldst1_v4i16:
  53 ; CHECK: ld1 { v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
  54 ; CHECK: st1 { v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
  55   %tmp = load <4 x i16>* %ptr
  56   store <4 x i16> %tmp, <4 x i16>* %ptr2
  57   ret void
  58 }
  59
  60 define void @test_ldst1_v2i32(<2 x i32>* %ptr, <2 x i32>* %ptr2) {
     ; Expects a plain <2 x i32> load/store pair to be emitted as ld1/st1 on .2s.
     ; The base-register pattern accepts either xN or sp.
  61 ; CHECK-LABEL: test_ldst1_v2i32:
  62 ; CHECK: ld1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
  63 ; CHECK: st1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
  64   %tmp = load <2 x i32>* %ptr
  65   store <2 x i32> %tmp, <2 x i32>* %ptr2
  66   ret void
  67 }
  68
  69 define void @test_ldst1_v1i64(<1 x i64>* %ptr, <1 x i64>* %ptr2) {
     ; Expects a plain <1 x i64> load/store pair to be emitted as ld1/st1 on .1d.
     ; The base-register pattern accepts either xN or sp.
  70 ; CHECK-LABEL: test_ldst1_v1i64:
  71 ; CHECK: ld1 { v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
  72 ; CHECK: st1 { v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
  73   %tmp = load <1 x i64>* %ptr
  74   store <1 x i64> %tmp, <1 x i64>* %ptr2
  75   ret void
  76 }
  77
  78 %struct.int8x16x2_t = type { [2 x <16 x i8>] } ; x2 group: each struct wraps an array of two vectors (return types of the vld2 tests)
  79 %struct.int16x8x2_t = type { [2 x <8 x i16>] }
  80 %struct.int32x4x2_t = type { [2 x <4 x i32>] }
  81 %struct.int64x2x2_t = type { [2 x <2 x i64>] }
  82 %struct.float32x4x2_t = type { [2 x <4 x float>] }
  83 %struct.float64x2x2_t = type { [2 x <2 x double>] }
  84 %struct.int8x8x2_t = type { [2 x <8 x i8>] }
  85 %struct.int16x4x2_t = type { [2 x <4 x i16>] }
  86 %struct.int32x2x2_t = type { [2 x <2 x i32>] }
  87 %struct.int64x1x2_t = type { [2 x <1 x i64>] }
  88 %struct.float32x2x2_t = type { [2 x <2 x float>] }
  89 %struct.float64x1x2_t = type { [2 x <1 x double>] }
  90 %struct.int8x16x3_t = type { [3 x <16 x i8>] } ; x3 group: each struct wraps an array of three vectors (return types of the vld3 tests)
  91 %struct.int16x8x3_t = type { [3 x <8 x i16>] }
  92 %struct.int32x4x3_t = type { [3 x <4 x i32>] }
  93 %struct.int64x2x3_t = type { [3 x <2 x i64>] }
  94 %struct.float32x4x3_t = type { [3 x <4 x float>] }
  95 %struct.float64x2x3_t = type { [3 x <2 x double>] }
  96 %struct.int8x8x3_t = type { [3 x <8 x i8>] }
  97 %struct.int16x4x3_t = type { [3 x <4 x i16>] }
  98 %struct.int32x2x3_t = type { [3 x <2 x i32>] }
  99 %struct.int64x1x3_t = type { [3 x <1 x i64>] }
 100 %struct.float32x2x3_t = type { [3 x <2 x float>] }
 101 %struct.float64x1x3_t = type { [3 x <1 x double>] }
 102 %struct.int8x16x4_t = type { [4 x <16 x i8>] } ; x4 group: each struct wraps an array of four vectors (return types of the vld4 tests)
 103 %struct.int16x8x4_t = type { [4 x <8 x i16>] }
 104 %struct.int32x4x4_t = type { [4 x <4 x i32>] }
 105 %struct.int64x2x4_t = type { [4 x <2 x i64>] }
 106 %struct.float32x4x4_t = type { [4 x <4 x float>] }
 107 %struct.float64x2x4_t = type { [4 x <2 x double>] }
 108 %struct.int8x8x4_t = type { [4 x <8 x i8>] }
 109 %struct.int16x4x4_t = type { [4 x <4 x i16>] }
 110 %struct.int32x2x4_t = type { [4 x <2 x i32>] }
 111 %struct.int64x1x4_t = type { [4 x <1 x i64>] }
 112 %struct.float32x2x4_t = type { [4 x <2 x float>] }
 113 %struct.float64x1x4_t = type { [4 x <1 x double>] }
 114
 115
 116 define <16 x i8> @test_vld1q_s8(i8* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v16i8 to be emitted as a one-register ld1 on .16b.
     ; The base-register pattern accepts either xN or sp.
 117 ; CHECK-LABEL: test_vld1q_s8:
 118 ; CHECK: ld1 { v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
 119   %vld1 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %a, i32 1)
 120   ret <16 x i8> %vld1
 121 }
 122
 123 define <8 x i16> @test_vld1q_s16(i16* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v8i16 to be emitted as a one-register ld1 on .8h.
     ; The base-register pattern accepts either xN or sp.
 124 ; CHECK-LABEL: test_vld1q_s16:
 125 ; CHECK: ld1 { v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
 126   %1 = bitcast i16* %a to i8*
 127   %vld1 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %1, i32 2)
 128   ret <8 x i16> %vld1
 129 }
 130
 131 define <4 x i32> @test_vld1q_s32(i32* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v4i32 to be emitted as a one-register ld1 on .4s.
     ; The base-register pattern accepts either xN or sp.
 132 ; CHECK-LABEL: test_vld1q_s32:
 133 ; CHECK: ld1 { v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
 134   %1 = bitcast i32* %a to i8*
 135   %vld1 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %1, i32 4)
 136   ret <4 x i32> %vld1
 137 }
 138
 139 define <2 x i64> @test_vld1q_s64(i64* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v2i64 to be emitted as a one-register ld1 on .2d.
     ; The base-register pattern accepts either xN or sp.
 140 ; CHECK-LABEL: test_vld1q_s64:
 141 ; CHECK: ld1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 142   %1 = bitcast i64* %a to i8*
 143   %vld1 = tail call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %1, i32 8)
 144   ret <2 x i64> %vld1
 145 }
 146
 147 define <4 x float> @test_vld1q_f32(float* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v4f32 to be emitted as a one-register ld1 on .4s.
     ; The base-register pattern accepts either xN or sp.
 148 ; CHECK-LABEL: test_vld1q_f32:
 149 ; CHECK: ld1 { v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
 150   %1 = bitcast float* %a to i8*
 151   %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %1, i32 4)
 152   ret <4 x float> %vld1
 153 }
 154
 155 define <2 x double> @test_vld1q_f64(double* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v2f64 to be emitted as a one-register ld1 on .2d.
     ; The base-register pattern accepts either xN or sp.
 156 ; CHECK-LABEL: test_vld1q_f64:
 157 ; CHECK: ld1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 158   %1 = bitcast double* %a to i8*
 159   %vld1 = tail call <2 x double> @llvm.arm.neon.vld1.v2f64(i8* %1, i32 8)
 160   ret <2 x double> %vld1
 161 }
 162
 163 define <8 x i8> @test_vld1_s8(i8* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v8i8 to be emitted as a one-register ld1 on .8b.
     ; The base-register pattern accepts either xN or sp.
 164 ; CHECK-LABEL: test_vld1_s8:
 165 ; CHECK: ld1 { v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
 166   %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1)
 167   ret <8 x i8> %vld1
 168 }
 169
 170 define <4 x i16> @test_vld1_s16(i16* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v4i16 to be emitted as a one-register ld1 on .4h.
     ; The base-register pattern accepts either xN or sp.
 171 ; CHECK-LABEL: test_vld1_s16:
 172 ; CHECK: ld1 { v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
 173   %1 = bitcast i16* %a to i8*
 174   %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2)
 175   ret <4 x i16> %vld1
 176 }
 177
 178 define <2 x i32> @test_vld1_s32(i32* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v2i32 to be emitted as a one-register ld1 on .2s.
     ; The base-register pattern accepts either xN or sp.
 179 ; CHECK-LABEL: test_vld1_s32:
 180 ; CHECK: ld1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
 181   %1 = bitcast i32* %a to i8*
 182   %vld1 = tail call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %1, i32 4)
 183   ret <2 x i32> %vld1
 184 }
 185
 186 define <1 x i64> @test_vld1_s64(i64* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v1i64 to be emitted as a one-register ld1 on .1d.
     ; The base-register pattern accepts either xN or sp.
 187 ; CHECK-LABEL: test_vld1_s64:
 188 ; CHECK: ld1 { v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
 189   %1 = bitcast i64* %a to i8*
 190   %vld1 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %1, i32 8)
 191   ret <1 x i64> %vld1
 192 }
 193
 194 define <2 x float> @test_vld1_f32(float* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v2f32 to be emitted as a one-register ld1 on .2s.
     ; The base-register pattern accepts either xN or sp.
 195 ; CHECK-LABEL: test_vld1_f32:
 196 ; CHECK: ld1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
 197   %1 = bitcast float* %a to i8*
 198   %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %1, i32 4)
 199   ret <2 x float> %vld1
 200 }
 201
 202 define <1 x double> @test_vld1_f64(double* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v1f64 to be emitted as a one-register ld1 on .1d.
     ; The base-register pattern accepts either xN or sp.
 203 ; CHECK-LABEL: test_vld1_f64:
 204 ; CHECK: ld1 { v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
 205   %1 = bitcast double* %a to i8*
 206   %vld1 = tail call <1 x double> @llvm.arm.neon.vld1.v1f64(i8* %1, i32 8)
 207   ret <1 x double> %vld1
 208 }
 209
 210 define <8 x i8> @test_vld1_p8(i8* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v8i8 to be emitted as a one-register ld1 on .8b.
     ; The base-register pattern accepts either xN or sp.
 211 ; CHECK-LABEL: test_vld1_p8:
 212 ; CHECK: ld1 { v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
 213   %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1)
 214   ret <8 x i8> %vld1
 215 }
 216
 217 define <4 x i16> @test_vld1_p16(i16* readonly %a) {
     ; Expects @llvm.arm.neon.vld1.v4i16 to be emitted as a one-register ld1 on .4h.
     ; The base-register pattern accepts either xN or sp.
 218 ; CHECK-LABEL: test_vld1_p16:
 219 ; CHECK: ld1 { v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
 220   %1 = bitcast i16* %a to i8*
 221   %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2)
 222   ret <4 x i16> %vld1
 223 }
 224
 225 define %struct.int8x16x2_t @test_vld2q_s8(i8* readonly %a) {
     ; Expects @llvm.arm.neon.vld2.v16i8 to be emitted as a two-register ld2 on .16b;
     ; both results are repacked into %struct.int8x16x2_t. Base may be xN or sp.
 226 ; CHECK-LABEL: test_vld2q_s8:
 227 ; CHECK: ld2 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
 228   %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %a, i32 1)
 229   %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0
 230   %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1
 231   %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2.fca.0.extract, 0, 0
 232   %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2.fca.1.extract, 0, 1
 233   ret %struct.int8x16x2_t %.fca.0.1.insert
 234 }
 235
 236 define %struct.int16x8x2_t @test_vld2q_s16(i16* readonly %a) {
     ; Expects @llvm.arm.neon.vld2.v8i16 to be emitted as a two-register ld2 on .8h;
     ; both results are repacked into %struct.int16x8x2_t. Base may be xN or sp.
 237 ; CHECK-LABEL: test_vld2q_s16:
 238 ; CHECK: ld2 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
 239   %1 = bitcast i16* %a to i8*
 240   %vld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8* %1, i32 2)
 241   %vld2.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 0
 242   %vld2.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 1
 243   %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2.fca.0.extract, 0, 0
 244   %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2.fca.1.extract, 0, 1
 245   ret %struct.int16x8x2_t %.fca.0.1.insert
 246 }
 247
 248 define %struct.int32x4x2_t @test_vld2q_s32(i32* readonly %a) {
     ; Expects @llvm.arm.neon.vld2.v4i32 to be emitted as a two-register ld2 on .4s;
     ; both results are repacked into %struct.int32x4x2_t. Base may be xN or sp.
 249 ; CHECK-LABEL: test_vld2q_s32:
 250 ; CHECK: ld2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
 251   %1 = bitcast i32* %a to i8*
 252   %vld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %1, i32 4)
 253   %vld2.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 0
 254   %vld2.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 1
 255   %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2.fca.0.extract, 0, 0
 256   %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2.fca.1.extract, 0, 1
 257   ret %struct.int32x4x2_t %.fca.0.1.insert
 258 }
 259
 260 define %struct.int64x2x2_t @test_vld2q_s64(i64* readonly %a) {
     ; Expects @llvm.arm.neon.vld2.v2i64 to be emitted as a two-register ld2 on .2d;
     ; both results are repacked into %struct.int64x2x2_t. Base may be xN or sp.
 261 ; CHECK-LABEL: test_vld2q_s64:
 262 ; CHECK: ld2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 263   %1 = bitcast i64* %a to i8*
 264   %vld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8* %1, i32 8)
 265   %vld2.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 0
 266   %vld2.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 1
 267   %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2.fca.0.extract, 0, 0
 268   %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2.fca.1.extract, 0, 1
 269   ret %struct.int64x2x2_t %.fca.0.1.insert
 270 }
 271
 272 define %struct.float32x4x2_t @test_vld2q_f32(float* readonly %a) {
     ; Expects @llvm.arm.neon.vld2.v4f32 to be emitted as a two-register ld2 on .4s;
     ; both results are repacked into %struct.float32x4x2_t. Base may be xN or sp.
 273 ; CHECK-LABEL: test_vld2q_f32:
 274 ; CHECK: ld2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
 275   %1 = bitcast float* %a to i8*
 276   %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
 277   %vld2.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 0
 278   %vld2.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 1
 279   %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2.fca.0.extract, 0, 0
 280   %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2.fca.1.extract, 0, 1
 281   ret %struct.float32x4x2_t %.fca.0.1.insert
 282 }
 283
 284 define %struct.float64x2x2_t @test_vld2q_f64(double* readonly %a) {
     ; Expects @llvm.arm.neon.vld2.v2f64 to be emitted as a two-register ld2 on .2d;
     ; both results are repacked into %struct.float64x2x2_t. Base may be xN or sp.
 285 ; CHECK-LABEL: test_vld2q_f64:
 286 ; CHECK: ld2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 287   %1 = bitcast double* %a to i8*
 288   %vld2 = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8* %1, i32 8)
 289   %vld2.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 0
 290   %vld2.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 1
 291   %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2.fca.0.extract, 0, 0
 292   %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2.fca.1.extract, 0, 1
 293   ret %struct.float64x2x2_t %.fca.0.1.insert
 294 }
 295
 296 define %struct.int8x8x2_t @test_vld2_s8(i8* readonly %a) {
     ; Expects @llvm.arm.neon.vld2.v8i8 to be emitted as a two-register ld2 on .8b;
     ; both results are repacked into %struct.int8x8x2_t. Base may be xN or sp.
 297 ; CHECK-LABEL: test_vld2_s8:
 298 ; CHECK: ld2 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
 299   %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %a, i32 1)
 300   %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0
 301   %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1
 302   %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2.fca.0.extract, 0, 0
 303   %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2.fca.1.extract, 0, 1
 304   ret %struct.int8x8x2_t %.fca.0.1.insert
 305 }
 306
 307 define %struct.int16x4x2_t @test_vld2_s16(i16* readonly %a) {
     ; Expects @llvm.arm.neon.vld2.v4i16 to be emitted as a two-register ld2 on .4h;
     ; both results are repacked into %struct.int16x4x2_t. Base may be xN or sp.
 308 ; CHECK-LABEL: test_vld2_s16:
 309 ; CHECK: ld2 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
 310   %1 = bitcast i16* %a to i8*
 311   %vld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8* %1, i32 2)
 312   %vld2.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 0
 313   %vld2.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 1
 314   %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2.fca.0.extract, 0, 0
 315   %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2.fca.1.extract, 0, 1
 316   ret %struct.int16x4x2_t %.fca.0.1.insert
 317 }
 318
 319 define %struct.int32x2x2_t @test_vld2_s32(i32* readonly %a) {
     ; Expects @llvm.arm.neon.vld2.v2i32 to be emitted as a two-register ld2 on .2s;
     ; both results are repacked into %struct.int32x2x2_t. Base may be xN or sp.
 320 ; CHECK-LABEL: test_vld2_s32:
 321 ; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
 322   %1 = bitcast i32* %a to i8*
 323   %vld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %1, i32 4)
 324   %vld2.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 0
 325   %vld2.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 1
 326   %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2.fca.0.extract, 0, 0
 327   %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2.fca.1.extract, 0, 1
 328   ret %struct.int32x2x2_t %.fca.0.1.insert
 329 }
 330
 331 define %struct.int64x1x2_t @test_vld2_s64(i64* readonly %a) {
     ; Expects @llvm.arm.neon.vld2.v1i64 to be emitted as a two-register ld1 (not ld2) on .1d;
     ; both results are repacked into %struct.int64x1x2_t. Base may be xN or sp.
 332 ; CHECK-LABEL: test_vld2_s64:
 333 ; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
 334   %1 = bitcast i64* %a to i8*
 335   %vld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %1, i32 8)
 336   %vld2.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 0
 337   %vld2.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 1
 338   %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2.fca.0.extract, 0, 0
 339   %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2.fca.1.extract, 0, 1
 340   ret %struct.int64x1x2_t %.fca.0.1.insert
 341 }
 342
 343 define %struct.float32x2x2_t @test_vld2_f32(float* readonly %a) {
     ; Expects @llvm.arm.neon.vld2.v2f32 to be emitted as a two-register ld2 on .2s;
     ; both results are repacked into %struct.float32x2x2_t. Base may be xN or sp.
 344 ; CHECK-LABEL: test_vld2_f32:
 345 ; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
 346   %1 = bitcast float* %a to i8*
 347   %vld2 = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %1, i32 4)
 348   %vld2.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 0
 349   %vld2.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 1
 350   %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2.fca.0.extract, 0, 0
 351   %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2.fca.1.extract, 0, 1
 352   ret %struct.float32x2x2_t %.fca.0.1.insert
 353 }
 354
 355 define %struct.float64x1x2_t @test_vld2_f64(double* readonly %a) {
     ; Expects @llvm.arm.neon.vld2.v1f64 to be emitted as a two-register ld1 (not ld2) on .1d;
     ; both results are repacked into %struct.float64x1x2_t. Base may be xN or sp.
 356 ; CHECK-LABEL: test_vld2_f64:
 357 ; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
 358   %1 = bitcast double* %a to i8*
 359   %vld2 = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %1, i32 8)
 360   %vld2.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 0
 361   %vld2.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 1
 362   %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2.fca.0.extract, 0, 0
 363   %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2.fca.1.extract, 0, 1
 364   ret %struct.float64x1x2_t %.fca.0.1.insert
 365 }
 366
 367 define %struct.int8x16x3_t @test_vld3q_s8(i8* readonly %a) {
     ; Expects @llvm.arm.neon.vld3.v16i8 to be emitted as a three-register ld3 on .16b;
     ; the three results are repacked into %struct.int8x16x3_t. Base may be xN or sp.
 368 ; CHECK-LABEL: test_vld3q_s8:
 369 ; CHECK: ld3 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
 370   %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %a, i32 1)
 371   %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0
 372   %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1
 373   %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2
 374   %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3.fca.0.extract, 0, 0
 375   %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3.fca.1.extract, 0, 1
 376   %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3.fca.2.extract, 0, 2
 377   ret %struct.int8x16x3_t %.fca.0.2.insert
 378 }
 379
 380 define %struct.int16x8x3_t @test_vld3q_s16(i16* readonly %a) {
     ; Expects @llvm.arm.neon.vld3.v8i16 to be emitted as a three-register ld3 on .8h;
     ; the three results are repacked into %struct.int16x8x3_t. Base may be xN or sp.
 381 ; CHECK-LABEL: test_vld3q_s16:
 382 ; CHECK: ld3 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
 383   %1 = bitcast i16* %a to i8*
 384   %vld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8* %1, i32 2)
 385   %vld3.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 0
 386   %vld3.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 1
 387   %vld3.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 2
 388   %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3.fca.0.extract, 0, 0
 389   %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3.fca.1.extract, 0, 1
 390   %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3.fca.2.extract, 0, 2
 391   ret %struct.int16x8x3_t %.fca.0.2.insert
 392 }
 393
 394 define %struct.int32x4x3_t @test_vld3q_s32(i32* readonly %a) {
     ; Expects @llvm.arm.neon.vld3.v4i32 to be emitted as a three-register ld3 on .4s;
     ; the three results are repacked into %struct.int32x4x3_t. Base may be xN or sp.
 395 ; CHECK-LABEL: test_vld3q_s32:
 396 ; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
 397   %1 = bitcast i32* %a to i8*
 398   %vld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %1, i32 4)
 399   %vld3.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 0
 400   %vld3.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 1
 401   %vld3.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 2
 402   %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3.fca.0.extract, 0, 0
 403   %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3.fca.1.extract, 0, 1
 404   %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3.fca.2.extract, 0, 2
 405   ret %struct.int32x4x3_t %.fca.0.2.insert
 406 }
 407
 408 define %struct.int64x2x3_t @test_vld3q_s64(i64* readonly %a) {
     ; Expects @llvm.arm.neon.vld3.v2i64 to be emitted as a three-register ld3 on .2d;
     ; the three results are repacked into %struct.int64x2x3_t. Base may be xN or sp.
 409 ; CHECK-LABEL: test_vld3q_s64:
 410 ; CHECK: ld3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 411   %1 = bitcast i64* %a to i8*
 412   %vld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8* %1, i32 8)
 413   %vld3.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 0
 414   %vld3.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 1
 415   %vld3.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 2
 416   %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3.fca.0.extract, 0, 0
 417   %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3.fca.1.extract, 0, 1
 418   %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3.fca.2.extract, 0, 2
 419   ret %struct.int64x2x3_t %.fca.0.2.insert
 420 }
 421
 422 define %struct.float32x4x3_t @test_vld3q_f32(float* readonly %a) {
     ; Expects @llvm.arm.neon.vld3.v4f32 to be emitted as a three-register ld3 on .4s;
     ; the three results are repacked into %struct.float32x4x3_t. Base may be xN or sp.
 423 ; CHECK-LABEL: test_vld3q_f32:
 424 ; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
 425   %1 = bitcast float* %a to i8*
 426   %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %1, i32 4)
 427   %vld3.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0
 428   %vld3.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 1
 429   %vld3.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 2
 430   %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3.fca.0.extract, 0, 0
 431   %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3.fca.1.extract, 0, 1
 432   %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3.fca.2.extract, 0, 2
 433   ret %struct.float32x4x3_t %.fca.0.2.insert
 434 }
 435
 436 define %struct.float64x2x3_t @test_vld3q_f64(double* readonly %a) {
     ; Expects @llvm.arm.neon.vld3.v2f64 to be emitted as a three-register ld3 on .2d;
     ; the three results are repacked into %struct.float64x2x3_t. Base may be xN or sp.
 437 ; CHECK-LABEL: test_vld3q_f64:
 438 ; CHECK: ld3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 439   %1 = bitcast double* %a to i8*
 440   %vld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8* %1, i32 8)
 441   %vld3.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 0
 442   %vld3.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 1
 443   %vld3.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 2
 444   %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3.fca.0.extract, 0, 0
 445   %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3.fca.1.extract, 0, 1
 446   %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3.fca.2.extract, 0, 2
 447   ret %struct.float64x2x3_t %.fca.0.2.insert
 448 }
 449
 450 define %struct.int8x8x3_t @test_vld3_s8(i8* readonly %a) {
     ; Expects @llvm.arm.neon.vld3.v8i8 to be emitted as a three-register ld3 on .8b;
     ; the three results are repacked into %struct.int8x8x3_t. Base may be xN or sp.
 451 ; CHECK-LABEL: test_vld3_s8:
 452 ; CHECK: ld3 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
 453   %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %a, i32 1)
 454   %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0
 455   %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1
 456   %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2
 457   %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3.fca.0.extract, 0, 0
 458   %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3.fca.1.extract, 0, 1
 459   %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3.fca.2.extract, 0, 2
 460   ret %struct.int8x8x3_t %.fca.0.2.insert
 461 }
 462
 463 define %struct.int16x4x3_t @test_vld3_s16(i16* readonly %a) {
     ; Expects @llvm.arm.neon.vld3.v4i16 to be emitted as a three-register ld3 on .4h;
     ; the three results are repacked into %struct.int16x4x3_t. Base may be xN or sp.
 464 ; CHECK-LABEL: test_vld3_s16:
 465 ; CHECK: ld3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
 466   %1 = bitcast i16* %a to i8*
 467   %vld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %1, i32 2)
 468   %vld3.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 0
 469   %vld3.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 1
 470   %vld3.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 2
 471   %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3.fca.0.extract, 0, 0
 472   %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3.fca.1.extract, 0, 1
 473   %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3.fca.2.extract, 0, 2
 474   ret %struct.int16x4x3_t %.fca.0.2.insert
 475 }
 476
 477 define %struct.int32x2x3_t @test_vld3_s32(i32* readonly %a) {
     ; Expects @llvm.arm.neon.vld3.v2i32 to be emitted as a three-register ld3 on .2s;
     ; the three results are repacked into %struct.int32x2x3_t. Base may be xN or sp.
 478 ; CHECK-LABEL: test_vld3_s32:
 479 ; CHECK: ld3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
 480   %1 = bitcast i32* %a to i8*
 481   %vld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8* %1, i32 4)
 482   %vld3.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 0
 483   %vld3.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 1
 484   %vld3.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 2
 485   %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3.fca.0.extract, 0, 0
 486   %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3.fca.1.extract, 0, 1
 487   %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3.fca.2.extract, 0, 2
 488   ret %struct.int32x2x3_t %.fca.0.2.insert
 489 }
 490
 491 define %struct.int64x1x3_t @test_vld3_s64(i64* readonly %a) {
     ; Expects @llvm.arm.neon.vld3.v1i64 to be emitted as a three-register ld1 (not ld3) on .1d;
     ; the three results are repacked into %struct.int64x1x3_t. Base may be xN or sp.
 492 ; CHECK-LABEL: test_vld3_s64:
 493 ; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
 494   %1 = bitcast i64* %a to i8*
 495   %vld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %1, i32 8)
 496   %vld3.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 0
 497   %vld3.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 1
 498   %vld3.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 2
 499   %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3.fca.0.extract, 0, 0
 500   %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3.fca.1.extract, 0, 1
 501   %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3.fca.2.extract, 0, 2
 502   ret %struct.int64x1x3_t %.fca.0.2.insert
 503 }
 504
 505 define %struct.float32x2x3_t @test_vld3_f32(float* readonly %a) {
     ; Expects @llvm.arm.neon.vld3.v2f32 to be emitted as a three-register ld3 on .2s;
     ; the three results are repacked into %struct.float32x2x3_t. Base may be xN or sp.
 506 ; CHECK-LABEL: test_vld3_f32:
 507 ; CHECK: ld3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
 508   %1 = bitcast float* %a to i8*
 509   %vld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8* %1, i32 4)
 510   %vld3.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 0
 511   %vld3.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 1
 512   %vld3.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 2
 513   %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3.fca.0.extract, 0, 0
 514   %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3.fca.1.extract, 0, 1
 515   %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3.fca.2.extract, 0, 2
 516   ret %struct.float32x2x3_t %.fca.0.2.insert
 517 }
 518
 519 define %struct.float64x1x3_t @test_vld3_f64(double* readonly %a) {
     ; Expects @llvm.arm.neon.vld3.v1f64 to be emitted as a three-register ld1 (not ld3) on .1d;
     ; the three results are repacked into %struct.float64x1x3_t. Base may be xN or sp.
 520 ; CHECK-LABEL: test_vld3_f64:
 521 ; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
 522   %1 = bitcast double* %a to i8*
 523   %vld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %1, i32 8)
 524   %vld3.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 0
 525   %vld3.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 1
 526   %vld3.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 2
 527   %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3.fca.0.extract, 0, 0
 528   %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3.fca.1.extract, 0, 1
 529   %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3.fca.2.extract, 0, 2
 530   ret %struct.float64x1x3_t %.fca.0.2.insert
 531 }
 532
 533 define %struct.int8x16x4_t @test_vld4q_s8(i8* readonly %a) {
     ; Expects @llvm.arm.neon.vld4.v16i8 to be emitted as a four-register ld4 on .16b;
     ; the four results are repacked into %struct.int8x16x4_t. Base may be xN or sp.
 534 ; CHECK-LABEL: test_vld4q_s8:
 535 ; CHECK: ld4 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
 536   %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %a, i32 1)
 537   %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0
 538   %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1
 539   %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2
 540   %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3
 541   %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld4.fca.0.extract, 0, 0
 542   %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld4.fca.1.extract, 0, 1
 543   %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld4.fca.2.extract, 0, 2
 544   %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld4.fca.3.extract, 0, 3
 545   ret %struct.int8x16x4_t %.fca.0.3.insert
 546 }
 547
 548 define %struct.int16x8x4_t @test_vld4q_s16(i16* readonly %a) {
     ; Expects @llvm.arm.neon.vld4.v8i16 to be emitted as a four-register ld4 on .8h;
     ; the four results are repacked into %struct.int16x8x4_t. Base may be xN or sp.
 549 ; CHECK-LABEL: test_vld4q_s16:
 550 ; CHECK: ld4 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
 551   %1 = bitcast i16* %a to i8*
 552   %vld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %1, i32 2)
 553   %vld4.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 0
 554   %vld4.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 1
 555   %vld4.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 2
 556   %vld4.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 3
 557   %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld4.fca.0.extract, 0, 0
 558   %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld4.fca.1.extract, 0, 1
 559   %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld4.fca.2.extract, 0, 2
 560   %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld4.fca.3.extract, 0, 3
 561   ret %struct.int16x8x4_t %.fca.0.3.insert
 562 }
 563
 564 define %struct.int32x4x4_t @test_vld4q_s32(i32* readonly %a) {
     ; Expects @llvm.arm.neon.vld4.v4i32 to be emitted as a four-register ld4 on .4s;
     ; the four results are repacked into %struct.int32x4x4_t. Base may be xN or sp.
 565 ; CHECK-LABEL: test_vld4q_s32:
 566 ; CHECK: ld4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
 567   %1 = bitcast i32* %a to i8*
 568   %vld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8* %1, i32 4)
 569   %vld4.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 0
 570   %vld4.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 1
 571   %vld4.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 2
 572   %vld4.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 3
 573   %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld4.fca.0.extract, 0, 0
 574   %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld4.fca.1.extract, 0, 1
 575   %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld4.fca.2.extract, 0, 2
 576   %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld4.fca.3.extract, 0, 3
 577   ret %struct.int32x4x4_t %.fca.0.3.insert
 578 }
 579
 580 define %struct.int64x2x4_t @test_vld4q_s64(i64* readonly %a) { ; load four <2 x i64> vectors through the vld4 intrinsic; base may be any xN or sp
 581 ; CHECK-LABEL: test_vld4q_s64:
 582 ; CHECK: ld4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 583   %1 = bitcast i64* %a to i8* ; the intrinsic is declared with an i8* address operand
 584   %vld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8* %1, i32 8)
 585   %vld4.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 0 ; unpack the four loaded vectors
 586   %vld4.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 1
 587   %vld4.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 2
 588   %vld4.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 3
 589   %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld4.fca.0.extract, 0, 0 ; repack them into the returned struct
 590   %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld4.fca.1.extract, 0, 1
 591   %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld4.fca.2.extract, 0, 2
 592   %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld4.fca.3.extract, 0, 3
 593   ret %struct.int64x2x4_t %.fca.0.3.insert
 594 }
 595
 596 define %struct.float32x4x4_t @test_vld4q_f32(float* readonly %a) { ; load four <4 x float> vectors through the vld4 intrinsic; base may be any xN or sp
 597 ; CHECK-LABEL: test_vld4q_f32:
 598 ; CHECK: ld4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
 599   %1 = bitcast float* %a to i8* ; the intrinsic is declared with an i8* address operand
 600   %vld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8* %1, i32 4)
 601   %vld4.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 0 ; unpack the four loaded vectors
 602   %vld4.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 1
 603   %vld4.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 2
 604   %vld4.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 3
 605   %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld4.fca.0.extract, 0, 0 ; repack them into the returned struct
 606   %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld4.fca.1.extract, 0, 1
 607   %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld4.fca.2.extract, 0, 2
 608   %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld4.fca.3.extract, 0, 3
 609   ret %struct.float32x4x4_t %.fca.0.3.insert
 610 }
 611
 612 define %struct.float64x2x4_t @test_vld4q_f64(double* readonly %a) { ; load four <2 x double> vectors through the vld4 intrinsic; base may be any xN or sp
 613 ; CHECK-LABEL: test_vld4q_f64:
 614 ; CHECK: ld4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 615   %1 = bitcast double* %a to i8* ; the intrinsic is declared with an i8* address operand
 616   %vld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8* %1, i32 8)
 617   %vld4.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 0 ; unpack the four loaded vectors
 618   %vld4.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 1
 619   %vld4.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 2
 620   %vld4.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 3
 621   %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld4.fca.0.extract, 0, 0 ; repack them into the returned struct
 622   %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld4.fca.1.extract, 0, 1
 623   %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld4.fca.2.extract, 0, 2
 624   %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld4.fca.3.extract, 0, 3
 625   ret %struct.float64x2x4_t %.fca.0.3.insert
 626 }
 627
 628 define %struct.int8x8x4_t @test_vld4_s8(i8* readonly %a) { ; load four <8 x i8> vectors through the vld4 intrinsic; base may be any xN or sp
 629 ; CHECK-LABEL: test_vld4_s8:
 630 ; CHECK: ld4 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
 631   %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %a, i32 1) ; %a is already i8*, so no bitcast is needed
 632   %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0 ; unpack the four loaded vectors
 633   %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1
 634   %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2
 635   %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3
 636   %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld4.fca.0.extract, 0, 0 ; repack them into the returned struct
 637   %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld4.fca.1.extract, 0, 1
 638   %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld4.fca.2.extract, 0, 2
 639   %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld4.fca.3.extract, 0, 3
 640   ret %struct.int8x8x4_t %.fca.0.3.insert
 641 }
 642
 643 define %struct.int16x4x4_t @test_vld4_s16(i16* readonly %a) { ; load four <4 x i16> vectors through the vld4 intrinsic; base may be any xN or sp
 644 ; CHECK-LABEL: test_vld4_s16:
 645 ; CHECK: ld4 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
 646   %1 = bitcast i16* %a to i8* ; the intrinsic is declared with an i8* address operand
 647   %vld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %1, i32 2)
 648   %vld4.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 0 ; unpack the four loaded vectors
 649   %vld4.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 1
 650   %vld4.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 2
 651   %vld4.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 3
 652   %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld4.fca.0.extract, 0, 0 ; repack them into the returned struct
 653   %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld4.fca.1.extract, 0, 1
 654   %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld4.fca.2.extract, 0, 2
 655   %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld4.fca.3.extract, 0, 3
 656   ret %struct.int16x4x4_t %.fca.0.3.insert
 657 }
 658
 659 define %struct.int32x2x4_t @test_vld4_s32(i32* readonly %a) { ; load four <2 x i32> vectors through the vld4 intrinsic; base may be any xN or sp
 660 ; CHECK-LABEL: test_vld4_s32:
 661 ; CHECK: ld4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
 662   %1 = bitcast i32* %a to i8* ; the intrinsic is declared with an i8* address operand
 663   %vld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8* %1, i32 4)
 664   %vld4.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 0 ; unpack the four loaded vectors
 665   %vld4.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 1
 666   %vld4.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 2
 667   %vld4.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 3
 668   %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld4.fca.0.extract, 0, 0 ; repack them into the returned struct
 669   %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld4.fca.1.extract, 0, 1
 670   %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld4.fca.2.extract, 0, 2
 671   %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld4.fca.3.extract, 0, 3
 672   ret %struct.int32x2x4_t %.fca.0.3.insert
 673 }
 674
 675 define %struct.int64x1x4_t @test_vld4_s64(i64* readonly %a) { ; <1 x i64> vld4: the expected instruction is ld1 with four .1d registers, not ld4; base may be any xN or sp
 676 ; CHECK-LABEL: test_vld4_s64:
 677 ; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
 678   %1 = bitcast i64* %a to i8* ; the intrinsic is declared with an i8* address operand
 679   %vld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %1, i32 8)
 680   %vld4.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 0 ; unpack the four loaded vectors
 681   %vld4.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 1
 682   %vld4.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 2
 683   %vld4.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 3
 684   %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld4.fca.0.extract, 0, 0 ; repack them into the returned struct
 685   %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld4.fca.1.extract, 0, 1
 686   %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld4.fca.2.extract, 0, 2
 687   %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld4.fca.3.extract, 0, 3
 688   ret %struct.int64x1x4_t %.fca.0.3.insert
 689 }
 690
 691 define %struct.float32x2x4_t @test_vld4_f32(float* readonly %a) { ; load four <2 x float> vectors through the vld4 intrinsic; base may be any xN or sp
 692 ; CHECK-LABEL: test_vld4_f32:
 693 ; CHECK: ld4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
 694   %1 = bitcast float* %a to i8* ; the intrinsic is declared with an i8* address operand
 695   %vld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8* %1, i32 4)
 696   %vld4.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 0 ; unpack the four loaded vectors
 697   %vld4.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 1
 698   %vld4.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 2
 699   %vld4.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 3
 700   %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld4.fca.0.extract, 0, 0 ; repack them into the returned struct
 701   %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld4.fca.1.extract, 0, 1
 702   %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld4.fca.2.extract, 0, 2
 703   %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld4.fca.3.extract, 0, 3
 704   ret %struct.float32x2x4_t %.fca.0.3.insert
 705 }
 706
 707 define %struct.float64x1x4_t @test_vld4_f64(double* readonly %a) { ; <1 x double> vld4: the expected instruction is ld1 with four .1d registers, not ld4; base may be any xN or sp
 708 ; CHECK-LABEL: test_vld4_f64:
 709 ; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
 710   %1 = bitcast double* %a to i8* ; the intrinsic is declared with an i8* address operand
 711   %vld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %1, i32 8)
 712   %vld4.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 0 ; unpack the four loaded vectors
 713   %vld4.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 1
 714   %vld4.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 2
 715   %vld4.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 3
 716   %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld4.fca.0.extract, 0, 0 ; repack them into the returned struct
 717   %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld4.fca.1.extract, 0, 1
 718   %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld4.fca.2.extract, 0, 2
 719   %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld4.fca.3.extract, 0, 3
 720   ret %struct.float64x1x4_t %.fca.0.3.insert
 721 }
 722
 723 declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) ; vld1 family: one vector result; NOTE(review): the i32 operand looks like an alignment in bytes (callers pass the element size) - confirm
 724 declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32)
 725 declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32)
 726 declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32)
 727 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32)
 728 declare <2 x double> @llvm.arm.neon.vld1.v2f64(i8*, i32)
 729 declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32)
 730 declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32)
 731 declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32)
 732 declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32)
 733 declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32)
 734 declare <1 x double> @llvm.arm.neon.vld1.v1f64(i8*, i32)
 735 declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32) ; vld2 family: literal struct of two vectors
 736 declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8*, i32)
 737 declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32)
 738 declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8*, i32)
 739 declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32)
 740 declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8*, i32)
 741 declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32)
 742 declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8*, i32)
 743 declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32)
 744 declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32)
 745 declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32)
 746 declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32)
 747 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) ; vld3 family: literal struct of three vectors
 748 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8*, i32)
 749 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32)
 750 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8*, i32)
 751 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32)
 752 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8*, i32)
 753 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32)
 754 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32)
 755 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8*, i32)
 756 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32)
 757 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8*, i32)
 758 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32)
 759 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) ; vld4 family: literal struct of four vectors
 760 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32)
 761 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8*, i32)
 762 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8*, i32)
 763 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8*, i32)
 764 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8*, i32)
 765 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32)
 766 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32)
 767 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8*, i32)
 768 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32)
 769 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8*, i32)
 770 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32)
 771
 772 define void @test_vst1q_s8(i8* %a, <16 x i8> %b) { ; store one <16 x i8> vector to %a through the vst1 intrinsic
 773 ; CHECK-LABEL: test_vst1q_s8:
 774 ; CHECK: st1 { v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
 775   tail call void @llvm.arm.neon.vst1.v16i8(i8* %a, <16 x i8> %b, i32 1)
 776   ret void
 777 }
 778
 779 define void @test_vst1q_s16(i16* %a, <8 x i16> %b) { ; store one <8 x i16> vector to %a through the vst1 intrinsic
 780 ; CHECK-LABEL: test_vst1q_s16:
 781 ; CHECK: st1 { v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
 782   %1 = bitcast i16* %a to i8* ; the intrinsic is called with an i8* address
 783   tail call void @llvm.arm.neon.vst1.v8i16(i8* %1, <8 x i16> %b, i32 2)
 784   ret void
 785 }
 786
 787 define void @test_vst1q_s32(i32* %a, <4 x i32> %b) { ; store one <4 x i32> vector to %a through the vst1 intrinsic
 788 ; CHECK-LABEL: test_vst1q_s32:
 789 ; CHECK: st1 { v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
 790   %1 = bitcast i32* %a to i8* ; the intrinsic is called with an i8* address
 791   tail call void @llvm.arm.neon.vst1.v4i32(i8* %1, <4 x i32> %b, i32 4)
 792   ret void
 793 }
 794
 795 define void @test_vst1q_s64(i64* %a, <2 x i64> %b) { ; store one <2 x i64> vector to %a through the vst1 intrinsic
 796 ; CHECK-LABEL: test_vst1q_s64:
 797 ; CHECK: st1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 798   %1 = bitcast i64* %a to i8* ; the intrinsic is called with an i8* address
 799   tail call void @llvm.arm.neon.vst1.v2i64(i8* %1, <2 x i64> %b, i32 8)
 800   ret void
 801 }
 802
 803 define void @test_vst1q_f32(float* %a, <4 x float> %b) { ; store one <4 x float> vector to %a through the vst1 intrinsic
 804 ; CHECK-LABEL: test_vst1q_f32:
 805 ; CHECK: st1 { v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
 806   %1 = bitcast float* %a to i8* ; the intrinsic is called with an i8* address
 807   tail call void @llvm.arm.neon.vst1.v4f32(i8* %1, <4 x float> %b, i32 4)
 808   ret void
 809 }
 810
 811 define void @test_vst1q_f64(double* %a, <2 x double> %b) { ; store one <2 x double> vector to %a through the vst1 intrinsic
 812 ; CHECK-LABEL: test_vst1q_f64:
 813 ; CHECK: st1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 814   %1 = bitcast double* %a to i8* ; the intrinsic is called with an i8* address
 815   tail call void @llvm.arm.neon.vst1.v2f64(i8* %1, <2 x double> %b, i32 8)
 816   ret void
 817 }
 818
 819 define void @test_vst1_s8(i8* %a, <8 x i8> %b) { ; store one <8 x i8> vector to %a through the vst1 intrinsic
 820 ; CHECK-LABEL: test_vst1_s8:
 821 ; CHECK: st1 { v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
 822   tail call void @llvm.arm.neon.vst1.v8i8(i8* %a, <8 x i8> %b, i32 1)
 823   ret void
 824 }
 825
 826 define void @test_vst1_s16(i16* %a, <4 x i16> %b) { ; store one <4 x i16> vector to %a through the vst1 intrinsic
 827 ; CHECK-LABEL: test_vst1_s16:
 828 ; CHECK: st1 { v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
 829   %1 = bitcast i16* %a to i8* ; the intrinsic is called with an i8* address
 830   tail call void @llvm.arm.neon.vst1.v4i16(i8* %1, <4 x i16> %b, i32 2)
 831   ret void
 832 }
 833
 834 define void @test_vst1_s32(i32* %a, <2 x i32> %b) { ; store one <2 x i32> vector to %a through the vst1 intrinsic
 835 ; CHECK-LABEL: test_vst1_s32:
 836 ; CHECK: st1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
 837   %1 = bitcast i32* %a to i8* ; the intrinsic is called with an i8* address
 838   tail call void @llvm.arm.neon.vst1.v2i32(i8* %1, <2 x i32> %b, i32 4)
 839   ret void
 840 }
 841
 842 define void @test_vst1_s64(i64* %a, <1 x i64> %b) { ; store one <1 x i64> vector to %a through the vst1 intrinsic
 843 ; CHECK-LABEL: test_vst1_s64:
 844 ; CHECK: st1 { v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
 845   %1 = bitcast i64* %a to i8* ; the intrinsic is called with an i8* address
 846   tail call void @llvm.arm.neon.vst1.v1i64(i8* %1, <1 x i64> %b, i32 8)
 847   ret void
 848 }
 849
 850 define void @test_vst1_f32(float* %a, <2 x float> %b) { ; store one <2 x float> vector to %a through the vst1 intrinsic
 851 ; CHECK-LABEL: test_vst1_f32:
 852 ; CHECK: st1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
 853   %1 = bitcast float* %a to i8* ; the intrinsic is called with an i8* address
 854   tail call void @llvm.arm.neon.vst1.v2f32(i8* %1, <2 x float> %b, i32 4)
 855   ret void
 856 }
 857
 858 define void @test_vst1_f64(double* %a, <1 x double> %b) { ; store one <1 x double> vector to %a through the vst1 intrinsic
 859 ; CHECK-LABEL: test_vst1_f64:
 860 ; CHECK: st1 { v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
 861   %1 = bitcast double* %a to i8* ; the intrinsic is called with an i8* address
 862   tail call void @llvm.arm.neon.vst1.v1f64(i8* %1, <1 x double> %b, i32 8)
 863   ret void
 864 }
 865
 866 define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) { ; store two <16 x i8> vectors to %a through the vst2 intrinsic
 867 ; CHECK-LABEL: test_vst2q_s8:
 868 ; CHECK: st2 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
 869   %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0 ; split the array argument into its two vectors
 870   %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1
 871   tail call void @llvm.arm.neon.vst2.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 1)
 872   ret void
 873 }
 874
 875 define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) { ; store two <8 x i16> vectors to %a through the vst2 intrinsic
 876 ; CHECK-LABEL: test_vst2q_s16:
 877 ; CHECK: st2 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
 878   %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0 ; split the array argument into its two vectors
 879   %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1
 880   %1 = bitcast i16* %a to i8* ; the intrinsic is called with an i8* address
 881   tail call void @llvm.arm.neon.vst2.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 2)
 882   ret void
 883 }
 884
 885 define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) { ; store two <4 x i32> vectors to %a through the vst2 intrinsic
 886 ; CHECK-LABEL: test_vst2q_s32:
 887 ; CHECK: st2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
 888   %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0 ; split the array argument into its two vectors
 889   %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1
 890   %1 = bitcast i32* %a to i8* ; the intrinsic is called with an i8* address
 891   tail call void @llvm.arm.neon.vst2.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 4)
 892   ret void
 893 }
 894
 895 define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) { ; store two <2 x i64> vectors to %a through the vst2 intrinsic
 896 ; CHECK-LABEL: test_vst2q_s64:
 897 ; CHECK: st2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 898   %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0 ; split the array argument into its two vectors
 899   %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1
 900   %1 = bitcast i64* %a to i8* ; the intrinsic is called with an i8* address
 901   tail call void @llvm.arm.neon.vst2.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 8)
 902   ret void
 903 }
 904
 905 define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) { ; store two <4 x float> vectors to %a through the vst2 intrinsic
 906 ; CHECK-LABEL: test_vst2q_f32:
 907 ; CHECK: st2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
 908   %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0 ; split the array argument into its two vectors
 909   %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1
 910   %1 = bitcast float* %a to i8* ; the intrinsic is called with an i8* address
 911   tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 4)
 912   ret void
 913 }
 914
 915 define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) { ; store two <2 x double> vectors to %a through the vst2 intrinsic
 916 ; CHECK-LABEL: test_vst2q_f64:
 917 ; CHECK: st2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 918   %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0 ; split the array argument into its two vectors
 919   %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1
 920   %1 = bitcast double* %a to i8* ; the intrinsic is called with an i8* address
 921   tail call void @llvm.arm.neon.vst2.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 8)
 922   ret void
 923 }
 924
 925 define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) { ; store two <8 x i8> vectors to %a through the vst2 intrinsic
 926 ; CHECK-LABEL: test_vst2_s8:
 927 ; CHECK: st2 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
 928   %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0 ; split the array argument into its two vectors
 929   %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1
 930   tail call void @llvm.arm.neon.vst2.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 1)
 931   ret void
 932 }
 933
 934 define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) { ; store two <4 x i16> vectors to %a through the vst2 intrinsic
 935 ; CHECK-LABEL: test_vst2_s16:
 936 ; CHECK: st2 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
 937   %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0 ; split the array argument into its two vectors
 938   %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1
 939   %1 = bitcast i16* %a to i8* ; the intrinsic is called with an i8* address
 940   tail call void @llvm.arm.neon.vst2.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 2)
 941   ret void
 942 }
 943
 944 define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) { ; store two <2 x i32> vectors to %a through the vst2 intrinsic
 945 ; CHECK-LABEL: test_vst2_s32:
 946 ; CHECK: st2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
 947   %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0 ; split the array argument into its two vectors
 948   %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1
 949   %1 = bitcast i32* %a to i8* ; the intrinsic is called with an i8* address
 950   tail call void @llvm.arm.neon.vst2.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 4)
 951   ret void
 952 }
 953
 954 define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) { ; <1 x i64> vst2: the expected instruction is st1 with two .1d registers, not st2
 955 ; CHECK-LABEL: test_vst2_s64:
 956 ; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
 957   %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0 ; split the array argument into its two vectors
 958   %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1
 959   %1 = bitcast i64* %a to i8* ; the intrinsic is called with an i8* address
 960   tail call void @llvm.arm.neon.vst2.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 8)
 961   ret void
 962 }
 963
 964 define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) { ; store two <2 x float> vectors to %a through the vst2 intrinsic
 965 ; CHECK-LABEL: test_vst2_f32:
 966 ; CHECK: st2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
 967   %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0 ; split the array argument into its two vectors
 968   %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1
 969   %1 = bitcast float* %a to i8* ; the intrinsic is called with an i8* address
 970   tail call void @llvm.arm.neon.vst2.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 4)
 971   ret void
 972 }
 973
 974 define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) { ; <1 x double> vst2: the expected instruction is st1 with two .1d registers, not st2
 975 ; CHECK-LABEL: test_vst2_f64:
 976 ; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
 977   %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0 ; split the array argument into its two vectors
 978   %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1
 979   %1 = bitcast double* %a to i8* ; the intrinsic is called with an i8* address
 980   tail call void @llvm.arm.neon.vst2.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 8)
 981   ret void
 982 }
 983
 984 define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) { ; store three <16 x i8> vectors to %a through the vst3 intrinsic
 985 ; CHECK-LABEL: test_vst3q_s8:
 986 ; CHECK: st3 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
 987   %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0 ; split the array argument into its three vectors
 988   %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1
 989   %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2
 990   tail call void @llvm.arm.neon.vst3.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 1)
 991   ret void
 992 }
 993
 994 define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) { ; store three <8 x i16> vectors to %a through the vst3 intrinsic
 995 ; CHECK-LABEL: test_vst3q_s16:
 996 ; CHECK: st3 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
 997   %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0 ; split the array argument into its three vectors
 998   %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1
 999   %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2
1000   %1 = bitcast i16* %a to i8* ; the intrinsic is called with an i8* address
1001   tail call void @llvm.arm.neon.vst3.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 2)
1002   ret void
1003 }
1004
1005 define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) { ; store three <4 x i32> vectors to %a through the vst3 intrinsic
1006 ; CHECK-LABEL: test_vst3q_s32:
1007 ; CHECK: st3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1008   %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0 ; split the array argument into its three vectors
1009   %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1
1010   %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2
1011   %1 = bitcast i32* %a to i8* ; the intrinsic is called with an i8* address
1012   tail call void @llvm.arm.neon.vst3.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 4)
1013   ret void
1014 }
1015
1016 define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) { ; store three <2 x i64> vectors to %a through the vst3 intrinsic
1017 ; CHECK-LABEL: test_vst3q_s64:
1018 ; CHECK: st3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1019   %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0 ; split the array argument into its three vectors
1020   %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1
1021   %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2
1022   %1 = bitcast i64* %a to i8* ; the intrinsic is called with an i8* address
1023   tail call void @llvm.arm.neon.vst3.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 8)
1024   ret void
1025 }
1026
1027 define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) { ; store three <4 x float> vectors to %a through the vst3 intrinsic
1028 ; CHECK-LABEL: test_vst3q_f32:
1029 ; CHECK: st3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1030   %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0 ; split the array argument into its three vectors
1031   %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1
1032   %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2
1033   %1 = bitcast float* %a to i8* ; the intrinsic is called with an i8* address
1034   tail call void @llvm.arm.neon.vst3.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 4)
1035   ret void
1036 }
1037
1038 define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) { ; store three <2 x double> vectors to %a through the vst3 intrinsic
1039 ; CHECK-LABEL: test_vst3q_f64:
1040 ; CHECK: st3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1041   %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0 ; split the array argument into its three vectors
1042   %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1
1043   %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2
1044   %1 = bitcast double* %a to i8* ; the intrinsic is called with an i8* address
1045   tail call void @llvm.arm.neon.vst3.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 8)
1046   ret void
1047 }
1048
1049 define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) { ; store three <8 x i8> vectors to %a through the vst3 intrinsic
1050 ; CHECK-LABEL: test_vst3_s8:
1051 ; CHECK: st3 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
1052   %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0 ; split the array argument into its three vectors
1053   %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1
1054   %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2
1055   tail call void @llvm.arm.neon.vst3.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 1)
1056   ret void
1057 }
1058
1059 define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) { ; store three <4 x i16> vectors to %a through the vst3 intrinsic
1060 ; CHECK-LABEL: test_vst3_s16:
1061 ; CHECK: st3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
1062   %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0 ; split the array argument into its three vectors
1063   %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1
1064   %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2
1065   %1 = bitcast i16* %a to i8* ; the intrinsic is called with an i8* address
1066   tail call void @llvm.arm.neon.vst3.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 2)
1067   ret void
1068 }
1069
1070 define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) { ; store three <2 x i32> vectors to %a through the vst3 intrinsic
1071 ; CHECK-LABEL: test_vst3_s32:
1072 ; CHECK: st3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
1073   %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0 ; split the array argument into its three vectors
1074   %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1
1075   %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2
1076   %1 = bitcast i32* %a to i8* ; the intrinsic is called with an i8* address
1077   tail call void @llvm.arm.neon.vst3.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 4)
1078   ret void
1079 }
1080
1081 define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) { ; <1 x i64> vst3: the expected instruction is st1 with three .1d registers, not st3
1082 ; CHECK-LABEL: test_vst3_s64:
1083 ; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
1084   %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0 ; split the array argument into its three vectors
1085   %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1
1086   %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2
1087   %1 = bitcast i64* %a to i8* ; the intrinsic is called with an i8* address
1088   tail call void @llvm.arm.neon.vst3.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 8)
1089   ret void
1090 }
1091
1092 define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) { ; store three <2 x float> vectors through vst3
1093 ; CHECK-LABEL: test_vst3_f32
1094 ; CHECK: st3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
1095   %vec0 = extractvalue [3 x <2 x float>] %b.coerce, 0
1096   %vec1 = extractvalue [3 x <2 x float>] %b.coerce, 1
1097   %vec2 = extractvalue [3 x <2 x float>] %b.coerce, 2
1098   %addr = bitcast float* %a to i8* ; the intrinsic is declared with an i8* address
1099   tail call void @llvm.arm.neon.vst3.v2f32(i8* %addr, <2 x float> %vec0, <2 x float> %vec1, <2 x float> %vec2, i32 4)
1100   ret void
1101 }
1102
1103 define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) { ; vst3 of three <1 x double> vectors; the pattern below expects st1, not st3
1104 ; CHECK-LABEL: test_vst3_f64
1105 ; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
1106   %vec0 = extractvalue [3 x <1 x double>] %b.coerce, 0
1107   %vec1 = extractvalue [3 x <1 x double>] %b.coerce, 1
1108   %vec2 = extractvalue [3 x <1 x double>] %b.coerce, 2
1109   %addr = bitcast double* %a to i8* ; the intrinsic is declared with an i8* address
1110   tail call void @llvm.arm.neon.vst3.v1f64(i8* %addr, <1 x double> %vec0, <1 x double> %vec1, <1 x double> %vec2, i32 8)
1111   ret void
1112 }
1113
1114 define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) { ; store four <16 x i8> vectors through vst4; %a is already i8*, so no cast
1115 ; CHECK-LABEL: test_vst4q_s8
1116 ; CHECK: st4 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
1117   %vec0 = extractvalue [4 x <16 x i8>] %b.coerce, 0
1118   %vec1 = extractvalue [4 x <16 x i8>] %b.coerce, 1
1119   %vec2 = extractvalue [4 x <16 x i8>] %b.coerce, 2
1120   %vec3 = extractvalue [4 x <16 x i8>] %b.coerce, 3
1121   tail call void @llvm.arm.neon.vst4.v16i8(i8* %a, <16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i32 1)
1122   ret void
1123 }
1124
1125 define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) { ; store four <8 x i16> vectors through vst4
1126 ; CHECK-LABEL: test_vst4q_s16
1127 ; CHECK: st4 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
1128   %vec0 = extractvalue [4 x <8 x i16>] %b.coerce, 0
1129   %vec1 = extractvalue [4 x <8 x i16>] %b.coerce, 1
1130   %vec2 = extractvalue [4 x <8 x i16>] %b.coerce, 2
1131   %vec3 = extractvalue [4 x <8 x i16>] %b.coerce, 3
1132   %addr = bitcast i16* %a to i8* ; the intrinsic is declared with an i8* address
1133   tail call void @llvm.arm.neon.vst4.v8i16(i8* %addr, <8 x i16> %vec0, <8 x i16> %vec1, <8 x i16> %vec2, <8 x i16> %vec3, i32 2)
1134   ret void
1135 }
1136
1137 define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) { ; store four <4 x i32> vectors through vst4
1138 ; CHECK-LABEL: test_vst4q_s32
1139 ; CHECK: st4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1140   %vec0 = extractvalue [4 x <4 x i32>] %b.coerce, 0
1141   %vec1 = extractvalue [4 x <4 x i32>] %b.coerce, 1
1142   %vec2 = extractvalue [4 x <4 x i32>] %b.coerce, 2
1143   %vec3 = extractvalue [4 x <4 x i32>] %b.coerce, 3
1144   %addr = bitcast i32* %a to i8* ; the intrinsic is declared with an i8* address
1145   tail call void @llvm.arm.neon.vst4.v4i32(i8* %addr, <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, i32 4)
1146   ret void
1147 }
1148
1149 define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) { ; store four <2 x i64> vectors through vst4
1150 ; CHECK-LABEL: test_vst4q_s64
1151 ; CHECK: st4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1152   %vec0 = extractvalue [4 x <2 x i64>] %b.coerce, 0
1153   %vec1 = extractvalue [4 x <2 x i64>] %b.coerce, 1
1154   %vec2 = extractvalue [4 x <2 x i64>] %b.coerce, 2
1155   %vec3 = extractvalue [4 x <2 x i64>] %b.coerce, 3
1156   %addr = bitcast i64* %a to i8* ; the intrinsic is declared with an i8* address
1157   tail call void @llvm.arm.neon.vst4.v2i64(i8* %addr, <2 x i64> %vec0, <2 x i64> %vec1, <2 x i64> %vec2, <2 x i64> %vec3, i32 8)
1158   ret void
1159 }
1160
1161 define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) { ; store four <4 x float> vectors through vst4
1162 ; CHECK-LABEL: test_vst4q_f32
1163 ; CHECK: st4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1164   %vec0 = extractvalue [4 x <4 x float>] %b.coerce, 0
1165   %vec1 = extractvalue [4 x <4 x float>] %b.coerce, 1
1166   %vec2 = extractvalue [4 x <4 x float>] %b.coerce, 2
1167   %vec3 = extractvalue [4 x <4 x float>] %b.coerce, 3
1168   %addr = bitcast float* %a to i8* ; the intrinsic is declared with an i8* address
1169   tail call void @llvm.arm.neon.vst4.v4f32(i8* %addr, <4 x float> %vec0, <4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, i32 4)
1170   ret void
1171 }
1172
1173 define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) { ; store four <2 x double> vectors through vst4
1174 ; CHECK-LABEL: test_vst4q_f64
1175 ; CHECK: st4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1176   %vec0 = extractvalue [4 x <2 x double>] %b.coerce, 0
1177   %vec1 = extractvalue [4 x <2 x double>] %b.coerce, 1
1178   %vec2 = extractvalue [4 x <2 x double>] %b.coerce, 2
1179   %vec3 = extractvalue [4 x <2 x double>] %b.coerce, 3
1180   %addr = bitcast double* %a to i8* ; the intrinsic is declared with an i8* address
1181   tail call void @llvm.arm.neon.vst4.v2f64(i8* %addr, <2 x double> %vec0, <2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, i32 8)
1182   ret void
1183 }
1184
1185 define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) { ; store four <8 x i8> vectors through vst4; %a is already i8*, so no cast
1186 ; CHECK-LABEL: test_vst4_s8
1187 ; CHECK: st4 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
1188   %vec0 = extractvalue [4 x <8 x i8>] %b.coerce, 0
1189   %vec1 = extractvalue [4 x <8 x i8>] %b.coerce, 1
1190   %vec2 = extractvalue [4 x <8 x i8>] %b.coerce, 2
1191   %vec3 = extractvalue [4 x <8 x i8>] %b.coerce, 3
1192   tail call void @llvm.arm.neon.vst4.v8i8(i8* %a, <8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, <8 x i8> %vec3, i32 1)
1193   ret void
1194 }
1195
1196 define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) { ; store four <4 x i16> vectors through vst4
1197 ; CHECK-LABEL: test_vst4_s16
1198 ; CHECK: st4 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
1199   %vec0 = extractvalue [4 x <4 x i16>] %b.coerce, 0
1200   %vec1 = extractvalue [4 x <4 x i16>] %b.coerce, 1
1201   %vec2 = extractvalue [4 x <4 x i16>] %b.coerce, 2
1202   %vec3 = extractvalue [4 x <4 x i16>] %b.coerce, 3
1203   %addr = bitcast i16* %a to i8* ; the intrinsic is declared with an i8* address
1204   tail call void @llvm.arm.neon.vst4.v4i16(i8* %addr, <4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2, <4 x i16> %vec3, i32 2)
1205   ret void
1206 }
1207
1208 define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) { ; store four <2 x i32> vectors through vst4
1209 ; CHECK-LABEL: test_vst4_s32
1210 ; CHECK: st4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
1211   %vec0 = extractvalue [4 x <2 x i32>] %b.coerce, 0
1212   %vec1 = extractvalue [4 x <2 x i32>] %b.coerce, 1
1213   %vec2 = extractvalue [4 x <2 x i32>] %b.coerce, 2
1214   %vec3 = extractvalue [4 x <2 x i32>] %b.coerce, 3
1215   %addr = bitcast i32* %a to i8* ; the intrinsic is declared with an i8* address
1216   tail call void @llvm.arm.neon.vst4.v2i32(i8* %addr, <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2, <2 x i32> %vec3, i32 4)
1217   ret void
1218 }
1219
1220 define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) { ; vst4 of four <1 x i64> vectors; the pattern below expects st1, not st4
1221 ; CHECK-LABEL: test_vst4_s64
1222 ; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
1223   %vec0 = extractvalue [4 x <1 x i64>] %b.coerce, 0
1224   %vec1 = extractvalue [4 x <1 x i64>] %b.coerce, 1
1225   %vec2 = extractvalue [4 x <1 x i64>] %b.coerce, 2
1226   %vec3 = extractvalue [4 x <1 x i64>] %b.coerce, 3
1227   %addr = bitcast i64* %a to i8* ; the intrinsic is declared with an i8* address
1228   tail call void @llvm.arm.neon.vst4.v1i64(i8* %addr, <1 x i64> %vec0, <1 x i64> %vec1, <1 x i64> %vec2, <1 x i64> %vec3, i32 8)
1229   ret void
1230 }
1231
1232 define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) { ; store four <2 x float> vectors through vst4
1233 ; CHECK-LABEL: test_vst4_f32
1234 ; CHECK: st4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
1235   %vec0 = extractvalue [4 x <2 x float>] %b.coerce, 0
1236   %vec1 = extractvalue [4 x <2 x float>] %b.coerce, 1
1237   %vec2 = extractvalue [4 x <2 x float>] %b.coerce, 2
1238   %vec3 = extractvalue [4 x <2 x float>] %b.coerce, 3
1239   %addr = bitcast float* %a to i8* ; the intrinsic is declared with an i8* address
1240   tail call void @llvm.arm.neon.vst4.v2f32(i8* %addr, <2 x float> %vec0, <2 x float> %vec1, <2 x float> %vec2, <2 x float> %vec3, i32 4)
1241   ret void
1242 }
1243
1244 define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) { ; vst4 of four <1 x double> vectors; the pattern below expects st1, not st4
1245 ; CHECK-LABEL: test_vst4_f64
1246 ; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
1247   %vec0 = extractvalue [4 x <1 x double>] %b.coerce, 0
1248   %vec1 = extractvalue [4 x <1 x double>] %b.coerce, 1
1249   %vec2 = extractvalue [4 x <1 x double>] %b.coerce, 2
1250   %vec3 = extractvalue [4 x <1 x double>] %b.coerce, 3
1251   %addr = bitcast double* %a to i8* ; the intrinsic is declared with an i8* address
1252   tail call void @llvm.arm.neon.vst4.v1f64(i8* %addr, <1 x double> %vec0, <1 x double> %vec1, <1 x double> %vec2, <1 x double> %vec3, i32 8)
1253   ret void
1254 }
1255
1256 declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) ; vst1 family: i8* address, one vector, trailing i32
1257 declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32)
1258 declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32)
1259 declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32)
1260 declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32)
1261 declare void @llvm.arm.neon.vst1.v2f64(i8*, <2 x double>, i32)
1262 declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32)
1263 declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32)
1264 declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32)
1265 declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32)
1266 declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32)
1267 declare void @llvm.arm.neon.vst1.v1f64(i8*, <1 x double>, i32)
1268 declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) ; vst2 family: i8* address, two vectors of one type, trailing i32
1269 declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
1270 declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
1271 declare void @llvm.arm.neon.vst2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
1272 declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32)
1273 declare void @llvm.arm.neon.vst2.v2f64(i8*, <2 x double>, <2 x double>, i32)
1274 declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
1275 declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
1276 declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
1277 declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
1278 declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32)
1279 declare void @llvm.arm.neon.vst2.v1f64(i8*, <1 x double>, <1 x double>, i32)
1280 declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) ; vst3 family: three vectors; the tests in this file pass the element size in bytes as the trailing i32
1281 declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
1282 declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
1283 declare void @llvm.arm.neon.vst3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
1284 declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
1285 declare void @llvm.arm.neon.vst3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
1286 declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
1287 declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
1288 declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
1289 declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
1290 declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
1291 declare void @llvm.arm.neon.vst3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)
1292 declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) ; vst4 family: four vectors; NOTE(review): trailing i32 is presumably the alignment operand - confirm
1293 declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
1294 declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
1295 declare void @llvm.arm.neon.vst4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
1296 declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
1297 declare void @llvm.arm.neon.vst4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
1298 declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
1299 declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
1300 declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
1301 declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
1302 declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
1303 declare void @llvm.arm.neon.vst4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)
1304
1305 define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) { ; load two <16 x i8> vectors via vld1x2 and return them as a struct
1306 ; CHECK-LABEL: test_vld1q_s8_x2
1307 ; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
1308   %vld = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1)
1309   %vec0 = extractvalue { <16 x i8>, <16 x i8> } %vld, 0
1310   %vec1 = extractvalue { <16 x i8>, <16 x i8> } %vld, 1
1311   %ret0 = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vec0, 0, 0 ; rebuild the return struct one element at a time
1312   %ret1 = insertvalue %struct.int8x16x2_t %ret0, <16 x i8> %vec1, 0, 1
1313   ret %struct.int8x16x2_t %ret1
1314 }
1315
1316 define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) { ; load two <8 x i16> vectors via vld1x2 and return them as a struct
1317 ; CHECK-LABEL: test_vld1q_s16_x2
1318 ; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
1319   %addr = bitcast i16* %a to i8* ; the intrinsic is called with an i8* address
1320   %vld = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %addr, i32 2)
1321   %vec0 = extractvalue { <8 x i16>, <8 x i16> } %vld, 0
1322   %vec1 = extractvalue { <8 x i16>, <8 x i16> } %vld, 1
1323   %ret0 = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vec0, 0, 0 ; rebuild the return struct one element at a time
1324   %ret1 = insertvalue %struct.int16x8x2_t %ret0, <8 x i16> %vec1, 0, 1
1325   ret %struct.int16x8x2_t %ret1
1326 }
1327
1328 define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) { ; load two <4 x i32> vectors via vld1x2 and return them as a struct
1329 ; CHECK-LABEL: test_vld1q_s32_x2
1330 ; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1331   %addr = bitcast i32* %a to i8* ; the intrinsic is called with an i8* address
1332   %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8* %addr, i32 4)
1333   %vec0 = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
1334   %vec1 = extractvalue { <4 x i32>, <4 x i32> } %vld, 1
1335   %ret0 = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vec0, 0, 0 ; rebuild the return struct one element at a time
1336   %ret1 = insertvalue %struct.int32x4x2_t %ret0, <4 x i32> %vec1, 0, 1
1337   ret %struct.int32x4x2_t %ret1
1338 }
1339
1340 define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) { ; load two <2 x i64> vectors via vld1x2 and return them as a struct
1341 ; CHECK-LABEL: test_vld1q_s64_x2
1342 ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1343   %addr = bitcast i64* %a to i8* ; the intrinsic is called with an i8* address
1344   %vld = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8* %addr, i32 8)
1345   %vec0 = extractvalue { <2 x i64>, <2 x i64> } %vld, 0
1346   %vec1 = extractvalue { <2 x i64>, <2 x i64> } %vld, 1
1347   %ret0 = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vec0, 0, 0 ; rebuild the return struct one element at a time
1348   %ret1 = insertvalue %struct.int64x2x2_t %ret0, <2 x i64> %vec1, 0, 1
1349   ret %struct.int64x2x2_t %ret1
1350 }
1351
1352 define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) { ; load two <4 x float> vectors via vld1x2 and return them as a struct
1353 ; CHECK-LABEL: test_vld1q_f32_x2
1354 ; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1355   %addr = bitcast float* %a to i8* ; the intrinsic is called with an i8* address
1356   %vld = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8* %addr, i32 4)
1357   %vec0 = extractvalue { <4 x float>, <4 x float> } %vld, 0
1358   %vec1 = extractvalue { <4 x float>, <4 x float> } %vld, 1
1359   %ret0 = insertvalue %struct.float32x4x2_t undef, <4 x float> %vec0, 0, 0 ; rebuild the return struct one element at a time
1360   %ret1 = insertvalue %struct.float32x4x2_t %ret0, <4 x float> %vec1, 0, 1
1361   ret %struct.float32x4x2_t %ret1
1362 }
1363
1364
1365 define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) { ; load two <2 x double> vectors via vld1x2 and return them as a struct
1366 ; CHECK-LABEL: test_vld1q_f64_x2
1367 ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1368   %addr = bitcast double* %a to i8* ; the intrinsic is called with an i8* address
1369   %vld = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8* %addr, i32 8)
1370   %vec0 = extractvalue { <2 x double>, <2 x double> } %vld, 0
1371   %vec1 = extractvalue { <2 x double>, <2 x double> } %vld, 1
1372   %ret0 = insertvalue %struct.float64x2x2_t undef, <2 x double> %vec0, 0, 0 ; rebuild the return struct one element at a time
1373   %ret1 = insertvalue %struct.float64x2x2_t %ret0, <2 x double> %vec1, 0, 1
1374   ret %struct.float64x2x2_t %ret1
1375 }
1376
1377 define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) { ; load two <8 x i8> vectors via vld1x2 and return them as a struct
1378 ; CHECK-LABEL: test_vld1_s8_x2
1379 ; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
1380   %vld = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8* %a, i32 1)
1381   %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vld, 0
1382   %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vld, 1
1383   %ret0 = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vec0, 0, 0 ; rebuild the return struct one element at a time
1384   %ret1 = insertvalue %struct.int8x8x2_t %ret0, <8 x i8> %vec1, 0, 1
1385   ret %struct.int8x8x2_t %ret1
1386 }
1387
1388 define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) { ; load two <4 x i16> vectors via vld1x2 and return them as a struct
1389 ; CHECK-LABEL: test_vld1_s16_x2
1390 ; CHECK: ld1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
1391   %addr = bitcast i16* %a to i8* ; the intrinsic is called with an i8* address
1392   %vld = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8* %addr, i32 2)
1393   %vec0 = extractvalue { <4 x i16>, <4 x i16> } %vld, 0
1394   %vec1 = extractvalue { <4 x i16>, <4 x i16> } %vld, 1
1395   %ret0 = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vec0, 0, 0 ; rebuild the return struct one element at a time
1396   %ret1 = insertvalue %struct.int16x4x2_t %ret0, <4 x i16> %vec1, 0, 1
1397   ret %struct.int16x4x2_t %ret1
1398 }
1399
1400 define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) { ; load two <2 x i32> vectors via vld1x2 and return them as a struct
1401 ; CHECK-LABEL: test_vld1_s32_x2
1402 ; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
1403   %addr = bitcast i32* %a to i8* ; the intrinsic is called with an i8* address
1404   %vld = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8* %addr, i32 4)
1405   %vec0 = extractvalue { <2 x i32>, <2 x i32> } %vld, 0
1406   %vec1 = extractvalue { <2 x i32>, <2 x i32> } %vld, 1
1407   %ret0 = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vec0, 0, 0 ; rebuild the return struct one element at a time
1408   %ret1 = insertvalue %struct.int32x2x2_t %ret0, <2 x i32> %vec1, 0, 1
1409   ret %struct.int32x2x2_t %ret1
1410 }
1411
1412 define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) { ; load two <1 x i64> vectors via vld1x2 and return them as a struct
1413 ; CHECK-LABEL: test_vld1_s64_x2
1414 ; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
1415   %addr = bitcast i64* %a to i8* ; the intrinsic is called with an i8* address
1416   %vld = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8* %addr, i32 8)
1417   %vec0 = extractvalue { <1 x i64>, <1 x i64> } %vld, 0
1418   %vec1 = extractvalue { <1 x i64>, <1 x i64> } %vld, 1
1419   %ret0 = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vec0, 0, 0 ; rebuild the return struct one element at a time
1420   %ret1 = insertvalue %struct.int64x1x2_t %ret0, <1 x i64> %vec1, 0, 1
1421   ret %struct.int64x1x2_t %ret1
1422 }
1423
1424 define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) { ; load two <2 x float> vectors via vld1x2 and return them as a struct
1425 ; CHECK-LABEL: test_vld1_f32_x2
1426 ; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
1427   %addr = bitcast float* %a to i8* ; the intrinsic is called with an i8* address
1428   %vld = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8* %addr, i32 4)
1429   %vec0 = extractvalue { <2 x float>, <2 x float> } %vld, 0
1430   %vec1 = extractvalue { <2 x float>, <2 x float> } %vld, 1
1431   %ret0 = insertvalue %struct.float32x2x2_t undef, <2 x float> %vec0, 0, 0 ; rebuild the return struct one element at a time
1432   %ret1 = insertvalue %struct.float32x2x2_t %ret0, <2 x float> %vec1, 0, 1
1433   ret %struct.float32x2x2_t %ret1
1434 }
1435
1436 define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) { ; load two <1 x double> vectors via vld1x2 and return them as a struct
1437 ; CHECK-LABEL: test_vld1_f64_x2
1438 ; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
1439   %addr = bitcast double* %a to i8* ; the intrinsic is called with an i8* address
1440   %vld = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8* %addr, i32 8)
1441   %vec0 = extractvalue { <1 x double>, <1 x double> } %vld, 0
1442   %vec1 = extractvalue { <1 x double>, <1 x double> } %vld, 1
1443   %ret0 = insertvalue %struct.float64x1x2_t undef, <1 x double> %vec0, 0, 0 ; rebuild the return struct one element at a time
1444   %ret1 = insertvalue %struct.float64x1x2_t %ret0, <1 x double> %vec1, 0, 1
1445   ret %struct.float64x1x2_t %ret1
1446 }
1447
1448 define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a)  { ; load three <16 x i8> vectors via vld1x3 and return them as a struct
1449 ; CHECK-LABEL: test_vld1q_s8_x3
1450 ; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
1451 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1452   %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8* %a, i32 1)
1453   %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
1454   %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
1455   %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
1456   %5 = insertvalue %struct.int8x16x3_t undef, <16 x i8> %2, 0, 0 ; rebuild the return struct one element at a time
1457   %6 = insertvalue %struct.int8x16x3_t %5, <16 x i8> %3, 0, 1
1458   %7 = insertvalue %struct.int8x16x3_t %6, <16 x i8> %4, 0, 2
1459   ret %struct.int8x16x3_t %7
1460 }
1461
1462 define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a)  { ; load three <8 x i16> vectors via vld1x3 and return them as a struct
1463 ; CHECK-LABEL: test_vld1q_s16_x3
1464 ; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
1465 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1466   %1 = bitcast i16* %a to i8* ; the intrinsic is called with an i8* address
1467   %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2)
1468   %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
1469   %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 1
1470   %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 2
1471   %6 = insertvalue %struct.int16x8x3_t undef, <8 x i16> %3, 0, 0 ; rebuild the return struct one element at a time
1472   %7 = insertvalue %struct.int16x8x3_t %6, <8 x i16> %4, 0, 1
1473   %8 = insertvalue %struct.int16x8x3_t %7, <8 x i16> %5, 0, 2
1474   ret %struct.int16x8x3_t %8
1475 }
1476
1477 define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a)  { ; load three <4 x i32> vectors via vld1x3 and return them as a struct
1478 ; CHECK-LABEL: test_vld1q_s32_x3
1479 ; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1480 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1481   %1 = bitcast i32* %a to i8* ; the intrinsic is called with an i8* address
1482   %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8* %1, i32 4)
1483   %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 0
1484   %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 1
1485   %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 2
1486   %6 = insertvalue %struct.int32x4x3_t undef, <4 x i32> %3, 0, 0 ; rebuild the return struct one element at a time
1487   %7 = insertvalue %struct.int32x4x3_t %6, <4 x i32> %4, 0, 1
1488   %8 = insertvalue %struct.int32x4x3_t %7, <4 x i32> %5, 0, 2
1489   ret %struct.int32x4x3_t %8
1490 }
1491
1492 define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a)  { ; load three <2 x i64> vectors via vld1x3 and return them as a struct
1493 ; CHECK-LABEL: test_vld1q_s64_x3
1494 ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1495 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1496   %1 = bitcast i64* %a to i8* ; the intrinsic is called with an i8* address
1497   %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8)
1498   %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
1499   %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 1
1500   %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 2
1501   %6 = insertvalue %struct.int64x2x3_t undef, <2 x i64> %3, 0, 0 ; rebuild the return struct one element at a time
1502   %7 = insertvalue %struct.int64x2x3_t %6, <2 x i64> %4, 0, 1
1503   %8 = insertvalue %struct.int64x2x3_t %7, <2 x i64> %5, 0, 2
1504   ret %struct.int64x2x3_t %8
1505 }
1506
1507 define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a)  { ; load three <4 x float> vectors via vld1x3 and return them as a struct
1508 ; CHECK-LABEL: test_vld1q_f32_x3
1509 ; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1510 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1511   %1 = bitcast float* %a to i8* ; the intrinsic is called with an i8* address
1512   %2 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8* %1, i32 4)
1513   %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 0
1514   %4 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 1
1515   %5 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 2
1516   %6 = insertvalue %struct.float32x4x3_t undef, <4 x float> %3, 0, 0 ; rebuild the return struct one element at a time
1517   %7 = insertvalue %struct.float32x4x3_t %6, <4 x float> %4, 0, 1
1518   %8 = insertvalue %struct.float32x4x3_t %7, <4 x float> %5, 0, 2
1519   ret %struct.float32x4x3_t %8
1520 }
1521
1522
1523 define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a)  { ; load three <2 x double> vectors via vld1x3 and return them as a struct
1524 ; CHECK-LABEL: test_vld1q_f64_x3
1525 ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1526 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1527   %1 = bitcast double* %a to i8* ; the intrinsic is called with an i8* address
1528   %2 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8* %1, i32 8)
1529   %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 0
1530   %4 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 1
1531   %5 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 2
1532   %6 = insertvalue %struct.float64x2x3_t undef, <2 x double> %3, 0, 0 ; rebuild the return struct one element at a time
1533   %7 = insertvalue %struct.float64x2x3_t %6, <2 x double> %4, 0, 1
1534   %8 = insertvalue %struct.float64x2x3_t %7, <2 x double> %5, 0, 2
1535   ret %struct.float64x2x3_t %8
1536 }
1537
1538 define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a)  { ; load three <8 x i8> vectors via vld1x3 and return them as a struct
1539 ; CHECK-LABEL: test_vld1_s8_x3
1540 ; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
1541 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1542   %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8* %a, i32 1)
1543   %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
1544   %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
1545   %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
1546   %5 = insertvalue %struct.int8x8x3_t undef, <8 x i8> %2, 0, 0 ; rebuild the return struct one element at a time
1547   %6 = insertvalue %struct.int8x8x3_t %5, <8 x i8> %3, 0, 1
1548   %7 = insertvalue %struct.int8x8x3_t %6, <8 x i8> %4, 0, 2
1549   ret %struct.int8x8x3_t %7
1550 }
1551
1552 define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a)  { ; load three <4 x i16> vectors via vld1x3 and return them as a struct
1553 ; CHECK-LABEL: test_vld1_s16_x3
1554 ; CHECK: ld1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
1555 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1556   %1 = bitcast i16* %a to i8* ; the intrinsic is called with an i8* address
1557   %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8* %1, i32 2)
1558   %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
1559   %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
1560   %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
1561   %6 = insertvalue %struct.int16x4x3_t undef, <4 x i16> %3, 0, 0 ; rebuild the return struct one element at a time
1562   %7 = insertvalue %struct.int16x4x3_t %6, <4 x i16> %4, 0, 1
1563   %8 = insertvalue %struct.int16x4x3_t %7, <4 x i16> %5, 0, 2
1564   ret %struct.int16x4x3_t %8
1565 }
1566
1567 define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a)  { ; load three <2 x i32> vectors via vld1x3 and return them as a struct
1568 ; CHECK-LABEL: test_vld1_s32_x3
1569 ; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
1570 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1571   %1 = bitcast i32* %a to i8* ; the intrinsic is called with an i8* address; placed after the patterns like the sibling tests
1572   %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8* %1, i32 4)
1573   %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
1574   %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
1575   %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
1576   %6 = insertvalue %struct.int32x2x3_t undef, <2 x i32> %3, 0, 0 ; rebuild the return struct one element at a time
1577   %7 = insertvalue %struct.int32x2x3_t %6, <2 x i32> %4, 0, 1
1578   %8 = insertvalue %struct.int32x2x3_t %7, <2 x i32> %5, 0, 2
1579   ret %struct.int32x2x3_t %8
1580 }
1581
1582 define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a)  { ; load three <1 x i64> vectors via vld1x3 and return them as a struct
1583 ; CHECK-LABEL: test_vld1_s64_x3
1584 ; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
1585 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1586   %1 = bitcast i64* %a to i8* ; the intrinsic is called with an i8* address
1587   %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8* %1, i32 8)
1588   %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 0
1589   %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 1
1590   %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 2
1591   %6 = insertvalue %struct.int64x1x3_t undef, <1 x i64> %3, 0, 0 ; rebuild the return struct one element at a time
1592   %7 = insertvalue %struct.int64x1x3_t %6, <1 x i64> %4, 0, 1
1593   %8 = insertvalue %struct.int64x1x3_t %7, <1 x i64> %5, 0, 2
1594   ret %struct.int64x1x3_t %8
1595 }
1596
1597 define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a)  { ; load three <2 x float> vectors via vld1x3 and return them as a struct
1598 ; CHECK-LABEL: test_vld1_f32_x3
1599 ; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
1600 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1601   %1 = bitcast float* %a to i8* ; the intrinsic is called with an i8* address
1602   %2 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8* %1, i32 4)
1603   %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 0
1604   %4 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 1
1605   %5 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 2
1606   %6 = insertvalue %struct.float32x2x3_t undef, <2 x float> %3, 0, 0 ; rebuild the return struct one element at a time
1607   %7 = insertvalue %struct.float32x2x3_t %6, <2 x float> %4, 0, 1
1608   %8 = insertvalue %struct.float32x2x3_t %7, <2 x float> %5, 0, 2
1609   ret %struct.float32x2x3_t %8
1610 }
1611
1612
1613 define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a)  { ; load three <1 x double> vectors via vld1x3 and return them as a struct
1614 ; CHECK-LABEL: test_vld1_f64_x3
1615 ; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
1616 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1617   %1 = bitcast double* %a to i8* ; the intrinsic is called with an i8* address
1618   %2 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8* %1, i32 8)
1619   %3 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 0
1620   %4 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 1
1621   %5 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 2
1622   %6 = insertvalue %struct.float64x1x3_t undef, <1 x double> %3, 0, 0 ; rebuild the return struct one element at a time
1623   %7 = insertvalue %struct.float64x1x3_t %6, <1 x double> %4, 0, 1
1624   %8 = insertvalue %struct.float64x1x3_t %7, <1 x double> %5, 0, 2
1625   ret %struct.float64x1x3_t %8
1626 }
1627
1628 define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a)  { ; load four <16 x i8> vectors via vld1x4 and return them as a struct
1629 ; CHECK-LABEL: test_vld1q_s8_x4
1630 ; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
1631 ; (Pattern kept on one line: FileCheck does not read a wrapped comment line as a continuation.)
1632   %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8* %a, i32 1)
1633   %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
1634   %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
1635   %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
1636   %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
1637   %6 = insertvalue %struct.int8x16x4_t undef, <16 x i8> %2, 0, 0 ; rebuild the return struct one element at a time
1638   %7 = insertvalue %struct.int8x16x4_t %6, <16 x i8> %3, 0, 1
1639   %8 = insertvalue %struct.int8x16x4_t %7, <16 x i8> %4, 0, 2
1640   %9 = insertvalue %struct.int8x16x4_t %8, <16 x i8> %5, 0, 3
1641   ret %struct.int8x16x4_t %9
1642 }
1643
; vld1x4 of <8 x i16>: expects one ld1 naming four .8h registers. The pattern is
; kept on one directive line so the last register and base address are matched.
1644 define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a)  {
1645 ; CHECK-LABEL: test_vld1q_s16_x4
1646 ; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
1648   %1 = bitcast i16* %a to i8*
1649   %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8* %1, i32 2)
1650   %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
1651   %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 1
1652   %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 2
1653   %6 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 3
1654   %7 = insertvalue %struct.int16x8x4_t undef, <8 x i16> %3, 0, 0
1655   %8 = insertvalue %struct.int16x8x4_t %7, <8 x i16> %4, 0, 1
1656   %9 = insertvalue %struct.int16x8x4_t %8, <8 x i16> %5, 0, 2
1657   %10 = insertvalue %struct.int16x8x4_t %9, <8 x i16> %6, 0, 3
1658   ret %struct.int16x8x4_t %10
1659 }
1660
; vld1x4 of <4 x i32>: expects one ld1 naming four .4s registers. The pattern is
; kept on one directive line so the last register and base address are matched.
1661 define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a)  {
1662 ; CHECK-LABEL: test_vld1q_s32_x4
1663 ; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1665   %1 = bitcast i32* %a to i8*
1666   %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8* %1, i32 4)
1667   %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 0
1668   %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 1
1669   %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 2
1670   %6 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 3
1671   %7 = insertvalue %struct.int32x4x4_t undef, <4 x i32> %3, 0, 0
1672   %8 = insertvalue %struct.int32x4x4_t %7, <4 x i32> %4, 0, 1
1673   %9 = insertvalue %struct.int32x4x4_t %8, <4 x i32> %5, 0, 2
1674   %10 = insertvalue %struct.int32x4x4_t %9, <4 x i32> %6, 0, 3
1675   ret %struct.int32x4x4_t %10
1676 }
1677
; vld1x4 of <2 x i64>: expects one ld1 naming four .2d registers. The pattern is
; kept on one directive line so the last register and base address are matched.
1678 define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a)  {
1679 ; CHECK-LABEL: test_vld1q_s64_x4
1680 ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1682   %1 = bitcast i64* %a to i8*
1683   %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8* %1, i32 8)
1684   %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
1685   %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 1
1686   %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 2
1687   %6 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 3
1688   %7 = insertvalue %struct.int64x2x4_t undef, <2 x i64> %3, 0, 0
1689   %8 = insertvalue %struct.int64x2x4_t %7, <2 x i64> %4, 0, 1
1690   %9 = insertvalue %struct.int64x2x4_t %8, <2 x i64> %5, 0, 2
1691   %10 = insertvalue %struct.int64x2x4_t %9, <2 x i64> %6, 0, 3
1692   ret %struct.int64x2x4_t %10
1693 }
1694
; vld1x4 of <4 x float>: expects one ld1 naming four .4s registers. The pattern is
; kept on one directive line so the last register and base address are matched.
1695 define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a)  {
1696 ; CHECK-LABEL: test_vld1q_f32_x4
1697 ; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1699   %1 = bitcast float* %a to i8*
1700   %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4)
1701   %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0
1702   %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 1
1703   %5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 2
1704   %6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 3
1705   %7 = insertvalue %struct.float32x4x4_t undef, <4 x float> %3, 0, 0
1706   %8 = insertvalue %struct.float32x4x4_t %7, <4 x float> %4, 0, 1
1707   %9 = insertvalue %struct.float32x4x4_t %8, <4 x float> %5, 0, 2
1708   %10 = insertvalue %struct.float32x4x4_t %9, <4 x float> %6, 0, 3
1709   ret %struct.float32x4x4_t %10
1710 }
1711
; vld1x4 of <2 x double>: expects one ld1 naming four .2d registers (all four
; must share the .2d arrangement). The pattern is kept on one directive line so
; the last register and base address are matched.
1712 define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a)  {
1713 ; CHECK-LABEL: test_vld1q_f64_x4
1714 ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1716   %1 = bitcast double* %a to i8*
1717   %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8* %1, i32 8)
1718   %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0
1719   %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1
1720   %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2
1721   %6 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3
1722   %7 = insertvalue %struct.float64x2x4_t undef, <2 x double> %3, 0, 0
1723   %8 = insertvalue %struct.float64x2x4_t %7, <2 x double> %4, 0, 1
1724   %9 = insertvalue %struct.float64x2x4_t %8, <2 x double> %5, 0, 2
1725   %10 = insertvalue %struct.float64x2x4_t %9, <2 x double> %6, 0, 3
1726   ret %struct.float64x2x4_t %10
1727 }
1728
; vld1x4 of <8 x i8>: expects one ld1 naming four .8b registers. The pattern is
; kept on one directive line so the last register and base address are matched.
1729 define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a)  {
1730 ; CHECK-LABEL: test_vld1_s8_x4
1731 ; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
1733   %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1)
1734   %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
1735   %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
1736   %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
1737   %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3
1738   %6 = insertvalue %struct.int8x8x4_t undef, <8 x i8> %2, 0, 0
1739   %7 = insertvalue %struct.int8x8x4_t %6, <8 x i8> %3, 0, 1
1740   %8 = insertvalue %struct.int8x8x4_t %7, <8 x i8> %4, 0, 2
1741   %9 = insertvalue %struct.int8x8x4_t %8, <8 x i8> %5, 0, 3
1742   ret %struct.int8x8x4_t %9
1743 }
1744
; vld1x4 of <4 x i16>: expects one ld1 naming four .4h registers. The pattern is
; kept on one directive line so the last register and base address are matched.
1745 define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a)  {
1746 ; CHECK-LABEL: test_vld1_s16_x4
1747 ; CHECK: ld1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
1749   %1 = bitcast i16* %a to i8*
1750   %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8* %1, i32 2)
1751   %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
1752   %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
1753   %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
1754   %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 3
1755   %7 = insertvalue %struct.int16x4x4_t undef, <4 x i16> %3, 0, 0
1756   %8 = insertvalue %struct.int16x4x4_t %7, <4 x i16> %4, 0, 1
1757   %9 = insertvalue %struct.int16x4x4_t %8, <4 x i16> %5, 0, 2
1758   %10 = insertvalue %struct.int16x4x4_t %9, <4 x i16> %6, 0, 3
1759   ret %struct.int16x4x4_t %10
1760 }
1761
; vld1x4 of <2 x i32>: expects one ld1 naming four .2s registers. The pattern is
; kept on one directive line so the last register and base address are matched.
1762 define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a)  {
1763 ; CHECK-LABEL: test_vld1_s32_x4
1764 ; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
1766   %1 = bitcast i32* %a to i8*
1767   %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8* %1, i32 4)
1768   %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
1769   %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
1770   %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
1771   %6 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3
1772   %7 = insertvalue %struct.int32x2x4_t undef, <2 x i32> %3, 0, 0
1773   %8 = insertvalue %struct.int32x2x4_t %7, <2 x i32> %4, 0, 1
1774   %9 = insertvalue %struct.int32x2x4_t %8, <2 x i32> %5, 0, 2
1775   %10 = insertvalue %struct.int32x2x4_t %9, <2 x i32> %6, 0, 3
1776   ret %struct.int32x2x4_t %10
1777 }
1778
; vld1x4 of <1 x i64>: expects one ld1 naming four .1d registers. The pattern is
; kept on one directive line so the last register and base address are matched.
1779 define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a)  {
1780 ; CHECK-LABEL: test_vld1_s64_x4
1781 ; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
1783   %1 = bitcast i64* %a to i8*
1784   %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8* %1, i32 8)
1785   %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 0
1786   %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 1
1787   %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 2
1788   %6 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 3
1789   %7 = insertvalue %struct.int64x1x4_t undef, <1 x i64> %3, 0, 0
1790   %8 = insertvalue %struct.int64x1x4_t %7, <1 x i64> %4, 0, 1
1791   %9 = insertvalue %struct.int64x1x4_t %8, <1 x i64> %5, 0, 2
1792   %10 = insertvalue %struct.int64x1x4_t %9, <1 x i64> %6, 0, 3
1793   ret %struct.int64x1x4_t %10
1794 }
1795
; vld1x4 of <2 x float>: expects one ld1 naming four .2s registers. The pattern is
; kept on one directive line so the last register and base address are matched.
1796 define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a)  {
1797 ; CHECK-LABEL: test_vld1_f32_x4
1798 ; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
1800   %1 = bitcast float* %a to i8*
1801   %2 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8* %1, i32 4)
1802   %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 0
1803   %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 1
1804   %5 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 2
1805   %6 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 3
1806   %7 = insertvalue %struct.float32x2x4_t undef, <2 x float> %3, 0, 0
1807   %8 = insertvalue %struct.float32x2x4_t %7, <2 x float> %4, 0, 1
1808   %9 = insertvalue %struct.float32x2x4_t %8, <2 x float> %5, 0, 2
1809   %10 = insertvalue %struct.float32x2x4_t %9, <2 x float> %6, 0, 3
1810   ret %struct.float32x2x4_t %10
1811 }
1812
1813
; vld1x4 of <1 x double>: expects one ld1 naming four .1d registers. The pattern
; is kept on one directive line so the last register and base address are matched.
1814 define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a)  {
1815 ; CHECK-LABEL: test_vld1_f64_x4
1816 ; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
1818   %1 = bitcast double* %a to i8*
1819   %2 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8* %1, i32 8)
1820   %3 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 0
1821   %4 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 1
1822   %5 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 2
1823   %6 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 3
1824   %7 = insertvalue %struct.float64x1x4_t undef, <1 x double> %3, 0, 0
1825   %8 = insertvalue %struct.float64x1x4_t %7, <1 x double> %4, 0, 1
1826   %9 = insertvalue %struct.float64x1x4_t %8, <1 x double> %5, 0, 2
1827   %10 = insertvalue %struct.float64x1x4_t %9, <1 x double> %6, 0, 3
1828   ret %struct.float64x1x4_t %10
1829 }
1830
; vst1x2 of <16 x i8>: both elements of %b are handed to the intrinsic and a
; single two-register .16b st1 is expected.
1831 define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b)  {
1832 ; CHECK-LABEL: test_vst1q_s8_x2
1833 ; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
1834   %vec0 = extractvalue [2 x <16 x i8>] %b, 0
1835   %vec1 = extractvalue [2 x <16 x i8>] %b, 1
1836   tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %vec0, <16 x i8> %vec1, i32 1)
1837   ret void
1838 }
1839
; vst1x2 of <8 x i16>: both elements of %b are handed to the intrinsic and a
; single two-register .8h st1 is expected.
1840 define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b)  {
1841 ; CHECK-LABEL: test_vst1q_s16_x2
1842 ; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
1843   %vec0 = extractvalue [2 x <8 x i16>] %b, 0
1844   %vec1 = extractvalue [2 x <8 x i16>] %b, 1
1845   %base = bitcast i16* %a to i8*
1846   tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %base, <8 x i16> %vec0, <8 x i16> %vec1, i32 2)
1847   ret void
1848 }
1849
; vst1x2 of <4 x i32>: both elements of %b are handed to the intrinsic and a
; single two-register .4s st1 is expected.
1850 define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b)  {
1851 ; CHECK-LABEL: test_vst1q_s32_x2
1852 ; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1853   %vec0 = extractvalue [2 x <4 x i32>] %b, 0
1854   %vec1 = extractvalue [2 x <4 x i32>] %b, 1
1855   %base = bitcast i32* %a to i8*
1856   tail call void @llvm.aarch64.neon.vst1x2.v4i32(i8* %base, <4 x i32> %vec0, <4 x i32> %vec1, i32 4)
1857   ret void
1858 }
1859
; vst1x2 of <2 x i64>: both elements of %b are handed to the intrinsic and a
; single two-register .2d st1 is expected.
1860 define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b)  {
1861 ; CHECK-LABEL: test_vst1q_s64_x2
1862 ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1863   %vec0 = extractvalue [2 x <2 x i64>] %b, 0
1864   %vec1 = extractvalue [2 x <2 x i64>] %b, 1
1865   %base = bitcast i64* %a to i8*
1866   tail call void @llvm.aarch64.neon.vst1x2.v2i64(i8* %base, <2 x i64> %vec0, <2 x i64> %vec1, i32 8)
1867   ret void
1868 }
1869
; vst1x2 of <4 x float>: both elements of %b are handed to the intrinsic and a
; single two-register .4s st1 is expected.
1870 define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b)  {
1871 ; CHECK-LABEL: test_vst1q_f32_x2
1872 ; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1873   %vec0 = extractvalue [2 x <4 x float>] %b, 0
1874   %vec1 = extractvalue [2 x <4 x float>] %b, 1
1875   %base = bitcast float* %a to i8*
1876   tail call void @llvm.aarch64.neon.vst1x2.v4f32(i8* %base, <4 x float> %vec0, <4 x float> %vec1, i32 4)
1877   ret void
1878 }
1879
1880
; vst1x2 of <2 x double>: both elements of %b are handed to the intrinsic and a
; single two-register .2d st1 is expected.
1881 define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b)  {
1882 ; CHECK-LABEL: test_vst1q_f64_x2
1883 ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1884   %vec0 = extractvalue [2 x <2 x double>] %b, 0
1885   %vec1 = extractvalue [2 x <2 x double>] %b, 1
1886   %base = bitcast double* %a to i8*
1887   tail call void @llvm.aarch64.neon.vst1x2.v2f64(i8* %base, <2 x double> %vec0, <2 x double> %vec1, i32 8)
1888   ret void
1889 }
1890
; vst1x2 of <8 x i8>: both elements of %b are handed to the intrinsic and a
; single two-register .8b st1 is expected.
1891 define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b)  {
1892 ; CHECK-LABEL: test_vst1_s8_x2
1893 ; CHECK: st1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
1894   %vec0 = extractvalue [2 x <8 x i8>] %b, 0
1895   %vec1 = extractvalue [2 x <8 x i8>] %b, 1
1896   tail call void @llvm.aarch64.neon.vst1x2.v8i8(i8* %a, <8 x i8> %vec0, <8 x i8> %vec1, i32 1)
1897   ret void
1898 }
1899
; vst1x2 of <4 x i16>: both elements of %b are handed to the intrinsic and a
; single two-register .4h st1 is expected.
1900 define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b)  {
1901 ; CHECK-LABEL: test_vst1_s16_x2
1902 ; CHECK: st1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
1903   %vec0 = extractvalue [2 x <4 x i16>] %b, 0
1904   %vec1 = extractvalue [2 x <4 x i16>] %b, 1
1905   %base = bitcast i16* %a to i8*
1906   tail call void @llvm.aarch64.neon.vst1x2.v4i16(i8* %base, <4 x i16> %vec0, <4 x i16> %vec1, i32 2)
1907   ret void
1908 }
1909
; vst1x2 of <2 x i32>: both elements of %b are handed to the intrinsic and a
; single two-register .2s st1 is expected.
1910 define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b)  {
1911 ; CHECK-LABEL: test_vst1_s32_x2
1912 ; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
1913   %vec0 = extractvalue [2 x <2 x i32>] %b, 0
1914   %vec1 = extractvalue [2 x <2 x i32>] %b, 1
1915   %base = bitcast i32* %a to i8*
1916   tail call void @llvm.aarch64.neon.vst1x2.v2i32(i8* %base, <2 x i32> %vec0, <2 x i32> %vec1, i32 4)
1917   ret void
1918 }
1919
; vst1x2 of <1 x i64>: both elements of %b are handed to the intrinsic and a
; single two-register .1d st1 is expected.
1920 define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b)  {
1921 ; CHECK-LABEL: test_vst1_s64_x2
1922 ; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
1923   %vec0 = extractvalue [2 x <1 x i64>] %b, 0
1924   %vec1 = extractvalue [2 x <1 x i64>] %b, 1
1925   %base = bitcast i64* %a to i8*
1926   tail call void @llvm.aarch64.neon.vst1x2.v1i64(i8* %base, <1 x i64> %vec0, <1 x i64> %vec1, i32 8)
1927   ret void
1928 }
1929
; vst1x2 of <2 x float>: both elements of %b are handed to the intrinsic and a
; single two-register .2s st1 is expected.
1930 define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b)  {
1931 ; CHECK-LABEL: test_vst1_f32_x2
1932 ; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
1933   %vec0 = extractvalue [2 x <2 x float>] %b, 0
1934   %vec1 = extractvalue [2 x <2 x float>] %b, 1
1935   %base = bitcast float* %a to i8*
1936   tail call void @llvm.aarch64.neon.vst1x2.v2f32(i8* %base, <2 x float> %vec0, <2 x float> %vec1, i32 4)
1937   ret void
1938 }
1939
; vst1x2 of <1 x double>: both elements of %b are handed to the intrinsic and a
; single two-register .1d st1 is expected.
1940 define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b)  {
1941 ; CHECK-LABEL: test_vst1_f64_x2
1942 ; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
1943   %vec0 = extractvalue [2 x <1 x double>] %b, 0
1944   %vec1 = extractvalue [2 x <1 x double>] %b, 1
1945   %base = bitcast double* %a to i8*
1946   tail call void @llvm.aarch64.neon.vst1x2.v1f64(i8* %base, <1 x double> %vec0, <1 x double> %vec1, i32 8)
1947   ret void
1948 }
1949
; vst1x3 of <16 x i8>: expects one st1 naming three .16b registers. The pattern
; is kept on one directive line so the base-address operand is matched as well.
1950 define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b)  {
1951 ; CHECK-LABEL: test_vst1q_s8_x3
1952 ; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
1954   %1 = extractvalue [3 x <16 x i8>] %b, 0
1955   %2 = extractvalue [3 x <16 x i8>] %b, 1
1956   %3 = extractvalue [3 x <16 x i8>] %b, 2
1957   tail call void @llvm.aarch64.neon.vst1x3.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, i32 1)
1958   ret void
1959 }
1960
; vst1x3 of <8 x i16>: expects one st1 naming three .8h registers. The pattern
; is kept on one directive line so the base-address operand is matched as well.
1961 define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b)  {
1962 ; CHECK-LABEL: test_vst1q_s16_x3
1963 ; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
1965   %1 = extractvalue [3 x <8 x i16>] %b, 0
1966   %2 = extractvalue [3 x <8 x i16>] %b, 1
1967   %3 = extractvalue [3 x <8 x i16>] %b, 2
1968   %4 = bitcast i16* %a to i8*
1969   tail call void @llvm.aarch64.neon.vst1x3.v8i16(i8* %4, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, i32 2)
1970   ret void
1971 }
1972
; vst1x3 of <4 x i32>: expects one st1 naming three .4s registers. The pattern
; is kept on one directive line so the base-address operand is matched as well.
1973 define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b)  {
1974 ; CHECK-LABEL: test_vst1q_s32_x3
1975 ; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
1977   %1 = extractvalue [3 x <4 x i32>] %b, 0
1978   %2 = extractvalue [3 x <4 x i32>] %b, 1
1979   %3 = extractvalue [3 x <4 x i32>] %b, 2
1980   %4 = bitcast i32* %a to i8*
1981   tail call void @llvm.aarch64.neon.vst1x3.v4i32(i8* %4, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, i32 4)
1982   ret void
1983 }
1984
; vst1x3 of <2 x i64>: expects one st1 naming three .2d registers. The pattern
; is kept on one directive line so the base-address operand is matched as well.
1985 define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b)  {
1986 ; CHECK-LABEL: test_vst1q_s64_x3
1987 ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
1989   %1 = extractvalue [3 x <2 x i64>] %b, 0
1990   %2 = extractvalue [3 x <2 x i64>] %b, 1
1991   %3 = extractvalue [3 x <2 x i64>] %b, 2
1992   %4 = bitcast i64* %a to i8*
1993   tail call void @llvm.aarch64.neon.vst1x3.v2i64(i8* %4, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, i32 8)
1994   ret void
1995 }
1996
; vst1x3 of <4 x float>: expects one st1 naming three .4s registers. The pattern
; is kept on one directive line so the base-address operand is matched as well.
1997 define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b)  {
1998 ; CHECK-LABEL: test_vst1q_f32_x3
1999 ; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
2001   %1 = extractvalue [3 x <4 x float>] %b, 0
2002   %2 = extractvalue [3 x <4 x float>] %b, 1
2003   %3 = extractvalue [3 x <4 x float>] %b, 2
2004   %4 = bitcast float* %a to i8*
2005   tail call void @llvm.aarch64.neon.vst1x3.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 4)
2006   ret void
2007 }
2008
; vst1x3 of <2 x double>: expects one st1 naming three .2d registers. The pattern
; is kept on one directive line so the base-address operand is matched as well.
2009 define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b)  {
2010 ; CHECK-LABEL: test_vst1q_f64_x3
2011 ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
2013   %1 = extractvalue [3 x <2 x double>] %b, 0
2014   %2 = extractvalue [3 x <2 x double>] %b, 1
2015   %3 = extractvalue [3 x <2 x double>] %b, 2
2016   %4 = bitcast double* %a to i8*
2017   tail call void @llvm.aarch64.neon.vst1x3.v2f64(i8* %4, <2 x double> %1, <2 x double> %2, <2 x double> %3, i32 8)
2018   ret void
2019 }
2020
; vst1x3 of <8 x i8>: expects one st1 naming three .8b registers. The pattern
; is kept on one directive line so the base-address operand is matched as well.
2021 define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b)  {
2022 ; CHECK-LABEL: test_vst1_s8_x3
2023 ; CHECK: st1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
2025   %1 = extractvalue [3 x <8 x i8>] %b, 0
2026   %2 = extractvalue [3 x <8 x i8>] %b, 1
2027   %3 = extractvalue [3 x <8 x i8>] %b, 2
2028   tail call void @llvm.aarch64.neon.vst1x3.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 1)
2029   ret void
2030 }
2031
; vst1x3 of <4 x i16>: expects one st1 naming three .4h registers. The pattern
; is kept on one directive line so the base-address operand is matched as well.
2032 define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b)  {
2033 ; CHECK-LABEL: test_vst1_s16_x3
2034 ; CHECK: st1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
2036   %1 = extractvalue [3 x <4 x i16>] %b, 0
2037   %2 = extractvalue [3 x <4 x i16>] %b, 1
2038   %3 = extractvalue [3 x <4 x i16>] %b, 2
2039   %4 = bitcast i16* %a to i8*
2040   tail call void @llvm.aarch64.neon.vst1x3.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 2)
2041   ret void
2042 }
2043
; vst1x3 of <2 x i32>: expects one st1 naming three .2s registers. The pattern
; is kept on one directive line so the base-address operand is matched as well.
2044 define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b)  {
2045 ; CHECK-LABEL: test_vst1_s32_x3
2046 ; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
2048   %1 = extractvalue [3 x <2 x i32>] %b, 0
2049   %2 = extractvalue [3 x <2 x i32>] %b, 1
2050   %3 = extractvalue [3 x <2 x i32>] %b, 2
2051   %4 = bitcast i32* %a to i8*
2052   tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4)
2053   ret void
2054 }
2055
; vst1x3 of <1 x i64>: expects one st1 naming three .1d registers. The pattern
; is kept on one directive line so the base-address operand is matched as well.
2056 define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b)  {
2057 ; CHECK-LABEL: test_vst1_s64_x3
2058 ; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
2060   %1 = extractvalue [3 x <1 x i64>] %b, 0
2061   %2 = extractvalue [3 x <1 x i64>] %b, 1
2062   %3 = extractvalue [3 x <1 x i64>] %b, 2
2063   %4 = bitcast i64* %a to i8*
2064   tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8)
2065   ret void
2066 }
2067
; vst1x3 of <2 x float>: expects one st1 naming three .2s registers. The pattern
; is kept on one directive line so the base-address operand is matched as well.
2068 define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b)  {
2069 ; CHECK-LABEL: test_vst1_f32_x3
2070 ; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
2072   %1 = extractvalue [3 x <2 x float>] %b, 0
2073   %2 = extractvalue [3 x <2 x float>] %b, 1
2074   %3 = extractvalue [3 x <2 x float>] %b, 2
2075   %4 = bitcast float* %a to i8*
2076   tail call void @llvm.aarch64.neon.vst1x3.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 4)
2077   ret void
2078 }
2079
; vst1x3 of <1 x double>: expects one st1 naming three .1d registers. The pattern
; is kept on one directive line so the base-address operand is matched as well.
2080 define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b)  {
2081 ; CHECK-LABEL: test_vst1_f64_x3
2082 ; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
2084   %1 = extractvalue [3 x <1 x double>] %b, 0
2085   %2 = extractvalue [3 x <1 x double>] %b, 1
2086   %3 = extractvalue [3 x <1 x double>] %b, 2
2087   %4 = bitcast double* %a to i8*
2088   tail call void @llvm.aarch64.neon.vst1x3.v1f64(i8* %4, <1 x double> %1, <1 x double> %2, <1 x double> %3, i32 8)
2089   ret void
2090 }
2091
; vst1x4 of <16 x i8>: expects one st1 naming four .16b registers. The pattern is
; kept on one directive line so the last register and base address are matched.
2092 define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b)  {
2093 ; CHECK-LABEL: test_vst1q_s8_x4
2094 ; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
2096   %1 = extractvalue [4 x <16 x i8>] %b, 0
2097   %2 = extractvalue [4 x <16 x i8>] %b, 1
2098   %3 = extractvalue [4 x <16 x i8>] %b, 2
2099   %4 = extractvalue [4 x <16 x i8>] %b, 3
2100   tail call void @llvm.aarch64.neon.vst1x4.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, <16 x i8> %4, i32 1)
2101   ret void
2102 }
2103
; vst1x4 of <8 x i16>: expects one st1 naming four .8h registers. The pattern is
; kept on one directive line so the last register and base address are matched.
2104 define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b)  {
2105 ; CHECK-LABEL: test_vst1q_s16_x4
2106 ; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
2108   %1 = extractvalue [4 x <8 x i16>] %b, 0
2109   %2 = extractvalue [4 x <8 x i16>] %b, 1
2110   %3 = extractvalue [4 x <8 x i16>] %b, 2
2111   %4 = extractvalue [4 x <8 x i16>] %b, 3
2112   %5 = bitcast i16* %a to i8*
2113   tail call void @llvm.aarch64.neon.vst1x4.v8i16(i8* %5, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, <8 x i16> %4, i32 2)
2114   ret void
2115 }
2116
; vst1x4 of <4 x i32>: expects one st1 naming four .4s registers. The pattern is
; kept on one directive line so the last register and base address are matched.
2117 define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b)  {
2118 ; CHECK-LABEL: test_vst1q_s32_x4
2119 ; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
2121   %1 = extractvalue [4 x <4 x i32>] %b, 0
2122   %2 = extractvalue [4 x <4 x i32>] %b, 1
2123   %3 = extractvalue [4 x <4 x i32>] %b, 2
2124   %4 = extractvalue [4 x <4 x i32>] %b, 3
2125   %5 = bitcast i32* %a to i8*
2126   tail call void @llvm.aarch64.neon.vst1x4.v4i32(i8* %5, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, i32 4)
2127   ret void
2128 }
2129
; vst1x4 of <2 x i64>: expects one st1 naming four .2d registers. The pattern is
; kept on one directive line so the last register and base address are matched.
2130 define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b)  {
2131 ; CHECK-LABEL: test_vst1q_s64_x4
2132 ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
2134   %1 = extractvalue [4 x <2 x i64>] %b, 0
2135   %2 = extractvalue [4 x <2 x i64>] %b, 1
2136   %3 = extractvalue [4 x <2 x i64>] %b, 2
2137   %4 = extractvalue [4 x <2 x i64>] %b, 3
2138   %5 = bitcast i64* %a to i8*
2139   tail call void @llvm.aarch64.neon.vst1x4.v2i64(i8* %5, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, <2 x i64> %4, i32 8)
2140   ret void
2141 }
2142
; vst1x4 of <4 x float>: expects one st1 naming four .4s registers. The pattern is
; kept on one directive line so the last register and base address are matched.
2143 define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b)  {
2144 ; CHECK-LABEL: test_vst1q_f32_x4
2145 ; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
2147   %1 = extractvalue [4 x <4 x float>] %b, 0
2148   %2 = extractvalue [4 x <4 x float>] %b, 1
2149   %3 = extractvalue [4 x <4 x float>] %b, 2
2150   %4 = extractvalue [4 x <4 x float>] %b, 3
2151   %5 = bitcast float* %a to i8*
2152   tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4)
2153   ret void
2154 }
2155
; vst1x4 of <2 x double>: expects one st1 naming four .2d registers. The pattern
; is kept on one directive line so the last register and base address are matched.
2156 define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b)  {
2157 ; CHECK-LABEL: test_vst1q_f64_x4
2158 ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
2160   %1 = extractvalue [4 x <2 x double>] %b, 0
2161   %2 = extractvalue [4 x <2 x double>] %b, 1
2162   %3 = extractvalue [4 x <2 x double>] %b, 2
2163   %4 = extractvalue [4 x <2 x double>] %b, 3
2164   %5 = bitcast double* %a to i8*
2165   tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8)
2166   ret void
2167 }
2168
; vst1x4 of <8 x i8>: expects one st1 naming four .8b registers. The pattern is
; kept on one directive line so the last register and base address are matched.
2169 define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b)  {
2170 ; CHECK-LABEL: test_vst1_s8_x4
2171 ; CHECK: st1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
2173   %1 = extractvalue [4 x <8 x i8>] %b, 0
2174   %2 = extractvalue [4 x <8 x i8>] %b, 1
2175   %3 = extractvalue [4 x <8 x i8>] %b, 2
2176   %4 = extractvalue [4 x <8 x i8>] %b, 3
2177   tail call void @llvm.aarch64.neon.vst1x4.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %4, i32 1)
2178   ret void
2179 }
2180
; vst1x4 of <4 x i16>: expects one st1 naming four .4h registers. The pattern is
; kept on one directive line so the last register and base address are matched.
2181 define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b)  {
2182 ; CHECK-LABEL: test_vst1_s16_x4
2183 ; CHECK: st1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
2185   %1 = extractvalue [4 x <4 x i16>] %b, 0
2186   %2 = extractvalue [4 x <4 x i16>] %b, 1
2187   %3 = extractvalue [4 x <4 x i16>] %b, 2
2188   %4 = extractvalue [4 x <4 x i16>] %b, 3
2189   %5 = bitcast i16* %a to i8*
2190   tail call void @llvm.aarch64.neon.vst1x4.v4i16(i8* %5, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, <4 x i16> %4, i32 2)
2191   ret void
2192 }
2193
; vst1x4 of <2 x i32>: expects one st1 naming four .2s registers. The pattern is
; kept on one directive line so the last register and base address are matched.
2194 define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b)  {
2195 ; CHECK-LABEL: test_vst1_s32_x4
2196 ; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
2198   %1 = extractvalue [4 x <2 x i32>] %b, 0
2199   %2 = extractvalue [4 x <2 x i32>] %b, 1
2200   %3 = extractvalue [4 x <2 x i32>] %b, 2
2201   %4 = extractvalue [4 x <2 x i32>] %b, 3
2202   %5 = bitcast i32* %a to i8*
2203   tail call void @llvm.aarch64.neon.vst1x4.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 4)
2204   ret void
2205 }
2206
; vst1x4 of <1 x i64>: expects one st1 naming four .1d registers. The pattern is
; kept on one directive line so the last register and base address are matched.
2207 define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b)  {
2208 ; CHECK-LABEL: test_vst1_s64_x4
2209 ; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
2211   %1 = extractvalue [4 x <1 x i64>] %b, 0
2212   %2 = extractvalue [4 x <1 x i64>] %b, 1
2213   %3 = extractvalue [4 x <1 x i64>] %b, 2
2214   %4 = extractvalue [4 x <1 x i64>] %b, 3
2215   %5 = bitcast i64* %a to i8*
2216   tail call void @llvm.aarch64.neon.vst1x4.v1i64(i8* %5, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, <1 x i64> %4, i32 8)
2217   ret void
2218 }
2219
; vst1x4 of <2 x float>: expects one st1 naming four .2s registers. The pattern is
; kept on one directive line so the last register and base address are matched.
2220 define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b)  {
2221 ; CHECK-LABEL: test_vst1_f32_x4
2222 ; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
2224   %1 = extractvalue [4 x <2 x float>] %b, 0
2225   %2 = extractvalue [4 x <2 x float>] %b, 1
2226   %3 = extractvalue [4 x <2 x float>] %b, 2
2227   %4 = extractvalue [4 x <2 x float>] %b, 3
2228   %5 = bitcast float* %a to i8*
2229   tail call void @llvm.aarch64.neon.vst1x4.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 4)
2230   ret void
2231 }
2232
; vst1x4 of <1 x double>: expects one st1 naming four .1d registers. The pattern
; is kept on one directive line so the last register and base address are matched.
2233 define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b)  {
2234 ; CHECK-LABEL: test_vst1_f64_x4
2235 ; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
2237   %1 = extractvalue [4 x <1 x double>] %b, 0
2238   %2 = extractvalue [4 x <1 x double>] %b, 1
2239   %3 = extractvalue [4 x <1 x double>] %b, 2
2240   %4 = extractvalue [4 x <1 x double>] %b, 3
2241   %5 = bitcast double* %a to i8*
2242   tail call void @llvm.aarch64.neon.vst1x4.v1f64(i8* %5, <1 x double> %1, <1 x double> %2, <1 x double> %3, <1 x double> %4, i32 8)
2243   ret void
2244 }
2245
2246 declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32)
2247 declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32)
2248 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8*, i32)
2249 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8*, i32)
2250 declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8*, i32)
2251 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8*, i32)
2252 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8*, i32)
2253 declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8*, i32)
2254 declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8*, i32)
2255 declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8*, i32)
2256 declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8*, i32)
2257 declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8*, i32)
; Declarations of the llvm.aarch64.neon.vld1x3.* intrinsics, one per vector
; type: 128-bit (v16i8, v8i16, v4i32, v2i64, v4f32, v2f64) followed by 64-bit
; (v8i8, v4i16, v2i32, v1i64, v2f32, v1f64). Each takes an i8* and an i32 and
; returns a struct of three vectors of the type named in the suffix.
; NOTE(review): the i32 operand is presumably an alignment in bytes -- confirm
; against the intrinsic definition.
2258 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8*, i32)
2259 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32)
2260 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8*, i32)
2261 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32)
2262 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8*, i32)
2263 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8*, i32)
2264 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8*, i32)
2265 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8*, i32)
2266 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8*, i32)
2267 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8*, i32)
2268 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8*, i32)
2269 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8*, i32)
; Declarations of the llvm.aarch64.neon.vld1x4.* intrinsics, one per vector
; type: 128-bit (v16i8, v8i16, v4i32, v2i64, v4f32, v2f64) followed by 64-bit
; (v8i8, v4i16, v2i32, v1i64, v2f32, v1f64). Each takes an i8* and an i32 and
; returns a struct of four vectors of the type named in the suffix.
; NOTE(review): the i32 operand is presumably an alignment in bytes -- confirm
; against the intrinsic definition.
2270 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8*, i32)
2271 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8*, i32)
2272 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8*, i32)
2273 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8*, i32)
2274 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32)
2275 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8*, i32)
2276 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32)
2277 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8*, i32)
2278 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8*, i32)
2279 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8*, i32)
2280 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8*, i32)
2281 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8*, i32)
; Declarations of the llvm.aarch64.neon.vst1x2.* intrinsics, one per vector
; type: 128-bit (v16i8, v8i16, v4i32, v2i64, v4f32, v2f64) followed by 64-bit
; (v8i8, v4i16, v2i32, v1i64, v2f32, v1f64). Each takes an i8*, two vectors of
; the type named in the suffix and a trailing i32, and returns void.
; NOTE(review): the i32 operand is presumably an alignment in bytes -- confirm
; against the intrinsic definition.
2282 declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
2283 declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
2284 declare void @llvm.aarch64.neon.vst1x2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
2285 declare void @llvm.aarch64.neon.vst1x2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
2286 declare void @llvm.aarch64.neon.vst1x2.v4f32(i8*, <4 x float>, <4 x float>, i32)
2287 declare void @llvm.aarch64.neon.vst1x2.v2f64(i8*, <2 x double>, <2 x double>, i32)
2288 declare void @llvm.aarch64.neon.vst1x2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
2289 declare void @llvm.aarch64.neon.vst1x2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
2290 declare void @llvm.aarch64.neon.vst1x2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
2291 declare void @llvm.aarch64.neon.vst1x2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
2292 declare void @llvm.aarch64.neon.vst1x2.v2f32(i8*, <2 x float>, <2 x float>, i32)
2293 declare void @llvm.aarch64.neon.vst1x2.v1f64(i8*, <1 x double>, <1 x double>, i32)
; Declarations of the llvm.aarch64.neon.vst1x3.* intrinsics, one per vector
; type: 128-bit (v16i8, v8i16, v4i32, v2i64, v4f32, v2f64) followed by 64-bit
; (v8i8, v4i16, v2i32, v1i64, v2f32, v1f64). Each takes an i8*, three vectors
; of the type named in the suffix and a trailing i32, and returns void.
; NOTE(review): the i32 operand is presumably an alignment in bytes -- confirm
; against the intrinsic definition.
2294 declare void @llvm.aarch64.neon.vst1x3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
2295 declare void @llvm.aarch64.neon.vst1x3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
2296 declare void @llvm.aarch64.neon.vst1x3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
2297 declare void @llvm.aarch64.neon.vst1x3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
2298 declare void @llvm.aarch64.neon.vst1x3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
2299 declare void @llvm.aarch64.neon.vst1x3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
2300 declare void @llvm.aarch64.neon.vst1x3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
2301 declare void @llvm.aarch64.neon.vst1x3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
2302 declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
2303 declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
2304 declare void @llvm.aarch64.neon.vst1x3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
2305 declare void @llvm.aarch64.neon.vst1x3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)
; Declarations of the llvm.aarch64.neon.vst1x4.* intrinsics, one per vector
; type: 128-bit (v16i8, v8i16, v4i32, v2i64, v4f32, v2f64) followed by 64-bit
; (v8i8, v4i16, v2i32, v1i64, v2f32, v1f64). Each takes an i8*, four vectors
; of the type named in the suffix and a trailing i32, and returns void.
; The v1f64 variant is called earlier in this file with a pointer bitcast from
; double* and 8 as the i32 argument.
; NOTE(review): the i32 operand is presumably an alignment in bytes -- confirm
; against the intrinsic definition.
2306 declare void @llvm.aarch64.neon.vst1x4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
2307 declare void @llvm.aarch64.neon.vst1x4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
2308 declare void @llvm.aarch64.neon.vst1x4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
2309 declare void @llvm.aarch64.neon.vst1x4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
2310 declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
2311 declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
2312 declare void @llvm.aarch64.neon.vst1x4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
2313 declare void @llvm.aarch64.neon.vst1x4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
2314 declare void @llvm.aarch64.neon.vst1x4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
2315 declare void @llvm.aarch64.neon.vst1x4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
2316 declare void @llvm.aarch64.neon.vst1x4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
2317 declare void @llvm.aarch64.neon.vst1x4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)