1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file describes the AArch64 NEON instruction set.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
17 def Neon_bsl       : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
18                       [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
19                       SDTCisSameAs<0, 3>]>>;
20
21 // (outs Result), (ins Imm, OpCmode)
22 def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
23
24 def Neon_movi     : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
25
26 def Neon_mvni     : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
27
28 // (outs Result), (ins Imm)
29 def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
30                         [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
31
32 // (outs Result), (ins LHS, RHS, CondCode)
33 def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
34                  [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;
35
36 // (outs Result), (ins LHS, 0/0.0 constant, CondCode)
37 def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
38                  [SDTCisVec<0>,  SDTCisVec<1>]>>;
39
40 // (outs Result), (ins LHS, RHS)
41 def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
42                  [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;
43
44 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
45                                      SDTCisVT<2, i32>]>;
46 def Neon_sqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
47 def Neon_uqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
48
49 def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
50 def Neon_rev64    : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
51 def Neon_rev32    : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
52 def Neon_rev16    : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
53 def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
54                        [SDTCisVec<0>]>>;
55 def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
56                            [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
57 def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
58                            [SDTCisVec<0>,  SDTCisSameAs<0, 1>,
59                            SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
60
61 //===----------------------------------------------------------------------===//
62 // Multiclasses
63 //===----------------------------------------------------------------------===//
64
65 multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size,  bits<5> opcode,
66                                 string asmop, SDPatternOperator opnode8B,
67                                 SDPatternOperator opnode16B,
68                                 bit Commutable = 0> {
69   let isCommutable = Commutable in {
70     def _8B :  NeonI_3VSame<0b0, u, size, opcode,
71                (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
72                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
73                [(set (v8i8 VPR64:$Rd),
74                   (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
75                NoItinerary>;
76
77     def _16B : NeonI_3VSame<0b1, u, size, opcode,
78                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
79                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
80                [(set (v16i8 VPR128:$Rd),
81                   (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
82                NoItinerary>;
83   }
84
85 }
86
87 multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
88                                   string asmop, SDPatternOperator opnode,
89                                   bit Commutable = 0> {
90   let isCommutable = Commutable in {
91     def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
92               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
93               asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
94               [(set (v4i16 VPR64:$Rd),
95                  (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
96               NoItinerary>;
97
98     def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
99               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
100               asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
101               [(set (v8i16 VPR128:$Rd),
102                  (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
103               NoItinerary>;
104
105     def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
106               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
107               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
108               [(set (v2i32 VPR64:$Rd),
109                  (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
110               NoItinerary>;
111
112     def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
113               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
114               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
115               [(set (v4i32 VPR128:$Rd),
116                  (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
117               NoItinerary>;
118   }
119 }
120 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
121                                   string asmop, SDPatternOperator opnode,
122                                   bit Commutable = 0>
123    : NeonI_3VSame_HS_sizes<u, opcode,  asmop, opnode, Commutable> {
124   let isCommutable = Commutable in {
125     def _8B :  NeonI_3VSame<0b0, u, 0b00, opcode,
126                (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
127                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
128                [(set (v8i8 VPR64:$Rd),
129                   (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
130                NoItinerary>;
131
132     def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
133                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
134                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
135                [(set (v16i8 VPR128:$Rd),
136                   (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
137                NoItinerary>;
138   }
139 }
140
141 multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
142                                    string asmop, SDPatternOperator opnode,
143                                    bit Commutable = 0>
144    : NeonI_3VSame_BHS_sizes<u, opcode,  asmop, opnode, Commutable> {
145   let isCommutable = Commutable in {
146     def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
147               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
148               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
149               [(set (v2i64 VPR128:$Rd),
150                  (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
151               NoItinerary>;
152   }
153 }
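// As a rough illustration of how these multiclasses are used: the single
//   defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
// below expands to ADDvvv_8B, ADDvvv_16B, ADDvvv_4H, ADDvvv_8H, ADDvvv_2S,
// ADDvvv_4S and ADDvvv_2D, each matching the 'add' node at the corresponding
// vector type (v8i8 ... v2i64).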
154
155 // Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
156 // but Result types can be integer or floating point types.
157 multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
158                                  string asmop, SDPatternOperator opnode2S,
159                                  SDPatternOperator opnode4S,
160                                  SDPatternOperator opnode2D,
161                                  ValueType ResTy2S, ValueType ResTy4S,
162                                  ValueType ResTy2D, bit Commutable = 0> {
163   let isCommutable = Commutable in {
164     def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
165               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
166               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
167               [(set (ResTy2S VPR64:$Rd),
168                  (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
169               NoItinerary>;
170
171     def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
172               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
173               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
174               [(set (ResTy4S VPR128:$Rd),
175                  (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
176               NoItinerary>;
177
178     def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
179               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
180               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
181               [(set (ResTy2D VPR128:$Rd),
182                  (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
183                NoItinerary>;
184   }
185 }
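// For example (taken from the uses below), FADDvvv instantiates this
// multiclass with floating-point result types (v2f32, v4f32, v2f64), while
// FCMEQvvv instantiates it with integer mask result types (v2i32, v4i32,
// v2i64), even though both operate on floating-point operands.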
186
187 //===----------------------------------------------------------------------===//
188 // Instruction Definitions
189 //===----------------------------------------------------------------------===//
190
191 // Vector Arithmetic Instructions
192
193 // Vector Add (Integer and Floating-Point)
194
195 defm ADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
196 defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
197                                      v2f32, v4f32, v2f64, 1>;
198
199 // Vector Sub (Integer and Floating-Point)
200
201 defm SUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
202 defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
203                                      v2f32, v4f32, v2f64, 0>;
204
205 // Vector Multiply (Integer and Floating-Point)
206
207 defm MULvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
208 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
209                                      v2f32, v4f32, v2f64, 1>;
210
211 // Vector Multiply (Polynomial)
212
213 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
214                                     int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
215
216 // Vector Multiply-accumulate and Multiply-subtract (Integer)
217
218 // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and a
219 // constraint tying two of the operands ($src = $Rd).
220 class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
221   RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size, 
222   bits<5> opcode, SDPatternOperator opnode>
223   : NeonI_3VSame<q, u, size, opcode,
224     (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
225     asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
226     [(set (OpTy VPRC:$Rd),
227        (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
228     NoItinerary> {
229   let Constraints = "$src = $Rd";
230 }
231
232 def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
233                        (add node:$Ra, (mul node:$Rn, node:$Rm))>;
234
235 def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
236                        (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
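// For example, a v8i8 (add $Ra, (mul $Rn, $Rm)) matches Neon_mla and selects
// MLAvvv_8B below, with $Ra feeding the tied $src operand so that the
// accumulator and the destination share a register.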
237
238
239 def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
240                                              0b0, 0b0, 0b00, 0b10010, Neon_mla>;
241 def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
242                                              0b1, 0b0, 0b00, 0b10010, Neon_mla>;
243 def MLAvvv_4H:  NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16,
244                                              0b0, 0b0, 0b01, 0b10010, Neon_mla>;
245 def MLAvvv_8H:  NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16,
246                                              0b1, 0b0, 0b01, 0b10010, Neon_mla>;
247 def MLAvvv_2S:  NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32,
248                                              0b0, 0b0, 0b10, 0b10010, Neon_mla>;
249 def MLAvvv_4S:  NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32,
250                                              0b1, 0b0, 0b10, 0b10010, Neon_mla>;
251
252 def MLSvvv_8B:  NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8,
253                                              0b0, 0b1, 0b00, 0b10010, Neon_mls>;
254 def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
255                                              0b1, 0b1, 0b00, 0b10010, Neon_mls>;
256 def MLSvvv_4H:  NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16,
257                                              0b0, 0b1, 0b01, 0b10010, Neon_mls>;
258 def MLSvvv_8H:  NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16,
259                                              0b1, 0b1, 0b01, 0b10010, Neon_mls>;
260 def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
261                                              0b0, 0b1, 0b10, 0b10010, Neon_mls>;
262 def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
263                                              0b1, 0b1, 0b10, 0b10010, Neon_mls>;
264
265 // Vector Multiply-accumulate and Multiply-subtract (Floating Point)
266
267 def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
268                         (fadd node:$Ra, (fmul node:$Rn, node:$Rm))>;
269
270 def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
271                         (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>;
272
273 let Predicates = [HasNEON, UseFusedMAC] in {
274 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32,
275                                              0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
276 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32,
277                                              0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
278 def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d",  VPR128, v2f64,
279                                              0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
280
281 def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s",  VPR64,  v2f32,
282                                               0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
283 def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s",  VPR128, v4f32,
284                                              0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
285 def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d",  VPR128, v2f64,
286                                              0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
287 }
288
289 // We are also allowed to match the fma node itself, regardless of compile
290 // options.
291 def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
292           (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
293 def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
294           (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
295 def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
296           (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
297
298 def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
299           (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
300 def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
301           (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
302 def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
303           (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
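// Note that negating the multiplicand is what folds an explicit fma into the
// subtracting form: (fma (fneg $Rn), $Rm, $Ra) selects FMLS rather than FMLA.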
304
305 // Vector Divide (Floating-Point)
306
307 defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
308                                      v2f32, v4f32, v2f64, 0>;
309
310 // Vector Bitwise Operations
311
312 // Vector Bitwise AND
313
314 defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
315
316 // Vector Bitwise Exclusive OR
317
318 defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
319
320 // Vector Bitwise OR
321
322 defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
323
324 // ORR disassembled as MOV if Vn==Vm
325
326 // Vector Move - register
327 // Alias for ORR if Vn == Vm.
328 // FIXME: This is actually the preferred syntax but TableGen can't deal with
329 // custom printing of aliases.
330 def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
331                     (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
332 def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
333                     (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
334
335 // The MOVI instruction takes two immediate operands.  The first is the
336 // immediate encoding, while the second is the cmode.  A cmode of 14, or
337 // 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC.
338 def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
339 def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;
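// With cmode 0b1110 the 8-bit immediate is replicated into every byte of the
// vector, so Neon_AllOne materializes an all-ones vector and Neon_AllZero an
// all-zeros one (used below to express bitwise NOT as an XOR).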
340
341 def Neon_not8B  : PatFrag<(ops node:$in),
342                           (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
343 def Neon_not16B : PatFrag<(ops node:$in),
344                           (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;
345
346 def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
347                          (or node:$Rn, (Neon_not8B node:$Rm))>;
348
349 def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
350                           (or node:$Rn, (Neon_not16B node:$Rm))>;
351
352 def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
353                          (and node:$Rn, (Neon_not8B node:$Rm))>;
354
355 def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
356                           (and node:$Rn, (Neon_not16B node:$Rm))>;
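// These fragments spell NOT as an XOR with the all-ones vector above, so,
// for instance, (or $Rn, (xor $Rm, all-ones)) can select ORNvvv_8B and the
// analogous AND form can select BICvvv_8B.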
357
358
359 // Vector Bitwise OR NOT - register
360
361 defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
362                                    Neon_orn8B, Neon_orn16B, 0>;
363
364 // Vector Bitwise Bit Clear (AND NOT) - register
365
366 defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
367                                    Neon_bic8B, Neon_bic16B, 0>;
368
369 multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
370                                    SDPatternOperator opnode16B,
371                                    Instruction INST8B,
372                                    Instruction INST16B> {
373   def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
374             (INST8B VPR64:$Rn, VPR64:$Rm)>;
375   def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
376             (INST8B VPR64:$Rn, VPR64:$Rm)>;
377   def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
378             (INST8B VPR64:$Rn, VPR64:$Rm)>;
379   def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
380             (INST16B VPR128:$Rn, VPR128:$Rm)>;
381   def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
382             (INST16B VPR128:$Rn, VPR128:$Rm)>;
383   def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
384             (INST16B VPR128:$Rn, VPR128:$Rm)>;
385 }
386
387 // Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
388 defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
389 defm : Neon_bitwise2V_patterns<or,  or,  ORRvvv_8B, ORRvvv_16B>;
390 defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
391 defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
392 defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
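// The net effect (a small usage sketch): a v2i32 'and', which the ANDvvv_8B
// definition itself only matches at v8i8, is still selected to ANDvvv_8B via
// the patterns above, and likewise for the other element types and the
// 128-bit forms.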
393
394 //   Vector Bitwise Select
395 def BSLvvv_8B  : NeonI_3VSame_Constraint_impl<"bsl", ".8b",  VPR64, v8i8,
396                                               0b0, 0b1, 0b01, 0b00011, Neon_bsl>;
397
398 def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
399                                               0b1, 0b1, 0b01, 0b00011, Neon_bsl>;
400
401 multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
402                                    Instruction INST8B,
403                                    Instruction INST16B> {
404   // Disassociate type from instruction definition
405   def : Pat<(v2i32 (opnode VPR64:$src,VPR64:$Rn, VPR64:$Rm)),
406             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
407   def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
408             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
409   def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
410             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
411   def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
412             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
413   def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
414             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
415   def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
416             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
417
418   // Allow matching the BSL instruction pattern with a non-constant operand
419   def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
420                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
421           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
422   def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
423                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
424           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
425   def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
426                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
427           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
428   def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
429                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
430           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
431   def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
432                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
433           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
434   def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
435                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
436           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
437   def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
438                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
439           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
440   def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
441                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
442           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
443
444   // Allow matching the llvm.arm.* intrinsics.
445   def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
446                     (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
447             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
448   def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
449                     (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
450             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
451   def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
452                     (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
453             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
454   def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
455                     (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
456             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
457   def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
458                     (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
459             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
460   def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
461                     (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
462             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
463   def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
464                     (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
465             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
466   def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
467                     (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
468             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
469   def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
470                     (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
471             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
472   def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
473                     (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
474             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
475   def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
476                     (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
477             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
478 }
479
480 // Additional patterns for bitwise instruction BSL
481 defm : Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;
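// BSL computes Rd = (Rd & Rn) | (~Rd & Rm); the or/and/xor patterns in the
// multiclass above recognize that expansion directly, e.g.
//   (v8i8 (or (and $Rn, $Rd), (and $Rm, (Neon_not8B $Rd))))
// selects BSLvvv_8B with $Rd as the tied select mask.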
482
483 def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
484                            (Neon_bsl node:$src, node:$Rn, node:$Rm),
485                            [{ (void)N; return false; }]>;
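// The always-false predicate means this fragment never matches, so the BIT
// and BIF definitions below carry no effective selection pattern and are, in
// practice, assembler/disassembler-only here.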
486
487 // Vector Bitwise Insert if True
488
489 def BITvvv_8B  : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64,   v8i8,
490                    0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
491 def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
492                    0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
493
494 // Vector Bitwise Insert if False
495
496 def BIFvvv_8B  : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64,  v8i8,
497                                 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
498 def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
499                                 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
500
501 // Vector Absolute Difference and Accumulate (Signed, Unsigned)
502
503 def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
504                        (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
505 def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
506                        (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;
507
508 // Vector Absolute Difference and Accumulate (Unsigned)
509 def UABAvvv_8B :  NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64,  v8i8,
510                     0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
511 def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
512                     0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
513 def UABAvvv_4H :  NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64,  v4i16,
514                     0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
515 def UABAvvv_8H :  NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16,
516                     0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
517 def UABAvvv_2S :  NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64,  v2i32,
518                     0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
519 def UABAvvv_4S :  NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32,
520                     0b1, 0b1, 0b10, 0b01111, Neon_uaba>;
521
522 // Vector Absolute Difference and Accumulate (Signed)
523 def SABAvvv_8B :  NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64,  v8i8,
524                     0b0, 0b0, 0b00, 0b01111, Neon_saba>;
525 def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
526                     0b1, 0b0, 0b00, 0b01111, Neon_saba>;
527 def SABAvvv_4H :  NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64,  v4i16,
528                     0b0, 0b0, 0b01, 0b01111, Neon_saba>;
529 def SABAvvv_8H :  NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16,
530                     0b1, 0b0, 0b01, 0b01111, Neon_saba>;
531 def SABAvvv_2S :  NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64,  v2i32,
532                     0b0, 0b0, 0b10, 0b01111, Neon_saba>;
533 def SABAvvv_4S :  NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32,
534                     0b1, 0b0, 0b10, 0b01111, Neon_saba>;
535
536
537 // Vector Absolute Difference (Signed, Unsigned)
538 defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
539 defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;
540
541 // Vector Absolute Difference (Floating Point)
542 defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
543                                     int_arm_neon_vabds, int_arm_neon_vabds,
544                                     int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
545
546 // Vector Reciprocal Step (Floating Point)
547 defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
548                                        int_arm_neon_vrecps, int_arm_neon_vrecps,
549                                        int_arm_neon_vrecps,
550                                        v2f32, v4f32, v2f64, 0>;
551
552 // Vector Reciprocal Square Root Step (Floating Point)
553 defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
554                                         int_arm_neon_vrsqrts,
555                                         int_arm_neon_vrsqrts,
556                                         int_arm_neon_vrsqrts,
557                                         v2f32, v4f32, v2f64, 0>;
558
559 // Vector Comparisons
560
561 def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
562                         (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
563 def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
564                          (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
565 def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
566                         (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
567 def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
568                         (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
569 def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
570                         (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
571
572 // NeonI_compare_aliases class: swaps register operands to implement
573 // comparison aliases, e.g., CMLE is an alias for CMGE with the operands reversed.
574 class NeonI_compare_aliases<string asmop, string asmlane,
575                             Instruction inst, RegisterOperand VPRC>
576   : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
577                     ", $Rm" # asmlane,
578                   (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
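// For example (a usage sketch), "cmle v0.8b, v1.8b, v2.8b" is accepted by the
// assembler and emitted as CMGEvvv_8B with the source registers swapped,
// i.e. the same encoding as "cmge v0.8b, v2.8b, v1.8b".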
579
580 // Vector Comparisons (Integer)
581
582 // Vector Compare Mask Equal (Integer)
583 let isCommutable = 1 in {
584 defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
585 }
586
587 // Vector Compare Mask Higher or Same (Unsigned Integer)
588 defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;
589
590 // Vector Compare Mask Greater Than or Equal (Integer)
591 defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;
592
593 // Vector Compare Mask Higher (Unsigned Integer)
594 defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;
595
596 // Vector Compare Mask Greater Than (Integer)
597 defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;
598
599 // Vector Compare Mask Bitwise Test (Integer)
600 defm CMTSTvvv:  NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
601
602 // Vector Compare Mask Less or Same (Unsigned Integer)
603 // CMLS is an alias for CMHS with the operands reversed.
604 def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>;
605 def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
606 def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>;
607 def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>;
608 def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>;
609 def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>;
610 def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>;
611
612 // Vector Compare Mask Less Than or Equal (Integer)
613 // CMLE is an alias for CMGE with the operands reversed.
614 def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>;
615 def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
616 def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>;
617 def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>;
618 def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>;
619 def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>;
620 def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>;
621
622 // Vector Compare Mask Lower (Unsigned Integer)
623 // CMLO is an alias for CMHI with the operands reversed.
624 def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>;
625 def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
626 def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>;
627 def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>;
628 def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>;
629 def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>;
630 def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>;
631
632 // Vector Compare Mask Less Than (Integer)
633 // CMLT is an alias for CMGT with the operands reversed.
634 def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>;
635 def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
636 def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>;
637 def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>;
638 def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>;
639 def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>;
640 def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>;
641
642
643 def neon_uimm0_asmoperand : AsmOperandClass
644 {
645   let Name = "UImm0";
646   let PredicateMethod = "isUImm<0>";
647   let RenderMethod = "addImmOperands";
648 }
649
650 def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
651   let ParserMatchClass = neon_uimm0_asmoperand;
652   let PrintMethod = "printNeonUImm0Operand";
653
654 }
655
656 multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
657 {
658   def _8B :  NeonI_2VMisc<0b0, u, 0b00, opcode,
659              (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
660              asmop # "\t$Rd.8b, $Rn.8b, $Imm",
661              [(set (v8i8 VPR64:$Rd),
662                 (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
663              NoItinerary>;
664
665   def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
666              (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
667              asmop # "\t$Rd.16b, $Rn.16b, $Imm",
668              [(set (v16i8 VPR128:$Rd),
669                 (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
670              NoItinerary>;
671
672   def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
673             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
674             asmop # "\t$Rd.4h, $Rn.4h, $Imm",
675             [(set (v4i16 VPR64:$Rd),
676                (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
677             NoItinerary>;
678
679   def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
680             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
681             asmop # "\t$Rd.8h, $Rn.8h, $Imm",
682             [(set (v8i16 VPR128:$Rd),
683                (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
684             NoItinerary>;
685
686   def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
687             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
688             asmop # "\t$Rd.2s, $Rn.2s, $Imm",
689             [(set (v2i32 VPR64:$Rd),
690                (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
691             NoItinerary>;
692
693   def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
694             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
695             asmop # "\t$Rd.4s, $Rn.4s, $Imm",
696             [(set (v4i32 VPR128:$Rd),
697                (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
698             NoItinerary>;
699
700   def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
701             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
702             asmop # "\t$Rd.2d, $Rn.2d, $Imm",
703             [(set (v2i64 VPR128:$Rd),
704                (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
705             NoItinerary>;
706 }
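// Instantiations of this multiclass compare each lane against zero; the
// immediate operand only accepts zero, so e.g. CMGEvvi_2S is written as
// "cmge v0.2s, v1.2s, #0" (the exact immediate printing is handled by
// printNeonUImm0Operand).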
707
708 // Vector Compare Mask Equal to Zero (Integer)
709 defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;
710
711 // Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
712 defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;
713
714 // Vector Compare Mask Greater Than Zero (Signed Integer)
715 defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;
716
717 // Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
718 defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;
719
720 // Vector Compare Mask Less Than Zero (Signed Integer)
721 defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
722
723 // Vector Comparisons (Floating Point)
724
725 // Vector Compare Mask Equal (Floating Point)
726 let isCommutable = 1 in {
727 defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
728                                       Neon_cmeq, Neon_cmeq,
729                                       v2i32, v4i32, v2i64, 0>;
730 }
731
732 // Vector Compare Mask Greater Than Or Equal (Floating Point)
733 defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
734                                       Neon_cmge, Neon_cmge,
735                                       v2i32, v4i32, v2i64, 0>;
736
737 // Vector Compare Mask Greater Than (Floating Point)
738 defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
739                                       Neon_cmgt, Neon_cmgt,
740                                       v2i32, v4i32, v2i64, 0>;
741
742 // Vector Compare Mask Less Than Or Equal (Floating Point)
743 // FCMLE is an alias for FCMGE with the operands reversed.
744 def FCMLEvvv_2S  : NeonI_compare_aliases<"fcmle", ".2s",  FCMGEvvv_2S,  VPR64>;
745 def FCMLEvvv_4S  : NeonI_compare_aliases<"fcmle", ".4s",  FCMGEvvv_4S,  VPR128>;
746 def FCMLEvvv_2D  : NeonI_compare_aliases<"fcmle", ".2d",  FCMGEvvv_2D,  VPR128>;
747
748 // Vector Compare Mask Less Than (Floating Point)
749 // FCMLT is an alias for FCMGT with the operands reversed.
750 def FCMLTvvv_2S  : NeonI_compare_aliases<"fcmlt", ".2s",  FCMGTvvv_2S,  VPR64>;
751 def FCMLTvvv_4S  : NeonI_compare_aliases<"fcmlt", ".4s",  FCMGTvvv_4S,  VPR128>;
752 def FCMLTvvv_2D  : NeonI_compare_aliases<"fcmlt", ".2d",  FCMGTvvv_2D,  VPR128>;
753
754
755 multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
756                               string asmop, CondCode CC>
757 {
758   def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
759             (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
760             asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
761             [(set (v2i32 VPR64:$Rd),
762                (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))],
763             NoItinerary>;
764
765   def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
766             (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
767             asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
768             [(set (v4i32 VPR128:$Rd),
769                (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
770             NoItinerary>;
771
772   def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
773             (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
774             asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
775             [(set (v2i64 VPR128:$Rd),
776                (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
777             NoItinerary>;
778 }
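// These forms compare each floating-point lane against 0.0, e.g.
// "fcmgt v0.4s, v1.4s, #0.0", producing an all-ones/all-zeros integer mask
// per lane (hence the v2i32/v4i32/v2i64 result types).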
779
780 // Vector Compare Mask Equal to Zero (Floating Point)
781 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
782
783 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
784 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
785
786 // Vector Compare Mask Greater Than Zero (Floating Point)
787 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
788
789 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
790 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
791
792 // Vector Compare Mask Less Than Zero (Floating Point)
793 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
794
795 // Vector Absolute Comparisons (Floating Point)
796
797 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
798 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
799                                       int_arm_neon_vacged, int_arm_neon_vacgeq,
800                                       int_aarch64_neon_vacgeq,
801                                       v2i32, v4i32, v2i64, 0>;
802
803 // Vector Absolute Compare Mask Greater Than (Floating Point)
804 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
805                                       int_arm_neon_vacgtd, int_arm_neon_vacgtq,
806                                       int_aarch64_neon_vacgtq,
807                                       v2i32, v4i32, v2i64, 0>;
808
809 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
810 // FACLE is an alias for FACGE with the operands reversed.
811 def FACLEvvv_2S  : NeonI_compare_aliases<"facle", ".2s",  FACGEvvv_2S,  VPR64>;
812 def FACLEvvv_4S  : NeonI_compare_aliases<"facle", ".4s",  FACGEvvv_4S,  VPR128>;
813 def FACLEvvv_2D  : NeonI_compare_aliases<"facle", ".2d",  FACGEvvv_2D,  VPR128>;
814
815 // Vector Absolute Compare Mask Less Than (Floating Point)
816 // FACLT is an alias for FACGT with the operands reversed.
817 def FACLTvvv_2S  : NeonI_compare_aliases<"faclt", ".2s",  FACGTvvv_2S,  VPR64>;
818 def FACLTvvv_4S  : NeonI_compare_aliases<"faclt", ".4s",  FACGTvvv_4S,  VPR128>;
819 def FACLTvvv_2D  : NeonI_compare_aliases<"faclt", ".2d",  FACGTvvv_2D,  VPR128>;
820
821 // Vector halving add (Integer Signed, Unsigned)
822 defm SHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
823                                         int_arm_neon_vhadds, 1>;
824 defm UHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
825                                         int_arm_neon_vhaddu, 1>;
826
827 // Vector halving sub (Integer Signed, Unsigned)
828 defm SHSUBvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
829                                         int_arm_neon_vhsubs, 0>;
830 defm UHSUBvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
831                                         int_arm_neon_vhsubu, 0>;
832
833 // Vector rounding halving add (Integer Signed, Unsigned)
834 defm SRHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
835                                          int_arm_neon_vrhadds, 1>;
836 defm URHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
837                                          int_arm_neon_vrhaddu, 1>;
838
839 // Vector Saturating add (Integer Signed, Unsigned)
840 defm SQADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
841                    int_arm_neon_vqadds, 1>;
842 defm UQADDvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
843                    int_arm_neon_vqaddu, 1>;
844
845 // Vector Saturating sub (Integer Signed, Unsigned)
846 defm SQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
847                    int_arm_neon_vqsubs, 1>;
848 defm UQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
849                    int_arm_neon_vqsubu, 1>;
850
851 // Vector Shift Left (Signed and Unsigned Integer)
852 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
853                  int_arm_neon_vshifts, 1>;
854 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
855                  int_arm_neon_vshiftu, 1>;
856
857 // Vector Saturating Shift Left (Signed and Unsigned Integer)
858 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
859                   int_arm_neon_vqshifts, 1>;
860 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
861                   int_arm_neon_vqshiftu, 1>;
862
863 // Vector Rounding Shift Left (Signed and Unsigned Integer)
864 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
865                   int_arm_neon_vrshifts, 1>;
866 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
867                   int_arm_neon_vrshiftu, 1>;
868
869 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
870 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
871                    int_arm_neon_vqrshifts, 1>;
872 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
873                    int_arm_neon_vqrshiftu, 1>;
874
875 // Vector Maximum (Signed and Unsigned Integer)
876 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
877 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
878
879 // Vector Minimum (Signed and Unsigned Integer)
880 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
881 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
882
883 // Vector Maximum (Floating Point)
884 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
885                                      int_arm_neon_vmaxs, int_arm_neon_vmaxs,
886                                      int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
887
888 // Vector Minimum (Floating Point)
889 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
890                                      int_arm_neon_vmins, int_arm_neon_vmins,
891                                      int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
892
893 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN
894 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
895                                        int_aarch64_neon_vmaxnm,
896                                        int_aarch64_neon_vmaxnm,
897                                        int_aarch64_neon_vmaxnm,
898                                        v2f32, v4f32, v2f64, 1>;
899
900 // Vector minNum (Floating Point) - prefer a number over a quiet NaN
901 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
902                                        int_aarch64_neon_vminnm,
903                                        int_aarch64_neon_vminnm,
904                                        int_aarch64_neon_vminnm,
905                                        v2f32, v4f32, v2f64, 1>;
906
907 // Vector Maximum Pairwise (Signed and Unsigned Integer)
908 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
909 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
910
911 // Vector Minimum Pairwise (Signed and Unsigned Integer)
912 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
913 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
914
915 // Vector Maximum Pairwise (Floating Point)
916 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
917                                      int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
918                                      int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
919
920 // Vector Minimum Pairwise (Floating Point)
921 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
922                                      int_arm_neon_vpmins, int_arm_neon_vpmins,
923                                      int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
924
925 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
926 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
927                                        int_aarch64_neon_vpmaxnm,
928                                        int_aarch64_neon_vpmaxnm,
929                                        int_aarch64_neon_vpmaxnm,
930                                        v2f32, v4f32, v2f64, 1>;
931
932 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
933 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
934                                        int_aarch64_neon_vpminnm,
935                                        int_aarch64_neon_vpminnm,
936                                        int_aarch64_neon_vpminnm,
937                                        v2f32, v4f32, v2f64, 1>;
938
939 // Vector Addition Pairwise (Integer)
940 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
941
942 // Vector Addition Pairwise (Floating Point)
943 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
944                                        int_arm_neon_vpadd,
945                                        int_arm_neon_vpadd,
946                                        int_arm_neon_vpadd,
947                                        v2f32, v4f32, v2f64, 1>;
948
949 // Vector Saturating Doubling Multiply High
950 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
951                     int_arm_neon_vqdmulh, 1>;
952
953 // Vector Saturating Rounding Doubling Multiply High
954 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
955                      int_arm_neon_vqrdmulh, 1>;
956
957 // Vector Multiply Extended (Floating Point)
958 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
959                                       int_aarch64_neon_vmulx,
960                                       int_aarch64_neon_vmulx,
961                                       int_aarch64_neon_vmulx,
962                                       v2f32, v4f32, v2f64, 1>;
963
964 // Vector Immediate Instructions
965
966 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
967 {
968   def _asmoperand : AsmOperandClass
969     {
970       let Name = "NeonMovImmShift" # PREFIX;
971       let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
972       let PredicateMethod = "isNeonMovImmShift" # PREFIX;
973     }
974 }
975
976 // Definitions of the vector immediate shift operands
977
978 // These operands extract the shift operation information from the OpCmode
979 // field encoded in the immediate.
980 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
981   uint64_t OpCmode = N->getZExtValue();
982   unsigned ShiftImm;
983   unsigned ShiftOnesIn;
984   unsigned HasShift =
985     A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
986   if (!HasShift) return SDValue();
987   return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
988 }]>;
989
990 // Vector immediate shift operands that accept the LSL and MSL shift
991 // operators, with shift amounts of 0, 8, 16 or 24 (LSL), 0 or 8 (LSLH),
992 // or 8 or 16 (MSL).
993 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
994 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
995 // LSLH restricts the shift amount to 0 or 8 (out of 0, 8, 16, 24).
996 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
997
998 multiclass neon_mov_imm_shift_operands<string PREFIX,
999                                        string HALF, string ISHALF, code pred>
1000 {
1001    def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1002     {
1003       let PrintMethod =
1004         "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1005       let DecoderMethod =
1006         "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1007       let ParserMatchClass =
1008         !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
1009     }
1010 }
1011
1012 defm neon_mov_imm_LSL  : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1013   unsigned ShiftImm;
1014   unsigned ShiftOnesIn;
1015   unsigned HasShift =
1016     A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1017   return (HasShift && !ShiftOnesIn);
1018 }]>;
1019
1020 defm neon_mov_imm_MSL  : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1021   unsigned ShiftImm;
1022   unsigned ShiftOnesIn;
1023   unsigned HasShift =
1024     A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1025   return (HasShift && ShiftOnesIn);
1026 }]>;
1027
1028 defm neon_mov_imm_LSLH  : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1029   unsigned ShiftImm;
1030   unsigned ShiftOnesIn;
1031   unsigned HasShift =
1032     A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1033   return (HasShift && !ShiftOnesIn);
1034 }]>;
1035
1036 def neon_uimm1_asmoperand : AsmOperandClass
1037 {
1038   let Name = "UImm1";
1039   let PredicateMethod = "isUImm<1>";
1040   let RenderMethod = "addImmOperands";
1041 }
1042
1043 def neon_uimm2_asmoperand : AsmOperandClass
1044 {
1045   let Name = "UImm2";
1046   let PredicateMethod = "isUImm<2>";
1047   let RenderMethod = "addImmOperands";
1048 }
1049
1050 def neon_uimm8_asmoperand : AsmOperandClass
1051 {
1052   let Name = "UImm8";
1053   let PredicateMethod = "isUImm<8>";
1054   let RenderMethod = "addImmOperands";
1055 }
1056
1057 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1058   let ParserMatchClass = neon_uimm8_asmoperand;
1059   let PrintMethod = "printUImmHexOperand";
1060 }
1061
1062 def neon_uimm64_mask_asmoperand : AsmOperandClass
1063 {
1064   let Name = "NeonUImm64Mask";
1065   let PredicateMethod = "isNeonUImm64Mask";
1066   let RenderMethod = "addNeonUImm64MaskOperands";
1067 }
1068
1069 // A 64-bit bytemask in which each byte is either 0x00 or 0xff is encoded
1070 // as an unsigned 8-bit MCOperand value.
1071 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1072   let ParserMatchClass = neon_uimm64_mask_asmoperand;
1073   let PrintMethod = "printNeonUImm64MaskOperand";
1074 }
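// For example, the 64-bit bytemask 0x00ff00ff00ff00ff is represented by the
// 8-bit encoded value 0x55: each bit of the encoded value expands to one
// 0xff or 0x00 byte.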
1075
1076 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1077                                    SDPatternOperator opnode>
1078 {
1079     // shift zeros, per word
1080     def _2S  : NeonI_1VModImm<0b0, op,
1081                               (outs VPR64:$Rd),
1082                               (ins neon_uimm8:$Imm,
1083                                 neon_mov_imm_LSL_operand:$Simm),
1084                               !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1085                               [(set (v2i32 VPR64:$Rd),
1086                                  (v2i32 (opnode (timm:$Imm),
1087                                    (neon_mov_imm_LSL_operand:$Simm))))],
1088                               NoItinerary> {
1089        bits<2> Simm;
1090        let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1091      }
1092
1093     def _4S  : NeonI_1VModImm<0b1, op,
1094                               (outs VPR128:$Rd),
1095                               (ins neon_uimm8:$Imm,
1096                                 neon_mov_imm_LSL_operand:$Simm),
1097                               !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1098                               [(set (v4i32 VPR128:$Rd),
1099                                  (v4i32 (opnode (timm:$Imm),
1100                                    (neon_mov_imm_LSL_operand:$Simm))))],
1101                               NoItinerary> {
1102       bits<2> Simm;
1103       let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1104     }
1105
1106     // shift zeros, per halfword
1107     def _4H  : NeonI_1VModImm<0b0, op,
1108                               (outs VPR64:$Rd),
1109                               (ins neon_uimm8:$Imm,
1110                                 neon_mov_imm_LSLH_operand:$Simm),
1111                               !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1112                               [(set (v4i16 VPR64:$Rd),
1113                                  (v4i16 (opnode (timm:$Imm),
1114                                    (neon_mov_imm_LSLH_operand:$Simm))))],
1115                               NoItinerary> {
1116       bit  Simm;
1117       let cmode = {0b1, 0b0, Simm, 0b0};
1118     }
1119
1120     def _8H  : NeonI_1VModImm<0b1, op,
1121                               (outs VPR128:$Rd),
1122                               (ins neon_uimm8:$Imm,
1123                                 neon_mov_imm_LSLH_operand:$Simm),
1124                               !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1125                               [(set (v8i16 VPR128:$Rd),
1126                                  (v8i16 (opnode (timm:$Imm),
1127                                    (neon_mov_imm_LSLH_operand:$Simm))))],
1128                               NoItinerary> {
1129       bit Simm;
1130       let cmode = {0b1, 0b0, Simm, 0b0};
1131      }
1132 }
1133
1134 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1135                                                    SDPatternOperator opnode,
1136                                                    SDPatternOperator neonopnode>
1137 {
1138   let Constraints = "$src = $Rd" in {
1139     // shift zeros, per word
1140     def _2S  : NeonI_1VModImm<0b0, op,
1141                  (outs VPR64:$Rd),
1142                  (ins VPR64:$src, neon_uimm8:$Imm,
1143                    neon_mov_imm_LSL_operand:$Simm),
1144                  !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1145                  [(set (v2i32 VPR64:$Rd),
1146                     (v2i32 (opnode (v2i32 VPR64:$src),
1147                       (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1148                         neon_mov_imm_LSL_operand:$Simm)))))))],
1149                  NoItinerary> {
1150       bits<2> Simm;
1151       let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1152     }
1153
1154     def _4S  : NeonI_1VModImm<0b1, op,
1155                  (outs VPR128:$Rd),
1156                  (ins VPR128:$src, neon_uimm8:$Imm,
1157                    neon_mov_imm_LSL_operand:$Simm),
1158                  !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1159                  [(set (v4i32 VPR128:$Rd),
1160                     (v4i32 (opnode (v4i32 VPR128:$src),
1161                       (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1162                         neon_mov_imm_LSL_operand:$Simm)))))))],
1163                  NoItinerary> {
1164       bits<2> Simm;
1165       let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1166     }
1167
1168     // shift zeros, per halfword
1169     def _4H  : NeonI_1VModImm<0b0, op,
1170                  (outs VPR64:$Rd),
1171                  (ins VPR64:$src, neon_uimm8:$Imm,
1172                    neon_mov_imm_LSLH_operand:$Simm),
1173                  !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1174                  [(set (v4i16 VPR64:$Rd),
1175                     (v4i16 (opnode (v4i16 VPR64:$src),
1176                        (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1177                           neon_mov_imm_LSL_operand:$Simm)))))))],
1178                  NoItinerary> {
1179       bit  Simm;
1180       let cmode = {0b1, 0b0, Simm, 0b1};
1181     }
1182
1183     def _8H  : NeonI_1VModImm<0b1, op,
1184                  (outs VPR128:$Rd),
1185                  (ins VPR128:$src, neon_uimm8:$Imm,
1186                    neon_mov_imm_LSLH_operand:$Simm),
1187                  !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1188                  [(set (v8i16 VPR128:$Rd),
1189                     (v8i16 (opnode (v8i16 VPR128:$src),
1190                       (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1191                         neon_mov_imm_LSL_operand:$Simm)))))))],
1192                  NoItinerary> {
1193       bit Simm;
1194       let cmode = {0b1, 0b0, Simm, 0b1};
1195     }
1196   }
1197 }
1198
1199 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1200                                    SDPatternOperator opnode>
1201 {
1202     // shift ones, per word
1203     def _2S  : NeonI_1VModImm<0b0, op,
1204                              (outs VPR64:$Rd),
1205                              (ins neon_uimm8:$Imm,
1206                                neon_mov_imm_MSL_operand:$Simm),
1207                              !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1208                               [(set (v2i32 VPR64:$Rd),
1209                                  (v2i32 (opnode (timm:$Imm),
1210                                    (neon_mov_imm_MSL_operand:$Simm))))],
1211                              NoItinerary> {
1212        bit Simm;
1213        let cmode = {0b1, 0b1, 0b0, Simm};
1214      }
1215
1216    def _4S  : NeonI_1VModImm<0b1, op,
1217                               (outs VPR128:$Rd),
1218                               (ins neon_uimm8:$Imm,
1219                                 neon_mov_imm_MSL_operand:$Simm),
1220                               !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1221                               [(set (v4i32 VPR128:$Rd),
1222                                  (v4i32 (opnode (timm:$Imm),
1223                                    (neon_mov_imm_MSL_operand:$Simm))))],
1224                               NoItinerary> {
1225      bit Simm;
1226      let cmode = {0b1, 0b1, 0b0, Simm};
1227    }
1228 }
1229
1230 // Vector Move Immediate Shifted
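// For example, movi $Rd.2s, #0xab, lsl #8 sets every 32-bit lane to 0x0000ab00.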
1231 let isReMaterializable = 1 in {
1232 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1233 }
1234
1235 // Vector Move Inverted Immediate Shifted
1236 let isReMaterializable = 1 in {
1237 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1238 }
1239
1240 // Vector Bitwise Bit Clear (AND NOT) - immediate
1241 let isReMaterializable = 1 in {
1242 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1243                                                          and, Neon_mvni>;
1244 }
1245
1246 // Vector Bitwise OR - immediate
1247
1248 let isReMaterializable = 1 in {
1249 defm ORRvi_lsl   : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1250                                                            or, Neon_movi>;
1251 }
1252
1253 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1254 // LowerBUILD_VECTOR favors lowering MOVI over MVNI, so selecting BIC
1255 // immediate instructions requires additional patterns that transform
1256 // Neon_movi operands into BIC immediate operands.
1257
1258 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1259   uint64_t OpCmode = N->getZExtValue();
1260   unsigned ShiftImm;
1261   unsigned ShiftOnesIn;
1262   (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1263   // LSLH restricts the shift amount to 0 or 8, encoded as 0 and 1.
1264   // Transform the encoded shift amount: 0 becomes 1 and 1 becomes 0.
1265   return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
1266 }]>;
1267
1268 def neon_mov_imm_LSLH_transform_operand
1269   : ImmLeaf<i32, [{
1270     unsigned ShiftImm;
1271     unsigned ShiftOnesIn;
1272     unsigned HasShift =
1273       A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1274     return (HasShift && !ShiftOnesIn); }],
1275   neon_mov_imm_LSLH_transform_XFORM>;
1276
1277 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
1278 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
1279 def : Pat<(v4i16 (and VPR64:$src,
1280             (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1281           (BICvi_lsl_4H VPR64:$src, 0,
1282             neon_mov_imm_LSLH_transform_operand:$Simm)>;
1283
1284 // Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0x00, LSL 8)
1285 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
1286 def : Pat<(v8i16 (and VPR128:$src,
1287             (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1288           (BICvi_lsl_8H VPR128:$src, 0,
1289             neon_mov_imm_LSLH_transform_operand:$Simm)>;
1290
1291
1292 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1293                                    SDPatternOperator neonopnode,
1294                                    Instruction INST4H,
1295                                    Instruction INST8H> {
1296   def : Pat<(v8i8 (opnode VPR64:$src,
1297                     (bitconvert(v4i16 (neonopnode timm:$Imm,
1298                       neon_mov_imm_LSLH_operand:$Simm))))),
1299             (INST4H VPR64:$src, neon_uimm8:$Imm,
1300               neon_mov_imm_LSLH_operand:$Simm)>;
1301   def : Pat<(v1i64 (opnode VPR64:$src,
1302                   (bitconvert(v4i16 (neonopnode timm:$Imm,
1303                     neon_mov_imm_LSLH_operand:$Simm))))),
1304           (INST4H VPR64:$src, neon_uimm8:$Imm,
1305             neon_mov_imm_LSLH_operand:$Simm)>;
1306
1307   def : Pat<(v16i8 (opnode VPR128:$src,
1308                    (bitconvert(v8i16 (neonopnode timm:$Imm,
1309                      neon_mov_imm_LSLH_operand:$Simm))))),
1310           (INST8H VPR128:$src, neon_uimm8:$Imm,
1311             neon_mov_imm_LSLH_operand:$Simm)>;
1312   def : Pat<(v4i32 (opnode VPR128:$src,
1313                    (bitconvert(v8i16 (neonopnode timm:$Imm,
1314                      neon_mov_imm_LSLH_operand:$Simm))))),
1315           (INST8H VPR128:$src, neon_uimm8:$Imm,
1316             neon_mov_imm_LSLH_operand:$Simm)>;
1317   def : Pat<(v2i64 (opnode VPR128:$src,
1318                    (bitconvert(v8i16 (neonopnode timm:$Imm,
1319                      neon_mov_imm_LSLH_operand:$Simm))))),
1320           (INST8H VPR128:$src, neon_uimm8:$Imm,
1321             neon_mov_imm_LSLH_operand:$Simm)>;
1322 }
1323
1324 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1325 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1326
1327 // Additional patterns for Vector Bitwise OR - immediate
1328 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1329
1330
1331 // Vector Move Immediate Masked
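// For example, movi $Rd.2s, #0xab, msl #8 shifts ones in from the right,
// setting every 32-bit lane to 0x0000abff.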
1332 let isReMaterializable = 1 in {
1333 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1334 }
1335
1336 // Vector Move Inverted Immediate Masked
1337 let isReMaterializable = 1 in {
1338 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
1339 }
1340
1341 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1342                                 Instruction inst, RegisterOperand VPRC>
1343   : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
1344                         (inst VPRC:$Rd, neon_uimm8:$Imm,  0), 0b0>;
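// These aliases accept the immediate form without an explicit shift, supplying
// a default shift amount of 0 (LSL #0).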
1345
1346 // Aliases for Vector Move Immediate Shifted
1347 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1348 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1349 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1350 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1351
1352 // Aliases for Vector Move Inverted Immediate Shifted
1353 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1354 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1355 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1356 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1357
1358 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1359 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1360 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1361 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1362 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1363
1364 // Aliases for Vector Bitwise OR - immediate
1365 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1366 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1367 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1368 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1369
1370 //  Vector Move Immediate - per byte
1371 let isReMaterializable = 1 in {
1372 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1373                                (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1374                                "movi\t$Rd.8b, $Imm",
1375                                [(set (v8i8 VPR64:$Rd),
1376                                   (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1377                                 NoItinerary> {
1378   let cmode = 0b1110;
1379 }
1380
1381 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1382                                 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1383                                 "movi\t$Rd.16b, $Imm",
1384                                 [(set (v16i8 VPR128:$Rd),
1385                                    (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1386                                  NoItinerary> {
1387   let cmode = 0b1110;
1388 }
1389 }
1390
1391 // Vector Move Immediate - bytemask, per double word
1392 let isReMaterializable = 1 in {
1393 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1394                                (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
1395                                "movi\t $Rd.2d, $Imm",
1396                                [(set (v2i64 VPR128:$Rd),
1397                                   (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1398                                NoItinerary> {
1399   let cmode = 0b1110;
1400 }
1401 }
1402
1403 // Vector Move Immediate - bytemask, one doubleword
1404
1405 let isReMaterializable = 1 in {
1406 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1407                            (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1408                            "movi\t $Rd, $Imm",
1409                            [(set (f64 FPR64:$Rd),
1410                               (f64 (bitconvert
1411                                 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
1412                            NoItinerary> {
1413   let cmode = 0b1110;
1414 }
1415 }
1416
1417 // Vector Floating Point Move Immediate
1418
1419 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1420                       Operand immOpType, bit q, bit op>
1421   : NeonI_1VModImm<q, op,
1422                    (outs VPRC:$Rd), (ins immOpType:$Imm),
1423                    "fmov\t$Rd" # asmlane # ", $Imm",
1424                    [(set (OpTy VPRC:$Rd),
1425                       (OpTy (Neon_fmovi (timm:$Imm))))],
1426                    NoItinerary> {
1427      let cmode = 0b1111;
1428    }
1429
1430 let isReMaterializable = 1 in {
1431 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64,  v2f32, fmov32_operand, 0b0, 0b0>;
1432 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1433 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1434 }
1435
1436 // Vector Shift (Immediate) 
1437 // Immediate in [0, 63]
1438 def imm0_63 : Operand<i32> {
1439   let ParserMatchClass = uimm6_asmoperand;
1440 }
1441
1442 // Shift Right/Left Immediate - The immh:immb field of these shifts is encoded
1443 // as follows:
1444 //
1445 //    Offset    Encoding
1446 //     8        immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1447 //     16       immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1448 //     32       immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1449 //     64       immh:immb<6>   = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1450 //
1451 // The shift right immediate amount, in the range 1 to element bits, is
1452 // computed as 2 * Offset - UInt(immh:immb).  The shift left immediate amount,
1453 // in the range 0 to element bits - 1, is computed as UInt(immh:immb) - Offset.
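// For example, for a 16-bit element (Offset 16), a right shift by 5 is encoded
// as immh:immb = 2 * 16 - 5 = 27 (0b0011011), and a left shift by 5 as
// immh:immb = 16 + 5 = 21 (0b0010101), both of the form '001xxxx'.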
1454
1455 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1456   let Name = "ShrImm" # OFFSET;
1457   let RenderMethod = "addImmOperands";
1458   let DiagnosticType = "ShrImm" # OFFSET;
1459 }
1460
1461 class shr_imm<string OFFSET> : Operand<i32> {
1462   let EncoderMethod = "getShiftRightImm" # OFFSET;
1463   let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1464   let ParserMatchClass = 
1465     !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1466 }
1467
1468 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1469 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1470 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1471 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1472
1473 def shr_imm8 : shr_imm<"8">;
1474 def shr_imm16 : shr_imm<"16">;
1475 def shr_imm32 : shr_imm<"32">;
1476 def shr_imm64 : shr_imm<"64">;
1477
1478 class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
1479   let Name = "ShlImm" # OFFSET;
1480   let RenderMethod = "addImmOperands";
1481   let DiagnosticType = "ShlImm" # OFFSET;
1482 }
1483
1484 class shl_imm<string OFFSET> : Operand<i32> {
1485   let EncoderMethod = "getShiftLeftImm" # OFFSET;
1486   let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
1487   let ParserMatchClass = 
1488     !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
1489 }
1490
1491 def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
1492 def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
1493 def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
1494 def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
1495
1496 def shl_imm8 : shl_imm<"8">;
1497 def shl_imm16 : shl_imm<"16">;
1498 def shl_imm32 : shl_imm<"32">;
1499 def shl_imm64 : shl_imm<"64">;
1500
1501 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1502                RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1503   : NeonI_2VShiftImm<q, u, opcode,
1504                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1505                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1506                      [(set (Ty VPRC:$Rd),
1507                         (Ty (OpNode (Ty VPRC:$Rn),
1508                           (Ty (Neon_vdup (i32 imm:$Imm))))))],
1509                      NoItinerary>;
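// A vector shift by a constant reaches the DAG as a shift by a splat, e.g.
// (shl (v2i32 $Rn), (Neon_vdup 3)), which selects to shl $Rd.2s, $Rn.2s, #3.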
1510
1511 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1512   // 64-bit vector types.
1513   def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> {
1514     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1515   }
1516
1517   def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> {
1518     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1519   }
1520
1521   def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> {
1522     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1523   }
1524
1525   // 128-bit vector types.
1526   def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> {
1527     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1528   }
1529
1530   def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> {
1531     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1532   }
1533
1534   def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> {
1535     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1536   }
1537
1538   def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> {
1539     let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
1540   }
1541 }
1542
1543 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1544   def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1545                      OpNode> {
1546     let Inst{22-19} = 0b0001;
1547   }
1548
1549   def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1550                      OpNode> {
1551     let Inst{22-20} = 0b001;
1552   }
1553
1554   def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1555                      OpNode> {
1556     let Inst{22-21} = 0b01;
1557   }
1558
1559   def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1560                       OpNode> {
1561     let Inst{22-19} = 0b0001;
1562   }
1563
1564   def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1565                      OpNode> {
1566     let Inst{22-20} = 0b001;
1567   }
1568
1569   def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1570                      OpNode> {
1571     let Inst{22-21} = 0b01;
1572   }
1573
1574   def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1575                      OpNode> {
1576     let Inst{22} = 0b1;
1577   }
1578 }
1579
1580 // Shift left
1581 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1582
1583 // Shift right
1584 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1585 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
1586
1587 def Neon_High16B : PatFrag<(ops node:$in),
1588                            (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1589 def Neon_High8H  : PatFrag<(ops node:$in),
1590                            (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1591 def Neon_High4S  : PatFrag<(ops node:$in),
1592                            (extract_subvector (v4i32 node:$in), (iPTR 2))>;
1593
1594 def Neon_low8H : PatFrag<(ops node:$in),
1595                          (v4i16 (extract_subvector (v8i16 node:$in),
1596                                                    (iPTR 0)))>;
1597 def Neon_low4S : PatFrag<(ops node:$in),
1598                          (v2i32 (extract_subvector (v4i32 node:$in),
1599                                                    (iPTR 0)))>;
1600 def Neon_low4f : PatFrag<(ops node:$in),
1601                          (v2f32 (extract_subvector (v4f32 node:$in),
1602                                                    (iPTR 0)))>;
1603
1604 def neon_uimm3_shift : Operand<i32>,
1605                          ImmLeaf<i32, [{return Imm < 8;}]> {
1606   let ParserMatchClass = uimm3_asmoperand;
1607 }
1608
1609 def neon_uimm4_shift : Operand<i32>,
1610                          ImmLeaf<i32, [{return Imm < 16;}]> {
1611   let ParserMatchClass = uimm4_asmoperand;
1612 }
1613
1614 def neon_uimm5_shift : Operand<i32>,
1615                          ImmLeaf<i32, [{return Imm < 32;}]> {
1616   let ParserMatchClass = uimm5_asmoperand;
1617 }
1618
1619 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1620                    string SrcT, ValueType DestTy, ValueType SrcTy,
1621                    Operand ImmTy, SDPatternOperator ExtOp>
1622   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1623                      (ins VPR64:$Rn, ImmTy:$Imm),
1624                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1625                      [(set (DestTy VPR128:$Rd),
1626                         (DestTy (shl
1627                           (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1628                             (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
1629                      NoItinerary>;
1630
1631 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1632                        string SrcT, ValueType DestTy, ValueType SrcTy,
1633                        int StartIndex, Operand ImmTy,
1634                        SDPatternOperator ExtOp, PatFrag getTop>
1635   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1636                      (ins VPR128:$Rn, ImmTy:$Imm),
1637                      asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1638                      [(set (DestTy VPR128:$Rd),
1639                         (DestTy (shl
1640                           (DestTy (ExtOp
1641                             (SrcTy (getTop VPR128:$Rn)))),
1642                               (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
1643                      NoItinerary>;
1644
1645 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1646                          SDNode ExtOp> {
1647   // 64-bit vector types.
1648   def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1649                          neon_uimm3_shift, ExtOp> {
1650     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1651   }
1652
1653   def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1654                          neon_uimm4_shift, ExtOp> {
1655     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1656   }
1657
1658   def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1659                          neon_uimm5_shift, ExtOp> {
1660     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1661   }
1662
1663   // 128-bit vector types
1664   def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
1665                               8, neon_uimm3_shift, ExtOp, Neon_High16B> {
1666     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1667   }
1668
1669   def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
1670                              4, neon_uimm4_shift, ExtOp, Neon_High8H> {
1671     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1672   }
1673
1674   def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
1675                              2, neon_uimm5_shift, ExtOp, Neon_High4S> {
1676     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1677   }
1678
1679   // When the immediate is 0, the shl is folded away, so match the bare ExtOp.
1680   def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1681             (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1682
1683   def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1684             (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1685
1686   def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1687             (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1688
1689   def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
1690             (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1691
1692   def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
1693             (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1694
1695   def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
1696             (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
1697 }
1698
1699 // Shift left long
1700 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1701 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1702
1703 // Rounding/Saturating shift
1704 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1705                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1706                   SDPatternOperator OpNode>
1707   : NeonI_2VShiftImm<q, u, opcode,
1708                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1709                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1710                      [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1711                         (i32 imm:$Imm))))],
1712                      NoItinerary>;
1713
1714 // shift right (vector by immediate)
1715 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1716                            SDPatternOperator OpNode> {
1717   def _8B  : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1718                          OpNode> {
1719     let Inst{22-19} = 0b0001;
1720   }
1721
1722   def _4H  : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1723                          OpNode> {
1724     let Inst{22-20} = 0b001;
1725   }
1726
1727   def _2S  : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1728                          OpNode> {
1729     let Inst{22-21} = 0b01;
1730   }
1731
1732   def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1733                          OpNode> {
1734     let Inst{22-19} = 0b0001;
1735   }
1736
1737   def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1738                         OpNode> {
1739     let Inst{22-20} = 0b001;
1740   }
1741
1742   def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1743                         OpNode> {
1744     let Inst{22-21} = 0b01;
1745   }
1746
1747   def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1748                         OpNode> {
1749     let Inst{22} = 0b1;
1750   }
1751 }
1752
1753 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1754                           SDPatternOperator OpNode> {
1755   // 64-bit vector types.
1756   def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1757                         OpNode> {
1758     let Inst{22-19} = 0b0001;
1759   }
1760
1761   def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1762                         OpNode> {
1763     let Inst{22-20} = 0b001;
1764   }
1765
1766   def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1767                         OpNode> {
1768     let Inst{22-21} = 0b01;
1769   }
1770
1771   // 128-bit vector types.
1772   def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1773                          OpNode> {
1774     let Inst{22-19} = 0b0001;
1775   }
1776
1777   def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1778                         OpNode> {
1779     let Inst{22-20} = 0b001;
1780   }
1781
1782   def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1783                         OpNode> {
1784     let Inst{22-21} = 0b01;
1785   }
1786
1787   def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1788                         OpNode> {
1789     let Inst{22} = 0b1;
1790   }
1791 }
1792
1793 // Rounding shift right
1794 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1795                                 int_aarch64_neon_vsrshr>;
1796 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1797                                 int_aarch64_neon_vurshr>;
1798
1799 // Saturating shift left unsigned
1800 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1801
1802 // Saturating shift left
1803 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1804 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
1805
1806 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1807                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1808                   SDNode OpNode>
1809   : NeonI_2VShiftImm<q, u, opcode,
1810            (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1811            asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1812            [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1813               (Ty (OpNode (Ty VPRC:$Rn),
1814                 (Ty (Neon_vdup (i32 imm:$Imm))))))))],
1815            NoItinerary> {
1816   let Constraints = "$src = $Rd";
1817 }
1818
1819 // Shift Right accumulate
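// For example, ssra $Rd.2s, $Rn.2s, #3 adds ($Rn >> 3), arithmetic per lane,
// to the existing contents of $Rd.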
1820 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1821   def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1822                         OpNode> {
1823     let Inst{22-19} = 0b0001;
1824   }
1825
1826   def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1827                         OpNode> {
1828     let Inst{22-20} = 0b001;
1829   }
1830
1831   def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1832                         OpNode> {
1833     let Inst{22-21} = 0b01;
1834   }
1835
1836   def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1837                          OpNode> {
1838     let Inst{22-19} = 0b0001;
1839   }
1840
1841   def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1842                         OpNode> {
1843     let Inst{22-20} = 0b001;
1844   }
1845
1846   def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1847                         OpNode> {
1848     let Inst{22-21} = 0b01;
1849   }
1850
1851   def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1852                         OpNode> {
1853     let Inst{22} = 0b1;
1854   }
1855 }
1856
1857 // Shift right and accumulate
1858 defm SSRAvvi    : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
1859 defm USRAvvi    : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
1860
1861 // Rounding shift accumulate
1862 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1863                     RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1864                     SDPatternOperator OpNode>
1865   : NeonI_2VShiftImm<q, u, opcode,
1866                      (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1867                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1868                      [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1869                         (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))],
1870                      NoItinerary> {
1871   let Constraints = "$src = $Rd";
1872 }
1873
1874 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1875                              SDPatternOperator OpNode> {
1876   def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1877                           OpNode> {
1878     let Inst{22-19} = 0b0001;
1879   }
1880
1881   def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1882                           OpNode> {
1883     let Inst{22-20} = 0b001;
1884   }
1885
1886   def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1887                           OpNode> {
1888     let Inst{22-21} = 0b01;
1889   }
1890
1891   def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1892                            OpNode> {
1893     let Inst{22-19} = 0b0001;
1894   }
1895
1896   def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1897                           OpNode> {
1898     let Inst{22-20} = 0b001;
1899   }
1900
1901   def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1902                           OpNode> {
1903     let Inst{22-21} = 0b01;
1904   }
1905
1906   def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1907                           OpNode> {
1908     let Inst{22} = 0b1;
1909   }
1910 }
1911
1912 // Rounding shift right and accumulate
1913 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1914 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1915
1916 // Shift insert by immediate
1917 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1918                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1919                   SDPatternOperator OpNode>
1920     : NeonI_2VShiftImm<q, u, opcode,
1921            (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1922            asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1923            [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1924              (i32 imm:$Imm))))],
1925            NoItinerary> {
1926   let Constraints = "$src = $Rd";
1927 }
1928
1929 // shift left insert (vector by immediate)
1930 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
1931   def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1932                         int_aarch64_neon_vsli> {
1933     let Inst{22-19} = 0b0001;
1934   }
1935
1936   def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1937                         int_aarch64_neon_vsli> {
1938     let Inst{22-20} = 0b001;
1939   }
1940
1941   def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1942                         int_aarch64_neon_vsli> {
1943     let Inst{22-21} = 0b01;
1944   }
1945
1946   // 128-bit vector types
1947   def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1948                          int_aarch64_neon_vsli> {
1949     let Inst{22-19} = 0b0001;
1950   }
1951
1952   def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1953                         int_aarch64_neon_vsli> {
1954     let Inst{22-20} = 0b001;
1955   }
1956
1957   def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1958                         int_aarch64_neon_vsli> {
1959     let Inst{22-21} = 0b01;
1960   }
1961
1962   def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1963                         int_aarch64_neon_vsli> {
1964     let Inst{22} = 0b1;
1965   }
1966 }
1967
1968 // shift right insert (vector by immediate)
1969 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
1970   // 64-bit vector types.
1971   def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1972                         int_aarch64_neon_vsri> {
1973     let Inst{22-19} = 0b0001;
1974   }
1975
1976   def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1977                         int_aarch64_neon_vsri> {
1978     let Inst{22-20} = 0b001;
1979   }
1980
1981   def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1982                         int_aarch64_neon_vsri> {
1983     let Inst{22-21} = 0b01;
1984   }
1985
1986   // 128-bit vector types
1987   def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1988                          int_aarch64_neon_vsri> {
1989     let Inst{22-19} = 0b0001;
1990   }
1991
1992   def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1993                         int_aarch64_neon_vsri> {
1994     let Inst{22-20} = 0b001;
1995   }
1996
1997   def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1998                         int_aarch64_neon_vsri> {
1999     let Inst{22-21} = 0b01;
2000   }
2001
2002   def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
2003                         int_aarch64_neon_vsri> {
2004     let Inst{22} = 0b1;
2005   }
2006 }
2007
2008 // Shift left and insert
2009 defm SLIvvi   : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
2010
2011 // Shift right and insert
2012 defm SRIvvi   : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
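// For a 32-bit lane, sli $Rd.2s, $Rn.2s, #3 writes $Rn << 3 into bits [31:3]
// of each destination lane and preserves bits [2:0], while sri $Rd.2s, $Rn.2s,
// #3 writes $Rn >> 3 into bits [28:0] and preserves bits [31:29].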
2013
2014 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2015                     string SrcT, Operand ImmTy>
2016   : NeonI_2VShiftImm<q, u, opcode,
2017                      (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
2018                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2019                      [], NoItinerary>;
2020
2021 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2022                        string SrcT, Operand ImmTy>
2023   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
2024                      (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
2025                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2026                      [], NoItinerary> {
2027   let Constraints = "$src = $Rd";
2028 }
2029
2030 // Shift right narrow by immediate
2031 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
2032   def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
2033     let Inst{22-19} = 0b0001;
2034   }
2035
2036   def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
2037     let Inst{22-20} = 0b001;
2038   }
2039
2040   def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
2041     let Inst{22-21} = 0b01;
2042   }
2043
2044   // Shift Narrow High
2045   def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
2046                               shr_imm8> {
2047     let Inst{22-19} = 0b0001;
2048   }
2049
2050   def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
2051                              shr_imm16> {
2052     let Inst{22-20} = 0b001;
2053   }
2054
2055   def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2056                              shr_imm32> {
2057     let Inst{22-21} = 0b01;
2058   }
2059 }
2060
2061 // Shift right narrow
2062 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
2063
2064 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
2065 defm QSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2066 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2067 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2068 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2069 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2070 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2071 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
2072
2073 def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
2074                               (v2i64 (concat_vectors (v1i64 node:$Rm),
2075                                                      (v1i64 node:$Rn)))>;
2076 def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
2077                               (v8i16 (concat_vectors (v4i16 node:$Rm),
2078                                                      (v4i16 node:$Rn)))>;
2079 def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
2080                               (v4i32 (concat_vectors (v2i32 node:$Rm),
2081                                                      (v2i32 node:$Rn)))>;
2082 def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
2083                               (v4f32 (concat_vectors (v2f32 node:$Rm),
2084                                                      (v2f32 node:$Rn)))>;
2085 def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
2086                               (v2f64 (concat_vectors (v1f64 node:$Rm),
2087                                                      (v1f64 node:$Rn)))>;
2088
2089 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2090                              (v8i16 (srl (v8i16 node:$lhs),
2091                                (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2092 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2093                              (v4i32 (srl (v4i32 node:$lhs),
2094                                (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2095 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2096                              (v2i64 (srl (v2i64 node:$lhs),
2097                                (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2098 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2099                              (v8i16 (sra (v8i16 node:$lhs),
2100                                (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2101 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2102                              (v4i32 (sra (v4i32 node:$lhs),
2103                                (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2104 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2105                              (v2i64 (sra (v2i64 node:$lhs),
2106                                (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2107
2108 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
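// For example, (v8i8 (trunc (srl (v8i16 $Rn), (Neon_vdup 3)))) selects to
// shrn $Rd.8b, $Rn.8h, #3.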
2109 multiclass Neon_shiftNarrow_patterns<string shr> {
2110   def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2111               (i32 imm:$Imm)))),
2112             (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2113   def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2114               (i32 imm:$Imm)))),
2115             (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2116   def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2117               (i32 imm:$Imm)))),
2118             (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2119
2120   def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2121               (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2122                 VPR128:$Rn, (i32 imm:$Imm))))))),
2123             (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
2124                          VPR128:$Rn, imm:$Imm)>;
2125   def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2126               (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2127                 VPR128:$Rn, (i32 imm:$Imm))))))),
2128             (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2129                         VPR128:$Rn, imm:$Imm)>;
2130   def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2131               (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2132                 VPR128:$Rn, (i32 imm:$Imm))))))),
2133             (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2134                         VPR128:$Rn, imm:$Imm)>;
2135 }
2136
2137 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2138   def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)),
2139             (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2140   def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)),
2141             (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2142   def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)),
2143             (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2144
2145   def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2146                 (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
2147             (!cast<Instruction>(prefix # "_16B")
2148                 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2149                 VPR128:$Rn, imm:$Imm)>;
2150   def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2151                 (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
2152             (!cast<Instruction>(prefix # "_8H")
2153                 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2154                 VPR128:$Rn, imm:$Imm)>;
2155   def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2156                 (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
2157             (!cast<Instruction>(prefix # "_4S")
2158                   (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2159                   VPR128:$Rn, imm:$Imm)>;
2160 }
2161
2162 defm : Neon_shiftNarrow_patterns<"lshr">;
2163 defm : Neon_shiftNarrow_patterns<"ashr">;
2164
2165 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2166 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2167 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2168 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2169 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2170 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2171 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2172
2173 // Convert between fixed-point and floating-point
2174 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2175                 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2176                 Operand ImmTy, SDPatternOperator IntOp>
2177   : NeonI_2VShiftImm<q, u, opcode,
2178                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2179                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2180                      [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2181                        (i32 imm:$Imm))))],
2182                      NoItinerary>;
2183
2184 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2185                               SDPatternOperator IntOp> {
2186   def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2187                       shr_imm32, IntOp> {
2188     let Inst{22-21} = 0b01;
2189   }
2190
2191   def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2192                       shr_imm32, IntOp> {
2193     let Inst{22-21} = 0b01;
2194   }
2195
2196   def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
2197                       shr_imm64, IntOp> {
2198     let Inst{22} = 0b1;
2199   }
2200 }
2201
2202 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2203                               SDPatternOperator IntOp> {
2204   def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2205                       shr_imm32, IntOp> {
2206     let Inst{22-21} = 0b01;
2207   }
2208
2209   def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2210                       shr_imm32, IntOp> {
2211     let Inst{22-21} = 0b01;
2212   }
2213
2214   def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2215                       shr_imm64, IntOp> {
2216     let Inst{22} = 0b1;
2217   }
2218 }
2219
2220 // Convert fixed-point to floating-point
2221 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2222                                    int_arm_neon_vcvtfxs2fp>;
2223 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2224                                    int_arm_neon_vcvtfxu2fp>;
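// For example, scvtf $Rd.2s, $Rn.2s, #4 converts each signed 32-bit lane to
// floating-point and scales by 2^-4, i.e. the input has 4 fractional bits.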
2225
2226 // Convert floating-point to fixed-point
2227 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2228                                    int_arm_neon_vcvtfp2fxs>;
2229 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2230                                    int_arm_neon_vcvtfp2fxu>;
2231
2232 multiclass Neon_sshll2_0<SDNode ext>
2233 {
2234   def _v8i8  : PatFrag<(ops node:$Rn),
2235                        (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
2236   def _v4i16 : PatFrag<(ops node:$Rn),
2237                        (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
2238   def _v2i32 : PatFrag<(ops node:$Rn),
2239                        (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
2240 }
2241
2242 defm NI_sext_high : Neon_sshll2_0<sext>;
2243 defm NI_zext_high : Neon_sshll2_0<zext>;
2244
2245
2246 //===----------------------------------------------------------------------===//
2247 // Multiclasses for NeonI_Across
2248 //===----------------------------------------------------------------------===//
2249
2250 // Variant 1
2251
2252 multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
2253                             string asmop, SDPatternOperator opnode>
2254 {
2255     def _1h8b:  NeonI_2VAcross<0b0, u, 0b00, opcode,
2256                 (outs FPR16:$Rd), (ins VPR64:$Rn),
2257                 asmop # "\t$Rd, $Rn.8b",
2258                 [(set (v1i16 FPR16:$Rd),
2259                     (v1i16 (opnode (v8i8 VPR64:$Rn))))],
2260                 NoItinerary>;
2261
2262     def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2263                 (outs FPR16:$Rd), (ins VPR128:$Rn),
2264                 asmop # "\t$Rd, $Rn.16b",
2265                 [(set (v1i16 FPR16:$Rd),
2266                     (v1i16 (opnode (v16i8 VPR128:$Rn))))],
2267                 NoItinerary>;
2268
2269     def _1s4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
2270                 (outs FPR32:$Rd), (ins VPR64:$Rn),
2271                 asmop # "\t$Rd, $Rn.4h",
2272                 [(set (v1i32 FPR32:$Rd),
2273                     (v1i32 (opnode (v4i16 VPR64:$Rn))))],
2274                 NoItinerary>;
2275
2276     def _1s8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
2277                 (outs FPR32:$Rd), (ins VPR128:$Rn),
2278                 asmop # "\t$Rd, $Rn.8h",
2279                 [(set (v1i32 FPR32:$Rd),
2280                     (v1i32 (opnode (v8i16 VPR128:$Rn))))],
2281                 NoItinerary>;
2282
2283     // _1d2s doesn't exist!
2284
2285     def _1d4s:  NeonI_2VAcross<0b1, u, 0b10, opcode,
2286                 (outs FPR64:$Rd), (ins VPR128:$Rn),
2287                 asmop # "\t$Rd, $Rn.4s",
2288                 [(set (v1i64 FPR64:$Rd),
2289                     (v1i64 (opnode (v4i32 VPR128:$Rn))))],
2290                 NoItinerary>;
2291 }
2292
2293 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
2294 defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
2295
2296 // Variant 2
2297
2298 multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
2299                             string asmop, SDPatternOperator opnode>
2300 {
2301     def _1b8b:  NeonI_2VAcross<0b0, u, 0b00, opcode,
2302                 (outs FPR8:$Rd), (ins VPR64:$Rn),
2303                 asmop # "\t$Rd, $Rn.8b",
2304                 [(set (v1i8 FPR8:$Rd),
2305                     (v1i8 (opnode (v8i8 VPR64:$Rn))))],
2306                 NoItinerary>;
2307
2308     def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2309                 (outs FPR8:$Rd), (ins VPR128:$Rn),
2310                 asmop # "\t$Rd, $Rn.16b",
2311                 [(set (v1i8 FPR8:$Rd),
2312                     (v1i8 (opnode (v16i8 VPR128:$Rn))))],
2313                 NoItinerary>;
2314
2315     def _1h4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
2316                 (outs FPR16:$Rd), (ins VPR64:$Rn),
2317                 asmop # "\t$Rd, $Rn.4h",
2318                 [(set (v1i16 FPR16:$Rd),
2319                     (v1i16 (opnode (v4i16 VPR64:$Rn))))],
2320                 NoItinerary>;
2321
2322     def _1h8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
2323                 (outs FPR16:$Rd), (ins VPR128:$Rn),
2324                 asmop # "\t$Rd, $Rn.8h",
2325                 [(set (v1i16 FPR16:$Rd),
2326                     (v1i16 (opnode (v8i16 VPR128:$Rn))))],
2327                 NoItinerary>;
2328
2329     // _1s2s doesn't exist!
2330
2331     def _1s4s:  NeonI_2VAcross<0b1, u, 0b10, opcode,
2332                 (outs FPR32:$Rd), (ins VPR128:$Rn),
2333                 asmop # "\t$Rd, $Rn.4s",
2334                 [(set (v1i32 FPR32:$Rd),
2335                     (v1i32 (opnode (v4i32 VPR128:$Rn))))],
2336                 NoItinerary>;
2337 }
2338
2339 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
2340 defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
2341
2342 defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
2343 defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
2344
2345 defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
2346
2347 // Variant 3
2348
2349 multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
2350                             string asmop, SDPatternOperator opnode> {
2351     def _1s4s:  NeonI_2VAcross<0b1, u, size, opcode,
2352                 (outs FPR32:$Rd), (ins VPR128:$Rn),
2353                 asmop # "\t$Rd, $Rn.4s",
2354                 [(set (v1f32 FPR32:$Rd),
2355                     (v1f32 (opnode (v4f32 VPR128:$Rn))))],
2356                 NoItinerary>;
2357 }
2358
2359 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
2360                                 int_aarch64_neon_vmaxnmv>;
2361 defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
2362                                 int_aarch64_neon_vminnmv>;
2363
2364 defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
2365                               int_aarch64_neon_vmaxv>;
2366 defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
2367                               int_aarch64_neon_vminv>;
2368
2369 // The following are for the instruction class (Perm)
2370
2371 class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
2372                     string asmop, RegisterOperand OpVPR, string OpS>
2373   : NeonI_Perm<q, size, opcode,
2374                (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2375                asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
2376                [], NoItinerary>;
2377
2378 multiclass NeonI_Perm_pat<bits<3> opcode, string asmop> {
2379    def _8b  : NeonI_Permute<0b0, 0b00, opcode, asmop, VPR64,  "8b">;
2380    def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, VPR128, "16b">;
2381    def _4h  : NeonI_Permute<0b0, 0b01, opcode, asmop, VPR64,  "4h">;
2382    def _8h  : NeonI_Permute<0b1, 0b01, opcode, asmop, VPR128, "8h">;
2383    def _2s  : NeonI_Permute<0b0, 0b10, opcode, asmop, VPR64,  "2s">;
2384    def _4s  : NeonI_Permute<0b1, 0b10, opcode, asmop, VPR128, "4s">;
2385    def _2d  : NeonI_Permute<0b1, 0b11, opcode, asmop, VPR128, "2d">;
2386 }                          
2387
2388 defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1">;
2389 defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1">;
2390 defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1">;
2391 defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2">;
2392 defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2">;
2393 defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2">;
2394
2395 // Extract and Insert
2396 def NI_ei_i32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
2397                         (vector_insert node:$Rn,
2398                           (i32 (vector_extract node:$Rm, node:$Ext)),
2399                           node:$Ins)>;
2400
2401 def NI_ei_f32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
2402                         (vector_insert node:$Rn,
2403                           (f32 (vector_extract node:$Rm, node:$Ext)),
2404                           node:$Ins)>;
2405
2406 // uzp1
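// The pattern below recognizes uzp1 built lane by lane from insert/extract:
// result lanes 0-7 take the even-numbered lanes of $Rn and lanes 8-15 take
// the even-numbered lanes of $Rm.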
2407 def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2408           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2409           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2410           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2411           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2412           (v16i8 VPR128:$Rn),
2413           (v16i8 VPR128:$Rn), 2,  1)),
2414           (v16i8 VPR128:$Rn), 4,  2)),
2415           (v16i8 VPR128:$Rn), 6,  3)),
2416           (v16i8 VPR128:$Rn), 8,  4)),
2417           (v16i8 VPR128:$Rn), 10, 5)),
2418           (v16i8 VPR128:$Rn), 12, 6)),
2419           (v16i8 VPR128:$Rn), 14, 7)),
2420           (v16i8 VPR128:$Rm), 0,  8)),
2421           (v16i8 VPR128:$Rm), 2,  9)),
2422           (v16i8 VPR128:$Rm), 4,  10)),
2423           (v16i8 VPR128:$Rm), 6,  11)),
2424           (v16i8 VPR128:$Rm), 8,  12)),
2425           (v16i8 VPR128:$Rm), 10, 13)),
2426           (v16i8 VPR128:$Rm), 12, 14)),
2427           (v16i8 VPR128:$Rm), 14, 15)),
2428           (UZP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
2429
2430 class NI_Uzp1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
2431   : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2432         (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2433         (Ty VPR:$Rn),
2434         (Ty VPR:$Rn), 2, 1)),
2435         (Ty VPR:$Rn), 4, 2)),
2436         (Ty VPR:$Rn), 6, 3)),
2437         (Ty VPR:$Rm), 0, 4)),
2438         (Ty VPR:$Rm), 2, 5)),
2439         (Ty VPR:$Rm), 4, 6)),
2440         (Ty VPR:$Rm), 6, 7)),
2441         (INST VPR:$Rn, VPR:$Rm)>;
2442
2443 def : NI_Uzp1_v8<v8i8, VPR64, UZP1vvv_8b>;
2444 def : NI_Uzp1_v8<v8i16, VPR128, UZP1vvv_8h>;
2445
2446 class NI_Uzp1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
2447                  PatFrag ei>
2448   : Pat<(Ty (ei (Ty (ei (Ty (ei
2449         (Ty VPR:$Rn),
2450         (Ty VPR:$Rn), 2, 1)),
2451         (Ty VPR:$Rm), 0, 2)),
2452         (Ty VPR:$Rm), 2, 3)),
2453         (INST VPR:$Rn, VPR:$Rm)>;
2454
2455 def : NI_Uzp1_v4<v4i16, VPR64, UZP1vvv_4h, NI_ei_i32>;
2456 def : NI_Uzp1_v4<v4i32, VPR128, UZP1vvv_4s, NI_ei_i32>;
2457 def : NI_Uzp1_v4<v4f32, VPR128, UZP1vvv_4s, NI_ei_f32>;
2458
2459 // uzp2
2460 def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2461           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2462           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2463           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2464           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 
2465           (v16i8 VPR128:$Rm),
2466           (v16i8 VPR128:$Rn), 1,  0)),
2467           (v16i8 VPR128:$Rn), 3,  1)),
2468           (v16i8 VPR128:$Rn), 5,  2)),
2469           (v16i8 VPR128:$Rn), 7,  3)),
2470           (v16i8 VPR128:$Rn), 9,  4)),
2471           (v16i8 VPR128:$Rn), 11, 5)),
2472           (v16i8 VPR128:$Rn), 13, 6)),
2473           (v16i8 VPR128:$Rn), 15, 7)),
2474           (v16i8 VPR128:$Rm), 1,  8)),
2475           (v16i8 VPR128:$Rm), 3,  9)),
2476           (v16i8 VPR128:$Rm), 5,  10)),
2477           (v16i8 VPR128:$Rm), 7,  11)),
2478           (v16i8 VPR128:$Rm), 9,  12)),
2479           (v16i8 VPR128:$Rm), 11, 13)),
2480           (v16i8 VPR128:$Rm), 13, 14)),
2481           (UZP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
2482
2483 class NI_Uzp2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
2484   : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2485         (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2486         (Ty VPR:$Rm),
2487         (Ty VPR:$Rn), 1, 0)),
2488         (Ty VPR:$Rn), 3, 1)),
2489         (Ty VPR:$Rn), 5, 2)),
2490         (Ty VPR:$Rn), 7, 3)),
2491         (Ty VPR:$Rm), 1, 4)),
2492         (Ty VPR:$Rm), 3, 5)),
2493         (Ty VPR:$Rm), 5, 6)),
2494         (INST VPR:$Rn, VPR:$Rm)>;
2495
2496 def : NI_Uzp2_v8<v8i8, VPR64, UZP2vvv_8b>;
2497 def : NI_Uzp2_v8<v8i16, VPR128, UZP2vvv_8h>;
2498
2499 class NI_Uzp2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
2500                  PatFrag ei>
2501   : Pat<(Ty (ei (Ty (ei (Ty (ei
2502         (Ty VPR:$Rm),
2503         (Ty VPR:$Rn), 1, 0)),
2504         (Ty VPR:$Rn), 3, 1)),
2505         (Ty VPR:$Rm), 1, 2)),
2506         (INST VPR:$Rn, VPR:$Rm)>;
2507
2508 def : NI_Uzp2_v4<v4i16, VPR64, UZP2vvv_4h, NI_ei_i32>;
2509 def : NI_Uzp2_v4<v4i32, VPR128, UZP2vvv_4s, NI_ei_i32>;
2510 def : NI_Uzp2_v4<v4f32, VPR128, UZP2vvv_4s, NI_ei_f32>;
2511
2512 // zip1
2513 def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2514           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2515           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2516           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2517           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2518           (v16i8 VPR128:$Rn),
2519           (v16i8 VPR128:$Rm), 0, 1)),
2520           (v16i8 VPR128:$Rn), 1, 2)),
2521           (v16i8 VPR128:$Rm), 1, 3)),
2522           (v16i8 VPR128:$Rn), 2, 4)),
2523           (v16i8 VPR128:$Rm), 2, 5)),
2524           (v16i8 VPR128:$Rn), 3, 6)),
2525           (v16i8 VPR128:$Rm), 3, 7)),
2526           (v16i8 VPR128:$Rn), 4, 8)),
2527           (v16i8 VPR128:$Rm), 4, 9)),
2528           (v16i8 VPR128:$Rn), 5, 10)),
2529           (v16i8 VPR128:$Rm), 5, 11)),
2530           (v16i8 VPR128:$Rn), 6, 12)),
2531           (v16i8 VPR128:$Rm), 6, 13)),
2532           (v16i8 VPR128:$Rn), 7, 14)),
2533           (v16i8 VPR128:$Rm), 7, 15)),
2534           (ZIP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
2535
2536 class NI_Zip1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
2537   : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2538         (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2539         (Ty VPR:$Rn),
2540         (Ty VPR:$Rm), 0, 1)),
2541         (Ty VPR:$Rn), 1, 2)),
2542         (Ty VPR:$Rm), 1, 3)),
2543         (Ty VPR:$Rn), 2, 4)),
2544         (Ty VPR:$Rm), 2, 5)),
2545         (Ty VPR:$Rn), 3, 6)),
2546         (Ty VPR:$Rm), 3, 7)),
2547         (INST VPR:$Rn, VPR:$Rm)>;
2548
2549 def : NI_Zip1_v8<v8i8, VPR64, ZIP1vvv_8b>;
2550 def : NI_Zip1_v8<v8i16, VPR128, ZIP1vvv_8h>;
2551
2552 class NI_Zip1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
2553                  PatFrag ei>
2554   : Pat<(Ty (ei (Ty (ei (Ty (ei
2555         (Ty VPR:$Rn),
2556         (Ty VPR:$Rm), 0, 1)),
2557         (Ty VPR:$Rn), 1, 2)),
2558         (Ty VPR:$Rm), 1, 3)),
2559         (INST VPR:$Rn, VPR:$Rm)>;
2560
2561 def : NI_Zip1_v4<v4i16, VPR64, ZIP1vvv_4h, NI_ei_i32>;
2562 def : NI_Zip1_v4<v4i32, VPR128, ZIP1vvv_4s, NI_ei_i32>;
2563 def : NI_Zip1_v4<v4f32, VPR128, ZIP1vvv_4s, NI_ei_f32>;
2564
2565 // zip2
2566 def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2567           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2568           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2569           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2570           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2571           (v16i8 VPR128:$Rm),
2572           (v16i8 VPR128:$Rn), 8, 0)),
2573           (v16i8 VPR128:$Rm), 8, 1)),
2574           (v16i8 VPR128:$Rn), 9, 2)),
2575           (v16i8 VPR128:$Rm), 9, 3)),
2576           (v16i8 VPR128:$Rn), 10, 4)),
2577           (v16i8 VPR128:$Rm), 10, 5)),
2578           (v16i8 VPR128:$Rn), 11, 6)),
2579           (v16i8 VPR128:$Rm), 11, 7)),
2580           (v16i8 VPR128:$Rn), 12, 8)),
2581           (v16i8 VPR128:$Rm), 12, 9)),
2582           (v16i8 VPR128:$Rn), 13, 10)),
2583           (v16i8 VPR128:$Rm), 13, 11)),
2584           (v16i8 VPR128:$Rn), 14, 12)),
2585           (v16i8 VPR128:$Rm), 14, 13)),
2586           (v16i8 VPR128:$Rn), 15, 14)),
2587           (ZIP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
2588
2589 class NI_Zip2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
2590   : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2591         (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2592         (Ty VPR:$Rm),
2593         (Ty VPR:$Rn), 4, 0)),
2594         (Ty VPR:$Rm), 4, 1)),
2595         (Ty VPR:$Rn), 5, 2)),
2596         (Ty VPR:$Rm), 5, 3)),
2597         (Ty VPR:$Rn), 6, 4)),
2598         (Ty VPR:$Rm), 6, 5)),
2599         (Ty VPR:$Rn), 7, 6)),
2600         (INST VPR:$Rn, VPR:$Rm)>;
2601
2602 def : NI_Zip2_v8<v8i8, VPR64, ZIP2vvv_8b>;
2603 def : NI_Zip2_v8<v8i16, VPR128, ZIP2vvv_8h>;
2604
2605 class NI_Zip2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
2606                  PatFrag ei>
2607   : Pat<(Ty (ei (Ty (ei (Ty (ei
2608         (Ty VPR:$Rm),
2609         (Ty VPR:$Rn), 2, 0)),
2610         (Ty VPR:$Rm), 2, 1)),
2611         (Ty VPR:$Rn), 3, 2)),
2612         (INST VPR:$Rn, VPR:$Rm)>;
2613
2614 def : NI_Zip2_v4<v4i16, VPR64, ZIP2vvv_4h, NI_ei_i32>;
2615 def : NI_Zip2_v4<v4i32, VPR128, ZIP2vvv_4s, NI_ei_i32>;
2616 def : NI_Zip2_v4<v4f32, VPR128, ZIP2vvv_4s, NI_ei_f32>;
2617
2618 // trn1
2619 def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2620           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2621           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2622           (v16i8 VPR128:$Rn),
2623           (v16i8 VPR128:$Rm), 0,  1)),
2624           (v16i8 VPR128:$Rm), 2,  3)),
2625           (v16i8 VPR128:$Rm), 4,  5)),
2626           (v16i8 VPR128:$Rm), 6,  7)),
2627           (v16i8 VPR128:$Rm), 8,  9)),
2628           (v16i8 VPR128:$Rm), 10, 11)),
2629           (v16i8 VPR128:$Rm), 12, 13)),
2630           (v16i8 VPR128:$Rm), 14, 15)),
2631           (TRN1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
2632
2633 class NI_Trn1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
2634   : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2635         (Ty VPR:$Rn),
2636         (Ty VPR:$Rm), 0, 1)),
2637         (Ty VPR:$Rm), 2, 3)),
2638         (Ty VPR:$Rm), 4, 5)),
2639         (Ty VPR:$Rm), 6, 7)),
2640         (INST VPR:$Rn, VPR:$Rm)>;
2641
2642 def : NI_Trn1_v8<v8i8, VPR64, TRN1vvv_8b>;
2643 def : NI_Trn1_v8<v8i16, VPR128, TRN1vvv_8h>;
2644
2645 class NI_Trn1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
2646                  PatFrag ei>
2647   : Pat<(Ty (ei (Ty (ei
2648         (Ty VPR:$Rn),
2649         (Ty VPR:$Rm), 0, 1)),
2650         (Ty VPR:$Rm), 2, 3)),
2651         (INST VPR:$Rn, VPR:$Rm)>;
2652
2653 def : NI_Trn1_v4<v4i16, VPR64, TRN1vvv_4h, NI_ei_i32>;
2654 def : NI_Trn1_v4<v4i32, VPR128, TRN1vvv_4s, NI_ei_i32>;
2655 def : NI_Trn1_v4<v4f32, VPR128, TRN1vvv_4s, NI_ei_f32>;
2656
2657 // trn2
2658 def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2659           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2660           (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2661           (v16i8 VPR128:$Rm),
2662           (v16i8 VPR128:$Rn), 1,  0)),
2663           (v16i8 VPR128:$Rn), 3,  2)),
2664           (v16i8 VPR128:$Rn), 5,  4)),
2665           (v16i8 VPR128:$Rn), 7,  6)),
2666           (v16i8 VPR128:$Rn), 9,  8)),
2667           (v16i8 VPR128:$Rn), 11, 10)),
2668           (v16i8 VPR128:$Rn), 13, 12)),
2669           (v16i8 VPR128:$Rn), 15, 14)),
2670           (TRN2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
2671
2672 class NI_Trn2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
2673   : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2674         (Ty VPR:$Rm),
2675         (Ty VPR:$Rn), 1, 0)),
2676         (Ty VPR:$Rn), 3, 2)),
2677         (Ty VPR:$Rn), 5, 4)),
2678         (Ty VPR:$Rn), 7, 6)),
2679         (INST VPR:$Rn, VPR:$Rm)>;
2680
2681 def : NI_Trn2_v8<v8i8, VPR64, TRN2vvv_8b>;
2682 def : NI_Trn2_v8<v8i16, VPR128, TRN2vvv_8h>;
2683
2684 class NI_Trn2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
2685                  PatFrag ei>
2686   : Pat<(Ty (ei (Ty (ei
2687         (Ty VPR:$Rm),
2688         (Ty VPR:$Rn), 1, 0)),
2689         (Ty VPR:$Rn), 3, 2)),
2690         (INST VPR:$Rn, VPR:$Rm)>;
2691
2692 def : NI_Trn2_v4<v4i16, VPR64, TRN2vvv_4h, NI_ei_i32>;
2693 def : NI_Trn2_v4<v4i32, VPR128, TRN2vvv_4s, NI_ei_i32>;
2694 def : NI_Trn2_v4<v4f32, VPR128, TRN2vvv_4s, NI_ei_f32>;
2695
2696 // End of implementation for instruction class (Perm)
2697
2698 // The following definitions are for instruction class (3V Diff)
2699
2700 // normal long/long2 pattern
2701 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2702                  string asmop, string ResS, string OpS,
2703                  SDPatternOperator opnode, SDPatternOperator ext,
2704                  RegisterOperand OpVPR,
2705                  ValueType ResTy, ValueType OpTy>
2706   : NeonI_3VDiff<q, u, size, opcode,
2707                  (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2708                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2709                  [(set (ResTy VPR128:$Rd),
2710                     (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2711                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
2712                  NoItinerary>;
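
// E.g. "saddl Vd.8h, Vn.8b, Vm.8b" computes sext(Rn) + sext(Rm) per lane into
// the wider result element; the long2 forms do the same on the high halves of
// 128-bit operands.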
2713
2714 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2715                         string asmop, SDPatternOperator opnode,
2716                         bit Commutable = 0> {
2717   let isCommutable = Commutable in {
2718     def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2719                            opnode, sext, VPR64, v8i16, v8i8>;
2720     def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2721                            opnode, sext, VPR64, v4i32, v4i16>;
2722     def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2723                            opnode, sext, VPR64, v2i64, v2i32>;
2724   }
2725 }
2726
2727 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
2728                          SDPatternOperator opnode, bit Commutable = 0> {
2729   let isCommutable = Commutable in {
2730     def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2731                             opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2732     def _4s8h  : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2733                             opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2734     def _2d4s  : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2735                             opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2736   }
2737 }
2738
2739 multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
2740                         SDPatternOperator opnode, bit Commutable = 0> {
2741   let isCommutable = Commutable in {
2742     def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2743                            opnode, zext, VPR64, v8i16, v8i8>;
2744     def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2745                            opnode, zext, VPR64, v4i32, v4i16>;
2746     def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2747                            opnode, zext, VPR64, v2i64, v2i32>;
2748   }
2749 }
2750
2751 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
2752                          SDPatternOperator opnode, bit Commutable = 0> {
2753   let isCommutable = Commutable in {
2754     def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2755                             opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2756     def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2757                            opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2758     def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2759                            opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2760   }
2761 }
2762
2763 defm SADDLvvv :  NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2764 defm UADDLvvv :  NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2765
2766 defm SADDL2vvv :  NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2767 defm UADDL2vvv :  NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2768
2769 defm SSUBLvvv :  NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2770 defm USUBLvvv :  NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2771
2772 defm SSUBL2vvv :  NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2773 defm USUBL2vvv :  NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2774
2775 // normal wide/wide2 pattern
2776 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2777                  string asmop, string ResS, string OpS,
2778                  SDPatternOperator opnode, SDPatternOperator ext,
2779                  RegisterOperand OpVPR,
2780                  ValueType ResTy, ValueType OpTy>
2781   : NeonI_3VDiff<q, u, size, opcode,
2782                  (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2783                  asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2784                  [(set (ResTy VPR128:$Rd),
2785                     (ResTy (opnode (ResTy VPR128:$Rn),
2786                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
2787                  NoItinerary>;
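
// E.g. "saddw Vd.8h, Vn.8h, Vm.8b" computes Rn + sext(Rm) per lane; only the
// second operand is widened.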
2788
2789 multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
2790                         SDPatternOperator opnode> {
2791   def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2792                          opnode, sext, VPR64, v8i16, v8i8>;
2793   def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2794                          opnode, sext, VPR64, v4i32, v4i16>;
2795   def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2796                          opnode, sext, VPR64, v2i64, v2i32>;
2797 }
2798
2799 defm SADDWvvv :  NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2800 defm SSUBWvvv :  NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
2801
2802 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
2803                          SDPatternOperator opnode> {
2804   def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2805                           opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2806   def _4s8h  : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2807                           opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2808   def _2d4s  : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2809                           opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2810 }
2811
2812 defm SADDW2vvv :  NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2813 defm SSUBW2vvv :  NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2814
2815 multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
2816                         SDPatternOperator opnode> {
2817   def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2818                          opnode, zext, VPR64, v8i16, v8i8>;
2819   def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2820                          opnode, zext, VPR64, v4i32, v4i16>;
2821   def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2822                          opnode, zext, VPR64, v2i64, v2i32>;
2823 }
2824
2825 defm UADDWvvv :  NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2826 defm USUBWvvv :  NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2827
2828 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
2829                          SDPatternOperator opnode> {
2830   def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2831                           opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2832   def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2833                          opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2834   def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2835                          opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2836 }
2837
2838 defm UADDW2vvv :  NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2839 defm USUBW2vvv :  NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2840
2841 // Get the high half of each vector element.
2842 multiclass NeonI_get_high {
2843   def _8h : PatFrag<(ops node:$Rn),
2844                     (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2845                                              (v8i16 (Neon_vdup (i32 8)))))))>;
2846   def _4s : PatFrag<(ops node:$Rn),
2847                     (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2848                                               (v4i32 (Neon_vdup (i32 16)))))))>;
2849   def _2d : PatFrag<(ops node:$Rn),
2850                     (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2851                                               (v2i64 (Neon_vdup (i32 32)))))))>;
2852 }
2853
2854 defm NI_get_hi : NeonI_get_high;
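
// E.g. for "addhn Vd.8b, Vn.8h, Vm.8h" each result lane is the top 8 bits of
// the 16-bit sum Rn + Rm; the NI_get_hi_* fragments express that narrowing as
// a logical shift right by half the element width followed by a truncate.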
2855
2856 // pattern for addhn/subhn with 2 operands
2857 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2858                            string asmop, string ResS, string OpS,
2859                            SDPatternOperator opnode, SDPatternOperator get_hi,
2860                            ValueType ResTy, ValueType OpTy>
2861   : NeonI_3VDiff<q, u, size, opcode,
2862                  (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2863                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2864                  [(set (ResTy VPR64:$Rd),
2865                     (ResTy (get_hi
2866                       (OpTy (opnode (OpTy VPR128:$Rn),
2867                                     (OpTy VPR128:$Rm))))))],
2868                  NoItinerary>;
2869
2870 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
2871                                 SDPatternOperator opnode, bit Commutable = 0> {
2872   let isCommutable = Commutable in {
2873     def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2874                                      opnode, NI_get_hi_8h, v8i8, v8i16>;
2875     def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2876                                      opnode, NI_get_hi_4s, v4i16, v4i32>;
2877     def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2878                                      opnode, NI_get_hi_2d, v2i32, v2i64>;
2879   }
2880 }
2881
2882 defm ADDHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2883 defm SUBHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2884
2885 // pattern for operation with 2 operands
2886 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2887                     string asmop, string ResS, string OpS,
2888                     SDPatternOperator opnode,
2889                     RegisterOperand ResVPR, RegisterOperand OpVPR,
2890                     ValueType ResTy, ValueType OpTy>
2891   : NeonI_3VDiff<q, u, size, opcode,
2892                  (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2893                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2894                  [(set (ResTy ResVPR:$Rd),
2895                     (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2896                  NoItinerary>;
2897
2898 // normal narrow pattern
2899 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
2900                           SDPatternOperator opnode, bit Commutable = 0> {
2901   let isCommutable = Commutable in {
2902     def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2903                               opnode, VPR64, VPR128, v8i8, v8i16>;
2904     def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2905                               opnode, VPR64, VPR128, v4i16, v4i32>;
2906     def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2907                               opnode, VPR64, VPR128, v2i32, v2i64>;
2908   }
2909 }
2910
2911 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2912 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2913
2914 // pattern for ACLE intrinsics with 3 operands
2915 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2916                      string asmop, string ResS, string OpS>
2917   : NeonI_3VDiff<q, u, size, opcode,
2918                  (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2919                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2920                  [], NoItinerary> {
2921   let Constraints = "$src = $Rd";
2922   let neverHasSideEffects = 1;
2923 }
2924
2925 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
2926   def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2927   def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2928   def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2929 }
2930
2931 defm ADDHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2932 defm SUBHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2933
2934 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2935 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2936
2937 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
2938 // part.
2939 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2940                         SDPatternOperator coreop>
2941   : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2942                       (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2943                                                         (SrcTy VPR128:$Rm)))))),
2944         (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2945               VPR128:$Rn, VPR128:$Rm)>;
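
// E.g. "addhn2 Vd.16b, Vn.8h, Vm.8h" leaves the low 64 bits of Vd (the
// incoming $src) untouched and writes the narrowed high halves of Rn + Rm into
// the upper 64 bits, hence the explicit SUBREG_TO_REG in the output pattern.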
2946
2947 // addhn2 patterns
2948 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8,  v8i16,
2949           BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2950 def : NarrowHighHalfPat<ADDHN2vvv_8h4s,  v4i16, v4i32,
2951           BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2952 def : NarrowHighHalfPat<ADDHN2vvv_4s2d,  v2i32, v2i64,
2953           BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
2954
2955 // subhn2 patterns
2956 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8,  v8i16,
2957           BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2958 def : NarrowHighHalfPat<SUBHN2vvv_8h4s,  v4i16, v4i32,
2959           BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2960 def : NarrowHighHalfPat<SUBHN2vvv_4s2d,  v2i32, v2i64,
2961           BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
2962
2963 // raddhn2 patterns
2964 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vraddhn>;
2965 def : NarrowHighHalfPat<RADDHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vraddhn>;
2966 def : NarrowHighHalfPat<RADDHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vraddhn>;
2967
2968 // rsubhn2 patterns
2969 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vrsubhn>;
2970 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vrsubhn>;
2971 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vrsubhn>;
2972
2973 // pattern that needs to extend the result
2974 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2975                      string asmop, string ResS, string OpS,
2976                      SDPatternOperator opnode,
2977                      RegisterOperand OpVPR,
2978                      ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2979   : NeonI_3VDiff<q, u, size, opcode,
2980                  (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2981                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2982                  [(set (ResTy VPR128:$Rd),
2983                     (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2984                                                 (OpTy OpVPR:$Rm))))))],
2985                  NoItinerary>;
2986
2987 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
2988                            SDPatternOperator opnode, bit Commutable = 0> {
2989   let isCommutable = Commutable in {
2990     def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2991                                opnode, VPR64, v8i16, v8i8, v8i8>;
2992     def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2993                                opnode, VPR64, v4i32, v4i16, v4i16>;
2994     def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2995                                opnode, VPR64, v2i64, v2i32, v2i32>;
2996   }
2997 }
2998
2999 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
3000 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
3001
3002 multiclass NeonI_Op_High<SDPatternOperator op> {
3003   def _16B : PatFrag<(ops node:$Rn, node:$Rm),
3004                      (op (v8i8 (Neon_High16B node:$Rn)),
3005                          (v8i8 (Neon_High16B node:$Rm)))>;
3006   def _8H  : PatFrag<(ops node:$Rn, node:$Rm),
3007                      (op (v4i16 (Neon_High8H node:$Rn)),
3008                          (v4i16 (Neon_High8H node:$Rm)))>;
3009   def _4S  : PatFrag<(ops node:$Rn, node:$Rm),
3010                      (op (v2i32 (Neon_High4S node:$Rn)),
3011                          (v2i32 (Neon_High4S node:$Rm)))>;
3012 }
3013
3014 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
3015 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
3016 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
3017 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
3018 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
3019 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
3020
3021 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
3022                             bit Commutable = 0> {
3023   let isCommutable = Commutable in {
3024     def _8h8b  : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3025                                 !cast<PatFrag>(opnode # "_16B"),
3026                                 VPR128, v8i16, v16i8, v8i8>;
3027     def _4s4h  : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3028                                 !cast<PatFrag>(opnode # "_8H"),
3029                                 VPR128, v4i32, v8i16, v4i16>;
3030     def _2d2s  : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3031                                 !cast<PatFrag>(opnode # "_4S"),
3032                                 VPR128, v2i64, v4i32, v2i32>;
3033   }
3034 }
3035
3036 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
3037 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
3038
3039 // For patterns that need two operators chained together.
3040 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
3041                      string asmop, string ResS, string OpS, 
3042                      SDPatternOperator opnode, SDPatternOperator subop,
3043                      RegisterOperand OpVPR,
3044                      ValueType ResTy, ValueType OpTy, ValueType OpSTy>
3045   : NeonI_3VDiff<q, u, size, opcode,
3046                  (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
3047                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, 
3048                  [(set (ResTy VPR128:$Rd),
3049                     (ResTy (opnode
3050                       (ResTy VPR128:$src), 
3051                       (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
3052                                                  (OpTy OpVPR:$Rm))))))))],
3053                  NoItinerary> {
3054   let Constraints = "$src = $Rd";
3055 }
3056
3057 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
3058                              SDPatternOperator opnode, SDPatternOperator subop>{
3059   def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3060                              opnode, subop, VPR64, v8i16, v8i8, v8i8>;
3061   def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3062                              opnode, subop, VPR64, v4i32, v4i16, v4i16>;
3063   def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3064                              opnode, subop, VPR64, v2i64, v2i32, v2i32>;
3065 }
3066
3067 defm SABALvvv :  NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
3068                                    add, int_arm_neon_vabds>;
3069 defm UABALvvv :  NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
3070                                    add, int_arm_neon_vabdu>;
3071
3072 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
3073                               SDPatternOperator opnode, string subop> {
3074   def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3075                              opnode, !cast<PatFrag>(subop # "_16B"), 
3076                              VPR128, v8i16, v16i8, v8i8>;
3077   def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3078                              opnode, !cast<PatFrag>(subop # "_8H"), 
3079                              VPR128, v4i32, v8i16, v4i16>;
3080   def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3081                              opnode, !cast<PatFrag>(subop # "_4S"), 
3082                              VPR128, v2i64, v4i32, v2i32>;
3083 }
3084
3085 defm SABAL2vvv :  NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
3086                                      "NI_sabdl_hi">;
3087 defm UABAL2vvv :  NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
3088                                      "NI_uabdl_hi">;
3089
3090 // Long pattern with 2 operands
3091 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
3092                           SDPatternOperator opnode, bit Commutable = 0> {
3093   let isCommutable = Commutable in {
3094     def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3095                               opnode, VPR128, VPR64, v8i16, v8i8>;
3096     def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3097                               opnode, VPR128, VPR64, v4i32, v4i16>;
3098     def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3099                               opnode, VPR128, VPR64, v2i64, v2i32>;
3100   }
3101 }
3102
3103 defm SMULLvvv :  NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
3104 defm UMULLvvv :  NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
3105
3106 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
3107                            string asmop, string ResS, string OpS,
3108                            SDPatternOperator opnode,
3109                            ValueType ResTy, ValueType OpTy>
3110   : NeonI_3VDiff<q, u, size, opcode,
3111                  (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
3112                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3113                  [(set (ResTy VPR128:$Rd),
3114                     (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
3115                  NoItinerary>;
3116
3117 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
3118                                    string opnode, bit Commutable = 0> {
3119   let isCommutable = Commutable in {
3120     def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3121                                       !cast<PatFrag>(opnode # "_16B"),
3122                                       v8i16, v16i8>;
3123     def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3124                                      !cast<PatFrag>(opnode # "_8H"),
3125                                      v4i32, v8i16>;
3126     def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3127                                      !cast<PatFrag>(opnode # "_4S"),
3128                                      v2i64, v4i32>;
3129   }
3130 }
3131
3132 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
3133                                          "NI_smull_hi", 1>;
3134 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
3135                                          "NI_umull_hi", 1>;
3136
3137 // Long pattern with 3 operands
3138 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
3139                      string asmop, string ResS, string OpS,
3140                      SDPatternOperator opnode,
3141                      ValueType ResTy, ValueType OpTy>
3142   : NeonI_3VDiff<q, u, size, opcode,
3143                  (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
3144                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3145                  [(set (ResTy VPR128:$Rd),
3146                     (ResTy (opnode
3147                       (ResTy VPR128:$src),
3148                       (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
3149                NoItinerary> {
3150   let Constraints = "$src = $Rd";
3151 }
3152
3153 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
3154                              SDPatternOperator opnode> {
3155   def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3156                              opnode, v8i16, v8i8>;
3157   def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3158                              opnode, v4i32, v4i16>;
3159   def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3160                              opnode, v2i64, v2i32>;
3161 }
3162
3163 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3164                          (add node:$Rd,
3165                             (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
3166
3167 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3168                          (add node:$Rd,
3169                             (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
3170
3171 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3172                          (sub node:$Rd,
3173                             (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
3174
3175 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3176                          (sub node:$Rd,
3177                             (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
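
// E.g. "smlal Vd.8h, Vn.8b, Vm.8b" performs Vd += sext(Vn) * sext(Vm) per
// lane, modelled above as an add/sub of the corresponding widening multiply.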
3178
3179 defm SMLALvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
3180 defm UMLALvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
3181
3182 defm SMLSLvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
3183 defm UMLSLvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
3184
3185 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
3186                            string asmop, string ResS, string OpS,
3187                            SDPatternOperator subop, SDPatternOperator opnode,
3188                            RegisterOperand OpVPR,
3189                            ValueType ResTy, ValueType OpTy>
3190   : NeonI_3VDiff<q, u, size, opcode,
3191                (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
3192                asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3193                [(set (ResTy VPR128:$Rd),
3194                   (ResTy (subop
3195                     (ResTy VPR128:$src),
3196                     (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
3197                NoItinerary> {
3198   let Constraints = "$src = $Rd";
3199 }
3200
3201 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop, 
3202                                    SDPatternOperator subop, string opnode> {
3203   def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3204                                     subop, !cast<PatFrag>(opnode # "_16B"),
3205                                     VPR128, v8i16, v16i8>;
3206   def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3207                                    subop, !cast<PatFrag>(opnode # "_8H"), 
3208                                    VPR128, v4i32, v8i16>;
3209   def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3210                                    subop, !cast<PatFrag>(opnode # "_4S"),
3211                                    VPR128, v2i64, v4i32>;
3212 }
3213
3214 defm SMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
3215                                           add, "NI_smull_hi">;
3216 defm UMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
3217                                           add, "NI_umull_hi">;
3218
3219 defm SMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
3220                                           sub, "NI_smull_hi">;
3221 defm UMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
3222                                           sub, "NI_umull_hi">;
3223
3224 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
3225                                     SDPatternOperator opnode> {
3226   def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3227                                    opnode, int_arm_neon_vqdmull,
3228                                    VPR64, v4i32, v4i16>;
3229   def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3230                                    opnode, int_arm_neon_vqdmull,
3231                                    VPR64, v2i64, v2i32>;
3232 }
3233
3234 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
3235                                            int_arm_neon_vqadds>;
3236 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
3237                                            int_arm_neon_vqsubs>;
3238
3239 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
3240                          SDPatternOperator opnode, bit Commutable = 0> {
3241   let isCommutable = Commutable in {
3242     def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3243                               opnode, VPR128, VPR64, v4i32, v4i16>;
3244     def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3245                               opnode, VPR128, VPR64, v2i64, v2i32>;
3246   }
3247 }
3248
3249 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
3250                                 int_arm_neon_vqdmull, 1>;
3251
3252 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop, 
3253                                    string opnode, bit Commutable = 0> {
3254   let isCommutable = Commutable in {
3255     def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3256                                      !cast<PatFrag>(opnode # "_8H"),
3257                                      v4i32, v8i16>;
3258     def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3259                                      !cast<PatFrag>(opnode # "_4S"),
3260                                      v2i64, v4i32>;
3261   }
3262 }
3263
3264 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2", 
3265                                            "NI_qdmull_hi", 1>;
3266
3267 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop, 
3268                                      SDPatternOperator opnode> {
3269   def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3270                                    opnode, NI_qdmull_hi_8H,
3271                                    VPR128, v4i32, v8i16>;
3272   def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3273                                    opnode, NI_qdmull_hi_4S,
3274                                    VPR128, v2i64, v4i32>;
3275 }
3276
3277 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
3278                                              int_arm_neon_vqadds>;
3279 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
3280                                              int_arm_neon_vqsubs>;
3281
3282 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
3283                          SDPatternOperator opnode, bit Commutable = 0> {
3284   let isCommutable = Commutable in {
3285     def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3286                               opnode, VPR128, VPR64, v8i16, v8i8>;
3287   }
3288 }
3289
3290 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
3291
3292 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop, 
3293                                    string opnode, bit Commutable = 0> {
3294   let isCommutable = Commutable in {
3295     def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3296                                       !cast<PatFrag>(opnode # "_16B"),
3297                                       v8i16, v16i8>;
3298   }
3299 }
3300
3301 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
3302                                          1>;
3303
3304 // End of implementation for instruction class (3V Diff)
3305
3306 // The following are the vector load/store multiple N-element structure
3307 // instructions (class SIMD lselem).
3308
3309 // ld1:         load multiple 1-element structures to 1/2/3/4 registers.
3310 // ld2/ld3/ld4: load multiple N-element structures to N registers (N = 2, 3, 4).
3311 //              The structure consists of a sequence of sets of N values.
3312 //              The first element of the structure is placed in the first lane
3313 //              of the first vector, the second element in the first lane
3314 //              of the second vector, and so on.
3315 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
3316 // the three 64-bit vectors list {BA, DC, FE}.
3317 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
3318 // 64-bit vectors list {DA, EB, FC}.
3319 // Store instructions store multiple structures from N registers analogously.
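// From the ACLE point of view, e.g. vld3q_u32 de-interleaves memory laid out
// as {A, B, C, A, B, C, ...} into three separate vectors of As, Bs and Cs,
// matching the LD3 behaviour described above.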
3320
3321
3322 class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
3323                     RegisterOperand VecList, string asmop>
3324   : NeonI_LdStMult<q, 1, opcode, size,
3325                  (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3326                  asmop # "\t$Rt, [$Rn]",
3327                  [],
3328                  NoItinerary> {
3329   let mayLoad = 1;
3330   let neverHasSideEffects = 1;
3331 }
3332
3333 multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
3334   def _8B : NeonI_LDVList<0, opcode, 0b00,
3335                           !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3336
3337   def _4H : NeonI_LDVList<0, opcode, 0b01,
3338                           !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3339
3340   def _2S : NeonI_LDVList<0, opcode, 0b10,
3341                           !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3342
3343   def _16B : NeonI_LDVList<1, opcode, 0b00,
3344                            !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3345
3346   def _8H : NeonI_LDVList<1, opcode, 0b01,
3347                           !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3348
3349   def _4S : NeonI_LDVList<1, opcode, 0b10,
3350                           !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3351
3352   def _2D : NeonI_LDVList<1, opcode, 0b11,
3353                           !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3354 }
3355
3356 // Load multiple N-element structures to N consecutive registers (N = 1,2,3,4)
3357 defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
3358 def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
3359
3360 defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
3361
3362 defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
3363
3364 defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
3365
3366 // Load multiple 1-element structures to N consecutive registers (N = 2,3,4)
3367 defm LD1_2V : LDVList_BHSD<0b1010, "VPair", "ld1">;
3368 def LD1_2V_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
3369
3370 defm LD1_3V : LDVList_BHSD<0b0110, "VTriple", "ld1">;
3371 def LD1_3V_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
3372
3373 defm LD1_4V : LDVList_BHSD<0b0010, "VQuad", "ld1">;
3374 def LD1_4V_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
3375
3376 class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
3377                     RegisterOperand VecList, string asmop>
3378   : NeonI_LdStMult<q, 0, opcode, size,
3379                  (outs), (ins GPR64xsp:$Rn, VecList:$Rt), 
3380                  asmop # "\t$Rt, [$Rn]",
3381                  [], 
3382                  NoItinerary> {
3383   let mayStore = 1;
3384   let neverHasSideEffects = 1;
3385 }
3386
3387 multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
3388   def _8B : NeonI_STVList<0, opcode, 0b00,
3389                           !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3390
3391   def _4H : NeonI_STVList<0, opcode, 0b01,
3392                           !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3393
3394   def _2S : NeonI_STVList<0, opcode, 0b10,
3395                           !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3396
3397   def _16B : NeonI_STVList<1, opcode, 0b00,
3398                            !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3399
3400   def _8H : NeonI_STVList<1, opcode, 0b01,
3401                           !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3402
3403   def _4S : NeonI_STVList<1, opcode, 0b10,
3404                           !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3405
3406   def _2D : NeonI_STVList<1, opcode, 0b11,
3407                           !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3408 }
3409
3410 // Store multiple N-element structures from N registers (N = 1,2,3,4)
3411 defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
3412 def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
3413
3414 defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
3415
3416 defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
3417
3418 defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
3419
3420 // Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
3421 defm ST1_2V : STVList_BHSD<0b1010, "VPair", "st1">;
3422 def ST1_2V_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
3423
3424 defm ST1_3V : STVList_BHSD<0b0110, "VTriple", "st1">;
3425 def ST1_3V_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
3426
3427 defm ST1_4V : STVList_BHSD<0b0010, "VQuad", "st1">;
3428 def ST1_4V_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3429
3430 // End of vector load/store multiple N-element structures (class SIMD lselem)
3431
3432 // The following are the post-index vector load/store multiple N-element
3433 // structure instructions (class SIMD lselem-post).
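// The post-index writeback immediate must equal the total number of bytes
// transferred, e.g. "ld1 {v0.16b}, [x0], #16"; the uimm_exactN operands below
// enforce that.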
3434 def exact8_asmoperand : AsmOperandClass {
3435   let Name = "Exact8";
3436   let PredicateMethod = "isExactImm<8>";
3437   let RenderMethod = "addImmOperands";
3438 }
3439 def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
3440   let ParserMatchClass = exact8_asmoperand;
3441 }
3442
3443 def exact16_asmoperand : AsmOperandClass {
3444   let Name = "Exact16";
3445   let PredicateMethod = "isExactImm<16>";
3446   let RenderMethod = "addImmOperands";
3447 }
3448 def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
3449   let ParserMatchClass = exact16_asmoperand;
3450 }
3451
3452 def exact24_asmoperand : AsmOperandClass {
3453   let Name = "Exact24";
3454   let PredicateMethod = "isExactImm<24>";
3455   let RenderMethod = "addImmOperands";
3456 }
3457 def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
3458   let ParserMatchClass = exact24_asmoperand;
3459 }
3460
3461 def exact32_asmoperand : AsmOperandClass {
3462   let Name = "Exact32";
3463   let PredicateMethod = "isExactImm<32>";
3464   let RenderMethod = "addImmOperands";
3465 }
3466 def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
3467   let ParserMatchClass = exact32_asmoperand;
3468 }
3469
3470 def exact48_asmoperand : AsmOperandClass {
3471   let Name = "Exact48";
3472   let PredicateMethod = "isExactImm<48>";
3473   let RenderMethod = "addImmOperands";
3474 }
3475 def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
3476   let ParserMatchClass = exact48_asmoperand;
3477 }
3478
3479 def exact64_asmoperand : AsmOperandClass {
3480   let Name = "Exact64";
3481   let PredicateMethod = "isExactImm<64>";
3482   let RenderMethod = "addImmOperands";
3483 }
3484 def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
3485   let ParserMatchClass = exact64_asmoperand;
3486 }
3487
3488 multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
3489                            RegisterOperand VecList, Operand ImmTy,
3490                            string asmop> {
3491   let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1, 
3492       DecoderMethod = "DecodeVLDSTPostInstruction" in {
3493     def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
3494                      (outs VecList:$Rt, GPR64xsp:$wb),
3495                      (ins GPR64xsp:$Rn, ImmTy:$amt), 
3496                      asmop # "\t$Rt, [$Rn], $amt",
3497                      [],
3498                      NoItinerary> {
3499       let Rm = 0b11111;
3500     }
3501
3502     def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
3503                         (outs VecList:$Rt, GPR64xsp:$wb),
3504                         (ins GPR64xsp:$Rn, GPR64noxzr:$Rm), 
3505                         asmop # "\t$Rt, [$Rn], $Rm",
3506                         [],
3507                         NoItinerary>;
3508   }
3509 }
3510
3511 multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3512     Operand ImmTy2, string asmop> {
3513   defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
3514                               !cast<RegisterOperand>(List # "8B_operand"),
3515                               ImmTy, asmop>;
3516
3517   defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
3518                               !cast<RegisterOperand>(List # "4H_operand"),
3519                               ImmTy, asmop>;
3520
3521   defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
3522                               !cast<RegisterOperand>(List # "2S_operand"),
3523                               ImmTy, asmop>;
3524
3525   defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
3526                                !cast<RegisterOperand>(List # "16B_operand"),
3527                                ImmTy2, asmop>;
3528
3529   defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
3530                               !cast<RegisterOperand>(List # "8H_operand"),
3531                               ImmTy2, asmop>;
3532
3533   defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
3534                               !cast<RegisterOperand>(List # "4S_operand"),
3535                               ImmTy2, asmop>;
3536
3537   defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
3538                               !cast<RegisterOperand>(List # "2D_operand"),
3539                               ImmTy2, asmop>;
3540 }
3541
3542 // Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
3543 defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
3544 defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3545                                  "ld1">;
3546
3547 defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
3548
3549 defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3550                              "ld3">;
3551
3552 defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
3553
3554 // Post-index load multiple 1-element structures from N consecutive registers
3555 // (N = 2,3,4)
3556 defm LD1WB2V : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3557                                "ld1">;
3558 defm LD1WB2V_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3559                                    uimm_exact16, "ld1">;
3560
3561 defm LD1WB3V : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3562                                "ld1">;
3563 defm LD1WB3V_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3564                                    uimm_exact24, "ld1">;
3565
3566 defm LD1WB_4V : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3567                                 "ld1">;
3568 defm LD1WB4V_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3569                                    uimm_exact32, "ld1">;
3570
3571 multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
3572                             RegisterOperand VecList, Operand ImmTy,
3573                             string asmop> {
3574   let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
3575       DecoderMethod = "DecodeVLDSTPostInstruction" in {
3576     def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
3577                      (outs GPR64xsp:$wb),
3578                      (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
3579                      asmop # "\t$Rt, [$Rn], $amt",
3580                      [],
3581                      NoItinerary> {
3582       let Rm = 0b11111;
3583     }
3584
3585     def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
3586                       (outs GPR64xsp:$wb),
3587                       (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt), 
3588                       asmop # "\t$Rt, [$Rn], $Rm",
3589                       [],
3590                       NoItinerary>;
3591   }
3592 }
3593
3594 multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3595                            Operand ImmTy2, string asmop> {
3596   defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
3597                  !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;
3598
3599   defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
3600                               !cast<RegisterOperand>(List # "4H_operand"),
3601                               ImmTy, asmop>;
3602
3603   defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
3604                               !cast<RegisterOperand>(List # "2S_operand"),
3605                               ImmTy, asmop>;
3606
3607   defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
3608                                !cast<RegisterOperand>(List # "16B_operand"),
3609                                ImmTy2, asmop>;
3610
3611   defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
3612                               !cast<RegisterOperand>(List # "8H_operand"),
3613                               ImmTy2, asmop>;
3614
3615   defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
3616                               !cast<RegisterOperand>(List # "4S_operand"),
3617                               ImmTy2, asmop>;
3618
3619   defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
3620                               !cast<RegisterOperand>(List # "2D_operand"),
3621                               ImmTy2, asmop>;
3622 }
3623
3624 // Post-index store multiple N-element structures from N registers (N = 1,2,3,4)
3625 defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
3626 defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3627                                  "st1">;
3628
3629 defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
3630
3631 defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3632                              "st3">;
3633
3634 defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
3635
3636 // Post-index store multiple 1-element structures from N consecutive registers
3637 // (N = 2,3,4)
3638 defm ST1WB2V : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3639                                "st1">;
3640 defm ST1WB2V_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3641                                    uimm_exact16, "st1">;
3642
3643 defm ST1WB3V : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3644                                "st1">;
3645 defm ST1WB3V_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3646                                    uimm_exact24, "st1">;
3647
3648 defm ST1WB4V : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3649                                "st1">;
3650 defm ST1WB4V_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3651                                    uimm_exact32, "st1">;
3652
3653 // End of post-index vector load/store multiple N-element structure
3654 // (class SIMD lselem-post)
3655
3656 // Scalar Three Same
3657
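// Naming convention for the scalar definitions below: the suffix letters give
// the register size of each operand in order, destination first (e.g. ddd is
// D, D, D; shh is an S destination with two H sources), and a trailing 'i'
// marks an immediate operand (e.g. ddi, ssi).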
3658 class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
3659                              RegisterClass FPRC>
3660   : NeonI_Scalar3Same<u, size, opcode,
3661                       (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
3662                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3663                       [],
3664                       NoItinerary>;
3665
3666 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
3667   : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
3668
3669 multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
3670                                       bit Commutable = 0> {
3671   let isCommutable = Commutable in {
3672     def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
3673     def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
3674   }
3675 }
3676
3677 multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
3678                                       string asmop, bit Commutable = 0> {
3679   let isCommutable = Commutable in {
3680     def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
3681     def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
3682   }
3683 }
3684
3685 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
3686                                         string asmop, bit Commutable = 0> {
3687   let isCommutable = Commutable in {
3688     def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
3689     def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
3690     def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
3691     def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
3692   }
3693 }
3694
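// The pattern multiclasses below map operations on one-element vector types
// (v1i8 .. v1i64, v1f32, v1f64), which is how the scalar NEON intrinsics are
// presented to instruction selection, onto the scalar instructions.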
3695 multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
3696                                             Instruction INSTD> {
3697   def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3698             (INSTD FPR64:$Rn, FPR64:$Rm)>;        
3699 }
3700
3701 multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
3702                                                Instruction INSTB,
3703                                                Instruction INSTH,
3704                                                Instruction INSTS,
3705                                                Instruction INSTD>
3706   : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
3707   def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
3708            (INSTB FPR8:$Rn, FPR8:$Rm)>;
3709
3710   def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3711            (INSTH FPR16:$Rn, FPR16:$Rm)>;
3712
3713   def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3714            (INSTS FPR32:$Rn, FPR32:$Rm)>;
3715 }
3716
3717 class Neon_Scalar3Same_cmp_D_size_patterns<SDPatternOperator opnode,
3718                                            Instruction INSTD>
3719   : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3720         (INSTD FPR64:$Rn, FPR64:$Rm)>;
3721
3722 multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
3723                                              Instruction INSTH,
3724                                              Instruction INSTS> {
3725   def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3726             (INSTH FPR16:$Rn, FPR16:$Rm)>;
3727   def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3728             (INSTS FPR32:$Rn, FPR32:$Rm)>;
3729 }
3730
3731 multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
3732                                              Instruction INSTS,
3733                                              Instruction INSTD> {
3734   def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3735             (INSTS FPR32:$Rn, FPR32:$Rm)>;
3736   def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3737             (INSTD FPR64:$Rn, FPR64:$Rm)>;
3738 }
3739
3740 multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode,
3741                                                  Instruction INSTS,
3742                                                  Instruction INSTD> {
3743   def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3744             (INSTS FPR32:$Rn, FPR32:$Rm)>;
3745   def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3746             (INSTD FPR64:$Rn, FPR64:$Rm)>;
3747 }
3748
3749 // Scalar Three Different
3750
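// "Three Different" means the destination element size differs from the source
// element size: each instruction widens, producing a result twice as wide as
// its two like-sized sources (H, H -> S and S, S -> D below).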
3751 class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
3752                              RegisterClass FPRCD, RegisterClass FPRCS>
3753   : NeonI_Scalar3Diff<u, size, opcode,
3754                       (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
3755                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3756                       [],
3757                       NoItinerary>;
3758
3759 multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
3760   def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
3761   def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
3762 }
3763
3764 multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
3765   let Constraints = "$Src = $Rd" in {
3766     def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
3767                        (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
3768                        !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3769                        [],
3770                        NoItinerary>;
3771     def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
3772                        (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
3773                        !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3774                        [],
3775                        NoItinerary>;
3776   }
3777 }
3778
3779 multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
3780                                              Instruction INSTH,
3781                                              Instruction INSTS> {
3782   def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3783             (INSTH FPR16:$Rn, FPR16:$Rm)>;
3784   def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3785             (INSTS FPR32:$Rn, FPR32:$Rm)>;
3786 }
3787
3788 multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
3789                                              Instruction INSTH,
3790                                              Instruction INSTS> {
3791   def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3792             (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
3793   def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3794             (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
3795 }
3796
3797 // Scalar Two Registers Miscellaneous
3798
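// "Two Registers Miscellaneous" covers the single-source operations: one input
// Rn and one result Rd. The narrowing variants use a smaller destination
// register class than the source, and the accumulating variants tie an extra
// $Src operand to $Rd.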
3799 class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
3800                              RegisterClass FPRCD, RegisterClass FPRCS>
3801   : NeonI_Scalar2SameMisc<u, size, opcode,
3802                           (outs FPRCD:$Rd), (ins FPRCS:$Rn),
3803                           !strconcat(asmop, "\t$Rd, $Rn"),
3804                           [],
3805                           NoItinerary>;
3806
3807 multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
3808                                          string asmop> {
3809   def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
3810                                       FPR32>;
3811   def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
3812                                       FPR64>;
3813 }
3814
3815 multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
3816   def dd: NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
3817 }
3818
3819 multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
3820   : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
3821   def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
3822   def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
3823   def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
3824 }
3825
3826 multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
3827                                                  string asmop> {
3828   def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
3829   def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
3830   def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
3831 }
3832
3833 class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
3834                                        string asmop, RegisterClass FPRC>
3835   : NeonI_Scalar2SameMisc<u, size, opcode,
3836                           (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
3837                           !strconcat(asmop, "\t$Rd, $Rn"),
3838                           [],
3839                           NoItinerary>;
3840
3841 multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
3842                                                  string asmop> {
3843
3844   let Constraints = "$Src = $Rd" in {
3845     def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
3846     def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
3847     def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
3848     def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
3849   }
3850 }
3851
3852 multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
3853                                                      SDPatternOperator Dopnode,
3854                                                      Instruction INSTS,
3855                                                      Instruction INSTD> {
3856   def : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn))),
3857             (INSTS FPR32:$Rn)>;
3858   def : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn))),
3859             (INSTD FPR64:$Rn)>;
3860 }
3861
3862 multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
3863                                                  Instruction INSTS,
3864                                                  Instruction INSTD> {
3865   def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))),
3866             (INSTS FPR32:$Rn)>;
3867   def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
3868             (INSTD FPR64:$Rn)>;
3869 }
3870
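// The compare-against-zero forms take the zero as an explicit operand: a #0
// immediate (neon_uimm0) for the integer D-sized compare and a floating-point
// zero (fpz32 / fpz64movi) for the S/D compares. The selection patterns below
// match a comparison against an all-zero vector and emit the instruction with
// that zero operand.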
3871 class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
3872   : NeonI_Scalar2SameMisc<u, 0b11, opcode,
3873                           (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
3874                           !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3875                           [],
3876                           NoItinerary>;
3877
3878 multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
3879                                               string asmop> {
3880   def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
3881                            (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
3882                            !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
3883                            [],
3884                            NoItinerary>;
3885   def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
3886                            (outs FPR64:$Rd), (ins FPR64:$Rn, fpz64movi:$FPImm),
3887                            !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
3888                            [],
3889                            NoItinerary>;
3890 }
3891
3892 class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
3893                                                 Instruction INSTD>
3894   : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
3895                        (v1i64 (bitconvert (v8i8 Neon_AllZero))))),
3896         (INSTD FPR64:$Rn, 0)>;
3897
3898 multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
3899                                                       Instruction INSTS,
3900                                                       Instruction INSTD> {
3901   def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn),
3902                            (v1f32 (scalar_to_vector (f32 fpimm:$FPImm))))),
3903             (INSTS FPR32:$Rn, fpimm:$FPImm)>;
3904   def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn),
3905                            (v1f64 (bitconvert (v8i8 Neon_AllZero))))),
3906             (INSTD FPR64:$Rn, 0)>;
3907 }
3908
3909 multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
3910                                                 Instruction INSTD> {
3911   def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
3912             (INSTD FPR64:$Rn)>;
3913 }
3914
3915 multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
3916                                                    Instruction INSTB,
3917                                                    Instruction INSTH,
3918                                                    Instruction INSTS,
3919                                                    Instruction INSTD>
3920   : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
3921   def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
3922             (INSTB FPR8:$Rn)>;
3923   def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
3924             (INSTH FPR16:$Rn)>;
3925   def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
3926             (INSTS FPR32:$Rn)>;
3927 }
3928
3929 multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
3930                                                        SDPatternOperator opnode,
3931                                                        Instruction INSTH,
3932                                                        Instruction INSTS,
3933                                                        Instruction INSTD> {
3934   def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
3935             (INSTH FPR16:$Rn)>;
3936   def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
3937             (INSTS FPR32:$Rn)>;
3938   def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
3939             (INSTD FPR64:$Rn)>;
3940
3941 }
3942
3943 multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
3944                                                        SDPatternOperator opnode,
3945                                                        Instruction INSTB,
3946                                                        Instruction INSTH,
3947                                                        Instruction INSTS,
3948                                                        Instruction INSTD> {
3949   def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
3950             (INSTB FPR8:$Src, FPR8:$Rn)>;
3951   def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
3952             (INSTH FPR16:$Src, FPR16:$Rn)>;
3953   def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
3954             (INSTS FPR32:$Src, FPR32:$Rn)>;
3955   def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
3956             (INSTD FPR64:$Src, FPR64:$Rn)>;
3957 }
3958
3959 // Scalar Shift By Immediate
3960
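// For the shift-by-immediate forms, the fixed leading bits of immh (see the
// immh:immb comments below) select the element size and the remaining bits
// carry the shift amount. In the architectural immh:immb encoding a right
// shift by n is held as (2 * esize) - n and a left shift by n as esize + n.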
3961 class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
3962                                 RegisterClass FPRC, Operand ImmTy>
3963   : NeonI_ScalarShiftImm<u, opcode,
3964                          (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
3965                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3966                          [], NoItinerary>;
3967
3968 multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
3969                                             string asmop> {
3970   def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
3971     bits<6> Imm;
3972     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3973     let Inst{21-16} = Imm;
3974   }
3975 }
3976
3977 multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
3978                                                string asmop>
3979   : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
3980   def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
3981     bits<3> Imm;
3982     let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
3983     let Inst{18-16} = Imm;
3984   }
3985   def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
3986     bits<4> Imm;
3987     let Inst{22-20} = 0b001; // immh:immb = 001xxxx
3988     let Inst{19-16} = Imm;
3989   }
3990   def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
3991     bits<5> Imm;
3992     let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3993     let Inst{20-16} = Imm;
3994   }
3995 }
3996
3997 multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
3998                                             string asmop> {
3999   def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
4000     bits<6> Imm;
4001     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4002     let Inst{21-16} = Imm;
4003   }
4004 }
4005
4006 multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
4007                                               string asmop>
4008   : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
4009   def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
4010     bits<3> Imm;
4011     let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4012     let Inst{18-16} = Imm;
4013   }
4014   def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
4015     bits<4> Imm;
4016     let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4017     let Inst{19-16} = Imm;
4018   }
4019   def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
4020     bits<5> Imm;
4021     let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4022     let Inst{20-16} = Imm;
4023   }
4024 }
4025
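// The accumulating and insert forms (ssra, usra, srsra, ursra, sri, sli) both
// read and write the destination, which is modelled by tying $Src to $Rd.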
4026 class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
4027   : NeonI_ScalarShiftImm<u, opcode,
4028                          (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
4029                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4030                          [], NoItinerary> {
4031     bits<6> Imm;
4032     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4033     let Inst{21-16} = Imm;
4034     let Constraints = "$Src = $Rd";
4035 }
4036
4037 class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
4038   : NeonI_ScalarShiftImm<u, opcode,
4039                          (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
4040                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4041                          [], NoItinerary> {
4042     bits<6> Imm;
4043     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4044     let Inst{21-16} = Imm;
4045     let Constraints = "$Src = $Rd";
4046 }
4047
4048 class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
4049                                        RegisterClass FPRCD, RegisterClass FPRCS,
4050                                        Operand ImmTy>
4051   : NeonI_ScalarShiftImm<u, opcode,
4052                          (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
4053                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4054                          [], NoItinerary>;
4055
4056 multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
4057                                                 string asmop> {
4058   def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
4059                                              shr_imm8> {
4060     bits<3> Imm;
4061     let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4062     let Inst{18-16} = Imm;
4063   }
4064   def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
4065                                              shr_imm16> {
4066     bits<4> Imm;
4067     let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4068     let Inst{19-16} = Imm;
4069   }
4070   def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
4071                                              shr_imm32> {
4072     bits<5> Imm;
4073     let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4074     let Inst{20-16} = Imm;
4075   }
4076 }
4077
4078 multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
4079   def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
4080     bits<5> Imm;
4081     let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4082     let Inst{20-16} = Imm;
4083   }
4084   def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
4085     bits<6> Imm;
4086     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4087     let Inst{21-16} = Imm;
4088   }
4089 }
4090
4091 multiclass Neon_ScalarShiftImm_D_size_patterns<SDPatternOperator opnode,
4092                                                Instruction INSTD> {
4093   def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
4094                 (INSTD FPR64:$Rn, imm:$Imm)>;
4095 }
4096
4097 class Neon_ScalarShiftImm_arm_D_size_patterns<SDPatternOperator opnode,
4098                                               Instruction INSTD>
4099   : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 (Neon_vdup (i32 imm:$Imm))))),
4100         (INSTD FPR64:$Rn, imm:$Imm)>;
4101
4102 multiclass Neon_ScalarShiftImm_BHSD_size_patterns<SDPatternOperator opnode,
4103                                                   Instruction INSTB,
4104                                                   Instruction INSTH,
4105                                                   Instruction INSTS,
4106                                                   Instruction INSTD>
4107   : Neon_ScalarShiftImm_D_size_patterns<opnode, INSTD> {
4108   def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 imm:$Imm))),
4109                 (INSTB FPR8:$Rn, imm:$Imm)>;
4110   def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
4111                 (INSTH FPR16:$Rn, imm:$Imm)>;
4112   def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
4113                 (INSTS FPR32:$Rn, imm:$Imm)>;
4114 }
4115
4116 class Neon_ScalarShiftImm_accum_D_size_patterns<SDPatternOperator opnode,
4117                                                 Instruction INSTD>
4118   : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
4119         (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4120
4121 multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
4122                                                        SDPatternOperator opnode,
4123                                                        Instruction INSTH,
4124                                                        Instruction INSTS,
4125                                                        Instruction INSTD> {
4126   def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
4127                 (INSTH FPR16:$Rn, imm:$Imm)>;
4128   def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
4129                 (INSTS FPR32:$Rn, imm:$Imm)>;
4130   def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
4131                 (INSTD FPR64:$Rn, imm:$Imm)>;
4132 }
4133
4134 multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator Sopnode,
4135                                                       SDPatternOperator Dopnode,
4136                                                       Instruction INSTS,
4137                                                       Instruction INSTD> {
4138   def ssi : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
4139                 (INSTS FPR32:$Rn, imm:$Imm)>;
4140   def ddi : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
4141                 (INSTD FPR64:$Rn, imm:$Imm)>;
4142 }
4143
4144 multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator Sopnode,
4145                                                       SDPatternOperator Dopnode,
4146                                                       Instruction INSTS,
4147                                                       Instruction INSTD> {
4148   def ssi : Pat<(v1i32 (Sopnode (v1f32 FPR32:$Rn), (i32 imm:$Imm))),
4149                 (INSTS FPR32:$Rn, imm:$Imm)>;
4150   def ddi : Pat<(v1i64 (Dopnode (v1f64 FPR64:$Rn), (i32 imm:$Imm))),
4151                 (INSTD FPR64:$Rn, imm:$Imm)>;
4152 }
4153
4154 // Scalar Signed Shift Right (Immediate)
4155 defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
4156 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
4157 // Pattern to match llvm.arm.* intrinsic.
4158 def : Neon_ScalarShiftImm_arm_D_size_patterns<sra, SSHRddi>;
4159
4160 // Scalar Unsigned Shift Right (Immediate)
4161 defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
4162 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
4163 // Pattern to match llvm.arm.* intrinsic.
4164 def : Neon_ScalarShiftImm_arm_D_size_patterns<srl, USHRddi>;
4165
4166 // Scalar Signed Rounding Shift Right (Immediate)
4167 defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
4168 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;
4169
4170 // Scalar Unsigned Rounding Shift Right (Immediate)
4171 defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
4172 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;
4173
4174 // Scalar Signed Shift Right and Accumulate (Immediate)
4175 def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
4176 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsrads_n, SSRA>;
4177
4178 // Scalar Unsigned Shift Right and Accumulate (Immediate)
4179 def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
4180 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsradu_n, USRA>;
4181
4182 // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
4183 def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
4184 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsrads_n, SRSRA>;
4185
4186 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
4187 def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
4188 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsradu_n, URSRA>;
4189
4190 // Scalar Shift Left (Immediate)
4191 defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
4192 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
4193 // Pattern to match llvm.arm.* intrinsic.
4194 def : Neon_ScalarShiftImm_arm_D_size_patterns<shl, SHLddi>;
4195
4196 // Signed Saturating Shift Left (Immediate)
4197 defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
4198 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
4199                                               SQSHLbbi, SQSHLhhi,
4200                                               SQSHLssi, SQSHLddi>;
4201 // Pattern to match llvm.arm.* intrinsic.
4202 defm : Neon_ScalarShiftImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;
4203
4204 // Unsigned Saturating Shift Left (Immediate)
4205 defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
4206 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
4207                                               UQSHLbbi, UQSHLhhi,
4208                                               UQSHLssi, UQSHLddi>;
4209 // Pattern to match llvm.arm.* intrinsic.
4210 defm : Neon_ScalarShiftImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;
4211
4212 // Signed Saturating Shift Left Unsigned (Immediate)
4213 defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
4214 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
4215                                               SQSHLUbbi, SQSHLUhhi,
4216                                               SQSHLUssi, SQSHLUddi>;
4217
4218 // Shift Right And Insert (Immediate)
4219 def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
4220 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsri, SRI>;
4221
4222 // Shift Left And Insert (Immediate)
4223 def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
4224 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsli, SLI>;
4225
4226 // Signed Saturating Shift Right Narrow (Immediate)
4227 defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
4228 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
4229                                                     SQSHRNbhi, SQSHRNhsi,
4230                                                     SQSHRNsdi>;
4231
4232 // Unsigned Saturating Shift Right Narrow (Immediate)
4233 defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
4234 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
4235                                                     UQSHRNbhi, UQSHRNhsi,
4236                                                     UQSHRNsdi>;
4237
4238 // Signed Saturating Rounded Shift Right Narrow (Immediate)
4239 defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
4240 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
4241                                                     SQRSHRNbhi, SQRSHRNhsi,
4242                                                     SQRSHRNsdi>;
4243
4244 // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
4245 defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
4246 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
4247                                                     UQRSHRNbhi, UQRSHRNhsi,
4248                                                     UQRSHRNsdi>;
4249
4250 // Signed Saturating Shift Right Unsigned Narrow (Immediate)
4251 defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
4252 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
4253                                                     SQSHRUNbhi, SQSHRUNhsi,
4254                                                     SQSHRUNsdi>;
4255
4256 // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
4257 defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
4258 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
4259                                                     SQRSHRUNbhi, SQRSHRUNhsi,
4260                                                     SQRSHRUNsdi>;
4261
4262 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
4263 defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
4264 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_s32,
4265                                                   int_aarch64_neon_vcvtf64_n_s64,
4266                                                   SCVTF_Nssi, SCVTF_Nddi>;
4267
4268 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
4269 defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
4270 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_u32,
4271                                                   int_aarch64_neon_vcvtf64_n_u64,
4272                                                   UCVTF_Nssi, UCVTF_Nddi>;
4273
4274 // Scalar Floating-point Convert To Signed Fixed-point (Immediate)
4275 defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
4276 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvts_n_s32_f32,
4277                                                   int_aarch64_neon_vcvtd_n_s64_f64,
4278                                                   FCVTZS_Nssi, FCVTZS_Nddi>;
4279
4280 // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
4281 defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
4282 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvts_n_u32_f32,
4283                                                   int_aarch64_neon_vcvtd_n_u64_f64,
4284                                                   FCVTZU_Nssi, FCVTZU_Nddi>;
4285
4286 // Scalar Integer Add
4287 let isCommutable = 1 in {
4288 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
4289 }
4290
4291 // Scalar Integer Sub
4292 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
4293
4294 // Pattern for Scalar Integer Add and Sub with D register only
4295 defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
4296 defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
4297
4298 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
4299 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
4300 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
4301 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
4302 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
4303
4304 // Scalar Integer Saturating Add (Signed, Unsigned)
4305 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
4306 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
4307
4308 // Scalar Integer Saturating Sub (Signed, Unsigned)
4309 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
4310 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
4311
4312 // Patterns to match llvm.arm.* intrinsic for
4313 // Scalar Integer Saturating Add, Sub  (Signed, Unsigned)
4314 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
4315 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
4316 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
4317 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
4318
4319 // Patterns to match llvm.aarch64.* intrinsic for
4320 // Scalar Integer Saturating Add, Sub  (Signed, Unsigned)
4321 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb,
4322                                            SQADDhhh, SQADDsss, SQADDddd>;
4323 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb,
4324                                            UQADDhhh, UQADDsss, UQADDddd>;
4325 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb,
4326                                            SQSUBhhh, SQSUBsss, SQSUBddd>;
4327 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb,
4328                                            UQSUBhhh, UQSUBsss, UQSUBddd>;
4329
4330 // Scalar Integer Saturating Doubling Multiply Half High
4331 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
4332
4333 // Scalar Integer Saturating Rounding Doubling Multiply Half High
4334 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
4335
4336 // Patterns to match llvm.arm.* intrinsic for
4337 // Scalar Integer Saturating Doubling Multiply Half High and
4338 // Scalar Integer Saturating Rounding Doubling Multiply Half High
4339 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
4340                                                                SQDMULHsss>;
4341 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
4342                                                                 SQRDMULHsss>;
4343
4344 // Scalar Floating-point Multiply Extended
4345 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
4346
4347 // Scalar Floating-point Reciprocal Step
4348 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
4349
4350 // Scalar Floating-point Reciprocal Square Root Step
4351 defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
4352
4353 // Patterns to match llvm.arm.* intrinsic for
4354 // Scalar Floating-point Reciprocal Step and
4355 // Scalar Floating-point Reciprocal Square Root Step
4356 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
4357                                                               FRECPSddd>;
4358 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
4359                                                                FRSQRTSddd>;
4360
4361 // Patterns to match llvm.aarch64.* intrinsic for
4362 // Scalar Floating-point Multiply Extended,
4363 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vmulx, FMULXsss,
4364                                          FMULXddd>;
4365
4366 // Scalar Integer Shift Left (Signed, Unsigned)
4367 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
4368 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
4369
4370 // Patterns to match llvm.arm.* intrinsic for
4371 // Scalar Integer Shift Left (Signed, Unsigned)
4372 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
4373 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
4374
4375 // Patterns to match llvm.aarch64.* intrinsic for
4376 // Scalar Integer Shift Left (Signed, Unsigned)
4377 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
4378 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
4379
4380 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4381 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
4382 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
4383
4384 // Patterns to match llvm.aarch64.* intrinsic for
4385 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4386 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
4387                                            SQSHLhhh, SQSHLsss, SQSHLddd>;
4388 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
4389                                            UQSHLhhh, UQSHLsss, UQSHLddd>;
4390
4391 // Patterns to match llvm.arm.* intrinsic for
4392 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4393 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
4394 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
4395
4396 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4397 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
4398 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
4399
4400 // Patterns to match llvm.aarch64.* intrinsic for
4401 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4402 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
4403 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
4404
4405 // Patterns to match llvm.arm.* intrinsic for
4406 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4407 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
4408 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
4409
4410 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4411 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
4412 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
4413
4414 // Patterns to match llvm.aarch64.* intrinsic for
4415 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4416 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
4417                                            SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
4418 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
4419                                            UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
4420
4421 // Patterns to match llvm.arm.* intrinsic for
4422 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4423 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
4424 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
4425
4426 // Signed Saturating Doubling Multiply-Add Long
4427 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
4428 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
4429                                             SQDMLALshh, SQDMLALdss>;
4430
4431 // Signed Saturating Doubling Multiply-Subtract Long
4432 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
4433 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
4434                                             SQDMLSLshh, SQDMLSLdss>;
4435
4436 // Signed Saturating Doubling Multiply Long
4437 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
4438 defm : Neon_Scalar3Diff_HS_size_patterns<int_aarch64_neon_vqdmull,
4439                                          SQDMULLshh, SQDMULLdss>;
4440
4441 // Scalar Signed Integer Convert To Floating-point
4442 defm SCVTF  : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
4443 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
4444                                                  int_aarch64_neon_vcvtf64_s64,
4445                                                  SCVTFss, SCVTFdd>;
4446
4447 // Scalar Unsigned Integer Convert To Floating-point
4448 defm UCVTF  : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
4449 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
4450                                                  int_aarch64_neon_vcvtf64_u64,
4451                                                  UCVTFss, UCVTFdd>;
4452
4453 // Scalar Floating-point Reciprocal Estimate
4454 defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
4455 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
4456                                              FRECPEss, FRECPEdd>;
4457
4458 // Scalar Floating-point Reciprocal Exponent
4459 defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
4460 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
4461                                              FRECPXss, FRECPXdd>;
4462
4463 // Scalar Floating-point Reciprocal Square Root Estimate
4464 defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
4465 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
4466                                              FRSQRTEss, FRSQRTEdd>;
4467
4468 // Scalar Integer Compare
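// All of the compares below produce a mask rather than setting flags: the
// destination is set to all ones when the condition holds and to all zeros
// otherwise.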
4469
4470 // Scalar Compare Bitwise Equal
4471 def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
4472 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
4473
4474 class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
4475                                               Instruction INSTD,
4476                                               CondCode CC>
4477   : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
4478         (INSTD FPR64:$Rn, FPR64:$Rm)>;
4479
4480 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;
4481
4482 // Scalar Compare Signed Greater Than Or Equal
4483 def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
4484 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
4485
4486 // Scalar Compare Unsigned Higher Or Same
4487 def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
4488 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
4489
4490 // Scalar Compare Unsigned Higher
4491 def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
4492 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
4493
4494 // Scalar Compare Signed Greater Than
4495 def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
4496 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
4497
4498 // Scalar Compare Bitwise Test Bits
4499 def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
4500 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
4501 def : Neon_Scalar3Same_cmp_D_size_patterns<Neon_tst, CMTSTddd>;
4502
4503 // Scalar Compare Bitwise Equal To Zero
4504 def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
4505 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
4506                                                 CMEQddi>;
4507
4508 // Scalar Compare Signed Greater Than Or Equal To Zero
4509 def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
4510 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
4511                                                 CMGEddi>;
4512
4513 // Scalar Compare Signed Greater Than Zero
4514 def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
4515 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
4516                                                 CMGTddi>;
4517
4518 // Scalar Compare Signed Less Than Or Equal To Zero
4519 def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
4520 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
4521                                                 CMLEddi>;
4522
4523 // Scalar Compare Signed Less Than Zero
4524 def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
4525 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
4526                                                 CMLTddi>;
4527
4528 // Scalar Floating-point Compare
4529
4530 // Scalar Floating-point Compare Mask Equal
4531 defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
4532 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vceq,
4533                                              FCMEQsss, FCMEQddd>;
4534
4535 // Scalar Floating-point Compare Mask Equal To Zero
4536 defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
4537 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vceq,
4538                                                   FCMEQZssi, FCMEQZddi>;
4539
4540 // Scalar Floating-point Compare Mask Greater Than Or Equal
4541 defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
4542 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcge,
4543                                              FCMGEsss, FCMGEddd>;
4544
4545 // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
4546 defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
4547 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcge,
4548                                                   FCMGEZssi, FCMGEZddi>;
4549
4550 // Scalar Floating-point Compare Mask Greater Than
4551 defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
4552 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcgt,
4553                                              FCMGTsss, FCMGTddd>;
4554
4555 // Scalar Floating-point Compare Mask Greater Than Zero
4556 defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
4557 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcgt,
4558                                                   FCMGTZssi, FCMGTZddi>;
4559
4560 // Scalar Floating-point Compare Mask Less Than Or Equal To Zero
4561 defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
4562 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vclez,
4563                                                   FCMLEZssi, FCMLEZddi>;
4564
4565 // Scalar Floating-point Compare Mask Less Than Zero
4566 defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
4567 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcltz,
4568                                                   FCMLTZssi, FCMLTZddi>;
4569
4570 // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
4571 defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
4572 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcage,
4573                                              FACGEsss, FACGEddd>;
4574
4575 // Scalar Floating-point Absolute Compare Mask Greater Than
4576 defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
4577 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcagt,
4578                                              FACGTsss, FACGTddd>;
4579
4580 // Scalar Absolute Value
4581 defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
4582 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
4583
4584 // Scalar Signed Saturating Absolute Value
4585 defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
4586 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
4587                                                SQABSbb, SQABShh, SQABSss, SQABSdd>;
4588
4589 // Scalar Negate
4590 defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
4591 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;
4592
4593 // Scalar Signed Saturating Negate
4594 defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
4595 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
4596                                                SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
4597
4598 // Scalar Signed Saturating Accumulate of Unsigned Value
4599 defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
4600 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
4601                                                      SUQADDbb, SUQADDhh,
4602                                                      SUQADDss, SUQADDdd>;
4603
4604 // Scalar Unsigned Saturating Accumulate of Signed Value
4605 defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
4606 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
4607                                                      USQADDbb, USQADDhh,
4608                                                      USQADDss, USQADDdd>;
4609
4610 // Scalar Signed Saturating Extract Unsigned Narrow
4611 defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
4612 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
4613                                                      SQXTUNbh, SQXTUNhs,
4614                                                      SQXTUNsd>;
4615
4616 // Scalar Signed Saturating Extract Narrow
4617 defm SQXTN  : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
4618 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
4619                                                      SQXTNbh, SQXTNhs,
4620                                                      SQXTNsd>;
4621
4622 // Scalar Unsigned Saturating Extract Narrow
4623 defm UQXTN  : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
4624 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
4625                                                      UQXTNbh, UQXTNhs,
4626                                                      UQXTNsd>;
4627
4628 // Scalar Reduce Pairwise
4629
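// The pairwise reductions take a whole vector register as input and reduce its
// two elements into a scalar result, e.g. "addp d0, v1.2d" or
// "faddp s0, v1.2s".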
4630 multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
4631                                      string asmop, bit Commutable = 0> {
4632   let isCommutable = Commutable in {
4633     def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
4634                                 (outs FPR64:$Rd), (ins VPR128:$Rn),
4635                                 !strconcat(asmop, "\t$Rd, $Rn.2d"),
4636                                 [],
4637                                 NoItinerary>;
4638   }
4639 }
4640
4641 multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
4642                                      string asmop, bit Commutable = 0>
4643   : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
4644   let isCommutable = Commutable in {
4645     def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
4646                                 (outs FPR32:$Rd), (ins VPR64:$Rn),
4647                                 !strconcat(asmop, "\t$Rd, $Rn.2s"),
4648                                 [],
4649                                 NoItinerary>;
4650   }
4651 }
4652
4653 // Scalar Reduce Addition Pairwise (Integer)
4655 defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
4656
4657 // Pattern to match llvm.aarch64.* intrinsic for
4658 // Scalar Reduce Addition Pairwise (Integer)
4659 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
4660           (ADDPvv_D_2D VPR128:$Rn)>;
4661
4662 // Scalar Reduce Addition Pairwise (Floating Point)
4663 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
4664
4665 // Scalar Reduce Maximum Pairwise (Floating Point)
4666 defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
4667
4668 // Scalar Reduce Minimum Pairwise (Floating Point)
4669 defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
4670
4671 // Scalar Reduce maxNum Pairwise (Floating Point)
4672 defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
4673
4674 // Scalar Reduce minNum Pairwise (Floating Point)
4675 defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
4676
4677 multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
4678                                             SDPatternOperator opnodeD,
4679                                             Instruction INSTS,
4680                                             Instruction INSTD> {
4681   def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
4682             (INSTS VPR64:$Rn)>;
4683   def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
4684             (INSTD VPR128:$Rn)>;
4685 }
4686
4687 // Patterns to match llvm.aarch64.* intrinsic for
4688 // Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
4689 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
4690   int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
4691
4692 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
4693   int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
4694
4695 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
4696   int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
4697
4698 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
4699   int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
4700
4701 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm, 
4702   int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
4703
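// Lane-index immediate operands: each neon_uimmN_bare below accepts an
// unsigned immediate smaller than 2^N and is used as a lane number by the
// by-element and copy instructions that follow.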
4704 def neon_uimm0_bare : Operand<i64>,
4705                         ImmLeaf<i64, [{return Imm == 0;}]> {
4706   let ParserMatchClass = neon_uimm0_asmoperand;
4707   let PrintMethod = "printUImmBareOperand";
4708 }
4709
4710 def neon_uimm1_bare : Operand<i64>,
4711                         ImmLeaf<i64, [{return Imm < 2;}]> {
4712   let ParserMatchClass = neon_uimm1_asmoperand;
4713   let PrintMethod = "printUImmBareOperand";
4714 }
4715
4716 def neon_uimm2_bare : Operand<i64>,
4717                         ImmLeaf<i64, [{return Imm < 4;}]> {
4718   let ParserMatchClass = neon_uimm2_asmoperand;
4719   let PrintMethod = "printUImmBareOperand";
4720 }
4721
4722 def neon_uimm3_bare : Operand<i64>,
4723                         ImmLeaf<i64, [{return Imm < 8;}]> {
4724   let ParserMatchClass = uimm3_asmoperand;
4725   let PrintMethod = "printUImmBareOperand";
4726 }
4727
4728 def neon_uimm4_bare : Operand<i64>,
4729                         ImmLeaf<i64, [{return Imm < 16;}]> {
4730   let ParserMatchClass = uimm4_asmoperand;
4731   let PrintMethod = "printUImmBareOperand";
4732 }
4733
4734
4735 // Scalar by element Arithmetic
4736
4737 class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
4738                                     string rmlane, bit u, bit szhi, bit szlo,
4739                                     RegisterClass ResFPR, RegisterClass OpFPR,
4740                                     RegisterOperand OpVPR, Operand OpImm>
4741   : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
4742                              (outs ResFPR:$Rd),
4743                              (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
4744                              asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
4745                              [],
4746                              NoItinerary> {
4747   bits<3> Imm;
4748   bits<5> MRm;
4749 }
4750
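// Same as above, but the destination is also an accumulator input
// ("$src = $Rd"); used by fmla, fmls, sqdmlal and sqdmlsl below.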
4751 class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
4752                                                     string rmlane,
4753                                                     bit u, bit szhi, bit szlo,
4754                                                     RegisterClass ResFPR,
4755                                                     RegisterClass OpFPR,
4756                                                     RegisterOperand OpVPR,
4757                                                     Operand OpImm>
4758   : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
4759                              (outs ResFPR:$Rd),
4760                              (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
4761                              asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
4762                              [],
4763                              NoItinerary> {
4764   let Constraints = "$src = $Rd";
4765   bits<3> Imm;
4766   bits<5> MRm;
4767 }
4768
4769 // Scalar Floating Point multiply (scalar, by element)
4770 def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
4771   0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
4772   let Inst{11} = Imm{1}; // h
4773   let Inst{21} = Imm{0}; // l
4774   let Inst{20-16} = MRm;
4775 }
4776 def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
4777   0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
4778   let Inst{11} = Imm{0}; // h
4779   let Inst{21} = 0b0;    // l
4780   let Inst{20-16} = MRm;
4781 }
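// e.g. FMULssv_4S assembles as "fmul s0, s1, v2.s[3]" and FMULddv_2D as
// "fmul d0, d1, v2.d[1]".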
4782
4783 // Scalar Floating Point multiply extended (scalar, by element)
4784 def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
4785   0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
4786   let Inst{11} = Imm{1}; // h
4787   let Inst{21} = Imm{0}; // l
4788   let Inst{20-16} = MRm;
4789 }
4790 def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
4791   0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
4792   let Inst{11} = Imm{0}; // h
4793   let Inst{21} = 0b0;    // l
4794   let Inst{20-16} = MRm;
4795 }
4796
4797 // Scalar Floating Point fused multiply-add (scalar, by element)
4798 def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
4799   0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
4800   let Inst{11} = Imm{1}; // h
4801   let Inst{21} = Imm{0}; // l
4802   let Inst{20-16} = MRm;
4803 }
4804 def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
4805   0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
4806   let Inst{11} = Imm{0}; // h
4807   let Inst{21} = 0b0;    // l
4808   let Inst{20-16} = MRm;
4809 }
4810
4811 // Scalar Floating Point fused multiply-subtract (scalar, by element)
4812 def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
4813   0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
4814   let Inst{11} = Imm{1}; // h
4815   let Inst{21} = Imm{0}; // l
4816   let Inst{20-16} = MRm;
4817 }
4818 def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
4819   0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
4820   let Inst{11} = Imm{0}; // h
4821   let Inst{21} = 0b0;    // l
4822   let Inst{20-16} = MRm;
4823 }
4824
4825 // Scalar Signed saturating doubling multiply-add long (scalar, by element)
4826 def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
4827   0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
4828   let Inst{11} = 0b0; // h
4829   let Inst{21} = Imm{1}; // l
4830   let Inst{20} = Imm{0}; // m
4831   let Inst{19-16} = MRm{3-0};
4832 }
4833 def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
4834   0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
4835   let Inst{11} = Imm{2}; // h
4836   let Inst{21} = Imm{1}; // l
4837   let Inst{20} = Imm{0}; // m
4838   let Inst{19-16} = MRm{3-0};
4839 }
4840 def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
4841   0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
4842   let Inst{11} = 0b0;    // h
4843   let Inst{21} = Imm{0}; // l
4844   let Inst{20-16} = MRm;
4845 }
4846 def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
4847   0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
4848   let Inst{11} = Imm{1};    // h
4849   let Inst{21} = Imm{0};    // l
4850   let Inst{20-16} = MRm;
4851 }
4852
4853 // Scalar Signed saturating doubling
4854 // multiply-subtract long (scalar, by element)
4855 def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
4856   0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
4857   let Inst{11} = 0b0; // h
4858   let Inst{21} = Imm{1}; // l
4859   let Inst{20} = Imm{0}; // m
4860   let Inst{19-16} = MRm{3-0};
4861 }
4862 def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
4863   0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
4864   let Inst{11} = Imm{2}; // h
4865   let Inst{21} = Imm{1}; // l
4866   let Inst{20} = Imm{0}; // m
4867   let Inst{19-16} = MRm{3-0};
4868 }
4869 def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
4870   0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
4871   let Inst{11} = 0b0;    // h
4872   let Inst{21} = Imm{0}; // l
4873   let Inst{20-16} = MRm;
4874 }
4875 def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
4876   0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
4877   let Inst{11} = Imm{1};    // h
4878   let Inst{21} = Imm{0};    // l
4879   let Inst{20-16} = MRm;
4880 }
4881
4882 // Scalar Signed saturating doubling multiply long (scalar, by element)
4883 def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
4884   0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
4885   let Inst{11} = 0b0; // h
4886   let Inst{21} = Imm{1}; // l
4887   let Inst{20} = Imm{0}; // m
4888   let Inst{19-16} = MRm{3-0};
4889 }
4890 def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
4891   0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
4892   let Inst{11} = Imm{2}; // h
4893   let Inst{21} = Imm{1}; // l
4894   let Inst{20} = Imm{0}; // m
4895   let Inst{19-16} = MRm{3-0};
4896 }
4897 def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
4898   0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
4899   let Inst{11} = 0b0;    // h
4900   let Inst{21} = Imm{0}; // l
4901   let Inst{20-16} = MRm;
4902 }
4903 def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
4904   0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
4905   let Inst{11} = Imm{1};    // h
4906   let Inst{21} = Imm{0};    // l
4907   let Inst{20-16} = MRm;
4908 }
4909
4910 // Scalar Signed saturating doubling multiply returning
4911 // high half (scalar, by element)
4912 def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
4913   0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
4914   let Inst{11} = 0b0; // h
4915   let Inst{21} = Imm{1}; // l
4916   let Inst{20} = Imm{0}; // m
4917   let Inst{19-16} = MRm{3-0};
4918 }
4919 def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
4920   0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
4921   let Inst{11} = Imm{2}; // h
4922   let Inst{21} = Imm{1}; // l
4923   let Inst{20} = Imm{0}; // m
4924   let Inst{19-16} = MRm{3-0};
4925 }
4926 def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
4927   0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
4928   let Inst{11} = 0b0;    // h
4929   let Inst{21} = Imm{0}; // l
4930   let Inst{20-16} = MRm;
4931 }
4932 def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
4933   0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
4934   let Inst{11} = Imm{1};    // h
4935   let Inst{21} = Imm{0};    // l
4936   let Inst{20-16} = MRm;
4937 }
4938
4939 // Scalar Signed saturating rounding doubling multiply
4940 // returning high half (scalar, by element)
4941 def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
4942   0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
4943   let Inst{11} = 0b0; // h
4944   let Inst{21} = Imm{1}; // l
4945   let Inst{20} = Imm{0}; // m
4946   let Inst{19-16} = MRm{3-0};
4947 }
4948 def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
4949   0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
4950   let Inst{11} = Imm{2}; // h
4951   let Inst{21} = Imm{1}; // l
4952   let Inst{20} = Imm{0}; // m
4953   let Inst{19-16} = MRm{3-0};
4954 }
4955 def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
4956   0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
4957   let Inst{11} = 0b0;    // h
4958   let Inst{21} = Imm{0}; // l
4959   let Inst{20-16} = MRm;
4960 }
4961 def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
4962   0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
4963   let Inst{11} = Imm{1};    // h
4964   let Inst{21} = Imm{0};    // l
4965   let Inst{20-16} = MRm;
4966 }
4967
4968
4969 // Scalar Copy - DUP element to scalar
4970 class NeonI_Scalar_DUP<string asmop, string asmlane,
4971                        RegisterClass ResRC, RegisterOperand VPRC,
4972                        Operand OpImm>
4973   : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
4974                      asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
4975                      [],
4976                      NoItinerary> {
4977   bits<4> Imm;
4978 }
4979
4980 def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
4981   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
4982 }
4983 def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
4984   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
4985 }
4986 def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
4987   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
4988 }
4989 def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
4990   let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
4991 }
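// e.g. DUPbv_B is "dup b0, v1.b[15]"; the lane index is encoded together with
// the element size in the imm5 field Inst{20-16}.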
4992
4993
4994 //===----------------------------------------------------------------------===//
4995 // Non-Instruction Patterns
4996 //===----------------------------------------------------------------------===//
4997
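// Most of these bitconverts are pure register reinterpretations and expand to
// no code at all; only the GPR<->FPR cases below need an FMOV.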
4998 // 64-bit vector bitcasts...
4999
5000 def : Pat<(v1i64 (bitconvert (v8i8  VPR64:$src))), (v1i64 VPR64:$src)>;
5001 def : Pat<(v2f32 (bitconvert (v8i8  VPR64:$src))), (v2f32 VPR64:$src)>;
5002 def : Pat<(v2i32 (bitconvert (v8i8  VPR64:$src))), (v2i32 VPR64:$src)>;
5003 def : Pat<(v4i16 (bitconvert (v8i8  VPR64:$src))), (v4i16 VPR64:$src)>;
5004
5005 def : Pat<(v1i64 (bitconvert (v4i16  VPR64:$src))), (v1i64 VPR64:$src)>;
5006 def : Pat<(v2i32 (bitconvert (v4i16  VPR64:$src))), (v2i32 VPR64:$src)>;
5007 def : Pat<(v2f32 (bitconvert (v4i16  VPR64:$src))), (v2f32 VPR64:$src)>;
5008 def : Pat<(v8i8  (bitconvert (v4i16  VPR64:$src))), (v8i8 VPR64:$src)>;
5009
5010 def : Pat<(v1i64 (bitconvert (v2i32  VPR64:$src))), (v1i64 VPR64:$src)>;
5011 def : Pat<(v2f32 (bitconvert (v2i32  VPR64:$src))), (v2f32 VPR64:$src)>;
5012 def : Pat<(v4i16 (bitconvert (v2i32  VPR64:$src))), (v4i16 VPR64:$src)>;
5013 def : Pat<(v8i8  (bitconvert (v2i32  VPR64:$src))), (v8i8 VPR64:$src)>;
5014
5015 def : Pat<(v1i64 (bitconvert (v2f32  VPR64:$src))), (v1i64 VPR64:$src)>;
5016 def : Pat<(v2i32 (bitconvert (v2f32  VPR64:$src))), (v2i32 VPR64:$src)>;
5017 def : Pat<(v4i16 (bitconvert (v2f32  VPR64:$src))), (v4i16 VPR64:$src)>;
5018 def : Pat<(v8i8  (bitconvert (v2f32  VPR64:$src))), (v8i8 VPR64:$src)>;
5019
5020 def : Pat<(v2f32 (bitconvert (v1i64  VPR64:$src))), (v2f32 VPR64:$src)>;
5021 def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;
5022 def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
5023 def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
5024
5025 // ...and 128-bit vector bitcasts...
5026
5027 def : Pat<(v2f64 (bitconvert (v16i8  VPR128:$src))), (v2f64 VPR128:$src)>;
5028 def : Pat<(v2i64 (bitconvert (v16i8  VPR128:$src))), (v2i64 VPR128:$src)>;
5029 def : Pat<(v4f32 (bitconvert (v16i8  VPR128:$src))), (v4f32 VPR128:$src)>;
5030 def : Pat<(v4i32 (bitconvert (v16i8  VPR128:$src))), (v4i32 VPR128:$src)>;
5031 def : Pat<(v8i16 (bitconvert (v16i8  VPR128:$src))), (v8i16 VPR128:$src)>;
5032
5033 def : Pat<(v2f64 (bitconvert (v8i16  VPR128:$src))), (v2f64 VPR128:$src)>;
5034 def : Pat<(v2i64 (bitconvert (v8i16  VPR128:$src))), (v2i64 VPR128:$src)>;
5035 def : Pat<(v4i32 (bitconvert (v8i16  VPR128:$src))), (v4i32 VPR128:$src)>;
5036 def : Pat<(v4f32 (bitconvert (v8i16  VPR128:$src))), (v4f32 VPR128:$src)>;
5037 def : Pat<(v16i8 (bitconvert (v8i16  VPR128:$src))), (v16i8 VPR128:$src)>;
5038
5039 def : Pat<(v2f64 (bitconvert (v4i32  VPR128:$src))), (v2f64 VPR128:$src)>;
5040 def : Pat<(v2i64 (bitconvert (v4i32  VPR128:$src))), (v2i64 VPR128:$src)>;
5041 def : Pat<(v4f32 (bitconvert (v4i32  VPR128:$src))), (v4f32 VPR128:$src)>;
5042 def : Pat<(v8i16 (bitconvert (v4i32  VPR128:$src))), (v8i16 VPR128:$src)>;
5043 def : Pat<(v16i8 (bitconvert (v4i32  VPR128:$src))), (v16i8 VPR128:$src)>;
5044
5045 def : Pat<(v2f64 (bitconvert (v4f32  VPR128:$src))), (v2f64 VPR128:$src)>;
5046 def : Pat<(v2i64 (bitconvert (v4f32  VPR128:$src))), (v2i64 VPR128:$src)>;
5047 def : Pat<(v4i32 (bitconvert (v4f32  VPR128:$src))), (v4i32 VPR128:$src)>;
5048 def : Pat<(v8i16 (bitconvert (v4f32  VPR128:$src))), (v8i16 VPR128:$src)>;
5049 def : Pat<(v16i8 (bitconvert (v4f32  VPR128:$src))), (v16i8 VPR128:$src)>;
5050
5051 def : Pat<(v2f64 (bitconvert (v2i64  VPR128:$src))), (v2f64 VPR128:$src)>;
5052 def : Pat<(v4f32 (bitconvert (v2i64  VPR128:$src))), (v4f32 VPR128:$src)>;
5053 def : Pat<(v4i32 (bitconvert (v2i64  VPR128:$src))), (v4i32 VPR128:$src)>;
5054 def : Pat<(v8i16 (bitconvert (v2i64  VPR128:$src))), (v8i16 VPR128:$src)>;
5055 def : Pat<(v16i8 (bitconvert (v2i64  VPR128:$src))), (v16i8 VPR128:$src)>;
5056
5057 def : Pat<(v2i64 (bitconvert (v2f64  VPR128:$src))), (v2i64 VPR128:$src)>;
5058 def : Pat<(v4f32 (bitconvert (v2f64  VPR128:$src))), (v4f32 VPR128:$src)>;
5059 def : Pat<(v4i32 (bitconvert (v2f64  VPR128:$src))), (v4i32 VPR128:$src)>;
5060 def : Pat<(v8i16 (bitconvert (v2f64  VPR128:$src))), (v8i16 VPR128:$src)>;
5061 def : Pat<(v16i8 (bitconvert (v2f64  VPR128:$src))), (v16i8 VPR128:$src)>;
5062
5063
5064 // ...and scalar bitcasts...
5065 def : Pat<(f16 (bitconvert (v1i16  FPR16:$src))), (f16 FPR16:$src)>;
5066 def : Pat<(f32 (bitconvert (v1i32  FPR32:$src))), (f32 FPR32:$src)>;
5067 def : Pat<(f64 (bitconvert (v1i64  FPR64:$src))), (f64 FPR64:$src)>;
5068 def : Pat<(f32 (bitconvert (v1f32  FPR32:$src))), (f32 FPR32:$src)>;
5069 def : Pat<(f64 (bitconvert (v1f64  FPR64:$src))), (f64 FPR64:$src)>;
5070
5071 def : Pat<(i64 (bitconvert (v1i64  FPR64:$src))), (FMOVxd $src)>;
5072 def : Pat<(i32 (bitconvert (v1i32  FPR32:$src))), (FMOVws $src)>;
5073
5074 def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
5075 def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
5076 def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;
5077
5078 def : Pat<(f64   (bitconvert (v8i8  VPR64:$src))), (f64 VPR64:$src)>;
5079 def : Pat<(f64   (bitconvert (v4i16  VPR64:$src))), (f64 VPR64:$src)>;
5080 def : Pat<(f64   (bitconvert (v2i32  VPR64:$src))), (f64 VPR64:$src)>;
5081 def : Pat<(f64   (bitconvert (v2f32  VPR64:$src))), (f64 VPR64:$src)>;
5082 def : Pat<(f64   (bitconvert (v1i64  VPR64:$src))), (f64 VPR64:$src)>;
5083
5084 def : Pat<(f128  (bitconvert (v16i8  VPR128:$src))), (f128 VPR128:$src)>;
5085 def : Pat<(f128  (bitconvert (v8i16  VPR128:$src))), (f128 VPR128:$src)>;
5086 def : Pat<(f128  (bitconvert (v4i32  VPR128:$src))), (f128 VPR128:$src)>;
5087 def : Pat<(f128  (bitconvert (v2i64  VPR128:$src))), (f128 VPR128:$src)>;
5088 def : Pat<(f128  (bitconvert (v4f32  VPR128:$src))), (f128 VPR128:$src)>;
5089 def : Pat<(f128  (bitconvert (v2f64  VPR128:$src))), (f128 VPR128:$src)>;
5090
5091 def : Pat<(v1i16 (bitconvert (f16  FPR16:$src))), (v1i16 FPR16:$src)>;
5092 def : Pat<(v1i32 (bitconvert (f32  FPR32:$src))), (v1i32 FPR32:$src)>;
5093 def : Pat<(v1i64 (bitconvert (f64  FPR64:$src))), (v1i64 FPR64:$src)>;
5094 def : Pat<(v1f32 (bitconvert (f32  FPR32:$src))), (v1f32 FPR32:$src)>;
5095 def : Pat<(v1f64 (bitconvert (f64  FPR64:$src))), (v1f64 FPR64:$src)>;
5096
5097 def : Pat<(v1i64 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
5098 def : Pat<(v1i32 (bitconvert (i32  GPR32:$src))), (FMOVsw $src)>;
5099
5100 def : Pat<(v8i8   (bitconvert (f64   FPR64:$src))), (v8i8 FPR64:$src)>;
5101 def : Pat<(v4i16  (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
5102 def : Pat<(v2i32  (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
5103 def : Pat<(v2f32  (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
5104 def : Pat<(v1i64  (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;
5105
5106 def : Pat<(v16i8  (bitconvert (f128   FPR128:$src))), (v16i8 FPR128:$src)>;
5107 def : Pat<(v8i16  (bitconvert (f128   FPR128:$src))), (v8i16 FPR128:$src)>;
5108 def : Pat<(v4i32  (bitconvert (f128   FPR128:$src))), (v4i32 FPR128:$src)>;
5109 def : Pat<(v2i64  (bitconvert (f128   FPR128:$src))), (v2i64 FPR128:$src)>;
5110 def : Pat<(v4f32  (bitconvert (f128   FPR128:$src))), (v4f32 FPR128:$src)>;
5111 def : Pat<(v2f64  (bitconvert (f128   FPR128:$src))), (v2f64 FPR128:$src)>;
5112
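// Byte-index immediates, used by the EXT instructions below; unlike the
// *_bare operands above, these are printed with printUImmHexOperand.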
5113 def neon_uimm3 : Operand<i64>,
5114                    ImmLeaf<i64, [{return Imm < 8;}]> {
5115   let ParserMatchClass = uimm3_asmoperand;
5116   let PrintMethod = "printUImmHexOperand";
5117 }
5118
5119 def neon_uimm4 : Operand<i64>,
5120                    ImmLeaf<i64, [{return Imm < 16;}]> {
5121   let ParserMatchClass = uimm4_asmoperand;
5122   let PrintMethod = "printUImmHexOperand";
5123 }
5124
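// INS (general): insert a general-purpose register value into one lane of a
// 128-bit vector, matched from the generic vector_insert node.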
5125 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
5126                      RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
5127   : NeonI_copy<0b1, 0b0, 0b0011,
5128                (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
5129                asmop # "\t$Rd." # Res # "[$Imm], $Rn",
5130                [(set (ResTy VPR128:$Rd),
5131                  (ResTy (vector_insert
5132                    (ResTy VPR128:$src),
5133                    (OpTy OpGPR:$Rn),
5134                    (OpImm:$Imm))))],
5135                NoItinerary> {
5136   bits<4> Imm;
5137   let Constraints = "$src = $Rd";
5138 }
5139
5140 // Bitwise Extract
5141 class NeonI_Extract<bit q, bits<2> op2, string asmop,
5142                     string OpS, RegisterOperand OpVPR, Operand OpImm>
5143   : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
5144                      (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
5145                      asmop # "\t$Rd." # OpS # ", $Rn." # OpS # 
5146                      ", $Rm." # OpS # ", $Index",
5147                      [],
5148                      NoItinerary>{
5149   bits<4> Index;
5150 }
5151
5152 def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
5153                                VPR64, neon_uimm3> {
5154   let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
5155 }
5156
5157 def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
5158                                VPR128, neon_uimm4> {
5159   let Inst{14-11} = Index;
5160 }
5161
5162 class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
5163                  Operand OpImm> 
5164   : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
5165                                  (i64 OpImm:$Imm))),
5166               (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;
5167
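// Map Neon_vextract on the 64-bit and 128-bit vector types onto the byte-wise
// EXT instruction of the matching width.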
5168 def : NI_Extract<v8i8,  VPR64,  EXTvvvi_8b,  neon_uimm3>;
5169 def : NI_Extract<v4i16, VPR64,  EXTvvvi_8b,  neon_uimm3>;
5170 def : NI_Extract<v2i32, VPR64,  EXTvvvi_8b,  neon_uimm3>;
5171 def : NI_Extract<v1i64, VPR64,  EXTvvvi_8b,  neon_uimm3>;
5172 def : NI_Extract<v2f32, VPR64,  EXTvvvi_8b,  neon_uimm3>;
5173 def : NI_Extract<v1f64, VPR64,  EXTvvvi_8b,  neon_uimm3>;
5174 def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
5175 def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
5176 def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
5177 def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
5178 def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
5179 def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
5180
5181 // Table lookup
5182 class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
5183              string asmop, string OpS, RegisterOperand OpVPR,
5184              RegisterOperand VecList>
5185   : NeonI_TBL<q, op2, len, op,
5186               (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
5187               asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
5188               [],
5189               NoItinerary>;
5190
5191 // The vectors in the lookup table are always 16b
5192 multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
5193   def _8b  : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
5194                     !cast<RegisterOperand>(List # "16B_operand")>;
5195
5196   def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
5197                     !cast<RegisterOperand>(List # "16B_operand")>;
5198 }
5199
5200 defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
5201 defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
5202 defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
5203 defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
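// TBL1-TBL4 use one to four consecutive 16b table registers (VOne..VQuad);
// the index vector and the result are either 8b or 16b.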
5204
5205 // Table lookup extension
5206 class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
5207              string asmop, string OpS, RegisterOperand OpVPR,
5208              RegisterOperand VecList>
5209   : NeonI_TBL<q, op2, len, op,
5210               (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
5211               asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
5212               [],
5213               NoItinerary> {
5214   let Constraints = "$src = $Rd";
5215 }
5216
5217 // The vectors in the lookup table are always 16b
5218 multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
5219   def _8b  : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
5220                     !cast<RegisterOperand>(List # "16B_operand")>;
5221
5222   def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
5223                     !cast<RegisterOperand>(List # "16B_operand")>;
5224 }
5225
5226 defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
5227 defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
5228 defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
5229 defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
5230
5231 // The following definitions are for the instruction class (3V Elem)
5232
5233 // Variant 1
5234
5235 class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
5236              string asmop, string ResS, string OpS, string EleOpS,
5237              Operand OpImm, RegisterOperand ResVPR,
5238              RegisterOperand OpVPR, RegisterOperand EleOpVPR>
5239   : NeonI_2VElem<q, u, size, opcode, 
5240                  (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
5241                                          EleOpVPR:$Re, OpImm:$Index),
5242                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
5243                  ", $Re." # EleOpS # "[$Index]",
5244                  [],
5245                  NoItinerary> {
5246   bits<3> Index;
5247   bits<5> Re;
5248
5249   let Constraints = "$src = $Rd";
5250 }
5251
5252 multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
5253   // vector register class for element is always 128-bit to cover the max index
5254   def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
5255                      neon_uimm2_bare, VPR64, VPR64, VPR128> {
5256     let Inst{11} = {Index{1}};
5257     let Inst{21} = {Index{0}};
5258     let Inst{20-16} = Re;
5259   }
5260
5261   def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
5262                      neon_uimm2_bare, VPR128, VPR128, VPR128> {
5263     let Inst{11} = {Index{1}};
5264     let Inst{21} = {Index{0}};
5265     let Inst{20-16} = Re;
5266   }
5267
5268   // Index operations on 16-bit (H) elements are restricted to using v0-v15.
5269   def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
5270                      neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
5271     let Inst{11} = {Index{2}};
5272     let Inst{21} = {Index{1}};
5273     let Inst{20} = {Index{0}};
5274     let Inst{19-16} = Re{3-0};
5275   }
5276
5277   def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
5278                      neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
5279     let Inst{11} = {Index{2}};
5280     let Inst{21} = {Index{1}};
5281     let Inst{20} = {Index{0}};
5282     let Inst{19-16} = Re{3-0};
5283   }
5284 }
5285
5286 defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
5287 defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
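// e.g. MLAvve_4s4s assembles as "mla v0.4s, v1.4s, v2.s[3]"; the destination
// register also provides the accumulator input ($src = $Rd in NI_2VE).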
5288
5289 // Pattern for lane in 128-bit vector
5290 class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
5291                    RegisterOperand ResVPR, RegisterOperand OpVPR,
5292                    RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
5293                    ValueType EleOpTy, SDPatternOperator coreop>
5294   : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
5295           (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5296         (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
5297
5298 // Pattern for lane in 64-bit vector
5299 class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
5300                   RegisterOperand ResVPR, RegisterOperand OpVPR,
5301                   RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
5302                   ValueType EleOpTy, SDPatternOperator coreop>
5303   : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
5304           (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5305         (INST ResVPR:$src, OpVPR:$Rn, 
5306           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
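// For the 64-bit element operand the register is first widened to 128 bits
// with SUBREG_TO_REG, because the instruction always reads a 128-bit element
// register.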
5307
5308 multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
5309 {
5310   def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
5311                      op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32,
5312                      BinOpFrag<(Neon_vduplane
5313                                  (Neon_low4S node:$LHS), node:$RHS)>>;
5314
5315   def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
5316                      op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32,
5317                      BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5318
5319   def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
5320                      op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
5321                      BinOpFrag<(Neon_vduplane
5322                                  (Neon_low8H node:$LHS), node:$RHS)>>;
5323
5324   def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
5325                      op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
5326                      BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5327
5328   // Index can only be half of the max value for lane in 64-bit vector
5329
5330   def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
5331                     op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32,
5332                     BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5333
5334   def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
5335                     op, VPR128, VPR128, VPR64, v4i32, v4i32, v2i32,
5336                     BinOpFrag<(Neon_vduplane
5337                                 (Neon_combine_4S node:$LHS, undef),
5338                                  node:$RHS)>>;
5339
5340   def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
5341                     op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
5342                     BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5343
5344   def : NI_2VE_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
5345                     op, VPR128, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
5346                     BinOpFrag<(Neon_vduplane
5347                                 (Neon_combine_8H node:$LHS, undef),
5348                                 node:$RHS)>>;
5349 }
5350
5351 defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
5352 defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
5353
5354 class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
5355                  string asmop, string ResS, string OpS, string EleOpS,
5356                  Operand OpImm, RegisterOperand ResVPR,
5357                  RegisterOperand OpVPR, RegisterOperand EleOpVPR>
5358   : NeonI_2VElem<q, u, size, opcode, 
5359                  (outs ResVPR:$Rd), (ins OpVPR:$Rn,
5360                                          EleOpVPR:$Re, OpImm:$Index),
5361                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
5362                  ", $Re." # EleOpS # "[$Index]",
5363                  [],
5364                  NoItinerary> {
5365   bits<3> Index;
5366   bits<5> Re;
5367 }
5368
5369 multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
5370   // vector register class for element is always 128-bit to cover the max index
5371   def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
5372                          neon_uimm2_bare, VPR64, VPR64, VPR128> {
5373     let Inst{11} = {Index{1}};
5374     let Inst{21} = {Index{0}};
5375     let Inst{20-16} = Re;
5376   }
5377
5378   def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
5379                          neon_uimm2_bare, VPR128, VPR128, VPR128> {
5380     let Inst{11} = {Index{1}};
5381     let Inst{21} = {Index{0}};
5382     let Inst{20-16} = Re;
5383   }
5384
5385   // Index operations on 16-bit (H) elements are restricted to using v0-v15.
5386   def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
5387                          neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
5388     let Inst{11} = {Index{2}};
5389     let Inst{21} = {Index{1}};
5390     let Inst{20} = {Index{0}};
5391     let Inst{19-16} = Re{3-0};
5392   }
5393
5394   def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
5395                          neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
5396     let Inst{11} = {Index{2}};
5397     let Inst{21} = {Index{1}};
5398     let Inst{20} = {Index{0}};
5399     let Inst{19-16} = Re{3-0};
5400   }
5401 }
5402
5403 defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
5404 defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
5405 defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
5406
5407 // Pattern for lane in 128-bit vector
5408 class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
5409                        RegisterOperand OpVPR, RegisterOperand EleOpVPR,
5410                        ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
5411                        SDPatternOperator coreop>
5412   : Pat<(ResTy (op (OpTy OpVPR:$Rn),
5413           (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5414         (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
5415
5416 // Pattern for lane in 64-bit vector
5417 class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
5418                       RegisterOperand OpVPR, RegisterOperand EleOpVPR,
5419                       ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
5420                       SDPatternOperator coreop>
5421   : Pat<(ResTy (op (OpTy OpVPR:$Rn),
5422           (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5423         (INST OpVPR:$Rn, 
5424           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
5425
5426 multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
5427   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
5428                          op, VPR64, VPR128, v2i32, v2i32, v4i32,
5429                          BinOpFrag<(Neon_vduplane
5430                                      (Neon_low4S node:$LHS), node:$RHS)>>;
5431
5432   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
5433                          op, VPR128, VPR128, v4i32, v4i32, v4i32,
5434                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5435
5436   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
5437                          op, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
5438                          BinOpFrag<(Neon_vduplane
5439                                     (Neon_low8H node:$LHS), node:$RHS)>>;
5440
5441   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
5442                          op, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
5443                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5444
5445   // Index can only be half of the max value for lane in 64-bit vector
5446
5447   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
5448                         op, VPR64, VPR64, v2i32, v2i32, v2i32,
5449                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5450
5451   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
5452                         op, VPR128, VPR64, v4i32, v4i32, v2i32,
5453                         BinOpFrag<(Neon_vduplane
5454                                     (Neon_combine_4S node:$LHS, undef),
5455                                      node:$RHS)>>;
5456
5457   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
5458                         op, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
5459                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5460
5461   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
5462                         op, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
5463                         BinOpFrag<(Neon_vduplane
5464                                     (Neon_combine_8H node:$LHS, undef),
5465                                     node:$RHS)>>;
5466 }
5467
5468 defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
5469 defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
5470 defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
5471
5472 // Variant 2
5473
5474 multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
5475   // vector register class for element is always 128-bit to cover the max index
5476   def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
5477                          neon_uimm2_bare, VPR64, VPR64, VPR128> {
5478     let Inst{11} = {Index{1}};
5479     let Inst{21} = {Index{0}};
5480     let Inst{20-16} = Re;
5481   }
5482
5483   def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
5484                          neon_uimm2_bare, VPR128, VPR128, VPR128> {
5485     let Inst{11} = {Index{1}};
5486     let Inst{21} = {Index{0}};
5487     let Inst{20-16} = Re;
5488   }
5489
5490   // _1d2d doesn't exist!
5491
5492   def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
5493                          neon_uimm1_bare, VPR128, VPR128, VPR128> {
5494     let Inst{11} = {Index{0}};
5495     let Inst{21} = 0b0;
5496     let Inst{20-16} = Re;
5497   }
5498 }
5499
5500 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
5501 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
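// e.g. FMULve_2s4s is "fmul v0.2s, v1.2s, v2.s[3]" and FMULve_2d2d is
// "fmul v0.2d, v1.2d, v2.d[1]".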
5502
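// Pattern for a 1d element duplicated into both lanes of a 2d operand: the
// duplication appears as the element combined with itself, and lane 0 of the
// widened register is used.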
5503 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
5504                          RegisterOperand OpVPR, RegisterOperand EleOpVPR,
5505                          ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
5506                          SDPatternOperator coreop>
5507   : Pat<(ResTy (op (OpTy OpVPR:$Rn),
5508           (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
5509         (INST OpVPR:$Rn, 
5510           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
5511
5512 multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
5513   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
5514                          op, VPR64, VPR128, v2f32, v2f32, v4f32,
5515                          BinOpFrag<(Neon_vduplane
5516                                      (Neon_low4f node:$LHS), node:$RHS)>>;
5517
5518   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
5519                          op, VPR128, VPR128, v4f32, v4f32, v4f32,
5520                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5521
5522   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
5523                          op, VPR128, VPR128, v2f64, v2f64, v2f64,
5524                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5525
5526   // Index can only be half of the max value for lane in 64-bit vector
5527
5528   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
5529                         op, VPR64, VPR64, v2f32, v2f32, v2f32,
5530                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5531
5532   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
5533                         op, VPR128, VPR64, v4f32, v4f32, v2f32,
5534                         BinOpFrag<(Neon_vduplane
5535                                     (Neon_combine_4f node:$LHS, undef),
5536                                     node:$RHS)>>;
5537
5538   def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
5539                            op, VPR128, VPR64, v2f64, v2f64, v1f64,
5540                            BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
5541 }
5542
5543 defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
5544 defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
5545
5546 // The following patterns use fma
5547 // -ffp-contract=fast generates fma
5548
5549 multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
5550   // vector register class for element is always 128-bit to cover the max index
5551   def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
5552                      neon_uimm2_bare, VPR64, VPR64, VPR128> {
5553     let Inst{11} = {Index{1}};
5554     let Inst{21} = {Index{0}};
5555     let Inst{20-16} = Re;
5556   }
5557
5558   def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
5559                      neon_uimm2_bare, VPR128, VPR128, VPR128> {
5560     let Inst{11} = {Index{1}};
5561     let Inst{21} = {Index{0}};
5562     let Inst{20-16} = Re;
5563   }
5564
5565   // _1d2d doesn't exist!
5566   
5567   def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
5568                      neon_uimm1_bare, VPR128, VPR128, VPR128> {
5569     let Inst{11} = {Index{0}};
5570     let Inst{21} = 0b0;
5571     let Inst{20-16} = Re;
5572   }
5573 }
5574
5575 defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
5576 defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
5577
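// In the fma-based patterns below the duplicated lane element appears as the
// first multiplicand, so the matched operand order is swapped relative to the
// NI_2VE patterns above.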
5578 // Pattern for lane in 128-bit vector
5579 class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
5580                        RegisterOperand ResVPR, RegisterOperand OpVPR,
5581                        ValueType ResTy, ValueType OpTy,
5582                        SDPatternOperator coreop>
5583   : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
5584                    (ResTy ResVPR:$src), (ResTy ResVPR:$Rn))),
5585         (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
5586
5587 // Pattern for lane in 64-bit vector
5588 class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
5589                       RegisterOperand ResVPR, RegisterOperand OpVPR,
5590                       ValueType ResTy, ValueType OpTy,
5591                       SDPatternOperator coreop>
5592   : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
5593                    (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
5594         (INST ResVPR:$src, ResVPR:$Rn, 
5595           (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
5596
5597 // Pattern for a 1d element in a 64-bit vector duplicated into both 2d lanes
5598 class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
5599                            SDPatternOperator op,
5600                            RegisterOperand ResVPR, RegisterOperand OpVPR,
5601                            ValueType ResTy, ValueType OpTy,
5602                            SDPatternOperator coreop>
5603   : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
5604                    (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
5605         (INST ResVPR:$src, ResVPR:$Rn, 
5606           (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
5607
5608
5609 multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
5610   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
5611                          neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
5612                          BinOpFrag<(Neon_vduplane
5613                                      (Neon_low4f node:$LHS), node:$RHS)>>;
5614
5615   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
5616                          neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
5617                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5618
5619   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
5620                          neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
5621                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5622
5623   // Index can only be half of the max value for lane in 64-bit vector
5624
5625   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
5626                         neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
5627                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5628
5629   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
5630                         neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
5631                         BinOpFrag<(Neon_vduplane
5632                                     (Neon_combine_4f node:$LHS, undef),
5633                                     node:$RHS)>>;
5634
5635   def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
5636                              neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
5637                              BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
5638 }
5639
5640 defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
5641
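// fmls is matched as fma with an fneg folded into the duplicated element;
// patterns are provided for the fneg both before and after the duplication.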
5642 multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
5643 {
5644   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
5645                          neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
5646                          BinOpFrag<(fneg (Neon_vduplane
5647                                      (Neon_low4f node:$LHS), node:$RHS))>>;
5648
5649   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
5650                          neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
5651                          BinOpFrag<(Neon_vduplane
5652                                      (Neon_low4f (fneg node:$LHS)),
5653                                      node:$RHS)>>;
5654
5655   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
5656                          neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
5657                          BinOpFrag<(fneg (Neon_vduplane
5658                                      node:$LHS, node:$RHS))>>;
5659
5660   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
5661                          neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
5662                          BinOpFrag<(Neon_vduplane
5663                                      (fneg node:$LHS), node:$RHS)>>;
5664
5665   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
5666                          neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
5667                          BinOpFrag<(fneg (Neon_vduplane
5668                                      node:$LHS, node:$RHS))>>;
5669
5670   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
5671                          neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
5672                          BinOpFrag<(Neon_vduplane
5673                                      (fneg node:$LHS), node:$RHS)>>;
5674
5675   // Index can only be half of the max value for lane in 64-bit vector
5676
5677   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
5678                         neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
5679                         BinOpFrag<(fneg (Neon_vduplane
5680                                     node:$LHS, node:$RHS))>>;
5681
5682   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
5683                         neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
5684                         BinOpFrag<(Neon_vduplane
5685                                     (fneg node:$LHS), node:$RHS)>>;
5686
5687   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
5688                         neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
5689                         BinOpFrag<(fneg (Neon_vduplane
5690                                     (Neon_combine_4f node:$LHS, undef),
5691                                     node:$RHS))>>;
5692
5693   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
5694                         neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
5695                         BinOpFrag<(Neon_vduplane
5696                                     (Neon_combine_4f (fneg node:$LHS), undef),
5697                                     node:$RHS)>>;
5698
5699   def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
5700                              neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
5701                              BinOpFrag<(fneg (Neon_combine_2d
5702                                          node:$LHS, node:$RHS))>>;
5703
5704   def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
5705                              neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
5706                              BinOpFrag<(Neon_combine_2d
5707                                          (fneg node:$LHS), (fneg node:$RHS))>>;
5708 }
5709
5710 defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
5711
5712 // Variant 3: Long type
5713 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
5714 //      SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
5715
5716 multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
5717   // vector register class for element is always 128-bit to cover the max index
5718   def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
5719                      neon_uimm2_bare, VPR128, VPR64, VPR128> {
5720     let Inst{11} = {Index{1}};
5721     let Inst{21} = {Index{0}};
5722     let Inst{20-16} = Re;
5723   }
5724   
5725   def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
5726                      neon_uimm2_bare, VPR128, VPR128, VPR128> {
5727     let Inst{11} = {Index{1}};
5728     let Inst{21} = {Index{0}};
5729     let Inst{20-16} = Re;
5730   }
5731
5732   // Index operations on 16-bit (H) elements are restricted to using v0-v15.
5733   def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
5734                      neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
5735     let Inst{11} = {Index{2}};
5736     let Inst{21} = {Index{1}};
5737     let Inst{20} = {Index{0}};
5738     let Inst{19-16} = Re{3-0};
5739   }
5740   
5741   def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
5742                      neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
5743     let Inst{11} = {Index{2}};
5744     let Inst{21} = {Index{1}};
5745     let Inst{20} = {Index{0}};
5746     let Inst{19-16} = Re{3-0};
5747   }
5748 }
5749
5750 defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
5751 defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
5752 defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
5753 defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
5754 defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
5755 defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
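// e.g. SMLALvve_4s4h is "smlal v0.4s, v1.4h, v2.h[5]" and SMLALvve_4s8h is
// "smlal2 v0.4s, v1.8h, v2.h[5]".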
5756
5757 multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
5758   // vector register class for element is always 128-bit to cover the max index
5759   def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
5760                          neon_uimm2_bare, VPR128, VPR64, VPR128> {
5761     let Inst{11} = {Index{1}};
5762     let Inst{21} = {Index{0}};
5763     let Inst{20-16} = Re;
5764   }
5765   
5766   def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
5767                          neon_uimm2_bare, VPR128, VPR128, VPR128> {
5768     let Inst{11} = {Index{1}};
5769     let Inst{21} = {Index{0}};
5770     let Inst{20-16} = Re;
5771   }
5772
5773   // Index operations on 16-bit (H) elements are restricted to using v0-v15.
5774   def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
5775                          neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
5776     let Inst{11} = {Index{2}};
5777     let Inst{21} = {Index{1}};
5778     let Inst{20} = {Index{0}};
5779     let Inst{19-16} = Re{3-0};
5780   }
5781   
5782   def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
5783                          neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
5784     let Inst{11} = {Index{2}};
5785     let Inst{21} = {Index{1}};
5786     let Inst{20} = {Index{0}};
5787     let Inst{19-16} = Re{3-0};
5788   }
5789 }
5790
5791 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
5792 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
5793 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
5794
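// The *2 (second-part) patterns take the high half of the 128-bit $Rn through
// the hiop fragment (Neon_High8H / Neon_High4S) before the widening operation.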
5795 // Pattern for lane in 128-bit vector
5796 class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
5797                      RegisterOperand EleOpVPR, ValueType ResTy,
5798                      ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
5799                      SDPatternOperator hiop, SDPatternOperator coreop>
5800   : Pat<(ResTy (op (ResTy VPR128:$src),
5801           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
5802           (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5803         (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
5804
5805 // Pattern for lane in 64-bit vector
5806 class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
5807                     RegisterOperand EleOpVPR, ValueType ResTy,
5808                     ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
5809                     SDPatternOperator hiop, SDPatternOperator coreop>
5810   : Pat<(ResTy (op (ResTy VPR128:$src),
5811           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
5812           (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5813         (INST VPR128:$src, VPR128:$Rn, 
5814           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
5815
5816 multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
5817   def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
5818                      op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
5819                      BinOpFrag<(Neon_vduplane
5820                                  (Neon_low8H node:$LHS), node:$RHS)>>;
5821   
5822   def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
5823                      op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32,
5824                      BinOpFrag<(Neon_vduplane
5825                                  (Neon_low4S node:$LHS), node:$RHS)>>;
5826   
5827   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
5828                        op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H,
5829                        BinOpFrag<(Neon_vduplane
5830                                    (Neon_low8H node:$LHS), node:$RHS)>>;
5831   
5832   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
5833                        op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
5834                        BinOpFrag<(Neon_vduplane
5835                                    (Neon_low4S node:$LHS), node:$RHS)>>;
5836   
5837   // Index can only be half of the max value for lane in 64-bit vector
5838
5839   def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
5840                     op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
5841                     BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5842   
5843   def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
5844                     op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32,
5845                     BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5846
5847   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
5848                       op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
5849                       BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5850   
5851   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
5852                       op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
5853                       BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5854 }
5855
5856 defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
5857 defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
5858 defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
5859 defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
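// The patterns above select the lane-indexed widening multiply-accumulate
// forms, e.g. "smlal v0.4s, v1.4h, v2.h[1]" and the high-half form
// "smlal2 v0.4s, v1.8h, v2.h[1]".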
5860
5861 // Pattern for lane in 128-bit vector
5862 class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
5863                          RegisterOperand EleOpVPR, ValueType ResTy,
5864                          ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
5865                          SDPatternOperator hiop, SDPatternOperator coreop>
5866   : Pat<(ResTy (op 
5867           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
5868           (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5869         (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
5870
5871 // Pattern for lane in 64-bit vector
5872 class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
5873                         RegisterOperand EleOpVPR, ValueType ResTy,
5874                         ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
5875                         SDPatternOperator hiop, SDPatternOperator coreop>
5876   : Pat<(ResTy (op
5877           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
5878           (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5879         (INST VPR128:$Rn, 
5880           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
5881
5882 multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
5883   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
5884                          op, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
5885                          BinOpFrag<(Neon_vduplane
5886                                      (Neon_low8H node:$LHS), node:$RHS)>>;
5887
5888   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
5889                          op, VPR64, VPR128, v2i64, v2i32, v4i32,
5890                          BinOpFrag<(Neon_vduplane
5891                                      (Neon_low4S node:$LHS), node:$RHS)>>;
5892
5893   def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
5894                            op, VPR128Lo, v4i32, v8i16, v8i16, v4i16,
5895                            Neon_High8H,
5896                            BinOpFrag<(Neon_vduplane
5897                                        (Neon_low8H node:$LHS), node:$RHS)>>;
5898   
5899   def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
5900                            op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
5901                            BinOpFrag<(Neon_vduplane
5902                                        (Neon_low4S node:$LHS), node:$RHS)>>;
5903   
5904   // For a lane in a 64-bit vector, the index can only be half the max value.
5905
5906   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
5907                         op, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
5908                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5909
5910   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
5911                         op, VPR64, VPR64, v2i64, v2i32, v2i32,
5912                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5913
5914   def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
5915                           op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
5916                           BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5917   
5918   def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
5919                           op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
5920                           BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5921 }
5922
5923 defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
5924 defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
5925 defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
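// Likewise for the lane-indexed widening multiplies, e.g.
// "smull v0.2d, v1.2s, v2.s[1]" and "sqdmull2 v0.4s, v1.8h, v2.h[2]".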
5926
5927 multiclass NI_qdma<SDPatternOperator op> {
5928   def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
5929                     (op node:$Ra,
5930                       (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
5931
5932   def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
5933                     (op node:$Ra,
5934                       (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
5935 }
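// Neon_qdmlal and Neon_qdmlsl (below) pair int_arm_neon_vqdmull with a
// saturating add or subtract, giving fragments with SQDMLAL/SQDMLSL semantics
// for the lane patterns that follow.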
5936
5937 defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
5938 defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
5939
5940 multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
5941   def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
5942                      !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
5943                      v4i32, v4i16, v8i16,
5944                      BinOpFrag<(Neon_vduplane
5945                                  (Neon_low8H node:$LHS), node:$RHS)>>;
5946   
5947   def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
5948                      !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
5949                      v2i64, v2i32, v4i32,
5950                      BinOpFrag<(Neon_vduplane
5951                                  (Neon_low4S node:$LHS), node:$RHS)>>;
5952   
5953   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
5954                        !cast<PatFrag>(op # "_4s"), VPR128Lo,
5955                        v4i32, v8i16, v8i16, v4i16, Neon_High8H,
5956                        BinOpFrag<(Neon_vduplane
5957                                    (Neon_low8H node:$LHS), node:$RHS)>>;
5958   
5959   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
5960                        !cast<PatFrag>(op # "_2d"), VPR128,
5961                        v2i64, v4i32, v4i32, v2i32, Neon_High4S,
5962                        BinOpFrag<(Neon_vduplane
5963                                    (Neon_low4S node:$LHS), node:$RHS)>>;
5964   
5965   // For a lane in a 64-bit vector, the index can only be half the max value.
5966
5967   def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
5968                     !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
5969                     v4i32, v4i16, v4i16,
5970                     BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5971   
5972   def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
5973                     !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
5974                     v2i64, v2i32, v2i32,
5975                     BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5976
5977   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
5978                       !cast<PatFrag>(op # "_4s"), VPR64Lo,
5979                       v4i32, v8i16, v4i16, v4i16, Neon_High8H,
5980                       BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5981   
5982   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
5983                       !cast<PatFrag>(op # "_2d"), VPR64,
5984                       v2i64, v4i32, v2i32, v2i32, Neon_High4S,
5985                       BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5986 }
5987
5988 defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
5989 defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
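// e.g. "sqdmlal v0.4s, v1.4h, v2.h[1]" and "sqdmlsl2 v0.2d, v1.4s, v2.s[3]".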
5990
5991 // End of implementation for instruction class (3V Elem)
5992
5993 // Insert element (vector, from main)
5994 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
5995                            neon_uimm4_bare> {
5996   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5997 }
5998 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
5999                            neon_uimm3_bare> {
6000   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6001 }
6002 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
6003                            neon_uimm2_bare> {
6004   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6005 }
6006 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
6007                            neon_uimm1_bare> {
6008   let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
6009 }
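// GPR-to-lane form of INS, e.g. "ins v0.h[3], w1" and "ins v0.d[1], x2".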
6010
6011 class Neon_INS_main_pattern <ValueType ResTy, ValueType ExtResTy,
6012                              RegisterClass OpGPR, ValueType OpTy, 
6013                              Operand OpImm, Instruction INS> 
6014   : Pat<(ResTy (vector_insert
6015               (ResTy VPR64:$src),
6016               (OpTy OpGPR:$Rn),
6017               (OpImm:$Imm))),
6018         (ResTy (EXTRACT_SUBREG 
6019           (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
6020             OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
6021
6022 def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
6023                                           neon_uimm3_bare, INSbw>;
6024 def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
6025                                           neon_uimm2_bare, INShw>;
6026 def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
6027                                           neon_uimm1_bare, INSsw>;
6028 def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
6029                                           neon_uimm0_bare, INSdx>;
6030
6031 class NeonI_INS_element<string asmop, string Res, Operand ResImm>
6032   : NeonI_insert<0b1, 0b1,
6033                  (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, 
6034                  ResImm:$Immd, ResImm:$Immn),
6035                  asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
6036                  [],
6037                  NoItinerary> {
6038   let Constraints = "$src = $Rd";
6039   bits<4> Immd;
6040   bits<4> Immn;
6041 }
6042
6043 // Insert element (vector, from element)
6044 def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
6045   let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
6046   let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
6047 }
6048 def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
6049   let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
6050   let Inst{14-12} = {Immn{2}, Immn{1}, Immn{0}};
6051   // bit 11 is unspecified.
6052 }
6053 def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
6054   let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
6055   let Inst{14-13} = {Immn{1}, Immn{0}};
6056   // bits 11-12 are unspecified.
6057 }
6058 def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
6059   let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
6060   let Inst{14} = Immn{0};
6061   // bits 11-13 are unspecified.
6062 }
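// Lane-to-lane form of INS, e.g. "ins v0.s[2], v1.s[0]".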
6063
6064 multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
6065                                 ValueType MidTy, Operand StImm, Operand NaImm,
6066                                 Instruction INS> {
6067 def : Pat<(ResTy (vector_insert
6068             (ResTy VPR128:$src),
6069             (MidTy (vector_extract
6070               (ResTy VPR128:$Rn),
6071               (StImm:$Immn))),
6072             (StImm:$Immd))),
6073           (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
6074               StImm:$Immd, StImm:$Immn)>;
6075
6076 def : Pat <(ResTy (vector_insert
6077              (ResTy VPR128:$src),
6078              (MidTy (vector_extract
6079                (NaTy VPR64:$Rn),
6080                (NaImm:$Immn))),
6081              (StImm:$Immd))),
6082            (INS (ResTy VPR128:$src),
6083              (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
6084              StImm:$Immd, NaImm:$Immn)>;
6085
6086 def : Pat <(NaTy (vector_insert
6087              (NaTy VPR64:$src),
6088              (MidTy (vector_extract
6089                (ResTy VPR128:$Rn),
6090                (StImm:$Immn))),
6091              (NaImm:$Immd))),
6092            (NaTy (EXTRACT_SUBREG
6093              (ResTy (INS
6094                (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6095                (ResTy VPR128:$Rn),
6096                NaImm:$Immd, StImm:$Immn)),
6097              sub_64))>;
6098
6099 def : Pat <(NaTy (vector_insert
6100              (NaTy VPR64:$src),
6101              (MidTy (vector_extract
6102                (NaTy VPR64:$Rn),
6103                (NaImm:$Immn))),
6104              (NaImm:$Immd))),
6105            (NaTy (EXTRACT_SUBREG
6106              (ResTy (INS
6107                (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6108                (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
6109                NaImm:$Immd, NaImm:$Immn)),
6110              sub_64))>;
6111 }
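// All four 64-bit/128-bit operand combinations are funnelled through the
// 128-bit INS, widening inputs with SUBREG_TO_REG and narrowing the result
// with EXTRACT_SUBREG where needed.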
6112
6113 defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
6114                             neon_uimm1_bare, INSELs>;
6115 defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
6116                             neon_uimm0_bare, INSELd>;
6117 defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
6118                             neon_uimm3_bare, INSELb>;
6119 defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
6120                             neon_uimm2_bare, INSELh>;
6121 defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6122                             neon_uimm1_bare, INSELs>;
6123 defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
6124                             neon_uimm0_bare, INSELd>;
6125
6126 multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
6127                                       ValueType MidTy,
6128                                       RegisterClass OpFPR, Operand ResImm,
6129                                       SubRegIndex SubIndex, Instruction INS> {
6130 def : Pat <(ResTy (vector_insert
6131              (ResTy VPR128:$src),
6132              (MidTy OpFPR:$Rn),
6133              (ResImm:$Imm))),
6134            (INS (ResTy VPR128:$src),
6135              (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
6136              ResImm:$Imm,
6137              (i64 0))>;
6138
6139 def : Pat <(NaTy (vector_insert
6140              (NaTy VPR64:$src),
6141              (MidTy OpFPR:$Rn),
6142              (ResImm:$Imm))),
6143            (NaTy (EXTRACT_SUBREG 
6144              (ResTy (INS 
6145                (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6146                (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
6147                ResImm:$Imm,
6148                (i64 0))),
6149              sub_64))>;
6150 }
6151
6152 defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
6153                                   sub_32, INSELs>;
6154 defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
6155                                   sub_64, INSELd>;
6156
6157 class NeonI_SMOV<string asmop, string Res, bit Q,
6158                  ValueType OpTy, ValueType eleTy,
6159                  Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
6160   : NeonI_copy<Q, 0b0, 0b0101,
6161                (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
6162                asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
6163                [(set (ResTy ResGPR:$Rd),
6164                  (ResTy (sext_inreg
6165                    (ResTy (vector_extract
6166                      (OpTy VPR128:$Rn), (OpImm:$Imm))),
6167                    eleTy)))],
6168                NoItinerary> {
6169   bits<4> Imm;
6170 }
6171
6172 // Signed integer move (main, from element)
6173 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
6174                         GPR32, i32> {
6175   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6176 }
6177 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
6178                         GPR32, i32> {
6179   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6180 }
6181 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
6182                         GPR64, i64> {
6183   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6184 }
6185 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
6186                         GPR64, i64> {
6187   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6188 }
6189 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
6190                         GPR64, i64> {
6191   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6192 }
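// e.g. "smov w0, v0.b[3]" and "smov x0, v0.s[1]"; the 32-bit destination form
// only exists for B and H lanes.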
6193
6194 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
6195                                ValueType eleTy, Operand StImm,  Operand NaImm,
6196                                Instruction SMOVI> {
6197   def : Pat<(i64 (sext_inreg
6198               (i64 (anyext
6199                 (i32 (vector_extract
6200                   (StTy VPR128:$Rn), (StImm:$Imm))))),
6201               eleTy)),
6202             (SMOVI VPR128:$Rn, StImm:$Imm)>;
6203   
6204   def : Pat<(i64 (sext
6205               (i32 (vector_extract
6206                 (StTy VPR128:$Rn), (StImm:$Imm))))),
6207             (SMOVI VPR128:$Rn, StImm:$Imm)>;
6208   
6209   def : Pat<(i64 (sext_inreg
6210               (i64 (vector_extract
6211                 (NaTy VPR64:$Rn), (NaImm:$Imm))),
6212               eleTy)),
6213             (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6214               NaImm:$Imm)>;
6215   
6216   def : Pat<(i64 (sext_inreg
6217               (i64 (anyext
6218                 (i32 (vector_extract
6219                   (NaTy VPR64:$Rn), (NaImm:$Imm))))),
6220               eleTy)),
6221             (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6222               NaImm:$Imm)>;
6223   
6224   def : Pat<(i64 (sext
6225               (i32 (vector_extract
6226                 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
6227             (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6228               NaImm:$Imm)>; 
6229 }
6230
6231 defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
6232                           neon_uimm3_bare, SMOVxb>;
6233 defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
6234                           neon_uimm2_bare, SMOVxh>;
6235 defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6236                           neon_uimm1_bare, SMOVxs>;
6237
6238 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
6239                           ValueType eleTy, Operand StImm,  Operand NaImm,
6240                           Instruction SMOVI>
6241   : Pat<(i32 (sext_inreg
6242           (i32 (vector_extract
6243             (NaTy VPR64:$Rn), (NaImm:$Imm))),
6244           eleTy)),
6245         (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6246           NaImm:$Imm)>;
6247
6248 def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
6249                          neon_uimm3_bare, SMOVwb>;
6250 def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
6251                          neon_uimm2_bare, SMOVwh>;
6252
6253 class NeonI_UMOV<string asmop, string Res, bit Q,
6254                  ValueType OpTy, Operand OpImm,
6255                  RegisterClass ResGPR, ValueType ResTy>
6256   : NeonI_copy<Q, 0b0, 0b0111,
6257                (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
6258                asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
6259                [(set (ResTy ResGPR:$Rd),
6260                   (ResTy (vector_extract
6261                     (OpTy VPR128:$Rn), (OpImm:$Imm))))],
6262                NoItinerary> {
6263   bits<4> Imm;
6264 }
6265
6266 // Unsigned integer move (main, from element)
6267 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
6268                          GPR32, i32> {
6269   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6270 }
6271 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
6272                          GPR32, i32> {
6273   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6274 }
6275 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
6276                          GPR32, i32> {
6277   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6278 }
6279 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
6280                          GPR64, i64> {
6281   let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
6282 }
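// e.g. "umov w0, v0.h[2]" and "umov x0, v0.d[1]"; the lane is zero-extended
// into the destination register.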
6283
6284 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
6285                          Operand StImm,  Operand NaImm,
6286                          Instruction SMOVI>
6287   : Pat<(ResTy (vector_extract
6288           (NaTy VPR64:$Rn), NaImm:$Imm)),
6289         (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6290           NaImm:$Imm)>;
6291
6292 def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
6293                         neon_uimm3_bare, UMOVwb>;
6294 def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
6295                         neon_uimm2_bare, UMOVwh>; 
6296 def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6297                         neon_uimm1_bare, UMOVws>;
6298
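// UMOV already zero-extends the lane into the GPR, so the explicit mask or
// zero-extension below folds into a plain UMOV.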
6299 def : Pat<(i32 (and
6300             (i32 (vector_extract
6301               (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
6302             255)),
6303           (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
6304
6305 def : Pat<(i32 (and
6306             (i32 (vector_extract
6307               (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
6308             65535)),
6309           (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
6310
6311 def : Pat<(i64 (zext
6312             (i32 (vector_extract
6313               (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
6314           (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
6315
6316 def : Pat<(i32 (and
6317             (i32 (vector_extract
6318               (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
6319             255)),
6320           (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
6321             neon_uimm3_bare:$Imm)>;
6322
6323 def : Pat<(i32 (and
6324             (i32 (vector_extract
6325               (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
6326             65535)),
6327           (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
6328             neon_uimm2_bare:$Imm)>;
6329
6330 def : Pat<(i64 (zext
6331             (i32 (vector_extract
6332               (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
6333           (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
6334             neon_uimm0_bare:$Imm)>;
6335
6336 // Additional copy patterns for scalar types
6337 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
6338           (UMOVwb (v16i8
6339             (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
6340
6341 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
6342           (UMOVwh (v8i16
6343             (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
6344
6345 def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
6346           (FMOVws FPR32:$Rn)>;
6347
6348 def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
6349           (FMOVxd FPR64:$Rn)>;
6350                
6351 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
6352           (f64 FPR64:$Rn)>;
6353
6354 def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
6355           (f32 FPR32:$Rn)>;
6356
6357 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
6358           (v1i8 (EXTRACT_SUBREG (v16i8
6359             (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
6360             sub_8))>;
6361
6362 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
6363           (v1i16 (EXTRACT_SUBREG (v8i16
6364             (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
6365             sub_16))>;
6366
6367 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
6368           (FMOVsw $src)>;
6369
6370 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
6371           (FMOVdx $src)>;
6372
6373 def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
6374           (v1f32 FPR32:$Rn)>;
6375 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
6376           (v1f64 FPR64:$Rn)>;
6377
6378 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
6379           (FMOVdd $src)>;
6380
6381 def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
6382           (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
6383                          (f64 FPR64:$src), sub_64)>;
6384
6385 class NeonI_DUP_Elt<bit Q, string asmop, string rdlane,  string rnlane,
6386                     RegisterOperand ResVPR, Operand OpImm>
6387   : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
6388                (ins VPR128:$Rn, OpImm:$Imm),
6389                asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
6390                [],
6391                NoItinerary> {
6392   bits<4> Imm;
6393 }
6394
6395 def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
6396                               neon_uimm4_bare> {
6397   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6398 }
6399
6400 def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
6401                               neon_uimm3_bare> {
6402   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6403 }
6404
6405 def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
6406                               neon_uimm2_bare> {
6407   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6408 }
6409
6410 def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
6411                               neon_uimm1_bare> {
6412   let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
6413 }
6414
6415 def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
6416                               neon_uimm4_bare> {
6417   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6418 }
6419
6420 def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
6421                               neon_uimm3_bare> {
6422   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6423 }
6424
6425 def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
6426                               neon_uimm2_bare> {
6427   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6428 }
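// e.g. "dup v0.4s, v1.s[2]" and "dup v0.2d, v1.d[0]".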
6429
6430 multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
6431                                        ValueType OpTy,ValueType NaTy,
6432                                        ValueType ExTy, Operand OpLImm,
6433                                        Operand OpNImm> {
6434 def  : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
6435         (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
6436
6437 def : Pat<(ResTy (Neon_vduplane
6438             (NaTy VPR64:$Rn), OpNImm:$Imm)),
6439           (ResTy (DUPELT
6440             (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
6441 }
6442 defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
6443                              neon_uimm4_bare, neon_uimm3_bare>;
6444 defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
6445                              neon_uimm4_bare, neon_uimm3_bare>;
6446 defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
6447                              neon_uimm3_bare, neon_uimm2_bare>;
6448 defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
6449                              neon_uimm3_bare, neon_uimm2_bare>;
6450 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
6451                              neon_uimm2_bare, neon_uimm1_bare>;
6452 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
6453                              neon_uimm2_bare, neon_uimm1_bare>;
6454 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
6455                              neon_uimm1_bare, neon_uimm0_bare>;
6456 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
6457                              neon_uimm2_bare, neon_uimm1_bare>;
6458 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
6459                              neon_uimm2_bare, neon_uimm1_bare>;
6460 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
6461                              neon_uimm1_bare, neon_uimm0_bare>;
6462
6463 def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
6464           (v2f32 (DUPELT2s 
6465             (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
6466             (i64 0)))>;
6467 def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
6468           (v4f32 (DUPELT4s 
6469             (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
6470             (i64 0)))>;
6471 def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
6472           (v2f64 (DUPELT2d 
6473             (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
6474             (i64 0)))>;
6475
6476 class NeonI_DUP<bit Q, string asmop, string rdlane,
6477                 RegisterOperand ResVPR, ValueType ResTy,
6478                 RegisterClass OpGPR, ValueType OpTy>
6479   : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
6480                asmop # "\t$Rd" # rdlane # ", $Rn",
6481                [(set (ResTy ResVPR:$Rd), 
6482                  (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
6483                NoItinerary>;
6484
6485 def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
6486   let Inst{16} = 0b1;
6487   // bits 17-19 are unspecified.
6488 }
6489
6490 def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
6491   let Inst{17-16} = 0b10;
6492   // bits 18-19 are unspecified.
6493 }
6494
6495 def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
6496   let Inst{18-16} = 0b100;
6497   // bit 19 is unspecified.
6498 }
6499
6500 def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
6501   let Inst{19-16} = 0b1000;
6502 }
6503
6504 def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
6505   let Inst{16} = 0b1;
6506   // bits 17-19 are unspecified.
6507 }
6508
6509 def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
6510   let Inst{17-16} = 0b10;
6511   // bits 18-19 are unspecified.
6512 }
6513
6514 def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
6515   let Inst{18-16} = 0b100;
6516   // bit 19 is unspecified.
6517 }
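// General-register form, e.g. "dup v0.8h, w1" and "dup v0.2d, x0".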
6518
6519 // Patterns for CONCAT_VECTORS
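// A concat with undef only needs the implicit widening of SUBREG_TO_REG; two
// distinct operands are merged with an INS into lane d[1]; a register
// concatenated with itself becomes a DUP of lane d[0].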
6520 multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
6521 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
6522           (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
6523 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
6524           (INSELd 
6525             (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6526             (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
6527             (i64 1),
6528             (i64 0))>;
6529 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
6530           (DUPELT2d 
6531             (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6532             (i64 0))> ;
6533 }
6534
6535 defm : Concat_Vector_Pattern<v16i8, v8i8>;
6536 defm : Concat_Vector_Pattern<v8i16, v4i16>;
6537 defm : Concat_Vector_Pattern<v4i32, v2i32>;
6538 defm : Concat_Vector_Pattern<v2i64, v1i64>;
6539 defm : Concat_Vector_Pattern<v4f32, v2f32>;
6540 defm : Concat_Vector_Pattern<v2f64, v1f64>;
6541
6542 // Patterns for EXTRACT_SUBVECTOR
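// Taking the low half of a 128-bit register is just a sub_64 sub-register
// copy, so no extra instruction is needed.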
6543 def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
6544           (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6545 def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
6546           (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6547 def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
6548           (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6549 def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
6550           (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6551 def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
6552           (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6553 def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
6554           (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6555
6556 class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
6557                 bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
6558                 SDPatternOperator Neon_Rev>
6559   : NeonI_2VMisc<Q, U, size, opcode,
6560                (outs ResVPR:$Rd), (ins ResVPR:$Rn),
6561                asmop # "\t$Rd." # Res # ", $Rn." # Res,
6562                [(set (ResTy ResVPR:$Rd),
6563                   (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
6564                NoItinerary> ;
6565
6566 def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
6567                           v16i8, Neon_rev64>;
6568 def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
6569                          v8i16, Neon_rev64>;
6570 def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
6571                          v4i32, Neon_rev64>;
6572 def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
6573                          v8i8, Neon_rev64>;
6574 def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
6575                          v4i16, Neon_rev64>;
6576 def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
6577                          v2i32, Neon_rev64>;
6578
6579 def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
6580 def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
6581
6582 def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
6583                           v16i8, Neon_rev32>;
6584 def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
6585                           v8i16, Neon_rev32>;
6586 def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
6587                          v8i8, Neon_rev32>;
6588 def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
6589                          v4i16, Neon_rev32>;
6590
6591 def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
6592                           v16i8, Neon_rev16>;
6593 def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
6594                          v8i8, Neon_rev16>;
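// e.g. "rev64 v0.4s, v1.4s", "rev32 v0.8h, v1.8h", "rev16 v0.16b, v1.16b".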
6595
6596 multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
6597                              SDPatternOperator Neon_Padd> {
6598   def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
6599                            (outs VPR128:$Rd), (ins VPR128:$Rn),
6600                            asmop # "\t$Rd.8h, $Rn.16b",
6601                            [(set (v8i16 VPR128:$Rd),
6602                               (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
6603                            NoItinerary>;
6604   
6605   def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
6606                           (outs VPR64:$Rd), (ins VPR64:$Rn),
6607                           asmop # "\t$Rd.4h, $Rn.8b",
6608                           [(set (v4i16 VPR64:$Rd),
6609                              (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
6610                           NoItinerary>;
6611   
6612   def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
6613                            (outs VPR128:$Rd), (ins VPR128:$Rn),
6614                            asmop # "\t$Rd.4s, $Rn.8h",
6615                            [(set (v4i32 VPR128:$Rd),
6616                               (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
6617                            NoItinerary>;
6618   
6619   def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
6620                           (outs VPR64:$Rd), (ins VPR64:$Rn),
6621                           asmop # "\t$Rd.2s, $Rn.4h",
6622                           [(set (v2i32 VPR64:$Rd),
6623                              (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
6624                           NoItinerary>;
6625   
6626   def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
6627                            (outs VPR128:$Rd), (ins VPR128:$Rn),
6628                            asmop # "\t$Rd.2d, $Rn.4s",
6629                            [(set (v2i64 VPR128:$Rd),
6630                               (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
6631                            NoItinerary>;
6632   
6633   def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
6634                           (outs VPR64:$Rd), (ins VPR64:$Rn),
6635                           asmop # "\t$Rd.1d, $Rn.2s",
6636                           [(set (v1i64 VPR64:$Rd),
6637                              (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
6638                           NoItinerary>;
6639 }
6640
6641 defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
6642                                 int_arm_neon_vpaddls>;
6643 defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
6644                                 int_arm_neon_vpaddlu>;
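// e.g. "saddlp v0.8h, v1.16b": adds adjacent element pairs, widening each
// result.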
6645
6646 multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
6647                              SDPatternOperator Neon_Padd> {
6648   let Constraints = "$src = $Rd" in {
6649     def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
6650                              (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
6651                              asmop # "\t$Rd.8h, $Rn.16b",
6652                              [(set (v8i16 VPR128:$Rd),
6653                                 (v8i16 (Neon_Padd 
6654                                   (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
6655                              NoItinerary>;
6656     
6657     def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
6658                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
6659                             asmop # "\t$Rd.4h, $Rn.8b",
6660                             [(set (v4i16 VPR64:$Rd),
6661                                (v4i16 (Neon_Padd 
6662                                  (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
6663                             NoItinerary>;
6664     
6665     def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
6666                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
6667                             asmop # "\t$Rd.4s, $Rn.8h",
6668                             [(set (v4i32 VPR128:$Rd),
6669                                (v4i32 (Neon_Padd
6670                                  (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
6671                             NoItinerary>;
6672     
6673     def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
6674                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
6675                             asmop # "\t$Rd.2s, $Rn.4h",
6676                             [(set (v2i32 VPR64:$Rd),
6677                                (v2i32 (Neon_Padd
6678                                  (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
6679                             NoItinerary>;
6680     
6681     def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
6682                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
6683                             asmop # "\t$Rd.2d, $Rn.4s",
6684                             [(set (v2i64 VPR128:$Rd),
6685                                (v2i64 (Neon_Padd
6686                                  (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
6687                             NoItinerary>;
6688     
6689     def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
6690                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
6691                             asmop # "\t$Rd.1d, $Rn.2s",
6692                             [(set (v1i64 VPR64:$Rd),
6693                                (v1i64 (Neon_Padd
6694                                  (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
6695                             NoItinerary>;
6696   }
6697 }
6698
6699 defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
6700                                    int_arm_neon_vpadals>;
6701 defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
6702                                    int_arm_neon_vpadalu>;
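// e.g. "sadalp v0.4s, v1.8h": the same pairwise widening add, but accumulated
// into the destination.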
6703
6704 multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
6705   def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
6706                          (outs VPR128:$Rd), (ins VPR128:$Rn),
6707                          asmop # "\t$Rd.16b, $Rn.16b",
6708                          [], NoItinerary>;
6709   
6710   def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
6711                         (outs VPR128:$Rd), (ins VPR128:$Rn),
6712                         asmop # "\t$Rd.8h, $Rn.8h",
6713                         [], NoItinerary>;
6714   
6715   def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
6716                         (outs VPR128:$Rd), (ins VPR128:$Rn),
6717                         asmop # "\t$Rd.4s, $Rn.4s",
6718                         [], NoItinerary>;
6719   
6720   def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
6721                         (outs VPR128:$Rd), (ins VPR128:$Rn),
6722                         asmop # "\t$Rd.2d, $Rn.2d",
6723                         [], NoItinerary>;
6724   
6725   def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
6726                          (outs VPR64:$Rd), (ins VPR64:$Rn),
6727                          asmop # "\t$Rd.8b, $Rn.8b",
6728                          [], NoItinerary>;
6729   
6730   def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
6731                         (outs VPR64:$Rd), (ins VPR64:$Rn),
6732                         asmop # "\t$Rd.4h, $Rn.4h",
6733                         [], NoItinerary>;
6734   
6735   def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
6736                         (outs VPR64:$Rd), (ins VPR64:$Rn),
6737                         asmop # "\t$Rd.2s, $Rn.2s",
6738                         [], NoItinerary>;
6739 }
6740
6741 defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
6742 defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
6743 defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
6744 defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
6745
6746 multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
6747                                           SDPatternOperator Neon_Op> {
6748   def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
6749             (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;
6750
6751   def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
6752             (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;
6753
6754   def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
6755             (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;
6756
6757   def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
6758             (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;
6759
6760   def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
6761             (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;
6762
6763   def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
6764             (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;
6765
6766   def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
6767             (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
6768 }
6769
6770 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
6771 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
6772 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
6773
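// A subtraction from (a bitcast of) the all-zero vector is selected as NEG.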
6774 def : Pat<(v16i8 (sub 
6775             (v16i8 Neon_AllZero),
6776             (v16i8 VPR128:$Rn))),
6777           (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
6778 def : Pat<(v8i8 (sub 
6779             (v8i8 Neon_AllZero),
6780             (v8i8 VPR64:$Rn))),
6781           (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
6782 def : Pat<(v8i16 (sub 
6783             (v8i16 (bitconvert (v16i8 Neon_AllZero))),
6784             (v8i16 VPR128:$Rn))),
6785           (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
6786 def : Pat<(v4i16 (sub 
6787             (v4i16 (bitconvert (v8i8 Neon_AllZero))),
6788             (v4i16 VPR64:$Rn))),
6789           (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
6790 def : Pat<(v4i32 (sub 
6791             (v4i32 (bitconvert (v16i8 Neon_AllZero))),
6792             (v4i32 VPR128:$Rn))),
6793           (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
6794 def : Pat<(v2i32 (sub 
6795             (v2i32 (bitconvert (v8i8 Neon_AllZero))),
6796             (v2i32 VPR64:$Rn))),
6797           (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
6798 def : Pat<(v2i64 (sub 
6799             (v2i64 (bitconvert (v16i8 Neon_AllZero))),
6800             (v2i64 VPR128:$Rn))),
6801           (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
6802
6803 multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
6804   let Constraints = "$src = $Rd" in {
6805     def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
6806                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
6807                            asmop # "\t$Rd.16b, $Rn.16b",
6808                            [], NoItinerary>;
6809     
6810     def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
6811                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
6812                           asmop # "\t$Rd.8h, $Rn.8h",
6813                           [], NoItinerary>;
6814     
6815     def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
6816                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
6817                           asmop # "\t$Rd.4s, $Rn.4s",
6818                           [], NoItinerary>;
6819     
6820     def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
6821                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
6822                           asmop # "\t$Rd.2d, $Rn.2d",
6823                           [], NoItinerary>;
6824     
6825     def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
6826                           (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
6827                           asmop # "\t$Rd.8b, $Rn.8b",
6828                           [], NoItinerary>;
6829     
6830     def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
6831                           (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
6832                           asmop # "\t$Rd.4h, $Rn.4h",
6833                           [], NoItinerary>;
6834     
6835     def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
6836                           (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
6837                           asmop # "\t$Rd.2s, $Rn.2s",
6838                           [], NoItinerary>;
6839   }
6840 }
6841
6842 defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
6843 defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
6844
6845 multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
6846                                            SDPatternOperator Neon_Op> {
6847   def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
6848             (v16i8 (!cast<Instruction>(Prefix # 16b)
6849               (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;
6850
6851   def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
6852             (v8i16 (!cast<Instruction>(Prefix # 8h)
6853               (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;
6854
6855   def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
6856             (v4i32 (!cast<Instruction>(Prefix # 4s)
6857               (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;
6858
6859   def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
6860             (v2i64 (!cast<Instruction>(Prefix # 2d)
6861               (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;
6862
6863   def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
6864             (v8i8 (!cast<Instruction>(Prefix # 8b)
6865               (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;
6866
6867   def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
6868             (v4i16 (!cast<Instruction>(Prefix # 4h)
6869               (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;
6870
6871   def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
6872             (v2i32 (!cast<Instruction>(Prefix # 2s)
6873               (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
6874 }
6875
6876 defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
6877 defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
6878
6879 multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
6880                           SDPatternOperator Neon_Op> {
6881   def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
6882                          (outs VPR128:$Rd), (ins VPR128:$Rn),
6883                          asmop # "\t$Rd.16b, $Rn.16b",
6884                          [(set (v16i8 VPR128:$Rd),
6885                             (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
6886                          NoItinerary>;
6887   
6888   def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
6889                         (outs VPR128:$Rd), (ins VPR128:$Rn),
6890                         asmop # "\t$Rd.8h, $Rn.8h",
6891                         [(set (v8i16 VPR128:$Rd),
6892                            (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
6893                         NoItinerary>;
6894   
6895   def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
6896                         (outs VPR128:$Rd), (ins VPR128:$Rn),
6897                         asmop # "\t$Rd.4s, $Rn.4s",
6898                         [(set (v4i32 VPR128:$Rd),
6899                            (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
6900                         NoItinerary>;
6901   
6902   def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
6903                         (outs VPR64:$Rd), (ins VPR64:$Rn),
6904                         asmop # "\t$Rd.8b, $Rn.8b",
6905                         [(set (v8i8 VPR64:$Rd),
6906                            (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
6907                         NoItinerary>;
6908   
6909   def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
6910                         (outs VPR64:$Rd), (ins VPR64:$Rn),
6911                         asmop # "\t$Rd.4h, $Rn.4h",
6912                         [(set (v4i16 VPR64:$Rd),
6913                            (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
6914                         NoItinerary>;
6915   
6916   def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
6917                         (outs VPR64:$Rd), (ins VPR64:$Rn),
6918                         asmop # "\t$Rd.2s, $Rn.2s",
6919                         [(set (v2i32 VPR64:$Rd),
6920                            (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
6921                         NoItinerary>;
6922 }
6923
6924 defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
6925 defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
6926
6927 multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
6928                               bits<5> Opcode> {
6929   def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
6930                          (outs VPR128:$Rd), (ins VPR128:$Rn),
6931                          asmop # "\t$Rd.16b, $Rn.16b",
6932                          [], NoItinerary>;
6933   
6934   def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
6935                         (outs VPR64:$Rd), (ins VPR64:$Rn),
6936                         asmop # "\t$Rd.8b, $Rn.8b",
6937                         [], NoItinerary>;
6938 }
6939
6940 defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
6941 defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
6942 defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;
6943
6944 def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
6945                     (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
6946 def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
6947                     (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;
6948
6949 def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
6950           (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
6951 def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
6952           (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;
6953
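// An XOR with the all-ones vector is selected as NOT.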
6954 def : Pat<(v16i8 (xor 
6955             (v16i8 VPR128:$Rn),
6956             (v16i8 Neon_AllOne))),
6957           (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
6958 def : Pat<(v8i8 (xor 
6959             (v8i8 VPR64:$Rn),
6960             (v8i8 Neon_AllOne))),
6961           (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
6962 def : Pat<(v8i16 (xor 
6963             (v8i16 VPR128:$Rn),
6964             (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
6965           (NOT16b VPR128:$Rn)>;
6966 def : Pat<(v4i16 (xor 
6967             (v4i16 VPR64:$Rn),
6968             (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
6969           (NOT8b VPR64:$Rn)>;
6970 def : Pat<(v4i32 (xor 
6971             (v4i32 VPR128:$Rn),
6972             (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
6973           (NOT16b VPR128:$Rn)>;
6974 def : Pat<(v2i32 (xor 
6975             (v2i32 VPR64:$Rn),
6976             (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
6977           (NOT8b VPR64:$Rn)>;
6978 def : Pat<(v2i64 (xor 
6979             (v2i64 VPR128:$Rn),
6980             (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
6981           (NOT16b VPR128:$Rn)>;
6982
6983 def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
6984           (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
6985 def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
6986           (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
6987
6988 multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
6989                                 SDPatternOperator Neon_Op> {
6990   def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
6991                         (outs VPR128:$Rd), (ins VPR128:$Rn),
6992                         asmop # "\t$Rd.4s, $Rn.4s",
6993                         [(set (v4f32 VPR128:$Rd),
6994                            (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
6995                         NoItinerary>;
6996   
6997   def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
6998                         (outs VPR128:$Rd), (ins VPR128:$Rn),
6999                         asmop # "\t$Rd.2d, $Rn.2d",
7000                         [(set (v2f64 VPR128:$Rd),
7001                            (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
7002                         NoItinerary>;
7003   
7004   def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
7005                         (outs VPR64:$Rd), (ins VPR64:$Rn),
7006                         asmop # "\t$Rd.2s, $Rn.2s",
7007                         [(set (v2f32 VPR64:$Rd),
7008                            (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
7009                         NoItinerary>;
7010 }
7011
7012 defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
7013 defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
7014
multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
  def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.8b, $Rn.8h",
                          [], NoItinerary>;

  def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4s",
                          [], NoItinerary>;

  def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  let Constraints = "$Rd = $src" in {
    def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.16b, $Rn.8h",
                             [], NoItinerary>;

    def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.8h, $Rn.4s",
                            [], NoItinerary>;

    def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.4s, $Rn.2d",
                            [], NoItinerary>;
  }
}

defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;

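// Select the generic truncate and the ARM saturating-narrow intrinsics to
// the instructions above.  The high-half ("2") forms are matched as a
// concat_vectors of the existing low half with the narrowed result.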
multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix,
                                        SDPatternOperator Neon_Op> {
  def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
            (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;

  def : Pat<(v16i8 (concat_vectors
              (v8i8 VPR64:$src),
              (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 8h16b)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;

  def : Pat<(v8i16 (concat_vectors
              (v4i16 VPR64:$src),
              (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 4s8h)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;

  def : Pat<(v4i32 (concat_vectors
              (v2i32 VPR64:$src),
              (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 2d4s)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;
}

defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;

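// SHLL: shift left long by exactly the source element width (8, 16 or 32),
// doubling the element size.  The "2" forms read the high half of a
// 128-bit source.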
multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
  def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR128:$Rd),
                          (ins VPR64:$Rn, uimm_exact8:$Imm),
                          asmop # "\t$Rd.8h, $Rn.8b, $Imm",
                          [], NoItinerary>;

  def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR128:$Rd),
                          (ins VPR64:$Rn, uimm_exact16:$Imm),
                          asmop # "\t$Rd.4s, $Rn.4h, $Imm",
                          [], NoItinerary>;

  def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR128:$Rd),
                          (ins VPR64:$Rn, uimm_exact32:$Imm),
                          asmop # "\t$Rd.2d, $Rn.2s, $Imm",
                          [], NoItinerary>;

  def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                          (outs VPR128:$Rd),
                          (ins VPR128:$Rn, uimm_exact8:$Imm),
                          asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
                          [], NoItinerary>;

  def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd),
                          (ins VPR128:$Rn, uimm_exact16:$Imm),
                          asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
                          [], NoItinerary>;

  def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                          (outs VPR128:$Rd),
                          (ins VPR128:$Rn, uimm_exact32:$Imm),
                          asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
                          [], NoItinerary>;
}

defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;

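// Match SHLL from a shift of a zero/sign-extended vector by a splatted
// immediate equal to the source element width.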
class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
                          SDPatternOperator ExtOp, Operand Neon_Imm,
                          string suffix>
  : Pat<(DesTy (shl
          (DesTy (ExtOp (OpTy VPR64:$Rn))),
            (DesTy (Neon_vdup
              (i32 Neon_Imm:$Imm))))),
        (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;

class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
                               SDPatternOperator ExtOp, Operand Neon_Imm,
                               string suffix, PatFrag GetHigh>
  : Pat<(DesTy (shl
          (DesTy (ExtOp
            (OpTy (GetHigh VPR128:$Rn)))),
              (DesTy (Neon_vdup
                (i32 Neon_Imm:$Imm))))),
        (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;

def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
                               Neon_High16B>;
def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
                               Neon_High16B>;
def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
                               Neon_High8H>;
def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
                               Neon_High8H>;
def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
                               Neon_High4S>;
def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
                               Neon_High4S>;

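// FCVTN: floating-point convert to lower precision and narrow
// (4s -> 4h, 2d -> 2s); the "2" forms write the high half of $Rd.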
multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
  def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4s",
                          [], NoItinerary>;

  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  let Constraints = "$src = $Rd" in {
    def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.8h, $Rn.4s",
                            [], NoItinerary>;

    def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.4s, $Rn.2d",
                            [], NoItinerary>;
  }
}

defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;

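// Select the single->half narrowing intrinsic and the v2f64 -> v2f32
// fp_round onto FCVTN / FCVTN2.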
multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
                                       SDPatternOperator f32_to_f16_Op,
                                       SDPatternOperator f64_to_f32_Op> {

  def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
              (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;

  def : Pat<(v8i16 (concat_vectors
                (v4i16 VPR64:$src),
                (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
                  (!cast<Instruction>(prefix # "4s8h")
                    (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
                    (v4f32 VPR128:$Rn))>;

  def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;

  def : Pat<(v4f32 (concat_vectors
              (v2f32 VPR64:$src),
              (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
                (!cast<Instruction>(prefix # "2d4s")
                  (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
                  (v2f64 VPR128:$Rn))>;
}

defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;

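// FCVTXN: narrow v2f64 to v2f32 using the round-to-odd mode, exposed via
// the ACLE fcvtxn intrinsic.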
multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
                                 bits<5> opcode> {
  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "2\t$Rd.4s, $Rn.2d",
                          [], NoItinerary> {
    let Constraints = "$src = $Rd";
  }

  def : Pat<(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;

  def : Pat<(v4f32 (concat_vectors
              (v2f32 VPR64:$src),
              (v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "2d4s")
               (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
               VPR128:$Rn)>;
}

defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn", "FCVTXN", 0b1, 0b10110>;

def Neon_High4Float : PatFrag<(ops node:$in),
                              (extract_subvector (v4f32 node:$in), (iPTR 2))>;

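// FCVTL: floating-point convert to higher precision and lengthen
// (4h -> 4s, 2s -> 2d); the "2" forms read the high half of $Rn.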
multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
  def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.4s, $Rn.4h",
                          [], NoItinerary>;

  def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.2d, $Rn.2s",
                          [], NoItinerary>;

  def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "2\t$Rd.4s, $Rn.8h",
                          [], NoItinerary>;

  def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "2\t$Rd.2d, $Rn.4s",
                          [], NoItinerary>;
}

defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;

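// Select the half->single conversion intrinsic and the v2f32 -> v2f64
// fp_extend onto FCVTL / FCVTL2; the high-half forms use Neon_High8H and
// Neon_High4Float to pick out the upper elements.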
multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;

  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
              (v4i16 (Neon_High8H
                (v8i16 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;

  def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;

  def : Pat<(v2f64 (fextend
              (v2f32 (Neon_High4Float
                (v4f32 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
}

defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;

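// Shared multiclass for the vector FP <-> FP and FP <-> integer conversions:
// the same encoding skeleton is instantiated with different value types and
// DAG operators by the fp_to_int, int_to_fp and fp_to_fp multiclasses below.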
multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
                                ValueType ResTy4s, ValueType OpTy4s,
                                ValueType ResTy2d, ValueType OpTy2d,
                                ValueType ResTy2s, ValueType OpTy2s,
                                SDPatternOperator Neon_Op> {

  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (ResTy4s VPR128:$Rd),
                           (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
                        NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [(set (ResTy2d VPR128:$Rd),
                           (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (ResTy2s VPR64:$Rd),
                           (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
                        NoItinerary>;
}

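// FP -> integer conversions.  The suffix encodes the rounding mode:
// N = to nearest (ties to even), P = toward +Inf, M = toward -Inf,
// Z = toward zero, A = to nearest (ties away from zero); S/U select
// signed/unsigned results.  FCVTZS/FCVTZU match the generic fp_to_sint /
// fp_to_uint nodes; the rest use ACLE intrinsics.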
multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
                                v2f64, v2i32, v2f32, Neon_Op>;
}

defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
                                     int_aarch64_neon_fcvtns>;
defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
                                     int_aarch64_neon_fcvtnu>;
defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
                                     int_aarch64_neon_fcvtps>;
defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
                                     int_aarch64_neon_fcvtpu>;
defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
                                     int_aarch64_neon_fcvtms>;
defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
                                     int_aarch64_neon_fcvtmu>;
defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
                                     int_aarch64_neon_fcvtas>;
defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
                                     int_aarch64_neon_fcvtau>;

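// Integer -> FP conversions: SCVTF/UCVTF map the generic sint_to_fp /
// uint_to_fp nodes onto the same encoding skeleton with the vector types
// swapped.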
multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
                                v2i64, v2f32, v2i32, Neon_Op>;
}

defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;

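// FP -> FP unary operations: the FRINT family rounds to an integral value
// (N = ties to even, A = ties away from zero, P = toward +Inf, M = toward
// -Inf, X = current mode signalling inexact, Z = toward zero, I = current
// mode), plus the reciprocal/square-root estimates and FSQRT.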
multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
                                 bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
                                v2f64, v2f32, v2f32, Neon_Op>;
}

defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
                                     int_aarch64_neon_frintn>;
defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
                                    int_arm_neon_vrecpe>;
defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
                                     int_arm_neon_vrsqrte>;
defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111,
                                   int_aarch64_neon_fsqrt>;

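// URECPE/URSQRTE: unsigned integer reciprocal and reciprocal square-root
// estimates, defined only for 32-bit elements.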
multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
                               bits<5> opcode, SDPatternOperator Neon_Op> {
  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4i32 VPR128:$Rd),
                           (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2i32 VPR64:$Rd),
                           (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
                        NoItinerary>;
}

defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
                                  int_arm_neon_vrecpe>;
defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
                                   int_arm_neon_vrsqrte>;

// Crypto Class
class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_AES<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                     asmop # "\t$Rd.16b, $Rn.16b",
                     [(set (v16i8 VPR128:$Rd),
                        (v16i8 (opnode (v16i8 VPR128:$src),
                                       (v16i8 VPR128:$Rn))))],
                     NoItinerary> {
  let Constraints = "$src = $Rd";
}

def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;

class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
                      string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_AES<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$Rn),
                     asmop # "\t$Rd.16b, $Rn.16b",
                     [(set (v16i8 VPR128:$Rd),
                        (v16i8 (opnode (v16i8 VPR128:$Rn))))],
                     NoItinerary>;

def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;

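// SHA-1/SHA-256 schedule update instructions: vector-vector forms with the
// accumulator tied to the destination.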
class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_SHA<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                     asmop # "\t$Rd.4s, $Rn.4s",
                     [(set (v4i32 VPR128:$Rd),
                        (v4i32 (opnode (v4i32 VPR128:$src),
                                       (v4i32 VPR128:$Rn))))],
                     NoItinerary> {
  let Constraints = "$src = $Rd";
}

def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
                                 int_arm_neon_sha1su1>;
def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
                                   int_arm_neon_sha256su0>;

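// SHA1H operates on a single 32-bit hash value, so it lives in FPR32 and is
// modelled with the v1i32 type.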
class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_SHA<size, opcode,
                     (outs FPR32:$Rd), (ins FPR32:$Rn),
                     asmop # "\t$Rd, $Rn",
                     [(set (v1i32 FPR32:$Rd),
                        (v1i32 (opnode (v1i32 FPR32:$Rn))))],
                     NoItinerary>;

def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;

class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs VPR128:$Rd),
                       (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
                       [(set (v4i32 VPR128:$Rd),
                          (v4i32 (opnode (v4i32 VPR128:$src),
                                         (v4i32 VPR128:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  let Constraints = "$src = $Rd";
}

def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
                                   int_arm_neon_sha1su0>;
def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
                                     int_arm_neon_sha256su1>;

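// SHA256H/SHA256H2 take the full 128-bit hash state in $Rd/$Rn plus a 4s
// schedule vector; the destination is tied to the accumulator.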
class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs FPR128:$Rd),
                       (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd, $Rn, $Rm.4s",
                       [(set (v4i32 FPR128:$Rd),
                          (v4i32 (opnode (v4i32 FPR128:$src),
                                         (v4i32 FPR128:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  let Constraints = "$src = $Rd";
}

def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
                                   int_arm_neon_sha256h>;
def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
                                    int_arm_neon_sha256h2>;

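// SHA1C/SHA1P/SHA1M combine the 128-bit hash state with the single 32-bit
// hash value in FPR32 and a 4s schedule vector.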
class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs FPR128:$Rd),
                       (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd, $Rn, $Rm.4s",
                       [(set (v4i32 FPR128:$Rd),
                          (v4i32 (opnode (v4i32 FPR128:$src),
                                         (v1i32 FPR32:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  let Constraints = "$src = $Rd";
}

def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;
