[AArch64 NEON] Implement loading vector constant from constant pool.
[oota-llvm.git] / lib / Target / AArch64 / AArch64InstrNEON.td
1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file describes the AArch64 NEON instruction set.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
17
// (outs Result), (ins Imm, OpCmode)
// Shared profile for the vector immediate-move nodes: result is a vector,
// first input is the i32 immediate payload, second is the cmode selector.
def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;

// Vector move immediate and move inverted immediate.
def Neon_movi     : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;

def Neon_mvni     : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;

// (outs Result), (ins Imm)
// Floating-point vector move immediate; the single i32 input carries the
// encoded FP immediate.
def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
                        [SDTCisVec<0>, SDTCisVT<1, i32>]>>;

// (outs Result), (ins LHS, RHS, CondCode)
// Element-wise vector compare; the third operand is the condition code.
def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
                 [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;

// (outs Result), (ins LHS, 0/0.0 constant, CondCode)
// Compare against an all-zero operand (integer 0 or FP 0.0), so operand 1
// is only constrained to be a vector, not the same type as the result.
def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
                 [SDTCisVec<0>,  SDTCisVec<1>]>>;

// (outs Result), (ins LHS, RHS)
// Vector bit-test compare (TST-style).
def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
                 [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;

// Saturating shift by immediate: result and source share a type, the shift
// amount is an i32.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
def Neon_sqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
def Neon_uqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;

// Two-input permutes (UZP/ZIP/TRN): all three operands share one type.
def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                               SDTCisSameAs<0, 2>]>;
def Neon_uzp1    : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
def Neon_uzp2    : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
def Neon_zip1    : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
def Neon_zip2    : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
def Neon_trn1    : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
def Neon_trn2    : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;

// Single-input shuffles: element reversal within containers (REV64/32/16).
def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def Neon_rev64    : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
def Neon_rev32    : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
def Neon_rev16    : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
// Duplicate a value into every lane; VDUPLANE additionally takes an i64
// lane index as its second operand.
def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
                       [SDTCisVec<0>]>>;
def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
                           [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
// Extract a vector from the concatenation of two vectors, starting at an
// i64 immediate byte/lane position (EXT).
def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
                           [SDTCisVec<0>,  SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;

// Assert nodes: carry sign/zero-extension guarantees through the DAG.
def SDT_assertext : SDTypeProfile<1, 1,
  [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>;
def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
71
72 //===----------------------------------------------------------------------===//
73 // Addressing-mode instantiations
74 //===----------------------------------------------------------------------===//
75
// Instantiate the generic load/store patterns (ls_neutral_pats, defined
// elsewhere in the AArch64 backend) for a 64-bit NEON type `Ty`, using the
// D-register LDR/STR and substituting the doubleword-scaled uimm12 offset
// operand and 8-byte minimum alignment into the address pattern fragments.
multiclass ls_64_pats<dag address, dag Base, dag Offset, ValueType Ty> {
defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
                      !foreach(decls.pattern, Offset,
                               !subst(OFFSET, dword_uimm12, decls.pattern)),
                      !foreach(decls.pattern, address,
                               !subst(OFFSET, dword_uimm12,
                               !subst(ALIGN, min_align8, decls.pattern))),
                      Ty>;
}
85
// 128-bit counterpart of ls_64_pats: Q-register LDR/STR with the
// quadword-scaled uimm12 offset operand and 16-byte minimum alignment.
multiclass ls_128_pats<dag address, dag Base, dag Offset, ValueType Ty> {
defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
                       !foreach(decls.pattern, Offset,
                                !subst(OFFSET, qword_uimm12, decls.pattern)),
                       !foreach(decls.pattern, address,
                                !subst(OFFSET, qword_uimm12,
                                !subst(ALIGN, min_align16, decls.pattern))),
                      Ty>;
}
95
// Expand one uimm12 addressing-mode pattern over every legal NEON vector
// type: the six 64-bit (D-register) types and the six 128-bit (Q-register)
// types.
multiclass uimm12_neon_pats<dag address, dag Base, dag Offset> {
  defm : ls_64_pats<address, Base, Offset, v8i8>;
  defm : ls_64_pats<address, Base, Offset, v4i16>;
  defm : ls_64_pats<address, Base, Offset, v2i32>;
  defm : ls_64_pats<address, Base, Offset, v1i64>;
  defm : ls_64_pats<address, Base, Offset, v2f32>;
  defm : ls_64_pats<address, Base, Offset, v1f64>;

  defm : ls_128_pats<address, Base, Offset, v16i8>;
  defm : ls_128_pats<address, Base, Offset, v8i16>;
  defm : ls_128_pats<address, Base, Offset, v4i32>;
  defm : ls_128_pats<address, Base, Offset, v2i64>;
  defm : ls_128_pats<address, Base, Offset, v4f32>;
  defm : ls_128_pats<address, Base, Offset, v2f64>;
}
111
// Load vector constants from the constant pool: materialise the page
// address of the pool entry with ADRP, then fold the low 12 bits of the
// address into the scaled-immediate offset of the vector LDR.
defm : uimm12_neon_pats<(A64WrapperSmall
                          tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
                        (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
115
116 //===----------------------------------------------------------------------===//
117 // Multiclasses
118 //===----------------------------------------------------------------------===//
119
// Three-register-same instructions that exist only in byte-element form:
// an 8B (64-bit) and a 16B (128-bit) variant, selected by the Q bit.
// Separate pattern operators are taken for each width.
multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size,  bits<5> opcode,
                                string asmop, SDPatternOperator opnode8B,
                                SDPatternOperator opnode16B,
                                bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8B :  NeonI_3VSame<0b0, u, size, opcode,
               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
               asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
               [(set (v8i8 VPR64:$Rd),
                  (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
               NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, size, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
               NoItinerary>;
  }

}
141
// Three-register-same instructions over halfword and word elements:
// 4H/8H (size = 0b01) and 2S/4S (size = 0b10), with the Q bit selecting
// the 64-bit or 128-bit register form.
multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
                                  string asmop, SDPatternOperator opnode,
                                  bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
              [(set (v4i16 VPR64:$Rd),
                 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
              NoItinerary>;

    def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
              [(set (v8i16 VPR128:$Rd),
                 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
              NoItinerary>;

    def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
              [(set (v2i32 VPR64:$Rd),
                 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
              NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
              [(set (v4i32 VPR128:$Rd),
                 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
              NoItinerary>;
  }
}
// Extends the H/S multiclass with the byte-element forms 8B and 16B
// (size = 0b00), giving the full B/H/S set of variants.
multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
                                  string asmop, SDPatternOperator opnode,
                                  bit Commutable = 0>
   : NeonI_3VSame_HS_sizes<u, opcode,  asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _8B :  NeonI_3VSame<0b0, u, 0b00, opcode,
               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
               asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
               [(set (v8i8 VPR64:$Rd),
                  (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
               NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
               NoItinerary>;
  }
}
195
// Extends the B/H/S multiclass with the doubleword-element form 2D
// (size = 0b11). Only a 128-bit variant exists for 64-bit elements.
multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
                                   string asmop, SDPatternOperator opnode,
                                   bit Commutable = 0>
   : NeonI_3VSame_BHS_sizes<u, opcode,  asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
              [(set (v2i64 VPR128:$Rd),
                 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
              NoItinerary>;
  }
}
209
// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
// but Result types can be integer or floating point types (e.g. FP
// comparisons produce integer masks), so each variant takes its own result
// type parameter. The 2-bit size field is built by concatenating the `size`
// template bit with 0 for single-precision forms and 1 for the 2D form.
multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode2S,
                                 SDPatternOperator opnode4S,
                                 SDPatternOperator opnode2D,
                                 ValueType ResTy2S, ValueType ResTy4S,
                                 ValueType ResTy2D, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
              [(set (ResTy2S VPR64:$Rd),
                 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
              NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
              [(set (ResTy4S VPR128:$Rd),
                 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
              NoItinerary>;

    def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
              [(set (ResTy2D VPR128:$Rd),
                 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
               NoItinerary>;
  }
}
241
242 //===----------------------------------------------------------------------===//
243 // Instruction Definitions
244 //===----------------------------------------------------------------------===//
245
// Vector Arithmetic Instructions

// Vector Add (Integer and Floating-Point)
// Integer add is commutable; so is fadd (commutable here refers to operand
// order for instruction selection, not FP reassociation).
defm ADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Sub (Integer and Floating-Point)

defm SUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
                                     v2f32, v4f32, v2f64, 0>;

// Vector Multiply (Integer and Floating-Point)
// Note: integer MUL has no 2D form, hence the BHS multiclass.
defm MULvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Multiply (Polynomial)
// Byte elements only; reuses the ARM polynomial-multiply intrinsic.
defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
                                    int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
270
271 // Vector Multiply-accumulate and Multiply-subtract (Integer)
272
// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
// two operands constraints.
// Used for accumulating three-operand instructions (MLA/MLS, FMLA/FMLS,
// BSL/BIT/BIF, UABA/SABA): the destination register is also an input, so
// $src is tied to $Rd via the constraint below.
class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
  RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
  bits<5> opcode, SDPatternOperator opnode>
  : NeonI_3VSame<q, u, size, opcode,
    (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
    asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
    [(set (OpTy VPRC:$Rd),
       (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
    NoItinerary> {
  let Constraints = "$src = $Rd";
}
286
// MLA: Rd += Rn * Rm, matched from separate add/mul nodes.
def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (add node:$Ra, (mul node:$Rn, node:$Rm))>;

// MLS: Rd -= Rn * Rm.
def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (sub node:$Ra, (mul node:$Rn, node:$Rm))>;

// Integer multiply-accumulate; no 2D form exists for MLA/MLS.
def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
                                             0b0, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
                                             0b1, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_4H:  NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16,
                                             0b0, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_8H:  NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16,
                                             0b1, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_2S:  NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32,
                                             0b0, 0b0, 0b10, 0b10010, Neon_mla>;
def MLAvvv_4S:  NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32,
                                             0b1, 0b0, 0b10, 0b10010, Neon_mla>;

// Integer multiply-subtract; distinguished from MLA by the U bit.
def MLSvvv_8B:  NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8,
                                             0b0, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
                                             0b1, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_4H:  NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16,
                                             0b0, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_8H:  NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16,
                                             0b1, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
                                             0b0, 0b1, 0b10, 0b10010, Neon_mls>;
def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
                                             0b1, 0b1, 0b10, 0b10010, Neon_mls>;
319
// Vector Multiply-accumulate and Multiply-subtract (Floating Point)

// Separate fadd/fmul pairs may only be fused into FMLA/FMLS when fused
// multiply-add is permitted, hence the UseFusedMAC predicate below.
def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fadd node:$Ra, (fmul node:$Rn, node:$Rm))>;

def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>;

let Predicates = [HasNEON, UseFusedMAC] in {
def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32,
                                             0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32,
                                             0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d",  VPR128, v2f64,
                                             0b1, 0b0, 0b01, 0b11001, Neon_fmla>;

def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s",  VPR64,  v2f32,
                                              0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s",  VPR128, v4f32,
                                             0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d",  VPR128, v2f64,
                                             0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
}

// We're also allowed to match the fma instruction regardless of compile
// options: an explicit ISD::FMA node is always a fused operation.
def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
          (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;

// fma with a negated multiplicand is FMLS.
def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
          (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
359
// Vector Divide (Floating-Point)

defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
                                     v2f32, v4f32, v2f64, 0>;
364
// Vector Bitwise Operations
// Bitwise ops are element-size agnostic; they are defined on byte vectors
// only (8B/16B) and other types reach them via the bitwise patterns below.

// Vector Bitwise AND

defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;

// Vector Bitwise Exclusive OR

defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;

// Vector Bitwise OR

defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;

// ORR disassembled as MOV if Vn==Vm

// Vector Move - register
// Alias for ORR if Vn=Vm.
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
                    (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
                    (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
389
// The MOVI instruction takes two immediate operands.  The first is the
// immediate encoding, while the second is the cmode.  A cmode of 14, or
// 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC.
def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;

// Bitwise NOT expressed as XOR with an all-ones vector, in 64-bit and
// 128-bit widths. These feed the ORN/BIC fragments below.
def Neon_not8B  : PatFrag<(ops node:$in),
                          (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
def Neon_not16B : PatFrag<(ops node:$in),
                          (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;

// ORN: Rn | ~Rm.
def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
                         (or node:$Rn, (Neon_not8B node:$Rm))>;

def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (or node:$Rn, (Neon_not16B node:$Rm))>;

// BIC: Rn & ~Rm.
def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
                         (and node:$Rn, (Neon_not8B node:$Rm))>;

def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (and node:$Rn, (Neon_not16B node:$Rm))>;
412
413
// Vector Bitwise OR NOT - register

defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
                                   Neon_orn8B, Neon_orn16B, 0>;

// Vector Bitwise Bit Clear (AND NOT) - register

defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
                                   Neon_bic8B, Neon_bic16B, 0>;
423
// Map the remaining vector types onto the byte-vector bitwise instructions:
// the 8B form also handles v2i32/v4i16/v1i64 and the 16B form handles
// v4i32/v8i16/v2i64, since bitwise ops ignore element boundaries.
multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
                                   SDPatternOperator opnode16B,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
}

// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
defm : Neon_bitwise2V_patterns<or,  or,  ORRvvv_8B, ORRvvv_16B>;
defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
448
//   Vector Bitwise Select
// BSL selects bits from $Rn where $Rd (the tied source) has ones, and from
// $Rm where it has zeros; matched from the generic vselect node.
def BSLvvv_8B  : NeonI_3VSame_Constraint_impl<"bsl", ".8b",  VPR64, v8i8,
                                              0b0, 0b1, 0b01, 0b00011, vselect>;

def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b01, 0b00011, vselect>;
455
// Route every vector type's three-operand select (vselect, the manually
// expanded or/and form, and the llvm.arm.neon.vbsl intrinsic) onto the
// byte-vector BSL instructions.
multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  // Disassociate type from instruction definition
  def : Pat<(v8i8 (opnode (v8i8 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match BSL instruction pattern with non-constant operand:
  // (Rn & Rd) | (Rm & ~Rd) is a bitwise select with mask Rd.
  def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
                    (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match llvm.arm.* intrinsics.
  def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
                    (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
                    (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
                    (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
                    (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
                    (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src),
                    (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
                    (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
                    (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
                    (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
                    (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
                    (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
                    (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
}

// Additional patterns for bitwise instruction BSL
defm: Neon_bitwise3V_patterns<vselect, BSLvvv_8B, BSLvvv_16B>;
544
// A pattern operator whose predicate always returns false: BIT/BIF below
// are never selected from a DAG pattern (they exist for assembly and
// disassembly), but NeonI_3VSame_Constraint_impl requires an operator.
def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
                           (vselect node:$src, node:$Rn, node:$Rm),
                           [{ (void)N; return false; }]>;

// Vector Bitwise Insert if True

def BITvvv_8B  : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64,   v8i8,
                   0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
                   0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;

// Vector Bitwise Insert if False

def BIFvvv_8B  : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64,  v8i8,
                                0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
                                0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
562
// Vector Absolute Difference and Accumulate (Signed, Unsigned)
// ABA: Rd += |Rn - Rm|, built from the ARM absolute-difference intrinsics.
def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;

// Vector Absolute Difference and Accumulate (Unsigned)
def UABAvvv_8B :  NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64,  v8i8,
                    0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
                    0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_4H :  NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64,  v4i16,
                    0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_8H :  NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16,
                    0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_2S :  NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64,  v2i32,
                    0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
def UABAvvv_4S :  NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32,
                    0b1, 0b1, 0b10, 0b01111, Neon_uaba>;

// Vector Absolute Difference and Accumulate (Signed)
def SABAvvv_8B :  NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64,  v8i8,
                    0b0, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
                    0b1, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_4H :  NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64,  v4i16,
                    0b0, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_8H :  NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16,
                    0b1, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_2S :  NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64,  v2i32,
                    0b0, 0b0, 0b10, 0b01111, Neon_saba>;
def SABAvvv_4S :  NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32,
                    0b1, 0b0, 0b10, 0b01111, Neon_saba>;
597
598
// Vector Absolute Difference (Signed, Unsigned)
defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;

// Vector Absolute Difference (Floating Point)
// Note: reuses the int_arm_neon_vabds intrinsic with FP vector types.
defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
                                    int_arm_neon_vabds, int_arm_neon_vabds,
                                    int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Step (Floating Point)
defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
                                       int_arm_neon_vrecps, int_arm_neon_vrecps,
                                       int_arm_neon_vrecps,
                                       v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Square Root Step (Floating Point)
defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        v2f32, v4f32, v2f64, 0>;
620
// Vector Comparisons

// PatFrags that bind a specific CondCode onto the generic three-operand
// NEON_CMP node, so each compare instruction below can match a single
// fixed condition.
def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
                         (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETGT)>;

// NeonI_compare_aliases class: swaps register operands to implement
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
// Note the alias prints/parses $Rn and $Rm in swapped positions relative to
// the underlying instruction; the final 0b0 marks it as a non-printing alias.
class NeonI_compare_aliases<string asmop, string asmlane,
                            Instruction inst, RegisterOperand VPRC>
  : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
                    ", $Rm" # asmlane,
                  (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
641
// Vector Comparisons (Integer)
// Signed/unsigned pairs share an opcode and differ only in the leading
// U bit (e.g. CMHS/CMGE both use 0b00111).

// Vector Compare Mask Equal (Integer)
let isCommutable =1 in {
defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
}

// Vector Compare Mask Higher or Same (Unsigned Integer)
defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;

// Vector Compare Mask Greater Than or Equal (Integer)
defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;

// Vector Compare Mask Higher (Unsigned Integer)
defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;

// Vector Compare Mask Greater Than (Integer)
defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;

// Vector Compare Mask Bitwise Test (Integer)
defm CMTSTvvv:  NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;

// The "less than" family of comparisons has no dedicated encodings; each is
// an assembler alias for the corresponding "greater than" instruction with
// its register operands reversed.

// Vector Compare Mask Less or Same (Unsigned Integer)
// CMLS is alias for CMHS with operands reversed.
def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>;
def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>;
def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>;
def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>;
def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>;
def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>;

// Vector Compare Mask Less Than or Equal (Integer)
// CMLE is alias for CMGE with operands reversed.
def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>;
def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>;
def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>;
def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>;
def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>;
def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>;

// Vector Compare Mask Lower (Unsigned Integer)
// CMLO is alias for CMHI with operands reversed.
def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>;
def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>;
def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>;
def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>;
def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>;
def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>;

// Vector Compare Mask Less Than (Integer)
// CMLT is alias for CMGT with operands reversed.
def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>;
def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>;
def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>;
def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>;
def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>;
def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>;
703
704
// Assembler operand class for an immediate that must be exactly 0
// (used as the zero operand of the compare-against-zero instructions below).
def neon_uimm0_asmoperand : AsmOperandClass
{
  let Name = "UImm0";
  let PredicateMethod = "isUImm<0>";
  let RenderMethod = "addImmOperands";
}

// i32 operand that only matches the constant 0.
def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
  let ParserMatchClass = neon_uimm0_asmoperand;
  let PrintMethod = "printNeonUImm0Operand";

}
717
// Compare-against-zero (integer) instruction family: one two-register-misc
// encoding per lane arrangement.  The immediate operand is constrained to 0
// (neon_uimm0) and the selection pattern matches the NEON_CMPZ node with the
// condition code CC supplied by the instantiation.  Note there is no _1D
// variant; the smallest D-form is the 128-bit _2D.
multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
{
  def _8B :  NeonI_2VMisc<0b0, u, 0b00, opcode,
             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.8b, $Rn.8b, $Imm",
             [(set (v8i8 VPR64:$Rd),
                (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
             NoItinerary>;

  def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.16b, $Rn.16b, $Imm",
             [(set (v16i8 VPR128:$Rd),
                (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
             NoItinerary>;

  def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4h, $Rn.4h, $Imm",
            [(set (v4i16 VPR64:$Rd),
               (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.8h, $Rn.8h, $Imm",
            [(set (v8i16 VPR128:$Rd),
               (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2s, $Rn.2s, $Imm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4s, $Rn.4s, $Imm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2d, $Rn.2d, $Imm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;
}
769
// Integer compare-against-zero instantiations.  Each binds (U bit, opcode,
// mnemonic, condition code) onto NeonI_cmpz_sizes above.

// Vector Compare Mask Equal to Zero (Integer)
defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Signed Integer)
defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;

// Vector Compare Mask Less Than Zero (Signed Integer)
defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
784
// Vector Comparisons (Floating Point)
// FP compares produce integer masks, hence the v2i32/v4i32/v2i64 result
// types in these instantiations.

// Vector Compare Mask Equal (Floating Point)
let isCommutable =1 in {
defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
                                      Neon_cmeq, Neon_cmeq,
                                      v2i32, v4i32, v2i64, 0>;
}

// Vector Compare Mask Greater Than Or Equal (Floating Point)
defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
                                      Neon_cmge, Neon_cmge,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Greater Than (Floating Point)
defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
                                      Neon_cmgt, Neon_cmgt,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Less Than Or Equal (Floating Point)
// FCMLE is alias for FCMGE with operands reversed.
def FCMLEvvv_2S  : NeonI_compare_aliases<"fcmle", ".2s",  FCMGEvvv_2S,  VPR64>;
def FCMLEvvv_4S  : NeonI_compare_aliases<"fcmle", ".4s",  FCMGEvvv_4S,  VPR128>;
def FCMLEvvv_2D  : NeonI_compare_aliases<"fcmle", ".2d",  FCMGEvvv_2D,  VPR128>;

// Vector Compare Mask Less Than (Floating Point)
// FCMLT is alias for FCMGT with operands reversed.
def FCMLTvvv_2S  : NeonI_compare_aliases<"fcmlt", ".2s",  FCMGTvvv_2S,  VPR64>;
def FCMLTvvv_4S  : NeonI_compare_aliases<"fcmlt", ".4s",  FCMGTvvv_4S,  VPR128>;
def FCMLTvvv_2D  : NeonI_compare_aliases<"fcmlt", ".2d",  FCMGTvvv_2D,  VPR128>;
815
816
// Floating-point compare-against-zero family.  The immediate operand is an
// fpz32 (a floating-point zero) for every lane arrangement, including _2D;
// the size field distinguishes the 64-bit-lane encoding ({size, 0b1}).
multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
                              string asmop, CondCode CC>
{
  def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpz32:$FPImm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpz32:$FPImm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpz32:$FPImm), CC)))],
            NoItinerary>;
}
841
// Floating-point compare-against-zero instantiations.

// Vector Compare Mask Equal to Zero (Floating Point)
defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Floating Point)
defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Floating Point)
defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;

// Vector Compare Mask Less Than Zero (Floating Point)
defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
856
// Vector Absolute Comparisons (Floating Point)
// Mixed intrinsic sets: the ARM intrinsics cover the 64-bit (vacged) and
// 128-bit-single (vacgeq) forms; the AArch64-specific intrinsic covers the
// v2f64 form.

// Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
                                      int_arm_neon_vacged, int_arm_neon_vacgeq,
                                      int_aarch64_neon_vacgeq,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Absolute Compare Mask Greater Than (Floating Point)
defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
                                      int_arm_neon_vacgtd, int_arm_neon_vacgtq,
                                      int_aarch64_neon_vacgtq,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
// FACLE is alias for FACGE with operands reversed.
def FACLEvvv_2S  : NeonI_compare_aliases<"facle", ".2s",  FACGEvvv_2S,  VPR64>;
def FACLEvvv_4S  : NeonI_compare_aliases<"facle", ".4s",  FACGEvvv_4S,  VPR128>;
def FACLEvvv_2D  : NeonI_compare_aliases<"facle", ".2d",  FACGEvvv_2D,  VPR128>;

// Vector Absolute Compare Mask Less Than (Floating Point)
// FACLT is alias for FACGT with operands reversed.
def FACLTvvv_2S  : NeonI_compare_aliases<"faclt", ".2s",  FACGTvvv_2S,  VPR64>;
def FACLTvvv_4S  : NeonI_compare_aliases<"faclt", ".4s",  FACGTvvv_4S,  VPR128>;
def FACLTvvv_2D  : NeonI_compare_aliases<"faclt", ".2d",  FACGTvvv_2D,  VPR128>;
882
// Three-same-register integer arithmetic.  The trailing 0/1 flag in each
// instantiation marks commutativity.

// Vector halving add (Integer Signed, Unsigned)
defm SHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
                                        int_arm_neon_vhadds, 1>;
defm UHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
                                        int_arm_neon_vhaddu, 1>;

// Vector halving sub (Integer Signed, Unsigned)
defm SHSUBvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
                                        int_arm_neon_vhsubs, 0>;
defm UHSUBvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
                                        int_arm_neon_vhsubu, 0>;

// Vector rounding halving add (Integer Signed, Unsigned)
defm SRHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
                                         int_arm_neon_vrhadds, 1>;
defm URHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
                                         int_arm_neon_vrhaddu, 1>;

// Vector Saturating add (Integer Signed, Unsigned)
defm SQADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
                   int_arm_neon_vqadds, 1>;
defm UQADDvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
                   int_arm_neon_vqaddu, 1>;

// Vector Saturating sub (Integer Signed, Unsigned)
defm SQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
                   int_arm_neon_vqsubs, 1>;
defm UQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
                   int_arm_neon_vqsubu, 1>;

// Vector Shift Left (Signed and Unsigned Integer)
defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
                 int_arm_neon_vshifts, 1>;
defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
                 int_arm_neon_vshiftu, 1>;

// Vector Saturating Shift Left (Signed and Unsigned Integer)
defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
                  int_arm_neon_vqshifts, 1>;
defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
                  int_arm_neon_vqshiftu, 1>;

// Vector Rounding Shift Left (Signed and Unsigned Integer)
defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
                  int_arm_neon_vrshifts, 1>;
defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
                  int_arm_neon_vrshiftu, 1>;

// Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
                   int_arm_neon_vqrshifts, 1>;
defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
                   int_arm_neon_vqrshiftu, 1>;

// Vector Maximum (Signed and Unsigned Integer)
defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;

// Vector Minimum (Signed and Unsigned Integer)
defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
944
// Vector Maximum (Floating Point)
defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
                                     int_arm_neon_vmaxs, int_arm_neon_vmaxs,
                                     int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;

// Vector Minimum (Floating Point)
defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
                                     int_arm_neon_vmins, int_arm_neon_vmins,
                                     int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;

// Vector maxNum (Floating Point) - prefers a number over a quiet NaN
defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
                                       int_aarch64_neon_vmaxnm,
                                       int_aarch64_neon_vmaxnm,
                                       int_aarch64_neon_vmaxnm,
                                       v2f32, v4f32, v2f64, 1>;

// Vector minNum (Floating Point) - prefers a number over a quiet NaN
defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
                                       int_aarch64_neon_vminnm,
                                       int_aarch64_neon_vminnm,
                                       int_aarch64_neon_vminnm,
                                       v2f32, v4f32, v2f64, 1>;

// Vector Maximum Pairwise (Signed and Unsigned Integer)
defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;

// Vector Minimum Pairwise (Signed and Unsigned Integer)
defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;

// Vector Maximum Pairwise (Floating Point)
defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
                                     int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
                                     int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;

// Vector Minimum Pairwise (Floating Point)
defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
                                     int_arm_neon_vpmins, int_arm_neon_vpmins,
                                     int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;

// Vector maxNum Pairwise (Floating Point) - prefers a number over a quiet NaN
defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
                                       int_aarch64_neon_vpmaxnm,
                                       int_aarch64_neon_vpmaxnm,
                                       int_aarch64_neon_vpmaxnm,
                                       v2f32, v4f32, v2f64, 1>;

// Vector minNum Pairwise (Floating Point) - prefers a number over a quiet NaN
defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
                                       int_aarch64_neon_vpminnm,
                                       int_aarch64_neon_vpminnm,
                                       int_aarch64_neon_vpminnm,
                                       v2f32, v4f32, v2f64, 1>;

// Vector Addition Pairwise (Integer)
defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;

// Vector Addition Pairwise (Floating Point)
// Uses the same overloaded vpadd intrinsic for all three FP lane shapes.
defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
                                       int_arm_neon_vpadd,
                                       int_arm_neon_vpadd,
                                       int_arm_neon_vpadd,
                                       v2f32, v4f32, v2f64, 1>;
1010
// Vector Saturating Doubling Multiply High
defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
                    int_arm_neon_vqdmulh, 1>;

// Vector Saturating Rounding Doubling Multiply High
defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
                     int_arm_neon_vqrdmulh, 1>;

// Vector Multiply Extended (Floating Point)
defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
                                      int_aarch64_neon_vmulx,
                                      int_aarch64_neon_vmulx,
                                      int_aarch64_neon_vmulx,
                                      v2f32, v4f32, v2f64, 1>;
1025
// Patterns to match llvm.aarch64.* intrinsic for
// ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output
// A v1i32 across-vector reduction of a v2i32 is implemented by the pairwise
// instruction with $Rn as both inputs, then extracting the low 32 bits.
class Neon_VectorPair_v2i32_pattern<SDPatternOperator opnode, Instruction INST>
  : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))),
        (EXTRACT_SUBREG
             (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))),
             sub_32)>;

def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_sminv, SMINPvvv_2S>;
def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_uminv, UMINPvvv_2S>;
def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_smaxv, SMAXPvvv_2S>;
def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_umaxv, UMAXPvvv_2S>;
def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_vaddv, ADDP_2S>;
1039
// Vector Immediate Instructions

// Generates the AsmOperandClass for one family of move-immediate shift
// operands (LSL / MSL / LSLH); PREFIX selects the parser/predicate/render
// methods implemented in the asm parser.
multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
{
  def _asmoperand : AsmOperandClass
    {
      let Name = "NeonMovImmShift" # PREFIX;
      let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
      let PredicateMethod = "isNeonMovImmShift" # PREFIX;
    }
}
1051
// Definition of vector immediates shift operands

// The selectable use-cases extract the shift operation
// information from the OpCmode fields encoded in the immediate.
// Transforms an OpCmode immediate into the decoded shift amount.
def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
  uint64_t OpCmode = N->getZExtValue();
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  unsigned HasShift =
    A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
  // NOTE(review): returning a null SDValue from an XForm is unusual; the
  // ImmLeaf predicates below already reject non-shift OpCmodes, so this path
  // should be unreachable -- confirm.
  if (!HasShift) return SDValue();
  return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
}]>;

// Vector immediates shift operands which accept LSL and MSL
// shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
// or 0, 8 (LSLH) or 8, 16 (MSL).
defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
// LSLH restricts shift amount to  0, 8 out of 0, 8, 16, 24
defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
1073
// Builds the i32 shift operand for one shift family: the ImmLeaf predicate
// `pred` validates the encoded OpCmode, and neon_mod_shift_imm_XFORM decodes
// it to the shift amount.  HALF selects the asmoperand variant ("H" for the
// halfword-restricted form) and ISHALF is forwarded to the print/decode
// method templates.
multiclass neon_mov_imm_shift_operands<string PREFIX,
                                       string HALF, string ISHALF, code pred>
{
   def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
    {
      let PrintMethod =
        "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
      let DecoderMethod =
        "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
      let ParserMatchClass =
        !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
    }
}

// LSL: OpCmode decodes to a shift with zeros shifted in.
defm neon_mov_imm_LSL  : neon_mov_imm_shift_operands<"LSL", "", "false", [{
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  unsigned HasShift =
    A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
  return (HasShift && !ShiftOnesIn);
}]>;

// MSL: OpCmode decodes to a shift with ones shifted in.
defm neon_mov_imm_MSL  : neon_mov_imm_shift_operands<"MSL", "", "false", [{
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  unsigned HasShift =
    A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
  return (HasShift && ShiftOnesIn);
}]>;

// LSLH: halfword-restricted LSL (parser limits the amount to 0 or 8 via the
// LSLH asmoperand; the ImmLeaf predicate is the same zeros-shifted-in check).
defm neon_mov_imm_LSLH  : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  unsigned HasShift =
    A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
  return (HasShift && !ShiftOnesIn);
}]>;
1111
// Small unsigned-immediate assembler operand classes (isUImm<N> range check,
// standard immediate rendering).
def neon_uimm1_asmoperand : AsmOperandClass
{
  let Name = "UImm1";
  let PredicateMethod = "isUImm<1>";
  let RenderMethod = "addImmOperands";
}

def neon_uimm2_asmoperand : AsmOperandClass
{
  let Name = "UImm2";
  let PredicateMethod = "isUImm<2>";
  let RenderMethod = "addImmOperands";
}

def neon_uimm8_asmoperand : AsmOperandClass
{
  let Name = "UImm8";
  let PredicateMethod = "isUImm<8>";
  let RenderMethod = "addImmOperands";
}

// 8-bit immediate operand; the ImmLeaf accepts any immediate (the asm
// operand class enforces the range when parsing).
def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
  let ParserMatchClass = neon_uimm8_asmoperand;
  let PrintMethod = "printUImmHexOperand";
}

def neon_uimm64_mask_asmoperand : AsmOperandClass
{
  let Name = "NeonUImm64Mask";
  let PredicateMethod = "isNeonUImm64Mask";
  let RenderMethod = "addNeonUImm64MaskOperands";
}

// MCOperand for 64-bit bytemask with each byte having only the
// value 0x00 and 0xff is encoded as an unsigned 8-bit value
def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
  let ParserMatchClass = neon_uimm64_mask_asmoperand;
  let PrintMethod = "printNeonUImm64MaskOperand";
}
1151
// Move-immediate with LSL shift: 8-bit immediate $Imm optionally shifted
// left by $Simm.  Word variants (_2S/_4S) take the full LSL operand
// (0/8/16/24); halfword variants (_4H/_8H) take the restricted LSLH operand
// (0/8).  The shift amount is folded into the cmode encoding field.
multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
                                   SDPatternOperator opnode>
{
    // shift zeros, per word
    def _2S  : NeonI_1VModImm<0b0, op,
                              (outs VPR64:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_LSL_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
                              [(set (v2i32 VPR64:$Rd),
                                 (v2i32 (opnode (timm:$Imm),
                                   (neon_mov_imm_LSL_operand:$Simm))))],
                              NoItinerary> {
       bits<2> Simm;
       // cmode = 0xx0: 32-bit shifted-immediate, shift in Simm.
       let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
     }

    def _4S  : NeonI_1VModImm<0b1, op,
                              (outs VPR128:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_LSL_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
                              [(set (v4i32 VPR128:$Rd),
                                 (v4i32 (opnode (timm:$Imm),
                                   (neon_mov_imm_LSL_operand:$Simm))))],
                              NoItinerary> {
      bits<2> Simm;
      let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
    }

    // shift zeros, per halfword
    def _4H  : NeonI_1VModImm<0b0, op,
                              (outs VPR64:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_LSLH_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
                              [(set (v4i16 VPR64:$Rd),
                                 (v4i16 (opnode (timm:$Imm),
                                   (neon_mov_imm_LSLH_operand:$Simm))))],
                              NoItinerary> {
      bit  Simm;
      // cmode = 10x0: 16-bit shifted-immediate, single shift bit.
      let cmode = {0b1, 0b0, Simm, 0b0};
    }

    def _8H  : NeonI_1VModImm<0b1, op,
                              (outs VPR128:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_LSLH_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
                              [(set (v8i16 VPR128:$Rd),
                                 (v8i16 (opnode (timm:$Imm),
                                   (neon_mov_imm_LSLH_operand:$Simm))))],
                              NoItinerary> {
      bit Simm;
      let cmode = {0b1, 0b0, Simm, 0b0};
     }
}
1209
// Move-immediate with LSL shift and a tied source register: $src is
// constrained to $Rd, so the shifted immediate (materialized by
// `neonopnode`, e.g. NEON_MOVIMM/NEON_MVNIMM) is combined with the existing
// register contents via `opnode`.  Word variants (_2S/_4S) use the full LSL
// shift operand (0/8/16/24); halfword variants (_4H/_8H) use the restricted
// LSLH operand (0/8).
multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
                                                   SDPatternOperator opnode,
                                                   SDPatternOperator neonopnode>
{
  let Constraints = "$src = $Rd" in {
    // shift zeros, per word
    def _2S  : NeonI_1VModImm<0b0, op,
                 (outs VPR64:$Rd),
                 (ins VPR64:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSL_operand:$Simm),
                 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
                 [(set (v2i32 VPR64:$Rd),
                    (v2i32 (opnode (v2i32 VPR64:$src),
                      (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
                        neon_mov_imm_LSL_operand:$Simm)))))))],
                 NoItinerary> {
      bits<2> Simm;
      // cmode = 0xx1: 32-bit shifted-immediate, accumulating form.
      let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
    }

    def _4S  : NeonI_1VModImm<0b1, op,
                 (outs VPR128:$Rd),
                 (ins VPR128:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSL_operand:$Simm),
                 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
                 [(set (v4i32 VPR128:$Rd),
                    (v4i32 (opnode (v4i32 VPR128:$src),
                      (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
                        neon_mov_imm_LSL_operand:$Simm)))))))],
                 NoItinerary> {
      bits<2> Simm;
      let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
    }

    // shift zeros, per halfword
    // FIX: the selection patterns below previously named
    // neon_mov_imm_LSL_operand:$Simm while the (ins) list declares
    // neon_mov_imm_LSLH_operand:$Simm; the pattern operand must be the same
    // operand record so the matched shift is bound to the instruction's
    // $Simm (compare _4H/_8H of NeonI_mov_imm_lsl_sizes, which agree).
    def _4H  : NeonI_1VModImm<0b0, op,
                 (outs VPR64:$Rd),
                 (ins VPR64:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSLH_operand:$Simm),
                 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
                 [(set (v4i16 VPR64:$Rd),
                    (v4i16 (opnode (v4i16 VPR64:$src),
                       (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
                          neon_mov_imm_LSLH_operand:$Simm)))))))],
                 NoItinerary> {
      bit  Simm;
      // cmode = 10x1: 16-bit shifted-immediate, accumulating form.
      let cmode = {0b1, 0b0, Simm, 0b1};
    }

    def _8H  : NeonI_1VModImm<0b1, op,
                 (outs VPR128:$Rd),
                 (ins VPR128:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSLH_operand:$Simm),
                 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
                 [(set (v8i16 VPR128:$Rd),
                    (v8i16 (opnode (v8i16 VPR128:$src),
                      (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
                        neon_mov_imm_LSLH_operand:$Simm)))))))],
                 NoItinerary> {
      bit Simm;
      let cmode = {0b1, 0b0, Simm, 0b1};
    }
  }
}
1274
// Vector Move Immediate, MSL ("shift ones") variant: MOVI/MVNI of an 8-bit
// immediate shifted left per 32-bit lane, with the vacated low bits filled
// with ones.  cmode = 110x, where bit 0 (Simm) selects MSL #8 vs MSL #16.
multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
                                   SDPatternOperator opnode>
{
    // shift ones, per word: 64-bit form (2 x 32-bit lanes).
    def _2S  : NeonI_1VModImm<0b0, op,
                             (outs VPR64:$Rd),
                             (ins neon_uimm8:$Imm,
                               neon_mov_imm_MSL_operand:$Simm),
                             !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
                              [(set (v2i32 VPR64:$Rd),
                                 (v2i32 (opnode (timm:$Imm),
                                   (neon_mov_imm_MSL_operand:$Simm))))],
                             NoItinerary> {
       bit Simm;
       // cmode<0> encodes the MSL amount (0 -> #8, 1 -> #16).
       let cmode = {0b1, 0b1, 0b0, Simm};
     }

   // 128-bit form (4 x 32-bit lanes).
   def _4S  : NeonI_1VModImm<0b1, op,
                              (outs VPR128:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_MSL_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
                              [(set (v4i32 VPR128:$Rd),
                                 (v4i32 (opnode (timm:$Imm),
                                   (neon_mov_imm_MSL_operand:$Simm))))],
                              NoItinerary> {
     bit Simm;
     let cmode = {0b1, 0b1, 0b0, Simm};
   }
}
1305
// Vector Move Immediate Shifted (MOVI Vd.<T>, #imm8, LSL #s)
let isReMaterializable = 1 in {
defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
}

// Vector Move Inverted Immediate Shifted (MVNI materializes ~(imm8 << s))
let isReMaterializable = 1 in {
defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
}

// Vector Bitwise Bit Clear (AND NOT) - immediate.
// Matches (and Vd, (Neon_mvni imm, s)), i.e. Vd & ~(imm8 << s).
let isReMaterializable = 1 in {
defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
                                                         and, Neon_mvni>;
}

// Vector Bitwise OR - immediate.
// Matches (or Vd, (Neon_movi imm, s)), i.e. Vd | (imm8 << s).
let isReMaterializable = 1 in {
defm ORRvi_lsl   : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
                                                           or, Neon_movi>;
}
1328
// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate.
// LowerBUILD_VECTOR favors lowering MOVI over MVNI, so BIC immediate
// instruction selection requires additional patterns that transform
// Neon_movi operands into the equivalent BIC immediate operands.
1333
// Rewrites an encoded LSLH shift operand to the opposite shift amount
// (used when converting a MOVI halfword mask into the complementary BIC).
def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
  uint64_t OpCmode = N->getZExtValue();
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
  // LSLH restricts shift amount to 0 or 8, encoded as 0 and 1 respectively.
  // Transform encoded shift amount 0 to 1 and 1 to 0.
  return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
}]>;

// Matches an i32 immediate that decodes as an LSL (shift-zeros, not MSL)
// modified-immediate shift; on match, emits the flipped shift via the
// XFORM above.
def neon_mov_imm_LSLH_transform_operand
  : ImmLeaf<i32, [{
    unsigned ShiftImm;
    unsigned ShiftOnesIn;
    unsigned HasShift =
      A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
    return (HasShift && !ShiftOnesIn); }],
  neon_mov_imm_LSLH_transform_XFORM>;
1352
// An AND with a 0xff-per-halfword MOVI mask keeps one byte of each halfword.
// BIC computes Rd & ~(imm8 << shift), so the equivalent BIC uses the SAME
// 0xff immediate at the *flipped* shift (0 <-> 8, done by the XFORM above).
// Using immediate 0 here would make BIC a no-op (Rd & ~0) and drop the AND.
// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0xff, LSL 8)
// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0xff)
def : Pat<(v4i16 (and VPR64:$src,
            (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
          (BICvi_lsl_4H VPR64:$src, 255,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;

// Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0xff, LSL 8)
// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0xff)
def : Pat<(v8i16 (and VPR128:$src,
            (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
          (BICvi_lsl_8H VPR128:$src, 255,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;
1366
1367
// Extends the 4h/8h BIC/ORR-immediate selection to the other vector types:
// the same per-halfword modified immediate, viewed through a bitconvert,
// implements the bitwise op on 8b/1d (64-bit) and 16b/4s/2d (128-bit)
// vectors.  opnode is the DAG operation (and/or); neonopnode is the
// immediate materializer it pairs with (Neon_mvni/Neon_movi).
multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
                                   SDPatternOperator neonopnode,
                                   Instruction INST4H,
                                   Instruction INST8H> {
  def : Pat<(v8i8 (opnode VPR64:$src,
                    (bitconvert(v4i16 (neonopnode timm:$Imm,
                      neon_mov_imm_LSLH_operand:$Simm))))),
            (INST4H VPR64:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v1i64 (opnode VPR64:$src,
                  (bitconvert(v4i16 (neonopnode timm:$Imm,
                    neon_mov_imm_LSLH_operand:$Simm))))),
          (INST4H VPR64:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;

  def : Pat<(v16i8 (opnode VPR128:$src,
                   (bitconvert(v8i16 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST8H VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v4i32 (opnode VPR128:$src,
                   (bitconvert(v8i16 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST8H VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v2i64 (opnode VPR128:$src,
                   (bitconvert(v8i16 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST8H VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
}
1399
// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate.
// BIC computes src & ~(Imm << Simm) and Neon_mvni materializes exactly
// ~(Imm << Simm), so the DAG operation to match is 'and' (matching 'or'
// here would mis-select an OR into a BIC).
defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;

// Additional patterns for Vector Bitwise OR - immediate
defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1405
1406
// Vector Move Immediate Masked (MOVI Vd.<T>, #imm8, MSL #s)
let isReMaterializable = 1 in {
defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
}

// Vector Move Inverted Immediate Masked (MVNI Vd.<T>, #imm8, MSL #s)
let isReMaterializable = 1 in {
defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
}
1416
// Alias so the no-shift spelling "<op> $Rd.<T>, $Imm" matches the LSL-form
// instruction with an implicit shift operand of 0.  The trailing 0b0
// presumably disables emission, making this a parse-only alias — confirm.
// NOTE(review): the string concatenates "$Rd," with asmlane (e.g. ".2s"),
// producing "$Rd,.2s" rather than "$Rd.2s"; verify the asm matcher really
// tokenizes this as intended.
class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
                                Instruction inst, RegisterOperand VPRC>
  : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
                        (inst VPRC:$Rd, neon_uimm8:$Imm,  0), 0b0>;
1421
// Aliases for Vector Move Immediate Shifted
def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;

// Aliases for Vector Move Inverted Immediate Shifted
def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;

// Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;

// Aliases for Vector Bitwise OR - immediate
def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1445
//  Vector Move Immediate - per byte (MOVI Vd.8b/16b, #imm8; cmode = 1110)
let isReMaterializable = 1 in {
def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
                               (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
                               "movi\t$Rd.8b, $Imm",
                               [(set (v8i8 VPR64:$Rd),
                                  (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
                                NoItinerary> {
  let cmode = 0b1110;
}

def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
                                (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
                                "movi\t$Rd.16b, $Imm",
                                [(set (v16i8 VPR128:$Rd),
                                   (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
                                 NoItinerary> {
  let cmode = 0b1110;
}
}
1466
// Vector Move Immediate - bytemask, per double word.
// Asm strings previously read "movi\t $Rd..." — the stray blank after the
// tab printed a spurious space, inconsistent with every other MOVI form.
let isReMaterializable = 1 in {
def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
                               (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
                               "movi\t$Rd.2d, $Imm",
                               [(set (v2i64 VPR128:$Rd),
                                  (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
                               NoItinerary> {
  let cmode = 0b1110;
}
}

// Vector Move Immediate - bytemask, one doubleword (scalar D register)

let isReMaterializable = 1 in {
def MOVIdi : NeonI_1VModImm<0b0, 0b1,
                           (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
                           "movi\t$Rd, $Imm",
                           [(set (v1i64 FPR64:$Rd),
                             (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
                           NoItinerary> {
  let cmode = 0b1110;
}
}
1491
// Vector Floating Point Move Immediate (FMOV Vd.<T>, #fpimm; cmode = 1111)

class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
                      Operand immOpType, bit q, bit op>
  : NeonI_1VModImm<q, op,
                   (outs VPRC:$Rd), (ins immOpType:$Imm),
                   "fmov\t$Rd" # asmlane # ", $Imm",
                   [(set (OpTy VPRC:$Rd),
                      (OpTy (Neon_fmovi (timm:$Imm))))],
                   NoItinerary> {
     let cmode = 0b1111;
   }

let isReMaterializable = 1 in {
def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64,  v2f32, fmov32_operand, 0b0, 0b0>;
def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
}

// Vector Shift (Immediate)
// Immediate in [0, 63]
def imm0_63 : Operand<i32> {
  let ParserMatchClass = uimm6_asmoperand;
}
1516
1517 // Shift Right/Left Immediate - The immh:immb field of these shifts are encoded
1518 // as follows:
1519 //
1520 //    Offset    Encoding
1521 //     8        immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1522 //     16       immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1523 //     32       immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1524 //     64       immh:immb<6>   = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1525 //
1526 // The shift right immediate amount, in the range 1 to element bits, is computed
1527 // as Offset - UInt(immh:immb).  The shift left immediate amount, in the range 0
1528 // to element bits - 1, is computed as UInt(immh:immb) - Offset.
1529
// Asm-operand class for a shift-right immediate in (0, OFFSET].
class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
  let Name = "ShrImm" # OFFSET;
  let RenderMethod = "addImmOperands";
  let DiagnosticType = "ShrImm" # OFFSET;
}

// Shift-right immediate operand: encoded as OFFSET - <imm> in immh:immb
// (see the encoding table above), via the per-width encoder/decoder hooks.
class shr_imm<string OFFSET> : Operand<i32> {
  let EncoderMethod = "getShiftRightImm" # OFFSET;
  let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
  let ParserMatchClass =
    !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
}

def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;

// Shift right: valid amounts are 1..element-bits inclusive.
def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>;
def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>;
def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>;
def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>;

// Asm-operand class for a shift-left immediate in [0, OFFSET).
class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
  let Name = "ShlImm" # OFFSET;
  let RenderMethod = "addImmOperands";
  let DiagnosticType = "ShlImm" # OFFSET;
}

// Shift-left immediate operand: encoded as OFFSET + <imm> in immh:immb.
class shl_imm<string OFFSET> : Operand<i32> {
  let EncoderMethod = "getShiftLeftImm" # OFFSET;
  let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
  let ParserMatchClass =
    !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
}

def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;

// Shift left: valid amounts are 0..element-bits-1 inclusive.
def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>;
def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>;
def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>;
def shl_imm64 : shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>;
1575
// Base class for vector shift-by-immediate where the DAG models the shift
// amount as a splatted vector (Neon_vdup), as 'shl'/'sra'/'srl' expect.
class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
               RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                     [(set (Ty VPRC:$Rd),
                        (Ty (OpNode (Ty VPRC:$Rn),
                          (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
                     NoItinerary>;

// Shift-left-by-immediate: one record per vector arrangement; the
// Inst{22-19} prefix selects the element size (see immh:immb table above).
multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
  // 64-bit vector types.
  def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types.
  def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
1617
// Shift-right-by-immediate (e.g. sshr/ushr): one record per vector
// arrangement, mirroring NeonI_N2VShL above.  The Inst{22-19} prefix
// selects the element size; the shr_imm* operands handle the
// Offset - <imm> encoding of immh:immb.
multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
  // 64-bit vector types.
  def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                     OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                     OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                     OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types.
  def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                      OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                     OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                     OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                     OpNode> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
1654
// Shift left (logical)
defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;

// Shift right (arithmetic = sshr, logical = ushr)
defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
1661
// PatFrags selecting the high half of a 128-bit vector (upper lanes,
// starting at the middle index).
def Neon_High16B : PatFrag<(ops node:$in),
                           (extract_subvector (v16i8 node:$in), (iPTR 8))>;
def Neon_High8H  : PatFrag<(ops node:$in),
                           (extract_subvector (v8i16 node:$in), (iPTR 4))>;
def Neon_High4S  : PatFrag<(ops node:$in),
                           (extract_subvector (v4i32 node:$in), (iPTR 2))>;
def Neon_High2D  : PatFrag<(ops node:$in),
                           (extract_subvector (v2i64 node:$in), (iPTR 1))>;
def Neon_High4float : PatFrag<(ops node:$in),
                               (extract_subvector (v4f32 node:$in), (iPTR 2))>;
def Neon_High2double : PatFrag<(ops node:$in),
                               (extract_subvector (v2f64 node:$in), (iPTR 1))>;

// PatFrags selecting the low half of a 128-bit vector (index 0).
def Neon_Low16B : PatFrag<(ops node:$in),
                          (v8i8 (extract_subvector (v16i8 node:$in),
                                                   (iPTR 0)))>;
def Neon_Low8H : PatFrag<(ops node:$in),
                         (v4i16 (extract_subvector (v8i16 node:$in),
                                                   (iPTR 0)))>;
def Neon_Low4S : PatFrag<(ops node:$in),
                         (v2i32 (extract_subvector (v4i32 node:$in),
                                                   (iPTR 0)))>;
def Neon_Low2D : PatFrag<(ops node:$in),
                         (v1i64 (extract_subvector (v2i64 node:$in),
                                                   (iPTR 0)))>;
def Neon_Low4float : PatFrag<(ops node:$in),
                             (v2f32 (extract_subvector (v4f32 node:$in),
                                                       (iPTR 0)))>;
def Neon_Low2double : PatFrag<(ops node:$in),
                              (v1f64 (extract_subvector (v2f64 node:$in),
                                                        (iPTR 0)))>;
1693
// Widening shift-left: extend (sext/zext) the 64-bit source, then shift
// left by the splatted immediate (sshll/ushll, low-half form).
class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                   string SrcT, ValueType DestTy, ValueType SrcTy,
                   Operand ImmTy, SDPatternOperator ExtOp>
  : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
                     (ins VPR64:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [(set (DestTy VPR128:$Rd),
                        (DestTy (shl
                          (DestTy (ExtOp (SrcTy VPR64:$Rn))),
                            (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
                     NoItinerary>;

// "<asmop>2" form: widening shift-left of the HIGH half of a 128-bit
// source, extracted via the getTop PatFrag.  Note the StartIndex parameter
// is not referenced in the pattern (the PatFrag supplies the split point).
class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                       string SrcT, ValueType DestTy, ValueType SrcTy,
                       int StartIndex, Operand ImmTy,
                       SDPatternOperator ExtOp, PatFrag getTop>
  : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
                     (ins VPR128:$Rn, ImmTy:$Imm),
                     asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [(set (DestTy VPR128:$Rd),
                        (DestTy (shl
                          (DestTy (ExtOp
                            (SrcTy (getTop VPR128:$Rn)))),
                              (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
                     NoItinerary>;
1719
// Widening shift-left-long (sshll/ushll + the "2" high-half forms), plus
// zero-shift patterns so a plain extend is also selected to <asmop> #0.
multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
                         SDNode ExtOp> {
  // 64-bit vector types.
  def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
                         shl_imm8, ExtOp> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
                         shl_imm16, ExtOp> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
                         shl_imm32, ExtOp> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types
  def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
                              8, shl_imm8, ExtOp, Neon_High16B> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
                             4, shl_imm16, ExtOp, Neon_High8H> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
                             2, shl_imm32, ExtOp, Neon_High4S> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // Use other patterns to match when the immediate is 0.
  def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;

  def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;

  def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;

  def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
            (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;

  def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
            (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;

  def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
            (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
}
1773
// Shift left long (signed/unsigned widening)
defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;

// Rounding/Saturating shift: here the DAG operator (an intrinsic or target
// node) takes the shift amount as a plain i32, not a splatted vector.
class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                  SDPatternOperator OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                     [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
                        (i32 ImmTy:$Imm))))],
                     NoItinerary>;
1788
// shift right (vector by immediate), rounding/saturating variants
multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
                           SDPatternOperator OpNode> {
  def _8B  : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H  : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                         OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S  : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                         OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                        OpNode> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
1827
// shift left (vector by immediate), saturating variants
multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
                          SDPatternOperator OpNode> {
  // 64-bit vector types.
  def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
                        OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types.
  def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
                        OpNode> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
1867
// Rounding shift right
defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
                                int_aarch64_neon_vsrshr>;
defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
                                int_aarch64_neon_vurshr>;

// Saturating shift left unsigned
defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;

// Saturating shift left
defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;

// Shift-and-accumulate: Rd += (Rn OpNode splat(Imm)); $src is tied to $Rd.
class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                  SDNode OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
           (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
           asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
           [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
              (Ty (OpNode (Ty VPRC:$Rn),
                (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
           NoItinerary> {
  let Constraints = "$src = $Rd";
}
1893
// Shift Right accumulate (ssra/usra): one record per vector arrangement.
multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
  def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                        OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                        OpNode> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
1931
// Shift right and accumulate
defm SSRAvvi    : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
defm USRAvvi    : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;

// Rounding shift accumulate: like N2VShiftAdd, but the DAG operator takes
// the shift amount as a plain i32 (intrinsic form); $src is tied to $Rd.
class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
                    RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                    SDPatternOperator OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                     [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
                        (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
                     NoItinerary> {
  let Constraints = "$src = $Rd";
}
1948
// Rounding shift right and accumulate (srsra/ursra), per arrangement.
multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
                             SDPatternOperator OpNode> {
  def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                          OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                          OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                          OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                           OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                          OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                          OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                          OpNode> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
1986
1987 // Rounding shift right and accumulate
1988 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1989 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1990
// Shift insert by immediate: base class for SLI/SRI. The destination is
// both read and written (inserted bits come from $Rn shifted by $Imm, the
// remaining bits keep their old value), hence the "$src = $Rd" tie.
class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                  SDPatternOperator OpNode>
    : NeonI_2VShiftImm<q, u, opcode,
           (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
           asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
           [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
             (i32 ImmTy:$Imm))))],
           NoItinerary> {
  let Constraints = "$src = $Rd";
}
2003
// Shift left insert (vector, by immediate): all N2VShiftIns instantiations
// use the vsli intrinsic; the immh bits in Inst{22-19} encode element size.
multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
  // 64-bit vector types
  def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
                        int_aarch64_neon_vsli> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
                        int_aarch64_neon_vsli> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
                        int_aarch64_neon_vsli> {
    let Inst{22-21} = 0b01;
  }

    // 128-bit vector types
  def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
                         int_aarch64_neon_vsli> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
                        int_aarch64_neon_vsli> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
                        int_aarch64_neon_vsli> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
                        int_aarch64_neon_vsli> {
    let Inst{22} = 0b1;
  }
}
2042
// Shift right insert (vector, by immediate): mirrors NeonI_N2VShLIns but
// uses right-shift immediates and the vsri intrinsic.
multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
    // 64-bit vector types.
  def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                        int_aarch64_neon_vsri> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                        int_aarch64_neon_vsri> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                        int_aarch64_neon_vsri> {
    let Inst{22-21} = 0b01;
  }

    // 128-bit vector types
  def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                         int_aarch64_neon_vsri> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                        int_aarch64_neon_vsri> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                        int_aarch64_neon_vsri> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                        int_aarch64_neon_vsri> {
    let Inst{22} = 0b1;
  }
}
2082
// Shift left and insert (SLI)
defm SLIvvi   : NeonI_N2VShLIns<0b1, 0b01010, "sli">;

// Shift right and insert (SRI)
defm SRIvvi   : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
2088
// Shift right narrow (128-bit source, 64-bit result); no ISel pattern here —
// selection is done via the Neon_shiftNarrow_* Pat definitions below.
class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                    string SrcT, Operand ImmTy>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [], NoItinerary>;
2095
// Shift right narrow into the high half ("2" forms): the low half of $Rd is
// preserved through the tied $src operand, the narrowed result is written to
// the high half.
class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                       string SrcT, Operand ImmTy>
  : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
                     (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [], NoItinerary> {
  let Constraints = "$src = $Rd";
}
2104
// Shift right narrow by immediate: lower-half forms (_8B/_4H/_2S) plus the
// high-half "2" forms (_16B/_8H/_4S). (The old comment said "left long
// shift", which was wrong — these are right-shift narrowing instructions.)
multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
  def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
    let Inst{22-21} = 0b01;
  }

  // Shift Narrow High
  def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
                              shr_imm8> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
                             shr_imm16> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
                             shr_imm32> {
    let Inst{22-21} = 0b01;
  }
}
2135
// Shift right narrow
defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;

// Shift right narrow (prefix Q is saturating, prefix R is rounding)
defm QSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
2147
// PatFrags that combine two 64-bit halves into one 128-bit vector via
// concat_vectors ($Rm becomes the low half, $Rn the high half).
def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
                              (v2i64 (concat_vectors (v1i64 node:$Rm),
                                                     (v1i64 node:$Rn)))>;
def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
                              (v8i16 (concat_vectors (v4i16 node:$Rm),
                                                     (v4i16 node:$Rn)))>;
def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
                              (v4i32 (concat_vectors (v2i32 node:$Rm),
                                                     (v2i32 node:$Rn)))>;
def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
                              (v4f32 (concat_vectors (v2f32 node:$Rm),
                                                     (v2f32 node:$Rn)))>;
def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
                              (v2f64 (concat_vectors (v1f64 node:$Rm),
                                                     (v1f64 node:$Rn)))>;
2163
// PatFrags matching a vector shift by a scalar immediate: the immediate is
// splatted (Neon_vdup) and used as the per-lane shift amount for srl
// (logical) or sra (arithmetic).
def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
                             (v8i16 (srl (v8i16 node:$lhs),
                               (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
                             (v4i32 (srl (v4i32 node:$lhs),
                               (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
                             (v2i64 (srl (v2i64 node:$lhs),
                               (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
                             (v8i16 (sra (v8i16 node:$lhs),
                               (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
                             (v4i32 (sra (v4i32 node:$lhs),
                               (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
                             (v2i64 (sra (v2i64 node:$lhs),
                               (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2182
// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors).
// The first three patterns select the lower-half SHRN forms; the
// Neon_combine_2D patterns select the high-half "shrn2" forms, re-using the
// caller's low half via SUBREG_TO_REG.
multiclass Neon_shiftNarrow_patterns<string shr> {
  def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
              (i32 shr_imm8:$Imm)))),
            (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
              (i32 shr_imm16:$Imm)))),
            (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
              (i32 shr_imm32:$Imm)))),
            (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;

  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
              (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
                VPR128:$Rn, (i32 shr_imm8:$Imm))))))),
            (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
                         VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
              (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
                VPR128:$Rn, (i32 shr_imm16:$Imm))))))),
            (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                        VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
              (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
                VPR128:$Rn, (i32 shr_imm32:$Imm))))))),
            (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                        VPR128:$Rn, imm:$Imm)>;
}
2211
// Saturating/rounding shift-right-narrow patterns: the lower-half intrinsic
// calls map directly to the instruction; the Neon_combine_2D forms select the
// high-half "2" variants, feeding the preserved low half in via
// SUBREG_TO_REG.
multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
  def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)),
            (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)),
            (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)),
            (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;

  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                (v1i64 (bitconvert (v8i8
                    (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))),
            (!cast<Instruction>(prefix # "_16B")
                (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                (v1i64 (bitconvert (v4i16
                    (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))),
            (!cast<Instruction>(prefix # "_8H")
                (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                (v1i64 (bitconvert (v2i32
                    (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))),
            (!cast<Instruction>(prefix # "_4S")
                  (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                  VPR128:$Rn, imm:$Imm)>;
}
2239
// Instantiate the narrow-shift patterns for logical and arithmetic shifts...
defm : Neon_shiftNarrow_patterns<"lshr">;
defm : Neon_shiftNarrow_patterns<"ashr">;

// ...and for each saturating/rounding narrow-shift intrinsic.
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2250
// Conversion between fixed-point and floating-point (vector, by immediate);
// $Imm is passed to the intrinsic as an i32 (presumably the number of
// fractional bits — matches the ARM fixed-point convert intrinsics).
class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
                RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
                Operand ImmTy, SDPatternOperator IntOp>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                     [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
                       (i32 ImmTy:$Imm))))],
                     NoItinerary>;
2261
// Fixed-point to floating-point conversions (2s/4s/2d element arrangements).
multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
                              SDPatternOperator IntOp> {
  def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;
  }

  def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
                      shr_imm64, IntOp> {
    let Inst{22} = 0b1;
  }
}
2279
// Floating-point to fixed-point conversions (2s/4s/2d element arrangements).
multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
                              SDPatternOperator IntOp> {
  def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;
  }

  def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
                      shr_imm64, IntOp> {
    let Inst{22} = 0b1;
  }
}
2297
// Convert fixed-point to floating-point (scvtf signed, ucvtf unsigned).
defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
                                   int_arm_neon_vcvtfxs2fp>;
defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
                                   int_arm_neon_vcvtfxu2fp>;

// Convert floating-point to fixed-point (fcvtzs signed, fcvtzu unsigned).
defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
                                   int_arm_neon_vcvtfp2fxs>;
defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
                                   int_arm_neon_vcvtfp2fxu>;
2309
// PatFrags that take the high half of a 128-bit vector (Neon_High*) and
// extend it; instantiated below with sext and zext to match the "long2"
// instruction forms.
multiclass Neon_sshll2_0<SDNode ext>
{
  def _v8i8  : PatFrag<(ops node:$Rn),
                       (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
  def _v4i16 : PatFrag<(ops node:$Rn),
                       (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
  def _v2i32 : PatFrag<(ops node:$Rn),
                       (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
}

defm NI_sext_high : Neon_sshll2_0<sext>;
defm NI_zext_high : Neon_sshll2_0<zext>;
2322
2323
2324 //===----------------------------------------------------------------------===//
2325 // Multiclasses for NeonI_Across
2326 //===----------------------------------------------------------------------===//
2327
// Variant 1: across-lanes reductions whose result is one step wider than the
// source elements (used for saddlv/uaddlv), so the destination is the
// next-larger scalar FP register class.

multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
                            string asmop, SDPatternOperator opnode>
{
    def _1h8b:  NeonI_2VAcross<0b0, u, 0b00, opcode,
                (outs FPR16:$Rd), (ins VPR64:$Rn),
                asmop # "\t$Rd, $Rn.8b",
                [(set (v1i16 FPR16:$Rd),
                    (v1i16 (opnode (v8i8 VPR64:$Rn))))],
                NoItinerary>;

    def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
                (outs FPR16:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.16b",
                [(set (v1i16 FPR16:$Rd),
                    (v1i16 (opnode (v16i8 VPR128:$Rn))))],
                NoItinerary>;

    def _1s4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
                (outs FPR32:$Rd), (ins VPR64:$Rn),
                asmop # "\t$Rd, $Rn.4h",
                [(set (v1i32 FPR32:$Rd),
                    (v1i32 (opnode (v4i16 VPR64:$Rn))))],
                NoItinerary>;

    def _1s8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
                (outs FPR32:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.8h",
                [(set (v1i32 FPR32:$Rd),
                    (v1i32 (opnode (v8i16 VPR128:$Rn))))],
                NoItinerary>;

    // _1d2s doesn't exist!

    def _1d4s:  NeonI_2VAcross<0b1, u, 0b10, opcode,
                (outs FPR64:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.4s",
                [(set (v1i64 FPR64:$Rd),
                    (v1i64 (opnode (v4i32 VPR128:$Rn))))],
                NoItinerary>;
}

// Signed/unsigned add long across vector.
defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
2373
// Variant 2: across-lanes reductions whose result has the same width as the
// source elements (min/max/add across vector).

multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
                            string asmop, SDPatternOperator opnode>
{
    def _1b8b:  NeonI_2VAcross<0b0, u, 0b00, opcode,
                (outs FPR8:$Rd), (ins VPR64:$Rn),
                asmop # "\t$Rd, $Rn.8b",
                [(set (v1i8 FPR8:$Rd),
                    (v1i8 (opnode (v8i8 VPR64:$Rn))))],
                NoItinerary>;

    def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
                (outs FPR8:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.16b",
                [(set (v1i8 FPR8:$Rd),
                    (v1i8 (opnode (v16i8 VPR128:$Rn))))],
                NoItinerary>;

    def _1h4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
                (outs FPR16:$Rd), (ins VPR64:$Rn),
                asmop # "\t$Rd, $Rn.4h",
                [(set (v1i16 FPR16:$Rd),
                    (v1i16 (opnode (v4i16 VPR64:$Rn))))],
                NoItinerary>;

    def _1h8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
                (outs FPR16:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.8h",
                [(set (v1i16 FPR16:$Rd),
                    (v1i16 (opnode (v8i16 VPR128:$Rn))))],
                NoItinerary>;

    // _1s2s doesn't exist!

    def _1s4s:  NeonI_2VAcross<0b1, u, 0b10, opcode,
                (outs FPR32:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.4s",
                [(set (v1i32 FPR32:$Rd),
                    (v1i32 (opnode (v4i32 VPR128:$Rn))))],
                NoItinerary>;
}

// Integer min/max and add reductions across all lanes.
defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;

defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;

defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
2424
// Variant 3: floating-point across-lanes reductions; only the 4s arrangement
// exists.

multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
                            string asmop, SDPatternOperator opnode> {
    def _1s4s:  NeonI_2VAcross<0b1, u, size, opcode,
                (outs FPR32:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.4s",
                [(set (f32 FPR32:$Rd),
                    (f32 (opnode (v4f32 VPR128:$Rn))))],
                NoItinerary>;
}

// FP maxNum/minNum and max/min reductions; min forms use size = 0b10.
defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
                                int_aarch64_neon_vmaxnmv>;
defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
                                int_aarch64_neon_vminnmv>;

defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
                              int_aarch64_neon_vmaxv>;
defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
                              int_aarch64_neon_vminv>;
2446
// The following definitions are for the instruction class (Perm).
2448
// Two-source permute (uzp/trn/zip): both sources and the destination share
// the same register class and arrangement.
class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
                    string asmop, RegisterOperand OpVPR, string OpS,
                    SDPatternOperator opnode, ValueType Ty>
  : NeonI_Perm<q, size, opcode,
               (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
               asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
               [(set (Ty OpVPR:$Rd),
                  (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
               NoItinerary>;
2458
// Instantiate a permute for every integer arrangement (no _1d form: a
// permute of a single 64-bit element is meaningless).
multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
                          SDPatternOperator opnode> {
  def _8b  : NeonI_Permute<0b0, 0b00, opcode, asmop,
                           VPR64, "8b", opnode, v8i8>;
  def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
                           VPR128, "16b",opnode, v16i8>;
  def _4h  : NeonI_Permute<0b0, 0b01, opcode, asmop,
                           VPR64, "4h", opnode, v4i16>;
  def _8h  : NeonI_Permute<0b1, 0b01, opcode, asmop,
                           VPR128, "8h", opnode, v8i16>;
  def _2s  : NeonI_Permute<0b0, 0b10, opcode, asmop,
                           VPR64, "2s", opnode, v2i32>;
  def _4s  : NeonI_Permute<0b1, 0b10, opcode, asmop,
                           VPR128, "4s", opnode, v4i32>;
  def _2d  : NeonI_Permute<0b1, 0b11, opcode, asmop,
                           VPR128, "2d", opnode, v2i64>;
}

// uzp1/uzp2, trn1/trn2, zip1/zip2.
defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;
2483
// Select the integer permute instructions for the equivalent floating-point
// vector types (permutes are type-agnostic bit movements).
multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
  def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
            (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;

  def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
            (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;

  def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
            (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
}

defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;
2501
// The following definitions are for the instruction class (3V Diff).
2503
// Normal long/long2 pattern: both operands are extended (via 'ext') to the
// result width before 'opnode' is applied; result is always a 128-bit vector.
class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
                 string asmop, string ResS, string OpS,
                 SDPatternOperator opnode, SDPatternOperator ext,
                 RegisterOperand OpVPR,
                 ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
                                   (ResTy (ext (OpTy OpVPR:$Rm))))))],
                 NoItinerary>;
2517
// Signed long forms (low halves, sign-extended).
multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
                        string asmop, SDPatternOperator opnode,
                        bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                           opnode, sext, VPR64, v8i16, v8i8>;
    def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                           opnode, sext, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                           opnode, sext, VPR64, v2i64, v2i32>;
  }
}
2530
// Signed "long2" forms (high halves, sign-extended via NI_sext_high_*).
multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                            opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
    def _4s8h  : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                            opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
    def _2d4s  : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                            opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
  }
}
2542
// Unsigned long forms (low halves, zero-extended).
multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
                        SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                           opnode, zext, VPR64, v8i16, v8i8>;
    def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                           opnode, zext, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                           opnode, zext, VPR64, v2i64, v2i32>;
  }
}
2554
// Unsigned "long2" forms (high halves, zero-extended via NI_zext_high_*).
multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                            opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
    def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                           opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
    def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                           opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
  }
}
2566
// Long add/subtract: adds are commutable, subtracts are not.
defm SADDLvvv :  NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
defm UADDLvvv :  NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;

defm SADDL2vvv :  NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
defm UADDL2vvv :  NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;

defm SSUBLvvv :  NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
defm USUBLvvv :  NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;

defm SSUBL2vvv :  NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
defm USUBL2vvv :  NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2578
// Normal wide/wide2 pattern: the first operand is already result-width,
// only the second operand ($Rm) is extended.
class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
                 string asmop, string ResS, string OpS,
                 SDPatternOperator opnode, SDPatternOperator ext,
                 RegisterOperand OpVPR,
                 ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode (ResTy VPR128:$Rn),
                                   (ResTy (ext (OpTy OpVPR:$Rm))))))],
                 NoItinerary>;
2592
// Signed wide forms (low half of $Rm, sign-extended).
multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
                        SDPatternOperator opnode> {
  def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                         opnode, sext, VPR64, v8i16, v8i8>;
  def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                         opnode, sext, VPR64, v4i32, v4i16>;
  def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                         opnode, sext, VPR64, v2i64, v2i32>;
}

defm SADDWvvv :  NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
defm SSUBWvvv :  NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
2605
// Signed "wide2" forms (high half of $Rm, sign-extended).
multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode> {
  def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                          opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
  def _4s8h  : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                          opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
  def _2d4s  : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                          opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
}

defm SADDW2vvv :  NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
defm SSUBW2vvv :  NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2618
// Unsigned wide forms (low half of $Rm, zero-extended).
multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
                        SDPatternOperator opnode> {
  def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                         opnode, zext, VPR64, v8i16, v8i8>;
  def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                         opnode, zext, VPR64, v4i32, v4i16>;
  def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                         opnode, zext, VPR64, v2i64, v2i32>;
}

defm UADDWvvv :  NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
defm USUBWvvv :  NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2631
// Unsigned "wide2" forms (high half of $Rm, zero-extended).
multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode> {
  def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                          opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
  def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                         opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
  def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                         opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
}

defm UADDW2vvv :  NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
defm USUBW2vvv :  NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2644
// Get the high half of each vector element: logical-shift each element right
// by half its width, then truncate to the narrower element type.
multiclass NeonI_get_high {
  def _8h : PatFrag<(ops node:$Rn),
                    (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
                                             (v8i16 (Neon_vdup (i32 8)))))))>;
  def _4s : PatFrag<(ops node:$Rn),
                    (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
                                              (v4i32 (Neon_vdup (i32 16)))))))>;
  def _2d : PatFrag<(ops node:$Rn),
                    (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
                                              (v2i64 (Neon_vdup (i32 32)))))))>;
}

defm NI_get_hi : NeonI_get_high;
2659
// pattern for addhn/subhn with 2 operands:
//   Rd.<ResS> = high-half-of-each-element( Rn.<OpS> opnode Rm.<OpS> )
// get_hi is the NI_get_hi_* fragment matching the source element width.
class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
                           string asmop, string ResS, string OpS,
                           SDPatternOperator opnode, SDPatternOperator get_hi,
                           ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR64:$Rd),
                    (ResTy (get_hi
                      (OpTy (opnode (OpTy VPR128:$Rn),
                                    (OpTy VPR128:$Rm))))))],
                 NoItinerary>;

multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
                                SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
                                     opnode, NI_get_hi_8h, v8i8, v8i16>;
    def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
                                     opnode, NI_get_hi_4s, v4i16, v4i32>;
    def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
                                     opnode, NI_get_hi_2d, v2i32, v2i64>;
  }
}

// addhn is commutable (add is), subhn is not.
defm ADDHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
defm SUBHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2688
// Generic 3V-Diff pattern for an operation with 2 operands:
//   Rd.<ResS> = opnode(Rn.<OpS>, Rm.<OpS>)
// Result and operand register classes/types are parameters, so the same
// class serves narrowing (VPR128 -> VPR64) and widening (VPR64 -> VPR128)
// instructions.
class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
                    string asmop, string ResS, string OpS,
                    SDPatternOperator opnode,
                    RegisterOperand ResVPR, RegisterOperand OpVPR,
                    ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy ResVPR:$Rd),
                    (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
                 NoItinerary>;

// Normal narrowing pattern: 128-bit sources, 64-bit result.
multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
                          SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
                              opnode, VPR64, VPR128, v8i8, v8i16>;
    def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
                              opnode, VPR64, VPR128, v4i16, v4i32>;
    def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
                              opnode, VPR64, VPR128, v2i32, v2i64>;
  }
}

// Rounding add/subtract returning high narrow halves, selected from the
// ARM NEON intrinsics shared with the 32-bit backend.
defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2717
// ACLE-style narrowing instruction with a tied destination (3 operands).
// No ISel pattern here ([]); selection happens through the separate
// NarrowHighHalfPat patterns below, which need SUBREG_TO_REG in the output.
class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [], NoItinerary> {
  // The "2" forms insert into the high half, so the low half of $Rd must be
  // the incoming $src.
  let Constraints = "$src = $Rd";
  let neverHasSideEffects = 1;
}

multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
  def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
  def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
  def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
}

defm ADDHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
defm SUBHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;

defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2740
// Patterns have to be separate because there's a SUBREG_TO_REG in the output
// part.
// Matches "combine(src, narrow-result)" and selects the tied "2" instruction:
// $src provides the low 64 bits (placed via SUBREG_TO_REG), and INST writes
// the narrowed coreop(Rn, Rm) into the high half.
class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
                        SDPatternOperator coreop>
  : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                      (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
                                                        (SrcTy VPR128:$Rm)))))),
        (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
              VPR128:$Rn, VPR128:$Rm)>;

// addhn2 patterns
def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8,  v8i16,
          BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<ADDHN2vvv_8h4s,  v4i16, v4i32,
          BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<ADDHN2vvv_4s2d,  v2i32, v2i64,
          BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;

// subhn2 patterns
def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8,  v8i16,
          BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<SUBHN2vvv_8h4s,  v4i16, v4i32,
          BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<SUBHN2vvv_4s2d,  v2i32, v2i64,
          BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;

// raddhn2 patterns (rounding forms go straight to the intrinsic)
def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vraddhn>;
def : NarrowHighHalfPat<RADDHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vraddhn>;
def : NarrowHighHalfPat<RADDHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vraddhn>;

// rsubhn2 patterns
def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vrsubhn>;
def : NarrowHighHalfPat<RSUBHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vrsubhn>;
def : NarrowHighHalfPat<RSUBHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vrsubhn>;
2776
// Long operations whose narrow result must be zero-extended to the
// destination element width:
//   Rd.<ResS> = zext( opnode(Rn.<OpS>, Rm.<OpS>) : OpSTy )
// OpSTy is the (narrow) type produced by opnode before extension.
class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode,
                     RegisterOperand OpVPR,
                     ValueType ResTy, ValueType OpTy, ValueType OpSTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
                                                (OpTy OpVPR:$Rm))))))],
                 NoItinerary>;

multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
                           SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                               opnode, VPR64, v8i16, v8i8, v8i8>;
    def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                               opnode, VPR64, v4i32, v4i16, v4i16>;
    def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                               opnode, VPR64, v2i64, v2i32, v2i32>;
  }
}

// sabdl/uabdl: absolute-difference long. Both use zext: the absolute
// difference always fits unsigned in the source element width, so zero
// extension is correct for the signed form as well.
defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
2805
// Fragments applying "op" to the high halves of both 128-bit operands
// (extracted by Neon_High16B/8H/4S).  These implement the "2" variants of
// the long operations (sabdl2, smull2, ...).
multiclass NeonI_Op_High<SDPatternOperator op> {
  def _16B : PatFrag<(ops node:$Rn, node:$Rm),
                     (op (v8i8 (Neon_High16B node:$Rn)),
                         (v8i8 (Neon_High16B node:$Rm)))>;
  def _8H  : PatFrag<(ops node:$Rn, node:$Rm),
                     (op (v4i16 (Neon_High8H node:$Rn)),
                         (v4i16 (Neon_High8H node:$Rm)))>;
  def _4S  : PatFrag<(ops node:$Rn, node:$Rm),
                     (op (v2i32 (Neon_High4S node:$Rn)),
                         (v2i32 (Neon_High4S node:$Rm)))>;
}

// High-half fragments for abd, mull and qdmull, referenced by name via
// !cast<PatFrag>(... # "_16B"/"_8H"/"_4S") in the multiclasses below.
defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
2824
// Absolute-difference long, high-half ("2") forms.  opnode is passed as a
// string and resolved per element width via !cast.
// NOTE(review): the def suffixes (_8h8b etc.) describe the narrow layout,
// not the actual 8h/16b operands; renaming would change the generated
// record names, so they are kept as-is.
multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
                            bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b  : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                !cast<PatFrag>(opnode # "_16B"),
                                VPR128, v8i16, v16i8, v8i8>;
    def _4s4h  : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                !cast<PatFrag>(opnode # "_8H"),
                                VPR128, v4i32, v8i16, v4i16>;
    def _2d2s  : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                !cast<PatFrag>(opnode # "_4S"),
                                VPR128, v2i64, v4i32, v2i32>;
  }
}

defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2842
// For patterns that need two operators chained (accumulate forms):
//   Rd = opnode( src, zext( subop(Rn, Rm) ) )
// e.g. sabal: Rd += zext(|Rn - Rm|).  The destination is tied to $src.
class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode, SDPatternOperator subop,
                     RegisterOperand OpVPR,
                     ValueType ResTy, ValueType OpTy, ValueType OpSTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode
                      (ResTy VPR128:$src),
                      (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
                                                 (OpTy OpVPR:$Rm))))))))],
                 NoItinerary> {
  let Constraints = "$src = $Rd";
}

multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
                             SDPatternOperator opnode, SDPatternOperator subop>{
  def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                             opnode, subop, VPR64, v8i16, v8i8, v8i8>;
  def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                             opnode, subop, VPR64, v4i32, v4i16, v4i16>;
  def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                             opnode, subop, VPR64, v2i64, v2i32, v2i32>;
}

// sabal/uabal: signed/unsigned absolute-difference accumulate long.
defm SABALvvv :  NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
                                   add, int_arm_neon_vabds>;
defm UABALvvv :  NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
                                   add, int_arm_neon_vabdu>;
2875
// High-half ("2") absolute-difference accumulate long; subop is resolved
// per width from the NI_*_hi fragment families via !cast.
multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
                              SDPatternOperator opnode, string subop> {
  def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                             opnode, !cast<PatFrag>(subop # "_16B"),
                             VPR128, v8i16, v16i8, v8i8>;
  def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                             opnode, !cast<PatFrag>(subop # "_8H"),
                             VPR128, v4i32, v8i16, v4i16>;
  def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                             opnode, !cast<PatFrag>(subop # "_4S"),
                             VPR128, v2i64, v4i32, v2i32>;
}

defm SABAL2vvv :  NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
                                     "NI_sabdl_hi">;
defm UABAL2vvv :  NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
                                     "NI_uabdl_hi">;
2893
// Long pattern with 2 operands: 64-bit sources, 128-bit result.
multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
                          SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                              opnode, VPR128, VPR64, v8i16, v8i8>;
    def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                              opnode, VPR128, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                              opnode, VPR128, VPR64, v2i64, v2i32>;
  }
}

// smull/umull: signed/unsigned multiply long.
defm SMULLvvv :  NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
defm UMULLvvv :  NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
2909
// Long multiply taking both operands from 128-bit registers; the opnode
// passed in (a NI_*_hi fragment) is what restricts the operation to the
// high halves.
class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
                           string asmop, string ResS, string OpS,
                           SDPatternOperator opnode,
                           ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
                 NoItinerary>;

multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
                                   string opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                      !cast<PatFrag>(opnode # "_16B"),
                                      v8i16, v16i8>;
    def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                     !cast<PatFrag>(opnode # "_8H"),
                                     v4i32, v8i16>;
    def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                     !cast<PatFrag>(opnode # "_4S"),
                                     v2i64, v4i32>;
  }
}

defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
                                         "NI_smull_hi", 1>;
defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
                                         "NI_umull_hi", 1>;
2940
// Long pattern with 3 operands (multiply-accumulate forms):
//   Rd = opnode(src, Rn, Rm), with $Rd tied to $src.
class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode,
                     ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode
                      (ResTy VPR128:$src),
                      (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
               NoItinerary> {
  let Constraints = "$src = $Rd";
}

multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
                             SDPatternOperator opnode> {
  def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                             opnode, v8i16, v8i8>;
  def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                             opnode, v4i32, v4i16>;
  def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                             opnode, v2i64, v2i32>;
}

// Fragments expressing accumulate-long as add/sub of a widening multiply:
// smlal: Rd + smull(Rn, Rm)
def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (add node:$Rd,
                            (int_arm_neon_vmulls node:$Rn, node:$Rm))>;

// umlal: Rd + umull(Rn, Rm)
def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (add node:$Rd,
                            (int_arm_neon_vmullu node:$Rn, node:$Rm))>;

// smlsl: Rd - smull(Rn, Rm)
def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (sub node:$Rd,
                            (int_arm_neon_vmulls node:$Rn, node:$Rm))>;

// umlsl: Rd - umull(Rn, Rm)
def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (sub node:$Rd,
                            (int_arm_neon_vmullu node:$Rn, node:$Rm))>;

defm SMLALvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
defm UMLALvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;

defm SMLSLvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
defm UMLSLvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
2988
// Multiply-accumulate/subtract long with tied destination:
//   Rd = subop( src, opnode(Rn, Rm) )
// where subop is the accumulation (add/sub or saturating add/sub) and
// opnode is the widening multiply.
class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
                           string asmop, string ResS, string OpS,
                           SDPatternOperator subop, SDPatternOperator opnode,
                           RegisterOperand OpVPR,
                           ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
               (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
               asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
               [(set (ResTy VPR128:$Rd),
                  (ResTy (subop
                    (ResTy VPR128:$src),
                    (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
               NoItinerary> {
  let Constraints = "$src = $Rd";
}

// High-half ("2") multiply accumulate/subtract long.
multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
                                   SDPatternOperator subop, string opnode> {
  def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                    subop, !cast<PatFrag>(opnode # "_16B"),
                                    VPR128, v8i16, v16i8>;
  def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                   subop, !cast<PatFrag>(opnode # "_8H"),
                                   VPR128, v4i32, v8i16>;
  def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                   subop, !cast<PatFrag>(opnode # "_4S"),
                                   VPR128, v2i64, v4i32>;
}

defm SMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
                                          add, "NI_smull_hi">;
defm UMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
                                          add, "NI_umull_hi">;

defm SMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
                                          sub, "NI_smull_hi">;
defm UMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
                                          sub, "NI_umull_hi">;
3027
// Saturating doubling multiply accumulate/subtract long (64-bit sources):
//   Rd = qadd/qsub( src, sqdmull(Rn, Rm) )
// Only 4h and 2s element sizes exist for sqdmlal/sqdmlsl.
multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
                                    SDPatternOperator opnode> {
  def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                                   opnode, int_arm_neon_vqdmull,
                                   VPR64, v4i32, v4i16>;
  def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                                   opnode, int_arm_neon_vqdmull,
                                   VPR64, v2i64, v2i32>;
}

defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
                                           int_arm_neon_vqadds>;
defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
                                           int_arm_neon_vqsubs>;
3042
// Long 2-operand pattern restricted to 4h/2s element sizes (for sqdmull,
// which has no byte form).
multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                              opnode, VPR128, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                              opnode, VPR128, VPR64, v2i64, v2i32>;
  }
}

defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
                                int_arm_neon_vqdmull, 1>;
3055
// High-half ("2") variant of the above, again 8h/4s sources only.
multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
                                   string opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                     !cast<PatFrag>(opnode # "_8H"),
                                     v4i32, v8i16>;
    def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                     !cast<PatFrag>(opnode # "_4S"),
                                     v2i64, v4i32>;
  }
}

defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
                                           "NI_qdmull_hi", 1>;
3070
// High-half ("2") saturating doubling multiply accumulate/subtract long:
//   Rd = qadd/qsub( src, sqdmull(high(Rn), high(Rm)) )
multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
                                     SDPatternOperator opnode> {
  def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                   opnode, NI_qdmull_hi_8H,
                                   VPR128, v4i32, v8i16>;
  def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                   opnode, NI_qdmull_hi_4S,
                                   VPR128, v2i64, v4i32>;
}

defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
                                             int_arm_neon_vqadds>;
defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
                                             int_arm_neon_vqsubs>;
3085
// Polynomial multiply long: the 8b form uses the common ARM intrinsic,
// while the 1d form (pmull on 64-bit polynomials, size 0b11) uses the
// AArch64-specific vmull_p64 intrinsic and yields a v16i8-typed 128-bit
// result.
multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode_8h8b,
                         SDPatternOperator opnode_1q1d, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                              opnode_8h8b, VPR128, VPR64, v8i16, v8i8>;

    def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d",
                              opnode_1q1d, VPR128, VPR64, v16i8, v1i64>;
  }
}

defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp,
                              int_aarch64_neon_vmull_p64, 1>;
3100
// Polynomial multiply long, high-half ("2") forms. The 1q2d variant is
// written out inline: it extracts element 1 (the high doubleword) of each
// 2d source and feeds the pair to vmull_p64.
multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
                                   string opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                      !cast<PatFrag>(opnode # "_16B"),
                                      v8i16, v16i8>;

    def _1q2d :
      NeonI_3VDiff<0b1, u, 0b11, opcode,
                   (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                   asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
                   [(set (v16i8 VPR128:$Rd),
                      (v16i8 (int_aarch64_neon_vmull_p64
                        (v1i64 (scalar_to_vector
                          (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))),
                        (v1i64 (scalar_to_vector
                          (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
                   NoItinerary>;
  }
}

defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
                                         1>;
3124
3125 // End of implementation for instruction class (3V Diff)
3126
// The following are the vector load/store multiple N-element structure
// instructions (class SIMD lselem).
3129
// ld1:         load multiple 1-element structure to 1/2/3/4 registers.
// ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
//              The structure consists of a sequence of sets of N values.
//              The first element of the structure is placed in the first lane
//              of the first vector, the second element in the first lane
//              of the second vector, and so on.
// E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
// the three 64-bit vectors list {BA, DC, FE}.
// E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
// 64-bit vectors list {DA, EB, FC}.
// Store instructions store multiple structures from N registers like load.
3141
3142
// Load a register list (one structure layout) from [Rn], no offset/writeback.
// Selection is done by the explicit load patterns further down, not here.
class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
                    RegisterOperand VecList, string asmop>
  : NeonI_LdStMult<q, 1, opcode, size,
                 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
                 asmop # "\t$Rt, [$Rn]",
                 [],
                 NoItinerary> {
  let mayLoad = 1;
  let neverHasSideEffects = 1;
}
3153
// Instantiate a load-list instruction for every arrangement except 1D
// (1D is defined separately per list kind below).  List names the operand
// family ("VOne", "VPair", "VTriple", "VQuad") resolved via !cast.
multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
  def _8B : NeonI_LDVList<0, opcode, 0b00,
                          !cast<RegisterOperand>(List # "8B_operand"), asmop>;

  def _4H : NeonI_LDVList<0, opcode, 0b01,
                          !cast<RegisterOperand>(List # "4H_operand"), asmop>;

  def _2S : NeonI_LDVList<0, opcode, 0b10,
                          !cast<RegisterOperand>(List # "2S_operand"), asmop>;

  def _16B : NeonI_LDVList<1, opcode, 0b00,
                           !cast<RegisterOperand>(List # "16B_operand"), asmop>;

  def _8H : NeonI_LDVList<1, opcode, 0b01,
                          !cast<RegisterOperand>(List # "8H_operand"), asmop>;

  def _4S : NeonI_LDVList<1, opcode, 0b10,
                          !cast<RegisterOperand>(List # "4S_operand"), asmop>;

  def _2D : NeonI_LDVList<1, opcode, 0b11,
                          !cast<RegisterOperand>(List # "2D_operand"), asmop>;
}
3176
// Load multiple N-element structures to N consecutive registers (N = 1,2,3,4)
defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;

defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;

defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;

defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;

// Load multiple 1-element structures to N consecutive registers (N = 2,3,4),
// i.e. the ld1 mnemonic with a multi-register list.
defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;

defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">;
def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;

defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">;
def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
3196
// Store a register list (one structure layout) to [Rn], no offset/writeback.
// Mirrors NeonI_LDVList; selection is via the explicit store patterns below.
class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
                    RegisterOperand VecList, string asmop>
  : NeonI_LdStMult<q, 0, opcode, size,
                 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
                 asmop # "\t$Rt, [$Rn]",
                 [],
                 NoItinerary> {
  let mayStore = 1;
  let neverHasSideEffects = 1;
}
3207
// Store-list counterpart of LDVList_BHSD: every arrangement except 1D.
multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
  def _8B : NeonI_STVList<0, opcode, 0b00,
                          !cast<RegisterOperand>(List # "8B_operand"), asmop>;

  def _4H : NeonI_STVList<0, opcode, 0b01,
                          !cast<RegisterOperand>(List # "4H_operand"), asmop>;

  def _2S : NeonI_STVList<0, opcode, 0b10,
                          !cast<RegisterOperand>(List # "2S_operand"), asmop>;

  def _16B : NeonI_STVList<1, opcode, 0b00,
                           !cast<RegisterOperand>(List # "16B_operand"), asmop>;

  def _8H : NeonI_STVList<1, opcode, 0b01,
                          !cast<RegisterOperand>(List # "8H_operand"), asmop>;

  def _4S : NeonI_STVList<1, opcode, 0b10,
                          !cast<RegisterOperand>(List # "4S_operand"), asmop>;

  def _2D : NeonI_STVList<1, opcode, 0b11,
                          !cast<RegisterOperand>(List # "2D_operand"), asmop>;
}
3230
// Store multiple N-element structures from N registers (N = 1,2,3,4)
defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;

defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;

defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;

defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;

// Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;

defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;

defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3250
// Select plain (register-addressed, no-offset) vector loads/stores to
// LD1/ST1 with the matching arrangement.

def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;

def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;

def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;

def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;

def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;

def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;

def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
          (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
          (ST1_2D GPR64xsp:$addr, VPR128:$value)>;

def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
          (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
          (ST1_4S GPR64xsp:$addr, VPR128:$value)>;

def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
          (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
          (ST1_16B GPR64xsp:$addr, VPR128:$value)>;

def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
          (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
          (ST1_1D GPR64xsp:$addr, VPR64:$value)>;

def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
          (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
          (ST1_2S GPR64xsp:$addr, VPR64:$value)>;

def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
          (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
          (ST1_8B GPR64xsp:$addr, VPR64:$value)>;
3298
3299 // Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store.
3300 // FIXME: for now we have v1i8, v1i16, v1i32 legal types, if they are illegal,
3301 // these patterns are not needed any more.
3302 def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>;
3303 def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>;
3304 def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>;
3305
3306 def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr),
3307           (LSFP8_STR $value, $addr, 0)>;
3308 def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr),
3309           (LSFP16_STR $value, $addr, 0)>;
3310 def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr),
3311           (LSFP32_STR $value, $addr, 0)>;
3312
3313
3314 // End of vector load/store multiple N-element structure (class SIMD lselem)
3315
3316 // The following are post-index vector load/store multiple N-element
3317 // structure instructions (class SIMD lselem-post)
// AsmOperandClass/Operand pairs that each match exactly one immediate
// value.  Post-indexed loads/stores with an immediate offset encode no
// offset bits -- the increment is implied by the total transfer size --
// so the assembler must verify the written immediate equals that implied
// value (isExactImm<N>), and the ImmLeaf lets ISel patterns do the same.
3318 def exact1_asmoperand : AsmOperandClass {
3319   let Name = "Exact1";
3320   let PredicateMethod = "isExactImm<1>";
3321   let RenderMethod = "addImmOperands";
3322 }
3323 def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
3324   let ParserMatchClass = exact1_asmoperand;
3325 }
3326
3327 def exact2_asmoperand : AsmOperandClass {
3328   let Name = "Exact2";
3329   let PredicateMethod = "isExactImm<2>";
3330   let RenderMethod = "addImmOperands";
3331 }
3332 def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
3333   let ParserMatchClass = exact2_asmoperand;
3334 }
3335
3336 def exact3_asmoperand : AsmOperandClass {
3337   let Name = "Exact3";
3338   let PredicateMethod = "isExactImm<3>";
3339   let RenderMethod = "addImmOperands";
3340 }
3341 def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
3342   let ParserMatchClass = exact3_asmoperand;
3343 }
3344
3345 def exact4_asmoperand : AsmOperandClass {
3346   let Name = "Exact4";
3347   let PredicateMethod = "isExactImm<4>";
3348   let RenderMethod = "addImmOperands";
3349 }
3350 def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
3351   let ParserMatchClass = exact4_asmoperand;
3352 }
3353
3354 def exact6_asmoperand : AsmOperandClass {
3355   let Name = "Exact6";
3356   let PredicateMethod = "isExactImm<6>";
3357   let RenderMethod = "addImmOperands";
3358 }
3359 def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
3360   let ParserMatchClass = exact6_asmoperand;
3361 }
3362
3363 def exact8_asmoperand : AsmOperandClass {
3364   let Name = "Exact8";
3365   let PredicateMethod = "isExactImm<8>";
3366   let RenderMethod = "addImmOperands";
3367 }
3368 def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
3369   let ParserMatchClass = exact8_asmoperand;
3370 }
3371
3372 def exact12_asmoperand : AsmOperandClass {
3373   let Name = "Exact12";
3374   let PredicateMethod = "isExactImm<12>";
3375   let RenderMethod = "addImmOperands";
3376 }
3377 def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
3378   let ParserMatchClass = exact12_asmoperand;
3379 }
3380
3381 def exact16_asmoperand : AsmOperandClass {
3382   let Name = "Exact16";
3383   let PredicateMethod = "isExactImm<16>";
3384   let RenderMethod = "addImmOperands";
3385 }
3386 def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
3387   let ParserMatchClass = exact16_asmoperand;
3388 }
3389
3390 def exact24_asmoperand : AsmOperandClass {
3391   let Name = "Exact24";
3392   let PredicateMethod = "isExactImm<24>";
3393   let RenderMethod = "addImmOperands";
3394 }
3395 def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
3396   let ParserMatchClass = exact24_asmoperand;
3397 }
3398
3399 def exact32_asmoperand : AsmOperandClass {
3400   let Name = "Exact32";
3401   let PredicateMethod = "isExactImm<32>";
3402   let RenderMethod = "addImmOperands";
3403 }
3404 def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
3405   let ParserMatchClass = exact32_asmoperand;
3406 }
3407
3408 def exact48_asmoperand : AsmOperandClass {
3409   let Name = "Exact48";
3410   let PredicateMethod = "isExactImm<48>";
3411   let RenderMethod = "addImmOperands";
3412 }
3413 def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
3414   let ParserMatchClass = exact48_asmoperand;
3415 }
3416
3417 def exact64_asmoperand : AsmOperandClass {
3418   let Name = "Exact64";
3419   let PredicateMethod = "isExactImm<64>";
3420   let RenderMethod = "addImmOperands";
3421 }
3422 def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
3423   let ParserMatchClass = exact64_asmoperand;
3424 }
3425
3426 multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
3427                            RegisterOperand VecList, Operand ImmTy,
3428                            string asmop> {
// Post-indexed (write-back) vector-list load.  Two forms are produced:
// "_fixed", which advances the base by an immediate that must equal the
// transfer size (ImmTy), and "_register", which advances it by Rm.
// $wb is the written-back base, tied to $Rn.
3429   let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
3430       DecoderMethod = "DecodeVLDSTPostInstruction" in {
3431     def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
3432                      (outs VecList:$Rt, GPR64xsp:$wb),
3433                      (ins GPR64xsp:$Rn, ImmTy:$amt),
3434                      asmop # "\t$Rt, [$Rn], $amt",
3435                      [],
3436                      NoItinerary> {
      // Rm = 0b11111 (XZR) selects the immediate post-index encoding;
      // the register form below excludes XZR via GPR64noxzr.
3437       let Rm = 0b11111;
3438     }
3439
3440     def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
3441                         (outs VecList:$Rt, GPR64xsp:$wb),
3442                         (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
3443                         asmop # "\t$Rt, [$Rn], $Rm",
3444                         [],
3445                         NoItinerary>;
3446   }
3447 }
3448
3449 multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3450     Operand ImmTy2, string asmop> {
// Expand a post-indexed load over the vector arrangements: ImmTy is the
// fixed increment for the 64-bit (q = 0) arrangements, ImmTy2 for the
// 128-bit (q = 1) ones.  No 64-bit "1D" entry is generated here; where it
// exists (ld1/ld1xN) the callers define it separately.
3451   defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
3452                               !cast<RegisterOperand>(List # "8B_operand"),
3453                               ImmTy, asmop>;
3454
3455   defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
3456                               !cast<RegisterOperand>(List # "4H_operand"),
3457                               ImmTy, asmop>;
3458
3459   defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
3460                               !cast<RegisterOperand>(List # "2S_operand"),
3461                               ImmTy, asmop>;
3462
3463   defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
3464                                !cast<RegisterOperand>(List # "16B_operand"),
3465                                ImmTy2, asmop>;
3466
3467   defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
3468                               !cast<RegisterOperand>(List # "8H_operand"),
3469                               ImmTy2, asmop>;
3470
3471   defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
3472                               !cast<RegisterOperand>(List # "4S_operand"),
3473                               ImmTy2, asmop>;
3474
3475   defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
3476                               !cast<RegisterOperand>(List # "2D_operand"),
3477                               ImmTy2, asmop>;
3478 }
3479
3480 // Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
// The immediate increments equal the total bytes transferred, e.g. ld1
// one 64-bit register -> 8, one 128-bit register -> 16.
3481 defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
3482 defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3483                                  "ld1">;
3484
3485 defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
3486
3487 defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3488                              "ld3">;
3489
3490 defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
3491
3492 // Post-index load multiple 1-element structures from N consecutive registers
3493 // (N = 2,3,4)
3494 defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3495                                "ld1">;
3496 defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3497                                    uimm_exact16, "ld1">;
3498
3499 defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3500                                "ld1">;
3501 defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3502                                    uimm_exact24, "ld1">;
3503
3504 defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3505                                 "ld1">;
3506 defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3507                                    uimm_exact32, "ld1">;
3508
3509 multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
3510                             RegisterOperand VecList, Operand ImmTy,
3511                             string asmop> {
// Post-indexed (write-back) vector-list store: mirror of
// NeonI_LDWB_VList with L = 0 and the vector list $Rt as an input.
3512   let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
3513       DecoderMethod = "DecodeVLDSTPostInstruction" in {
3514     def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
3515                      (outs GPR64xsp:$wb),
3516                      (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
3517                      asmop # "\t$Rt, [$Rn], $amt",
3518                      [],
3519                      NoItinerary> {
      // Rm = 0b11111 (XZR) selects the immediate post-index encoding.
3520       let Rm = 0b11111;
3521     }
3522
3523     def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
3524                       (outs GPR64xsp:$wb),
3525                       (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
3526                       asmop # "\t$Rt, [$Rn], $Rm",
3527                       [],
3528                       NoItinerary>;
3529   }
3530 }
3531
3532 multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3533                            Operand ImmTy2, string asmop> {
// Store counterpart of LDWB_VList_BHSD: ImmTy is the fixed increment for
// the 64-bit arrangements, ImmTy2 for the 128-bit ones; the 64-bit "1D"
// forms, where they exist, are defined separately by the callers.
3534   defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
3535                  !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;
3536
3537   defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
3538                               !cast<RegisterOperand>(List # "4H_operand"),
3539                               ImmTy, asmop>;
3540
3541   defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
3542                               !cast<RegisterOperand>(List # "2S_operand"),
3543                               ImmTy, asmop>;
3544
3545   defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
3546                                !cast<RegisterOperand>(List # "16B_operand"),
3547                                ImmTy2, asmop>;
3548
3549   defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
3550                               !cast<RegisterOperand>(List # "8H_operand"),
3551                               ImmTy2, asmop>;
3552
3553   defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
3554                               !cast<RegisterOperand>(List # "4S_operand"),
3555                               ImmTy2, asmop>;
3556
3557   defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
3558                               !cast<RegisterOperand>(List # "2D_operand"),
3559                               ImmTy2, asmop>;
3560 }
3561
3562 // Post-index store multiple N-element structures from N registers (N = 1,2,3,4)
3563 defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
3564 defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3565                                  "st1">;
3566
3567 defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
3568
3569 defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3570                              "st3">;
3571
3572 defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
3573
3574 // Post-index store multiple 1-element structures from N consecutive registers
3575 // (N = 2,3,4)
3576 defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3577                                "st1">;
3578 defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3579                                    uimm_exact16, "st1">;
3580
3581 defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3582                                "st1">;
3583 defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3584                                    uimm_exact24, "st1">;
3585
3586 defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3587                                "st1">;
3588 defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3589                                    uimm_exact32, "st1">;
3590
3591 // End of post-index vector load/store multiple N-element structure
3592 // (class SIMD lselem-post)
3593
3594 // The following are vector load/store single N-element structure
3595 // instructions (class SIMD lsone).
// Lane-number immediates printed "bare" (printUImmBareOperand), i.e.
// without a '#' prefix, as required inside "$Rt[lane]" syntax.
// neon_uimmN_bare accepts 0 .. 2^N-1 (uimm0 accepts only 0).
3596 def neon_uimm0_bare : Operand<i64>,
3597                         ImmLeaf<i64, [{return Imm == 0;}]> {
3598   let ParserMatchClass = neon_uimm0_asmoperand;
3599   let PrintMethod = "printUImmBareOperand";
3600 }
3601
3602 def neon_uimm1_bare : Operand<i64>,
3603                         ImmLeaf<i64, [{return Imm < 2;}]> {
3604   let ParserMatchClass = neon_uimm1_asmoperand;
3605   let PrintMethod = "printUImmBareOperand";
3606 }
3607
3608 def neon_uimm2_bare : Operand<i64>,
3609                         ImmLeaf<i64, [{return Imm < 4;}]> {
3610   let ParserMatchClass = neon_uimm2_asmoperand;
3611   let PrintMethod = "printUImmBareOperand";
3612 }
3613
// NOTE(review): the 3- and 4-bit variants reuse the generic
// uimm3/uimm4 asmoperands rather than neon_* ones, unlike the entries
// above -- confirm this asymmetry is intentional.
3614 def neon_uimm3_bare : Operand<i64>,
3615                         ImmLeaf<i64, [{return Imm < 8;}]> {
3616   let ParserMatchClass = uimm3_asmoperand;
3617   let PrintMethod = "printUImmBareOperand";
3618 }
3619
3620 def neon_uimm4_bare : Operand<i64>,
3621                         ImmLeaf<i64, [{return Imm < 16;}]> {
3622   let ParserMatchClass = uimm4_asmoperand;
3623   let PrintMethod = "printUImmBareOperand";
3624 }
3625
// Load one structure and replicate it to all lanes of the destination
// register list (the ld1r/ld2r/ld3r/ld4r family), non-write-back form.
3626 class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
3627                     RegisterOperand VecList, string asmop>
3628     : NeonI_LdOne_Dup<q, r, opcode, size,
3629                       (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3630                       asmop # "\t$Rt, [$Rn]",
3631                       [],
3632                       NoItinerary> {
3633   let mayLoad = 1;
3634   let neverHasSideEffects = 1;
3635 }
3636
3637 multiclass LDN_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop> {
// Expand a load-and-replicate over every arrangement; unlike the
// multi-structure lists, the 64-bit "1D" arrangement exists here too.
3638   def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00,
3639                           !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3640
3641   def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01,
3642                           !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3643
3644   def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10,
3645                           !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3646
3647   def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11,
3648                           !cast<RegisterOperand>(List # "1D_operand"), asmop>;
3649
3650   def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00,
3651                            !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3652
3653   def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01,
3654                           !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3655
3656   def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10,
3657                           !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3658
3659   def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11,
3660                           !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3661 }
3662
3663 // Load single 1-element structure to all lanes of 1 register
3664 defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">;
3665
3666 // Load single N-element structure to all lanes of N consecutive
3667 // registers (N = 2,3,4)
// N is encoded jointly by the r bit and the opcode (110 vs 111).
3668 defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
3669 defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
3670 defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;
3671
3672
// Select a vector-duplicate of a loaded scalar (Neon_vdup of LoadOp)
// directly to the load-and-replicate instruction INST.
// VTy is the result vector type, DTy the scalar type produced by LoadOp.
3673 class LD1R_pattern <ValueType VTy, ValueType DTy, PatFrag LoadOp,
3674                     Instruction INST>
3675     : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))),
3676           (VTy (INST GPR64xsp:$Rn))>;
3677
3678 // Match all LD1R instructions
// B/H lanes use extending loads since i8/i16 are not legal scalar types
// here; S/D lanes use plain loads of the element type.
3679 def : LD1R_pattern<v8i8, i32, extloadi8, LD1R_8B>;
3680
3681 def : LD1R_pattern<v16i8, i32, extloadi8, LD1R_16B>;
3682
3683 def : LD1R_pattern<v4i16, i32, extloadi16, LD1R_4H>;
3684
3685 def : LD1R_pattern<v8i16, i32, extloadi16, LD1R_8H>;
3686
3687 def : LD1R_pattern<v2i32, i32, load, LD1R_2S>;
3688 def : LD1R_pattern<v2f32, f32, load, LD1R_2S>;
3689
3690 def : LD1R_pattern<v4i32, i32, load, LD1R_4S>;
3691 def : LD1R_pattern<v4f32, f32, load, LD1R_4S>;
3692
3693 def : LD1R_pattern<v1i64, i64, load, LD1R_1D>;
3694 def : LD1R_pattern<v1f64, f64, load, LD1R_1D>;
3695
3696 def : LD1R_pattern<v2i64, i64, load, LD1R_2D>;
3697 def : LD1R_pattern<v2f64, f64, load, LD1R_2D>;
3698
3699
// Vector-list operands for lane-indexed accesses: one operand per element
// size (B/H/S/D) and list length, defined via VectorList_operands.
3700 multiclass VectorList_Bare_BHSD<string PREFIX, int Count,
3701                                 RegisterClass RegList> {
3702   defm B : VectorList_operands<PREFIX, "B", Count, RegList>;
3703   defm H : VectorList_operands<PREFIX, "H", Count, RegList>;
3704   defm S : VectorList_operands<PREFIX, "S", Count, RegList>;
3705   defm D : VectorList_operands<PREFIX, "D", Count, RegList>;
3706 }
3707
3708 // Special vector list operand of 128-bit vectors with bare layout.
3709 // i.e. only show ".b", ".h", ".s", ".d"
3710 defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>;
3711 defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>;
3712 defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>;
3713 defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>;
3714
// Load a single structure into one lane of the register list, leaving the
// other lanes unchanged -- hence the whole list is also an input ($src)
// tied to the output ($Rt).
3715 class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3716                      Operand ImmOp, string asmop>
3717     : NeonI_LdStOne_Lane<1, r, op2_1, op0,
3718                          (outs VList:$Rt),
3719                          (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
3720                          asmop # "\t$Rt[$lane], [$Rn]",
3721                          [],
3722                          NoItinerary> {
3723   let mayLoad = 1;
3724   let neverHasSideEffects = 1;
3725   let hasExtraDefRegAllocReq = 1;
3726   let Constraints = "$src = $Rt";
3727 }
3728
3729 multiclass LDN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
// Lane-indexed loads per element size.  The lane number is split across
// the encoding: its top bit goes in the Q bit (Inst{30}) and the rest in
// Inst{12-10}, left-shifted by log2(element size).  Note _S and _D share
// op2_1 = 0b10 and are distinguished by Inst{12-10} (D forces 0b001).
3730   def _B : NeonI_LDN_Lane<r, 0b00, op0,
3731                           !cast<RegisterOperand>(List # "B_operand"),
3732                           neon_uimm4_bare, asmop> {
3733     let Inst{12-10} = lane{2-0};
3734     let Inst{30} = lane{3};
3735   }
3736
3737   def _H : NeonI_LDN_Lane<r, 0b01, op0,
3738                           !cast<RegisterOperand>(List # "H_operand"),
3739                           neon_uimm3_bare, asmop> {
3740     let Inst{12-10} = {lane{1}, lane{0}, 0b0};
3741     let Inst{30} = lane{2};
3742   }
3743
3744   def _S : NeonI_LDN_Lane<r, 0b10, op0,
3745                           !cast<RegisterOperand>(List # "S_operand"),
3746                           neon_uimm2_bare, asmop> {
3747     let Inst{12-10} = {lane{0}, 0b0, 0b0};
3748     let Inst{30} = lane{1};
3749   }
3750
3751   def _D : NeonI_LDN_Lane<r, 0b10, op0,
3752                           !cast<RegisterOperand>(List # "D_operand"),
3753                           neon_uimm1_bare, asmop> {
3754     let Inst{12-10} = 0b001;
3755     let Inst{30} = lane{0};
3756   }
3757 }
3758
3759 // Load single 1-element structure to one lane of 1 register.
3760 defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">;
3761
3762 // Load single N-element structure to one lane of N consecutive registers
3763 // (N = 2,3,4)
// N is encoded jointly by the r and op0 bits.
3764 defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">;
3765 defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">;
3766 defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">;
3767
3768 multiclass LD1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
3769                           Operand ImmOp, Operand ImmOp2, PatFrag LoadOp,
3770                           Instruction INST> {
// Select vector_insert of a loaded scalar to LD1LN.  The instruction
// operates on 128-bit registers, so the 64-bit case (VTy) is widened with
// SUBREG_TO_REG before the load and narrowed again with EXTRACT_SUBREG.
3771   def : Pat<(VTy (vector_insert (VTy VPR64:$src),
3772                      (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))),
3773             (VTy (EXTRACT_SUBREG
3774                      (INST GPR64xsp:$Rn,
3775                            (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
3776                            ImmOp:$lane),
3777                      sub_64))>;
3778
// The 128-bit case (VTy2) maps straight onto the instruction.
3779   def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src),
3780                       (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))),
3781             (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>;
3782 }
3783
3784 // Match all LD1LN instructions
// Each instantiation supplies the 64-bit type, the 128-bit type, the
// scalar type, and the lane-immediate operands sized for each.
3785 defm : LD1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
3786                       extloadi8, LD1LN_B>;
3787
3788 defm : LD1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
3789                       extloadi16, LD1LN_H>;
3790
3791 defm : LD1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
3792                       load, LD1LN_S>;
3793 defm : LD1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
3794                       load, LD1LN_S>;
3795
3796 defm : LD1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
3797                       load, LD1LN_D>;
3798 defm : LD1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
3799                       load, LD1LN_D>;
3800
// Store one lane of the register list to memory (st1/st2/st3/st4 lane
// forms); the list $Rt is an input only.
3801 class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3802                      Operand ImmOp, string asmop>
3803     : NeonI_LdStOne_Lane<0, r, op2_1, op0,
3804                          (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
3805                          asmop # "\t$Rt[$lane], [$Rn]",
3806                          [],
3807                          NoItinerary> {
3808   let mayStore = 1;
3809   let neverHasSideEffects = 1;
  // NOTE(review): this instruction has no defs, so hasExtraDefRegAllocReq
  // looks inert here -- hasExtraSrcRegAllocReq may have been intended for
  // the $Rt register tuple; confirm.
3810   let hasExtraDefRegAllocReq = 1;
3811 }
3812
3813 multiclass STN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
// Lane-indexed stores per element size; lane-number encoding is identical
// to LDN_Lane_BHSD (top bit in Inst{30}, remainder in Inst{12-10} shifted
// by log2(element size); _S/_D share op2_1 = 0b10, D forcing 0b001).
3814   def _B : NeonI_STN_Lane<r, 0b00, op0,
3815                           !cast<RegisterOperand>(List # "B_operand"),
3816                           neon_uimm4_bare, asmop> {
3817     let Inst{12-10} = lane{2-0};
3818     let Inst{30} = lane{3};
3819   }
3820
3821   def _H : NeonI_STN_Lane<r, 0b01, op0,
3822                           !cast<RegisterOperand>(List # "H_operand"),
3823                           neon_uimm3_bare, asmop> {
3824     let Inst{12-10} = {lane{1}, lane{0}, 0b0};
3825     let Inst{30} = lane{2};
3826   }
3827
3828   def _S : NeonI_STN_Lane<r, 0b10, op0,
3829                           !cast<RegisterOperand>(List # "S_operand"),
3830                            neon_uimm2_bare, asmop> {
3831     let Inst{12-10} = {lane{0}, 0b0, 0b0};
3832     let Inst{30} = lane{1};
3833   }
3834
3835   def _D : NeonI_STN_Lane<r, 0b10, op0,
3836                           !cast<RegisterOperand>(List # "D_operand"),
3837                           neon_uimm1_bare, asmop>{
3838     let Inst{12-10} = 0b001;
3839     let Inst{30} = lane{0};
3840   }
3841 }
3842
3843 // Store single 1-element structure from one lane of 1 register.
3844 defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">;
3845
3846 // Store single N-element structure from one lane of N consecutive registers
3847 // (N = 2,3,4)
// Same r/op0 encoding of N as the LDnLN definitions above.
3848 defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">;
3849 defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">;
3850 defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">;
3851
3852 multiclass ST1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
3853                           Operand ImmOp, Operand ImmOp2, PatFrag StoreOp,
3854                           Instruction INST> {
// Select a store of an extracted vector lane to ST1LN.  As with
// LD1LN_patterns, the 64-bit source (VTy) is widened to 128 bits with
// SUBREG_TO_REG because the instruction reads a 128-bit register.
3855   def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)),
3856                      GPR64xsp:$Rn),
3857             (INST GPR64xsp:$Rn,
3858                   (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64),
3859                   ImmOp:$lane)>;
3860
// The 128-bit source (VTy2) maps straight onto the instruction.
3861   def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)),
3862                      GPR64xsp:$Rn),
3863             (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>;
3864 }
3865
3866 // Match all ST1LN instructions
// B/H lanes need truncating stores (the extracted element is an i32);
// S/D lanes use plain stores of the element type.
3867 defm : ST1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
3868                       truncstorei8, ST1LN_B>;
3869
3870 defm : ST1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
3871                       truncstorei16, ST1LN_H>;
3872
3873 defm : ST1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
3874                       store, ST1LN_S>;
3875 defm : ST1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
3876                       store, ST1LN_S>;
3877
3878 defm : ST1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
3879                       store, ST1LN_D>;
3880 defm : ST1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
3881                       store, ST1LN_D>;
3882
3883 // End of vector load/store single N-element structure (class SIMD lsone).
3884
3885
3886 // The following are post-index load/store single N-element instructions
3887 // (class SIMD lsone-post)
3888
3889 multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
3890                             RegisterOperand VecList, Operand ImmTy,
3891                             string asmop> {
// Post-indexed load-and-replicate (ld1r..ld4r with write-back): "_fixed"
// advances the base by the implied transfer size (ImmTy), "_register" by
// Rm.  $wb is the written-back base, tied to $Rn.
3892   let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn",
3893   DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
3894     def _fixed : NeonI_LdOne_Dup_Post<q, r, opcode, size,
3895                       (outs VecList:$Rt, GPR64xsp:$wb),
3896                       (ins GPR64xsp:$Rn, ImmTy:$amt),
3897                       asmop # "\t$Rt, [$Rn], $amt",
3898                       [],
3899                       NoItinerary> {
                        // Rm = 0b11111 (XZR) selects the immediate form.
3900                         let Rm = 0b11111;
3901                       }
3902
3903     def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
3904                       (outs VecList:$Rt, GPR64xsp:$wb),
3905                       (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
3906                       asmop # "\t$Rt, [$Rn], $Rm",
3907                       [],
3908                       NoItinerary>;
3909   }
3910 }
3911
3912 multiclass LDWB_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop,
3913                          Operand uimm_b, Operand uimm_h,
3914                          Operand uimm_s, Operand uimm_d> {
// Expand a post-indexed load-and-replicate over all arrangements.  The
// increment depends on the element size (not the arrangement width), so
// a separate exact-immediate operand is taken per element size B/H/S/D.
3915   defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00,
3916                               !cast<RegisterOperand>(List # "8B_operand"),
3917                               uimm_b, asmop>;
3918
3919   defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01,
3920                               !cast<RegisterOperand>(List # "4H_operand"),
3921                               uimm_h, asmop>;
3922
3923   defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10,
3924                               !cast<RegisterOperand>(List # "2S_operand"),
3925                               uimm_s, asmop>;
3926
3927   defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11,
3928                               !cast<RegisterOperand>(List # "1D_operand"),
3929                               uimm_d, asmop>;
3930
3931   defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00,
3932                                !cast<RegisterOperand>(List # "16B_operand"),
3933                                uimm_b, asmop>;
3934
3935   defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01,
3936                               !cast<RegisterOperand>(List # "8H_operand"),
3937                               uimm_h, asmop>;
3938
3939   defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10,
3940                               !cast<RegisterOperand>(List # "4S_operand"),
3941                               uimm_s, asmop>;
3942
3943   defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11,
3944                               !cast<RegisterOperand>(List # "2D_operand"),
3945                               uimm_d, asmop>;
3946 }
3947
3948 // Post-index load single 1-element structure to all lanes of 1 register
// Increment per element size = element bytes x number of structures, e.g.
// ld2r on .h elements advances by 2 x 2 = 4 bytes.
3949 defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1,
3950                              uimm_exact2, uimm_exact4, uimm_exact8>;
3951
3952 // Post-index load single N-element structure to all lanes of N consecutive
3953 // registers (N = 2,3,4)
3954 defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2,
3955                              uimm_exact4, uimm_exact8, uimm_exact16>;
3956 defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3,
3957                              uimm_exact6, uimm_exact12, uimm_exact24>;
3958 defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4,
3959                              uimm_exact8, uimm_exact16, uimm_exact32>;
3960
// Post-indexed lane-indexed loads.  Like NeonI_LDN_Lane, the register
// list is both input ($src) and output ($Rt) since untouched lanes are
// preserved; the base is written back through $wb (tied to $Rn).
3961 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
3962     Constraints = "$Rn = $wb, $Rt = $src",
3963     DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
  // Immediate post-index form: Rm = 0b11111 (XZR), increment given by
  // ImmTy and fixed at the transfer size.
3964   class LDN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3965                                 Operand ImmTy, Operand ImmOp, string asmop>
3966       : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
3967                                 (outs VList:$Rt, GPR64xsp:$wb),
3968                                 (ins GPR64xsp:$Rn, ImmTy:$amt,
3969                                     VList:$src, ImmOp:$lane),
3970                                 asmop # "\t$Rt[$lane], [$Rn], $amt",
3971                                 [],
3972                                 NoItinerary> {
3973     let Rm = 0b11111;
3974   }
3975
  // Register post-index form: increment taken from Rm (XZR excluded).
3976   class LDN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3977                                  Operand ImmTy, Operand ImmOp, string asmop>
3978       : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
3979                                 (outs VList:$Rt, GPR64xsp:$wb),
3980                                 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm,
3981                                     VList:$src, ImmOp:$lane),
3982                                 asmop # "\t$Rt[$lane], [$Rn], $Rm",
3983                                 [],
3984                                 NoItinerary>;
3985 }
3986
// Instantiate the fixed- and register-post-index lane loads for each element
// size (B/H/S/D).  The lane index is encoded with its highest bit in
// Inst{30} and its remaining bits in the upper part of Inst{12-10}; the
// unused low bits of Inst{12-10} are zero (0b001 for the D form).
multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
                           Operand uimm_b, Operand uimm_h,
                           Operand uimm_s, Operand uimm_d> {
  // Byte lanes: 4-bit lane index.
  def _B_fixed : LDN_WBFx_Lane<r, 0b00, op0,
                               !cast<RegisterOperand>(List # "B_operand"),
                               uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _B_register : LDN_WBReg_Lane<r, 0b00, op0,
                                   !cast<RegisterOperand>(List # "B_operand"),
                                   uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  // Halfword lanes: 3-bit lane index.
  def _H_fixed : LDN_WBFx_Lane<r, 0b01, op0,
                               !cast<RegisterOperand>(List # "H_operand"),
                               uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _H_register : LDN_WBReg_Lane<r, 0b01, op0,
                                   !cast<RegisterOperand>(List # "H_operand"),
                                   uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  // Word lanes: 2-bit lane index.
  def _S_fixed : LDN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "S_operand"),
                               uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _S_register : LDN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "S_operand"),
                                   uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  // Doubleword lanes: 1-bit lane index in Inst{30}; Inst{12-10} fixed.
  def _D_fixed : LDN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "D_operand"),
                               uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }

  def _D_register : LDN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "D_operand"),
                                   uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}
4046
// Post-index load single 1-element structure to one lane of 1 register.
defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1,
                                uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index load single N-element structure to one lane of N consecutive
// registers (N = 2,3,4).  The fixed post-index amount is N * element size
// for each of the B/H/S/D forms.
defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2,
                                uimm_exact4, uimm_exact8, uimm_exact16>;
defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3,
                                uimm_exact6, uimm_exact12, uimm_exact24>;
defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4,
                                uimm_exact8, uimm_exact16, uimm_exact32>;
4060
// Post-index (writeback) store of one lane of a single structure.
// The only def is the updated base register $wb (tied to $Rn); the vector
// list $Rt is a *source* operand.  Hence the variable-width register list
// must be flagged with hasExtraSrcRegAllocReq, not hasExtraDefRegAllocReq
// (which only covers extra defs and is meaningless here, where the sole def
// is the scalar $wb).
let mayStore = 1, neverHasSideEffects = 1,
    hasExtraSrcRegAllocReq = 1, Constraints = "$Rn = $wb",
    DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
  // Fixed post-index form: the base register is advanced by the constant
  // $amt after the access.
  class STN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                      Operand ImmTy, Operand ImmOp, string asmop>
      : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
                                (outs GPR64xsp:$wb),
                                (ins GPR64xsp:$Rn, ImmTy:$amt,
                                    VList:$Rt, ImmOp:$lane),
                                asmop # "\t$Rt[$lane], [$Rn], $amt",
                                [],
                                NoItinerary> {
    // An all-ones Rm field selects the immediate post-index variant.
    let Rm = 0b11111;
  }

  // Register post-index form: the base register is advanced by $Rm
  // (XZR excluded, hence GPR64noxzr).  ImmTy is unused here; it is kept so
  // both classes share one template signature.
  class STN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                       Operand ImmTy, Operand ImmOp, string asmop>
      : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
                                (outs GPR64xsp:$wb),
                                (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt,
                                    ImmOp:$lane),
                                asmop # "\t$Rt[$lane], [$Rn], $Rm",
                                [],
                                NoItinerary>;
}
4086
// Instantiate the fixed- and register-post-index lane stores for each
// element size (B/H/S/D).  Lane-index encoding mirrors LD_Lane_WB_BHSD:
// highest bit in Inst{30}, remaining bits in the upper part of Inst{12-10}.
multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
                           Operand uimm_b, Operand uimm_h,
                           Operand uimm_s, Operand uimm_d> {
  // Byte lanes: 4-bit lane index.
  def _B_fixed : STN_WBFx_Lane<r, 0b00, op0,
                               !cast<RegisterOperand>(List # "B_operand"),
                               uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _B_register : STN_WBReg_Lane<r, 0b00, op0,
                                   !cast<RegisterOperand>(List # "B_operand"),
                                   uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  // Halfword lanes: 3-bit lane index.
  def _H_fixed : STN_WBFx_Lane<r, 0b01, op0,
                               !cast<RegisterOperand>(List # "H_operand"),
                               uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _H_register : STN_WBReg_Lane<r, 0b01, op0,
                                   !cast<RegisterOperand>(List # "H_operand"),
                                   uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  // Word lanes: 2-bit lane index.
  def _S_fixed : STN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "S_operand"),
                               uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _S_register : STN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "S_operand"),
                                   uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  // Doubleword lanes: 1-bit lane index in Inst{30}; Inst{12-10} fixed.
  def _D_fixed : STN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "D_operand"),
                               uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }

  def _D_register : STN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "D_operand"),
                                   uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}
4146
// Post-index store single 1-element structure from one lane of 1 register.
defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1,
                                uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index store single N-element structure from one lane of N consecutive
// registers (N = 2,3,4).  The fixed post-index amount is N * element size
// for each of the B/H/S/D forms.
defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2,
                                uimm_exact4, uimm_exact8, uimm_exact16>;
defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3,
                                uimm_exact6, uimm_exact12, uimm_exact24>;
defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4,
                                uimm_exact8, uimm_exact16, uimm_exact32>;
4159
4160 // End of post-index load/store single N-element instructions
4161 // (class SIMD lsone-post)
4162
4163 // Neon Scalar instructions implementation
4164 // Scalar Three Same
4165
// Scalar three-same: two sources and a result, all in the same register
// class FPRC.
class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
                             RegisterClass FPRC>
  : NeonI_Scalar3Same<u, size, opcode,
                      (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
                      !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                      [],
                      NoItinerary>;

// D-register-only variant (size field 0b11).
class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;

// H- and S-sized variants.
multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
                                      bit Commutable = 0> {
  let isCommutable = Commutable in {
    def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
    def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
  }
}

// S- and D-sized variants; size_high is the high size bit, the low size bit
// selects S (0) or D (1).
multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
                                      string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
    def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
  }
}

// All four element sizes (B/H/S/D).
multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
                                        string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
    def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
    def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
    def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
  }
}
4202
// Select OpNode applied to two v1i64 scalars into the D-register
// instruction DInst.
multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator OpNode,
                                            Instruction DInst> {
  def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
            (DInst FPR64:$Rn, FPR64:$Rm)>;
}
4208
// Integer three-same patterns for all four element sizes; the v1i64 case
// comes from the inherited D-size multiclass.
multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
                                               Instruction INSTB,
                                               Instruction INSTH,
                                               Instruction INSTS,
                                               Instruction INSTD>
  : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
  def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
           (INSTB FPR8:$Rn, FPR8:$Rm)>;
  def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
           (INSTH FPR16:$Rn, FPR16:$Rm)>;
  def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
           (INSTS FPR32:$Rn, FPR32:$Rm)>;
}
4222
// Select OpNode on v1i16/v1i32 scalars into the H- and S-register
// instructions HInst/SInst.
multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator OpNode,
                                             Instruction HInst,
                                             Instruction SInst> {
  def : Pat<(v1i16 (OpNode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (HInst FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i32 (OpNode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (SInst FPR32:$Rn, FPR32:$Rm)>;
}
4231
// S/D three-same patterns with independently specified result and operand
// types (the result may be an integer vector, e.g. for comparisons).
multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
                                             ValueType SResTy, ValueType STy,
                                             Instruction INSTS, ValueType DResTy,
                                             ValueType DTy, Instruction INSTD> {
  def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))),
            (INSTD FPR64:$Rn, FPR64:$Rm)>;
}

// Match a v1f64 Neon_cmp with condition code CC onto the D-register
// comparison instruction INSTD.
class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC,
                                              Instruction INSTD>
  : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)),
        (INSTD FPR64:$Rn, FPR64:$Rm)>;
4246
4247 // Scalar Three Different
4248
// Scalar three-different: result register class (FPRCD) differs from the
// (equal) source register class (FPRCS).
class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
                             RegisterClass FPRCD, RegisterClass FPRCS>
  : NeonI_Scalar3Diff<u, size, opcode,
                      (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
                      !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                      [],
                      NoItinerary>;

// Widening variants: H sources -> S result, S sources -> D result.
multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
  def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
  def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
}

// Widening multiply-accumulate style variants: the destination is also an
// input ($Src, tied to $Rd below).
multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
  let Constraints = "$Src = $Rd" in {
    def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
                       (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                       [],
                       NoItinerary>;
    def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
                       (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                       [],
                       NoItinerary>;
  }
}

// Widening patterns: v1i16 x v1i16 -> v1i32 and v1i32 x v1i32 -> v1i64.
multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTH,
                                             Instruction INSTS> {
  def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (INSTH FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
}

// Accumulating widening patterns: the first operand is the accumulator.
multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTH,
                                             Instruction INSTS> {
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
}
4294
4295 // Scalar Two Registers Miscellaneous
4296
// Scalar two-register miscellaneous: single source, single result; source
// and destination register classes may differ.
class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
                             RegisterClass FPRCD, RegisterClass FPRCS>
  : NeonI_Scalar2SameMisc<u, size, opcode,
                          (outs FPRCD:$Rd), (ins FPRCS:$Rn),
                          !strconcat(asmop, "\t$Rd, $Rn"),
                          [],
                          NoItinerary>;

// S- and D-sized variants; the low size bit selects S (0) or D (1).
multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
                                         string asmop> {
  def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
                                      FPR32>;
  def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
                                      FPR64>;
}

// D-register-only variant.
multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
  def dd : NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
}

// All four element sizes; the D case comes from the inherited multiclass.
multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
  def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
  def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
  def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
}

// Narrowing D -> S conversion (fcvtxn): FPR64 source, FPR32 result.
class NeonI_Scalar2SameMisc_fcvtxn_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR32, FPR64>;

// Narrowing variants: the result register is half the width of the source.
multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
                                                 string asmop> {
  def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
  def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
  def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
}

// Accumulating variant: the destination is also an input ($Src; tied to
// $Rd by the multiclass below).
class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
                                       string asmop, RegisterClass FPRC>
  : NeonI_Scalar2SameMisc<u, size, opcode,
                          (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
                          !strconcat(asmop, "\t$Rd, $Rn"),
                          [],
                          NoItinerary>;

// Accumulating variants for all four sizes, with $Src tied to $Rd.
multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
                                                 string asmop> {

  let Constraints = "$Src = $Rd" in {
    def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
    def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
    def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
    def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
  }
}
4352
// fcvtxn-style narrowing: f64 source -> f32 result.
class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
                                                  Instruction INSTD>
  : Pat<(f32 (opnode (f64 FPR64:$Rn))),
        (INSTD FPR64:$Rn)>;

// FP -> integer conversion patterns: f32 -> v1i32 and f64 -> v1i64.
multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
                                                      Instruction INSTS,
                                                      Instruction INSTD> {
  def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}

// v1f64 -> v1i64 conversion pattern, D register only.
class Neon_Scalar2SameMisc_vcvt_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD>
  : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;

// Integer -> FP conversion patterns: v1i32 -> f32 and v1i64 -> f64.
multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator opnode,
                                                     Instruction INSTS,
                                                     Instruction INSTD> {
  def : Pat<(f32 (opnode (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(f64 (opnode (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}
4380
// Scalar FP unary patterns: select OpNode on f32/f64 into the S- and
// D-register instructions SInst/DInst.
multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator OpNode,
                                                 Instruction SInst,
                                                 Instruction DInst> {
  def : Pat<(f32 (OpNode (f32 FPR32:$Rn))), (SInst FPR32:$Rn)>;
  def : Pat<(f64 (OpNode (f64 FPR64:$Rn))), (DInst FPR64:$Rn)>;
}
4389
// Unary v1f64 -> v1f64 pattern, D register only.
class Neon_Scalar2SameMisc_V1_D_size_patterns<SDPatternOperator opnode,
                                              Instruction INSTD>
  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
        (INSTD FPR64:$Rn)>;

// Integer compare-against-zero, D register; $Imm is the #0 immediate
// operand (neon_uimm0).
class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar2SameMisc<u, 0b11, opcode,
                          (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                          [],
                          NoItinerary>;

// FP compare-against-zero, S and D registers.  $FPImm uses the fpz32
// operand for both sizes — presumably restricted to #0.0; verify against
// the fpz32 operand definition.
multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
                                              string asmop> {
  def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
                           (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
                           !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
                           [],
                           NoItinerary>;
  def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
                           (outs FPR64:$Rd), (ins FPR64:$Rn, fpz32:$FPImm),
                           !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
                           [],
                           NoItinerary>;
}

// Match a D-register compare against an all-zero vector constant.
class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
                       (v1i64 (bitconvert (v8i8 Neon_AllZero))))),
        (INSTD FPR64:$Rn, 0)>;

// Match Neon_cmpz with condition code CC against immediate zero.
class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC,
                                                   Instruction INSTD>
  : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn),
                          (i32 neon_uimm0:$Imm), CC)),
        (INSTD FPR64:$Rn, neon_uimm0:$Imm)>;

// FP compare-against-zero patterns for S and D, plus the generic Neon_cmpz
// form for v1f64 with condition code CC.
multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
                                                      CondCode CC,
                                                      Instruction INSTS,
                                                      Instruction INSTD> {
  def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 fpz32:$FPImm))),
            (INSTS FPR32:$Rn, fpz32:$FPImm)>;
  def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f32 fpz32:$FPImm))),
            (INSTD FPR64:$Rn, fpz32:$FPImm)>;
  def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpz32:$FPImm), CC)),
            (INSTD FPR64:$Rn, fpz32:$FPImm)>;
}

// Unary v1i64 -> v1i64 pattern, D register only.
multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD> {
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}
4445
// Unary integer patterns for all four element sizes; the v1i64 case comes
// from the inherited D-size multiclass.
multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
                                                   Instruction INSTB,
                                                   Instruction INSTH,
                                                   Instruction INSTS,
                                                   Instruction INSTD>
  : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
  def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
            (INSTB FPR8:$Rn)>;
  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
            (INSTH FPR16:$Rn)>;
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
}

// Narrowing patterns: the result type is half the source element width.
multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
                                                       SDPatternOperator opnode,
                                                       Instruction INSTH,
                                                       Instruction INSTS,
                                                       Instruction INSTD> {
  def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
            (INSTH FPR16:$Rn)>;
  def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;

}

// Accumulating patterns: the first operand ($Src) is the accumulator.
multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
                                                       SDPatternOperator opnode,
                                                       Instruction INSTB,
                                                       Instruction INSTH,
                                                       Instruction INSTS,
                                                       Instruction INSTD> {
  def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
            (INSTB FPR8:$Src, FPR8:$Rn)>;
  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
            (INSTH FPR16:$Src, FPR16:$Rn)>;
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Src, FPR32:$Rn)>;
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Src, FPR64:$Rn)>;
}
4489
4490 // Scalar Shift By Immediate
4491
// Scalar shift by immediate: register source plus shift-amount immediate.
// The amount is encoded in the immh:immb field (Inst{22-16}); the position
// of the leading one bit in immh encodes the element size.
class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
                                RegisterClass FPRC, Operand ImmTy>
  : NeonI_ScalarShiftImm<u, opcode,
                         (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
                         !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                         [], NoItinerary>;

// Right-shift immediate, D register only.
multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
                                            string asmop> {
  def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
  }
}

// Right-shift immediate for all four sizes; the D form comes from the
// inherited multiclass.
multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
                                               string asmop>
  : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
  def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
    bits<3> Imm;
    let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
    let Inst{18-16} = Imm;
  }
  def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
    bits<4> Imm;
    let Inst{22-20} = 0b001; // immh:immb = 001xxxx
    let Inst{19-16} = Imm;
  }
  def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
    bits<5> Imm;
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
    let Inst{20-16} = Imm;
  }
}
4527
// Left-shift immediate, D register only; amount encoded in immh:immb.
multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
                                            string asmop> {
  def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
  }
}

// Left-shift immediate for all four sizes; the D form comes from the
// inherited multiclass.
multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
                                              string asmop>
  : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
  def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
    bits<3> Imm;
    let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
    let Inst{18-16} = Imm;
  }
  def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
    bits<4> Imm;
    let Inst{22-20} = 0b001; // immh:immb = 001xxxx
    let Inst{19-16} = Imm;
  }
  def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
    bits<5> Imm;
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
    let Inst{20-16} = Imm;
  }
}

// Accumulating right-shift, D register: the destination is also an input
// ($Src, tied to $Rd).
class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_ScalarShiftImm<u, opcode,
                         (outs FPR64:$Rd),
                         (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
                         !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                         [], NoItinerary> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
    let Constraints = "$Src = $Rd";
}

// Accumulating left-shift, D register: the destination is also an input
// ($Src, tied to $Rd).
class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_ScalarShiftImm<u, opcode,
                         (outs FPR64:$Rd),
                         (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
                         !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                         [], NoItinerary> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
    let Constraints = "$Src = $Rd";
}
4580
// Narrowing shift: the source register class (FPRCS) is wider than the
// result register class (FPRCD).
class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
                                       RegisterClass FPRCD, RegisterClass FPRCS,
                                       Operand ImmTy>
  : NeonI_ScalarShiftImm<u, opcode,
                         (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
                         !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                         [], NoItinerary>;

// Narrowing right shifts: H->B, S->H, D->S.  The shift-amount operand
// matches the narrower destination element size.
multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
                                                string asmop> {
  def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
                                             shr_imm8> {
    bits<3> Imm;
    let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
    let Inst{18-16} = Imm;
  }
  def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
                                             shr_imm16> {
    bits<4> Imm;
    let Inst{22-20} = 0b001; // immh:immb = 001xxxx
    let Inst{19-16} = Imm;
  }
  def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
                                             shr_imm32> {
    bits<5> Imm;
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
    let Inst{20-16} = Imm;
  }
}

// Fixed-point conversion shifts, S and D registers only.
multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
  def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
    bits<5> Imm;
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
    let Inst{20-16} = Imm;
  }
  def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
  }
}
4623
// v1i64 right-shift-by-immediate pattern, D register only.
multiclass Neon_ScalarShiftRImm_D_size_patterns<SDPatternOperator opnode,
                                               Instruction INSTD> {
  def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
                (INSTD FPR64:$Rn, imm:$Imm)>;
}

// v1i64 left-shift-by-immediate pattern, D register only.
multiclass Neon_ScalarShiftLImm_D_size_patterns<SDPatternOperator opnode,
                                               Instruction INSTD> {
  def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))),
                (INSTD FPR64:$Rn, imm:$Imm)>;
}

// Match a shift whose amount arrives as a splatted (Neon_vdup) immediate
// vector rather than a scalar i32.
class Neon_ScalarShiftImm_V1_D_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
            (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))),
        (INSTD FPR64:$Rn, imm:$Imm)>;
4641
4642 multiclass Neon_ScalarShiftLImm_BHSD_size_patterns<SDPatternOperator opnode,
4643                                                    Instruction INSTB,
4644                                                    Instruction INSTH,
4645                                                    Instruction INSTS,
4646                                                    Instruction INSTD>
4647   : Neon_ScalarShiftLImm_D_size_patterns<opnode, INSTD> {
4648   def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))),
4649                 (INSTB FPR8:$Rn, imm:$Imm)>;
4650   def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))),
4651                 (INSTH FPR16:$Rn, imm:$Imm)>;
4652   def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))),
4653                 (INSTS FPR32:$Rn, imm:$Imm)>;
4654 }
4655
// Accumulating left-shift pattern: opnode carries the accumulator
// ($Src) as an extra first operand, matching the tied-operand form of
// the accumulate/insert instructions (e.g. SLI).
class Neon_ScalarShiftLImm_accum_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
            (i32 shl_imm64:$Imm))),
        (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4661
// Accumulating right-shift pattern: same tied-operand shape as the
// left-shift variant above, used by SSRA/USRA/SRSRA/URSRA/SRI.
class Neon_ScalarShiftRImm_accum_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
            (i32 shr_imm64:$Imm))),
        (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4667
// Narrowing right-shift patterns: the result element is half the width
// of the source (H->B, S->H, D->S); the immediate operand class is
// bounded by the *source* element width.
multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
                                                       SDPatternOperator opnode,
                                                       Instruction INSTH,
                                                       Instruction INSTS,
                                                       Instruction INSTD> {
  def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))),
                (INSTH FPR16:$Rn, imm:$Imm)>;
  def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
                (INSTS FPR32:$Rn, imm:$Imm)>;
  def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
                (INSTD FPR64:$Rn, imm:$Imm)>;
}
4680
// Fixed-point to floating-point conversion with an immediate fraction
// bit count (scvtf/ucvtf #fbits): v1iN source, fN result.
multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator opnode,
                                                      Instruction INSTS,
                                                      Instruction INSTD> {
  def ssi : Pat<(f32 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
                (INSTS FPR32:$Rn, imm:$Imm)>;
  def ddi : Pat<(f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
                (INSTD FPR64:$Rn, imm:$Imm)>;
}
4689
// Floating-point to fixed-point conversion with an immediate fraction
// bit count (fcvtzs/fcvtzu #fbits): fN source, v1iN result.
multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator opnode,
                                                      Instruction INSTS,
                                                      Instruction INSTD> {
  def ssi : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
                (INSTS FPR32:$Rn, imm:$Imm)>;
  def ddi : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
                (INSTD FPR64:$Rn, imm:$Imm)>;
}
4698
// Scalar Signed Shift Right (Immediate)
defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
// Pattern to match llvm.arm.* intrinsic.
def : Neon_ScalarShiftImm_V1_D_size_patterns<sra, SSHRddi>;

// Scalar Unsigned Shift Right (Immediate)
defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
// Pattern to match llvm.arm.* intrinsic.
def : Neon_ScalarShiftImm_V1_D_size_patterns<srl, USHRddi>;

// Scalar Signed Rounding Shift Right (Immediate)
defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;

// Scalar Unsigned Rounding Shift Right (Immediate)
defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;

// Scalar Signed Shift Right and Accumulate (Immediate)
def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
          <int_aarch64_neon_vsrads_n, SSRA>;

// Scalar Unsigned Shift Right and Accumulate (Immediate)
def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
          <int_aarch64_neon_vsradu_n, USRA>;

// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
          <int_aarch64_neon_vrsrads_n, SRSRA>;

// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
          <int_aarch64_neon_vrsradu_n, URSRA>;
4738
// Scalar Shift Left (Immediate)
defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
defm : Neon_ScalarShiftLImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
// Pattern to match llvm.arm.* intrinsic.
def : Neon_ScalarShiftImm_V1_D_size_patterns<shl, SHLddi>;

// Signed Saturating Shift Left (Immediate)
defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
                                               SQSHLbbi, SQSHLhhi,
                                               SQSHLssi, SQSHLddi>;
// Pattern to match llvm.arm.* intrinsic.
defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;

// Unsigned Saturating Shift Left (Immediate)
defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
                                               UQSHLbbi, UQSHLhhi,
                                               UQSHLssi, UQSHLddi>;
// Pattern to match llvm.arm.* intrinsic.
defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;

// Signed Saturating Shift Left Unsigned (Immediate)
defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
                                               SQSHLUbbi, SQSHLUhhi,
                                               SQSHLUssi, SQSHLUddi>;

// Shift Right And Insert (Immediate)
def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
          <int_aarch64_neon_vsri, SRI>;

// Shift Left And Insert (Immediate)
def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
def : Neon_ScalarShiftLImm_accum_D_size_patterns
          <int_aarch64_neon_vsli, SLI>;
4776
// Scalar narrowing shift rights: each produces a result element half
// the width of the source (see Neon_ScalarShiftImm_narrow_HSD_size_patterns).

// Signed Saturating Shift Right Narrow (Immediate)
defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
                                                    SQSHRNbhi, SQSHRNhsi,
                                                    SQSHRNsdi>;

// Unsigned Saturating Shift Right Narrow (Immediate)
defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
                                                    UQSHRNbhi, UQSHRNhsi,
                                                    UQSHRNsdi>;

// Signed Saturating Rounded Shift Right Narrow (Immediate)
defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
                                                    SQRSHRNbhi, SQRSHRNhsi,
                                                    SQRSHRNsdi>;

// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
                                                    UQRSHRNbhi, UQRSHRNhsi,
                                                    UQRSHRNsdi>;

// Signed Saturating Shift Right Unsigned Narrow (Immediate)
defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
                                                    SQSHRUNbhi, SQSHRUNhsi,
                                                    SQSHRUNsdi>;

// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
                                                    SQRSHRUNbhi, SQRSHRUNhsi,
                                                    SQRSHRUNsdi>;
4812
// Scalar fixed-point <-> floating-point conversions where the shift
// immediate encodes the number of fraction bits.

// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxs2fp_n,
                                                  SCVTF_Nssi, SCVTF_Nddi>;

// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxu2fp_n,
                                                  UCVTF_Nssi, UCVTF_Nddi>;

// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxs_n,
                                                  FCVTZS_Nssi, FCVTZS_Nddi>;

// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxu_n,
                                                  FCVTZU_Nssi, FCVTZU_Nddi>;
4832
// Patterns For Convert Instructions Between v1f64 and v1i64
// These map the llvm.arm.* fixed-point conversion intrinsics onto the
// D-register immediate conversion instructions defined above.
class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
                                             Instruction INST>
    : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
          (INST FPR64:$Rn, imm:$Imm)>;

class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
                                             Instruction INST>
    : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
          (INST FPR64:$Rn, imm:$Imm)>;

def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
                                             SCVTF_Nddi>;

def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
                                             UCVTF_Nddi>;

def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
                                             FCVTZS_Nddi>;

def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
                                             FCVTZU_Nddi>;
4855
// Scalar Integer Add
let isCommutable = 1 in {
def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
}

// Scalar Integer Sub
def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;

// Pattern for Scalar Integer Add and Sub with D register only
defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;

// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
// Signed and unsigned variants select the same instruction since
// add/sub are sign-agnostic.
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;

// Scalar Integer Saturating Add (Signed, Unsigned)
defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;

// Scalar Integer Saturating Sub (Signed, Unsigned)
defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;


// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Saturating Add, Sub  (Signed, Unsigned)
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
                                           SQADDhhh, SQADDsss, SQADDddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
                                           UQADDhhh, UQADDsss, UQADDddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
                                           SQSUBhhh, SQSUBsss, SQSUBddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
                                           UQSUBhhh, UQSUBsss, UQSUBddd>;

// Scalar Integer Saturating Doubling Multiply Half High
defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;

// Scalar Integer Saturating Rounding Doubling Multiply Half High
defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Saturating Doubling Multiply Half High and
// Scalar Integer Saturating Rounding Doubling Multiply Half High
defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
                                                               SQDMULHsss>;
defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
                                                                SQRDMULHsss>;
4907
// Scalar Floating-point Multiply Extended
defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;

// Scalar Floating-point Reciprocal Step
defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrecps, f32, f32,
                                         FRECPSsss, f64, f64, FRECPSddd>;
// v1f64 form of the llvm.arm.* intrinsic maps onto the same D-register
// instruction.
def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FRECPSddd FPR64:$Rn, FPR64:$Rm)>;

// Scalar Floating-point Reciprocal Square Root Step
defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrsqrts, f32, f32,
                                         FRSQRTSsss, f64, f64, FRSQRTSddd>;
def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
4925
// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Floating-point Multiply Extended,
// Unlike Neon_Scalar3Same_SD_size_patterns this takes plain f32/f64
// operands rather than v1iN/v1fN vector types.
multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
                                                  Instruction INSTS,
                                                  Instruction INSTD> {
  def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
            (INSTD FPR64:$Rn, FPR64:$Rm)>;
}

defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
                                              FMULXsss, FMULXddd>;
// v1f64 form of the same intrinsic.
def : Pat<(v1f64 (int_aarch64_neon_vmulx (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMULXddd FPR64:$Rn, FPR64:$Rm)>;
4941
// Scalar Integer Shift Left (Signed, Unsigned)
def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;

// Scalar Integer Saturating Shift Left (Signed, Unsigned)
defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar  Integer Saturating Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
                                           SQSHLhhh, SQSHLsss, SQSHLddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
                                           UQSHLhhh, UQSHLsss, UQSHLddd>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar  Integer Saturating Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;

// Scalar Integer Rounding Shift Left (Signed, Unsigned)
def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;

// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
                                           SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
                                           UQRSHLhhh, UQRSHLsss, UQRSHLddd>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
5001
// Signed Saturating Doubling Multiply-Add Long
defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
                                            SQDMLALshh, SQDMLALdss>;

// Signed Saturating Doubling Multiply-Subtract Long
defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
                                            SQDMLSLshh, SQDMLSLdss>;

// Signed Saturating Doubling Multiply Long
defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
                                         SQDMULLshh, SQDMULLdss>;
5016
// Scalar Signed Integer Convert To Floating-point
defm SCVTF  : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fps,
                                                 SCVTFss, SCVTFdd>;

// Scalar Unsigned Integer Convert To Floating-point
defm UCVTF  : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fpu,
                                                 UCVTFss, UCVTFdd>;

// Scalar Floating-point Converts
// Each fcvt* rounding variant pairs the llvm.aarch64.* pattern with a
// D-size pattern for the corresponding llvm.arm.* intrinsic.
def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">;
def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
                                                  FCVTXN>;

defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
                                                  FCVTNSss, FCVTNSdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtns, FCVTNSdd>;

defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
                                                  FCVTNUss, FCVTNUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtnu, FCVTNUdd>;

defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
                                                  FCVTMSss, FCVTMSdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtms, FCVTMSdd>;

defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
                                                  FCVTMUss, FCVTMUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtmu, FCVTMUdd>;

defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
                                                  FCVTASss, FCVTASdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtas, FCVTASdd>;

defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
                                                  FCVTAUss, FCVTAUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtau, FCVTAUdd>;

defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
                                                  FCVTPSss, FCVTPSdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtps, FCVTPSdd>;

defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
                                                  FCVTPUss, FCVTPUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtpu, FCVTPUdd>;

defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
                                                  FCVTZSss, FCVTZSdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzs,
                                                FCVTZSdd>;

defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
                                                  FCVTZUss, FCVTZUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzu,
                                                FCVTZUdd>;
5083
// Patterns For Convert Instructions Between v1f64 and v1i64
// Map the generic ISD conversion nodes onto the D-register scalar
// conversion instructions.
class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
                                              Instruction INST>
    : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;

class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
                                              Instruction INST>
    : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;

def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;
5098
// Scalar Floating-point Reciprocal Estimate
defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpe,
                                             FRECPEss, FRECPEdd>;
// llvm.arm.* intrinsic, v1f64 D-size form.
def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrecpe,
                                              FRECPEdd>;

// Scalar Floating-point Reciprocal Exponent
defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
                                             FRECPXss, FRECPXdd>;

// Scalar Floating-point Reciprocal Square Root Estimate
defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrsqrte,
                                                 FRSQRTEss, FRSQRTEdd>;
def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrsqrte,
                                              FRSQRTEdd>;
5117
// Scalar Floating-point Round
// Maps each generic rounding node onto the matching FRINT* D-register
// instruction for v1f64 operands.
class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>;
def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
5129
// Scalar Integer Compare

// Scalar Compare Bitwise Equal
def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;

// Matches a Neon_cmp node with the given condition code and selects
// the corresponding D-register compare instruction.
class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
                                              Instruction INSTD,
                                              CondCode CC>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
        (INSTD FPR64:$Rn, FPR64:$Rm)>;

def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;

// Scalar Compare Signed Greater Than Or Equal
def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>;

// Scalar Compare Unsigned Higher Or Same
def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>;

// Scalar Compare Unsigned Higher
def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>;

// Scalar Compare Signed Greater Than
def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>;

// Scalar Compare Bitwise Test Bits
def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
defm : Neon_Scalar3Same_D_size_patterns<Neon_tst, CMTSTddd>;
5168
// Scalar integer compares against zero (immediate #0 forms).

// Scalar Compare Bitwise Equal To Zero
def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
                                                CMEQddi>;
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>;

// Scalar Compare Signed Greater Than Or Equal To Zero
def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
                                                CMGEddi>;
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>;

// Scalar Compare Signed Greater Than Zero
def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
                                                CMGTddi>;
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>;

// Scalar Compare Signed Less Than Or Equal To Zero
def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
                                                CMLEddi>;
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>;

// Scalar Compare Less Than Zero
def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
                                                CMLTddi>;
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>;
5198
// Scalar Floating-point Compare

// Scalar Floating-point Compare Mask Equal
defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fceq, v1i32, f32,
                                         FCMEQsss, v1i64, f64, FCMEQddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>;

// Scalar Floating-point Compare Mask Equal To Zero
defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fceq, SETEQ,
                                                  FCMEQZssi, FCMEQZddi>;

// Scalar Floating-point Compare Mask Greater Than Or Equal
defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcge, v1i32, f32,
                                         FCMGEsss, v1i64, f64, FCMGEddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>;

// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcge, SETGE,
                                                  FCMGEZssi, FCMGEZddi>;

// Scalar Floating-point Compare Mask Greater Than
defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcgt, v1i32, f32,
                                         FCMGTsss, v1i64, f64, FCMGTddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>;

// Scalar Floating-point Compare Mask Greater Than Zero
defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcgt, SETGT,
                                                  FCMGTZssi, FCMGTZddi>;

// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fclez, SETLE,
                                                  FCMLEZssi, FCMLEZddi>;

// Scalar Floating-point Compare Mask Less Than Zero
defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcltz, SETLT,
                                                  FCMLTZssi, FCMLTZddi>;
5243
// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcage, v1i32, f32,
                                         FACGEsss, v1i64, f64, FACGEddd>;
// The v1f64 absolute-compare intrinsic maps directly onto the D-register form.
def : Pat<(v1i64 (int_aarch64_neon_vcage (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FACGEddd FPR64:$Rn, FPR64:$Rm)>;

// Scalar Floating-point Absolute Compare Mask Greater Than
defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcagt, v1i32, f32,
                                         FACGTsss, v1i64, f64, FACGTddd>;
def : Pat<(v1i64 (int_aarch64_neon_vcagt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FACGTddd FPR64:$Rn, FPR64:$Rm)>;

// Scalar Floating-point Absolute Difference
defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd, f32, f32,
                                         FABDsss, f64, f64, FABDddd>;
5262
// Scalar Absolute Value
defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;

// Scalar Signed Saturating Absolute Value
defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
                                               SQABSbb, SQABShh, SQABSss, SQABSdd>;

// Scalar Negate
defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;

// Scalar Signed Saturating Negate
defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
                                               SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;

// Scalar Signed Saturating Accumulated of Unsigned Value
// Accumulating form: $Src is both read and written (see *_accum_* multiclass).
defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
                                                     SUQADDbb, SUQADDhh,
                                                     SUQADDss, SUQADDdd>;

// Scalar Unsigned Saturating Accumulated of Signed Value
defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
                                                     USQADDbb, USQADDhh,
                                                     USQADDss, USQADDdd>;

// Additional v1i64 patterns for the AArch64-specific accumulate intrinsics.
def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src),
                                          (v1i64 FPR64:$Rn))),
          (SUQADDdd FPR64:$Src, FPR64:$Rn)>;

def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src),
                                          (v1i64 FPR64:$Rn))),
          (USQADDdd FPR64:$Src, FPR64:$Rn)>;

// v1i64 patterns for the ARM-compatible intrinsic spellings.
def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))),
          (ABSdd FPR64:$Rn)>;

def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))),
          (SQABSdd FPR64:$Rn)>;

def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))),
          (SQNEGdd FPR64:$Rn)>;

// Fold (0 - x) on v1i64 into NEG; the zero is an all-zero v8i8 bitcast.
def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))),
                      (v1i64 FPR64:$Rn))),
          (NEGdd FPR64:$Rn)>;
5313
// Scalar Signed Saturating Extract Unsigned Narrow
defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
                                                     SQXTUNbh, SQXTUNhs,
                                                     SQXTUNsd>;

// Scalar Signed Saturating Extract Narrow
defm SQXTN  : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
                                                     SQXTNbh, SQXTNhs,
                                                     SQXTNsd>;

// Scalar Unsigned Saturating Extract Narrow
defm UQXTN  : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
                                                     UQXTNbh, UQXTNhs,
                                                     UQXTNsd>;
5331
// Scalar Reduce Pairwise

// Pairwise reduction: one ".2d" variant reducing a 128-bit vector to a
// 64-bit scalar result.  The low size bit is forced to 1 ({size, 0b1}).
// Patterns are attached separately via the Neon_ScalarPair_* multiclasses.
multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
                                     string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
                                (outs FPR64:$Rd), (ins VPR128:$Rn),
                                !strconcat(asmop, "\t$Rd, $Rn.2d"),
                                [],
                                NoItinerary>;
  }
}
5344
// Extends the D-sized pairwise multiclass with a ".2s" variant reducing a
// 64-bit vector to a 32-bit scalar (low size bit forced to 0).
multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
                                     string asmop, bit Commutable = 0>
  : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
  let isCommutable = Commutable in {
    def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
                                (outs FPR32:$Rd), (ins VPR64:$Rn),
                                !strconcat(asmop, "\t$Rd, $Rn.2s"),
                                [],
                                NoItinerary>;
  }
}
5356
// Scalar Reduce Addition Pairwise (Integer) with
// Pattern to match llvm.arm.* intrinsic
defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;

// Pattern to match llvm.aarch64.* intrinsic for
// Scalar Reduce Addition Pairwise (Integer)
def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
          (ADDPvv_D_2D VPR128:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))),
          (ADDPvv_D_2D VPR128:$Rn)>;

// Scalar Reduce Addition Pairwise (Floating Point)
defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;

// Scalar Reduce Maximum Pairwise (Floating Point)
defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;

// Scalar Reduce Minimum Pairwise (Floating Point)
defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;

// Scalar Reduce maxNum Pairwise (Floating Point)
defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;

// Scalar Reduce minNum Pairwise (Floating Point)
defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;

// Maps a pairwise-reduce operator onto the _S_2S (v2f32 -> f32) and
// _D_2D (v2f64 -> f64) instruction variants.
multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnode,
                                            Instruction INSTS,
                                            Instruction INSTD> {
  def : Pat<(f32 (opnode (v2f32 VPR64:$Rn))),
            (INSTS VPR64:$Rn)>;
  def : Pat<(f64 (opnode (v2f64 VPR128:$Rn))),
            (INSTD VPR128:$Rn)>;
}

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point)
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
                                        FADDPvv_S_2S, FADDPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
                                        FMAXPvv_S_2S, FMAXPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
                                        FMINPvv_S_2S, FMINPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
                                        FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
                                        FMINNMPvv_S_2S, FMINNMPvv_D_2D>;

// Reduce a v4f32 addition: first FADDP.4S folds pairs, then the scalar
// FADDP on the low v2f32 half produces the final f32 sum.
def : Pat<(f32 (int_aarch64_neon_vpfadd (v4f32 VPR128:$Rn))),
          (FADDPvv_S_2S (v2f32
               (EXTRACT_SUBREG
                   (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))),
                   sub_64)))>;
5414
// Scalar by element Arithmetic

// Base class for "scalar by element" arithmetic: result in ResFPR, scalar
// operand in OpFPR, and a vector operand OpVPR indexed by immediate lane
// $Imm.  Derived defs place Imm/MRm into the instruction encoding.
class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
                                    string rmlane, bit u, bit szhi, bit szlo,
                                    RegisterClass ResFPR, RegisterClass OpFPR,
                                    RegisterOperand OpVPR, Operand OpImm>
  : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
                             (outs ResFPR:$Rd),
                             (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                             asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
                             [],
                             NoItinerary> {
  bits<3> Imm;   // lane index; encoding bits assigned per-def
  bits<5> MRm;   // vector register operand
}

// Same as above but with an accumulator: $src is tied to $Rd, so the
// destination register is also an input (fmla/fmls/sqdmlal/sqdmlsl).
class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
                                                    string rmlane,
                                                    bit u, bit szhi, bit szlo,
                                                    RegisterClass ResFPR,
                                                    RegisterClass OpFPR,
                                                    RegisterOperand OpVPR,
                                                    Operand OpImm>
  : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
                             (outs ResFPR:$Rd),
                             (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                             asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
                             [],
                             NoItinerary> {
  let Constraints = "$src = $Rd";
  bits<3> Imm;   // lane index; encoding bits assigned per-def
  bits<5> MRm;   // vector register operand
}
5448
// Scalar Floating Point  multiply (scalar, by element)
// The 2-bit lane index for the .s form is split across the h (Inst{11})
// and l (Inst{21}) encoding bits.
def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
  0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
// The .d form has only a 1-bit lane index (h); l is fixed to 0.
def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
  0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0;    // l
  let Inst{20-16} = MRm;
}

// Scalar Floating Point  multiply extended (scalar, by element)
def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
  0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
  0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0;    // l
  let Inst{20-16} = MRm;
}
5476
// Patterns selecting a by-element multiply for (scalar op extracted-lane),
// in both operand orders.  The 64-bit vector source (OpNTy) variants widen
// the register to 128 bits with SUBREG_TO_REG so the instruction's VPR128
// operand is satisfied.
multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
  SDPatternOperator opnode,
  Instruction INST,
  ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {

  def  : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
             (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
             (ResTy (INST (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // swapped operands
  def  : Pat<(ResTy (opnode
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
               (ResTy FPRC:$Rn))),
             (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (opnode
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
               (ResTy FPRC:$Rn))),
             (ResTy (INST (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;
}

// Patterns for Scalar Floating Point  multiply (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S,
  f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D,
  f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;

// Patterns for Scalar Floating Point  multiply extended (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
  FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare,
  v2f32, v4f32, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
  FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
  v1f64, v2f64, neon_uimm0_bare>;
5520
// Scalar Floating Point fused multiply-add (scalar, by element)
// Accumulating forms: built on the _Constraint_Impl class, so $Rd is tied
// to the accumulator input.
def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
  0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
  0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0;    // l
  let Inst{20-16} = MRm;
}

// Scalar Floating Point fused multiply-subtract (scalar, by element)
def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
  0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
  0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0;    // l
  let Inst{20-16} = MRm;
}
// We are allowed to match the fma instruction regardless of compile options.
// Selects FMLA/FMLS for (fma scalar, extracted-lane, acc) in both multiply
// operand orders; the fmls cases match an fneg on the extracted lane.
// 64-bit vector sources are widened with SUBREG_TO_REG as above.
multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
  Instruction FMLAI, Instruction FMLSI,
  ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
  // fmla
  def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // swapped fmla operands
  def  : Pat<(ResTy (fma
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (fma
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // fmls
  def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // swapped fmls operands
  def  : Pat<(ResTy (fma
               (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (fma
               (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;
}
5617
// Scalar Floating Point fused multiply-add and
// multiply-subtract (scalar, by element)
// One instantiation per element size: .s lanes from v4f32 (with a v2f32
// narrow-vector variant), and .d lanes from v2f64 (with a v1f64 variant).
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
  f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
  f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
5626
// Scalar Signed saturating doubling multiply long (scalar, by element)
// .h forms index 16-bit lanes: the index is split across h/l/m encoding
// bits and only 4 bits of MRm are encoded (register operand restricted to
// VPR64Lo/VPR128Lo).
def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}

// Selects a by-element multiply when one operand is a lane extracted from
// a vector and re-wrapped with scalar_to_vector; both operand orders.
multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
  SDPatternOperator opnode,
  Instruction INST,
  ValueType ResTy, RegisterClass FPRC,
  ValueType OpVTy, ValueType OpTy,
  ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def  : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
               (OpVTy (scalar_to_vector
                 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
             (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;

  //swapped operands
  def  : Pat<(ResTy (opnode
               (OpVTy (scalar_to_vector
                 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
                 (OpVTy FPRC:$Rn))),
             (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
}


// Patterns for Scalar Signed saturating doubling
// multiply long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
5690
// Scalar Signed saturating doubling multiply-add long (scalar, by element)
// Accumulating forms (_Constraint_Impl): $Rd is tied to the accumulator.
def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}

// Scalar Signed saturating doubling
// multiply-subtract long (scalar, by element)
def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}
5747
// Selects an accumulating by-element instruction for
//   acc `opnode` (Rn `coreopnode` extracted-lane)
// e.g. opnode = vqadds/vqsubs around coreopnode = vqdmull; both multiply
// operand orders are covered.
multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
  SDPatternOperator opnode,
  SDPatternOperator coreopnode,
  Instruction INST,
  ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
  ValueType OpTy,
  ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def  : Pat<(ResTy (opnode
               (ResTy ResFPRC:$Ra),
               (ResTy (coreopnode (OpTy FPRC:$Rn),
                 (OpTy (scalar_to_vector
                   (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
             (ResTy (INST (ResTy ResFPRC:$Ra),
               (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

  // swapped operands
  def  : Pat<(ResTy (opnode
               (ResTy ResFPRC:$Ra),
               (ResTy (coreopnode
                 (OpTy (scalar_to_vector
                   (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
                 (OpTy FPRC:$Rn))))),
             (ResTy (INST (ResTy ResFPRC:$Ra),
               (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
}

// Patterns for Scalar Signed saturating
// doubling multiply-add long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;

// Patterns for Scalar Signed saturating
// doubling multiply-sub long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
5804
// Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}

// Patterns for Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
5848
// Scalar Signed saturating rounding doubling multiply
// returning high half (scalar, by element)
def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}

// Patterns for the rounding variant of the high-half multiply intrinsic.
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32,
  VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32,
  VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32,
  VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
  VPR128Lo, neon_uimm2_bare>;
5890
// Scalar general arithmetic operation
// Pattern classes mapping generic v1f64 DAG operators onto scalar FP
// instructions: 2D = unary, 3D = binary, 4D = three-operand (fused) form.
class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (INST FPR64:$Rn, FPR64:$Rm)>;

class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
              (v1f64 FPR64:$Ra))),
          (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
5906
// Instantiate the v1f64 scalar arithmetic patterns for the generic and
// intrinsic operators handled by the plain scalar FP instructions.
def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;

def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;

def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
5922
// Scalar Copy - DUP element to scalar
// Base class: copies vector element $Rn.<asmlane>[$Imm] into scalar
// register $Rd. Subclasses encode the 4-bit index into Inst{20-16}.
class NeonI_Scalar_DUP<string asmop, string asmlane,
                       RegisterClass ResRC, RegisterOperand VPRC,
                       Operand OpImm>
  : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
                     asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
                     [],
                     NoItinerary> {
  // Element index; placed into the imm5 field by the concrete defs.
  bits<4> Imm;
}
5933
// Concrete DUP (element to scalar) forms. The imm5 field Inst{20-16}
// holds the index shifted left past a size-marker bit: xxxx1 for bytes,
// xxx10 for halfwords, xx100 for words, x1000 for doublewords.
def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}
5946
// Select vector_extract of a single element through a scalar DUP.
// The second pattern handles a 64-bit source vector by first widening it
// to 128 bits with SUBREG_TO_REG so the 128-bit DUP instruction applies.
multiclass NeonI_Scalar_DUP_Elt_pattern<Instruction DUPI, ValueType ResTy,
  ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
  def : Pat<(ResTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  def : Pat<(ResTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
                OpNImm:$Imm))>;
}
5958
// Patterns for vector extract of FP data using scalar DUP instructions
defm : NeonI_Scalar_DUP_Elt_pattern<DUPsv_S, f32,
  v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Elt_pattern<DUPdv_D, f64,
  v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
5964
// Select extract_subvector (a one-element v1ix slice of a larger vector)
// through a scalar DUP; the 64-bit-source variant widens with
// SUBREG_TO_REG first, as above.
multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
  ValueType ResTy, ValueType OpTy,Operand OpLImm,
  ValueType NOpTy, ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
            (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;

  def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
                OpNImm:$Imm))>;
}
5977
// Patterns for extract subvectors of v1ix data using scalar DUP instructions.
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare,
                                        v8i8, v16i8, neon_uimm3_bare>;
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare,
                                        v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare,
                                        v2i32, v4i32, neon_uimm1_bare>;
5985
// Select "insert extracted element into lane 0 of an undef vector"
// through a scalar DUP.
// NOTE(review): in the first pattern the insert index operand
// (neon_uimm0_bare) reuses the name $Imm of the extract index — confirm
// this name sharing is intentional and matches as expected.
multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy,
                                          ValueType OpTy, ValueType ElemTy,
                                          Operand OpImm, ValueType OpNTy,
                                          ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (vector_insert (ResTy undef),
              (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
              (neon_uimm0_bare:$Imm))),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  def : Pat<(ResTy (vector_insert (ResTy undef),
              (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
              (OpNImm:$Imm))),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              OpNImm:$Imm))>;
}
6003
// Same as Copy_pattern1 but for scalar_to_vector of an extracted element
// (instead of vector_insert into undef).
multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy,
                                          ValueType OpTy, ValueType ElemTy,
                                          Operand OpImm, ValueType OpNTy,
                                          ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (scalar_to_vector
              (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  def : Pat<(ResTy (scalar_to_vector
              (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              OpNImm:$Imm))>;
}
6019
// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
// instructions. One instantiation per element width, for both the
// vector_insert-into-undef form and the scalar_to_vector form.
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
  v1i64, v2i64, i64, neon_uimm1_bare,
  v1i64, v2i64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
  v1i32, v4i32, i32, neon_uimm2_bare,
  v2i32, v4i32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H,
  v1i16, v8i16, i32, neon_uimm3_bare,
  v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B,
  v1i8, v16i8, i32, neon_uimm4_bare,
  v8i8, v16i8, neon_uimm3_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
  v1i64, v2i64, i64, neon_uimm1_bare,
  v1i64, v2i64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
  v1i32, v4i32, i32, neon_uimm2_bare,
  v2i32, v4i32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H,
  v1i16, v8i16, i32, neon_uimm3_bare,
  v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B,
  v1i8, v16i8, i32, neon_uimm4_bare,
  v8i8, v16i8, neon_uimm3_bare>;
6046
// Parse-only alias ("mov" spelling) for DUP element-to-scalar; the final
// 0b0 suppresses printing, so this only affects the asm parser.
// NOTE(review): the alias string concatenates asmop directly with "$Rd"
// (no separating whitespace) — confirm the asm matcher tokenizes this as
// intended.
multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
                                  Instruction DUPI, Operand OpImm,
                                  RegisterClass ResRC> {
  def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"),
          (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
}
6053
// Aliases for Scalar copy - DUP element (scalar)
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
6061
// Select the low/high half of a 128-bit vector: the low half is a plain
// subregister extract; the high half is DUP of doubleword lane 1 into a
// scalar D register.
multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy,
                      ValueType OpTy> {
  def : Pat<(ResTy (GetLow VPR128:$Rn)),
            (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
  def : Pat<(ResTy (GetHigh VPR128:$Rn)),
            (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
}
6069
// Low/high-half extraction patterns for every 128-bit element layout.
defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
6076
6077 //===----------------------------------------------------------------------===//
6078 // Non-Instruction Patterns
6079 //===----------------------------------------------------------------------===//
6080
6081 // 64-bit vector bitcasts...
6082
6083 def : Pat<(v1i64 (bitconvert (v8i8  VPR64:$src))), (v1i64 VPR64:$src)>;
6084 def : Pat<(v2f32 (bitconvert (v8i8  VPR64:$src))), (v2f32 VPR64:$src)>;
6085 def : Pat<(v2i32 (bitconvert (v8i8  VPR64:$src))), (v2i32 VPR64:$src)>;
6086 def : Pat<(v4i16 (bitconvert (v8i8  VPR64:$src))), (v4i16 VPR64:$src)>;
6087
6088 def : Pat<(v1i64 (bitconvert (v4i16  VPR64:$src))), (v1i64 VPR64:$src)>;
6089 def : Pat<(v2i32 (bitconvert (v4i16  VPR64:$src))), (v2i32 VPR64:$src)>;
6090 def : Pat<(v2f32 (bitconvert (v4i16  VPR64:$src))), (v2f32 VPR64:$src)>;
6091 def : Pat<(v8i8  (bitconvert (v4i16  VPR64:$src))), (v8i8 VPR64:$src)>;
6092
6093 def : Pat<(v1i64 (bitconvert (v2i32  VPR64:$src))), (v1i64 VPR64:$src)>;
6094 def : Pat<(v2f32 (bitconvert (v2i32  VPR64:$src))), (v2f32 VPR64:$src)>;
6095 def : Pat<(v4i16 (bitconvert (v2i32  VPR64:$src))), (v4i16 VPR64:$src)>;
6096 def : Pat<(v8i8  (bitconvert (v2i32  VPR64:$src))), (v8i8 VPR64:$src)>;
6097
6098 def : Pat<(v1i64 (bitconvert (v2f32  VPR64:$src))), (v1i64 VPR64:$src)>;
6099 def : Pat<(v2i32 (bitconvert (v2f32  VPR64:$src))), (v2i32 VPR64:$src)>;
6100 def : Pat<(v4i16 (bitconvert (v2f32  VPR64:$src))), (v4i16 VPR64:$src)>;
6101 def : Pat<(v8i8  (bitconvert (v2f32  VPR64:$src))), (v8i8 VPR64:$src)>;
6102
6103 def : Pat<(v2f32 (bitconvert (v1i64  VPR64:$src))), (v2f32 VPR64:$src)>;
6104 def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;
6105 def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
6106 def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
6107
// ..and 128-bit vector bitcasts...
// Likewise for the 128-bit types in VPR128: bitconvert is register-neutral.

def : Pat<(v2f64 (bitconvert (v16i8  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8  VPR128:$src))), (v8i16 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v8i16  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16  VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v4i32  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32  VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32  VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v4f32  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32  VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32  VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v2i64  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64  VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64  VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2i64 (bitconvert (v2f64  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64  VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64  VPR128:$src))), (v16i8 VPR128:$src)>;
6145
// ...and scalar bitcasts...
// Casts within an FPR class are free; crossing between GPR and FPR banks
// requires an FMOV register move in each direction.
def : Pat<(f16 (bitconvert (v1i16  FPR16:$src))), (f16 FPR16:$src)>;
def : Pat<(f32 (bitconvert (v1i32  FPR32:$src))), (f32 FPR32:$src)>;
def : Pat<(f64 (bitconvert (v1i64  FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64  FPR64:$src))), (f64 FPR64:$src)>;

// 64-bit vector/scalar FP register -> 64-bit GPR: FMOV Xd, Dn.
def : Pat<(i64 (bitconvert (v1i64  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v1f64  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v2i32  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v2f32  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v4i16  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v8i8  FPR64:$src))), (FMOVxd $src)>;

// 32-bit FP register -> 32-bit GPR: FMOV Wd, Sn.
def : Pat<(i32 (bitconvert (v1i32  FPR32:$src))), (FMOVws $src)>;

def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;

def : Pat<(f64   (bitconvert (v8i8  VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v4i16  VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v2i32  VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v2f32  VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v1i64  VPR64:$src))), (f64 VPR64:$src)>;

def : Pat<(f128  (bitconvert (v16i8  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v8i16  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v4i32  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v2i64  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v4f32  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v2f64  VPR128:$src))), (f128 VPR128:$src)>;

def : Pat<(v1i16 (bitconvert (f16  FPR16:$src))), (v1i16 FPR16:$src)>;
def : Pat<(v1i32 (bitconvert (f32  FPR32:$src))), (v1i32 FPR32:$src)>;
def : Pat<(v1i64 (bitconvert (f64  FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64  FPR64:$src))), (v1f64 FPR64:$src)>;

// 64-bit GPR -> 64-bit vector/scalar FP register: FMOV Dd, Xn.
def : Pat<(v1i64 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v1f64 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v2i32 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v2f32 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v4i16 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v8i8 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;

// 32-bit GPR -> 32-bit FP register: FMOV Sd, Wn.
def : Pat<(v1i32 (bitconvert (i32  GPR32:$src))), (FMOVsw $src)>;

def : Pat<(v8i8   (bitconvert (f64   FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16  (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32  (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32  (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64  (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;

def : Pat<(v16i8  (bitconvert (f128   FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16  (bitconvert (f128   FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32  (bitconvert (f128   FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64  (bitconvert (f128   FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32  (bitconvert (f128   FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64  (bitconvert (f128   FPR128:$src))), (v2f64 FPR128:$src)>;
6204
// Scalar Three Same

// Unsigned 3-bit immediate (0..7), printed in hex; used as the EXT index
// for 64-bit vectors.
def neon_uimm3 : Operand<i64>,
                   ImmLeaf<i64, [{return Imm < 8;}]> {
  let ParserMatchClass = uimm3_asmoperand;
  let PrintMethod = "printUImmHexOperand";
}

// Unsigned 4-bit immediate (0..15), printed in hex; used as the EXT index
// for 128-bit vectors.
def neon_uimm4 : Operand<i64>,
                   ImmLeaf<i64, [{return Imm < 16;}]> {
  let ParserMatchClass = uimm4_asmoperand;
  let PrintMethod = "printUImmHexOperand";
}
6218
// Bitwise Extract
// EXT: concatenate Rn:Rm and extract a byte-indexed window. The 8b form
// only has a 3-bit index, so the top index bit is fixed to zero.
class NeonI_Extract<bit q, bits<2> op2, string asmop,
                    string OpS, RegisterOperand OpVPR, Operand OpImm>
  : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
                     (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
                     asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
                     ", $Rm." # OpS # ", $Index",
                     [],
                     NoItinerary>{
  // Byte index into the concatenated Rn:Rm pair.
  bits<4> Index;
}

def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
                               VPR64, neon_uimm3> {
  let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
}

def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
                               VPR128, neon_uimm4> {
  let Inst{14-11} = Index;
}
6240
// Select the target Neon_vextract node onto EXT for every 64-bit and
// 128-bit vector element layout.
class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
                 Operand OpImm>
  : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
                                 (i64 OpImm:$Imm))),
              (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;

def : NI_Extract<v8i8,  VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v4i16, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v2i32, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v1i64, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v2f32, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v1f64, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
6259
// Table lookup
// TBL: index bytes of $Rm select bytes from the table register list $Rn.
class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
             string asmop, string OpS, RegisterOperand OpVPR,
             RegisterOperand VecList>
  : NeonI_TBL<q, op2, len, op,
              (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
              asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
              [],
              NoItinerary>;

// The vectors in the lookup table are always 16b
multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
  def _8b  : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
                    !cast<RegisterOperand>(List # "16B_operand")>;

  def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
                    !cast<RegisterOperand>(List # "16B_operand")>;
}

// One to four table registers (len = 0b00..0b11).
defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
6283
// Table lookup extension
// TBX is TBL with merge semantics: out-of-range indices leave the
// corresponding destination byte unchanged, hence the tied $src operand.
class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
             string asmop, string OpS, RegisterOperand OpVPR,
             RegisterOperand VecList>
  : NeonI_TBL<q, op2, len, op,
              (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
              asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
              [],
              NoItinerary> {
  let Constraints = "$src = $Rd";
}

// The vectors in the lookup table are always 16b
multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
  def _8b  : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
                    !cast<RegisterOperand>(List # "16B_operand")>;

  def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
                    !cast<RegisterOperand>(List # "16B_operand")>;
}

// One to four table registers (len = 0b00..0b11).
defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
6309
// INS (general): insert a GPR value into vector lane $Imm of $Rd,
// preserving the other lanes (hence the tied $src operand). The lane
// index is encoded into imm5 with the usual size-marker bit.
class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
                     RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
  : NeonI_copy<0b1, 0b0, 0b0011,
               (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
               asmop # "\t$Rd." # Res # "[$Imm], $Rn",
               [(set (ResTy VPR128:$Rd),
                 (ResTy (vector_insert
                   (ResTy VPR128:$src),
                   (OpTy OpGPR:$Rn),
                   (OpImm:$Imm))))],
               NoItinerary> {
  bits<4> Imm;
  let Constraints = "$src = $Rd";
}

// Insert element (vector, from main)
def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
                           neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
                           neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
                           neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
                           neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}

// "mov" spelling aliases for INS (general); parse-only (final 0 argument).
def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn",
                    (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn",
                    (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn",
                    (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn",
                    (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>;
6351
// vector_insert into a 64-bit vector: widen $src to 128 bits with
// SUBREG_TO_REG, perform the 128-bit INS, then take back the low half.
class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
                             RegisterClass OpGPR, ValueType OpTy,
                             Operand OpImm, Instruction INS>
  : Pat<(ResTy (vector_insert
              (ResTy VPR64:$src),
              (OpTy OpGPR:$Rn),
              (OpImm:$Imm))),
        (ResTy (EXTRACT_SUBREG
          (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
            OpGPR:$Rn, OpImm:$Imm)), sub_64))>;

def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
                                          neon_uimm3_bare, INSbw>;
def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
                                          neon_uimm2_bare, INShw>;
def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
                                          neon_uimm1_bare, INSsw>;
def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
                                          neon_uimm0_bare, INSdx>;
6371
// INS (element): copy lane $Immn of $Rn into lane $Immd of $Rd, keeping
// the other destination lanes (tied $src). Immd goes into imm5,
// Immn into imm4.
class NeonI_INS_element<string asmop, string Res, Operand ResImm>
  : NeonI_insert<0b1, 0b1,
                 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
                 ResImm:$Immd, ResImm:$Immn),
                 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
                 [],
                 NoItinerary> {
  let Constraints = "$src = $Rd";
  bits<4> Immd;  // destination lane index
  bits<4> Immn;  // source lane index
}

// Insert element (vector, from element)
def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
  let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
  let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
}
def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
  let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
  let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0};
  // bit 11 is unspecified, but should be set to zero.
}
def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
  let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
  let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0};
  // bits 11-12 are unspecified, but should be set to zero.
}
def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
  let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
  let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0};
  // bits 11-13 are unspecified, but should be set to zero.
}

// "mov" spelling aliases for INS (element); parse-only (final 0 argument).
def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]",
                    (INSELb VPR128:$Rd, VPR128:$Rn,
                      neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>;
def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]",
                    (INSELh VPR128:$Rd, VPR128:$Rn,
                      neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>;
def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]",
                    (INSELs VPR128:$Rd, VPR128:$Rn,
                      neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>;
def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]",
                    (INSELd VPR128:$Rd, VPR128:$Rn,
                      neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>;
6417
// Select insert-of-extracted-element onto INS (element). Four patterns
// cover the 128/64-bit combinations of destination and source vectors;
// 64-bit operands are widened with SUBREG_TO_REG, and a 64-bit result is
// recovered with EXTRACT_SUBREG of the low half.
multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
                                ValueType MidTy, Operand StImm, Operand NaImm,
                                Instruction INS> {
// 128-bit dest <- 128-bit src
def : Pat<(ResTy (vector_insert
            (ResTy VPR128:$src),
            (MidTy (vector_extract
              (ResTy VPR128:$Rn),
              (StImm:$Immn))),
            (StImm:$Immd))),
          (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
              StImm:$Immd, StImm:$Immn)>;

// 128-bit dest <- 64-bit src
def : Pat <(ResTy (vector_insert
             (ResTy VPR128:$src),
             (MidTy (vector_extract
               (NaTy VPR64:$Rn),
               (NaImm:$Immn))),
             (StImm:$Immd))),
           (INS (ResTy VPR128:$src),
             (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
             StImm:$Immd, NaImm:$Immn)>;

// 64-bit dest <- 128-bit src
def : Pat <(NaTy (vector_insert
             (NaTy VPR64:$src),
             (MidTy (vector_extract
               (ResTy VPR128:$Rn),
               (StImm:$Immn))),
             (NaImm:$Immd))),
           (NaTy (EXTRACT_SUBREG
             (ResTy (INS
               (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
               (ResTy VPR128:$Rn),
               NaImm:$Immd, StImm:$Immn)),
             sub_64))>;

// 64-bit dest <- 64-bit src
def : Pat <(NaTy (vector_insert
             (NaTy VPR64:$src),
             (MidTy (vector_extract
               (NaTy VPR64:$Rn),
               (NaImm:$Immn))),
             (NaImm:$Immd))),
           (NaTy (EXTRACT_SUBREG
             (ResTy (INS
               (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
               (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
               NaImm:$Immd, NaImm:$Immn)),
             sub_64))>;
}

defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
                            neon_uimm1_bare, INSELs>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
                            neon_uimm0_bare, INSELd>;
defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
                            neon_uimm3_bare, INSELb>;
defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
                            neon_uimm2_bare, INSELh>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                            neon_uimm1_bare, INSELs>;
defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
                            neon_uimm0_bare, INSELd>;
6479
// Insert an FP scalar register into a vector lane: view the FPR as lane 0
// of a 128-bit vector (SUBREG_TO_REG) and copy it with INS (element).
// The second pattern handles a 64-bit destination via widen/extract.
multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
                                      ValueType MidTy,
                                      RegisterClass OpFPR, Operand ResImm,
                                      SubRegIndex SubIndex, Instruction INS> {
def : Pat <(ResTy (vector_insert
             (ResTy VPR128:$src),
             (MidTy OpFPR:$Rn),
             (ResImm:$Imm))),
           (INS (ResTy VPR128:$src),
             (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
             ResImm:$Imm,
             (i64 0))>;

def : Pat <(NaTy (vector_insert
             (NaTy VPR64:$src),
             (MidTy OpFPR:$Rn),
             (ResImm:$Imm))),
           (NaTy (EXTRACT_SUBREG
             (ResTy (INS
               (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
               (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
               ResImm:$Imm,
               (i64 0))),
             sub_64))>;
}

defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
                                  sub_32, INSELs>;
defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
                                  sub_64, INSELd>;
6510
// SMOV: move vector element to GPR with sign extension, matched as
// sext_inreg of the extracted element. Q selects 32- vs 64-bit
// destination; the lane index is encoded into imm5 as for DUP/INS.
class NeonI_SMOV<string asmop, string Res, bit Q,
                 ValueType OpTy, ValueType eleTy,
                 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
  : NeonI_copy<Q, 0b0, 0b0101,
               (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
               asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
               [(set (ResTy ResGPR:$Rd),
                 (ResTy (sext_inreg
                   (ResTy (vector_extract
                     (OpTy VPR128:$Rn), (OpImm:$Imm))),
                   eleTy)))],
               NoItinerary> {
  bits<4> Imm;
}

// Signed integer move (main, from element)
def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
                        GPR32, i32> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
                        GPR32, i32> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
                        GPR64, i64> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
                        GPR64, i64> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
                        GPR64, i64> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
6547
// Additional selection patterns for the X-form SMOV instructions.
// Covers the DAG shapes sext_inreg(anyext(extract)), sext(extract) and
// sext_inreg(extract) for both 128-bit (StTy) and 64-bit (NaTy) source
// vectors; the 64-bit cases widen the source with SUBREG_TO_REG first.
multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
                               ValueType eleTy, Operand StImm,  Operand NaImm,
                               Instruction SMOVI> {
  def : Pat<(i64 (sext_inreg
              (i64 (anyext
                (i32 (vector_extract
                  (StTy VPR128:$Rn), (StImm:$Imm))))),
              eleTy)),
            (SMOVI VPR128:$Rn, StImm:$Imm)>;

  def : Pat<(i64 (sext
              (i32 (vector_extract
                (StTy VPR128:$Rn), (StImm:$Imm))))),
            (SMOVI VPR128:$Rn, StImm:$Imm)>;

  def : Pat<(i64 (sext_inreg
              (i64 (vector_extract
                (NaTy VPR64:$Rn), (NaImm:$Imm))),
              eleTy)),
            (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              NaImm:$Imm)>;

  def : Pat<(i64 (sext_inreg
              (i64 (anyext
                (i32 (vector_extract
                  (NaTy VPR64:$Rn), (NaImm:$Imm))))),
              eleTy)),
            (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              NaImm:$Imm)>;

  def : Pat<(i64 (sext
              (i32 (vector_extract
                (NaTy VPR64:$Rn), (NaImm:$Imm))))),
            (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              NaImm:$Imm)>;
}
6584
// Instantiate the X-form SMOV patterns for byte, half and word elements.
// The 64-bit-vector lane-index operand is half the range of the 128-bit one.
defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
                          neon_uimm3_bare, SMOVxb>;
defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
                          neon_uimm2_bare, SMOVxh>;
defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                          neon_uimm1_bare, SMOVxs>;
6591
// W-form SMOV pattern for a sign-extended extract from a 64-bit vector:
// widen the source to 128 bits with SUBREG_TO_REG, then use the W-form
// SMOV instruction.  StImm is unused here but kept so instantiations
// parallel Neon_SMOVx_pattern.
class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
                          ValueType eleTy, Operand StImm,  Operand NaImm,
                          Instruction SMOVI>
  : Pat<(i32 (sext_inreg
          (i32 (vector_extract
            (NaTy VPR64:$Rn), (NaImm:$Imm))),
          eleTy)),
        (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
          NaImm:$Imm)>;

def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
                         neon_uimm3_bare, SMOVwb>;
def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
                         neon_uimm2_bare, SMOVwh>;
6606
// Unsigned move from vector element to GPR (UMOV).
// Matches a plain vector_extract; no extension node is needed in the
// pattern because the instruction zeroes the upper destination bits.
class NeonI_UMOV<string asmop, string Res, bit Q,
                 ValueType OpTy, Operand OpImm,
                 RegisterClass ResGPR, ValueType ResTy>
  : NeonI_copy<Q, 0b0, 0b0111,
               (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
               asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
               [(set (ResTy ResGPR:$Rd),
                  (ResTy (vector_extract
                    (OpTy VPR128:$Rn), (OpImm:$Imm))))],
               NoItinerary> {
  // Lane index; folded into the imm5 field Inst{20-16} per element size.
  bits<4> Imm;
}
6619
// Unsigned integer move (main, from element).
// imm5 (Inst{20-16}) encodes element size and lane index, as for SMOV;
// d-element form (x1000) exists only with Q = 1.
def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
                         GPR32, i32> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
                         GPR32, i32> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
                         GPR32, i32> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
                         GPR64, i64> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}

// "mov" is the preferred disassembly alias for the s- and d-element
// UMOV forms (printing disabled: third argument 0).
def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]",
                    (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]",
                    (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>;
6642
// UMOV pattern for an extract from a 64-bit vector: widen the source to
// 128 bits with SUBREG_TO_REG, then use the corresponding UMOV.
// The instruction parameter was previously named SMOVI although every
// instantiation passes a UMOV instruction; renamed to UMOVI for clarity
// (class-internal name, instantiations are positional).  StImm is unused
// but kept so instantiations parallel the SMOV pattern multiclasses.
class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
                         Operand StImm,  Operand NaImm,
                         Instruction UMOVI>
  : Pat<(ResTy (vector_extract
          (NaTy VPR64:$Rn), NaImm:$Imm)),
        (UMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
          NaImm:$Imm)>;

def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
                        neon_uimm3_bare, UMOVwb>;
def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
                        neon_uimm2_bare, UMOVwh>;
def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                        neon_uimm1_bare, UMOVws>;
6657
// Zero-extension folds: an element extract masked with the element-width
// mask (255 / 65535), or zero-extended to i64, selects directly to UMOV,
// which already zeroes the high destination bits.
def : Pat<(i32 (and
            (i32 (vector_extract
              (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
            255)),
          (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;

def : Pat<(i32 (and
            (i32 (vector_extract
              (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
            65535)),
          (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;

// NOTE(review): the two d-element patterns below type the extract result
// as i32 although the element type is i64 — presumably relying on
// vector_extract's loose result typing; confirm this matches the DAG
// shapes actually produced.
def : Pat<(i64 (zext
            (i32 (vector_extract
              (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
          (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;

// 64-bit-vector sources are widened with SUBREG_TO_REG first.
def : Pat<(i32 (and
            (i32 (vector_extract
              (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
            255)),
          (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
            neon_uimm3_bare:$Imm)>;

def : Pat<(i32 (and
            (i32 (vector_extract
              (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
            65535)),
          (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
            neon_uimm2_bare:$Imm)>;

def : Pat<(i64 (zext
            (i32 (vector_extract
              (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
          (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
            neon_uimm0_bare:$Imm)>;
6694
// Additional copy patterns for scalar types.
// Extracting lane 0 of a 1-element vector held in an FPR: sub-32-bit
// elements go through UMOV after widening; 32/64-bit elements use FMOV;
// f64 is already in the right register, so no instruction is needed.
def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
          (UMOVwb (v16i8
            (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;

def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
          (UMOVwh (v8i16
            (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;

def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
          (FMOVws FPR32:$Rn)>;

def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
          (FMOVxd FPR64:$Rn)>;

def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
          (f64 FPR64:$Rn)>;
6712
// scalar_to_vector into 1-element vectors: sub-32-bit elements insert into
// an undef 128-bit vector and extract the narrow subregister; 32/64-bit
// elements use FMOV from the GPR.
def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
          (v1i8 (EXTRACT_SUBREG (v16i8
            (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_8))>;

def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
          (v1i16 (EXTRACT_SUBREG (v8i16
            (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_16))>;

def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
          (FMOVsw $src)>;

def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
          (FMOVdx $src)>;
6728
// scalar_to_vector into 64-bit vectors: insert into lane 0 of an undef
// 128-bit vector, then take the low 64-bit subregister.
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (v8i8 (EXTRACT_SUBREG (v16i8
            (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_64))>;

def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (v4i16 (EXTRACT_SUBREG (v8i16
            (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_64))>;

// The INSsw result was previously typed v16i8; use v4i32 to match the
// instruction's natural element type, consistent with the INSbw/INShw
// patterns above.
def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
          (v2i32 (EXTRACT_SUBREG (v4i32
            (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_64))>;
6743
// scalar_to_vector into 128-bit vectors: a single INS into lane 0 of an
// undef vector; the remaining lanes are undefined, as scalar_to_vector
// permits.
def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))>;

def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)),
          (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))>;

def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)),
          (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>;
6755
// Two byte-identical copies of the v2i32 scalar_to_vector pattern (already
// defined above with INSsw) were removed here; the pattern only needs to
// be stated once.
6765
// f64 scalar_to_vector: v1f64 is the same register, so no instruction;
// v2f64 places the scalar in the low 64 bits of an undef vector.
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (v1f64 FPR64:$Rn)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
                         (f64 FPR64:$src), sub_64)>;
6772
// DUP (element): duplicate one vector lane across all lanes of the
// destination.  Patterns are attached separately via
// NeonI_DUP_Elt_pattern below.
class NeonI_DUP_Elt<bit Q, string asmop, string rdlane,  string rnlane,
                    RegisterOperand ResVPR, Operand OpImm>
  : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
               (ins VPR128:$Rn, OpImm:$Imm),
               asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
               [],
               NoItinerary> {
  // Lane index; folded into the imm5 field Inst{20-16} per element size.
  bits<4> Imm;
}
6782
// DUP (element) instruction forms.  imm5 (Inst{20-16}) encodes element
// size and lane index exactly as for SMOV/UMOV.
def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
                              neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}

def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
                              neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}

def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
                              neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}

def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
                              neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}

def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
                              neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}

def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
                              neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}

def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
                              neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
6817
// Selection patterns for DUP (element): Neon_vduplane from a 128-bit
// source is matched directly; a 64-bit source is first widened to ExTy
// with SUBREG_TO_REG.
multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
                                       ValueType OpTy,ValueType NaTy,
                                       ValueType ExTy, Operand OpLImm,
                                       Operand OpNImm> {
def  : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
        (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;

def : Pat<(ResTy (Neon_vduplane
            (NaTy VPR64:$Rn), OpNImm:$Imm)),
          (ResTy (DUPELT
            (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
}
// Instantiations cover both integer and floating-point result types for
// each destination width.
defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
                             neon_uimm4_bare, neon_uimm3_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
                             neon_uimm4_bare, neon_uimm3_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
                             neon_uimm3_bare, neon_uimm2_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
                             neon_uimm3_bare, neon_uimm2_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
                             neon_uimm1_bare, neon_uimm0_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
                             neon_uimm1_bare, neon_uimm0_bare>;
6850
// Duplicating an FP scalar already held in an FPR: widen the scalar into
// a 128-bit register with SUBREG_TO_REG and duplicate lane 0.
def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
          (v2f32 (DUPELT2s
            (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
            (i64 0)))>;
def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
          (v4f32 (DUPELT4s
            (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
            (i64 0)))>;
def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
          (v2f64 (DUPELT2d
            (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
            (i64 0)))>;
6863
// DUP (general): duplicate a GPR value into every lane of the destination
// vector; matched from the Neon_vdup node.
class NeonI_DUP<bit Q, string asmop, string rdlane,
                RegisterOperand ResVPR, ValueType ResTy,
                RegisterClass OpGPR, ValueType OpTy>
  : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
               asmop # "\t$Rd" # rdlane # ", $Rn",
               [(set (ResTy ResVPR:$Rd),
                 (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
               NoItinerary>;
6872
// DUP (general) instruction forms.  Inst{20-16} holds the imm5 size code
// (00001 = byte, 00010 = half, 00100 = word, 01000 = dword); the bits
// above the size bit are architecturally unspecified and kept zero.
def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
  let Inst{20-16} = 0b00001;
  // bits 17-20 are unspecified, but should be set to zero.
}

def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
  let Inst{20-16} = 0b00010;
  // bits 18-20 are unspecified, but should be set to zero.
}

def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
  let Inst{20-16} = 0b00100;
  // bits 19-20 are unspecified, but should be set to zero.
}

def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
  let Inst{20-16} = 0b01000;
  // bit 20 is unspecified, but should be set to zero.
}

def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
  let Inst{20-16} = 0b00001;
  // bits 17-20 are unspecified, but should be set to zero.
}

def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
  let Inst{20-16} = 0b00010;
  // bits 18-20 are unspecified, but should be set to zero.
}

def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
  let Inst{20-16} = 0b00100;
  // bits 19-20 are unspecified, but should be set to zero.
}
6907
// Patterns for CONCAT_VECTORS of two 64-bit halves:
//  - low half + undef: just widen with SUBREG_TO_REG,
//  - two distinct halves: insert Rm's d[0] into lane 1 via INSELd,
//  - both halves identical: duplicate d[0] with DUPELT2d.
multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
          (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
          (INSELd
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
            (i64 1),
            (i64 0))>;
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
          (DUPELT2d
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            (i64 0))> ;
}

defm : Concat_Vector_Pattern<v16i8, v8i8>;
defm : Concat_Vector_Pattern<v8i16, v4i16>;
defm : Concat_Vector_Pattern<v4i32, v2i32>;
defm : Concat_Vector_Pattern<v2i64, v1i64>;
defm : Concat_Vector_Pattern<v4f32, v2f32>;
defm : Concat_Vector_Pattern<v2f64, v1f64>;
6930
// Patterns for EXTRACT_SUBVECTOR: taking the low half of a 128-bit vector
// is a plain sub_64 subregister extract (index 0 only).
def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
          (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
          (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
          (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
          (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
          (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
          (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6944
// The following definitions are for the instruction class (3V Elem).

// Variant 1
6948
// Three-operand vector-by-element instruction (accumulating form):
// destination is also a source ($src = $Rd constraint).  Index and Re
// field placement varies by element size, so each instantiation wires
// Inst bits itself.
class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
             string asmop, string ResS, string OpS, string EleOpS,
             Operand OpImm, RegisterOperand ResVPR,
             RegisterOperand OpVPR, RegisterOperand EleOpVPR>
  : NeonI_2VElem<q, u, size, opcode,
                 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
                                         EleOpVPR:$Re, OpImm:$Index),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
                 ", $Re." # EleOpS # "[$Index]",
                 [],
                 NoItinerary> {
  // Lane index into the element operand.
  bits<3> Index;
  // Element source register number.
  bits<5> Re;

  // Accumulating: the destination is read-modify-write.
  let Constraints = "$src = $Rd";
}
6965
// Integer by-element accumulating forms (2s/4s and 4h/8h variants).
multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                     neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15,
  // hence VPR128Lo and the 4-bit Re field.
  def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
                     neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }

  def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
                     neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}

// Multiply-accumulate / multiply-subtract by element.
defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
7002
// Pattern for lane in 128-bit vector: the element register can be used
// directly since the instruction's element operand is 128-bit.
class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                   RegisterOperand ResVPR, RegisterOperand OpVPR,
                   RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
                   ValueType EleOpTy>
  : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;

// Pattern for lane in 64-bit vector: the element register is first widened
// to 128 bits with SUBREG_TO_REG.
class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                  RegisterOperand ResVPR, RegisterOperand OpVPR,
                  RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
                  ValueType EleOpTy>
  : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST ResVPR:$src, OpVPR:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7021
// Attach lane patterns to the integer accumulating by-element forms.
multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
{
  def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
                     op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
                     op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
                     op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
                     op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
                    op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>;

  def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
                    op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
}

defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
7047
// Two-operand vector-by-element instruction (non-accumulating form):
// like NI_2VE but without the $src tied operand.
class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
                 string asmop, string ResS, string OpS, string EleOpS,
                 Operand OpImm, RegisterOperand ResVPR,
                 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
  : NeonI_2VElem<q, u, size, opcode,
                 (outs ResVPR:$Rd), (ins OpVPR:$Rn,
                                         EleOpVPR:$Re, OpImm:$Index),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
                 ", $Re." # EleOpS # "[$Index]",
                 [],
                 NoItinerary> {
  // Lane index into the element operand.
  bits<3> Index;
  // Element source register number.
  bits<5> Re;
}
7062
// Integer by-element non-accumulating forms (2s/4s and 4h/8h variants).
multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                         neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                         neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15,
  // hence VPR128Lo and the 4-bit Re field.
  def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
                         neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }

  def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
                         neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}

// Multiply / saturating-doubling-multiply-high by element.
defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
7100
// Pattern for lane in 128-bit vector (non-accumulating forms): element
// register used directly.
class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                       RegisterOperand OpVPR, RegisterOperand EleOpVPR,
                       ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
  : Pat<(ResTy (op (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;

// Pattern for lane in 64-bit vector: element register widened to 128 bits
// with SUBREG_TO_REG first.
class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                      RegisterOperand OpVPR, RegisterOperand EleOpVPR,
                      ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
  : Pat<(ResTy (op (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST OpVPR:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7117
// Attach lane patterns to the integer non-accumulating by-element forms.
multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
                         op, VPR64, VPR128, v2i32, v2i32, v4i32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
                         op, VPR128, VPR128, v4i32, v4i32, v4i32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
                         op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
                         op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
                        op, VPR64, VPR64, v2i32, v2i32, v2i32>;

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
                        op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
}

defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
7143
// Variant 2: floating-point by-element non-accumulating forms
// (2s/4s and 2d variants).

multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                         neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                         neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // _1d2d doesn't exist!

  def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
                         neon_uimm1_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{0}};
    let Inst{21} = 0b0;
    let Inst{20-16} = Re;
  }
}

defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
7174
// 2d-by-lane pattern where the element comes from a 64-bit (v1f64)
// register: coreop (a combine of the element with itself) models the
// duplication, and the element is widened with SUBREG_TO_REG; the lane
// index is always 0.
class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
                         RegisterOperand OpVPR, RegisterOperand EleOpVPR,
                         ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
                         SDPatternOperator coreop>
  : Pat<(ResTy (op (OpTy OpVPR:$Rn),
          (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
        (INST OpVPR:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
7183
// Attach lane patterns to the floating-point non-accumulating forms.
multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
                         op, VPR64, VPR128, v2f32, v2f32, v4f32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
                         op, VPR128, VPR128, v4f32, v4f32, v4f32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
                         op, VPR128, VPR128, v2f64, v2f64, v2f64>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
                        op, VPR64, VPR64, v2f32, v2f32, v2f32>;

  def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
                           op, VPR128, VPR64, v2f64, v2f64, v1f64,
                           BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
}

defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;

// fmul by a duplicated FP scalar in an FPR: widen the scalar and use the
// by-element form with lane 0.
def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))),
                       (v2f32 VPR64:$Rn))),
          (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;

def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))),
                       (v4f32 VPR128:$Rn))),
          (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;

def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))),
                       (v2f64 VPR128:$Rn))),
          (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>;
7218
// The following definitions are patterns using fma;
// -ffp-contract=fast generates fma.
7221
// Floating-point by-element accumulating forms (fmla/fmls variants).
multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                     neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // _1d2d doesn't exist!

  def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
                     neon_uimm1_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{0}};
    let Inst{21} = 0b0;
    let Inst{20-16} = Re;
  }
}

defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
7250
7251 // Pattern for lane in 128-bit vector
// "swap" variants match the fma with the dup-ed element as the FIRST
// multiplicand (operands swapped relative to the non-swap classes).
7252 class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7253                        RegisterOperand ResVPR, RegisterOperand OpVPR,
7254                        ValueType ResTy, ValueType OpTy,
7255                        SDPatternOperator coreop>
7256   : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
7257                    (ResTy ResVPR:$src), (ResTy ResVPR:$Rn))),
7258         (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
7259
7260 // Pattern for lane 0
// fma with a dup of a scalar f32: place the scalar in lane 0 of a vector
// register (SUBREG_TO_REG) and use lane index 0.
7261 class NI_2VEfma_lane0<Instruction INST, SDPatternOperator op,
7262                       RegisterOperand ResVPR, ValueType ResTy>
7263   : Pat<(ResTy (op (ResTy ResVPR:$Rn),
7264                    (ResTy (Neon_vdup (f32 FPR32:$Re))),
7265                    (ResTy ResVPR:$src))),
7266         (INST ResVPR:$src, ResVPR:$Rn,
7267               (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7268
7269 // Pattern for lane in 64-bit vector
// The element lives in a 64-bit register, but the instruction encodes a
// 128-bit element register, so widen $Re with SUBREG_TO_REG before use.
7270 class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7271                       RegisterOperand ResVPR, RegisterOperand OpVPR,
7272                       ValueType ResTy, ValueType OpTy,
7273                       SDPatternOperator coreop>
7274   : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
7275                    (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7276         (INST ResVPR:$src, ResVPR:$Rn,
7277           (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
7278
7279 // Pattern for lane in 64-bit vector
// 2d2d case with a v1f64 element: coreop combines $Re with itself (both
// lanes equal), so the selected instruction always uses lane index 0.
7280 class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
7281                            SDPatternOperator op,
7282                            RegisterOperand ResVPR, RegisterOperand OpVPR,
7283                            ValueType ResTy, ValueType OpTy,
7284                            SDPatternOperator coreop>
7285   : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
7286                    (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7287         (INST ResVPR:$src, ResVPR:$Rn,
7288           (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
7289
7290
// Selection patterns mapping fma onto by-element FMLA (variant 2).
// NOTE(review): unlike the fms variant below, there is no _4s4s
// NI_2VEswap_lane (64-bit element) pattern here — confirm whether that
// combination is intentionally omitted or simply not yet covered.
7291 multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
7292   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7293                          neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7294                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7295
7296   def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_2s4s"),
7297                         op, VPR64, v2f32>;
7298
7299   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7300                          neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7301                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7302
7303   def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_4s4s"),
7304                         op, VPR128, v4f32>;
7305
7306   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7307                          neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7308                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7309
7310   // Index can only be half of the max value for lane in 64-bit vector
7311
7312   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7313                         neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7314                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7315
7316   def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7317                              neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7318                              BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
7319 }
7320
7321 defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
7322
7323 // Pattern for lane 0
// fms expressed as fma with a negated first multiplicand:
// fma(-Rn, dup(Re), src) == src - Rn*Re, which is FMLS by element, lane 0.
7324 class NI_2VEfms_lane0<Instruction INST, SDPatternOperator op,
7325                       RegisterOperand ResVPR, ValueType ResTy>
7326   : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)),
7327                    (ResTy (Neon_vdup (f32 FPR32:$Re))),
7328                    (ResTy ResVPR:$src))),
7329         (INST ResVPR:$src, ResVPR:$Rn,
7330               (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7331
// Selection patterns mapping fma-with-negation onto by-element FMLS.
// Each shape is matched twice: once with fneg outside the duplane and once
// with fneg applied to the source vector before the duplane, since either
// canonical form can reach instruction selection.
7332 multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
7333 {
7334   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7335                          neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7336                          BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
7337
7338   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7339                          neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7340                          BinOpFrag<(Neon_vduplane
7341                                      (fneg node:$LHS), node:$RHS)>>;
7342
7343   def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_2s4s"),
7344                         op, VPR64, v2f32>;
7345
7346   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7347                          neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7348                          BinOpFrag<(fneg (Neon_vduplane
7349                                      node:$LHS, node:$RHS))>>;
7350
7351   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7352                          neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7353                          BinOpFrag<(Neon_vduplane
7354                                      (fneg node:$LHS), node:$RHS)>>;
7355
7356   def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_4s4s"),
7357                         op, VPR128, v4f32>;
7358
7359   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7360                          neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7361                          BinOpFrag<(fneg (Neon_vduplane
7362                                      node:$LHS, node:$RHS))>>;
7363
7364   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7365                          neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7366                          BinOpFrag<(Neon_vduplane
7367                                      (fneg node:$LHS), node:$RHS)>>;
7368
7369   // Index can only be half of the max value for lane in 64-bit vector
7370
7371   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7372                         neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7373                         BinOpFrag<(fneg (Neon_vduplane
7374                                     node:$LHS, node:$RHS))>>;
7375
7376   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7377                         neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7378                         BinOpFrag<(Neon_vduplane
7379                                     (fneg node:$LHS), node:$RHS)>>;
7380
7381   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
7382                         neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
7383                         BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
7384
7385   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
7386                         neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
7387                         BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>;
7388
// 2d2d with a v1f64 element: the fneg may appear around the whole combined
// vector or on both halves individually; both fold to the same FMLS.
7389   def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7390                              neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7391                              BinOpFrag<(fneg (Neon_combine_2d
7392                                          node:$LHS, node:$RHS))>>;
7393
7394   def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7395                              neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7396                              BinOpFrag<(Neon_combine_2d
7397                                          (fneg node:$LHS), (fneg node:$RHS))>>;
7398 }
7399
7400 defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
7401
7402 // Variant 3: Long type
7403 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
7404 //      SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
7405
// Widening (long) by-element multiply-accumulate instructions. The "2"
// asm suffix marks the second-half (high) source forms.
7406 multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
7407   // vector register class for element is always 128-bit to cover the max index
7408   def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
7409                      neon_uimm2_bare, VPR128, VPR64, VPR128> {
7410     let Inst{11} = {Index{1}};
7411     let Inst{21} = {Index{0}};
7412     let Inst{20-16} = Re;
7413   }
7414
7415   def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
7416                      neon_uimm2_bare, VPR128, VPR128, VPR128> {
7417     let Inst{11} = {Index{1}};
7418     let Inst{21} = {Index{0}};
7419     let Inst{20-16} = Re;
7420   }
7421
7422   // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7423   def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
7424                      neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
// "h" elements use a 3-bit index (bits 11, 21, 20) and a 4-bit register
// field, hence the VPR128Lo (v0-v15) element class.
7425     let Inst{11} = {Index{2}};
7426     let Inst{21} = {Index{1}};
7427     let Inst{20} = {Index{0}};
7428     let Inst{19-16} = Re{3-0};
7429   }
7430
7431   def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
7432                      neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
7433     let Inst{11} = {Index{2}};
7434     let Inst{21} = {Index{1}};
7435     let Inst{20} = {Index{0}};
7436     let Inst{19-16} = Re{3-0};
7437   }
7438 }
7439
7440 defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
7441 defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
7442 defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
7443 defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
7444 defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
7445 defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
7446
// Two-operand (no accumulator) widening by-element multiplies; encoding
// layout mirrors NI_2VE_v3 above.
7447 multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
7448   // vector register class for element is always 128-bit to cover the max index
7449   def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
7450                          neon_uimm2_bare, VPR128, VPR64, VPR128> {
7451     let Inst{11} = {Index{1}};
7452     let Inst{21} = {Index{0}};
7453     let Inst{20-16} = Re;
7454   }
7455
7456   def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
7457                          neon_uimm2_bare, VPR128, VPR128, VPR128> {
7458     let Inst{11} = {Index{1}};
7459     let Inst{21} = {Index{0}};
7460     let Inst{20-16} = Re;
7461   }
7462
7463   // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7464   def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
7465                          neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7466     let Inst{11} = {Index{2}};
7467     let Inst{21} = {Index{1}};
7468     let Inst{20} = {Index{0}};
7469     let Inst{19-16} = Re{3-0};
7470   }
7471
7472   def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
7473                          neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
7474     let Inst{11} = {Index{2}};
7475     let Inst{21} = {Index{1}};
7476     let Inst{20} = {Index{0}};
7477     let Inst{19-16} = Re{3-0};
7478   }
7479 }
7480
7481 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
7482 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
7483 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
7484
// A v1f64 built from a scalar f64 is just a register copy (FMOV d->d).
7485 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
7486           (FMOVdd $src)>;
7487
7488 // Pattern for lane in 128-bit vector
// Accumulating long (L2) by-element pattern: hiop extracts the high half
// of $Rn; the element comes from a duplane of a 128-bit register.
7489 class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7490                      RegisterOperand EleOpVPR, ValueType ResTy,
7491                      ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7492                      SDPatternOperator hiop>
7493   : Pat<(ResTy (op (ResTy VPR128:$src),
7494           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7495           (HalfOpTy (Neon_vduplane
7496                       (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7497         (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7498
7499 // Pattern for lane in 64-bit vector
// Same as NI_2VEL2_laneq, but the element register is 64-bit and must be
// widened with SUBREG_TO_REG to satisfy the instruction's operand class.
7500 class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7501                     RegisterOperand EleOpVPR, ValueType ResTy,
7502                     ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7503                     SDPatternOperator hiop>
7504   : Pat<(ResTy (op (ResTy VPR128:$src),
7505           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7506           (HalfOpTy (Neon_vduplane
7507                       (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7508         (INST VPR128:$src, VPR128:$Rn,
7509           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7510
// Accumulating long pattern for a dup of a GPR scalar: materialize the dup
// with DupInst and select lane index 0.
7511 class NI_2VEL2_lane0<Instruction INST, SDPatternOperator op,
7512                      ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
7513                      SDPatternOperator hiop, Instruction DupInst>
7514   : Pat<(ResTy (op (ResTy VPR128:$src),
7515           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7516           (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
7517         (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>;
7518
// Selection patterns for the accumulating long by-element family
// (SMLAL/UMLAL/SMLSL/UMLSL): low-half, high-half ("2"), lane-0 dup, and
// 64-bit-element variants for each result shape.
7519 multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
7520   def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7521                      op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
7522
7523   def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7524                      op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>;
7525
7526   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7527                        op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7528
7529   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7530                        op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7531
7532   def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
7533                        op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7534
7535   def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
7536                        op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7537
7538   // Index can only be half of the max value for lane in 64-bit vector
7539
7540   def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7541                     op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
7542
7543   def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7544                     op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>;
7545
7546   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7547                       op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7548
7549   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7550                       op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
7551 }
7552
7553 defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
7554 defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
7555 defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
7556 defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
7557
7558 // Pattern for lane in 128-bit vector
// Non-accumulating (mul) counterpart of NI_2VEL2_laneq: no $src operand.
7559 class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7560                          RegisterOperand EleOpVPR, ValueType ResTy,
7561                          ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7562                          SDPatternOperator hiop>
7563   : Pat<(ResTy (op
7564           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7565           (HalfOpTy (Neon_vduplane
7566                       (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7567         (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7568
7569 // Pattern for lane in 64-bit vector
// Non-accumulating counterpart of NI_2VEL2_lane: widens the 64-bit element
// register with SUBREG_TO_REG.
7570 class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7571                         RegisterOperand EleOpVPR, ValueType ResTy,
7572                         ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7573                         SDPatternOperator hiop>
7574   : Pat<(ResTy (op
7575           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7576           (HalfOpTy (Neon_vduplane
7577                       (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7578         (INST VPR128:$Rn,
7579           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7580
7581 // Pattern for fixed lane 0
// Non-accumulating counterpart of NI_2VEL2_lane0: dup a GPR scalar and
// select with lane index 0.
7582 class NI_2VEL2_mul_lane0<Instruction INST, SDPatternOperator op,
7583                          ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
7584                          SDPatternOperator hiop, Instruction DupInst>
7585   : Pat<(ResTy (op
7586           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7587           (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
7588         (INST VPR128:$Rn, (DupInst $Re), 0)>;
7589
// Selection patterns for the non-accumulating long by-element multiplies
// (SMULL/UMULL/SQDMULL); structure parallels NI_2VEL_v3_pat above.
7590 multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
7591   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7592                          op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
7593
7594   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7595                          op, VPR64, VPR128, v2i64, v2i32, v4i32>;
7596
7597   def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7598                          op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7599
7600   def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7601                            op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7602
7603   def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_4s8h"),
7604                            op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7605
7606   def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_2d4s"),
7607                            op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7608
7609   // Index can only be half of the max value for lane in 64-bit vector
7610
7611   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7612                         op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
7613
7614   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7615                         op, VPR64, VPR64, v2i64, v2i32, v2i32>;
7616
7617   def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7618                           op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7619
7620   def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7621                           op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
7622 }
7623
7624 defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
7625 defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
7626 defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
7627
// PatFrags composing a saturating add/sub with sqdmull, used to express
// SQDMLAL/SQDMLSL as op(Ra, sqdmull(Rn, Rm)) at both widened widths.
7628 multiclass NI_qdma<SDPatternOperator op> {
7629   def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
7630                     (op node:$Ra,
7631                       (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
7632
7633   def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
7634                     (op node:$Ra,
7635                       (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
7636 }
7637
7638 defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
7639 defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
7640
// Selection patterns for saturating-doubling multiply-accumulate
// (SQDMLAL/SQDMLSL) by element, built from the NI_qdma PatFrags. The op
// parameter is a string so the width-specific fragment ("_4s"/"_2d") can
// be chosen per shape via !cast<PatFrag>.
7641 multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
7642   def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7643                      !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
7644                      v4i32, v4i16, v8i16>;
7645
7646   def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7647                      !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
7648                      v2i64, v2i32, v4i32>;
7649
7650   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7651                        !cast<PatFrag>(op # "_4s"), VPR128Lo,
7652                        v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7653
7654   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7655                        !cast<PatFrag>(op # "_2d"), VPR128,
7656                        v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7657
7658   def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
7659                        !cast<PatFrag>(op # "_4s"),
7660                        v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7661
7662   def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
7663                        !cast<PatFrag>(op # "_2d"),
7664                        v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7665
7666   // Index can only be half of the max value for lane in 64-bit vector
7667
7668   def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7669                     !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
7670                     v4i32, v4i16, v4i16>;
7671
7672   def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7673                     !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
7674                     v2i64, v2i32, v2i32>;
7675
7676   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7677                       !cast<PatFrag>(op # "_4s"), VPR64Lo,
7678                       v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7679
7680   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7681                       !cast<PatFrag>(op # "_2d"), VPR64,
7682                       v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
7683 }
7684
7685 defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
7686 defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
7687
7688 // End of implementation for instruction class (3V Elem)
7689
// Element-reversal instruction class (REV16/REV32/REV64): one source, one
// destination, same register class and arrangement; pattern maps the
// corresponding Neon_rev* node directly onto the instruction.
7690 class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
7691                 bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
7692                 SDPatternOperator Neon_Rev>
7693   : NeonI_2VMisc<Q, U, size, opcode,
7694                (outs ResVPR:$Rd), (ins ResVPR:$Rn),
7695                asmop # "\t$Rd." # Res # ", $Rn." # Res,
7696                [(set (ResTy ResVPR:$Rd),
7697                   (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
7698                NoItinerary> ;
7699
// REV64: reverse elements within each 64-bit doubleword.
7700 def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
7701                           v16i8, Neon_rev64>;
7702 def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
7703                          v8i16, Neon_rev64>;
7704 def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
7705                          v4i32, Neon_rev64>;
7706 def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
7707                          v8i8, Neon_rev64>;
7708 def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
7709                          v4i16, Neon_rev64>;
7710 def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
7711                          v2i32, Neon_rev64>;
7712
// FP vectors reuse the 32-bit-element integer forms (bit pattern identical).
7713 def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
7714 def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
7715
// REV32: reverse elements within each 32-bit word (8/16-bit elements only).
7716 def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
7717                           v16i8, Neon_rev32>;
7718 def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
7719                           v8i16, Neon_rev32>;
7720 def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
7721                          v8i8, Neon_rev32>;
7722 def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
7723                          v4i16, Neon_rev32>;
7724
// REV16: reverse bytes within each 16-bit halfword (byte elements only).
7725 def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
7726                           v16i8, Neon_rev16>;
7727 def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
7728                          v8i8, Neon_rev16>;
7729
// Pairwise widening add (SADDLP/UADDLP): adds adjacent element pairs,
// producing a vector with half the element count at double the width.
7730 multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
7731                              SDPatternOperator Neon_Padd> {
7732   def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
7733                            (outs VPR128:$Rd), (ins VPR128:$Rn),
7734                            asmop # "\t$Rd.8h, $Rn.16b",
7735                            [(set (v8i16 VPR128:$Rd),
7736                               (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
7737                            NoItinerary>;
7738
7739   def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
7740                           (outs VPR64:$Rd), (ins VPR64:$Rn),
7741                           asmop # "\t$Rd.4h, $Rn.8b",
7742                           [(set (v4i16 VPR64:$Rd),
7743                              (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
7744                           NoItinerary>;
7745
7746   def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
7747                            (outs VPR128:$Rd), (ins VPR128:$Rn),
7748                            asmop # "\t$Rd.4s, $Rn.8h",
7749                            [(set (v4i32 VPR128:$Rd),
7750                               (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
7751                            NoItinerary>;
7752
7753   def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
7754                           (outs VPR64:$Rd), (ins VPR64:$Rn),
7755                           asmop # "\t$Rd.2s, $Rn.4h",
7756                           [(set (v2i32 VPR64:$Rd),
7757                              (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
7758                           NoItinerary>;
7759
7760   def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
7761                            (outs VPR128:$Rd), (ins VPR128:$Rn),
7762                            asmop # "\t$Rd.2d, $Rn.4s",
7763                            [(set (v2i64 VPR128:$Rd),
7764                               (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
7765                            NoItinerary>;
7766
7767   def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
7768                           (outs VPR64:$Rd), (ins VPR64:$Rn),
7769                           asmop # "\t$Rd.1d, $Rn.2s",
7770                           [(set (v1i64 VPR64:$Rd),
7771                              (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
7772                           NoItinerary>;
7773 }
7774
7775 defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
7776                                 int_arm_neon_vpaddls>;
7777 defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
7778                                 int_arm_neon_vpaddlu>;
7779
// A v2i32 across-lanes long add reduces to the 2s->1d pairwise form.
7780 def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))),
7781           (SADDLP2s1d $Rn)>;
7782 def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))),
7783           (UADDLP2s1d $Rn)>;
7784
// Pairwise widening add-and-accumulate (SADALP/UADALP): like SADDLP/UADDLP
// but accumulates into the destination, hence the tied $src = $Rd operand.
7785 multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
7786                              SDPatternOperator Neon_Padd> {
7787   let Constraints = "$src = $Rd" in {
7788     def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
7789                              (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7790                              asmop # "\t$Rd.8h, $Rn.16b",
7791                              [(set (v8i16 VPR128:$Rd),
7792                                 (v8i16 (Neon_Padd
7793                                   (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
7794                              NoItinerary>;
7795
7796     def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
7797                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7798                             asmop # "\t$Rd.4h, $Rn.8b",
7799                             [(set (v4i16 VPR64:$Rd),
7800                                (v4i16 (Neon_Padd
7801                                  (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
7802                             NoItinerary>;
7803
7804     def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
7805                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7806                             asmop # "\t$Rd.4s, $Rn.8h",
7807                             [(set (v4i32 VPR128:$Rd),
7808                                (v4i32 (Neon_Padd
7809                                  (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
7810                             NoItinerary>;
7811
7812     def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
7813                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7814                             asmop # "\t$Rd.2s, $Rn.4h",
7815                             [(set (v2i32 VPR64:$Rd),
7816                                (v2i32 (Neon_Padd
7817                                  (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
7818                             NoItinerary>;
7819
7820     def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
7821                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7822                             asmop # "\t$Rd.2d, $Rn.4s",
7823                             [(set (v2i64 VPR128:$Rd),
7824                                (v2i64 (Neon_Padd
7825                                  (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
7826                             NoItinerary>;
7827
7828     def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
7829                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7830                             asmop # "\t$Rd.1d, $Rn.2s",
7831                             [(set (v1i64 VPR64:$Rd),
7832                                (v1i64 (Neon_Padd
7833                                  (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
7834                             NoItinerary>;
7835   }
7836 }
7837
7838 defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
7839                                    int_arm_neon_vpadals>;
7840 defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
7841                                    int_arm_neon_vpadalu>;
7842
// One-operand two-register-misc instructions over all B/H/S/D arrangements.
// Patterns are intentionally empty here; selection is supplied separately
// by NeonI_2VMisc_BHSD_1Arg_Pattern (and the explicit NEG patterns below).
7843 multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
7844   def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
7845                          (outs VPR128:$Rd), (ins VPR128:$Rn),
7846                          asmop # "\t$Rd.16b, $Rn.16b",
7847                          [], NoItinerary>;
7848
7849   def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
7850                         (outs VPR128:$Rd), (ins VPR128:$Rn),
7851                         asmop # "\t$Rd.8h, $Rn.8h",
7852                         [], NoItinerary>;
7853
7854   def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
7855                         (outs VPR128:$Rd), (ins VPR128:$Rn),
7856                         asmop # "\t$Rd.4s, $Rn.4s",
7857                         [], NoItinerary>;
7858
7859   def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
7860                         (outs VPR128:$Rd), (ins VPR128:$Rn),
7861                         asmop # "\t$Rd.2d, $Rn.2d",
7862                         [], NoItinerary>;
7863
7864   def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
7865                          (outs VPR64:$Rd), (ins VPR64:$Rn),
7866                          asmop # "\t$Rd.8b, $Rn.8b",
7867                          [], NoItinerary>;
7868
7869   def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
7870                         (outs VPR64:$Rd), (ins VPR64:$Rn),
7871                         asmop # "\t$Rd.4h, $Rn.4h",
7872                         [], NoItinerary>;
7873
7874   def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
7875                         (outs VPR64:$Rd), (ins VPR64:$Rn),
7876                         asmop # "\t$Rd.2s, $Rn.2s",
7877                         [], NoItinerary>;
7878 }
7879
7880 defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
7881 defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
7882 defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
7883 defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
7884
// Maps a one-operand intrinsic onto the corresponding instruction for each
// B/H/S/D arrangement defined by NeonI_2VMisc_BHSDsize_1Arg. NEG has no
// intrinsic mapping here; it is covered by explicit sub-from-zero patterns.
7885 multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
7886                                           SDPatternOperator Neon_Op> {
7887   def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
7888             (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;
7889
7890   def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
7891             (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;
7892
7893   def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
7894             (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;
7895
7896   def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
7897             (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;
7898
7899   def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
7900             (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;
7901
7902   def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
7903             (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;
7904
7905   def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
7906             (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
7907 }
7908
7909 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
7910 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
7911 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
7912
// Match a subtraction from the all-zeros vector as NEG.  Neon_AllZero is
// only defined as a byte-vector pattern, so the wider element types match
// it through a bitconvert of the byte-sized all-zero vector.
def : Pat<(v16i8 (sub
            (v16i8 Neon_AllZero),
            (v16i8 VPR128:$Rn))),
          (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (sub
            (v8i8 Neon_AllZero),
            (v8i8 VPR64:$Rn))),
          (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
def : Pat<(v8i16 (sub
            (v8i16 (bitconvert (v16i8 Neon_AllZero))),
            (v8i16 VPR128:$Rn))),
          (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
def : Pat<(v4i16 (sub
            (v4i16 (bitconvert (v8i8 Neon_AllZero))),
            (v4i16 VPR64:$Rn))),
          (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
def : Pat<(v4i32 (sub
            (v4i32 (bitconvert (v16i8 Neon_AllZero))),
            (v4i32 VPR128:$Rn))),
          (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
def : Pat<(v2i32 (sub
            (v2i32 (bitconvert (v8i8 Neon_AllZero))),
            (v2i32 VPR64:$Rn))),
          (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
def : Pat<(v2i64 (sub
            (v2i64 (bitconvert (v16i8 Neon_AllZero))),
            (v2i64 VPR128:$Rn))),
          (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
7941
// Like NeonI_2VMisc_BHSDsize_1Arg, but the destination register is also an
// input: $src is tied to $Rd, so these operations accumulate into $Rd.
// Patterns are attached via NeonI_2VMisc_BHSD_2Args_Pattern below.
multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
  let Constraints = "$src = $Rd" in {
    def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                           asmop # "\t$Rd.16b, $Rn.16b",
                           [], NoItinerary>;

    def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "\t$Rd.8h, $Rn.8h",
                          [], NoItinerary>;

    def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "\t$Rd.4s, $Rn.4s",
                          [], NoItinerary>;

    def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "\t$Rd.2d, $Rn.2d",
                          [], NoItinerary>;

    def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                          asmop # "\t$Rd.8b, $Rn.8b",
                          [], NoItinerary>;

    def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4h",
                          [], NoItinerary>;

    def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2s",
                          [], NoItinerary>;
  }
}

// Saturating accumulate instructions (shared opcode, distinguished by U).
defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
7983
// Attach selection patterns for a two-operand operator to the accumulating
// instructions created by NeonI_2VMisc_BHSDsize_2Args: the first operand
// feeds the tied $src, the second feeds $Rn.
multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
                                           SDPatternOperator Neon_Op> {
  def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
            (v16i8 (!cast<Instruction>(Prefix # 16b)
              (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;

  def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
            (v8i16 (!cast<Instruction>(Prefix # 8h)
              (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
            (v4i32 (!cast<Instruction>(Prefix # 4s)
              (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
            (v2i64 (!cast<Instruction>(Prefix # 2d)
              (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;

  def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
            (v8i8 (!cast<Instruction>(Prefix # 8b)
              (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4h)
              (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2s)
              (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
}

defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
8017
// 2-register miscellaneous with B/H/S element sizes only (no D form),
// opcode fixed at 0b00100, and the selection pattern supplied inline from
// the given operator.
multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
                          SDPatternOperator Neon_Op> {
  def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.16b, $Rn.16b",
                         [(set (v16i8 VPR128:$Rd),
                            (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
                         NoItinerary>;

  def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.8h, $Rn.8h",
                        [(set (v8i16 VPR128:$Rd),
                           (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
                        NoItinerary>;

  def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4i32 VPR128:$Rd),
                           (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
                        NoItinerary>;

  def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.8b, $Rn.8b",
                        [(set (v8i8 VPR64:$Rd),
                           (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
                        NoItinerary>;

  def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.4h, $Rn.4h",
                        [(set (v4i16 VPR64:$Rd),
                           (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2i32 VPR64:$Rd),
                           (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
                        NoItinerary>;
}

// Count-leading-sign-bits (intrinsic only) and count-leading-zeros (matched
// from the generic ctlz node).
defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
8065
// Byte-only 2-register miscellaneous: just the 16b and 8b arrangements.
// Used for the bitwise/byte-wise operations CNT, NOT and RBIT, which share
// opcode 0b00101 and differ in U and size.
multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
                              bits<5> Opcode> {
  def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.16b, $Rn.16b",
                         [], NoItinerary>;

  def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.8b, $Rn.8b",
                        [], NoItinerary>;
}

defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;

// "mvn" as an alias spelling of NOT; the trailing 0 makes the alias
// accepted by the assembler but not used when printing.
def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
                    (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
                    (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;

// Population count maps directly onto CNT.
def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
          (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
          (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;
8092
// Match (xor x, all-ones) as NOT.  As with Neon_AllZero above, the
// all-ones vector is defined as a byte-vector pattern, so non-byte element
// types match it through a bitconvert.  NOT only has byte arrangements,
// which is sufficient since the operation is purely bitwise.
def : Pat<(v16i8 (xor
            (v16i8 VPR128:$Rn),
            (v16i8 Neon_AllOne))),
          (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (xor
            (v8i8 VPR64:$Rn),
            (v8i8 Neon_AllOne))),
          (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
def : Pat<(v8i16 (xor
            (v8i16 VPR128:$Rn),
            (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
          (NOT16b VPR128:$Rn)>;
def : Pat<(v4i16 (xor
            (v4i16 VPR64:$Rn),
            (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
          (NOT8b VPR64:$Rn)>;
def : Pat<(v4i32 (xor
            (v4i32 VPR128:$Rn),
            (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
          (NOT16b VPR128:$Rn)>;
def : Pat<(v2i32 (xor
            (v2i32 VPR64:$Rn),
            (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
          (NOT8b VPR64:$Rn)>;
def : Pat<(v2i64 (xor
            (v2i64 VPR128:$Rn),
            (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
          (NOT16b VPR128:$Rn)>;

// Bit-reversal intrinsic selects RBIT.
def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
          (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
          (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
8126
// Floating-point 2-register miscellaneous with S and D element sizes:
// 4s/2d on VPR128 and 2s on VPR64 (no 1d form).  The selection pattern is
// supplied inline from the given operator.
multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
                                SDPatternOperator Neon_Op> {
  def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4f32 VPR128:$Rd),
                           (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
                        NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [(set (v2f64 VPR128:$Rd),
                           (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2f32 VPR64:$Rd),
                           (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
                        NoItinerary>;
}

// FP absolute value and negate, matched from the generic fabs/fneg nodes.
defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
8153
// Integer narrowing operations (128-bit source, half-width elements) for
// H/S/D source element sizes.  The plain forms (8h8b/4s4h/2d2s) write a
// 64-bit destination; the "2" forms (8h16b/4s8h/2d4s) write the high half
// of a 128-bit destination, so they tie $src to $Rd to preserve the low
// half.  Patterns are attached via NeonI_2VMisc_Narrow_Patterns below.
multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
  def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.8b, $Rn.8h",
                          [], NoItinerary>;

  def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4s",
                          [], NoItinerary>;

  def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  // Constraint spelled "$src = $Rd" to match every other tied-operand
  // constraint in this file (was "$Rd = $src"; both tie the same pair).
  let Constraints = "$src = $Rd" in {
    def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.16b, $Rn.8h",
                             [], NoItinerary>;

    def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.8h, $Rn.4s",
                            [], NoItinerary>;

    def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.4s, $Rn.2d",
                            [], NoItinerary>;
  }
}

// Plain and saturating extract-narrow.  SQXTUN/XTN share opcode 0b10010,
// SQXTN/UQXTN share 0b10100, distinguished by the U bit.
defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;
8192
// Patterns for the integer narrowing instructions above.  The plain forms
// match a simple narrowing of a 128-bit source.  The "2" forms match a
// concat_vectors whose low half is an existing 64-bit value: that value is
// placed into the 128-bit register with SUBREG_TO_REG so the instruction's
// tied $src operand preserves it while the narrow fills the high half.
multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix,
                                        SDPatternOperator Neon_Op> {
  def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
            (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;

  def : Pat<(v16i8 (concat_vectors
              (v8i8 VPR64:$src),
              (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 8h16b)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;

  def : Pat<(v8i16 (concat_vectors
              (v4i16 VPR64:$src),
              (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 4s8h)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;

  def : Pat<(v4i32 (concat_vectors
              (v2i32 VPR64:$src),
              (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 2d4s)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;
}

// XTN is the generic truncate; the others match saturating-narrow
// intrinsics.
defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;
8230
// SHLL/SHLL2: lengthening shift whose immediate must equal the source
// element width, enforced by the uimm_exact8/16/32 operand classes and a
// custom decoder (DecodeSHLLInstruction).  The "2" forms take their source
// from the high half of a 128-bit register.
multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
  let DecoderMethod = "DecodeSHLLInstruction" in {
    def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR64:$Rn, uimm_exact8:$Imm),
                            asmop # "\t$Rd.8h, $Rn.8b, $Imm",
                            [], NoItinerary>;

    def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR64:$Rn, uimm_exact16:$Imm),
                            asmop # "\t$Rd.4s, $Rn.4h, $Imm",
                            [], NoItinerary>;

    def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR64:$Rn, uimm_exact32:$Imm),
                            asmop # "\t$Rd.2d, $Rn.2s, $Imm",
                            [], NoItinerary>;

    def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR128:$Rn, uimm_exact8:$Imm),
                            asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
                            [], NoItinerary>;

    def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR128:$Rn, uimm_exact16:$Imm),
                            asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
                            [], NoItinerary>;

    def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR128:$Rn, uimm_exact32:$Imm),
                            asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
                            [], NoItinerary>;
  }
}

defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;
8272
// Match (shl (ext x), (vdup <element width>)) as SHLL.  The low-half form
// takes a 64-bit source directly.
class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
                          SDPatternOperator ExtOp, Operand Neon_Imm,
                          string suffix>
  : Pat<(DesTy (shl
          (DesTy (ExtOp (OpTy VPR64:$Rn))),
            (DesTy (Neon_vdup
              (i32 Neon_Imm:$Imm))))),
        (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;

// High-half variant: the extended source is the upper half of a 128-bit
// register, extracted by the supplied GetHigh fragment.
class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
                               SDPatternOperator ExtOp, Operand Neon_Imm,
                               string suffix, PatFrag GetHigh>
  : Pat<(DesTy (shl
          (DesTy (ExtOp
            (OpTy (GetHigh VPR128:$Rn)))),
              (DesTy (Neon_vdup
                (i32 Neon_Imm:$Imm))))),
        (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;

// SHLL zero-fills the extra bits regardless of signedness, so both zext
// and sext forms map to the same instruction.
def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
                               Neon_High16B>;
def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
                               Neon_High16B>;
def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
                               Neon_High8H>;
def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
                               Neon_High8H>;
def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
                               Neon_High4S>;
def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
                               Neon_High4S>;
8310
// FP narrowing (fcvtn family).  Note the size field is 0b00 for the
// 4s->4h form and 0b01 for 2d->2s, unlike the integer narrows above.
// The "2" forms write the high half and tie $src to $Rd.
multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
  def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4s",
                          [], NoItinerary>;

  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  let Constraints = "$src = $Rd" in {
    def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.8h, $Rn.4s",
                            [], NoItinerary>;

    def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.4s, $Rn.2d",
                            [], NoItinerary>;
  }
}

defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;
8336
// Patterns for FCVTN: one operator covers the f32->f16 narrow (result is a
// v4i16/v8i16 of half-precision bits) and one the f64->f32 narrow.  The
// "2" forms again preserve an existing low half via SUBREG_TO_REG into the
// tied operand.
multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
                                       SDPatternOperator f32_to_f16_Op,
                                       SDPatternOperator f64_to_f32_Op> {

  def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
              (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;

  def : Pat<(v8i16 (concat_vectors
                (v4i16 VPR64:$src),
                (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
                  (!cast<Instruction>(prefix # "4s8h")
                    (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
                    (v4f32 VPR128:$Rn))>;

  def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;

  def : Pat<(v4f32 (concat_vectors
              (v2f32 VPR64:$src),
              (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
                (!cast<Instruction>(prefix # "2d4s")
                  (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
                  (v2f64 VPR128:$Rn))>;
}

defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;
8363
// FCVTXN/FCVTXN2 (matching the vcvtxn intrinsic): D-size-only f64->f32
// narrow, with the selection patterns defined inline in the multiclass.
multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
                                 bits<5> opcode> {
  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  // High-half form: ties $src to $Rd so the low half survives.
  def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "2\t$Rd.4s, $Rn.2d",
                          [], NoItinerary> {
    let Constraints = "$src = $Rd";
  }

  def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;

  def : Pat<(v4f32 (concat_vectors
              (v2f32 VPR64:$src),
              (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "2d4s")
               (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
               VPR128:$Rn)>;
}

defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>;

// Extract the upper two f32 elements of a v4f32 (cf. Neon_High8H etc.).
def Neon_High4Float : PatFrag<(ops node:$in),
                              (extract_subvector (v4f32 node:$in), (iPTR 2))>;
8393
// FP lengthening (fcvtl family): widen each element of a 64-bit source (or
// the high half of a 128-bit source, for the "2" forms) into a 128-bit
// result.
multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
  def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.4s, $Rn.4h",
                          [], NoItinerary>;

  def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.2d, $Rn.2s",
                          [], NoItinerary>;

  def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "2\t$Rd.4s, $Rn.8h",
                          [], NoItinerary>;

  def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "2\t$Rd.2d, $Rn.4s",
                          [], NoItinerary>;
}

defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;

// f16->f32 widening comes from the vcvthf2fp intrinsic; f32->f64 from the
// generic fextend node.  The "2" forms take the source's high half via the
// Neon_High* fragments.
multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;

  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
              (v4i16 (Neon_High8H
                (v8i16 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;

  def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;

  def : Pat<(v2f64 (fextend
              (v2f32 (Neon_High4Float
                (v4f32 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
}

defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;
8437
// Shared skeleton for the FP<->integer and FP-rounding conversions below:
// 4s/2d/2s arrangements with the result and operand value types supplied by
// the caller, so one definition serves fp->int, int->fp and fp->fp uses.
// The Size template bit forms the high bit of the two-bit size field.
multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
                                ValueType ResTy4s, ValueType OpTy4s,
                                ValueType ResTy2d, ValueType OpTy2d,
                                ValueType ResTy2s, ValueType OpTy2s,
                                SDPatternOperator Neon_Op> {

  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (ResTy4s VPR128:$Rd),
                           (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
                        NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [(set (ResTy2d VPR128:$Rd),
                           (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (ResTy2s VPR64:$Rd),
                           (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
                        NoItinerary>;
}
8465
// FP -> integer conversions: instantiate the shared SD_Conv skeleton with
// integer results and FP operands.
multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
                                v2f64, v2i32, v2f32, Neon_Op>;
}

// The suffix letters select the rounding mode (N/P/M/Z/A); signedness of
// the result comes from the U bit.  FCVTZS/FCVTZU match the generic
// fp_to_sint/fp_to_uint nodes; the rest use target intrinsics.
defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
                                     int_arm_neon_vcvtns>;
defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
                                     int_arm_neon_vcvtnu>;
defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
                                     int_arm_neon_vcvtps>;
defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
                                     int_arm_neon_vcvtpu>;
defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
                                     int_arm_neon_vcvtms>;
defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
                                     int_arm_neon_vcvtmu>;
defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
                                     int_arm_neon_vcvtas>;
defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
                                     int_arm_neon_vcvtau>;

// Integer -> FP conversions: the same skeleton with the type roles swapped.
multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
                                v2i64, v2f32, v2i32, Neon_Op>;
}

defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;
8499
// FP -> FP unary operations (rounding, reciprocal estimates, sqrt): the
// shared skeleton with identical FP operand and result types.
multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
                                 bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
                                v2f64, v2f32, v2f32, Neon_Op>;
}

// Round-to-integral in the various modes, plus reciprocal/rsqrt estimate
// and square root.  Most map to generic ISD nodes (frnd, fceil, ffloor,
// frint, ftrunc, fnearbyint, fsqrt); FRINTN and the estimates use
// intrinsics.
defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
                                     int_aarch64_neon_frintn>;
defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
                                    int_arm_neon_vrecpe>;
defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
                                     int_arm_neon_vrsqrte>;
defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
8519
// S-element-size-only unary operations on integer vectors (4s/2s
// arrangements), with the pattern inline.  Used for the unsigned
// reciprocal/rsqrt estimates, which reuse the vrecpe/vrsqrte intrinsics at
// i32 vector types.
multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
                               bits<5> opcode, SDPatternOperator Neon_Op> {
  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4i32 VPR128:$Rd),
                           (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2i32 VPR64:$Rd),
                           (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
                        NoItinerary>;
}

defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
                                  int_arm_neon_vrecpe>;
defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
                                   int_arm_neon_vrsqrte>;
8541
// Crypto Class
// AES round instructions that read and write the data register: the pattern
// consumes both $src (the in/out data) and $Rn (the round key), with $src
// tied to the destination below.
class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_AES<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                     asmop # "\t$Rd.16b, $Rn.16b",
                     [(set (v16i8 VPR128:$Rd),
                        (v16i8 (opnode (v16i8 VPR128:$src),
                                       (v16i8 VPR128:$Rn))))],
                     NoItinerary>{
  // Destructive operation: the accumulator input shares the output register.
  let Constraints = "$src = $Rd";
  // Only available when both NEON and the Crypto extension are present.
  let Predicates = [HasNEON, HasCrypto];
}

def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;
8558
// Single-input AES helper instructions (aesmc/aesimc); unlike the 2v class
// above there is no tied accumulator operand.
class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
                      string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_AES<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$Rn),
                     asmop # "\t$Rd.16b, $Rn.16b",
                     [(set (v16i8 VPR128:$Rd),
                        (v16i8 (opnode (v16i8 VPR128:$Rn))))],
                     NoItinerary>;

def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
8570
// Two-vector SHA schedule-update instructions on .4s vectors with a tied
// accumulator operand.
class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_SHA<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                     asmop # "\t$Rd.4s, $Rn.4s",
                     [(set (v4i32 VPR128:$Rd),
                        (v4i32 (opnode (v4i32 VPR128:$src),
                                       (v4i32 VPR128:$Rn))))],
                     NoItinerary> {
  // Destructive: $src is both read and overwritten via $Rd.
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
                                 int_arm_neon_sha1su1>;
def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
                                   int_arm_neon_sha256su0>;
8588
// Scalar SHA instruction on a single 32-bit FP register; the value is
// modeled as a one-element vector (v1i32) in the pattern.
class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_SHA<size, opcode,
                     (outs FPR32:$Rd), (ins FPR32:$Rn),
                     asmop # "\t$Rd, $Rn",
                     [(set (v1i32 FPR32:$Rd),
                        (v1i32 (opnode (v1i32 FPR32:$Rn))))],
                     NoItinerary> {
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
8601
// Three-vector SHA schedule updates: all operands are .4s vectors and the
// first input is tied to the destination.
class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs VPR128:$Rd),
                       (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
                       [(set (v4i32 VPR128:$Rd),
                          (v4i32 (opnode (v4i32 VPR128:$src),
                                         (v4i32 VPR128:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  // Destructive: $src is both read and overwritten via $Rd.
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
                                   int_arm_neon_sha1su0>;
def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
                                     int_arm_neon_sha256su1>;
8621
// SHA-256 hash updates: the two hash-state operands live in FPR128 registers
// (printed with no lane suffix in the asm string), while the schedule operand
// $Rm is a .4s vector; all three are typed v4i32 in the pattern.
class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs FPR128:$Rd),
                       (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd, $Rn, $Rm.4s",
                       [(set (v4i32 FPR128:$Rd),
                          (v4i32 (opnode (v4i32 FPR128:$src),
                                         (v4i32 FPR128:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  // Destructive: $src is both read and overwritten via $Rd.
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
                                   int_arm_neon_sha256h>;
def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
                                    int_arm_neon_sha256h2>;
8641
// SHA-1 hash updates: 128-bit hash state ($src, tied to $Rd), a scalar
// 32-bit operand $Rn (modeled as v1i32 in FPR32), and a .4s schedule vector.
// These use AArch64-specific intrinsics because of the scalar operand.
class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs FPR128:$Rd),
                       (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd, $Rn, $Rm.4s",
                       [(set (v4i32 FPR128:$Rd),
                          (v4i32 (opnode (v4i32 FPR128:$src),
                                         (v1i32 FPR32:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  // Destructive: $src is both read and overwritten via $Rd.
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;
8660
//
// Patterns for handling half-precision values
//

// Convert f16 value coming in as i16 value to f32: move the GPR into an
// FPR32 (FMOVsw), take its low 16-bit subregister as the f16, and convert
// with FCVTsh.
def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))),
          (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
// Same conversion when the upper bits are known clear via assertzext rather
// than an explicit mask.
def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))),
          (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;

// An f32 -> f16 -> f32 round trip is selected to the original register
// (no conversion instructions emitted).
def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 (
            f32_to_f16 (f32 FPR32:$Rn))))))),
          (f32 FPR32:$Rn)>;
8674
// Patterns for vector extract of half-precision FP value in i16 storage type
// 64-bit source: widen VPR64 to v8i16 (SUBREG_TO_REG) so DUPhv_H can read
// the lane as an f16, then convert it to f32 with FCVTsh.
def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
            (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))),
          (FCVTsh (f16 (DUPhv_H
            (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            neon_uimm2_bare:$Imm)))>;

// 128-bit source: DUPhv_H reads the lane directly (3-bit lane index).
def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
            (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))),
          (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>;
8685
// Patterns for vector insert of half-precision FP value 0 in i16 storage type
// Inserting the i16 representation of zero: materialize 0 in an FPR32 by
// FMOV from WZR, take its low 16 bits as the f16, widen to v8i16 and INSELh
// it into the requested lane.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
            (neon_uimm3_bare:$Imm))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn),
            (v8i16 (SUBREG_TO_REG (i64 0),
              (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
              sub_16)),
            neon_uimm3_bare:$Imm, 0))>;

// 64-bit destination: widen to v8i16, insert into the 2-bit lane, then take
// the low 64-bit half of the result back.
def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
            (neon_uimm2_bare:$Imm))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0),
                (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
                sub_16)),
              neon_uimm2_bare:$Imm, 0)),
            sub_64))>;
8707
// Patterns for vector insert of half-precision FP value in i16 storage type
// The i16-typed f16 arrives masked in a GPR; move it into an FPR32, take the
// low 16 bits and INSELh it into the destination lane.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint
              (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
            (neon_uimm3_bare:$Imm))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn),
            (v8i16 (SUBREG_TO_REG (i64 0),
              (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
              sub_16)),
            neon_uimm3_bare:$Imm, 0))>;

// 64-bit destination: same as above but widening $Rn to v8i16 first and
// extracting the low half afterwards.
def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint
              (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
            (neon_uimm2_bare:$Imm))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0),
                (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
                sub_16)),
              neon_uimm2_bare:$Imm, 0)),
            sub_64))>;
8731
// Moving a raw i16 lane between two v8i16 vectors maps directly onto a
// single INSELh (insert $src lane $Imm2 into $Rn lane $Imm1).
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
              (neon_uimm3_bare:$Imm1))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
            neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
8737
// Patterns for vector copy of half-precision FP value in i16 storage type
// An extract / f16->f32 / f32->i32 round trip whose result is immediately
// re-inserted is just a lane-to-lane copy, so it is selected to a single
// INSELh with no conversion instructions.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
              (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
              65535)))))))),
            (neon_uimm3_bare:$Imm1))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
            neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
8746
// 64-bit variant of the lane-copy fold above: widen both v4i16 operands to
// v8i16 so INSELh can perform the lane move, then extract the low half.
// Note: a v4i16 vector has only four lanes, so both the extract lane $Imm2
// and the insert lane $Imm1 are 2-bit immediates (neon_uimm2_bare), matching
// every other v4i16 pattern in this section; the previous use of
// neon_uimm3_bare would also have admitted invalid lanes 4-7.
def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
              (vector_extract (v4i16 VPR64:$src), neon_uimm2_bare:$Imm2)),
              65535)))))))),
            (neon_uimm2_bare:$Imm1))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
              neon_uimm2_bare:$Imm1, neon_uimm2_bare:$Imm2)),
            sub_64))>;
8758
8759